From ec7a8f72af8c34cfadcb19b574cef2a5e5d49593 Mon Sep 17 00:00:00 2001 From: netic-rkm <127091760+netic-rkm@users.noreply.github.com> Date: Wed, 7 Feb 2024 15:06:43 +0100 Subject: [PATCH] Feature/add support for vm alert (#15) * init vmalert * helm-docs + aoi alerting enablement feat * change default clusterid * helm-docs * disable alerting by default update version update docs * update helm package to support deploying alerting stack --------- Co-authored-by: Alexander Soelberg Heidarsson <89837986+alex5517@users.noreply.github.com> --- charts/aoi/.gitignore | 2 + charts/aoi/.helmignore | 2 + charts/aoi/CHANGELOG.MD | 1 + charts/aoi/Chart.lock | 15 +++ charts/aoi/Chart.yaml | 6 +- charts/aoi/README.md | 59 ++++++++++- charts/aoi/templates/_helpers.tpl | 23 ++++- .../HelmRelease.yaml | 93 ++++++++++++++++++ .../HelmRepository.yaml | 14 +++ .../_helpers.tpl | 67 +++++++++++++ .../bootstrapconfig.yaml | 69 +++++++++++++ .../networkpolicy.yaml | 98 +++++++++++++++++++ .../projectbootstrap.yaml | 24 +++++ .../application-operations-alerting/rbac.yaml | 39 ++++++++ .../bootstrapconfig.yaml | 3 +- .../projectbootstrap.yaml | 2 +- .../prometheus/_helpers-prometheus.tpl | 6 +- charts/aoi/templates/validate.yaml | 16 +++ charts/aoi/values.yaml | 96 ++++++++++++++++++ 19 files changed, 625 insertions(+), 10 deletions(-) create mode 100644 charts/aoi/.gitignore create mode 100644 charts/aoi/.helmignore create mode 100644 charts/aoi/CHANGELOG.MD create mode 100644 charts/aoi/Chart.lock create mode 100644 charts/aoi/templates/application-operations-alerting/HelmRelease.yaml create mode 100644 charts/aoi/templates/application-operations-alerting/HelmRepository.yaml create mode 100644 charts/aoi/templates/application-operations-alerting/_helpers.tpl create mode 100644 charts/aoi/templates/application-operations-alerting/bootstrapconfig.yaml create mode 100644 charts/aoi/templates/application-operations-alerting/networkpolicy.yaml create mode 100644 
charts/aoi/templates/application-operations-alerting/projectbootstrap.yaml create mode 100644 charts/aoi/templates/application-operations-alerting/rbac.yaml create mode 100644 charts/aoi/templates/validate.yaml diff --git a/charts/aoi/.gitignore b/charts/aoi/.gitignore new file mode 100644 index 0000000..d2fc687 --- /dev/null +++ b/charts/aoi/.gitignore @@ -0,0 +1,2 @@ +# Ignore dependencies downloaded by helm +charts/ \ No newline at end of file diff --git a/charts/aoi/.helmignore b/charts/aoi/.helmignore new file mode 100644 index 0000000..0269484 --- /dev/null +++ b/charts/aoi/.helmignore @@ -0,0 +1,2 @@ +.gitignore +CHANGELOG.md \ No newline at end of file diff --git a/charts/aoi/CHANGELOG.MD b/charts/aoi/CHANGELOG.MD new file mode 100644 index 0000000..3e94b9e --- /dev/null +++ b/charts/aoi/CHANGELOG.MD @@ -0,0 +1 @@ +# WiP \ No newline at end of file diff --git a/charts/aoi/Chart.lock b/charts/aoi/Chart.lock new file mode 100644 index 0000000..84a1163 --- /dev/null +++ b/charts/aoi/Chart.lock @@ -0,0 +1,15 @@ +dependencies: +- name: grafana + repository: https://grafana.github.io/helm-charts + version: 7.0.9 +- name: victoria-metrics-single + repository: https://victoriametrics.github.io/helm-charts/ + version: 0.9.12 +- name: victoria-metrics-single + repository: https://victoriametrics.github.io/helm-charts/ + version: 0.9.12 +- name: victoria-metrics-alert + repository: https://victoriametrics.github.io/helm-charts/ + version: 0.8.3 +digest: sha256:3f7d3160e0f43e6932e9e8e0c7fc3217457ed705a06a2f232ac4c273908919ba +generated: "2024-02-02T09:25:35.926638+01:00" diff --git a/charts/aoi/Chart.yaml b/charts/aoi/Chart.yaml index 56df20c..c36504d 100644 --- a/charts/aoi/Chart.yaml +++ b/charts/aoi/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: aoi description: A Helm chart for Netic application operations infrastructure type: application -version: 0.1.2 +version: 0.1.3 dependencies: - name: grafana version: 7.0.9 @@ -17,3 +17,7 @@ dependencies: version: 0.9.12 
repository: https://victoriametrics.github.io/helm-charts/ condition: global.tsdb.high_availability.enabled + - name: victoria-metrics-alert + version: 0.8.3 + repository: https://victoriametrics.github.io/helm-charts/ + condition: alerting.enabled diff --git a/charts/aoi/README.md b/charts/aoi/README.md index 5a3a2a3..696f7fa 100644 --- a/charts/aoi/README.md +++ b/charts/aoi/README.md @@ -1,6 +1,6 @@ # aoi -![Version: 0.1.4](https://img.shields.io/badge/Version-0.1.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.1.3-rc1](https://img.shields.io/badge/Version-0.1.3--rc1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for Netic application operations infrastructure @@ -9,6 +9,7 @@ A Helm chart for Netic application operations infrastructure | Repository | Name | Version | |------------|------|---------| | https://grafana.github.io/helm-charts | grafana | 7.0.9 | +| https://victoriametrics.github.io/helm-charts/ | victoria-metrics-alert | 0.8.3 | | https://victoriametrics.github.io/helm-charts/ | victoria-metrics-single-1(victoria-metrics-single) | 0.9.12 | | https://victoriametrics.github.io/helm-charts/ | victoria-metrics-single-2(victoria-metrics-single) | 0.9.12 | @@ -16,6 +17,55 @@ A Helm chart for Netic application operations infrastructure | Key | Type | Default | Description | |-----|------|---------|-------------| +| alerting.clusterId | string | `""` | | +| alerting.clusterWideNamespace.bootstrapConfig.externalSecretsStore | object | `{}` | | +| alerting.clusterWideNamespace.bootstrapConfig.git.github.secretRef | string | `"tcs-github-auth"` | | +| alerting.clusterWideNamespace.bootstrapConfig.git.github.template.adminTeam | string | `"oaas-team"` | | +| alerting.clusterWideNamespace.bootstrapConfig.git.github.template.owner | string | `"neticdk-k8s"` | | +| 
alerting.clusterWideNamespace.bootstrapConfig.git.github.template.repo | string | `"tenant-alerting-template"` | | +| alerting.clusterWideNamespace.bootstrapConfig.vault | object | `{}` | | +| alerting.clusterWideNamespace.enabled | bool | `false` | | +| alerting.clusterWideNamespace.name | string | `"application-operations-alerting"` | | +| alerting.clusterWideNamespace.projectBootstrap.git | object | `{}` | | +| alerting.enabled | bool | `false` | | +| alerting.helmRelease.values.alertmanager.image.pullPolicy | string | `"Always"` | | +| alerting.helmRelease.values.alertmanager.image.registry | string | `"docker.io"` | | +| alerting.helmRelease.values.alertmanager.image.repository | string | `"prom/alertmanager"` | | +| alerting.helmRelease.values.alertmanager.podSecurityContext.fsGroup | int | `2000` | | +| alerting.helmRelease.values.alertmanager.podSecurityContext.runAsGroup | int | `3000` | | +| alerting.helmRelease.values.alertmanager.podSecurityContext.runAsUser | int | `1000` | | +| alerting.helmRelease.values.alertmanager.priorityClassName | string | `"secure-cloud-stack-tenant-namespace-application-critical"` | | +| alerting.helmRelease.values.alertmanager.resources.limits.memory | string | `"64Mi"` | | +| alerting.helmRelease.values.alertmanager.resources.requests.cpu | string | `"10m"` | | +| alerting.helmRelease.values.alertmanager.resources.requests.memory | string | `"64Mi"` | | +| alerting.helmRelease.values.alertmanager.securityContext.allowPrivilegeEscalation | bool | `false` | | +| alerting.helmRelease.values.alertmanager.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| alerting.helmRelease.values.alertmanager.securityContext.readOnlyRootFilesystem | bool | `true` | | +| alerting.helmRelease.values.server.configReloader.image.pullPolicy | string | `"Always"` | | +| alerting.helmRelease.values.server.configReloader.image.registry | string | `"docker.io"` | | +| alerting.helmRelease.values.server.configReloader.image.repository | 
string | `"kiwigrid/k8s-sidecar"` | | +| alerting.helmRelease.values.server.configReloader.image.tag | string | `"1.25.2"` | | +| alerting.helmRelease.values.server.configReloader.resources.limits.memory | string | `"96Mi"` | | +| alerting.helmRelease.values.server.configReloader.resources.requests.cpu | string | `"10m"` | | +| alerting.helmRelease.values.server.configReloader.resources.requests.memory | string | `"96Mi"` | | +| alerting.helmRelease.values.server.configReloader.securityContext.allowPrivilegeEscalation | bool | `false` | | +| alerting.helmRelease.values.server.configReloader.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| alerting.helmRelease.values.server.configReloader.securityContext.readOnlyRootFilesystem | bool | `true` | | +| alerting.helmRelease.values.server.image.pullPolicy | string | `"Always"` | | +| alerting.helmRelease.values.server.image.registry | string | `"docker.io"` | | +| alerting.helmRelease.values.server.image.repository | string | `"victoriametrics/vmalert"` | | +| alerting.helmRelease.values.server.podSecurityContext.fsGroup | int | `2000` | | +| alerting.helmRelease.values.server.podSecurityContext.runAsGroup | int | `3000` | | +| alerting.helmRelease.values.server.podSecurityContext.runAsUser | int | `1000` | | +| alerting.helmRelease.values.server.priorityClassName | string | `"secure-cloud-stack-tenant-namespace-application-critical"` | | +| alerting.helmRelease.values.server.resources.limits.memory | string | `"64Mi"` | | +| alerting.helmRelease.values.server.resources.requests.cpu | string | `"10m"` | | +| alerting.helmRelease.values.server.resources.requests.memory | string | `"64Mi"` | | +| alerting.helmRelease.values.server.securityContext.allowPrivilegeEscalation | bool | `false` | | +| alerting.helmRelease.values.server.securityContext.capabilities.drop[0] | string | `"all"` | | +| alerting.helmRelease.values.server.securityContext.readOnlyRootFilesystem | bool | `true` | | +| alerting.helmRepository 
| string | `nil` | | +| alerting.namespaces | list | `[]` | | | authProxy.affinity | list | `[]` | | | authProxy.annotations | object | `{}` | | | authProxy.enabled | bool | `true` | | @@ -90,10 +140,13 @@ A Helm chart for Netic application operations infrastructure | prometheus.configReloader.resources.limits.memory | string | `"25Mi"` | | | prometheus.configReloader.resources.requests.cpu | string | `"10m"` | | | prometheus.configReloader.resources.requests.memory | string | `"25Mi"` | | +| prometheus.extraVolumeMounts | list | `[]` | | +| prometheus.extraVolumes | list | `[]` | | | prometheus.image.registry | string | `"docker.io"` | | | prometheus.image.repository | string | `"victoriametrics/vmagent"` | | | prometheus.image.tag | string | `"v1.91.2"` | | | prometheus.persistence.size | string | `"60Gi"` | | +| prometheus.podAnnotations | object | `{}` | | | prometheus.podSecurityContext.fsGroup | int | `2000` | | | prometheus.podSecurityContext.runAsGroup | int | `3000` | | | prometheus.podSecurityContext.runAsNonRoot | bool | `true` | | @@ -131,6 +184,10 @@ A Helm chart for Netic application operations infrastructure | promxy.terminationGracePeriodSeconds | int | `30` | | | promxy.tolerations | list | `[]` | | | promxy.topologySpauthProxyConstraints | list | `[]` | | +| victoria-metrics-alert.rbac.create | bool | `false` | | +| victoria-metrics-alert.server.configMap | string | `"null"` | | +| victoria-metrics-alert.server.enabled | bool | `false` | | +| victoria-metrics-alert.serviceAccount.create | bool | `false` | | | victoria-metrics-single-1.rbac.create | bool | `false` | | | victoria-metrics-single-1.server.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key | string | `"app.kubernetes.io/name"` | | | victoria-metrics-single-1.server.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator | string | `"In"` | | diff --git 
a/charts/aoi/templates/_helpers.tpl b/charts/aoi/templates/_helpers.tpl index 4857653..f4c21ed 100644 --- a/charts/aoi/templates/_helpers.tpl +++ b/charts/aoi/templates/_helpers.tpl @@ -84,12 +84,31 @@ Create the port for the read path to tsdb. {{- end }} {{/* -Grafana read url +metrics read url +*/}} +{{- define "aoi.readUrl" -}} +{{- printf "http://%s-%s.%s.svc.%s:%s" (include "aoi.name" . ) (include "aoi.readHost" . ) .Release.Namespace .Values.global.clusterDomain (include "aoi.readPort" . ) }} +{{- end }} + + +{{/* +metrics read url for grafana */}} {{- define "aoi.grafanaReadUrl" -}} {{- if .Values.authProxy.enabled }} {{- printf "http://%s-auth-proxy.%s.svc.%s:8080" (include "aoi.name" . ) .Release.Namespace .Values.global.clusterDomain }} {{- else }} -{{- printf "http://%s-%s.%s.svc.%s:8082" (include "aoi.name" . ) (include "aoi.readHost" . ) .Release.Namespace .Values.global.clusterDomain (include "aoi.readPort" . ) }} +{{- printf "http://%s-%s.%s.svc.%s:%s" (include "aoi.name" . ) (include "aoi.readHost" . ) .Release.Namespace .Values.global.clusterDomain (include "aoi.readPort" . ) }} +{{- end }} +{{- end }} + +{{/* +Create the label value for the victoria-metrics app.kubernetes.io/name label. +*/}} +{{- define "aoi.vmLableName" -}} +{{- if .Values.global.tsdb.high_availability.enabled }} +{{- printf "promxy" }} +{{- else }} +{{- printf "victoria-metrics-single-1" }} {{- end }} {{- end }} diff --git a/charts/aoi/templates/application-operations-alerting/HelmRelease.yaml b/charts/aoi/templates/application-operations-alerting/HelmRelease.yaml new file mode 100644 index 0000000..76db54f --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/HelmRelease.yaml @@ -0,0 +1,93 @@ +{{- if .Values.alerting.enabled }} +{{- range $i, $namespace := (include "aoi.alerting.namespaces" . 
| fromJsonArray) }} +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta1 +kind: HelmRelease +metadata: + name: victoria-metrics-alert + namespace: {{ $namespace }} +spec: + chart: + spec: + chart: victoria-metrics-alert + version: {{ include "aoi.alerting.chartVersion" $ }} + sourceRef: + kind: HelmRepository + name: victoria-metrics + interval: 5m + interval: 5m + timeout: 15m + values: + serviceAccount: + create: false + name: victoria-metrics-alert-config-reloader + rbac: + create: false + server: + {{- include "aoi.serverPriorityClassName" $ | nindent 6 }} + image: + repository: "{{ $.Values.alerting.helmRelease.values.server.image.registry }}/{{ $.Values.alerting.helmRelease.values.server.image.repository }}" + pullPolicy: {{ $.Values.alerting.helmRelease.values.server.image.pullPolicy }} + resources: + {{- toYaml $.Values.alerting.helmRelease.values.server.resources | nindent 8 }} + extraArgs: + rule: '/tmp/rules/*.yaml' + extraVolumes: + - name: alert-rules + emptyDir: {} + extraVolumeMounts: + - name: alert-rules + mountPath: /tmp/rules + datasource: + {{- if eq $namespace $.Values.alerting.clusterWideNamespace.name }} + url: "{{ include "aoi.readUrl" $ }}" + {{- else }} + url: "{{ include "aoi.readUrl" $ }}?extra_label=namespace_id={{ $.Values.alerting.clusterId }}_{{ $namespace }}" + {{- end }} + securityContext: + {{- toYaml $.Values.alerting.helmRelease.values.server.securityContext | nindent 8 }} + extraContainers: + - name: config-reloader + image: "{{ $.Values.alerting.helmRelease.values.server.configReloader.image.registry }}/{{ $.Values.alerting.helmRelease.values.server.configReloader.image.repository }}:{{ $.Values.alerting.helmRelease.values.server.configReloader.image.tag }}" + imagePullPolicy: {{ $.Values.alerting.helmRelease.values.server.configReloader.image.pullPolicy }} + resources: + {{- toYaml $.Values.alerting.helmRelease.values.server.configReloader.resources | nindent 12 }} + securityContext: + {{- toYaml 
$.Values.alerting.helmRelease.values.server.configReloader.securityContext | nindent 12 }} + env: + - name: IGNORE_ALREADY_PROCESSED + value: "true" + - name: METHOD + value: WATCH + - name: LABEL + value: application-operations-alerting + - name: FOLDER + value: /tmp/rules + - name: RESOURCE + value: configmap + - name: REQ_URL + value: http://localhost:8880/-/reload + - name: REQ_METHOD + value: GET + volumeMounts: + - name: alert-rules + mountPath: /tmp/rules + podSecurityContext: + {{- toYaml $.Values.alerting.helmRelease.values.server.podSecurityContext | nindent 8 }} + alertmanager: + enabled: true + {{- include "aoi.alertmanagerPriorityClassName" $ | nindent 6 }} + image: "{{ $.Values.alerting.helmRelease.values.alertmanager.image.registry }}/{{ $.Values.alerting.helmRelease.values.alertmanager.image.repository }}" + resources: + {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.resources | nindent 8 }} + configMap: "alertmanager-config" + securityContext: + {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.securityContext | nindent 8 }} + podSecurityContext: + {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.podSecurityContext | nindent 8 }} + serviceMonitor: + enabled: true + extraLabels: + netic.dk/monitoring: "true" +{{- end }} +{{- end }} diff --git a/charts/aoi/templates/application-operations-alerting/HelmRepository.yaml b/charts/aoi/templates/application-operations-alerting/HelmRepository.yaml new file mode 100644 index 0000000..f206d35 --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/HelmRepository.yaml @@ -0,0 +1,14 @@ +{{- if .Values.alerting.enabled }} +{{- range $i, $namespace := (include "aoi.alerting.namespaces" . 
| fromJsonArray) }} +--- +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: victoria-metrics + namespace: {{ $namespace }} +spec: + interval: 30m0s + url: {{ include "aoi.alerting.helmRepository" $ }} + type: {{ include "aoi.alerting.helmRepository.type" $ }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/aoi/templates/application-operations-alerting/_helpers.tpl b/charts/aoi/templates/application-operations-alerting/_helpers.tpl new file mode 100644 index 0000000..7471ebf --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/_helpers.tpl @@ -0,0 +1,67 @@ +{{/* +list of namespaces that should have alerting enabled +*/}} +{{- define "aoi.alerting.namespaces" -}} + {{ $newList := list }} + {{- if .Values.alerting.clusterWideNamespace.enabled }} + {{ $newList = prepend .Values.alerting.namespaces .Values.alerting.clusterWideNamespace.name }} + {{- else }} + {{ $newList = .Values.alerting.namespaces }} + {{- end }} + {{ toJson $newList }} +{{- end }} + +{{/* +HelmRepository for victoria-metrics-alert +*/}} +{{- define "aoi.alerting.helmRepository" -}} + {{- range $i, $dep := .Chart.Dependencies }} + {{- if eq $dep.Name "victoria-metrics-alert" }} + {{- default $dep.Repository $.Values.alerting.helmRepository }} + {{- end }} + {{- end }} +{{- end }} + +{{/* +HelmRepository type +*/}} +{{- define "aoi.alerting.helmRepository.type" -}} + {{- if hasPrefix "oci" ( include "aoi.alerting.helmRepository" . 
) }} + {{- print "oci" -}} + {{- else }} + {{- print "default" -}} + {{- end }} +{{- end }} + +{{/* +Helm chart version for victoria-metrics-alert +*/}} +{{- define "aoi.alerting.chartVersion" -}} + {{- range $i, $dep := .Chart.Dependencies }} + {{- if eq $dep.Name "victoria-metrics-alert" }} + {{- $dep.Version }} + {{- end }} + {{- end }} +{{- end }} + +{{/* +vmalert server priority class name (global.priorityClassName takes precedence) +*/}} +{{- define "aoi.serverPriorityClassName" -}} +{{- $pcn := coalesce .Values.global.priorityClassName .Values.alerting.helmRelease.values.server.priorityClassName -}} +{{- if $pcn -}} +priorityClassName: {{ $pcn }} +{{- end }} +{{- end }} + +{{/* +alertmanager priority class name (global.priorityClassName takes precedence) +*/}} +{{- define "aoi.alertmanagerPriorityClassName" -}} +{{- $pcn := coalesce .Values.global.priorityClassName .Values.alerting.helmRelease.values.alertmanager.priorityClassName -}} +{{- if $pcn -}} +priorityClassName: {{ $pcn }} +{{- end }} +{{- end }} + + diff --git a/charts/aoi/templates/application-operations-alerting/bootstrapconfig.yaml b/charts/aoi/templates/application-operations-alerting/bootstrapconfig.yaml new file mode 100644 index 0000000..e08b4f6 --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/bootstrapconfig.yaml @@ -0,0 +1,69 @@ +{{- if and .Values.alerting.enabled .Values.alerting.clusterWideNamespace.enabled }} +--- +apiVersion: project.tcs.trifork.com/v1alpha1 +kind: BootstrapConfig +metadata: + name: application-operations-alerting + namespace: netic-gitops-system +spec: + sizes: + application-operations-alerting: + limitRange: + limits: + - type: Container + default: + memory: 1Mi + defaultRequest: + cpu: "1m" + memory: 1Mi + resourceQuota: + hard: + requests.cpu: "1" + requests.memory: "1Gi" + requests.storage: "100Gi" + limits.memory: "1Gi" + persistentvolumeclaims: "1" + count/deployments.apps: "2" + rules: # Role rules for deployment user + - apiGroups: [""] + resources: ["services", "configmaps", "secrets", "persistentvolumeclaims"] + verbs: ["*"] + - 
apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "replicasets"] + verbs: ["*"] + - apiGroups: ["monitoring.coreos.com"] + resources: ["servicemonitors", "podmonitors"] + verbs: ["*"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["*"] + - apiGroups: ["helm.toolkit.fluxcd.io"] + resources: ["helmreleases"] + verbs: ["*"] + - apiGroups: ["source.toolkit.fluxcd.io"] + resources: ["helmrepositories"] + verbs: ["*"] + + networkPolicies: # Default network policies - deny-all will always be added + default-egress: + podSelector: {} + policyTypes: + - Egress + egress: + - ports: + - protocol: TCP + port: 53 + - protocol: UDP + port: 53 + + labels: + netic.dk/enforce-policies: "true" + git: + {{- toYaml .Values.alerting.clusterWideNamespace.bootstrapConfig.git | nindent 4 }} + + vault: + {{- toYaml .Values.alerting.clusterWideNamespace.bootstrapConfig.vault | nindent 4 }} + + externalSecretsStore: + {{- toYaml .Values.alerting.clusterWideNamespace.bootstrapConfig.externalSecretsStore | nindent 4 }} +{{- end }} diff --git a/charts/aoi/templates/application-operations-alerting/networkpolicy.yaml b/charts/aoi/templates/application-operations-alerting/networkpolicy.yaml new file mode 100644 index 0000000..114a824 --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/networkpolicy.yaml @@ -0,0 +1,98 @@ +{{- if .Values.alerting.enabled }} +{{- range $i, $namespace := (include "aoi.alerting.namespaces" . 
| fromJsonArray) }} +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: vmalert-alertmanager-egress + namespace: {{ $namespace }} +spec: + podSelector: + matchLabels: + app: alertmanager + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: victoria-metrics-alert + policyTypes: + - Egress + egress: + - ports: + - port: 443 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: vmalert-alertmanager-ingress + namespace: {{ $namespace }} +spec: + podSelector: + matchLabels: + app: alertmanager + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: victoria-metrics-alert + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: server + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: victoria-metrics-alert + ports: + - port: web + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: vmalert-server-egress + namespace: {{ $namespace }} +spec: + podSelector: + matchLabels: + app: server + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: victoria-metrics-alert + policyTypes: + - Egress + egress: + - ports: + - port: 6443 + protocol: TCP + - to: + - podSelector: + matchLabels: + app: alertmanager + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: victoria-metrics-alert + ports: + - port: web + protocol: TCP + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ $.Release.Namespace }} + podSelector: + matchLabels: + app: server + app.kubernetes.io/name: {{ include "aoi.vmLableName" $ }} + ports: + - port: http + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: vmalert-server-ingress + namespace: {{ $namespace }} +spec: + podSelector: + matchLabels: + app: server + app.kubernetes.io/instance: victoria-metrics-alert + app.kubernetes.io/name: 
victoria-metrics-alert + policyTypes: + - Ingress +{{- end }} +{{- end }} diff --git a/charts/aoi/templates/application-operations-alerting/projectbootstrap.yaml b/charts/aoi/templates/application-operations-alerting/projectbootstrap.yaml new file mode 100644 index 0000000..2090bc0 --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/projectbootstrap.yaml @@ -0,0 +1,24 @@ +{{- if and .Values.alerting.enabled .Values.alerting.clusterWideNamespace.enabled }} +--- +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.alerting.clusterWideNamespace.name }} + labels: + netic.dk/monitoring: "true" + annotations: + netic.dk/alerting: "true" +--- +apiVersion: project.tcs.trifork.com/v1alpha1 +kind: ProjectBootstrap +metadata: + name: {{ .Values.alerting.clusterWideNamespace.name }} + namespace: netic-gitops-system +spec: + namespace: {{ .Values.alerting.clusterWideNamespace.name }} + config: + ref: application-operations-alerting + size: application-operations-alerting + git: + {{- toYaml .Values.alerting.clusterWideNamespace.projectBootstrap.git | nindent 4 }} +{{- end }} \ No newline at end of file diff --git a/charts/aoi/templates/application-operations-alerting/rbac.yaml b/charts/aoi/templates/application-operations-alerting/rbac.yaml new file mode 100644 index 0000000..776068c --- /dev/null +++ b/charts/aoi/templates/application-operations-alerting/rbac.yaml @@ -0,0 +1,39 @@ +{{- if .Values.alerting.enabled }} +{{- range $i, $namespace := (include "aoi.alerting.namespaces" . 
| fromJsonArray) }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: victoria-metrics-alert-config-reloader + namespace: {{ $namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: config-reloader-role + namespace: {{ $namespace }} +rules: + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: config-reloader-rolebinding + namespace: {{ $namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: config-reloader-role +subjects: + - kind: ServiceAccount + name: victoria-metrics-alert-config-reloader + namespace: {{ $namespace }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/aoi/templates/application-operations-dashboards/bootstrapconfig.yaml b/charts/aoi/templates/application-operations-dashboards/bootstrapconfig.yaml index 5c47768..cb910bf 100644 --- a/charts/aoi/templates/application-operations-dashboards/bootstrapconfig.yaml +++ b/charts/aoi/templates/application-operations-dashboards/bootstrapconfig.yaml @@ -25,7 +25,6 @@ spec: limits.memory: "0Gi" persistentvolumeclaims: "0" count/deployments.apps: "0" - rules: # Role rules for deployment user - apiGroups: [""] resources: ["configmaps"] @@ -40,4 +39,4 @@ spec: {{- toYaml .Values.dashboards.bootstrapConfig.vault | nindent 4 }} externalSecretsStore: - {{- toYaml .Values.dashboards.bootstrapConfig.externalSecretsStore | nindent 4 }} \ No newline at end of file + {{- toYaml .Values.dashboards.bootstrapConfig.externalSecretsStore | nindent 4 }} diff --git a/charts/aoi/templates/application-operations-dashboards/projectbootstrap.yaml b/charts/aoi/templates/application-operations-dashboards/projectbootstrap.yaml index e1f9a1e..a2406b8 100644 --- a/charts/aoi/templates/application-operations-dashboards/projectbootstrap.yaml +++ b/charts/aoi/templates/application-operations-dashboards/projectbootstrap.yaml @@ 
-7,7 +7,7 @@ metadata: spec: namespace: application-operations-dashboards config: - ref: application-operations-dashboards + ref: application-operations-infrastructure size: application-operations-dashboards git: {{- toYaml .Values.dashboards.projectBootstrap.git | nindent 4 }} diff --git a/charts/aoi/templates/prometheus/_helpers-prometheus.tpl b/charts/aoi/templates/prometheus/_helpers-prometheus.tpl index 13689ec..9bd4b84 100644 --- a/charts/aoi/templates/prometheus/_helpers-prometheus.tpl +++ b/charts/aoi/templates/prometheus/_helpers-prometheus.tpl @@ -37,9 +37,9 @@ Create remotewrite url for tsdb. */}} {{- define "aoi.prometheusWriteUrl" -}} {{- if .Values.global.tsdb.high_availability.enabled }} -{{- printf "http://victoria-metrics-single-1-server:8428/api/v1/write,http://victoria-metrics-single-2-server:8428/api/v1/write" }} +{{- printf "http://%s-victoria-metrics-single-1-server:8428/api/v1/write,http://%s-victoria-metrics-single-2-server:8428/api/v1/write" (include "aoi.name" . ) (include "aoi.name" . ) }} {{- else }} -{{- printf "http://victoria-metrics-single-1-server:8428/api/v1/write" }} +{{- printf "http://%s-victoria-metrics-single-1-server:8428/api/v1/write" (include "aoi.name" . ) }} {{- end }} {{- end }} @@ -55,4 +55,4 @@ Create remotewrite.MaxDiskUsagePerURL for vmagent. {{- $ModifiedDiskSize := sub $diskSize 10 | toString}} {{- printf "%sGB" $ModifiedDiskSize }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/aoi/templates/validate.yaml b/charts/aoi/templates/validate.yaml new file mode 100644 index 0000000..42e7374 --- /dev/null +++ b/charts/aoi/templates/validate.yaml @@ -0,0 +1,16 @@ +{{/* validate flux CRDs */}} +{{- if .Values.alerting.enabled }} + {{- if not (.Capabilities.APIVersions.Has "source.toolkit.fluxcd.io/v1beta2") }} + {{ fail "alerting requires source.toolkit.fluxcd.io/v1beta2 CRDs." 
}} + {{- end }} + {{- if not (.Capabilities.APIVersions.Has "helm.toolkit.fluxcd.io/v2beta1") }} + {{ fail "alerting requires helm.toolkit.fluxcd.io/v2beta1 CRDs." }} + {{- end }} +{{- end }} + +{{/* validate cluster_id is defined */}} +{{- if .Values.alerting.enabled }} + {{- if (eq .Values.alerting.clusterId "") }} + {{ fail "alerting is enabled, clusterId must be defined and set to the value of the label cluster_id" }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/aoi/values.yaml b/charts/aoi/values.yaml index 8e1f1f6..feaeb21 100644 --- a/charts/aoi/values.yaml +++ b/charts/aoi/values.yaml @@ -286,3 +286,99 @@ victoria-metrics-single-2: enabled: true extraLabels: netic.dk/monitoring: "true" + +alerting: + enabled: false + # value of label cluster_id + clusterId: "" + clusterWideNamespace: + enabled: false + name: application-operations-alerting + bootstrapConfig: + git: + github: + secretRef: tcs-github-auth + template: + adminTeam: oaas-team + owner: neticdk-k8s + repo: tenant-alerting-template + vault: {} + externalSecretsStore: {} + projectBootstrap: + git: {} + namespaces: [] + helmRepository: null + helmRelease: + values: + server: + priorityClassName: "secure-cloud-stack-tenant-namespace-application-critical" + image: + registry: docker.io + repository: victoriametrics/vmalert + pullPolicy: Always + resources: + limits: + memory: 64Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - all + configReloader: + image: + registry: docker.io + repository: kiwigrid/k8s-sidecar + tag: "1.25.2" + pullPolicy: Always + resources: + limits: + memory: 96Mi + requests: + cpu: 10m + memory: 96Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + podSecurityContext: + runAsUser: 1000 + runAsGroup: 3000 + fsGroup: 2000 + alertmanager: + priorityClassName: 
"secure-cloud-stack-tenant-namespace-application-critical" + image: + registry: docker.io + repository: prom/alertmanager + pullPolicy: Always + resources: + limits: + memory: 64Mi + requests: + cpu: 10m + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + podSecurityContext: + runAsUser: 1000 + runAsGroup: 3000 + fsGroup: 2000 + +# This is here to disable everything from the victoria-metrics-alert helm chart in chart.yaml since it is only used for versioning +victoria-metrics-alert: + serviceAccount: + create: false + rbac: + create: false + server: + enabled: false + configMap: "null"