Skip to content

Commit

Permalink
Feature/add support for vm alert (#15)
Browse files Browse the repository at this point in the history
* init vmalert

* helm-docs + aoi alerting enablement feat

* change default clusterid

* helm-docs

* disable alerting by default
update version
update docs

* update helm package to support deploying alerting stack

---------

Co-authored-by: Alexander Soelberg Heidarsson <[email protected]>
  • Loading branch information
netic-rkm and alex5517 authored Feb 7, 2024
1 parent 692e3e1 commit ec7a8f7
Show file tree
Hide file tree
Showing 19 changed files with 625 additions and 10 deletions.
2 changes: 2 additions & 0 deletions charts/aoi/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore dependencies downloaded by helm
charts/
2 changes: 2 additions & 0 deletions charts/aoi/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Files that must not be packaged into the chart archive.
.gitignore
# Keep both spellings: patterns are matched case-sensitively and the changelog
# in this chart is named CHANGELOG.MD.
CHANGELOG.md
CHANGELOG.MD
1 change: 1 addition & 0 deletions charts/aoi/CHANGELOG.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# WiP
15 changes: 15 additions & 0 deletions charts/aoi/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
dependencies:
- name: grafana
repository: https://grafana.github.io/helm-charts
version: 7.0.9
- name: victoria-metrics-single
repository: https://victoriametrics.github.io/helm-charts/
version: 0.9.12
- name: victoria-metrics-single
repository: https://victoriametrics.github.io/helm-charts/
version: 0.9.12
- name: victoria-metrics-alert
repository: https://victoriametrics.github.io/helm-charts/
version: 0.8.3
digest: sha256:3f7d3160e0f43e6932e9e8e0c7fc3217457ed705a06a2f232ac4c273908919ba
generated: "2024-02-02T09:25:35.926638+01:00"
6 changes: 5 additions & 1 deletion charts/aoi/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: aoi
description: A Helm chart for Netic application operations infrastructure
type: application
version: 0.1.2
version: 0.1.3
dependencies:
- name: grafana
version: 7.0.9
Expand All @@ -17,3 +17,7 @@ dependencies:
version: 0.9.12
repository: https://victoriametrics.github.io/helm-charts/
condition: global.tsdb.high_availability.enabled
- name: victoria-metrics-alert
version: 0.8.3
repository: https://victoriametrics.github.io/helm-charts/
condition: alerting.enabled
59 changes: 58 additions & 1 deletion charts/aoi/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# aoi

![Version: 0.1.4](https://img.shields.io/badge/Version-0.1.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.1.3-rc1](https://img.shields.io/badge/Version-0.1.3--rc1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)

A Helm chart for Netic application operations infrastructure

Expand All @@ -9,13 +9,63 @@ A Helm chart for Netic application operations infrastructure
| Repository | Name | Version |
|------------|------|---------|
| https://grafana.github.io/helm-charts | grafana | 7.0.9 |
| https://victoriametrics.github.io/helm-charts/ | victoria-metrics-alert | 0.8.3 |
| https://victoriametrics.github.io/helm-charts/ | victoria-metrics-single-1(victoria-metrics-single) | 0.9.12 |
| https://victoriametrics.github.io/helm-charts/ | victoria-metrics-single-2(victoria-metrics-single) | 0.9.12 |

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| alerting.clusterId | string | `""` | |
| alerting.clusterWideNamespace.bootstrapConfig.externalSecretsStore | object | `{}` | |
| alerting.clusterWideNamespace.bootstrapConfig.git.github.secretRef | string | `"tcs-github-auth"` | |
| alerting.clusterWideNamespace.bootstrapConfig.git.github.template.adminTeam | string | `"oaas-team"` | |
| alerting.clusterWideNamespace.bootstrapConfig.git.github.template.owner | string | `"neticdk-k8s"` | |
| alerting.clusterWideNamespace.bootstrapConfig.git.github.template.repo | string | `"tenant-alerting-template"` | |
| alerting.clusterWideNamespace.bootstrapConfig.vault | object | `{}` | |
| alerting.clusterWideNamespace.enabled | bool | `false` | |
| alerting.clusterWideNamespace.name | string | `"application-operations-alerting"` | |
| alerting.clusterWideNamespace.projectBootstrap.git | object | `{}` | |
| alerting.enabled | bool | `false` | |
| alerting.helmRelease.values.alertmanager.image.pullPolicy | string | `"Always"` | |
| alerting.helmRelease.values.alertmanager.image.registry | string | `"docker.io"` | |
| alerting.helmRelease.values.alertmanager.image.repository | string | `"prom/alertmanager"` | |
| alerting.helmRelease.values.alertmanager.podSecurityContext.fsGroup | int | `2000` | |
| alerting.helmRelease.values.alertmanager.podSecurityContext.runAsGroup | int | `3000` | |
| alerting.helmRelease.values.alertmanager.podSecurityContext.runAsUser | int | `1000` | |
| alerting.helmRelease.values.alertmanager.priorityClassName | string | `"secure-cloud-stack-tenant-namespace-application-critical"` | |
| alerting.helmRelease.values.alertmanager.resources.limits.memory | string | `"64Mi"` | |
| alerting.helmRelease.values.alertmanager.resources.requests.cpu | string | `"10m"` | |
| alerting.helmRelease.values.alertmanager.resources.requests.memory | string | `"64Mi"` | |
| alerting.helmRelease.values.alertmanager.securityContext.allowPrivilegeEscalation | bool | `false` | |
| alerting.helmRelease.values.alertmanager.securityContext.capabilities.drop[0] | string | `"ALL"` | |
| alerting.helmRelease.values.alertmanager.securityContext.readOnlyRootFilesystem | bool | `true` | |
| alerting.helmRelease.values.server.configReloader.image.pullPolicy | string | `"Always"` | |
| alerting.helmRelease.values.server.configReloader.image.registry | string | `"docker.io"` | |
| alerting.helmRelease.values.server.configReloader.image.repository | string | `"kiwigrid/k8s-sidecar"` | |
| alerting.helmRelease.values.server.configReloader.image.tag | string | `"1.25.2"` | |
| alerting.helmRelease.values.server.configReloader.resources.limits.memory | string | `"96Mi"` | |
| alerting.helmRelease.values.server.configReloader.resources.requests.cpu | string | `"10m"` | |
| alerting.helmRelease.values.server.configReloader.resources.requests.memory | string | `"96Mi"` | |
| alerting.helmRelease.values.server.configReloader.securityContext.allowPrivilegeEscalation | bool | `false` | |
| alerting.helmRelease.values.server.configReloader.securityContext.capabilities.drop[0] | string | `"ALL"` | |
| alerting.helmRelease.values.server.configReloader.securityContext.readOnlyRootFilesystem | bool | `true` | |
| alerting.helmRelease.values.server.image.pullPolicy | string | `"Always"` | |
| alerting.helmRelease.values.server.image.registry | string | `"docker.io"` | |
| alerting.helmRelease.values.server.image.repository | string | `"victoriametrics/vmalert"` | |
| alerting.helmRelease.values.server.podSecurityContext.fsGroup | int | `2000` | |
| alerting.helmRelease.values.server.podSecurityContext.runAsGroup | int | `3000` | |
| alerting.helmRelease.values.server.podSecurityContext.runAsUser | int | `1000` | |
| alerting.helmRelease.values.server.priorityClassName | string | `"secure-cloud-stack-tenant-namespace-application-critical"` | |
| alerting.helmRelease.values.server.resources.limits.memory | string | `"64Mi"` | |
| alerting.helmRelease.values.server.resources.requests.cpu | string | `"10m"` | |
| alerting.helmRelease.values.server.resources.requests.memory | string | `"64Mi"` | |
| alerting.helmRelease.values.server.securityContext.allowPrivilegeEscalation | bool | `false` | |
| alerting.helmRelease.values.server.securityContext.capabilities.drop[0] | string | `"all"` | |
| alerting.helmRelease.values.server.securityContext.readOnlyRootFilesystem | bool | `true` | |
| alerting.helmRepository | string | `nil` | |
| alerting.namespaces | list | `[]` | |
| authProxy.affinity | list | `[]` | |
| authProxy.annotations | object | `{}` | |
| authProxy.enabled | bool | `true` | |
Expand Down Expand Up @@ -90,10 +140,13 @@ A Helm chart for Netic application operations infrastructure
| prometheus.configReloader.resources.limits.memory | string | `"25Mi"` | |
| prometheus.configReloader.resources.requests.cpu | string | `"10m"` | |
| prometheus.configReloader.resources.requests.memory | string | `"25Mi"` | |
| prometheus.extraVolumeMounts | list | `[]` | |
| prometheus.extraVolumes | list | `[]` | |
| prometheus.image.registry | string | `"docker.io"` | |
| prometheus.image.repository | string | `"victoriametrics/vmagent"` | |
| prometheus.image.tag | string | `"v1.91.2"` | |
| prometheus.persistence.size | string | `"60Gi"` | |
| prometheus.podAnnotations | object | `{}` | |
| prometheus.podSecurityContext.fsGroup | int | `2000` | |
| prometheus.podSecurityContext.runAsGroup | int | `3000` | |
| prometheus.podSecurityContext.runAsNonRoot | bool | `true` | |
Expand Down Expand Up @@ -131,6 +184,10 @@ A Helm chart for Netic application operations infrastructure
| promxy.terminationGracePeriodSeconds | int | `30` | |
| promxy.tolerations | list | `[]` | |
| promxy.topologySpauthProxyConstraints | list | `[]` | |
| victoria-metrics-alert.rbac.create | bool | `false` | |
| victoria-metrics-alert.server.configMap | string | `"null"` | |
| victoria-metrics-alert.server.enabled | bool | `false` | |
| victoria-metrics-alert.serviceAccount.create | bool | `false` | |
| victoria-metrics-single-1.rbac.create | bool | `false` | |
| victoria-metrics-single-1.server.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].key | string | `"app.kubernetes.io/name"` | |
| victoria-metrics-single-1.server.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].labelSelector.matchExpressions[0].operator | string | `"In"` | |
Expand Down
23 changes: 21 additions & 2 deletions charts/aoi/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,31 @@ Create the port for the read path to tsdb.
{{- end }}

{{/*
Metrics read URL.
Builds the in-cluster HTTP address "<name>-<readHost>.<namespace>.svc.<clusterDomain>:<readPort>"
from the "aoi.name", "aoi.readHost" and "aoi.readPort" helpers.
*/}}
{{- define "aoi.readUrl" -}}
{{- $service := printf "%s-%s" (include "aoi.name" .) (include "aoi.readHost" .) -}}
{{- $fqdn := printf "%s.%s.svc.%s" $service .Release.Namespace .Values.global.clusterDomain -}}
{{- printf "http://%s:%s" $fqdn (include "aoi.readPort" .) -}}
{{- end }}


{{/*
Metrics read URL for Grafana.
When .Values.authProxy.enabled is set, Grafana is pointed at the
"<name>-auth-proxy" service on port 8080; otherwise it reads from the same
address that "aoi.readUrl" renders.
*/}}
{{- define "aoi.grafanaReadUrl" -}}
{{- if .Values.authProxy.enabled }}
{{- printf "http://%s-auth-proxy.%s.svc.%s:8080" (include "aoi.name" . ) .Release.Namespace .Values.global.clusterDomain }}
{{- else }}
{{- /* Reuse the shared helper so the host and port can never drift apart. */}}
{{- include "aoi.readUrl" . }}
{{- end }}
{{- end }}

{{/*
Value for the victoria-metrics name label.
Renders "promxy" when .Values.global.tsdb.high_availability.enabled is set and
"victoria-metrics-single-1" otherwise.
*/}}
{{- define "aoi.vmLableName" -}}
{{- $labelValue := "victoria-metrics-single-1" -}}
{{- if .Values.global.tsdb.high_availability.enabled -}}
{{- $labelValue = "promxy" -}}
{{- end -}}
{{- $labelValue -}}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{{- /*
Renders one Flux HelmRelease of the victoria-metrics-alert chart for every
namespace returned by "aoi.alerting.namespaces", when .Values.alerting.enabled
is set. The chart version comes from "aoi.alerting.chartVersion" and the chart
is pulled from the "victoria-metrics" HelmRepository in the same namespace.
*/ -}}
{{- if .Values.alerting.enabled }}
{{- range $i, $namespace := (include "aoi.alerting.namespaces" . | fromJsonArray) }}
---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: victoria-metrics-alert
  namespace: {{ $namespace | quote }}
spec:
  chart:
    spec:
      chart: victoria-metrics-alert
      # Quoted so a version such as "1.0" is never parsed as a float.
      version: {{ include "aoi.alerting.chartVersion" $ | quote }}
      sourceRef:
        kind: HelmRepository
        name: victoria-metrics
      interval: 5m
  interval: 5m
  timeout: 15m
  values:
    serviceAccount:
      create: false
      name: victoria-metrics-alert-config-reloader
    rbac:
      create: false
    server:
      {{- include "aoi.serverPriorityClassName" $ | nindent 6 }}
      image:
        repository: "{{ $.Values.alerting.helmRelease.values.server.image.registry }}/{{ $.Values.alerting.helmRelease.values.server.image.repository }}"
        pullPolicy: {{ $.Values.alerting.helmRelease.values.server.image.pullPolicy }}
      resources:
        {{- toYaml $.Values.alerting.helmRelease.values.server.resources | nindent 8 }}
      extraArgs:
        rule: '/tmp/rules/*.yaml'
      # Shared scratch volume: the config-reloader container below mounts the
      # same path that the rule glob above reads from.
      extraVolumes:
        - name: alert-rules
          emptyDir: {}
      extraVolumeMounts:
        - name: alert-rules
          mountPath: /tmp/rules
      datasource:
        {{- /* The cluster-wide namespace queries the read URL unfiltered; every
               other namespace adds an extra_label filter on its namespace_id. */}}
        {{- if eq $namespace $.Values.alerting.clusterWideNamespace.name }}
        url: "{{ include "aoi.readUrl" $ }}"
        {{- else }}
        url: "{{ include "aoi.readUrl" $ }}?extra_label=namespace_id={{ $.Values.alerting.clusterId }}_{{ $namespace }}"
        {{- end }}
      securityContext:
        {{- toYaml $.Values.alerting.helmRelease.values.server.securityContext | nindent 8 }}
      extraContainers:
        - name: config-reloader
          image: "{{ $.Values.alerting.helmRelease.values.server.configReloader.image.registry }}/{{ $.Values.alerting.helmRelease.values.server.configReloader.image.repository }}:{{ $.Values.alerting.helmRelease.values.server.configReloader.image.tag }}"
          imagePullPolicy: {{ $.Values.alerting.helmRelease.values.server.configReloader.image.pullPolicy }}
          resources:
            {{- toYaml $.Values.alerting.helmRelease.values.server.configReloader.resources | nindent 12 }}
          securityContext:
            {{- toYaml $.Values.alerting.helmRelease.values.server.configReloader.securityContext | nindent 12 }}
          env:
            - name: IGNORE_ALREADY_PROCESSED
              value: "true"
            - name: METHOD
              value: WATCH
            - name: LABEL
              value: application-operations-alerting
            - name: FOLDER
              value: /tmp/rules
            - name: RESOURCE
              value: configmap
            - name: REQ_URL
              value: http://localhost:8880/-/reload
            - name: REQ_METHOD
              value: GET
          volumeMounts:
            - name: alert-rules
              mountPath: /tmp/rules
      podSecurityContext:
        {{- toYaml $.Values.alerting.helmRelease.values.server.podSecurityContext | nindent 8 }}
    alertmanager:
      enabled: true
      {{- include "aoi.alertmanagerPriorityClassName" $ | nindent 6 }}
      image: "{{ $.Values.alerting.helmRelease.values.alertmanager.image.registry }}/{{ $.Values.alerting.helmRelease.values.alertmanager.image.repository }}"
      resources:
        {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.resources | nindent 8 }}
      configMap: "alertmanager-config"
      securityContext:
        {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.securityContext | nindent 8 }}
      podSecurityContext:
        {{- toYaml $.Values.alerting.helmRelease.values.alertmanager.podSecurityContext | nindent 8 }}
    {{- /* NOTE(review): serviceMonitor is assumed to be a top-level key of the
           victoria-metrics-alert chart values - confirm against the chart. */}}
    serviceMonitor:
      enabled: true
      extraLabels:
        netic.dk/monitoring: "true"
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{- /*
Renders one Flux HelmRepository named "victoria-metrics" for every namespace
returned by "aoi.alerting.namespaces", when .Values.alerting.enabled is set.
The URL and repository type come from the "aoi.alerting.helmRepository" and
"aoi.alerting.helmRepository.type" helpers.
*/ -}}
{{- if .Values.alerting.enabled }}
{{- range $i, $namespace := (include "aoi.alerting.namespaces" . | fromJsonArray) }}
---
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: victoria-metrics
  namespace: {{ $namespace | quote }}
spec:
  interval: 30m0s
  # Templated scalars are quoted so an empty or unusual value stays a string.
  url: {{ include "aoi.alerting.helmRepository" $ | quote }}
  type: {{ include "aoi.alerting.helmRepository.type" $ | quote }}
{{- end }}
{{- end }}
67 changes: 67 additions & 0 deletions charts/aoi/templates/application-operations-alerting/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{{/*
List of namespaces that should have alerting enabled, rendered as a JSON array
(callers decode it with fromJsonArray).
The list is .Values.alerting.namespaces, with
.Values.alerting.clusterWideNamespace.name put first when the cluster-wide
namespace is enabled.
*/}}
{{- define "aoi.alerting.namespaces" -}}
{{- /* Fall back to an empty list so a null value cannot break prepend. */ -}}
{{- $namespaces := default (list) .Values.alerting.namespaces -}}
{{- if .Values.alerting.clusterWideNamespace.enabled -}}
{{- $namespaces = prepend $namespaces .Values.alerting.clusterWideNamespace.name -}}
{{- end -}}
{{- /* De-duplicate: callers render one resource per entry, and two entries for
       the same namespace would produce colliding resources. */ -}}
{{- $namespaces | uniq | toJson -}}
{{- end }}

{{/*
Helm repository URL for the victoria-metrics-alert chart.
Uses .Values.alerting.helmRepository when it is set; otherwise the repository
declared for the "victoria-metrics-alert" dependency of this chart.
*/}}
{{- define "aoi.alerting.helmRepository" -}}
{{- $override := .Values.alerting.helmRepository -}}
{{- range .Chart.Dependencies -}}
{{- if eq .Name "victoria-metrics-alert" -}}
{{- $override | default .Repository -}}
{{- end -}}
{{- end -}}
{{- end }}

{{/*
HelmRepository type.
Renders "oci" when the URL from "aoi.alerting.helmRepository" starts with
"oci", and "default" otherwise.
*/}}
{{- define "aoi.alerting.helmRepository.type" -}}
{{- $repoType := "default" -}}
{{- if hasPrefix "oci" (include "aoi.alerting.helmRepository" .) -}}
{{- $repoType = "oci" -}}
{{- end -}}
{{- print $repoType -}}
{{- end }}

{{/*
Helm chart version for victoria-metrics-alert.
Taken from the version declared for the "victoria-metrics-alert" dependency of
this chart.
*/}}
{{- define "aoi.alerting.chartVersion" -}}
{{- range .Chart.Dependencies -}}
{{- if eq .Name "victoria-metrics-alert" -}}
{{- .Version -}}
{{- end -}}
{{- end -}}
{{- end }}

{{/*
Priority class name for the alerting server.
Renders a "priorityClassName: <value>" line, preferring
.Values.global.priorityClassName over
.Values.alerting.helmRelease.values.server.priorityClassName; renders nothing
when neither is set.
*/}}
{{- define "aoi.serverPriorityClassName" -}}
{{- with coalesce .Values.global.priorityClassName .Values.alerting.helmRelease.values.server.priorityClassName -}}
priorityClassName: {{ . }}
{{- end }}
{{- end }}

{{/*
Priority class name for alertmanager.
Renders a "priorityClassName: <value>" line, preferring
.Values.global.priorityClassName over
.Values.alerting.helmRelease.values.alertmanager.priorityClassName; renders
nothing when neither is set.
*/}}
{{- define "aoi.alertmanagerPriorityClassName" -}}
{{- with coalesce .Values.global.priorityClassName .Values.alerting.helmRelease.values.alertmanager.priorityClassName -}}
priorityClassName: {{ . }}
{{- end }}
{{- end }}


Loading

0 comments on commit ec7a8f7

Please sign in to comment.