diff --git a/src/digma/Chart.yaml b/src/digma/Chart.yaml index a788d729..60f4c663 100644 --- a/src/digma/Chart.yaml +++ b/src/digma/Chart.yaml @@ -1,9 +1,9 @@ apiVersion: v2 # version: this Chart version -version: 1.0.195 +version: 1.0.318 name: digma description: A Helm chart containing Digma's services and dbs home: https://github.com/digma-ai/digma type: application # appVersion: digma version (affects image tag) -appVersion: 0.3.66 +appVersion: 0.3.76 \ No newline at end of file diff --git a/src/digma/templates/_helpers.tpl b/src/digma/templates/_helpers.tpl index d689637d..abad7c01 100644 --- a/src/digma/templates/_helpers.tpl +++ b/src/digma/templates/_helpers.tpl @@ -99,4 +99,4 @@ {{- define "env.digmaEnvType" -}} - name: DIGMA_ENV_TYPE value: {{ .Values.digma.environmentType }} -{{- end -}} \ No newline at end of file +{{- end -}} diff --git a/src/digma/templates/debug-services.yaml b/src/digma/templates/debug-services.yaml index 0a9e48e5..f46e116c 100644 --- a/src/digma/templates/debug-services.yaml +++ b/src/digma/templates/debug-services.yaml @@ -18,7 +18,7 @@ spec: containers: - name: kafka-ui - image: provectuslabs/kafka-ui:v0.7.1 + image: provectuslabs/kafka-ui:v0.7.2 env: - name: KAFKA_CLUSTERS_0_NAME value: h4U35I9QRnGhbgsEQAlXAw diff --git a/src/digma/templates/digma-analytics.yaml b/src/digma/templates/digma-analytics.yaml index 31f7f1ef..f0553266 100644 --- a/src/digma/templates/digma-analytics.yaml +++ b/src/digma/templates/digma-analytics.yaml @@ -34,6 +34,7 @@ spec: {{- include "env.digmaSite" . | nindent 8 }} {{- include "env.redis" . | nindent 8 }} {{- include "env.influx" . | nindent 8 }} + {{- include "env.kafka" . | nindent 8 }} {{- include "env.postgres" . | nindent 8 }} {{- include "env.otlpExporter" . | nindent 8 }} {{- include "env.otlpExportLogs" . | nindent 8 }} diff --git a/src/digma/templates/digma-collector-api.yaml b/src/digma/templates/digma-collector-api.yaml index 7482f957..ca35f427 100644 --- a/src/digma/templates/digma-collector-api.yaml +++ b/src/digma/templates/digma-collector-api.yaml @@ -49,4 +49,4 @@ spec: - name: OtlpSamplerProbability value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}} - name: BACKEND_DEPLOYMENT_TYPE - value: {{ .Values.deploymentType | quote}} + value: {{ .Values.deploymentType | quote}} \ No newline at end of file diff --git a/src/digma/templates/digma-collector-worker.yaml b/src/digma/templates/digma-collector-worker.yaml index 44b5c5fb..107a7ed1 100644 --- a/src/digma/templates/digma-collector-worker.yaml +++ b/src/digma/templates/digma-collector-worker.yaml @@ -44,13 +44,3 @@ spec: value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}} - name: BACKEND_DEPLOYMENT_TYPE value: {{ .Values.deploymentType | quote}} - - name: ThresholdOptions__RecentActivityUpdateThresholdSeconds - value: {{ .Values.traceCollectorThresholds.recentActivityUpdateThresholdSeconds | quote }} - - name: ThresholdOptions__UpsertEndpointThresholdSeconds - value: {{ .Values.traceCollectorThresholds.upsertEndpointThresholdSeconds | quote }} - - name: ThresholdOptions__UpsertSpansThresholdSeconds - value: {{ .Values.traceCollectorThresholds.upsertSpansThresholdSeconds | quote }} - - name: ThresholdOptions__UpsertSpanFlowMetadataThresholdSeconds - value: {{ .Values.traceCollectorThresholds.upsertSpanFlowMetadataThresholdSeconds | quote }} - - name: TraceTempStorage__TraceForJaegerExpirationInMinutes - value: {{ .Values.digmaCollectorWorker.traceForJaegerTTL | quote }} diff --git a/src/digma/templates/digma-measurement-analysis.yaml b/src/digma/templates/digma-measurement-analysis.yaml index bb1d6600..8bcb3f67 100644 --- a/src/digma/templates/digma-measurement-analysis.yaml +++ b/src/digma/templates/digma-measurement-analysis.yaml @@ -44,6 +44,16 @@ spec: value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}} - name: BACKEND_DEPLOYMENT_TYPE value: {{ .Values.deploymentType | quote}} + - name: ThresholdOptions__RecentActivityUpdateThresholdSeconds + value: {{ .Values.traceCollectorThresholds.recentActivityUpdateThresholdSeconds | quote }} + - name: ThresholdOptions__UpsertEndpointThresholdSeconds + value: {{ .Values.traceCollectorThresholds.upsertEndpointThresholdSeconds | quote }} + - name: ThresholdOptions__UpsertSpansThresholdSeconds + value: {{ .Values.traceCollectorThresholds.upsertSpansThresholdSeconds | quote }} + - name: ThresholdOptions__UpsertSpanFlowMetadataThresholdSeconds + value: {{ .Values.traceCollectorThresholds.upsertSpanFlowMetadataThresholdSeconds | quote }} + - name: TraceTempStorage__TraceForJaegerExpirationInMinutes + value: {{ .Values.digmaCollectorWorker.traceForJaegerTTL | quote }} - name: Kafka__SpanDurationSummaryCG__Workers value: '2' - name: Kafka__SpanDurationTotalCG__Workers @@ -61,4 +71,6 @@ spec: - name: Kafka__BottleneckCG__WorkerBatchSize value: '1000' - name: Kafka__SpanMeasurementsProcessorCG__WorkerBatchSize - value: '1000' \ No newline at end of file + value: '1000' + - name: GCLatencyMode + value: 'SustainedLowLatency' \ No newline at end of file diff --git a/src/digma/templates/kafka.yaml b/src/digma/templates/kafka.yaml index d77126c1..17c626cc 100644 --- a/src/digma/templates/kafka.yaml +++ b/src/digma/templates/kafka.yaml @@ -1,7 +1,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: - name: {{ .Release.Name }}-kafka-stateful-set + name: {{ .Release.Name }}-kafka labels: app: kafka spec: @@ -56,7 +56,7 @@ spec: value: PLAINTEXT # Retention - name: KAFKA_CFG_LOG_RETENTION_MINUTES - value: "20" + value: "10" - name: KAFKA_CFG_LOG_RETENTION_CHECK_INTERVAL_MS value: "100000" # 1 min - name: KAFKA_CFG_LOG_ROLL_MS @@ -69,6 +69,6 @@ spec: - ReadWriteOnce resources: requests: - storage: 15Gi + storage: 80Gi diff --git a/src/digma/templates/otel-collector-df.yaml b/src/digma/templates/otel-collector-df.yaml new file mode 100644 index 00000000..151b1ef9 --- /dev/null +++ b/src/digma/templates/otel-collector-df.yaml @@ -0,0 +1,123 @@ +# OpenTelemetry Collector - for Digma dog-fooding (df) +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cm-otel-collector-config +data: + key-collector-yaml: | + receivers: + otlp: + protocols: + grpc: + processors: + probabilistic_sampler/30: + sampling_percentage: 30 + probabilistic_sampler/10: + sampling_percentage: 10 + batch: + timeout: 1000ms + send_batch_size: 500 + send_batch_max_size: 500 + filter/petclinic: + spans: + include: + match_type: regexp + resources: + - key: digma.environment + value: PETCLINIC$ + filter/stag01: + spans: + include: + match_type: regexp + resources: + - key: digma.environment + value: ^STAG01-PERF$ + exporters: + logging: + verbosity: detailed + otlp/meloona: + endpoint: "https://collector.meloona01.digma.systems:443" + tls: + insecure: true + otlp/stag01: + endpoint: "digma-collector-api:5050" + tls: + insecure: true + # metrics - prometheus + prometheus: + endpoint: "0.0.0.0:8889" + send_timestamps: true + metric_expiration: 10m + service: + pipelines: + traces: + receivers: [otlp] + processors: [batch, filter/petclinic] + exporters: [otlp/stag01] + traces/meloona: + receivers: [otlp] + processors: [batch, filter/stag01] + exporters: [otlp/meloona] + metrics: + receivers: [otlp] + processors: [batch, filter/stag01] + exporters: [prometheus] +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: digma-otel-collector-df-deployment + labels: + app: otel-collector-df +spec: + replicas: 1 + selector: + matchLabels: + app: otel-collector-df + template: + metadata: + labels: + app: otel-collector-df + spec: + containers: + - name: otel-collector-df + image: otel/opentelemetry-collector-contrib:0.78.0 + resources: + limits: + cpu: 700m + memory: 1500Mi + requests: + cpu: 300m + memory: 800Mi + ports: + - containerPort: 4317 + - containerPort: 8889 # port to expose digma metrics towards prometheus + args: + - --config=/conf/collector.yaml + volumeMounts: + - name: vn-config + mountPath: /conf + readOnly: true + volumes: + - name: vn-config # volume name + configMap: + name: cm-otel-collector-config # configMap name + items: + - key: "key-collector-yaml" + path: "collector.yaml" +--- +apiVersion: v1 +kind: Service +metadata: + name: digma-otel-collector-df +spec: + selector: + app: otel-collector-df + ports: + - name: grpc + port: 4317 + protocol: TCP + - name: digma-metrics + port: 8889 + protocol: TCP \ No newline at end of file diff --git a/src/digma/templates/postgres.yaml b/src/digma/templates/postgres.yaml index 654dc3b4..7eee90ca 100644 --- a/src/digma/templates/postgres.yaml +++ b/src/digma/templates/postgres.yaml @@ -28,7 +28,15 @@ spec: cpu: {{ .Values.postgres.resources.limits.cpu }} ports: - containerPort: 5432 - args: ["-c" ,"max_connections={{ .Values.postgres.maxConnections }}", "-c", "shared_buffers={{ .Values.postgres.maxMemory }}", "-c", "logging_collector=on", "-c","log_directory=log"] + args: + - "-c" + - "max_connections={{ .Values.postgres.maxConnections }}" + - "-c" + - "shared_buffers={{ .Values.postgres.sharedBuffers }}" + - "-c" + - "logging_collector={{ .Values.postgres.loggingCollector }}" + - "-c" + - "log_directory=log" env: - name: POSTGRES_NAME value: postgres @@ -40,6 +48,13 @@ spec: - name: postgres-data mountPath: /var/lib/postgresql/data subPath: postgres + - name: dshm + mountPath: /dev/shm + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: "800Mi" volumeClaimTemplates: - metadata: name: postgres-data diff --git a/src/digma/values.yaml b/src/digma/values.yaml index 1d98a8f0..c2705acd 100644 --- a/src/digma/values.yaml +++ b/src/digma/values.yaml @@ -78,6 +78,7 @@ influx: queryConcurrency: 30 queryQueueSize: 200 + postgres: host: "{{ .Release.Name }}-postgres" imageTag: 15.1 @@ -85,7 +86,8 @@ postgres: username: postgres password: postgres maxConnections: 400 - maxMemory: 800MB # mapped to shared_buffers + loggingCollector: off # for debugging turn it on + sharedBuffers: "800MB" resources: { requests: { memory: 1Gi, @@ -106,6 +108,9 @@ embeddedJaeger: host: "{{ .Release.Name }}-embedded-jaeger" imageTag: "1.44.0" enabled: true + exposed: true + service: + annotations: [] kibana: host: "{{ .Release.Name }}-kibana" @@ -136,6 +141,7 @@ digmaDs: digmaCollectorApi: host: "{{ .Release.Name }}-collector-api" + expose: false resources: { requests: { memory: 100Mi, @@ -146,6 +152,8 @@ digmaCollectorApi: cpu: 800m } } + service: + annotations: [] digmaCollectorWorker: host: "{{ .Release.Name }}-collector-worker" @@ -156,7 +164,7 @@ digmaCollectorWorker: cpu: 100m }, limits: { - memory: 400Mi, + memory: 800Mi, cpu: 600m } } @@ -165,7 +173,9 @@ digmaCollectorWorker: digmaAnalytics: host: "{{ .Release.Name }}-analytics" replicas: 1 - secured: true + expose: false + secured: false + accesstoken: pcr5UDZqdvzky1cgj resources: { requests: { memory: 300Mi, @@ -176,6 +186,8 @@ digmaAnalytics: cpu: 800m } } + service: + annotations: [] digmaScheduler: host: "{{ .Release.Name }}-scheduler" @@ -211,21 +223,21 @@ digmaMeasurementAnalysis: cpu: 200m }, limits: { - memory: 500Mi, + memory: 800Mi, cpu: 800m } } digmaSelfDiagnosis: - otlpExportTraces: false - otlpExportMetrics: false - otlpExportLogs: false - otlpExporterEndpoint: - otlpSamplerProbability: "0.3" + otlpExporterEndpoint: https://collector.meloona01.digma.systems:443 + otlpExportTraces: true + otlpExportMetrics: true + otlpSamplerProbability: "1" digma: - environmentName: - environmentType: - siteName: undefined + environmentName: STAG01-PERF + environmentType: public + siteName: stag01 isCentralize: true - licenseKey: + licenseKey: 529974cf70824f67aefa98232482f93e +