Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
73b2b59
works-150
asafchen-dig Feb 28, 2024
dc283c6
works-400
asafchen-dig Feb 28, 2024
f3c90b1
better-400
asafchen-dig Feb 29, 2024
b1ad21f
more
asafchen-dig Feb 29, 2024
6ca281b
fixed workers
asafchen-dig Mar 3, 2024
e06eebd
works for 800
asafchen-dig Mar 4, 2024
84b541c
supporting 1700 spans
asafchen-dig Mar 6, 2024
ebd0617
Merge branch 'main' into perf-test
asafchen-dig Apr 24, 2024
b4fa643
more
asafchen-dig Apr 24, 2024
d69bb95
rollback cpu and memory changes
oleksandrh Apr 30, 2024
600993d
upgraded version
oleksandrh Apr 30, 2024
ee661bf
changed cpu limits
oleksandrh May 6, 2024
d7b3051
Changed Chart.yaml
oleksandrh May 6, 2024
c0c9969
Merge branch 'main' into perf-test
oleksandrh May 10, 2024
8382880
updated helm chart
oleksandrh May 14, 2024
ee0a63e
changed helm
oleksandrh May 15, 2024
385f1b0
added traces exporting to meloona
oleksandrh May 17, 2024
21934e5
Merge branch 'main' into perf-test
oleksandrh May 21, 2024
2a7e678
changed dogfooding configuration
oleksandrh May 21, 2024
247b6f2
updated charts
oleksandrh May 27, 2024
a14b4f5
added export to meloona
oleksandrh May 28, 2024
250d9c5
helm changes
oleksandrh Jun 10, 2024
ba916f7
dshm
shaykeren Jun 13, 2024
8cc28b9
updated Chart.yaml
oleksandrh Jun 18, 2024
325843d
upgrade Chart.yaml
oleksandrh Jun 21, 2024
fb6cefe
Merge branch 'main' into perf-test
oleksandrh Jul 2, 2024
075008b
refactoring
oleksandrh Jul 2, 2024
6b46595
Merge branch 'main' of github.com:digma-ai/helm-chart into fix_2147_s…
shaykeren Jul 2, 2024
9b315f2
Merge branch 'fix_2147_shm_size' into perf-test
shaykeren Jul 2, 2024
d3ee963
scale up analytics
oleksandrh Jul 2, 2024
04a7deb
remove shared_buffers from values file
shaykeren Jul 3, 2024
c33fdf3
be default for postgres loggingCollector should be off
shaykeren Jul 3, 2024
1b40a70
ignore SpanRelationsDurationConsumer
oleksandrh Jul 3, 2024
8be8179
upgrade version
oleksandrh Jul 3, 2024
d4ca32e
changed helm chart
oleksandrh Jul 10, 2024
f46aba8
added kafka config to analytics
asafchen-dig Jul 10, 2024
d19dd01
bump to 0.3.74
asafchen-dig Jul 16, 2024
93cc0f2
upgraded chart values
oleksandrh Jul 17, 2024
eccacf3
Merge remote-tracking branch 'origin/perf-test' into perf-test
oleksandrh Jul 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/digma/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
apiVersion: v2
# version: this Chart version
version: 1.0.195
version: 1.0.318
name: digma
description: A Helm chart containing Digma's services and dbs
home: https://github.com/digma-ai/digma
type: application
# appVersion: digma version (affects image tag)
appVersion: 0.3.66
appVersion: 0.3.76
2 changes: 1 addition & 1 deletion src/digma/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,4 @@
{{- define "env.digmaEnvType" -}}
- name: DIGMA_ENV_TYPE
value: {{ .Values.digma.environmentType }}
{{- end -}}
{{- end -}}
2 changes: 1 addition & 1 deletion src/digma/templates/debug-services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
containers:

- name: kafka-ui
image: provectuslabs/kafka-ui:v0.7.1
image: provectuslabs/kafka-ui:v0.7.2
env:
- name: KAFKA_CLUSTERS_0_NAME
value: h4U35I9QRnGhbgsEQAlXAw
Expand Down
1 change: 1 addition & 0 deletions src/digma/templates/digma-analytics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ spec:
{{- include "env.digmaSite" . | nindent 8 }}
{{- include "env.redis" . | nindent 8 }}
{{- include "env.influx" . | nindent 8 }}
{{- include "env.kafka" . | nindent 8 }}
{{- include "env.postgres" . | nindent 8 }}
{{- include "env.otlpExporter" . | nindent 8 }}
{{- include "env.otlpExportLogs" . | nindent 8 }}
Expand Down
2 changes: 1 addition & 1 deletion src/digma/templates/digma-collector-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ spec:
- name: OtlpSamplerProbability
value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}}
- name: BACKEND_DEPLOYMENT_TYPE
value: {{ .Values.deploymentType | quote}}
value: {{ .Values.deploymentType | quote}}
10 changes: 0 additions & 10 deletions src/digma/templates/digma-collector-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,3 @@ spec:
value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}}
- name: BACKEND_DEPLOYMENT_TYPE
value: {{ .Values.deploymentType | quote}}
- name: ThresholdOptions__RecentActivityUpdateThresholdSeconds
value: {{ .Values.traceCollectorThresholds.recentActivityUpdateThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertEndpointThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertEndpointThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertSpansThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertSpansThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertSpanFlowMetadataThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertSpanFlowMetadataThresholdSeconds | quote }}
- name: TraceTempStorage__TraceForJaegerExpirationInMinutes
value: {{ .Values.digmaCollectorWorker.traceForJaegerTTL | quote }}
14 changes: 13 additions & 1 deletion src/digma/templates/digma-measurement-analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ spec:
value: {{ .Values.digmaSelfDiagnosis.otlpSamplerProbability| quote}}
- name: BACKEND_DEPLOYMENT_TYPE
value: {{ .Values.deploymentType | quote}}
- name: ThresholdOptions__RecentActivityUpdateThresholdSeconds
value: {{ .Values.traceCollectorThresholds.recentActivityUpdateThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertEndpointThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertEndpointThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertSpansThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertSpansThresholdSeconds | quote }}
- name: ThresholdOptions__UpsertSpanFlowMetadataThresholdSeconds
value: {{ .Values.traceCollectorThresholds.upsertSpanFlowMetadataThresholdSeconds | quote }}
- name: TraceTempStorage__TraceForJaegerExpirationInMinutes
value: {{ .Values.digmaCollectorWorker.traceForJaegerTTL | quote }}
- name: Kafka__SpanDurationSummaryCG__Workers
value: '2'
- name: Kafka__SpanDurationTotalCG__Workers
Expand All @@ -61,4 +71,6 @@ spec:
- name: Kafka__BottleneckCG__WorkerBatchSize
value: '1000'
- name: Kafka__SpanMeasurementsProcessorCG__WorkerBatchSize
value: '1000'
value: '1000'
- name: GCLatencyMode
value: 'SustainedLowLatency'
6 changes: 3 additions & 3 deletions src/digma/templates/kafka.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ .Release.Name }}-kafka-stateful-set
name: {{ .Release.Name }}-kafka
labels:
app: kafka
spec:
Expand Down Expand Up @@ -56,7 +56,7 @@ spec:
value: PLAINTEXT
# Retention
- name: KAFKA_CFG_LOG_RETENTION_MINUTES
value: "20"
value: "10"
- name: KAFKA_CFG_LOG_RETENTION_CHECK_INTERVAL_MS
value: "100000" # 100 s (~1.7 min)
- name: KAFKA_CFG_LOG_ROLL_MS
Expand All @@ -69,6 +69,6 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 15Gi
storage: 80Gi


123 changes: 123 additions & 0 deletions src/digma/templates/otel-collector-df.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# OpenTelemetry Collector - for Digma dog-fooding (df)
---
# ConfigMap carrying the OpenTelemetry Collector configuration used for
# dog-fooding. The single data key below holds the collector's YAML config
# as a literal block scalar, so the nesting inside it is significant.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cm-otel-collector-config
data:
  key-collector-yaml: |
    receivers:
      otlp:
        protocols:
          grpc:
    processors:
      # NOTE(review): neither probabilistic sampler is referenced by a
      # pipeline below; they are kept so they can be switched in quickly.
      probabilistic_sampler/30:
        sampling_percentage: 30
      probabilistic_sampler/10:
        sampling_percentage: 10
      batch:
        timeout: 1000ms
        send_batch_size: 500
        send_batch_max_size: 500
      # Keeps only spans whose digma.environment resource attribute matches.
      # The pattern has no leading anchor, so any value ending in PETCLINIC
      # is accepted.
      filter/petclinic:
        spans:
          include:
            match_type: regexp
            resources:
              - key: digma.environment
                value: PETCLINIC$
      # Keeps only spans whose digma.environment is exactly STAG01-PERF.
      filter/stag01:
        spans:
          include:
            match_type: regexp
            resources:
              - key: digma.environment
                value: ^STAG01-PERF$
    exporters:
      # NOTE(review): the logging exporter is not referenced by a pipeline
      # below; add it to an exporters list when debugging.
      logging:
        verbosity: detailed
      otlp/meloona:
        endpoint: "https://collector.meloona01.digma.systems:443"
        tls:
          # The https scheme already enables TLS and overrides this flag,
          # so it is set to false to state what actually happens.
          insecure: false
      otlp/stag01:
        endpoint: "digma-collector-api:5050"
        tls:
          insecure: true
      # metrics - prometheus
      prometheus:
        endpoint: "0.0.0.0:8889"
        send_timestamps: true
        metric_expiration: 10m
    service:
      pipelines:
        # Filters run before batch so batches are built from the data that
        # is actually exported.
        traces:
          receivers: [otlp]
          processors: [filter/petclinic, batch]
          exporters: [otlp/stag01]
        traces/meloona:
          receivers: [otlp]
          processors: [filter/stag01, batch]
          exporters: [otlp/meloona]
        # NOTE(review): filter/stag01 only declares span rules; confirm it
        # has the intended effect in a metrics pipeline.
        metrics:
          receivers: [otlp]
          processors: [filter/stag01, batch]
          exporters: [prometheus]
---
# Single-replica Deployment that runs the dog-fooding OpenTelemetry Collector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: digma-otel-collector-df-deployment
  labels:
    app: otel-collector-df
spec:
  replicas: 1
  selector:
    matchLabels:
      app: otel-collector-df
  template:
    metadata:
      labels:
        app: otel-collector-df
    spec:
      # The collector config is projected from the ConfigMap: the entry
      # "key-collector-yaml" is written out as the file collector.yaml.
      volumes:
        - name: vn-config
          configMap:
            name: cm-otel-collector-config
            items:
              - key: "key-collector-yaml"
                path: "collector.yaml"
      containers:
        - name: otel-collector-df
          image: otel/opentelemetry-collector-contrib:0.78.0
          # Points the collector at the file mounted under /conf below.
          args:
            - --config=/conf/collector.yaml
          ports:
            # gRPC ingest port
            - containerPort: 4317
            # Prometheus scrape port for Digma metrics
            - containerPort: 8889
          resources:
            requests:
              cpu: 300m
              memory: 800Mi
            limits:
              cpu: 700m
              memory: 1500Mi
          volumeMounts:
            - name: vn-config
              mountPath: /conf
              readOnly: true
---
# Service in front of the pods labelled app=otel-collector-df.
apiVersion: v1
kind: Service
metadata:
  name: digma-otel-collector-df
spec:
  selector:
    app: otel-collector-df
  ports:
    # gRPC ingest
    - name: grpc
      protocol: TCP
      port: 4317
    # Metrics endpoint scraped by Prometheus
    - name: digma-metrics
      protocol: TCP
      port: 8889
17 changes: 16 additions & 1 deletion src/digma/templates/postgres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ spec:
cpu: {{ .Values.postgres.resources.limits.cpu }}
ports:
- containerPort: 5432
args: ["-c" ,"max_connections={{ .Values.postgres.maxConnections }}", "-c", "shared_buffers={{ .Values.postgres.maxMemory }}", "-c", "logging_collector=on", "-c","log_directory=log"]
args:
- "-c"
- "max_connections={{ .Values.postgres.maxConnections }}"
- "-c"
- "shared_buffers={{ .Values.postgres.sharedBuffers }}"
- "-c"
- "logging_collector={{ .Values.postgres.loggingCollector }}"
- "-c"
- "log_directory=log"
env:
- name: POSTGRES_NAME
value: postgres
Expand All @@ -40,6 +48,13 @@ spec:
- name: postgres-data
mountPath: /var/lib/postgresql/data
subPath: postgres
- name: dshm
mountPath: /dev/shm
volumes:
- name: dshm
emptyDir:
medium: Memory
sizeLimit: "800Mi"
volumeClaimTemplates:
- metadata:
name: postgres-data
Expand Down
38 changes: 25 additions & 13 deletions src/digma/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,16 @@ influx:
queryConcurrency: 30
queryQueueSize: 200


postgres:
host: "{{ .Release.Name }}-postgres"
imageTag: 15.1
port: 5432
username: postgres
password: postgres
maxConnections: 400
maxMemory: 800MB # mapped to shared_buffers
loggingCollector: off # for debugging turn it on
sharedBuffers: "800MB"
resources: {
requests: {
memory: 1Gi,
Expand All @@ -106,6 +108,9 @@ embeddedJaeger:
host: "{{ .Release.Name }}-embedded-jaeger"
imageTag: "1.44.0"
enabled: true
exposed: true
service:
annotations: []

kibana:
host: "{{ .Release.Name }}-kibana"
Expand Down Expand Up @@ -136,6 +141,7 @@ digmaDs:

digmaCollectorApi:
host: "{{ .Release.Name }}-collector-api"
expose: false
resources: {
requests: {
memory: 100Mi,
Expand All @@ -146,6 +152,8 @@ digmaCollectorApi:
cpu: 800m
}
}
service:
annotations: []

digmaCollectorWorker:
host: "{{ .Release.Name }}-collector-worker"
Expand All @@ -156,7 +164,7 @@ digmaCollectorWorker:
cpu: 100m
},
limits: {
memory: 400Mi,
memory: 800Mi,
cpu: 600m
}
}
Expand All @@ -165,7 +173,9 @@ digmaCollectorWorker:
digmaAnalytics:
host: "{{ .Release.Name }}-analytics"
replicas: 1
secured: true
expose: false
secured: false
accesstoken: pcr5UDZqdvzky1cgj
resources: {
requests: {
memory: 300Mi,
Expand All @@ -176,6 +186,8 @@ digmaAnalytics:
cpu: 800m
}
}
service:
annotations: []

digmaScheduler:
host: "{{ .Release.Name }}-scheduler"
Expand Down Expand Up @@ -211,21 +223,21 @@ digmaMeasurementAnalysis:
cpu: 200m
},
limits: {
memory: 500Mi,
memory: 800Mi,
cpu: 800m
}
}

digmaSelfDiagnosis:
otlpExportTraces: false
otlpExportMetrics: false
otlpExportLogs: false
otlpExporterEndpoint:
otlpSamplerProbability: "0.3"
otlpExporterEndpoint: https://collector.meloona01.digma.systems:443
otlpExportTraces: true
otlpExportMetrics: true
otlpSamplerProbability: "1"

digma:
environmentName:
environmentType:
siteName: undefined
environmentName: STAG01-PERF
environmentType: public
siteName: stag01
isCentralize: true
licenseKey:
licenseKey: 529974cf70824f67aefa98232482f93e