|
3 | 3 | | Key | Type | Default | Description |
|
4 | 4 | |-----|------|---------|-------------|
|
5 | 5 | | nameOverride | string | `""` | Unique identifier of SuperSONIC instance (equal to release name by default) |
|
| 6 | +| serverLoadMetric | string | `""` | A metric used by both the KEDA autoscaler and Envoy's Prometheus-based rate limiter. The default metric (inference queue latency) is defined in templates/_helpers.tpl | |
| 7 | +| serverLoadThreshold | int | `100` | Threshold for the metric specified in serverLoadMetric | |
6 | 8 | | triton.replicas | int | `1` | Number of Triton server instances (if autoscaling is disabled) |
|
7 | 9 | | triton.image | string | `"nvcr.io/nvidia/tritonserver:24.12-py3-min"` | Docker image for the Triton server |
|
8 | 10 | | triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container |
|
|
22 | 24 | | envoy.resources | object | `{"limits":{"cpu":2,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
|
23 | 25 | | envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. |
|
24 | 26 | | envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports |
|
| 27 | +| envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy | |
25 | 28 | | envoy.grpc_route_timeout | string | `"0s"` | Timeout for gRPC route in Envoy; disabled by default (0s), preventing Envoy from closing connections too early. |
|
26 | 29 | | envoy.rate_limiter.listener_level | object | `{"enabled":false,"fill_interval":"12s","max_tokens":5,"tokens_per_fill":1}` | This rate limiter explicitly controls the number of client connections to the Envoy Proxy. |
|
27 | 30 | | envoy.rate_limiter.listener_level.enabled | bool | `false` | Enable rate limiter |
|
|
47 | 50 | | autoscaler.scaleDown.window | int | `600` | |
|
48 | 51 | | autoscaler.scaleDown.period | int | `120` | |
|
49 | 52 | | autoscaler.scaleDown.stepsize | int | `1` | |
|
50 |
| -| prometheus | object | `{"external":true,"ingress":{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""},"port":443,"scheme":"https","serverLoadMetric":"","serverLoadThreshold":100,"url":""}` | Connection to a Prometheus server is required for KEDA autoscaler and Envoy's prometheus-based rate limiter | |
51 |
| -| prometheus.external | bool | `true` | Whether to use external Prometheus instance (true) or deploy internal one (false) | |
52 |
| -| prometheus.url | string | `""` | External Prometheus server url and port number (find in documentation of a given cluster or ask admins) Only used when external=true | |
53 |
| -| prometheus.scheme | string | `"https"` | Specify whether external Prometheus endpoint is exposed as http or https Only used when external=true | |
54 |
| -| prometheus.serverLoadMetric | string | `""` | A metric used by both KEDA autoscaler and Envoy's prometheus-based rate limiter. # Default metric (inference queue latency) is defined in templates/_helpers.tpl | |
55 |
| -| prometheus.serverLoadThreshold | int | `100` | Threshold for the metric | |
56 |
| -| prometheus.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for internal Prometheus web UI (only used when external=false) | |
57 |
| -| ingress.enabled | bool | `false` | | |
58 |
| -| ingress.hostName | string | `""` | | |
59 |
| -| ingress.ingressClassName | string | `""` | | |
60 |
| -| ingress.annotations | object | `{}` | | |
61 | 53 | | nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) |
|
62 | 54 | | tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) |
|
63 |
| -| grafana.enabled | bool | `false` | Enable or disable Grafana deployment | |
64 |
| -| grafana.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":"haproxy"}` | Ingress configuration for Grafana | |
65 |
| -| grafana.ingress.enabled | bool | `false` | Enable or disable ingress for Grafana | |
66 |
| -| grafana.ingress.hostName | string | `""` | Hostname for Grafana ingress | |
67 |
| -| grafana.ingress.ingressClassName | string | `"haproxy"` | Ingress class name (e.g. nginx, haproxy) | |
68 |
| -| grafana.ingress.annotations | object | `{}` | Additional annotations for Grafana ingress | |
| 55 | +| prometheus | object | `{"alertmanager":{"enabled":false},"configmapReload":{"prometheus":{"enabled":false}},"enabled":false,"external":{"enabled":false,"port":443,"scheme":"https","url":""},"kube-state-metrics":{"enabled":false},"prometheus-node-exporter":{"enabled":false},"prometheus-pushgateway":{"enabled":false},"pushgateway":{"enabled":false},"rbac":{"create":false},"server":{"configMapOverrideName":"prometheus-config","global":{"evaluation_interval":"5s","scrape_interval":"5s"},"ingress":{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","tls":[{"hosts":[]}]},"persistentVolume":{"enabled":false},"releaseNamespace":true,"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"500m","memory":"512Mi"}},"retention":"15d","service":{"enabled":true,"servicePort":9090},"useExistingClusterRoleName":"supersonic-prometheus-role"},"serviceAccounts":{"server":{"create":false,"name":"supersonic-prometheus-sa"}}}` | Connection to a Prometheus server is required for KEDA autoscaler and Envoy's prometheus-based rate limiter | |
| 56 | +| prometheus.external.enabled | bool | `false` | Enable external Prometheus instance | |
| 57 | +| prometheus.external.url | string | `""` | External Prometheus server URL | |
| 58 | +| prometheus.external.port | int | `443` | External Prometheus server port number | |
| 59 | +| prometheus.external.scheme | string | `"https"` | Whether the external Prometheus endpoint is exposed over http or https | |
| 60 | +| prometheus.enabled | bool | `false` | Enable or disable Prometheus subchart deployment | |
| 61 | +| prometheus.server | object | `{"configMapOverrideName":"prometheus-config","global":{"evaluation_interval":"5s","scrape_interval":"5s"},"ingress":{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","tls":[{"hosts":[]}]},"persistentVolume":{"enabled":false},"releaseNamespace":true,"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"500m","memory":"512Mi"}},"retention":"15d","service":{"enabled":true,"servicePort":9090},"useExistingClusterRoleName":"supersonic-prometheus-role"}` | Prometheus Helm chart configuration (https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus) | |
| 62 | +| grafana.enabled | bool | `false` | Enable or disable Grafana deployment | |
| 63 | +| grafana.adminUser | string | `"admin"` | | |
| 64 | +| grafana.adminPassword | string | `"admin"` | | |
| 65 | +| grafana.persistence.enabled | bool | `false` | | |
| 66 | +| grafana.rbac.create | bool | `false` | | |
| 67 | +| grafana.serviceAccount.create | bool | `false` | | |
| 68 | +| grafana.datasources | object | `{"datasources.yaml":{"apiVersion":1,"datasources":[{"access":"proxy","isDefault":true,"jsonData":{"timeInterval":"5s","tlsSkipVerify":true},"name":"prometheus","type":"prometheus","url":"http://supersonic-prometheus-server:9090"}]}}` | Grafana datasources configuration | |
| 69 | +| grafana.dashboardProviders | object | `{"dashboardproviders.yaml":{"apiVersion":1,"providers":[{"disableDeletion":false,"editable":true,"folder":"","name":"default","options":{"path":"/var/lib/grafana/dashboards/default"},"orgId":1,"type":"file"}]}}` | Grafana dashboard providers configuration | |
| 70 | +| grafana.dashboardsConfigMaps | object | `{"default":"supersonic-grafana-default-dashboard"}` | Grafana dashboard ConfigMaps | |
| 71 | +| grafana."grafana.ini" | object | `{"auth":{"disable_login_form":true},"auth.anonymous":{"enabled":true,"org_role":"Admin"},"dashboards":{"default_home_dashboard_path":"/var/lib/grafana/dashboards/default/default.json"}}` | Grafana.ini configuration | |
| 72 | +| grafana.resources | object | `{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}}` | Resource limits and requests for Grafana | |
| 73 | +| grafana.service | object | `{"port":80,"targetPort":3000,"type":"ClusterIP"}` | Service configuration | |
| 74 | +| grafana.ingress | object | `{"annotations":{},"enabled":false,"hosts":[],"ingressClassName":"","path":"/","pathType":"ImplementationSpecific","tls":[]}` | Ingress configuration | |
0 commit comments