Skip to content

Add option to enable dynamic routing in Envoy Proxy #66

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/.values-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@
| envoy.rate_limiter.listener_level.max_tokens | int | `5` | Maximum number of simultaneous connections to the Envoy Proxy. Each new connection takes a "token" from the "bucket" which initially contains ``max_tokens`` tokens. |
| envoy.rate_limiter.listener_level.tokens_per_fill | int | `1` | ``tokens_per_fill`` tokens are added to the "bucket" every ``fill_interval``, allowing new connections to be established. |
| envoy.rate_limiter.listener_level.fill_interval | string | `"12s"` | For example, adding a new token every 12 seconds allows 5 new connections every minute. |
| envoy.rate_limiter.prometheus_based | object | `{"enabled":false,"luaConfig":"cfg/envoy-filter.lua"}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
| envoy.rate_limiter.prometheus_based | object | `{"enabled":false}` | This rate limiter rejects new connections based on metric extracted from Prometheus (e.g. inference queue latency). The metric is taken from parameter ``prometheus.serverLoadMetric``, and the threshold is set by ``prometheus.serverLoadThreshold``. These parameters are the same as those used by the KEDA autoscaler. |
| envoy.rate_limiter.prometheus_based.enabled | bool | `false` | Enable rate limiter |
| envoy.loadBalancerPolicy | string | `"LEAST_REQUEST"` | Envoy load balancer policy. Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV |
| envoy.lua_filter.enabled | bool | `false` | Enable the Envoy Lua HTTP filter (`envoy.filters.http.lua`). |
| envoy.lua_filter.lua_config | string | `"cfg/envoy-filter.lua"` | Path (relative to the chart root) of the Lua script that is loaded into the Lua filter ConfigMap. |
| envoy.dynamic_routing | object | `{"enabled":false}` | Enable dynamic routing in Envoy proxy. |
| envoy.auth.enabled | bool | `false` | Enable authentication in Envoy proxy |
| envoy.auth.jwt_issuer | string | `""` | |
| envoy.auth.jwt_remote_jwks_uri | string | `""` | |
Expand Down
64 changes: 64 additions & 0 deletions helm/supersonic/cfg/envoy-filter-dynamic.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
-- Returns true when `content_type` denotes gRPC: either exactly
-- "application/grpc" or that type followed by a "+subtype" or ";parameter".
local function is_grpc_content_type(content_type)
  if type(content_type) ~= "string" then
    return false
  end
  return content_type == "application/grpc"
    or content_type:find("^application/grpc[+;]") ~= nil
end

-- Returns true when `value` is a non-empty string made only of letters,
-- digits, "_", "." and "-". The value comes from the client and is spliced
-- into a hostname, so characters such as ":" "/" or "@" must never get through.
local function is_safe_host_fragment(value)
  return type(value) == "string"
    and value ~= ""
    and value:find("[^%w_%.%-]") == nil
end

--- Envoy HTTP Lua filter hook: choose the upstream host for a gRPC request.
--
-- The chosen "host:port" is written to the "route-to" request header:
--   * ModelInfer calls whose model name and version can be read from the
--     request body get the per-model service "<release>-<model>-v<version>";
--   * every other gRPC call, and any ModelInfer call whose model cannot be
--     determined (or contains unsafe characters), gets the default service.
-- Requests that are not gRPC are left untouched.
--
-- Note: the upper-case placeholder tokens inside the host strings below are
-- substituted textually by the Helm chart when the ConfigMap is rendered.
--
-- @param request_handle Envoy Lua stream handle of the current request.
function envoy_on_request(request_handle)
  local headers = request_handle:headers()

  if not is_grpc_content_type(headers:get("content-type")) then
    return
  end

  -- Default target; also the fallback when no per-model host can be built.
  local target = "RELEASE-triton.NAMESPACE.svc.cluster.local:8001"

  if headers:get(":path") == "/inference.GRPCInferenceService/ModelInfer" then
    local model_name, model_version = extract_model_name_and_version(request_handle)

    -- The extractor reports failure with empty strings, which are truthy in
    -- Lua, so emptiness has to be tested explicitly.
    if is_safe_host_fragment(model_name) and is_safe_host_fragment(model_version) then
      local svc_name = "RELEASE-" .. model_name .. "-v" .. model_version
      target = svc_name .. ".NAMESPACE.svc.cluster.local:8001"
      request_handle:logInfo("route-to = " .. target)
    else
      request_handle:logWarn(
        "ModelInfer request without a usable model name/version; using default service")
    end
  end

  -- replace() rather than add(): a "route-to" value supplied by the client
  -- must not survive next to (or instead of) the one chosen here.
  headers:replace("route-to", target)
end

-- Upper bound on how many leading body bytes are copied into Lua. The two
-- fields of interest sit at the very start of the message, so there is no
-- need to copy a (possibly very large) tensor payload.
local MAX_INSPECTED_BYTES = 4096

-- Decode a protobuf base-128 varint from `data` at 1-based position `pos`.
-- Returns the value and the position just past it, or nil when the data ends
-- mid-varint or the varint is longer than 5 bytes.
local function read_varint(data, pos)
  local value, scale = 0, 1
  for i = pos, pos + 4 do
    local b = data:byte(i)
    if not b then
      return nil
    end
    value = value + (b % 128) * scale
    if b < 128 then -- continuation bit clear: last byte of the varint
      return value, i + 1
    end
    scale = scale * 128
  end
  return nil
end

-- Read a length-delimited field that must start at `pos` with tag byte `tag`.
-- Returns the payload and the position just past it on success; otherwise
-- nil plus "missing" (different tag / end of data) or "truncated".
local function read_length_delimited(data, pos, tag)
  if data:byte(pos) ~= tag then
    return nil, "missing"
  end
  local len, start = read_varint(data, pos + 1)
  if not len or start + len - 1 > #data then
    return nil, "truncated"
  end
  return data:sub(start, start + len - 1), start + len
end

--- Read model name and version from the start of a gRPC ModelInfer request.
--
-- Expects the body to be a gRPC length-prefixed message whose protobuf
-- payload begins with field 1 (tag 0x0A, model name) optionally followed
-- directly by field 2 (tag 0x12, model version).
--
-- @param request_handle Envoy Lua stream handle of the current request.
-- @return model_name, model_version: two strings; a value that could not be
--         read is returned as "" (both are "" when the name is unavailable).
function extract_model_name_and_version(request_handle)
  local model_name = ""
  local model_version = ""

  local buffer = request_handle:body()
  if not buffer then
    -- request carried no body at all
    return model_name, model_version
  end

  local body = buffer:getBytes(0, math.min(buffer:length(), MAX_INSPECTED_BYTES))
  if not body or #body <= 5 then
    return model_name, model_version
  end

  -- 5-byte gRPC prefix: 1-byte compressed flag + 4-byte message length.
  if body:byte(1) ~= 0 then
    request_handle:logWarn("Compressed gRPC message; cannot extract model name")
    return model_name, model_version
  end
  local msg = body:sub(6)

  -- field 1, wire type 2 (length-delimited): tag = 0x0A
  local name, after_name = read_length_delimited(msg, 1, 0x0A)
  if not name then
    if after_name == "missing" then
      request_handle:logErr("Unexpected protobuf tag: " .. string.format("0x%02X", msg:byte(1)))
    else
      request_handle:logErr("Truncated model_name field in inspected part of request body")
    end
    return model_name, model_version
  end
  model_name = name

  -- field 2, wire type 2 (length-delimited): tag = 0x12
  local version, reason = read_length_delimited(msg, after_name, 0x12)
  if version then
    model_version = version
  elseif reason == "missing" then
    -- msg:byte() is nil when the message ends right after the name
    local found = msg:byte(after_name)
    local found_text = "end of message"
    if found then
      found_text = string.format("0x%02X", found)
    end
    request_handle:logWarn(string.format(
      "No model_version field (expected tag 0x12 at offset %d, got %s)",
      after_name, found_text))
  else
    request_handle:logWarn("Truncated model_version field in inspected part of request body")
  end

  return model_name, model_version
end
45 changes: 42 additions & 3 deletions helm/supersonic/templates/envoy/configmaps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,22 @@ static_resources:
routes:
- match:
prefix: "/"
{{- if .envoy.dynamic_routing.enabled }}
typed_per_filter_config:
envoy.filters.http.dynamic_forward_proxy:
"@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig
host_rewrite_header: "route-to"
route:
cluster: dynamic_forward_proxy_cluster
timeout: {{ .envoy.grpc_route_timeout }}
{{- else }}
route:
cluster: triton_grpc_service
timeout: {{ .envoy.grpc_route_timeout }}
{{- end }}

http_filters:
{{- with .envoy.rate_limiter.prometheus_based }}
{{- with .envoy.lua_filter }}
{{- if .enabled }}
- name: envoy.filters.http.lua
typed_config:
Expand Down Expand Up @@ -94,6 +105,16 @@ static_resources:
provider_name: provider_icecube
{{- end }}
{{- end }}
{{- if .envoy.dynamic_routing.enabled }}
- name: envoy.filters.http.dynamic_forward_proxy
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig
dns_cache_config:
name: dynamic_cache
dns_lookup_family: ALL
dns_cache_circuit_breaker:
max_pending_requests: 1024
{{- end }}
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
Expand Down Expand Up @@ -176,6 +197,22 @@ static_resources:
socket_address:
address: {{ .tritonName }}
port_value: {{ .tritonGrpcPort }}
{{- if .envoy.dynamic_routing.enabled }}
- name: dynamic_forward_proxy_cluster
connect_timeout: 2s
lb_policy: CLUSTER_PROVIDED
http2_protocol_options:
max_concurrent_streams: 1000
cluster_type:
name: envoy.clusters.dynamic_forward_proxy
typed_config:
"@type": type.googleapis.com/envoy.extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig
dns_cache_config:
name: dynamic_cache
dns_lookup_family: ALL
dns_cache_circuit_breaker:
max_pending_requests: 1024
{{- end }}
{{- end }}
{{- end }}

Expand Down Expand Up @@ -226,7 +263,7 @@ data:
{{ include "envoy.configuration.yaml" $envoyContext | indent 4 }}
---

{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
{{- /* Create a ConfigMap for the Lua filter */}}
apiVersion: v1
kind: ConfigMap
Expand All @@ -239,12 +276,14 @@ metadata:
data:
envoy-filter.lua: |-
{{- /* Read and process the Lua configuration file */}}
{{- $luaConfig := $.Files.Get .Values.envoy.rate_limiter.prometheus_based.luaConfig | nindent 4 }}
{{- $luaConfig := $.Files.Get .Values.envoy.lua_filter.lua_config | nindent 4 }}
{{- $luaConfig = $luaConfig | replace "SERVER_LOAD_METRIC" (include "supersonic.defaultMetric" . | quote) }}
{{- $luaConfig = $luaConfig | replace "SERVER_LOAD_THRESHOLD" (quote .Values.serverLoadThreshold) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_SCHEME" (include "supersonic.prometheusScheme" .) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_HOST" (include "supersonic.prometheusHost" .) }}
{{- $luaConfig = $luaConfig | replace "PROMETHEUS_PORT" (include "supersonic.prometheusPort" .) }}
{{- $luaConfig = $luaConfig | replace "RELEASE" .Release.Name }}
{{- $luaConfig = $luaConfig | replace "NAMESPACE" .Release.Namespace }}
{{ $luaConfig | indent 4 }}

---
Expand Down
4 changes: 2 additions & 2 deletions helm/supersonic/templates/envoy/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ spec:
volumeMounts:
- name: {{ include "supersonic.name" . }}-envoy-config
mountPath: /etc/envoy
{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
- name: {{ include "supersonic.name" . }}-lua-volume
mountPath: /etc/envoy/lua
readOnly: true
Expand All @@ -58,7 +58,7 @@ spec:
- name: {{ include "supersonic.name" . }}-envoy-config
configMap:
name: {{ include "supersonic.name" . }}-envoy-config
{{- if .Values.envoy.rate_limiter.prometheus_based.enabled }}
{{- if .Values.envoy.lua_filter.enabled }}
- name: {{ include "supersonic.name" . }}-lua-volume
configMap:
name: {{ include "supersonic.name" . }}-lua-config
Expand Down
34 changes: 29 additions & 5 deletions helm/supersonic/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -370,14 +370,10 @@
"properties": {
"enabled": {
"type": "boolean"
},
"luaConfig": {
"type": "string"
}
},
"required": [
"enabled",
"luaConfig"
"enabled"
]
}
},
Expand All @@ -389,6 +385,32 @@
"loadBalancerPolicy": {
"type": "string"
},
"lua_filter": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
},
"lua_config": {
"type": "string"
}
},
"required": [
"enabled",
"lua_config"
]
},
"dynamic_routing": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean"
}
},
"required": [
"enabled"
]
},
"auth": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -424,11 +446,13 @@
"required": [
"args",
"auth",
"dynamic_routing",
"enabled",
"grpc_route_timeout",
"image",
"ingress",
"loadBalancerPolicy",
"lua_filter",
"rate_limiter",
"replicas",
"resources",
Expand Down
9 changes: 8 additions & 1 deletion helm/supersonic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,19 @@ envoy:
prometheus_based:
# -- Enable rate limiter
enabled: false
luaConfig: "cfg/envoy-filter.lua"

# -- Envoy load balancer policy.
# Options: ROUND_ROBIN, LEAST_REQUEST, RING_HASH, RANDOM, MAGLEV
loadBalancerPolicy: "LEAST_REQUEST"

lua_filter:
enabled: false
lua_config: "cfg/envoy-filter.lua"

# -- Enable dynamic routing in Envoy proxy.
dynamic_routing:
enabled: false

auth:
# -- Enable authentication in Envoy proxy
enabled: false
Expand Down
29 changes: 22 additions & 7 deletions values/values-geddes-cms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ triton:
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
--model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
--model-control-mode=explicit \
--allow-gpu-metrics=true \
--log-verbose=0 \
--strict-model-config=false \
--exit-timeout-secs=60

resources:
limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
Expand All @@ -37,6 +39,14 @@ envoy:
enabled: true
hostName: sonic-cms.geddes.rcac.purdue.edu
ingressClassName: public
rate_limiter:
prometheus_based:
enabled: false
dynamic_routing:
enabled: true
lua_filter:
enabled: true
lua_config: "cfg/envoy-filter-dynamic.lua"

autoscaler:
enabled: true
Expand All @@ -55,15 +65,20 @@ tolerations:
effect: NoSchedule

prometheus:
enabled: true
server:
ingress:
enabled: true
hostName: prometheus-cms.geddes.rcac.purdue.edu
ingressClassName: public
# enabled: false
external:
enabled: true
url: "prometheus-cms.geddes.rcac.purdue.edu"
port: 443
scheme: https
# server:
# ingress:
# enabled: true
# hostName: prometheus-cms.geddes.rcac.purdue.edu
# ingressClassName: public

grafana:
enabled: true
enabled: false
ingress:
enabled: true
hostName: grafana-cms.geddes.rcac.purdue.edu
Expand Down
Loading