Envoy crash error when buffering incoming requests #376
Describe the bug / error
I was trying to install some custom logic that performs rate limiting with buffering. The logic looks like this: say we want to limit the rate to 1/delay. Each worker thread maintains its own queue, and a global shared-data entry records the timeout of the last element across all worker threads' queues. Every time an HTTP request comes in, it is paused and its contextID is pushed into the queue with timeout = shared_timeout + delay. Each thread periodically runs the ticker function to resume the requests whose timeout has passed.
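In other words, the scheduling rule is: a request arriving at time now is given the deadline max(shared_last_timeout, now) + delay, and that deadline is written back as the new shared value. A condensed sketch of just this rule (the full plugin source is below; all times are in microseconds):

func nextTimeout(sharedLastTimeout, now, delay uint64) uint64 {
	// The new deadline is at least `delay` past both the previous
	// deadline and the current time.
	if sharedLastTimeout > now {
		return sharedLastTimeout + delay
	}
	return now + delay
}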
When I was running this with online-boutique, there were some crashes in the worker threads.
- Crash in the frontend pod, which accepts incoming HTTP/1.1 requests:
2023-04-10T18:55:23.149999Z critical envoy backtrace external/envoy/source/server/backtrace.h:104 Caught Segmentation fault, suspect faulting address 0x1 thread=49
2023-04-10T18:55:23.150031Z critical envoy backtrace external/envoy/source/server/backtrace.h:91 Backtrace (use tools/stack_decode.py to get line numbers): thread=49
2023-04-10T18:55:23.150034Z critical envoy backtrace external/envoy/source/server/backtrace.h:92 Envoy version: 6e6b45cd824e414453ac8f0c81be540269ddff3e/1.25.2-dev/Clean/RELEASE/BoringSSL thread=49
2023-04-10T18:55:23.150425Z critical envoy backtrace external/envoy/source/server/backtrace.h:98 #0: [0x7f49d74b6520] thread=49
2023-04-10T18:55:23.160894Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #1: Envoy::Router::UpstreamCodecFilter::CodecBridge::decodeHeaders() [0x559b7e603dbd] thread=49
2023-04-10T18:55:23.171185Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #2: Envoy::Http::Http1::ActiveClient::StreamWrapper::decodeHeaders() [0x559b7e331a4f] thread=49
2023-04-10T18:55:23.181028Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #3: Envoy::Http::Http1::ClientConnectionImpl::onHeadersCompleteBase() [0x559b7e5248d1] thread=49
2023-04-10T18:55:23.192287Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #4: Envoy::Http::Http1::ClientConnectionImpl::onHeadersCompleteBase() [0x559b7e524c32] thread=49
2023-04-10T18:55:23.202275Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #5: Envoy::Http::Http1::ConnectionImpl::onHeadersCompleteImpl() [0x559b7e51f585] thread=49
2023-04-10T18:55:23.212236Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #6: Envoy::Http::Http1::ConnectionImpl::onHeadersComplete() [0x559b7e51e918] thread=49
2023-04-10T18:55:23.222220Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #7: http_parser_execute [0x559b7eb2f4ea] thread=49
2023-04-10T18:55:23.232033Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #8: Envoy::Http::Http1::ConnectionImpl::dispatchSlice() [0x559b7e51de36] thread=49
2023-04-10T18:55:23.241599Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #9: Envoy::Http::Http1::ConnectionImpl::dispatch() [0x559b7e51d62f] thread=49
2023-04-10T18:55:23.251048Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #10: Envoy::Http::Http1::ClientConnectionImpl::dispatch() [0x559b7e51d1bd] thread=49
2023-04-10T18:55:23.260364Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #11: Envoy::Http::CodecClient::onData() [0x559b7e3cc15d] thread=49
2023-04-10T18:55:23.269611Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #12: Envoy::Http::CodecClient::CodecReadFilter::onData() [0x559b7e3cd9d5] thread=49
2023-04-10T18:55:23.278945Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #13: Envoy::Network::FilterManagerImpl::onContinueReading() [0x559b7e7dc146] thread=49
2023-04-10T18:55:23.288425Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #14: Envoy::Network::ConnectionImpl::onReadReady() [0x559b7e769538] thread=49
2023-04-10T18:55:23.297707Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #15: Envoy::Network::ConnectionImpl::onFileEvent() [0x559b7e767073] thread=49
2023-04-10T18:55:23.307032Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #16: std::__1::__function::__func<>::operator()() [0x559b7e75c3f1] thread=49
2023-04-10T18:55:23.316305Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #17: Envoy::Event::FileEventImpl::assignEvents()::$_1::__invoke() [0x559b7e75d90c] thread=49
2023-04-10T18:55:23.325580Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #18: event_process_active_single_queue [0x559b7eb3c0f0] thread=49
2023-04-10T18:55:23.334778Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #19: event_base_loop [0x559b7eb3ab01] thread=49
2023-04-10T18:55:23.344077Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #20: Envoy::Server::WorkerImpl::threadRoutine() [0x559b7e02e3bd] thread=49
2023-04-10T18:55:23.353404Z critical envoy backtrace external/envoy/source/server/backtrace.h:96 #21: Envoy::Thread::ThreadImplPosix::ThreadImplPosix()::{lambda()#1}::__invoke() [0x559b7eb42733] thread=49
2023-04-10T18:55:23.353536Z critical envoy backtrace external/envoy/source/server/backtrace.h:98 #22: [0x7f49d7508b43] thread=49
Http1::ConnectionImpl 0x559b85b39208, dispatching_: 1, dispatching_slice_already_drained_: 0, reset_stream_called_: 0, handling_upgrade_: 0, deferred_end_stream_headers_: 0, processing_trailers_: 0, buffered_body_.length(): 0, header_parsing_state_: Field, current_header_field_: , current_header_value_:
absl::get<ResponseHeaderMapPtr>(headers_or_trailers_):
':status', '200'
'set-cookie', 'shop_session-id=936aa98f-5bf2-4b12-b850-aa180fc80684; Max-Age=172800'
'date', 'Mon, 10 Apr 2023 18:55:23 GMT'
'content-type', 'text/html; charset=utf-8'
'transfer-encoding', 'chunked'
Dumping corresponding downstream request:
decoder:
2023-04-10T18:55:24.307705Z info ads ADS: "@" frontend-5bbb957997-krdwt.online-boutique-1 terminated
2023-04-10T18:55:24.307710Z info ads ADS: "@" frontend-5bbb957997-krdwt.online-boutique-2 terminated
- Crash in other pods, which accept incoming gRPC calls. This happens mostly toward the end of experiments; I'm not sure if it's because Envoy tries to resume HTTP requests after the connection has already closed (a guard sketch follows the backtrace below).
2023-04-10T19:25:09.865055Z error envoy wasm external/envoy/source/extensions/common/wasm/wasm_vm.cc:38 Function: proxy_on_request_body failed: Uncaught RuntimeError: unreachable
Proxy-Wasm plugin in-VM backtrace:
0: 0x10a9 - runtime._panic
1: 0xdcc9 - proxy_on_request_body thread=39
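If stale contexts are the problem, I imagine a guard roughly like this would be needed (a hypothetical sketch, not in my current code): mark finished streams in OnHttpStreamDone so that OnTick can skip them instead of resuming a destroyed stream.

// Hypothetical: pluginContext would gain a `done map[uint32]bool` field
// (initialized in NewPluginContext), and OnTick would Pop and skip entries
// whose contextID is marked done instead of resuming them.
func (ctx *httpContext) OnHttpStreamDone() {
	ctx.pluginCtx.done[ctx.contextID] = true
}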
What is your Envoy/Istio version?
1.17.1
What is the SDK version?
The latest repo at commit 19b7668.
What is your TinyGo version?
0.27.0
URL or snippet of your code including Envoy configuration
package main

import (
	"encoding/binary"
	"strconv"
	"strings"
	"time"

	"github.com/tetratelabs/proxy-wasm-go-sdk/proxywasm"
	"github.com/tetratelabs/proxy-wasm-go-sdk/proxywasm/types"
)

const sharedDataKey = "last_timeout"

var tickMilliseconds uint32 = 1 // tick period in ms, overridable via plugin config
var delay uint64 = 5            // minimum request spacing in µs, overridable via plugin config

// queueEntry records a paused request and the time at which it may resume.
type queueEntry struct {
	contextID uint32
	timeout   uint64 // absolute deadline in µs (UnixMicro)
}
/* ----------------------------- QueueImpl ----------------------------- */

// Queue is a fixed-size ring buffer of paused requests. With the
// (tail+1) % maxsize == head convention, one slot is left empty to tell
// "full" from "empty", so it holds at most maxsize-1 entries.
type Queue struct {
	maxsize int
	array   [1024]queueEntry
	head    int
	tail    int
}

// Push appends an entry; the caller must check IsFull first.
func (q *Queue) Push(val queueEntry) {
	q.array[q.tail] = val
	q.tail = (q.tail + 1) % q.maxsize
}

// Pop removes and returns the oldest entry.
func (q *Queue) Pop() queueEntry {
	val := q.array[q.head]
	q.head = (q.head + 1) % q.maxsize
	return val
}

// Top returns the oldest entry without removing it.
func (q *Queue) Top() queueEntry {
	return q.array[q.head]
}

func (q *Queue) IsFull() bool {
	return (q.tail+1)%q.maxsize == q.head
}

func (q *Queue) Size() int {
	return (q.tail + q.maxsize - q.head) % q.maxsize
}
/* ----------------------------- VMContext ----------------------------- */

type vmContext struct {
	// Embed the default VM context here,
	// so that we don't need to reimplement all the methods.
	types.DefaultVMContext
}

// Override types.VMContext.
func (*vmContext) OnVMStart(vmConfigurationSize int) types.OnVMStartStatus {
	proxywasm.LogWarnf("Start VM with version 6: ringQueue+remove-max")
	// Seed the shared "last timeout" with the current time in µs.
	initialValueBuf := make([]byte, 8)
	binary.BigEndian.PutUint64(initialValueBuf, uint64(time.Now().UnixMicro()))
	if err := proxywasm.SetSharedData(sharedDataKey, initialValueBuf, 0); err != nil {
		proxywasm.LogWarnf("error setting shared data on OnVMStart: %v", err)
	}
	return types.OnVMStartStatusOK
}

// Override types.DefaultVMContext.
func (*vmContext) NewPluginContext(contextID uint32) types.PluginContext {
	return &pluginContext{
		contextID: contextID,
		queue: &Queue{
			maxsize: 1024,
			tail:    0,
			head:    0,
		},
	}
}
/* ----------------------------- PluginContext ----------------------------- */

type pluginContext struct {
	// Embed the default plugin context here,
	// so that we don't need to reimplement all the methods.
	types.DefaultPluginContext
	contextID uint32
	queue     *Queue
}

// parseConfig reads "delay" and "tick" from the plugin configuration,
// e.g. {"delay": "5", "tick": "1"}.
func parseConfig() {
	data, err := proxywasm.GetPluginConfiguration()
	if err != nil {
		proxywasm.LogWarnf("failed to get plugin config %v", err)
		return
	}
	configs := strings.Split(strings.Trim(string(data), "{}"), ",")
	for _, config := range configs {
		key_value := strings.Split(config, ":")
		if len(key_value) != 2 {
			continue
		}
		key := strings.Trim(key_value[0], " \"")
		if key == "delay" {
			value, _ := strconv.Atoi(strings.Trim(key_value[1], " \""))
			delay = uint64(value)
			proxywasm.LogWarnf("get plugin config, set delay to %v", delay)
		} else if key == "tick" {
			value, _ := strconv.Atoi(strings.Trim(key_value[1], " \""))
			tickMilliseconds = uint32(value)
			proxywasm.LogWarnf("get plugin config, set tick to %v", tickMilliseconds)
		}
	}
}

// Override types.DefaultPluginContext.
func (ctx *pluginContext) OnPluginStart(pluginConfigurationSize int) types.OnPluginStartStatus {
	parseConfig()
	if err := proxywasm.SetTickPeriodMilliSeconds(tickMilliseconds); err != nil {
		proxywasm.LogCriticalf("failed to set tick period: %v", err)
	}
	return types.OnPluginStartStatusOK
}
// Override types.DefaultPluginContext.
// OnTick resumes, in FIFO order, every queued request whose deadline has passed.
func (ctx *pluginContext) OnTick() {
	for ctx.queue.Size() != 0 {
		first := ctx.queue.Top()
		if first.timeout <= uint64(time.Now().UnixMicro()) {
			proxywasm.SetEffectiveContext(first.contextID)
			proxywasm.ResumeHttpRequest()
			ctx.queue.Pop()
			// proxywasm.LogWarnf("resume request with contextID=%d, msg counts to %d", first.contextID, ctx.queue.Size())
		} else {
			break
		}
	}
}

// Override types.DefaultPluginContext.
func (ctx *pluginContext) NewHttpContext(contextID uint32) types.HttpContext {
	return &httpContext{
		contextID: contextID,
		pluginCtx: ctx,
	}
}
/* ----------------------------- HTTPContext ----------------------------- */

type httpContext struct {
	// Embed the default http context here,
	// so that we don't need to reimplement all the methods.
	types.DefaultHttpContext
	contextID uint32
	pluginCtx *pluginContext
}

// getSharedData retries until the shared "last timeout" can be read.
func getSharedData() ([]byte, uint32, error) {
	value, cas, err := proxywasm.GetSharedData(sharedDataKey)
	for err != nil {
		proxywasm.LogWarnf("error getting shared data on OnHttpRequestHeaders: %v", err)
		value, cas, err = proxywasm.GetSharedData(sharedDataKey)
	}
	return value, cas, err
}

func max(a uint64, b uint64) uint64 {
	if a >= b {
		return a
	}
	return b
}

// getTimeout computes this request's deadline as
// max(shared last timeout, now) + delay and writes it back via CAS,
// retrying on CAS mismatch.
func getTimeout() uint64 {
	value, cas, err := getSharedData()
	buf := make([]byte, 8)
	val := binary.BigEndian.Uint64(value)
	timeout := max(val, uint64(time.Now().UnixMicro())) + delay
	binary.BigEndian.PutUint64(buf, timeout)
	err = proxywasm.SetSharedData(sharedDataKey, buf, cas)
	for err != nil {
		proxywasm.LogWarnf("error setting shared data on OnHttpRequestHeaders: %v, retry", err)
		value, cas, err = getSharedData()
		val = binary.BigEndian.Uint64(value)
		timeout = max(val, uint64(time.Now().UnixMicro())) + delay
		binary.BigEndian.PutUint64(buf, timeout)
		err = proxywasm.SetSharedData(sharedDataKey, buf, cas)
	}
	return timeout
}
// Override types.DefaultHttpContext.
func (ctx *httpContext) OnHttpRequestHeaders(numHeaders int, endOfStream bool) types.Action {
	if ctx.pluginCtx.queue.IsFull() {
		// Queue full: reject immediately with a local 403 response.
		if err := proxywasm.SendHttpResponse(403, [][2]string{
			{"powered-by", "proxy-wasm-go-sdk!!"},
		}, []byte("rate limited, wait and retry."), -1); err != nil {
			proxywasm.LogErrorf("failed to send local response: %v", err)
		}
		proxywasm.LogWarnf("Deny request")
		return types.ActionPause
	}
	// Otherwise pause the request and queue it until its deadline.
	timeout := getTimeout()
	ctx.pluginCtx.queue.Push(queueEntry{contextID: ctx.contextID, timeout: timeout})
	// proxywasm.LogWarnf("postpone request with contextID=%d, msg counts to %d", ctx.contextID, ctx.pluginCtx.queue.Size())
	return types.ActionPause
}

/* ----------------------------- Main ----------------------------- */

func main() {
	proxywasm.SetVMContext(&vmContext{})
}
EnvoyFilter YAML:
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: $app-rate-limit-filter
spec:
  workloadSelector:
    labels:
      app: $app
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        context: SIDECAR_INBOUND
        listener:
          filterChain:
            filter:
              name: envoy.http_connection_manager
              subFilter:
                name: envoy.router
      patch:
        operation: INSERT_BEFORE
        value:
          name: mydummy
          typed_config:
            '@type': type.googleapis.com/udpa.type.v1.TypedStruct
            type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
            value:
              config:
                configuration:
                  '@type': type.googleapis.com/google.protobuf.StringValue
                  value: '{"delay": "${delay}", "tick": "${tick}"}'
                root_id: "rate-limit-filter"
                vm_config:
                  code:
                    local:
                      filename: /var/local/wasm/rate-limit-filter.wasm
                  runtime: envoy.wasm.runtime.v8
                  vm_id: dcoz-vm