Skip to content

GS observability event data marshalling triggers race condition #7623

@vlasebian

Description

@vlasebian

Summary

Race condition in GS happens when one of the observability events are registered. This is triggered by data that is marshalled while the event data is accessed in some other part of the code.

Steps to Reproduce

This is hard to reproduce as it's a race condition. In theory, one of the events (I am not aware of which one specifically) should be triggered and the race should happen at one point.

Current Result

A runtime error occurs when a race condition happens.

Expected Result

The race condition should not happen when a GS observability event is registered.

Relevant Logs

timestamp,message
1739386979047,"{""level"":""warn"",""msg"":""Finished unary call"",""duration"":0.0004,""error"":""error:pkg/gatewayserver:schedule (schedule)"",""error_correlation_id"":""18e9321ffbf94ea797b3b1a0b7c5ea70"",""error_name"":""schedule"",""error_namespace"":""pkg/gatewayserver"",""grpc.context.tenant_id"":""packetworx"",""grpc.method"":""ScheduleDownlink"",""grpc.service"":""ttn.lorawan.v3.NsGs"",""grpc_code"":""Aborted"",""namespace"":""grpc"",""peer.address"":""[2406:da1c:2b3:6203:bd43:6236:288a:bf51]:35064"",""peer.real_ip"":""2406:da1c:2b3:6203:bd43:6236:288a:bf51"",""request_id"":""01JKXRP0Q6CVSA95CVD7V8HFV8""}"
1739386979613,"{""level"":""info"",""msg"":""Finished unary call"",""auth.token_id"":""S2ZMCAE5NNRIYZHWGWSIFGR2B6K3PB4PBPJCZIQ"",""auth.token_type"":""APIKey"",""duration"":0.2621,""grpc.context.tenant_id"":""waarta001"",""grpc.method"":""GetGatewayConnectionStats"",""grpc.request.gateway_id"":""eui-a84041ffff27de84"",""grpc.service"":""ttn.lorawan.v3.Gs"",""namespace"":""grpc"",""peer.address"":""pipe"",""peer.real_ip"":""118.139.166.195"",""request_id"":""587e2dcc-63c3-4edc-a2ae-cb41c72af643""}"
1739386979613,"{""level"":""info"",""msg"":""Request handled"",""auth.token_id"":""S2ZMCAE5NNRIYZHWGWSIFGR2B6K3PB4PBPJCZIQ"",""auth.token_type"":""APIKey"",""duration"":0.263,""http.method"":""GET"",""http.path"":""/api/v3/gs/gateways/eui-a84041ffff27de84/connection/stats"",""http.status"":200,""namespace"":""web"",""peer.address"":""10.21.14.79:55738"",""peer.real_ip"":""118.139.166.195"",""request_id"":""587e2dcc-63c3-4edc-a2ae-cb41c72af643""}"
1739386980156,runtime error: invalid memory address or nil pointer dereference
1739386980156,goroutine 171 [running]:
1739386980156,runtime/debug.Stack()
1739386980156,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/debug/stack.go:26 +0x64
1739386980156,go.thethings.network/lorawan-stack/v3/pkg/task.Func.Execute.func1()
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/task/task.go:38 +0x7c
1739386980156,"panic({0x3d765c0?, 0x7fea220?})"
1739386980156,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/panic.go:785 +0x124
1739386980156,"google.golang.org/protobuf/internal/impl.sizeUint32NoZero({0x4002461148?}, 0xcb9b0?, {0x48?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/codec_gen.go:1474
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointerSlow(0x4000edd9b0, {0x16088?}, {0xc8?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:111 +0x228
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointer(0x4002461218?, {0x16624?}, {0x9?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:60 +0x8c
1739386980156,"google.golang.org/protobuf/internal/impl.sizeMessageInfo({0x4002461248?}, 0x400542eeb0, {0x48?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/codec_field.go:235 +0x30
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointerSlow(0x4000edb3b8, {0x2bc648?}, {0x80?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:111 +0x228
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointer(0x3ca6700?, {0x4006dc3738?}, {0xa0?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:60 +0x8c
1739386980156,"google.golang.org/protobuf/internal/impl.sizeMessageInfo({0x400eca0b00?}, 0x40019bd5f0, {0x38?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/codec_field.go:235 +0x30
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).initOneofFieldCoders.func3({0x20000000000002f?}, 0x0?, {0x0?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/codec_field.go:92 +0x40
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointerSlow(0x4000e0cb70, {0x3ab9e80?}, {0xe8?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:111 +0x228
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointer(0x4014f83340?, {0x400eca0b00?}, {0x18?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:60 +0x8c
1739386980156,"google.golang.org/protobuf/internal/impl.(*MessageInfo).size(0x4002461478?, {{}, {0x50fcf70?, 0x40045c0900?}, 0x80?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/internal/impl/encode.go:41 +0x64
1739386980156,"google.golang.org/protobuf/proto.MarshalOptions.marshal({{}, 0x88?, 0x15?, 0x46?}, {0x0, 0x0, 0x0}, {0x50fcf70, 0x40045c0900})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/proto/encode.go:185 +0xc8
1739386980156,"google.golang.org/protobuf/proto.MarshalOptions.Marshal({{}, 0x0?, 0x0?, 0x0?}, {0x506fa80, 0x40045c0900})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/proto/encode.go:125 +0x68
1739386980156,"google.golang.org/protobuf/types/known/anypb.MarshalFrom(0x400754ac80, {0x506fa80, 0x40045c0900}, {{}, 0x40?, 0x88?, 0xa0?})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/types/known/anypb/any.pb.go:267 +0x48
1739386980156,google.golang.org/protobuf/types/known/anypb.(*Any).MarshalFrom(...)
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/types/known/anypb/any.pb.go:351
1739386980156,"google.golang.org/protobuf/types/known/anypb.New({0x506fa80, 0x40045c0900})"
1739386980156,	/home/runner/go/pkg/mod/google.golang.org/[email protected]/types/known/anypb/any.pb.go:252 +0x48
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events.marshalData({0x45d72a0, 0x40045c0900})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/events.go:181 +0x6c
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events.Proto({0x50fc1c0, 0x4014f83600})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/events.go:223 +0x90
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events/redis.encodeEventData({0x50fc1c0?, 0x4014f83600?})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/redis/codec.go:43 +0x24
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events/redis.(*PubSubStore).storeEvent(0x4001b8ac60, {0x50d2218, 0x4004ac8d70}, {0xffff53576740, 0x4013a082a0}, {0x50fc1c0, 0x4014f83600}, 0x4002461a58)"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/redis/store.go:83 +0x4c
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events/redis.(*PubSubStore).publish(0x4001b8ac60, {0x4011390c80, 0x8, 0x4001789c00?})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/redis/store.go:497 +0x308
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events.PublishFunc.Publish(0x4002461dd8?, {0x4011390c80?, 0x4?, 0x0?})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/pubsub.go:56 +0x34
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/events/batch.(*batchPublisher).publish(0x4004c1a000, {0x50d2218, 0x4004ac8d70})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/events/batch/batch.go:86 +0x4c
1739386980156,"go.thethings.network/lorawan-stack/v3/pkg/task.Func.Execute(0x4004ac9450?, {0x50d2218?, 0x4004ac8d70?}, {0x50f6760?, 0x4004b09400?})"
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/task/task.go:47 +0x74
1739386980156,go.thethings.network/lorawan-stack/v3/pkg/task.DefaultStartTask.func1()
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/task/task.go:167 +0x13c
1739386980156,created by go.thethings.network/lorawan-stack/v3/pkg/task.DefaultStartTask in goroutine 1
1739386980156,	/home/runner/work/lorawan-stack/lorawan-stack/pkg/task/task.go:154 +0xd8
1739386980157,runtime: bad pointer in frame google.golang.org/protobuf/internal/impl.(*MessageInfo).sizePointerSlow at 0x4001ed9180: 0x5
1739386980157,fatal error: invalid pointer found on stack
1739386980176,runtime stack:
1739386980176,"runtime.throw({0x4830a95?, 0x7c6a480?})"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/panic.go:1067 +0x38 fp=0x400010fca0 sp=0x400010fc70 pc=0x85928
1739386980176,"runtime.adjustpointers(0x400010fdb8?, 0x4000072200?, 0x4000001125?, {0x2c9d10?, 0x6d0e201?})"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/stack.go:640 +0x19c fp=0x400010fce0 sp=0x400010fca0 pc=0x671dc
1739386980176,"runtime.adjustframe(0x400010fdb8, 0x400010fe18)"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/stack.go:697 +0xcc fp=0x400010fd70 sp=0x400010fce0 pc=0x672ec
1739386980176,"runtime.copystack(0x4004c1c000, 0x4000)"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/stack.go:945 +0x2c0 fp=0x400010fe70 sp=0x400010fd70 pc=0x67a90
1739386980176,runtime.newstack()
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/stack.go:1126 +0x36c fp=0x400010ffb0 sp=0x400010fe70 pc=0x67f3c
1739386980176,runtime.morestack()
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/runtime/asm_arm64.s:342 +0x70 fp=0x400010ffb0 sp=0x400010ffb0 pc=0x8c560
1739386980176,goroutine 171 gp=0x4004c1c000 m=2 mp=0x40000f4708 [copystack]:
1739386980176,"internal/reflectlite.implements(0x3d9bbe0, 0x3ca3fa0)"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/internal/reflectlite/type.go:414 +0x4fc fp=0x4001ed83b0 sp=0x4001ed83b0 pc=0x95a7c
1739386980176,"internal/reflectlite.rtype.AssignableTo({0x10?}, {0x50ef8d0?, 0x3d9bbe0?})"
1739386980176,	/home/runner/go/pkg/mod/golang.org/[email protected]/src/internal/reflectlite/type.go:406 +0x58 fp=0x4001ed83e0 sp=0x4001ed83b0 pc=0x95528
1739386980176,"errors.as({0x5063e00?, 0x7feb510?}, {0x3a31780, 0x4012fb4d00}, {0x3a31780?, 0x4012fb4d00?, 0x48466d2?}, {0x50ef8d0, 0x3d9bbe0})"

URL

No response

Deployment

The Things Stack Cloud

The Things Stack Version

3.33.1

Client Name and Version

Other Information

No response

Proposed Fix

For now, it's really hard to pinpoint the exact place in code that triggers this because the logs don't show too much information. Some more logging should be added to find out the exact event for which the race condition happens.

Contributing

  • I can help by doing more research.
  • I can help by implementing a fix after the proposal above is approved.
  • I can help by testing the fix before it's released.

Validation

Code of Conduct

Metadata

Metadata

Assignees

Labels

bugSomething isn't workingc/gateway serverThis is related to the Gateway Server

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions