Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/jaeger/internal/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/jaegertracing/jaeger/cmd/jaeger/internal/extension/remotesampling"
"github.com/jaegertracing/jaeger/cmd/jaeger/internal/integration/storagecleaner"
"github.com/jaegertracing/jaeger/cmd/jaeger/internal/processors/adaptivesampling"
"github.com/jaegertracing/jaeger/cmd/jaeger/internal/processors/dependencyprocessor"
)

type builders struct {
Expand Down Expand Up @@ -114,6 +115,7 @@ func (b builders) build() (otelcol.Factories, error) {
attributesprocessor.NewFactory(),
// add-ons
adaptivesampling.NewFactory(),
dependencyprocessor.NewFactory(),
)
if err != nil {
return otelcol.Factories{}, err
Expand Down
197 changes: 197 additions & 0 deletions cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// Copyright (c) 2025 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package dependencyprocessor

import (
"context"
"sync"
"time"

"github.com/apache/beam/sdks/v2/go/pkg/beam"

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / binary-size-check

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / spm (v2, jaeger)

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / hotrod (docker, v2)

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / all-in-one (v2)

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / hotrod (k8s, v2)

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:

Check failure on line 11 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/apache/beam/sdks/v2/go/pkg/beam; to add it:
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/model"
"github.com/jaegertracing/jaeger/storage/spanstore"

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / binary-size-check

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / spm (v2, jaeger)

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / hotrod (docker, v2)

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / all-in-one (v2)

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / hotrod (k8s, v2)

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:

Check failure on line 18 in cmd/jaeger/internal/processors/dependencyprocessor/aggregator.go

View workflow job for this annotation

GitHub Actions / unit-tests

no required module provides package github.com/jaegertracing/jaeger/storage/spanstore; to add it:
)
Comment on lines +6 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The imports section is missing required packages for the Apache Beam window and stats functionality. Please add the following imports:

"github.com/apache/beam/sdks/v2/go/pkg/beam/window"
"github.com/apache/beam/sdks/v2/go/pkg/beam/stats"

These packages are referenced in the code (e.g., beam.window.IntervalWindow and beam.stats.GroupByKey) but not imported, which will cause compilation errors.

Suggested change
import (
"context"
"sync"
"time"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/zap"
"github.com/jaegertracing/jaeger/model"
"github.com/jaegertracing/jaeger/storage/spanstore"
)
import (
"context"
"sync"
"time"
"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/stats"
"github.com/apache/beam/sdks/v2/go/pkg/beam/window"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/zap"
"github.com/jaegertracing/jaeger/model"
"github.com/jaegertracing/jaeger/storage/spanstore"
)

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.


// beamInitOnce guards the beam.Init call made in newDependencyAggregator so
// that it runs at most once, however many aggregators are constructed.
var beamInitOnce sync.Once

// dependencyAggregator batches incoming spans on a background goroutine and
// turns each batch into service-to-service dependency links using an Apache
// Beam pipeline, handing the links to dependencyWriter.
type dependencyAggregator struct {
// config is a pointer to the aggregator's own copy of the Config passed to
// newDependencyAggregator; AggregationInterval and InactivityTimeout are read from it.
config *Config
// telset supplies the logger used for warnings and errors.
telset component.TelemetrySettings
// dependencyWriter receives the computed links via WriteDependencies.
dependencyWriter spanstore.Writer
// inputChan carries span events from HandleSpan to runPipeline.
inputChan chan spanEvent
// closeChan is closed by Close to tell runPipeline to flush and exit.
closeChan chan struct{}
}

// spanEvent is the unit queued on inputChan: a span, the name of the service
// that produced it, and the time at which HandleSpan received it.
type spanEvent struct {
span ptrace.Span
serviceName string
// eventTime is set to time.Now() by HandleSpan, i.e. it is the arrival time
// at the aggregator rather than a timestamp taken from the span itself.
eventTime time.Time
}

// newDependencyAggregator builds an aggregator that is ready to be started
// with Start. The first call also initializes the Beam runtime (guarded by
// beamInitOnce so that later calls skip it).
//
// cfg is copied; later changes made by the caller are not observed.
// dependencyWriter is the sink that receives the computed dependency links.
func newDependencyAggregator(cfg Config, telset component.TelemetrySettings, dependencyWriter spanstore.Writer) *dependencyAggregator {
	// inputBufferSize bounds how many span events may wait for the pipeline
	// goroutine. HandleSpan sends without blocking, so with an unbuffered
	// channel every span that arrives while the goroutine is busy (for example
	// while a batch is being processed, or before it has reached its select
	// after Start) would be dropped. A buffer lets those spans queue instead.
	const inputBufferSize = 1024

	beamInitOnce.Do(func() {
		beam.Init()
	})
	return &dependencyAggregator{
		config:           &cfg,
		telset:           telset,
		dependencyWriter: dependencyWriter,
		inputChan:        make(chan spanEvent, inputBufferSize),
		closeChan:        make(chan struct{}),
	}
}

// Start launches runPipeline on a new goroutine and returns immediately.
// The goroutine keeps running until closeChan is closed (see Close).
func (agg *dependencyAggregator) Start() {
go agg.runPipeline()
}

// HandleSpan wraps the span, its service name and the current wall-clock time
// into a spanEvent and offers it to the pipeline goroutine through inputChan.
// The send never blocks: if the event cannot be handed over immediately it is
// dropped and a warning is logged. ctx is accepted but not used by this method.
func (agg *dependencyAggregator) HandleSpan(ctx context.Context, span ptrace.Span, serviceName string) {
event := spanEvent{
span: span,
serviceName: serviceName,
eventTime: time.Now(),
}
// Non-blocking send: the default branch below runs whenever inputChan cannot
// accept the event right now.
select {
case agg.inputChan <- event:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the motivation for having this done in the background instead of in the caller goroutine? Are the operations on Beam pipeline threadsafe or is this the reason for separation?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The motivation for processing spans in the background (via a separate goroutine) rather than in the caller goroutine is primarily related to performance optimization, decoupling of concerns, and ensuring thread safety when interacting with the Apache Beam pipeline

default:
agg.telset.Logger.Warn("Input channel full, dropping span")
}
}

// runPipeline is the body of the background goroutine. It repeatedly collects
// span events for one AggregationInterval and then hands the non-empty batch to
// processEvents. When closeChan is closed it flushes whatever has been
// collected so far and returns.
func (agg *dependencyAggregator) runPipeline() {
	// flush processes a batch unless it is empty.
	flush := func(batch []spanEvent) {
		if len(batch) == 0 {
			return
		}
		agg.processEvents(context.Background(), batch)
	}

	for {
		var batch []spanEvent
		deadline := time.NewTimer(agg.config.AggregationInterval)

		for collecting := true; collecting; {
			select {
			case ev := <-agg.inputChan:
				batch = append(batch, ev)
			case <-deadline.C:
				// Interval elapsed: leave the collection loop and flush below.
				collecting = false
			case <-agg.closeChan:
				// Shutting down: release the timer, flush the partial batch, exit.
				if !deadline.Stop() {
					<-deadline.C
				}
				flush(batch)
				return
			}
		}

		flush(batch)
	}
}

// processEvents builds and runs one Beam pipeline over a batch of span events:
// it keys the events by trace ID, windows and groups them, derives
// parent-service -> child-service links for each trace, and passes the links to
// dependencyWriter.WriteDependencies. ctx is forwarded to WriteDependencies and
// beam.Run. Failures are logged through telset.Logger and are not returned.
//
// NOTE(review): the body selects beam.window and beam.stats through the beam
// package qualifier. Go only permits exported (capitalized) identifiers to be
// selected that way, so these expressions need their own imports — confirm the
// correct Beam Go SDK packages and API names (including beam.WindowValue and
// beam.KV used as a composite literal) before relying on this function.
func (agg *dependencyAggregator) processEvents(ctx context.Context, events []spanEvent) {
// Create new pipeline and scope
p, s := beam.NewPipelineWithRoot()

// Create initial PCollection with timestamps
col := beam.CreateList(s, events)

// Transform into timestamped KV pairs
timestamped := beam.ParDo(s, func(event spanEvent) beam.WindowValue {
return beam.WindowValue{
Timestamp: event.eventTime,
Windows: beam.window.IntervalWindow{Start: event.eventTime, End: event.eventTime.Add(agg.config.InactivityTimeout)},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code references beam.window.IntervalWindow, but this appears to be an incorrect import path. The correct approach would be to import the window package separately and use window.IntervalWindow. Please update the import statement to include:

"github.com/apache/beam/sdks/v2/go/pkg/beam/window"

Then modify the code to use:

Windows: window.IntervalWindow{Start: event.eventTime, End: event.eventTime.Add(agg.config.InactivityTimeout)},

This will ensure proper access to the Beam windowing functionality.

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.

Value: beam.KV{
Key: event.span.TraceID(),
Value: event,
},
}
}, col)

// Apply session windows
windowed := beam.WindowInto(s,
beam.window.NewSessions(agg.config.InactivityTimeout),
timestamped,
)

// Group by TraceID and aggregate dependencies
grouped := beam.stats.GroupByKey(s, windowed)

// Calculate dependencies for each trace
dependencies := beam.ParDo(s, func(key pcommon.TraceID, iter func(*spanEvent) bool) []*model.DependencyLink {
spanMap := make(map[pcommon.SpanID]spanEvent)
// NOTE(review): event is a nil *spanEvent at this point; it is passed to iter
// and dereferenced in the loop below without ever being pointed at a value.
var event *spanEvent

// Build span map
for iter(event) {
spanMap[event.span.SpanID()] = *event
}
Comment on lines +132 to +139
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There appears to be an issue with the iterator usage in this code. The pointer variable event is declared but never initialized before being passed to the iterator function. Since iter(event) expects to populate the pointer, it should be initialized first.

Consider changing:

var event *spanEvent

// Build span map
for iter(event) {
    spanMap[event.span.SpanID()] = *event
}

To:

event := new(spanEvent)

// Build span map
for iter(event) {
    spanMap[event.span.SpanID()] = *event
}

This ensures the iterator has a valid pointer to populate during each iteration.

Suggested change
dependencies := beam.ParDo(s, func(key pcommon.TraceID, iter func(*spanEvent) bool) []*model.DependencyLink {
spanMap := make(map[pcommon.SpanID]spanEvent)
var event *spanEvent
// Build span map
for iter(event) {
spanMap[event.span.SpanID()] = *event
}
dependencies := beam.ParDo(s, func(key pcommon.TraceID, iter func(*spanEvent) bool) []*model.DependencyLink {
spanMap := make(map[pcommon.SpanID]spanEvent)
event := new(spanEvent)
// Build span map
for iter(event) {
spanMap[event.span.SpanID()] = *event
}

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.


// Calculate dependencies
// A link is counted once per child span whose parent span is present in the
// same group and belongs to a different, non-empty service name.
deps := make(map[string]*model.DependencyLink)
for _, event := range spanMap {
parentSpanID := event.span.ParentSpanID()
if parentEvent, hasParent := spanMap[parentSpanID]; hasParent {
parentService := parentEvent.serviceName
childService := event.serviceName

// Create dependency link if services are different
if parentService != "" && childService != "" && parentService != childService {
depKey := parentService + "&&&" + childService
if dep, exists := deps[depKey]; exists {
dep.CallCount++
} else {
deps[depKey] = &model.DependencyLink{
Parent: parentService,
Child: childService,
CallCount: 1,
}
}
}
}
}

return depMapToSlice(deps)
}, grouped)

// Merge results from all windows
merged := beam.Flatten(s, dependencies)

// Write to storage
// NOTE(review): the ParDo above is declared to return []*model.DependencyLink
// while this function accepts []model.DependencyLink — confirm the element
// types line up.
beam.ParDo0(s, func(deps []model.DependencyLink) {
if err := agg.dependencyWriter.WriteDependencies(ctx, time.Now(), deps); err != nil {
agg.telset.Logger.Error("Failed to write dependencies", zap.Error(err))
}
}, merged)

// Execute pipeline
if err := beam.Run(ctx, p); err != nil {
agg.telset.Logger.Error("Failed to run beam pipeline", zap.Error(err))
}
}

// Close signals the pipeline goroutine to stop by closing closeChan and always
// returns nil. It returns right after closing the channel; it does not wait
// for runPipeline to finish flushing its last batch.
// NOTE(review): calling Close a second time would close an already-closed
// channel, which panics in Go — confirm callers invoke it exactly once.
func (agg *dependencyAggregator) Close() error {
close(agg.closeChan)
return nil
}

// depMapToSlice returns the values of deps as a slice of the same pointers,
// pre-sized to len(deps). The map keys are ignored, and because Go map
// iteration order is unspecified the order of the result is not deterministic.
func depMapToSlice(deps map[string]*model.DependencyLink) []*model.DependencyLink {
result := make([]*model.DependencyLink, 0, len(deps))
for _, dep := range deps {
result = append(result, dep)
}
return result
Comment on lines +191 to +196
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a type mismatch between the return value of depMapToSlice() and what the WriteDependencies() interface method expects. The function currently returns []*model.DependencyLink (slice of pointers), but the interface method expects []model.DependencyLink (slice of values). This will cause compilation errors when trying to pass the result to the writer.

Consider modifying the function to return the correct type:

func depMapToSlice(deps map[string]*model.DependencyLink) []model.DependencyLink {
    result := make([]model.DependencyLink, 0, len(deps))
    for _, dep := range deps {
        result = append(result, *dep)  // Dereference the pointer
    }
    return result
}
Suggested change
func depMapToSlice(deps map[string]*model.DependencyLink) []*model.DependencyLink {
result := make([]*model.DependencyLink, 0, len(deps))
for _, dep := range deps {
result = append(result, dep)
}
return result
func depMapToSlice(deps map[string]*model.DependencyLink) []model.DependencyLink {
result := make([]model.DependencyLink, 0, len(deps))
for _, dep := range deps {
result = append(result, *dep)
}
return result
}

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright (c) 2025 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package dependencyprocessor

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/model"
)

// MockDependencyWriter is a testify mock standing in for the spanstore.Writer
// that the aggregator writes to. It embeds mock.Mock, so expectations are set
// with On(...) and checked with AssertExpectations.
type MockDependencyWriter struct {
mock.Mock
}

// WriteSpan records the call on the embedded mock and returns whatever error
// was configured for it as the first return value.
func (m *MockDependencyWriter) WriteSpan(ctx context.Context, span *model.Span) error {
	return m.Called(ctx, span).Error(0)
}

// WriteDependencies records the call (context, timestamp and links) on the
// embedded mock and returns whatever error was configured for it.
func (m *MockDependencyWriter) WriteDependencies(ctx context.Context, ts time.Time, deps []model.DependencyLink) error {
	result := m.Called(ctx, ts, deps)
	return result.Error(0)
}

// TestAggregator feeds a parent span (service1) and its child span (service2)
// from the same trace into a running aggregator and expects the writer to
// receive exactly one dependency link service1 -> service2 with CallCount 1.
func TestAggregator(t *testing.T) {
	mockWriter := new(MockDependencyWriter)

	cfg := Config{
		AggregationInterval: 100 * time.Millisecond,
		InactivityTimeout:   50 * time.Millisecond,
	}
	telemetrySettings := component.TelemetrySettings{
		Logger: zap.NewNop(),
	}

	// Test spans: a root span in service1 and its direct child in service2.
	traceID := createTraceID(1)
	parentSpanID := createSpanID(2)
	childSpanID := createSpanID(3)
	parentSpan := createSpan(traceID, parentSpanID, pcommon.SpanID{}, "service1")
	childSpan := createSpan(traceID, childSpanID, parentSpanID, "service2")

	// written is signalled from the mock when a matching write arrives. Polling
	// this channel instead of calling AssertExpectations(t) inside Eventually
	// matters: AssertExpectations reports a failure on t for every unmet poll,
	// which would fail the test before the aggregator had a chance to flush.
	written := make(chan struct{}, 1)
	mockWriter.On("WriteDependencies", mock.Anything, mock.Anything, mock.MatchedBy(func(deps []model.DependencyLink) bool {
		if len(deps) != 1 {
			return false
		}
		dep := deps[0]
		return dep.Parent == "service1" && dep.Child == "service2" && dep.CallCount == 1
	})).Run(func(mock.Arguments) {
		// Non-blocking so repeated writes cannot stall the aggregator.
		select {
		case written <- struct{}{}:
		default:
		}
	}).Return(nil)

	agg := newDependencyAggregator(cfg, telemetrySettings, mockWriter)
	agg.Start()
	// Stop the pipeline goroutine through the aggregator itself; closing an
	// unrelated local channel would leave it running after the test.
	defer func() {
		require.NoError(t, agg.Close())
	}()

	ctx := context.Background()
	agg.HandleSpan(ctx, parentSpan, "service1")
	agg.HandleSpan(ctx, childSpan, "service2")

	// Wait for the write, then verify the expectation once.
	assert.Eventually(t, func() bool {
		select {
		case <-written:
			return true
		default:
			return false
		}
	}, time.Second, 10*time.Millisecond, "Dependencies were not written as expected")
	mockWriter.AssertExpectations(t)
}

// TestAggregatorInactivityTimeout submits a single root span to an aggregator
// whose InactivityTimeout is much shorter than its AggregationInterval.
// It currently only checks that the aggregator accepts the span and shuts down
// cleanly; the original state assertion is kept below as a disabled TODO.
func TestAggregatorInactivityTimeout(t *testing.T) {
	mockWriter := new(MockDependencyWriter)
	mockWriter.On("WriteDependencies", mock.Anything, mock.Anything, mock.Anything).Return(nil)

	cfg := Config{
		AggregationInterval: 1 * time.Second,
		InactivityTimeout:   50 * time.Millisecond,
	}

	agg := newDependencyAggregator(cfg, component.TelemetrySettings{Logger: zap.NewNop()}, mockWriter)
	agg.Start()
	// Shut the aggregator down via Close so its goroutine does not outlive the
	// test; a locally created channel has no effect on it.
	defer func() {
		require.NoError(t, agg.Close())
	}()

	traceID := createTraceID(1)
	spanID := createSpanID(1)
	span := createSpan(traceID, spanID, pcommon.SpanID{}, "service1")

	ctx := context.Background()
	agg.HandleSpan(ctx, span, "service1")

	// TODO: re-enable once the aggregator exposes the state this relied on.
	// assert.Eventually(t, func() bool {
	// agg.tracesLock.RLock()
	// defer agg.tracesLock.RUnlock()
	// return len(agg.traces) == 0
	// }, time.Second, 10*time.Millisecond, "Trace was not cleared after inactivity timeout")
}

// TestAggregatorClose submits one span to a running aggregator and verifies
// that Close returns without error.
func TestAggregatorClose(t *testing.T) {
	mockWriter := new(MockDependencyWriter)
	// Register the expectation before any span is submitted so a flush can never
	// reach the mock ahead of it.
	mockWriter.On("WriteDependencies", mock.Anything, mock.Anything, mock.Anything).Return(nil)

	cfg := Config{
		AggregationInterval: 1 * time.Second,
		InactivityTimeout:   1 * time.Second,
	}

	agg := newDependencyAggregator(cfg, component.TelemetrySettings{Logger: zap.NewNop()}, mockWriter)
	agg.Start()

	traceID := createTraceID(1)
	spanID := createSpanID(1)
	span := createSpan(traceID, spanID, pcommon.SpanID{}, "service1")

	ctx := context.Background()
	agg.HandleSpan(ctx, span, "service1")

	// Close is the only shutdown signal the aggregator listens to.
	err := agg.Close()
	require.NoError(t, err)

	// TODO: re-enable once the aggregator exposes the state this relied on.
	// assert.Eventually(t, func() bool {
	// agg.tracesLock.RLock()
	// defer agg.tracesLock.RUnlock()
	// return len(agg.traces) == 0
	// }, time.Second, 10*time.Millisecond, "Traces were not cleared after close")
}

// Helper functions

// createTraceID returns a 16-byte trace ID that is all zeros except for the
// last byte, which is set to id.
func createTraceID(id byte) pcommon.TraceID {
	return pcommon.TraceID([16]byte{15: id})
}

// createSpanID returns an 8-byte span ID that is all zeros except for the
// last byte, which is set to id.
func createSpanID(id byte) pcommon.SpanID {
	return pcommon.SpanID([8]byte{7: id})
}

// createSpan returns a fresh span carrying the given trace, span and parent
// span IDs. serviceName is accepted for call-site readability but is not
// stored on the span; tests pass the service name to HandleSpan separately.
func createSpan(traceID pcommon.TraceID, spanID pcommon.SpanID, parentSpanID pcommon.SpanID, serviceName string) ptrace.Span {
	s := ptrace.NewSpan()
	s.SetParentSpanID(parentSpanID)
	s.SetSpanID(spanID)
	s.SetTraceID(traceID)
	// Additional span attributes could be set here if needed
	return s
}
Loading
Loading