Skip to content

Commit 3f71ee8

Browse files
authored
producer.messages.count: Use kgo.Err as reason (#593)
Updates the `error_reason` dimension to use the message of the returned Kafka error (`kerr.Error.Message`). This allows us to classify produce failures more granularly by reason. Previously, all produce failures that weren't caused by the context being canceled or its deadline being exceeded were classified as `unknown`. --------- Signed-off-by: Marc Lopez Rubio <[email protected]>
1 parent 1451744 commit 3f71ee8

File tree

3 files changed

+55
-19
lines changed

3 files changed

+55
-19
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
GO_TEST_TIMEOUT=60s
1+
GO_TEST_TIMEOUT=180s
22
GOTESTFLAGS=
33
GO_TEST_COUNT=10
44

kafka/metrics.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"strings"
2626
"time"
2727

28+
"github.com/twmb/franz-go/pkg/kerr"
2829
"github.com/twmb/franz-go/pkg/kgo"
2930
"github.com/twmb/franz-go/pkg/kmsg"
3031
"go.opentelemetry.io/otel/attribute"
@@ -374,7 +375,7 @@ func (h *metricHooks) OnBrokerConnect(meta kgo.BrokerMetadata, _ time.Duration,
374375
)
375376
}
376377

377-
func (h *metricHooks) OnBrokerDisconnect(meta kgo.BrokerMetadata, _ net.Conn) {
378+
func (h *metricHooks) OnBrokerDisconnect(kgo.BrokerMetadata, net.Conn) {
378379
attrs := make([]attribute.KeyValue, 0, 2)
379380
attrs = append(attrs, semconv.MessagingSystem("kafka"))
380381
if h.namespace != "" {
@@ -387,7 +388,7 @@ func (h *metricHooks) OnBrokerDisconnect(meta kgo.BrokerMetadata, _ net.Conn) {
387388
)
388389
}
389390

390-
func (h *metricHooks) OnBrokerWrite(meta kgo.BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error) {
391+
func (h *metricHooks) OnBrokerWrite(_ kgo.BrokerMetadata, key int16, bytesWritten int, writeWait, timeToWrite time.Duration, err error) {
391392
attrs := make([]attribute.KeyValue, 0, 3)
392393
attrs = append(attrs,
393394
semconv.MessagingSystem("kafka"),
@@ -419,7 +420,7 @@ func (h *metricHooks) OnBrokerWrite(meta kgo.BrokerMetadata, key int16, bytesWri
419420
)
420421
}
421422

422-
func (h *metricHooks) OnBrokerRead(meta kgo.BrokerMetadata, _ int16, bytesRead int, readWait, timeToRead time.Duration, err error) {
423+
func (h *metricHooks) OnBrokerRead(_ kgo.BrokerMetadata, _ int16, bytesRead int, readWait, timeToRead time.Duration, err error) {
423424
attrs := make([]attribute.KeyValue, 0, 3)
424425
attrs = append(attrs, semconv.MessagingSystem("kafka"))
425426
if h.namespace != "" {
@@ -449,7 +450,7 @@ func (h *metricHooks) OnBrokerRead(meta kgo.BrokerMetadata, _ int16, bytesRead i
449450
}
450451

451452
// OnProduceBatchWritten is called when a batch has been produced.
452-
func (h *metricHooks) OnProduceBatchWritten(meta kgo.BrokerMetadata,
453+
func (h *metricHooks) OnProduceBatchWritten(_ kgo.BrokerMetadata,
453454
topic string, partition int32, m kgo.ProduceBatchMetrics,
454455
) {
455456
attrs := make([]attribute.KeyValue, 0, 7)
@@ -491,7 +492,7 @@ func (h *metricHooks) OnProduceBatchWritten(meta kgo.BrokerMetadata,
491492

492493
// OnFetchBatchRead is called once per batch read from Kafka. Records
493494
// `consumer.messages.fetched`.
494-
func (h *metricHooks) OnFetchBatchRead(meta kgo.BrokerMetadata,
495+
func (h *metricHooks) OnFetchBatchRead(_ kgo.BrokerMetadata,
495496
topic string, partition int32, m kgo.FetchBatchMetrics,
496497
) {
497498
attrs := make([]attribute.KeyValue, 0, 6)
@@ -550,11 +551,15 @@ func (h *metricHooks) OnProduceRecordUnbuffered(r *kgo.Record, err error) {
550551
attrs = append(attrs, attribute.String("namespace", h.namespace))
551552
}
552553

553-
if errors.Is(err, context.DeadlineExceeded) {
554+
var kgoErr *kerr.Error
555+
switch {
556+
case errors.Is(err, context.DeadlineExceeded):
554557
attrs = append(attrs, attribute.String(errorReasonKey, "timeout"))
555-
} else if errors.Is(err, context.Canceled) {
558+
case errors.Is(err, context.Canceled):
556559
attrs = append(attrs, attribute.String(errorReasonKey, "canceled"))
557-
} else {
560+
case errors.As(err, &kgoErr):
561+
attrs = append(attrs, attribute.String(errorReasonKey, kgoErr.Message))
562+
default:
558563
attrs = append(attrs, attribute.String(errorReasonKey, "unknown"))
559564
}
560565

@@ -586,7 +591,7 @@ func (h *metricHooks) OnFetchRecordUnbuffered(r *kgo.Record, polled bool) {
586591
)
587592
}
588593

589-
func (h *metricHooks) OnBrokerThrottle(meta kgo.BrokerMetadata, throttleInterval time.Duration, throttledAfterResponse bool) {
594+
func (h *metricHooks) OnBrokerThrottle(_ kgo.BrokerMetadata, throttleInterval time.Duration, throttledAfterResponse bool) {
590595
attrs := make([]attribute.KeyValue, 0, 2)
591596
attrs = append(attrs, semconv.MessagingSystem("kafka"))
592597
if h.namespace != "" {

kafka/metrics_test.go

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/stretchr/testify/assert"
2828
"github.com/stretchr/testify/require"
29+
"github.com/twmb/franz-go/pkg/kerr"
2930
"github.com/twmb/franz-go/pkg/kgo"
3031
"go.opentelemetry.io/otel/attribute"
3132
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
@@ -44,17 +45,18 @@ func TestProducerMetrics(t *testing.T) {
4445
t *testing.T,
4546
producer apmqueue.Producer,
4647
rdr sdkmetric.Reader,
48+
name string,
4749
want []metricdata.Metrics,
4850
) {
49-
topic := apmqueue.Topic("default-topic")
51+
topic := apmqueue.Topic(name)
5052
producer.Produce(ctx,
5153
apmqueue.Record{Topic: topic, Value: []byte("1")},
5254
apmqueue.Record{Topic: topic, Value: []byte("2")},
5355
apmqueue.Record{Topic: topic, Value: []byte("3")},
5456
)
5557

5658
// Fixes https://github.com/elastic/apm-queue/issues/464
57-
<-time.After(1 * time.Millisecond)
59+
<-time.After(time.Millisecond)
5860

5961
// Close the producer so records are flushed.
6062
require.NoError(t, producer.Close())
@@ -108,7 +110,7 @@ func TestProducerMetrics(t *testing.T) {
108110
}
109111
ctx, cancel := context.WithTimeout(context.Background(), 0)
110112
defer cancel()
111-
test(ctx, t, producer, rdr, want)
113+
test(ctx, t, producer, rdr, "default-topic", want)
112114
})
113115
t.Run("ContextCanceled", func(t *testing.T) {
114116
producer, rdr := setupTestProducer(t, nil)
@@ -138,11 +140,11 @@ func TestProducerMetrics(t *testing.T) {
138140
}
139141
ctx, cancel := context.WithCancel(context.Background())
140142
cancel()
141-
test(ctx, t, producer, rdr, want)
143+
test(ctx, t, producer, rdr, "default-topic", want)
142144
})
143145
t.Run("Unknown error reason", func(t *testing.T) {
144146
producer, rdr := setupTestProducer(t, nil)
145-
want := metricdata.Metrics{
147+
want := []metricdata.Metrics{{
146148
Name: "producer.messages.count",
147149
Description: "The number of messages produced",
148150
Unit: "1",
@@ -164,9 +166,38 @@ func TestProducerMetrics(t *testing.T) {
164166
},
165167
},
166168
},
167-
}
169+
}}
168170
require.NoError(t, producer.Close())
169-
test(context.Background(), t, producer, rdr, []metricdata.Metrics{want})
171+
test(context.Background(), t, producer, rdr, "default-topic", want)
172+
})
173+
t.Run("unknown topic", func(t *testing.T) {
174+
producer, rdr := setupTestProducer(t, nil)
175+
want := []metricdata.Metrics{{
176+
Name: "producer.messages.count",
177+
Description: "The number of messages produced",
178+
Unit: "1",
179+
Data: metricdata.Sum[int64]{
180+
Temporality: metricdata.CumulativeTemporality,
181+
IsMonotonic: true,
182+
DataPoints: []metricdata.DataPoint[int64]{
183+
{
184+
Value: 3,
185+
Attributes: attribute.NewSet(
186+
attribute.String("outcome", "failure"),
187+
attribute.String(errorReasonKey,
188+
kerr.UnknownTopicOrPartition.Message,
189+
),
190+
attribute.String("namespace", "name_space"),
191+
attribute.String("topic", "name_space-unknown-topic"),
192+
semconv.MessagingSystem("kafka"),
193+
semconv.MessagingDestinationName("unknown-topic"),
194+
semconv.MessagingKafkaDestinationPartition(0),
195+
),
196+
},
197+
},
198+
},
199+
}}
200+
test(context.Background(), t, producer, rdr, "unknown-topic", want)
170201
})
171202
t.Run("Produced", func(t *testing.T) {
172203
producer, rdr := setupTestProducer(t, func(topic string) attribute.KeyValue {
@@ -281,7 +312,7 @@ func TestProducerMetrics(t *testing.T) {
281312
}},
282313
},
283314
}
284-
test(context.Background(), t, producer, rdr, want)
315+
test(context.Background(), t, producer, rdr, "default-topic", want)
285316
})
286317
t.Run("ProducedWithHeaders", func(t *testing.T) {
287318
producer, rdr := setupTestProducer(t, func(topic string) attribute.KeyValue {
@@ -365,7 +396,7 @@ func TestProducerMetrics(t *testing.T) {
365396
"key": "value",
366397
"some key": "some value",
367398
})
368-
test(ctx, t, producer, rdr, want)
399+
test(ctx, t, producer, rdr, "default-topic", want)
369400
})
370401
}
371402

0 commit comments

Comments
 (0)