Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PoC: experimental Trace SDK self-observability metrics #6153

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

### Added

- Add the experimental `OTEL_GO_X_SELF_OBSERVABILITY` environment variable to control whether self-observability metrics and traces are produced by SDKs. (#6153)
- Add the experimental `otel.sdk.batch_span_processor.queue_size` metric to the trace batch span processor. (#6153)

### Fixed

- Add `ValueFromAttribute` and `KeyValueFromAttribute` in `go.opentelemetry.io/otel/log`. (#6180)
- Add `EventName` and `SetEventName` to `Record` in `go.opentelemetry.io/otel/log`. (#6187)
- Add `EventName` to `RecordFactory` in `go.opentelemetry.io/otel/log/logtest`. (#6187)
Expand Down
87 changes: 83 additions & 4 deletions exporters/otlp/otlptrace/otlptracegrpc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ package otlptracegrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlptra
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"

"google.golang.org/genproto/googleapis/rpc/errdetails"
Expand All @@ -16,14 +18,20 @@ import (
"google.golang.org/grpc/status"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc/internal"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc/internal/otlpconfig"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc/internal/retry"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc/internal/x"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
coltracepb "go.opentelemetry.io/proto/otlp/collector/trace/v1"
tracepb "go.opentelemetry.io/proto/otlp/trace/v1"
)

// selfObsScopeName is the instrumentation scope name passed to the
// MeterProvider when creating the meter for this exporter's
// self-observability metrics.
const selfObsScopeName = "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"

type client struct {
endpoint string
dialOpts []grpc.DialOption
Expand All @@ -45,6 +53,12 @@ type client struct {
conn *grpc.ClientConn
tscMu sync.RWMutex
tsc coltracepb.TraceServiceClient

spansInflightUpDownCounter metric.Int64UpDownCounter
spansExportedCounter metric.Int64Counter
baseAttributes metric.MeasurementOption
successAttributes metric.MeasurementOption
exportFailedAttributes metric.MeasurementOption
}

// Compile time check *client implements otlptrace.Client.
Expand Down Expand Up @@ -74,9 +88,52 @@ func newClient(opts ...Option) *client {
c.metadata = metadata.New(cfg.Traces.Headers)
}

c.configureSelfObservability()

return c
}

// exporterID counts how many exporter identifiers have been handed out so
// far. It is only modified through nextExporterID.
var exporterID atomic.Int64

// nextExporterID hands out a process-wide identifier for an OTLP gRPC trace
// exporter. The first call returns 0 and every later call returns the
// previous value plus one.
func nextExporterID() int64 {
	// Add returns the value after the increment, so step back by one to get
	// the identifier that was reserved by this call.
	issued := exporterID.Add(1)
	return issued - 1
}

// configureSelfObservability creates the self-observability instruments and
// the pre-built attribute options used by this OTLP gRPC trace exporter
// client.
//
// When the OTEL_GO_X_SELF_OBSERVABILITY feature is enabled the instruments
// are created from the global MeterProvider; otherwise a no-op MeterProvider
// is used. Errors returned while creating an instrument are reported through
// otel.Handle and do not abort configuration.
func (c *client) configureSelfObservability() {
	mp := otel.GetMeterProvider()
	if !x.SelfObservability.Enabled() {
		mp = metric.MeterProvider(noop.NewMeterProvider())
	}
	meter := mp.Meter(
		selfObsScopeName,
		metric.WithInstrumentationVersion(otlptrace.Version()),
	)

	var err error
	c.spansInflightUpDownCounter, err = meter.Int64UpDownCounter("otel.sdk.span.exporter.spans_inflight",
		metric.WithUnit("{span}"),
		metric.WithDescription("The number of spans which were passed to the exporter, but that have not been exported yet (neither successful, nor failed)."),
	)
	if err != nil {
		otel.Handle(err)
	}
	c.spansExportedCounter, err = meter.Int64Counter("otel.sdk.span.exporter.spans_exported",
		metric.WithUnit("{span}"),
		metric.WithDescription("The number of spans for which the export has finished, either successful or failed."),
	)
	if err != nil {
		otel.Handle(err)
	}

	// Every measurement carries the component type and a per-instance
	// component name so that multiple exporters in one process are
	// distinguishable.
	componentTypeAttr := attribute.String("otel.sdk.component.type", "otlp_grpc_span_exporter")
	componentNameAttr := attribute.String("otel.sdk.component.name", fmt.Sprintf("otlp_grpc_span_exporter/%d", nextExporterID()))

	c.baseAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr)
	// error.type is intentionally left off successful exports: the semantic
	// conventions state it must not be set when the operation succeeded, and
	// an empty-string value would still create a distinct attribute.
	c.successAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr)
	c.exportFailedAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", "export_failed"))
}

// Start establishes a gRPC connection to the collector.
func (c *client) Start(context.Context) error {
if c.conn == nil {
Expand Down Expand Up @@ -175,6 +232,16 @@ var errShutdown = errors.New("the client is shutdown")
// Retryable errors from the server will be handled according to any
// RetryConfig the client was created with.
func (c *client) UploadTraces(ctx context.Context, protoSpans []*tracepb.ResourceSpans) error {
var numSpans int64
for _, rs := range protoSpans {
for _, ss := range rs.GetScopeSpans() {
numSpans += int64(len(ss.GetSpans()))
}
}
c.spansInflightUpDownCounter.Add(ctx, numSpans, c.baseAttributes)
defer func() {
c.spansInflightUpDownCounter.Add(ctx, -numSpans, c.baseAttributes)
}()
// Hold a read lock to ensure a shut down initiated after this starts does
// not abandon the export. This read lock acquire has less priority than a
// write lock acquire (i.e. Stop), meaning if the client is shutting down
Expand All @@ -189,15 +256,17 @@ func (c *client) UploadTraces(ctx context.Context, protoSpans []*tracepb.Resourc
ctx, cancel := c.exportContext(ctx)
defer cancel()

return c.requestFunc(ctx, func(iCtx context.Context) error {
var partialRejected int64

err := c.requestFunc(ctx, func(iCtx context.Context) error {
resp, err := c.tsc.Export(iCtx, &coltracepb.ExportTraceServiceRequest{
ResourceSpans: protoSpans,
})
if resp != nil && resp.PartialSuccess != nil {
msg := resp.PartialSuccess.GetErrorMessage()
n := resp.PartialSuccess.GetRejectedSpans()
if n != 0 || msg != "" {
err := internal.TracePartialSuccessError(n, msg)
partialRejected = resp.PartialSuccess.GetRejectedSpans()
if partialRejected != 0 || msg != "" {
err := internal.TracePartialSuccessError(partialRejected, msg)
otel.Handle(err)
}
}
Expand All @@ -208,6 +277,16 @@ func (c *client) UploadTraces(ctx context.Context, protoSpans []*tracepb.Resourc
}
return err
})
if err == nil {
c.spansExportedCounter.Add(ctx, numSpans, c.successAttributes)
} else if partialRejected == 0 {
c.spansExportedCounter.Add(ctx, numSpans, c.exportFailedAttributes)
} else {
// partial success
c.spansExportedCounter.Add(ctx, partialRejected, c.exportFailedAttributes)
c.spansExportedCounter.Add(ctx, numSpans-partialRejected, c.successAttributes)
}
return err
}

// exportContext returns a copy of parent with an appropriate deadline and
Expand Down
2 changes: 1 addition & 1 deletion exporters/otlp/otlptrace/otlptracegrpc/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/otel v1.34.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.34.0
go.opentelemetry.io/otel/metric v1.34.0
go.opentelemetry.io/otel/sdk v1.34.0
go.opentelemetry.io/otel/trace v1.34.0
go.opentelemetry.io/proto/otlp v1.5.0
Expand All @@ -24,7 +25,6 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
Expand Down
41 changes: 41 additions & 0 deletions exporters/otlp/otlptrace/otlptracegrpc/internal/x/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Experimental Features

The OTLP trace gRPC exporter contains features that have not yet stabilized in the OpenTelemetry specification.
These features are added to the OpenTelemetry Go SDK prior to stabilization in the specification so that users can start experimenting with them and provide feedback.

These features may change in backwards-incompatible ways as feedback is applied.
See the [Compatibility and Stability](#compatibility-and-stability) section for more information.

## Features

- [SDK Self-Observability](#sdk-self-observability)

### SDK Self-Observability

To enable experimental metric and trace instrumentation in SDKs, set the `OTEL_GO_X_SELF_OBSERVABILITY` environment variable.
If enabled, this instrumentation uses the global `TracerProvider` and `MeterProvider`.
The value set must be the case-insensitive string of `"true"` to enable the feature.
All other values are ignored.

#### Examples

Enable experimental SDK self-observability.

```console
export OTEL_GO_X_SELF_OBSERVABILITY=true
```

Disable experimental SDK self-observability.

```console
unset OTEL_GO_X_SELF_OBSERVABILITY
```

## Compatibility and Stability

Experimental features do not fall within the scope of the OpenTelemetry Go versioning and stability [policy](../../../../../../VERSIONING.md).
These features may be removed or modified in successive version releases, including patch versions.

When an experimental feature is promoted to a stable feature, a migration path will be included in the changelog entry of the release.
There is no guarantee that any environment variable feature flags that enabled the experimental feature will be supported by the stable version.
If they are supported, they may be accompanied with a deprecation notice stating a timeline for the removal of that support.
66 changes: 66 additions & 0 deletions exporters/otlp/otlptrace/otlptracegrpc/internal/x/x.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

// Package x contains support for experimental features of the OTLP trace gRPC exporter.
//
// This package should only be used for features defined in the specification.
// It should not be used for experiments or new project ideas.
package x // import "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc/internal/x"

import (
"os"
"strings"
)

// SelfObservability is an experimental feature flag that determines if SDK
// self-observability metrics are enabled.
//
// To enable this feature set the OTEL_GO_X_SELF_OBSERVABILITY environment variable
// to the case-insensitive string value of "true" (i.e. "True" and "TRUE"
// will also enable this). Any other value leaves the feature disabled.
//
// When enabled, the looked-up value is the string exactly as the user wrote
// it (original casing preserved).
var SelfObservability = newFeature("SELF_OBSERVABILITY", func(v string) (string, bool) {
	// EqualFold compares case-insensitively without allocating a lowered
	// copy of the input.
	if strings.EqualFold(v, "true") {
		return v, true
	}
	return "", false
})

// Feature is a control flag for one experimental feature. It pairs the
// environment variable that toggles the feature with the function used to
// interpret that variable's value.
type Feature[T any] struct {
	// key is the full environment variable name.
	key string
	// parse converts a non-empty raw value into the feature's value and
	// reports whether that value enables the feature.
	parse func(v string) (T, bool)
}

// newFeature builds a Feature whose environment variable is "OTEL_GO_X_"
// followed by suffix and whose values are interpreted by parse.
func newFeature[T any](suffix string, parse func(string) (T, bool)) Feature[T] {
	const envKeyRoot = "OTEL_GO_X_"

	var feat Feature[T]
	feat.key = envKeyRoot + suffix
	feat.parse = parse
	return feat
}

// Key reports the name of the environment variable that controls the
// feature.
func (f Feature[T]) Key() string {
	return f.key
}

// Lookup reads the feature's environment variable. If the variable is unset
// or empty, the zero value of T and false are returned. Otherwise the raw
// value is passed to the feature's parse function and its results are
// returned unchanged.
func (f Feature[T]) Lookup() (v T, ok bool) {
	// https://github.com/open-telemetry/opentelemetry-specification/blob/62effed618589a0bec416a87e559c0a9d96289bb/specification/configuration/sdk-environment-variables.md#parsing-empty-value
	//
	// > The SDK MUST interpret an empty value of an environment variable the
	// > same way as when the variable is unset.
	if raw := os.Getenv(f.key); raw != "" {
		return f.parse(raw)
	}
	return v, ok
}

// Enabled reports whether Lookup considers the feature enabled.
func (f Feature[T]) Enabled() bool {
	_, enabled := f.Lookup()
	return enabled
}
60 changes: 60 additions & 0 deletions exporters/otlp/otlptrace/otlptracegrpc/internal/x/x_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package x

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestResource checks the SelfObservability feature flag: its environment
// variable key, the values that enable it (any casing of "true"), and the
// values that leave it disabled.
func TestResource(t *testing.T) {
	const key = "OTEL_GO_X_SELF_OBSERVABILITY"
	require.Equal(t, key, SelfObservability.Key())

	t.Run("true", run(setenv(key, "true"), assertEnabled(SelfObservability, "true")))
	t.Run("True", run(setenv(key, "True"), assertEnabled(SelfObservability, "True")))
	t.Run("TRUE", run(setenv(key, "TRUE"), assertEnabled(SelfObservability, "TRUE")))
	t.Run("false", run(setenv(key, "false"), assertDisabled(SelfObservability)))
	t.Run("1", run(setenv(key, "1"), assertDisabled(SelfObservability)))
	// Pin the variable to the empty value so this case does not depend on
	// whatever the surrounding process environment happens to contain.
	t.Run("empty", run(setenv(key, ""), assertDisabled(SelfObservability)))
}

// run combines steps into a single test function that marks itself as a
// helper and then executes each step, in order, against the same *testing.T.
func run(steps ...func(*testing.T)) func(*testing.T) {
	sequence := func(t *testing.T) {
		t.Helper()
		for i := range steps {
			steps[i](t)
		}
	}
	return sequence
}

// setenv returns a test step that sets environment variable k to v through
// t.Setenv.
func setenv(k, v string) func(t *testing.T) { //nolint:unparam
	step := func(t *testing.T) {
		t.Setenv(k, v)
	}
	return step
}

// assertEnabled returns a test step asserting that f reports itself enabled
// and that Lookup yields want together with true.
func assertEnabled[T any](f Feature[T], want T) func(*testing.T) {
	check := func(t *testing.T) {
		t.Helper()
		assert.True(t, f.Enabled(), "not enabled")

		got, enabled := f.Lookup()
		assert.True(t, enabled, "Lookup state")
		assert.Equal(t, want, got, "Lookup value")
	}
	return check
}

// assertDisabled returns a test step asserting that f reports itself
// disabled and that Lookup yields the zero value of T together with false.
func assertDisabled[T any](f Feature[T]) func(*testing.T) {
	check := func(t *testing.T) {
		t.Helper()

		assert.False(t, f.Enabled(), "enabled")

		// The expected value for a disabled feature is T's zero value.
		var want T
		got, enabled := f.Lookup()
		assert.False(t, enabled, "Lookup state")
		assert.Equal(t, want, got, "Lookup value")
	}
	return check
}
2 changes: 1 addition & 1 deletion sdk/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/google/uuid v1.6.0
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/otel v1.34.0
go.opentelemetry.io/otel/metric v1.34.0
go.opentelemetry.io/otel/trace v1.34.0
golang.org/x/sys v0.29.0
)
Expand All @@ -19,7 +20,6 @@ require (
github.com/go-logr/stdr v1.2.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/otel/metric v1.34.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

Expand Down
7 changes: 7 additions & 0 deletions sdk/internal/x/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ All other values are ignored.
[OpenTelemetry resource semantic conventions]: https://opentelemetry.io/docs/specs/semconv/resource/
[resource detectors]: https://pkg.go.dev/go.opentelemetry.io/otel/sdk/resource#Detector

### SDK Self-Observability

To enable experimental metric and trace instrumentation in SDKs, set the `OTEL_GO_X_SELF_OBSERVABILITY` environment variable.
If enabled, this instrumentation uses the global `TracerProvider` and `MeterProvider`.
The value set must be the case-insensitive string of `"true"` to enable the feature.
All other values are ignored.

#### Examples

Enable experimental resource semantic conventions.
Expand Down
Loading
Loading