Skip to content

Commit

Permalink
Merge pull request #17983 from thedtripp/feature/addServerRangeDurati…
Browse files Browse the repository at this point in the history
…onMetrics

etcdserver: add server range duration metrics
  • Loading branch information
jmhbnz authored May 18, 2024
2 parents 8938299 + 0232686 commit 52fb28c
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG/CHANGELOG-3.6.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ See [List of metrics](https://etcd.io/docs/latest/metrics/) for all metrics per

- Add [`etcd_disk_defrag_inflight`](https://github.com/etcd-io/etcd/pull/13371).
- Add [`etcd_debugging_server_alarms`](https://github.com/etcd-io/etcd/pull/14276).
- Add [`etcd_server_range_duration_seconds`](https://github.com/etcd-io/etcd/pull/17983).

### Go
- Require [Go 1.22+](https://github.com/etcd-io/etcd/pull/16594).
Expand Down
16 changes: 16 additions & 0 deletions server/etcdserver/txn/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,29 @@ var (
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
},
[]string{"version", "op", "success"})
// rangeSec is the histogram vector behind etcd_server_range_duration_seconds.
// It tracks txn.Range latency in seconds, partitioned by the "success" label.
rangeSec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "range_duration_seconds",
Help: "The latency distributions of txn.Range",

// 20 exponential buckets: the smallest upper bound is 0.0001 sec (0.1 ms)
// and every following bound doubles, so the largest finite upper bound is
// 0.0001 sec * 2^19 == 52.4288 sec.
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 20),
},
[]string{"success"})
)

// ApplySecObserve records one latency sample on the applySec metric.
//
// The sample is labelled with version, op and the textual form of success.
// latency is truncated to whole microseconds and reported in seconds.
func ApplySecObserve(version, op string, success bool, latency time.Duration) {
	outcome := strconv.FormatBool(success)
	seconds := float64(latency.Microseconds()) / 1000000.0
	applySec.WithLabelValues(version, op, outcome).Observe(seconds)
}

// RangeSecObserve records one latency sample on the rangeSec histogram.
//
// The sample is labelled with the textual form of success ("true"/"false").
// latency is truncated to whole microseconds and reported in seconds.
func RangeSecObserve(success bool, latency time.Duration) {
	outcome := strconv.FormatBool(success)
	seconds := float64(latency.Microseconds()) / 1000000.0
	rangeSec.WithLabelValues(outcome).Observe(seconds)
}

// init registers this package's collectors with Prometheus. MustRegister
// processes its arguments in order and panics if any registration fails.
func init() {
	prometheus.MustRegister(
		applySec,
		rangeSec,
		slowApplies,
	)
}
62 changes: 62 additions & 0 deletions server/etcdserver/txn/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2022 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package txn

import (
"strings"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
)

func TestRangeSecObserve(t *testing.T) {
// Simulate a range operation taking 500 milliseconds.
latency := 500 * time.Millisecond
RangeSecObserve(true, latency)

// Use testutil to collect the results and check against expected value
expected := `
# HELP etcd_server_range_duration_seconds The latency distributions of txn.Range
# TYPE etcd_server_range_duration_seconds histogram
etcd_server_range_duration_seconds_bucket{success="true",le="0.0001"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0002"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0004"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0008"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0016"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0032"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0064"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0128"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0256"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.0512"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.1024"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.2048"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.4096"} 0
etcd_server_range_duration_seconds_bucket{success="true",le="0.8192"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="1.6384"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="3.2768"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="6.5536"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="13.1072"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="26.2144"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="52.4288"} 1
etcd_server_range_duration_seconds_bucket{success="true",le="+Inf"} 1
etcd_server_range_duration_seconds_sum{success="true"} 0.5
etcd_server_range_duration_seconds_count{success="true"} 1
`

err := testutil.CollectAndCompare(rangeSec, strings.NewReader(expected))
require.NoError(t, err, "Collected metrics did not match expected metrics: %v", err)
}
5 changes: 5 additions & 0 deletions server/etcdserver/txn/txn.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"context"
"fmt"
"sort"
"time"

"go.uber.org/zap"

Expand Down Expand Up @@ -138,6 +139,10 @@ func Range(ctx context.Context, lg *zap.Logger, kv mvcc.KV, r *pb.RangeRequest)
trace = traceutil.New("range", lg)
ctx = context.WithValue(ctx, traceutil.TraceKey{}, trace)
}
defer func(start time.Time) {
success := err == nil
RangeSecObserve(success, time.Since(start))
}(time.Now())
txnRead := kv.Read(mvcc.ConcurrentReadTxMode, trace)
defer txnRead.End()
resp, err = executeRange(ctx, lg, txnRead, r)
Expand Down
34 changes: 34 additions & 0 deletions tests/integration/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ import (
"testing"
"time"

"github.com/stretchr/testify/require"

pb "go.etcd.io/etcd/api/v3/etcdserverpb"
"go.etcd.io/etcd/client/pkg/v3/transport"
"go.etcd.io/etcd/server/v3/storage"
"go.etcd.io/etcd/tests/v3/framework/integration"

clientv3 "go.etcd.io/etcd/client/v3"
)

// TestMetricDbSizeBoot checks that the db size metric is set on boot.
Expand Down Expand Up @@ -211,3 +215,33 @@ func TestMetricsHealth(t *testing.T) {
t.Fatalf("expected '0' from etcd_server_health_failures, got %q", hv)
}
}

// TestMetricsRangeDurationSeconds starts a single-member cluster, writes a few
// keys, performs one Get with WithFromKey, and then checks that the member
// reports a parseable, sanely bounded value for
// etcd_server_range_duration_seconds.
//
// NOTE(review): Member.Metric is not visible here. If it returns the value of
// the first exposition line whose name starts with the given string, the
// parsed number would be a bucket count rather than a duration — confirm.
func TestMetricsRangeDurationSeconds(t *testing.T) {
	integration.BeforeTest(t)
	clus := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	cli := clus.RandClient()
	ctx := context.Background()

	// Seed some data so the subsequent range request has keys to return.
	for _, key := range []string{"my-namespace/foobar", "my-namespace/foobar1", "namespace/foobar1"} {
		_, putErr := cli.Put(ctx, key, "data")
		require.NoError(t, putErr)
	}

	// Issue the range request whose latency the metric should record.
	_, err := cli.Get(ctx, "", clientv3.WithFromKey())
	require.NoError(t, err)

	raw, err := clus.Members[0].Metric("etcd_server_range_duration_seconds")
	require.NoError(t, err)
	require.NotEmpty(t, raw, "expected a number from etcd_server_range_duration_seconds")

	observed, err := strconv.ParseFloat(raw, 64)
	require.NoError(t, err, "failed to parse duration: %s", err)

	// Generous bounds: the value must be non-negative and at most 600.
	upperBound := 600.0
	require.GreaterOrEqual(t, observed, 0.0, "expected etcd_server_range_duration_seconds to be between 0 and %f, got %f", upperBound, observed)
	require.LessOrEqual(t, observed, upperBound, "expected etcd_server_range_duration_seconds to be between 0 and %f, got %f", upperBound, observed)
}

0 comments on commit 52fb28c

Please sign in to comment.