
Implement stream connection for remote write #6580


Open · wants to merge 12 commits into master
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@
* [FEATURE] Ruler: Add support for percentage based sharding for rulers. #6680
* [FEATURE] Ruler: Add support for group labels. #6665
* [FEATURE] Support Parquet format: Implement parquet converter service to convert a TSDB block into Parquet. #6716
* [FEATURE] Distributor/Ingester: Implemented an experimental feature to use a gRPC stream connection for push requests. This can be enabled by setting `-distributor.use-stream-push=true`. #6580
* [ENHANCEMENT] Query Frontend: Change to return 400 when tenant resolution fails. #6715
* [ENHANCEMENT] Querier: Support query parameters on the metadata API (/api/v1/metadata) to allow users to limit the metadata returned. #6681
* [ENHANCEMENT] Ingester: Add a `cortex_ingester_active_native_histogram_series` metric to track # of active NH series. #6695
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
@@ -2690,6 +2690,11 @@ ha_tracker:
# CLI flag: -distributor.sign-write-requests
[sign_write_requests: <boolean> | default = false]

# EXPERIMENTAL: If enabled, the distributor uses a stream connection to send
# requests to ingesters.
# CLI flag: -distributor.use-stream-push
[use_stream_push: <boolean> | default = false]

ring:
kvstore:
# Backend storage to use for the ring. Supported values are: consul, etcd,
4 changes: 3 additions & 1 deletion docs/configuration/v1-guarantees.md
@@ -127,4 +127,6 @@ Currently experimental features are:
- `-ingester.instance-limits.cpu-utilization`
- `-ingester.instance-limits.heap-utilization`
- `-store-gateway.instance-limits.cpu-utilization`
- `-store-gateway.instance-limits.heap-utilization`
- Distributor/Ingester: Stream push connection
- Enable the stream push connection between distributor and ingesters by setting `-distributor.use-stream-push=true` on the distributor.
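A minimal YAML sketch of the equivalent configuration, assuming the setting lives under the top-level `distributor` block as shown in the config file reference above:

```yaml
# Equivalent to -distributor.use-stream-push=true (sketch, not a complete config).
distributor:
  use_stream_push: true
```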
118 changes: 118 additions & 0 deletions integration/ingester_stream_push_test.go
@@ -0,0 +1,118 @@
//go:build requires_docker
// +build requires_docker

package integration

import (
"fmt"
"math/rand"
"strconv"
"testing"
"time"

"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/prompb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/cortexproject/cortex/integration/e2e"
e2edb "github.com/cortexproject/cortex/integration/e2e/db"
"github.com/cortexproject/cortex/integration/e2ecortex"
)

func TestIngesterStreamPushConnection(t *testing.T) {

s, err := e2e.NewScenario(networkName)
require.NoError(t, err)
defer s.Close()

maxGlobalSeriesPerMetric := 300
maxGlobalSeriesPerTenant := 1000

flags := BlocksStorageFlags()
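// Enable the experimental gRPC stream push connection between distributor
// and ingesters (the feature under test).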
flags["-distributor.use-stream-push"] = "true"
flags["-distributor.replication-factor"] = "1"
flags["-distributor.shard-by-all-labels"] = "true"
flags["-distributor.sharding-strategy"] = "shuffle-sharding"
flags["-distributor.ingestion-tenant-shard-size"] = "1"
flags["-ingester.max-series-per-user"] = "0"
flags["-ingester.max-series-per-metric"] = "0"
flags["-ingester.max-global-series-per-user"] = strconv.Itoa(maxGlobalSeriesPerTenant)
flags["-ingester.max-global-series-per-metric"] = strconv.Itoa(maxGlobalSeriesPerMetric)
flags["-ingester.heartbeat-period"] = "1s"

// Start dependencies.
consul := e2edb.NewConsul()
minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"])
require.NoError(t, s.StartAndWaitReady(consul, minio))

// Start Cortex components.
distributor := e2ecortex.NewDistributor("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
ingester1 := e2ecortex.NewIngester("ingester-1", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
ingester2 := e2ecortex.NewIngester("ingester-2", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
ingester3 := e2ecortex.NewIngester("ingester-3", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "")
require.NoError(t, s.StartAndWaitReady(distributor, ingester1, ingester2, ingester3))

// Wait until distributor has updated the ring.
require.NoError(t, distributor.WaitSumMetricsWithOptions(e2e.Equals(3), []string{"cortex_ring_members"}, e2e.WithLabelMatchers(
labels.MustNewMatcher(labels.MatchEqual, "name", "ingester"),
labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE"))))

// Wait until ingesters have heartbeated the ring after all ingesters were active,
// in order to update the number of instances. Since we have no metric, we have to
// rely on an ugly sleep.
time.Sleep(2 * time.Second)

now := time.Now()
client, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), "", "", "", userID)
require.NoError(t, err)

numSeriesWithSameMetricName := 0
numSeriesTotal := 0
maxErrorsBeforeStop := 100

// Try to push as many series with the same metric name as we can.
for i, errs := 0, 0; i < 10000; i++ {
series, _ := generateSeries("test_limit_per_metric", now, prompb.Label{
Name: "cardinality",
Value: strconv.Itoa(rand.Int()),
})

res, err := client.Push(series)
require.NoError(t, err)

if res.StatusCode == 200 {
numSeriesTotal++
numSeriesWithSameMetricName++
} else if errs++; errs >= maxErrorsBeforeStop {
break
}
}

// Try to push as many series with different metric names as we can.
for i, errs := 0, 0; i < 10000; i++ {
series, _ := generateSeries(fmt.Sprintf("test_limit_per_tenant_%d", rand.Int()), now)
res, err := client.Push(series)
require.NoError(t, err)

if res.StatusCode == 200 {
numSeriesTotal++
} else if errs++; errs >= maxErrorsBeforeStop {
break
}
}

// We expect the number of series we've successfully pushed to be around
// the limit. Due to how the global limit implementation works (lack of centralised
// coordination) the actual number of written series could be slightly different
// than the global limit, so we allow a 10% difference.
delta := 0.1
assert.InDelta(t, maxGlobalSeriesPerMetric, numSeriesWithSameMetricName, float64(maxGlobalSeriesPerMetric)*delta)
assert.InDelta(t, maxGlobalSeriesPerTenant, numSeriesTotal, float64(maxGlobalSeriesPerTenant)*delta)

// Ensure no service-specific metrics prefix is used by the wrong service.
assertServiceMetricsPrefixes(t, Distributor, distributor)
assertServiceMetricsPrefixes(t, Ingester, ingester1)
assertServiceMetricsPrefixes(t, Ingester, ingester2)
assertServiceMetricsPrefixes(t, Ingester, ingester3)
}
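The 10% tolerance in the assertions above reflects how global limits are enforced: there is no central coordination, so each ingester independently converts the global limit into a local one based on the replication factor and the tenant's shard size. A minimal Go sketch of that conversion follows; the function name and signature are illustrative, not the actual Cortex limiter API.

```go
package main

import "fmt"

// localSeriesLimit sketches how a Cortex-style limiter typically derives a
// per-ingester (local) limit from a global one: each ingester in the tenant's
// shard enforces an equal share, scaled by the replication factor because a
// series is written to RF ingesters. The name and signature are hypothetical.
func localSeriesLimit(globalLimit, replicationFactor, shardSize int) int {
	if globalLimit == 0 || shardSize == 0 {
		return 0 // 0 means "unlimited" for Cortex series limits
	}
	return globalLimit * replicationFactor / shardSize
}

func main() {
	// With the flags used in this test (-distributor.replication-factor=1 and
	// -distributor.ingestion-tenant-shard-size=1), a single ingester enforces
	// the full global limit locally, so the pushed series counts should land
	// close to the configured global limits.
	fmt.Println(localSeriesLimit(1000, 1, 1)) // 1000 == max global series per tenant
	fmt.Println(localSeriesLimit(300, 1, 1))  // 300  == max global series per metric
}
```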