Skip to content

Commit

Permalink
test: add test to measure rate of growth of metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Castilio dos Santos <[email protected]>
  • Loading branch information
alexcastilio committed Jan 10, 2025
1 parent 9449356 commit e7ac2cd
Show file tree
Hide file tree
Showing 4 changed files with 301 additions and 1 deletion.
12 changes: 12 additions & 0 deletions deploy/legacy/prometheus/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,15 @@ prometheus:
- source_labels: [__name__]
action: keep
regex: (.*)
resources:
limits:
memory: 10Gi
enableAdminAPI: true
storageSpec:
volumeClaimTemplate:
spec:
# storageClassName: gp2
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 50Gi
4 changes: 3 additions & 1 deletion test/e2e/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ import (
)

const (
RetinaPort int = 10093
RetinaPort int = 10093
PrometheusPort int = 9090
// netObsRGtag is used to tag resources created by this test suite
NetObsRGtag = "-e2e-netobs-"
KubeSystemNamespace = "kube-system"
TestPodNamespace = "kube-system-test"
AzureAppInsightsKeyEnv = "AZURE_APP_INSIGHTS_KEY"
OutputFilePathEnv = "OUTPUT_FILEPATH"
)

var (
Expand Down
174 changes: 174 additions & 0 deletions test/e2e/framework/metrics/query-publish.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package metrics

import (
"context"
"encoding/json"
"fmt"
"io/fs"
"log"
"os"
"sync"
"time"

"github.com/microsoft/retina/pkg/telemetry"
"github.com/microsoft/retina/test/e2e/common"
prom_client "github.com/prometheus/client_golang/api"
prom_v1 "github.com/prometheus/client_golang/api/prometheus/v1"
prom_model "github.com/prometheus/common/model"
)

type QueryAndPublish struct {
Query string
Endpoint string
AdditionalTelemetryProperty map[string]string
outputFilePath string
stop chan struct{}
wg sync.WaitGroup
telemetryClient *telemetry.TelemetryClient
appInsightsKey string
}

func (q *QueryAndPublish) Run() error {
if q.appInsightsKey != "" {
telemetry.InitAppInsights(q.appInsightsKey, q.AdditionalTelemetryProperty["retinaVersion"])

telemetryClient, err := telemetry.NewAppInsightsTelemetryClient("retina-rate-of-growth", q.AdditionalTelemetryProperty)
if err != nil {
return fmt.Errorf("error creating telemetry client: %w", err)
}

q.telemetryClient = telemetryClient
}

q.stop = make(chan struct{})
q.wg.Add(1)

go func() {

t := time.NewTicker(2 * time.Second)

// First execution
err := q.getAndPublishMetrics()
if err != nil {
log.Fatalf("error getting and publishing metrics: %v", err)
return
}

for {
select {

case <-t.C:
err := q.getAndPublishMetrics()
if err != nil {
log.Fatalf("error getting and publishing metrics: %v", err)
return
}

case <-q.stop:
q.wg.Done()
return

}
}

}()

return nil
}

func (q *QueryAndPublish) getAndPublishMetrics() error {
// ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)

Check failure on line 80 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

commentedOutCode: may want to remove commented-out code (gocritic)

Check failure on line 80 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

commentedOutCode: may want to remove commented-out code (gocritic)

Check failure on line 80 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

commentedOutCode: may want to remove commented-out code (gocritic)

Check failure on line 80 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

commentedOutCode: may want to remove commented-out code (gocritic)
// defer cancel()

client, err := prom_client.NewClient(prom_client.Config{
Address: q.Endpoint,
})
if err != nil {
return fmt.Errorf("error creating prometheus client: %w", err)
}

promApi := prom_v1.NewAPI(client)

Check failure on line 90 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

var-naming: var promApi should be promAPI (revive)

Check failure on line 90 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

var-naming: var promApi should be promAPI (revive)

Check failure on line 90 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

var-naming: var promApi should be promAPI (revive)

Check failure on line 90 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

var-naming: var promApi should be promAPI (revive)
ctx := context.TODO()

result, warnings, err := promApi.Query(ctx, q.Query, time.Now())
if err != nil {
return fmt.Errorf("error querying prometheus: %w", err)
}
if len(warnings) > 0 {
log.Println("query warnings: ", warnings)
}
type metrics map[string]string

allMetrics := []metrics{}

for _, sample := range result.(prom_model.Vector) {
instance := string(sample.Metric["instance"])
samplesScraped := sample.Value.String()

m := map[string]string{
"instance": instance,
"samplesScraped": samplesScraped,
}
allMetrics = append(allMetrics, m)
}

// Publish metrics
if q.telemetryClient != nil {
log.Println("Publishing metrics to AppInsights")
for _, metric := range allMetrics {
q.telemetryClient.TrackEvent("metrics-scraped", metric)

}
}

// Write metrics to file
if q.outputFilePath != "" {
log.Println("Writing metrics to file ", q.outputFilePath)

permissions := 0o644
file, err := os.OpenFile(q.outputFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, fs.FileMode(permissions))
if err != nil {
return fmt.Errorf("error writing to csv file: %w", err)
}
defer file.Close()

for _, m := range allMetrics {
b, err := json.Marshal(m)
if err != nil {
return fmt.Errorf("error marshalling metric: %w", err)
}
file.Write(b)

Check failure on line 140 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

Error return value of `file.Write` is not checked (errcheck)

Check failure on line 140 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

Error return value of `file.Write` is not checked (errcheck)

Check failure on line 140 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

Error return value of `file.Write` is not checked (errcheck)

Check failure on line 140 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

Error return value of `file.Write` is not checked (errcheck)
file.WriteString("\n")

Check failure on line 141 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

Error return value of `file.WriteString` is not checked (errcheck)

Check failure on line 141 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

Error return value of `file.WriteString` is not checked (errcheck)

Check failure on line 141 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

Error return value of `file.WriteString` is not checked (errcheck)

Check failure on line 141 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

Error return value of `file.WriteString` is not checked (errcheck)
}

}

return nil
}

func (q *QueryAndPublish) Stop() error {
telemetry.ShutdownAppInsights()
close(q.stop)
q.wg.Wait()
return nil
}

func (q *QueryAndPublish) Prevalidate() error {
if os.Getenv(common.AzureAppInsightsKeyEnv) == "" {
log.Println("env ", common.AzureAppInsightsKeyEnv, " not provided")
}
q.appInsightsKey = os.Getenv(common.AzureAppInsightsKeyEnv)

if _, ok := q.AdditionalTelemetryProperty["retinaVersion"]; !ok {
return fmt.Errorf("retinaVersion is required in AdditionalTelemetryProperty")

Check failure on line 163 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

do not define dynamic errors, use wrapped static errors instead: "fmt.Errorf(\"retinaVersion is required in AdditionalTelemetryProperty\")" (err113)

Check failure on line 163 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

do not define dynamic errors, use wrapped static errors instead: "fmt.Errorf(\"retinaVersion is required in AdditionalTelemetryProperty\")" (err113)

Check failure on line 163 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

do not define dynamic errors, use wrapped static errors instead: "fmt.Errorf(\"retinaVersion is required in AdditionalTelemetryProperty\")" (err113)

Check failure on line 163 in test/e2e/framework/metrics/query-publish.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

do not define dynamic errors, use wrapped static errors instead: "fmt.Errorf(\"retinaVersion is required in AdditionalTelemetryProperty\")" (err113)
}

if os.Getenv(common.OutputFilePathEnv) == "" {
log.Println("Output file path not provided. Metrics will not be written to file")
return nil
}
q.outputFilePath = os.Getenv(common.OutputFilePathEnv)

log.Println("Output file path provided: ", q.outputFilePath)
return nil
}
112 changes: 112 additions & 0 deletions test/e2e/growth_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package retina

import (
"os"
"path/filepath"
"strconv"
"testing"
"time"

"github.com/microsoft/retina/test/e2e/common"
"github.com/microsoft/retina/test/e2e/framework/azure"
"github.com/microsoft/retina/test/e2e/framework/generic"
"github.com/microsoft/retina/test/e2e/framework/helpers"
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/e2e/framework/metrics"
"github.com/microsoft/retina/test/e2e/framework/types"
"github.com/stretchr/testify/require"
)

func GetKubeconfig(clusterName, subscriptionId, resourceGroup, kubeConfigFilePath string) *types.Job {

Check failure on line 20 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

var-naming: func parameter subscriptionId should be subscriptionID (revive)

Check failure on line 20 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

var-naming: func parameter subscriptionId should be subscriptionID (revive)

Check failure on line 20 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

var-naming: func parameter subscriptionId should be subscriptionID (revive)

Check failure on line 20 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

var-naming: func parameter subscriptionId should be subscriptionID (revive)
job := types.NewJob("Get kubeconfig")
job.AddStep(&azure.GetAKSKubeConfig{
ClusterName: clusterName,
SubscriptionID: subscriptionId,
ResourceGroupName: resourceGroup,
Location: "why?",
KubeConfigFilePath: kubeConfigFilePath,
}, nil)
return job
}

func GrowthTest(additionalTelemetryProperty map[string]string, kubeConfigFilePath string) *types.Job {
job := types.NewJob("Growth Test")
labelAffinity := "app.kubernetes.io/instance=prometheus-kube-prometheus-prometheus"
portForwardId := "port-forward"

Check failure on line 35 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

var-naming: var portForwardId should be portForwardID (revive)

Check failure on line 35 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

var-naming: var portForwardId should be portForwardID (revive)

Check failure on line 35 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

var-naming: var portForwardId should be portForwardID (revive)

Check failure on line 35 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

var-naming: var portForwardId should be portForwardID (revive)
metricsStepId := "metrics"

Check failure on line 36 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, arm64)

var-naming: var metricsStepId should be metricsStepID (revive)

Check failure on line 36 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (windows, amd64)

var-naming: var metricsStepId should be metricsStepID (revive)

Check failure on line 36 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

var-naming: var metricsStepId should be metricsStepID (revive)

Check failure on line 36 in test/e2e/growth_metrics_test.go

View workflow job for this annotation

GitHub Actions / Lint (linux, amd64)

var-naming: var metricsStepId should be metricsStepID (revive)

job.AddStep(&kubernetes.PortForward{
KubeConfigFilePath: kubeConfigFilePath,
Namespace: common.KubeSystemNamespace,
LabelSelector: "app.kubernetes.io/instance=prometheus-kube-prometheus-prometheus",
LocalPort: strconv.Itoa(common.PrometheusPort),
RemotePort: strconv.Itoa(common.PrometheusPort),
Endpoint: "metrics",
OptionalLabelAffinity: labelAffinity,
},
&types.StepOptions{
SkipSavingParametersToJob: true,
RunInBackgroundWithID: portForwardId,
})

job.AddStep(&metrics.QueryAndPublish{
Endpoint: "http://localhost:" + strconv.Itoa(common.PrometheusPort),
Query: "scrape_samples_scraped{job=\"retina-pods\"}",
AdditionalTelemetryProperty: additionalTelemetryProperty,
},
&types.StepOptions{
SkipSavingParametersToJob: true,
RunInBackgroundWithID: metricsStepId,
})

job.AddStep(&types.Sleep{
Duration: 60 * time.Second,
}, nil)

job.AddStep(
&types.Stop{
BackgroundID: metricsStepId,
}, nil)

job.AddStep(
&types.Stop{
BackgroundID: portForwardId,
}, nil)
return job
}

func Test_GrowthOfMetrics(t *testing.T) {
ctx, cancel := helpers.Context(t)
defer cancel()

clusterName := common.ClusterNameForE2ETest(t)

subID := os.Getenv("AZURE_SUBSCRIPTION_ID")
require.NotEmpty(t, subID)

rg := os.Getenv("AZURE_RESOURCE_GROUP")
if rg == "" {
// Use the cluster name as the resource group name by default.
rg = clusterName
}

RetinaVersion := os.Getenv(generic.DefaultTagEnv)
require.NotEmpty(t, RetinaVersion)

additionalTelemetryProperty := map[string]string{}
additionalTelemetryProperty["retinaVersion"] = RetinaVersion
additionalTelemetryProperty["clusterName"] = clusterName
additionalTelemetryProperty["resourceGroup"] = rg

cwd, err := os.Getwd()
require.NoError(t, err)

rootDir := filepath.Dir(filepath.Dir(cwd))
kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")

getKubeconfig := types.NewRunner(t, GetKubeconfig(clusterName, subID, rg, kubeConfigFilePath))
getKubeconfig.Run(ctx)

growth := types.NewRunner(t, GrowthTest(additionalTelemetryProperty, kubeConfigFilePath))
growth.Run(ctx)
}

0 comments on commit e7ac2cd

Please sign in to comment.