From 9e4526f5acb32476e759acc59f75bfba57c0380c Mon Sep 17 00:00:00 2001 From: lvlcn-t <75443136+lvlcn-t@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:12:07 +0100 Subject: [PATCH] test: add additional e2e tests for check reconfiguration * test: add additional e2e tests for check reconfiguration * fix: race conditions in all checks when reconfiguring while running checks * refactor: add check base constructor * fix: add missing json tags for dns check result struct * chore: use time ticker instead of time.After for check intervals * fix: only update check config if it has changed * chore: improve naming in checks * chore: unexpose private functions & structs * refactor: simplify check config & e2e test builder Signed-off-by: lvlcn-t <75443136+lvlcn-t@users.noreply.github.com> --- .vscode/settings.json | 3 +- pkg/checks/base.go | 45 ++- pkg/checks/base_test.go | 10 +- pkg/checks/dns/dns.go | 107 ++++--- pkg/checks/dns/dns_test.go | 33 +- pkg/checks/dns/resolver.go | 2 +- pkg/checks/health/health.go | 109 ++++--- pkg/checks/health/health_test.go | 4 +- pkg/checks/latency/latency.go | 112 ++++--- pkg/checks/latency/latency_test.go | 4 +- pkg/checks/traceroute/check.go | 125 ++++---- pkg/checks/traceroute/check_test.go | 9 +- test/{framework => }/checks.go | 61 +++- test/e2e.go | 461 ++++++++++++++++++++++++++++ test/e2e/main_test.go | 441 ++++++++++++++++---------- test/framework.go | 58 ++++ test/framework/framework.go | 69 ----- test/{framework => }/startup.go | 6 +- test/unit.go | 19 ++ 19 files changed, 1176 insertions(+), 502 deletions(-) rename test/{framework => }/checks.go (76%) create mode 100644 test/e2e.go create mode 100644 test/framework.go delete mode 100644 test/framework/framework.go rename test/{framework => }/startup.go (97%) create mode 100644 test/unit.go diff --git a/.vscode/settings.json b/.vscode/settings.json index bb2287ce..cefc2e80 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,7 @@ "-race", "-cover", "-count=1", - "-timeout=120s", + "-timeout=240s", + "-v" ] } \ No newline at end of file diff --git a/pkg/checks/base.go b/pkg/checks/base.go index b3e4e61a..8f98da6a 100644 --- a/pkg/checks/base.go +++ b/pkg/checks/base.go @@ -42,42 +42,55 @@ type Check interface { // run until the context is canceled and handle problems itself. // Returning a non-nil error will cause the shutdown of the check. Run(ctx context.Context, cResult chan ResultDTO) error - // Shutdown is called once when the check is unregistered or sparrow shuts down + // Shutdown is called once when the check is unregistered or sparrow shuts down. Shutdown() - // UpdateConfig is called once when the check is registered - // This is also called while the check is running, if the remote config is updated - // This should return an error if the config is invalid + // UpdateConfig updates the configuration of the check. + // It is called when the runtime configuration is updated. + // The check should handle the update itself. + // Returns an error if the configuration is invalid. UpdateConfig(config Runtime) error - // GetConfig returns the current configuration of the check + // GetConfig returns the current configuration of the check. GetConfig() Runtime - // Name returns the name of the check + // Name returns the name of the check. Name() string - // Schema returns an openapi3.SchemaRef of the result type returned by the check + // Schema returns an openapi3.SchemaRef of the result type returned by the check. Schema() (*openapi3.SchemaRef, error) - // GetMetricCollectors allows the check to provide prometheus metric collectors + // GetMetricCollectors allows the check to provide prometheus metric collectors. GetMetricCollectors() []prometheus.Collector // RemoveLabelledMetrics allows the check to remove the prometheus metrics - // of the check whose `target` label matches the passed value + // of the check whose `target` label matches the passed value. RemoveLabelledMetrics(target string) error } // Base is a struct providing common fields and methods used by implementations of the [Check] interface. // It serves as a foundational structure that should be embedded in specific check implementations. type Base struct { - // Mutex for thread-safe access to shared resources within the check implementation - Mu sync.Mutex - // Signal channel used to notify about shutdown of a check - DoneChan chan struct{} + // Mutex for thread-safe access to shared resources within the check implementation. + Mutex sync.Mutex + // Done channel is used to notify about shutdown of a check. + Done chan struct{} + // Update is a channel used to notify about configuration updates. + Update chan struct{} // closed is a flag indicating if the check has been shut down. closed bool } +// NewBase creates a new instance of the [Base] struct. +func NewBase() Base { + return Base{ + Mutex: sync.Mutex{}, + Done: make(chan struct{}, 1), + Update: make(chan struct{}, 3), + closed: false, + } +} + // Shutdown closes the DoneChan to signal the check to stop running. func (b *Base) Shutdown() { - b.Mu.Lock() - defer b.Mu.Unlock() + b.Mutex.Lock() + defer b.Mutex.Unlock() if !b.closed { - close(b.DoneChan) + close(b.Done) b.closed = true } } diff --git a/pkg/checks/base_test.go b/pkg/checks/base_test.go index 6f2308bb..afd99d48 100644 --- a/pkg/checks/base_test.go +++ b/pkg/checks/base_test.go @@ -14,21 +14,21 @@ func TestBase_Shutdown(t *testing.T) { { name: "shutdown", b: &Base{ - DoneChan: make(chan struct{}, 1), + Done: make(chan struct{}, 1), }, }, { name: "already shutdown", b: &Base{ - DoneChan: make(chan struct{}, 1), - closed: true, + Done: make(chan struct{}, 1), + closed: true, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if tt.b.closed { - close(tt.b.DoneChan) + close(tt.b.Done) } tt.b.Shutdown() @@ -37,7 +37,7 @@ func TestBase_Shutdown(t *testing.T) { } assert.Panics(t, func() { - tt.b.DoneChan <- struct{}{} + tt.b.Done <- struct{}{} }, "Base.DoneChan should be closed") }) } diff --git a/pkg/checks/dns/dns.go b/pkg/checks/dns/dns.go index d11109f7..87b60b8e 100644 --- a/pkg/checks/dns/dns.go +++ b/pkg/checks/dns/dns.go @@ -21,6 +21,7 @@ package dns import ( "context" "net" + "reflect" "slices" "sync" "time" @@ -33,96 +34,104 @@ import ( ) var ( - _ checks.Check = (*DNS)(nil) + _ checks.Check = (*check)(nil) _ checks.Runtime = (*Config)(nil) ) const CheckName = "dns" -// DNS is a check that resolves the names and addresses -type DNS struct { +// check is the implementation of the dns check. +// It resolves DNS names and IP addresses for a list of targets. +type check struct { checks.Base config Config metrics metrics client Resolver } -func (d *DNS) GetConfig() checks.Runtime { - d.Mu.Lock() - defer d.Mu.Unlock() - return &d.config +func (ch *check) GetConfig() checks.Runtime { + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + return &ch.config } -func (d *DNS) Name() string { +func (*check) Name() string { return CheckName } // NewCheck creates a new instance of the dns check func NewCheck() checks.Check { - return &DNS{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, + return &check{ + Base: checks.NewBase(), config: Config{ Retry: checks.DefaultRetry, }, metrics: newMetrics(), - client: NewResolver(), + client: newResolver(), } } // result represents the result of a single DNS check for a specific target type result struct { - Resolved []string - Error *string - Total float64 + Resolved []string `json:"resolved"` + Error *string `json:"error"` + Total float64 `json:"total"` } // Run starts the dns check -func (d *DNS) Run(ctx context.Context, cResult chan checks.ResultDTO) error { +func (ch *check) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting dns check", "interval", d.config.Interval.String()) + ticker := time.NewTicker(ch.config.Interval) + log.InfoContext(ctx, "Starting dns check", "interval", ch.config.Interval.String()) for { select { case <-ctx.Done(): - log.Error("Context canceled", "err", ctx.Err()) + log.ErrorContext(ctx, "Context canceled", "err", ctx.Err()) return ctx.Err() - case <-d.DoneChan: + case <-ch.Done: return nil - case <-time.After(d.config.Interval): - res := d.check(ctx) - + case <-ch.Update: + ch.Mutex.Lock() + ticker.Stop() + ticker = time.NewTicker(ch.config.Interval) + log.DebugContext(ctx, "Interval of dns check updated", "interval", ch.config.Interval.String()) + ch.Mutex.Unlock() + case <-ticker.C: + res := ch.check(ctx) cResult <- checks.ResultDTO{ - Name: d.Name(), + Name: ch.Name(), Result: &checks.Result{ Data: res, Timestamp: time.Now(), }, } - log.Debug("Successfully finished dns check run") + log.DebugContext(ctx, "Successfully finished dns check run") } } } -func (d *DNS) UpdateConfig(cfg checks.Runtime) error { +func (ch *check) UpdateConfig(cfg checks.Runtime) error { if c, ok := cfg.(*Config); ok { - d.Mu.Lock() - defer d.Mu.Unlock() + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + if reflect.DeepEqual(ch.config, *c) { + return nil + } - for _, target := range d.config.Targets { + for _, target := range ch.config.Targets { if !slices.Contains(c.Targets, target) { - err := d.metrics.Remove(target) + err := ch.metrics.Remove(target) if err != nil { return err } } } - d.config = *c + ch.config = *c + ch.Update <- struct{}{} return nil } @@ -134,27 +143,31 @@ func (d *DNS) UpdateConfig(cfg checks.Runtime) error { // Schema provides the schema of the data that will be provided // by the dns check -func (d *DNS) Schema() (*openapi3.SchemaRef, error) { - return checks.OpenapiFromPerfData(make(map[string]result)) +func (ch *check) Schema() (*openapi3.SchemaRef, error) { + return checks.OpenapiFromPerfData(map[string]result{}) } // GetMetricCollectors returns all metric collectors of check -func (d *DNS) GetMetricCollectors() []prometheus.Collector { - return d.metrics.GetCollectors() +func (ch *check) GetMetricCollectors() []prometheus.Collector { + return ch.metrics.GetCollectors() } // RemoveLabelledMetrics removes the metrics which have the passed // target as a label -func (d *DNS) RemoveLabelledMetrics(target string) error { - return d.metrics.Remove(target) +func (ch *check) RemoveLabelledMetrics(target string) error { + return ch.metrics.Remove(target) } // check performs DNS checks for all configured targets using a custom net.Resolver. // Returns a map where each target is associated with its DNS check result. -func (d *DNS) check(ctx context.Context) map[string]result { +func (ch *check) check(ctx context.Context) map[string]result { log := logger.FromContext(ctx) log.Debug("Checking dns") - if len(d.config.Targets) == 0 { + ch.Mutex.Lock() + cfg := ch.config + ch.Mutex.Unlock() + + if len(cfg.Targets) == 0 { log.Debug("No targets defined") return map[string]result{} } @@ -163,18 +176,18 @@ func (d *DNS) check(ctx context.Context) map[string]result { var wg sync.WaitGroup results := map[string]result{} - d.client.SetDialer(&net.Dialer{ - Timeout: d.config.Timeout, + ch.client.SetDialer(&net.Dialer{ + Timeout: cfg.Timeout, }) - log.Debug("Getting dns status for each target in separate routine", "amount", len(d.config.Targets)) - for _, t := range d.config.Targets { + log.Debug("Getting dns status for each target in separate routine", "amount", len(cfg.Targets)) + for _, t := range cfg.Targets { target := t wg.Add(1) lo := log.With("target", target) getDNSRetry := helper.Retry(func(ctx context.Context) error { - res, err := getDNS(ctx, d.client, target) + res, err := getDNS(ctx, ch.client, target) mu.Lock() defer mu.Unlock() results[target] = res @@ -182,7 +195,7 @@ func (d *DNS) check(ctx context.Context) map[string]result { return err } return nil - }, d.config.Retry) + }, cfg.Retry) go func() { defer wg.Done() @@ -197,7 +210,7 @@ func (d *DNS) check(ctx context.Context) map[string]result { mu.Lock() defer mu.Unlock() - d.metrics.Set(target, results, float64(status)) + ch.metrics.Set(target, results, float64(status)) }() } wg.Wait() diff --git a/pkg/checks/dns/dns_test.go b/pkg/checks/dns/dns_test.go index 6be26d8b..02cad0b2 100644 --- a/pkg/checks/dns/dns_test.go +++ b/pkg/checks/dns/dns_test.go @@ -23,7 +23,6 @@ import ( "fmt" "net" "reflect" - "sync" "testing" "time" @@ -43,18 +42,15 @@ const ( func TestDNS_Run(t *testing.T) { tests := []struct { name string - mockSetup func() *DNS + mockSetup func() *check targets []string want checks.Result }{ { name: "success with no targets", - mockSetup: func() *DNS { - return &DNS{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, + mockSetup: func() *check { + return &check{ + Base: checks.NewBase(), } }, targets: []string{}, @@ -64,7 +60,7 @@ func TestDNS_Run(t *testing.T) { }, { name: "success with one target lookup", - mockSetup: func() *DNS { + mockSetup: func() *check { c := newCommonDNS() c.client = &ResolverMock{ LookupHostFunc: func(ctx context.Context, addr string) ([]string, error) { @@ -83,7 +79,7 @@ func TestDNS_Run(t *testing.T) { }, { //nolint:dupl // normal lookup name: "success with multiple target lookups", - mockSetup: func() *DNS { + mockSetup: func() *check { c := newCommonDNS() c.client = &ResolverMock{ LookupHostFunc: func(ctx context.Context, addr string) ([]string, error) { @@ -103,7 +99,7 @@ func TestDNS_Run(t *testing.T) { }, { //nolint:dupl // reverse lookup name: "success with multiple target reverse lookups", - mockSetup: func() *DNS { + mockSetup: func() *check { c := newCommonDNS() c.client = &ResolverMock{ LookupAddrFunc: func(ctx context.Context, addr string) ([]string, error) { @@ -123,7 +119,7 @@ func TestDNS_Run(t *testing.T) { }, { name: "error - lookup failure for a target", - mockSetup: func() *DNS { + mockSetup: func() *check { c := newCommonDNS() c.client = &ResolverMock{ LookupHostFunc: func(ctx context.Context, addr string) ([]string, error) { @@ -142,7 +138,7 @@ func TestDNS_Run(t *testing.T) { }, { name: "error - timeout scenario for a target", - mockSetup: func() *DNS { + mockSetup: func() *check { c := newCommonDNS() c.client = &ResolverMock{ LookupHostFunc: func(ctx context.Context, addr string) ([]string, error) { @@ -284,7 +280,7 @@ func TestDNS_UpdateConfig(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := &DNS{} + c := &check{} if err := c.UpdateConfig(tt.input); (err != nil) != tt.wantErr { t.Errorf("DNS.UpdateConfig() error = %v, wantErr %v", err, tt.wantErr) @@ -305,12 +301,9 @@ func stringPointer(s string) *string { return &s } -func newCommonDNS() *DNS { - return &DNS{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, +func newCommonDNS() *check { + return &check{ + Base: checks.NewBase(), metrics: newMetrics(), } } diff --git a/pkg/checks/dns/resolver.go b/pkg/checks/dns/resolver.go index 63cd1ea9..c4c47213 100644 --- a/pkg/checks/dns/resolver.go +++ b/pkg/checks/dns/resolver.go @@ -34,7 +34,7 @@ type resolver struct { *net.Resolver } -func NewResolver() Resolver { +func newResolver() Resolver { return &resolver{ Resolver: &net.Resolver{ // We need to set this so the custom dialer is used diff --git a/pkg/checks/health/health.go b/pkg/checks/health/health.go index 55d7a058..be1443bc 100644 --- a/pkg/checks/health/health.go +++ b/pkg/checks/health/health.go @@ -21,8 +21,8 @@ package health import ( "context" "fmt" - "io" "net/http" + "reflect" "slices" "sync" "time" @@ -36,7 +36,7 @@ import ( ) var ( - _ checks.Check = (*Health)(nil) + _ checks.Check = (*check)(nil) _ checks.Runtime = (*Config)(nil) stateMapping = map[int]string{ 0: "unhealthy", @@ -46,8 +46,9 @@ var ( const CheckName = "health" -// Health is a check that measures the availability of an endpoint -type Health struct { +// check is the implementation of the health check. +// It measures the availability of a list of targets. +type check struct { checks.Base config Config metrics metrics @@ -55,11 +56,8 @@ type Health struct { // NewCheck creates a new instance of the health check func NewCheck() checks.Check { - return &Health{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, + return &check{ + Base: checks.NewBase(), config: Config{ Retry: checks.DefaultRetry, }, @@ -68,51 +66,61 @@ func NewCheck() checks.Check { } // Run starts the health check -func (h *Health) Run(ctx context.Context, cResult chan checks.ResultDTO) error { +func (ch *check) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting healthcheck", "interval", h.config.Interval.String()) + ticker := time.NewTicker(ch.config.Interval) + defer ticker.Stop() + log.InfoContext(ctx, "Starting health check", "interval", ch.config.Interval.String()) for { select { case <-ctx.Done(): - log.Error("Context canceled", "err", ctx.Err()) + log.ErrorContext(ctx, "Context canceled", "err", ctx.Err()) return ctx.Err() - case <-h.DoneChan: - log.Debug("Soft shut down") + case <-ch.Done: return nil - case <-time.After(h.config.Interval): - res := h.check(ctx) - + case <-ch.Update: + ch.Mutex.Lock() + ticker.Stop() + ticker = time.NewTicker(ch.config.Interval) + log.DebugContext(ctx, "Interval of health check updated", "interval", ch.config.Interval.String()) + ch.Mutex.Unlock() + case <-ticker.C: + res := ch.check(ctx) cResult <- checks.ResultDTO{ - Name: h.Name(), + Name: ch.Name(), Result: &checks.Result{ Data: res, Timestamp: time.Now(), }, } - log.Debug("Successfully finished health check run") + log.DebugContext(ctx, "Successfully finished health check run") } } } // UpdateConfig sets the configuration for the health check -func (h *Health) UpdateConfig(cfg checks.Runtime) error { +func (ch *check) UpdateConfig(cfg checks.Runtime) error { if c, ok := cfg.(*Config); ok { - h.Mu.Lock() - defer h.Mu.Unlock() + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + if reflect.DeepEqual(ch.config, *c) { + return nil + } - for _, target := range h.config.Targets { + for _, target := range ch.config.Targets { if !slices.Contains(c.Targets, target) { - err := h.metrics.Remove(target) + err := ch.metrics.Remove(target) if err != nil { return err } } } - h.config = *c + ch.config = *c + ch.Update <- struct{}{} return nil } @@ -123,62 +131,66 @@ func (h *Health) UpdateConfig(cfg checks.Runtime) error { } // GetConfig returns the current configuration of the check -func (h *Health) GetConfig() checks.Runtime { - h.Mu.Lock() - defer h.Mu.Unlock() - return &h.config +func (ch *check) GetConfig() checks.Runtime { + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + return &ch.config } // Name returns the name of the check -func (h *Health) Name() string { +func (*check) Name() string { return CheckName } // Schema provides the schema of the data that will be provided // by the health check -func (h *Health) Schema() (*openapi3.SchemaRef, error) { - return checks.OpenapiFromPerfData[map[string]string](map[string]string{}) +func (ch *check) Schema() (*openapi3.SchemaRef, error) { + return checks.OpenapiFromPerfData(map[string]string{}) } // GetMetricCollectors returns all metric collectors of check -func (h *Health) GetMetricCollectors() []prometheus.Collector { +func (ch *check) GetMetricCollectors() []prometheus.Collector { return []prometheus.Collector{ - h.metrics, + ch.metrics, } } // RemoveLabelledMetrics removes the metrics which have the passed // target as a label -func (h *Health) RemoveLabelledMetrics(target string) error { - return h.metrics.Remove(target) +func (ch *check) RemoveLabelledMetrics(target string) error { + return ch.metrics.Remove(target) } // check performs a health check using a retry function // to get the health status for all targets -func (h *Health) check(ctx context.Context) map[string]string { +func (ch *check) check(ctx context.Context) map[string]string { log := logger.FromContext(ctx) log.Debug("Checking health") - if len(h.config.Targets) == 0 { + ch.Mutex.Lock() + cfg := ch.config + ch.Mutex.Unlock() + + if len(cfg.Targets) == 0 { log.Debug("No targets defined") return map[string]string{} } - log.Debug("Getting health status for each target in separate routine", "amount", len(h.config.Targets)) + log.Debug("Getting health status for each target in separate routine", "amount", len(cfg.Targets)) var wg sync.WaitGroup var mu sync.Mutex results := map[string]string{} client := &http.Client{ - Timeout: h.config.Timeout, + Timeout: cfg.Timeout, } - for _, t := range h.config.Targets { + for _, t := range cfg.Targets { target := t wg.Add(1) l := log.With("target", target) getHealthRetry := helper.Retry(func(ctx context.Context) error { return getHealth(ctx, client, target) - }, h.config.Retry) + }, cfg.Retry) go func() { defer wg.Done() @@ -187,7 +199,7 @@ func (h *Health) check(ctx context.Context) map[string]string { l.Debug("Starting retry routine to get health status") if err := getHealthRetry(ctx); err != nil { state = 0 - l.Warn(fmt.Sprintf("Health check failed after %d retries", h.config.Retry.Count), "error", err) + l.Warn(fmt.Sprintf("Health check failed after %d retries", cfg.Retry.Count), "error", err) } l.Debug("Successfully got health status of target", "status", stateMapping[state]) @@ -195,7 +207,7 @@ func (h *Health) check(ctx context.Context) map[string]string { defer mu.Unlock() results[target] = stateMapping[state] - h.metrics.WithLabelValues(target).Set(float64(state)) + ch.metrics.WithLabelValues(target).Set(float64(state)) }() } @@ -216,17 +228,12 @@ func getHealth(ctx context.Context, client *http.Client, url string) error { return err } - resp, err := client.Do(req) //nolint:bodyclose // Closed in defer below + resp, err := client.Do(req) if err != nil { log.Error("Error while requesting health", "error", err) return err } - defer func(Body io.ReadCloser) { - err := Body.Close() - if err != nil { - log.Error("Failed to close response body", "error", err.Error()) - } - }(resp.Body) + defer resp.Body.Close() if resp.StatusCode != http.StatusOK { log.Warn("Health request was not ok (HTTP Status 200)", "status", resp.Status) diff --git a/pkg/checks/health/health_test.go b/pkg/checks/health/health_test.go index ee83bb9c..7c53b538 100644 --- a/pkg/checks/health/health_test.go +++ b/pkg/checks/health/health_test.go @@ -69,7 +69,7 @@ func TestHealth_UpdateConfig(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - h := &Health{ + h := &check{ metrics: newMetrics(), } @@ -227,7 +227,7 @@ func TestHealth_Check(t *testing.T) { ) } - h := &Health{ + h := &check{ config: Config{ Targets: tt.targets, Timeout: 30, diff --git a/pkg/checks/latency/latency.go b/pkg/checks/latency/latency.go index d9c0b63c..aab44e76 100644 --- a/pkg/checks/latency/latency.go +++ b/pkg/checks/latency/latency.go @@ -20,8 +20,8 @@ package latency import ( "context" - "io" "net/http" + "reflect" "slices" "sync" "time" @@ -35,14 +35,15 @@ import ( ) var ( - _ checks.Check = (*Latency)(nil) + _ checks.Check = (*check)(nil) _ checks.Runtime = (*Config)(nil) ) const CheckName = "latency" -// Latency is a check that measures the latency to an endpoint -type Latency struct { +// check is the implementation of the latency check. +// It measures the latency to a list of targets. +type check struct { checks.Base config Config metrics metrics @@ -50,11 +51,8 @@ type Latency struct { // NewCheck creates a new instance of the latency check func NewCheck() checks.Check { - return &Latency{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, + return &check{ + Base: checks.NewBase(), config: Config{ Retry: checks.DefaultRetry, }, @@ -70,50 +68,61 @@ type result struct { } // Run starts the latency check -func (l *Latency) Run(ctx context.Context, cResult chan checks.ResultDTO) error { +func (ch *check) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting latency check", "interval", l.config.Interval.String()) + ticker := time.NewTicker(ch.config.Interval) + defer ticker.Stop() + log.InfoContext(ctx, "Starting latency check", "interval", ch.config.Interval.String()) for { select { case <-ctx.Done(): - log.Error("Context canceled", "err", ctx.Err()) + log.ErrorContext(ctx, "Context canceled", "err", ctx.Err()) return ctx.Err() - case <-l.DoneChan: + case <-ch.Done: return nil - case <-time.After(l.config.Interval): - res := l.check(ctx) - + case <-ch.Update: + ch.Mutex.Lock() + ticker.Stop() + ticker = time.NewTicker(ch.config.Interval) + log.DebugContext(ctx, "Interval of latency check updated", "interval", ch.config.Interval.String()) + ch.Mutex.Unlock() + case <-ticker.C: + res := ch.check(ctx) cResult <- checks.ResultDTO{ - Name: l.Name(), + Name: ch.Name(), Result: &checks.Result{ Data: res, Timestamp: time.Now(), }, } - log.Debug("Successfully finished latency check run") + log.DebugContext(ctx, "Successfully finished latency check run") } } } // UpdateConfig sets the configuration for the latency check -func (l *Latency) UpdateConfig(cfg checks.Runtime) error { +func (ch *check) UpdateConfig(cfg checks.Runtime) error { if c, ok := cfg.(*Config); ok { - l.Mu.Lock() - defer l.Mu.Unlock() + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + if reflect.DeepEqual(ch.config, *c) { + return nil + } - for _, target := range l.config.Targets { + for _, target := range ch.config.Targets { if !slices.Contains(c.Targets, target) { - err := l.metrics.Remove(target) + err := ch.metrics.Remove(target) if err != nil { return err } } } - l.config = *c + ch.config = *c + ch.Update <- struct{}{} return nil } @@ -124,56 +133,60 @@ func (l *Latency) UpdateConfig(cfg checks.Runtime) error { } // GetConfig returns the current configuration of the latency Check -func (l *Latency) GetConfig() checks.Runtime { - l.Mu.Lock() - defer l.Mu.Unlock() - return &l.config +func (ch *check) GetConfig() checks.Runtime { + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + return &ch.config } // Name returns the name of the check -func (l *Latency) Name() string { +func (*check) Name() string { return CheckName } // Schema provides the schema of the data that will be provided // by the latency check -func (l *Latency) Schema() (*openapi3.SchemaRef, error) { - return checks.OpenapiFromPerfData[map[string]result](make(map[string]result)) +func (ch *check) Schema() (*openapi3.SchemaRef, error) { + return checks.OpenapiFromPerfData(map[string]result{}) } // GetMetricCollectors returns all metric collectors of check -func (l *Latency) GetMetricCollectors() []prometheus.Collector { +func (ch *check) GetMetricCollectors() []prometheus.Collector { return []prometheus.Collector{ - l.metrics.totalDuration, - l.metrics.count, - l.metrics.histogram, + ch.metrics.totalDuration, + ch.metrics.count, + ch.metrics.histogram, } } // RemoveLabelledMetrics removes the metrics which have the passed target as a label -func (l *Latency) RemoveLabelledMetrics(target string) error { - return l.metrics.Remove(target) +func (ch *check) RemoveLabelledMetrics(target string) error { + return ch.metrics.Remove(target) } // check performs a latency check using a retry function // to get the latency to all targets -func (l *Latency) check(ctx context.Context) map[string]result { +func (ch *check) check(ctx context.Context) map[string]result { log := logger.FromContext(ctx) log.Debug("Checking latency") - if len(l.config.Targets) == 0 { + ch.Mutex.Lock() + cfg := ch.config + ch.Mutex.Unlock() + + if len(cfg.Targets) == 0 { log.Debug("No targets defined") return map[string]result{} } - log.Debug("Getting latency status for each target in separate routine", "amount", len(l.config.Targets)) + log.Debug("Getting latency status for each target in separate routine", "amount", len(cfg.Targets)) var mu sync.Mutex var wg sync.WaitGroup results := map[string]result{} client := &http.Client{ - Timeout: l.config.Timeout, + Timeout: cfg.Timeout, } - for _, t := range l.config.Targets { + for _, t := range cfg.Targets { target := t wg.Add(1) lo := log.With("target", target) @@ -187,7 +200,7 @@ func (l *Latency) check(ctx context.Context) map[string]result { return err } return nil - }, l.config.Retry) + }, cfg.Retry) go func() { defer wg.Done() @@ -201,9 +214,9 @@ func (l *Latency) check(ctx context.Context) map[string]result { mu.Lock() defer mu.Unlock() - l.metrics.totalDuration.WithLabelValues(target).Set(results[target].Total) - l.metrics.count.WithLabelValues(target).Inc() - l.metrics.histogram.WithLabelValues(target).Observe(results[target].Total) + ch.metrics.totalDuration.WithLabelValues(target).Set(results[target].Total) + ch.metrics.count.WithLabelValues(target).Inc() + ch.metrics.histogram.WithLabelValues(target).Observe(results[target].Total) }() } @@ -228,7 +241,7 @@ func getLatency(ctx context.Context, c *http.Client, url string) (result, error) } start := time.Now() - resp, err := c.Do(req) //nolint:bodyclose // Closed in defer below + resp, err := c.Do(req) if err != nil { log.Error("Error while checking latency", "error", err) errval := err.Error() @@ -236,12 +249,9 @@ func getLatency(ctx context.Context, c *http.Client, url string) (result, error) return res, err } end := time.Now() + defer resp.Body.Close() res.Code = resp.StatusCode - defer func(Body io.ReadCloser) { - _ = Body.Close() - }(resp.Body) - res.Total = end.Sub(start).Seconds() return res, nil } diff --git a/pkg/checks/latency/latency_test.go b/pkg/checks/latency/latency_test.go index d4155cbf..f08543ae 100644 --- a/pkg/checks/latency/latency_test.go +++ b/pkg/checks/latency/latency_test.go @@ -274,7 +274,7 @@ func TestLatency_check(t *testing.T) { } } - l := &Latency{ + l := &check{ config: Config{Targets: tt.targets, Interval: time.Second * 120, Timeout: time.Second * 1}, metrics: newMetrics(), } @@ -306,7 +306,7 @@ func TestLatency_check(t *testing.T) { } func TestLatency_UpdateConfig(t *testing.T) { - c := Latency{} + c := check{} wantCfg := Config{ Targets: []string{"http://localhost:9090"}, } diff --git a/pkg/checks/traceroute/check.go b/pkg/checks/traceroute/check.go index 5ea3c820..0b022be6 100644 --- a/pkg/checks/traceroute/check.go +++ b/pkg/checks/traceroute/check.go @@ -21,6 +21,7 @@ package traceroute import ( "context" "fmt" + "reflect" "slices" "sync" "time" @@ -36,7 +37,7 @@ import ( "go.opentelemetry.io/otel/trace" ) -var _ checks.Check = (*Traceroute)(nil) +var _ checks.Check = (*check)(nil) const CheckName = "traceroute" @@ -51,12 +52,19 @@ func (t Target) String() string { return fmt.Sprintf("%s:%d", t.Addr, t.Port) } +// check is the implementation of the traceroute check. +// It traces the path to a list of targets. +type check struct { + checks.Base + config Config + traceroute tracerouteFactory + metrics metrics + tracer trace.Tracer +} + func NewCheck() checks.Check { - c := &Traceroute{ - Base: checks.Base{ - Mu: sync.Mutex{}, - DoneChan: make(chan struct{}, 1), - }, + c := &check{ + Base: checks.NewBase(), config: Config{}, traceroute: TraceRoute, metrics: newMetrics(), @@ -65,14 +73,6 @@ func NewCheck() checks.Check { return c } -type Traceroute struct { - checks.Base - config Config - traceroute tracerouteFactory - metrics metrics - tracer trace.Tracer -} - type tracerouteConfig struct { Dest string Port int @@ -91,24 +91,32 @@ type result struct { } // Run runs the check in a loop sending results to the provided channel -func (tr *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) error { +func (ch *check) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.InfoContext(ctx, "Starting traceroute check", "interval", tr.config.Interval.String()) + ticker := time.NewTicker(ch.config.Interval) + defer ticker.Stop() + log.InfoContext(ctx, "Starting traceroute check", "interval", ch.config.Interval.String()) for { select { case <-ctx.Done(): log.ErrorContext(ctx, "Context canceled", "error", ctx.Err()) return ctx.Err() - case <-tr.DoneChan: + case <-ch.Done: return nil - case <-time.After(tr.config.Interval): - res := tr.check(ctx) - tr.metrics.MinHops(res) + case <-ch.Update: + ch.Mutex.Lock() + ticker.Stop() + ticker = time.NewTicker(ch.config.Interval) + log.DebugContext(ctx, "Interval of traceroute check updated", "interval", ch.config.Interval.String()) + ch.Mutex.Unlock() + case <-ticker.C: + res := ch.check(ctx) + ch.metrics.MinHops(res) cResult <- checks.ResultDTO{ - Name: tr.Name(), + Name: ch.Name(), Result: &checks.Result{ Data: res, Timestamp: time.Now(), @@ -120,50 +128,53 @@ func (tr *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) er } // GetConfig returns the current configuration of the check -func (tr *Traceroute) GetConfig() checks.Runtime { - tr.Mu.Lock() - defer tr.Mu.Unlock() - return &tr.config +func (ch *check) GetConfig() checks.Runtime { + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + return &ch.config } -func (tr *Traceroute) check(ctx context.Context) map[string]result { +func (ch *check) check(ctx context.Context) map[string]result { res := make(map[string]result) log := logger.FromContext(ctx) + ch.Mutex.Lock() + cfg := ch.config + ch.Mutex.Unlock() type internalResult struct { addr string res result } - cResult := make(chan internalResult, len(tr.config.Targets)) + cResult := make(chan internalResult, len(cfg.Targets)) var wg sync.WaitGroup start := time.Now() - wg.Add(len(tr.config.Targets)) + wg.Add(len(cfg.Targets)) - for _, t := range tr.config.Targets { + for _, t := range cfg.Targets { go func(t Target) { defer wg.Done() l := log.With("target", t.String()) l.DebugContext(ctx, "Running traceroute") - c, span := tr.tracer.Start(ctx, t.String(), trace.WithAttributes( + c, span := ch.tracer.Start(ctx, t.String(), trace.WithAttributes( attribute.String("target.addr", t.Addr), attribute.Int("target.port", t.Port), - attribute.Stringer("config.interval", tr.config.Interval), - attribute.Stringer("config.timeout", tr.config.Timeout), - attribute.Int("config.max_hops", tr.config.MaxHops), - attribute.Int("config.retry.count", tr.config.Retry.Count), - attribute.Stringer("config.retry.delay", tr.config.Retry.Delay), + attribute.Stringer("config.interval", cfg.Interval), + attribute.Stringer("config.timeout", cfg.Timeout), + attribute.Int("config.max_hops", cfg.MaxHops), + attribute.Int("config.retry.count", cfg.Retry.Count), + attribute.Stringer("config.retry.delay", cfg.Retry.Delay), )) defer span.End() s := time.Now() - hops, err := tr.traceroute(c, tracerouteConfig{ + hops, err := ch.traceroute(c, tracerouteConfig{ Dest: t.Addr, Port: t.Port, - Timeout: tr.config.Timeout, - MaxHops: tr.config.MaxHops, - Rc: tr.config.Retry, + Timeout: cfg.Timeout, + MaxHops: cfg.MaxHops, + Rc: cfg.Retry, }) elapsed := time.Since(s) @@ -175,12 +186,12 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { span.SetStatus(codes.Ok, "success") } - tr.metrics.CheckDuration(t.Addr, elapsed) + ch.metrics.CheckDuration(t.Addr, elapsed) l.DebugContext(ctx, "Ran traceroute", "result", hops, "duration", elapsed) res := result{ Hops: hops, - MinHops: tr.config.MaxHops, + MinHops: cfg.MaxHops, } for ttl, hop := range hops { for _, attempt := range hop { @@ -212,24 +223,26 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { return res } -// UpdateConfig is called once when the check is registered -// This is also called while the check is running, if the remote config is updated -// This should return an error if the config is invalid -func (tr *Traceroute) UpdateConfig(cfg checks.Runtime) error { +// UpdateConfig updates the configuration of the check. +func (ch *check) UpdateConfig(cfg checks.Runtime) error { if c, ok := cfg.(*Config); ok { - tr.Mu.Lock() - defer tr.Mu.Unlock() + ch.Mutex.Lock() + defer ch.Mutex.Unlock() + if reflect.DeepEqual(ch.config, *c) { + return nil + } - for _, target := range tr.config.Targets { + for _, target := range ch.config.Targets { if !slices.Contains(c.Targets, target) { - err := tr.metrics.Remove(target.Addr) + err := ch.metrics.Remove(target.Addr) if err != nil { return err } } } - tr.config = *c + ch.config = *c + ch.Update <- struct{}{} return nil } @@ -240,22 +253,22 @@ func (tr *Traceroute) UpdateConfig(cfg checks.Runtime) error { } // Schema returns an openapi3.SchemaRef of the result type returned by the check -func (tr *Traceroute) Schema() (*openapi3.SchemaRef, error) { +func (ch *check) Schema() (*openapi3.SchemaRef, error) { return checks.OpenapiFromPerfData(map[string]result{}) } // GetMetricCollectors allows the check to provide prometheus metric collectors -func (tr *Traceroute) GetMetricCollectors() []prometheus.Collector { - return tr.metrics.List() +func (ch *check) GetMetricCollectors() []prometheus.Collector { + return ch.metrics.List() } // Name returns the name of the check -func (tr *Traceroute) Name() string { +func (*check) Name() string { return CheckName } // RemoveLabelledMetrics removes the metrics which have the passed // target as a label -func (tr *Traceroute) RemoveLabelledMetrics(target string) error { - return tr.metrics.Remove(target) +func (ch *check) RemoveLabelledMetrics(target string) error { + return ch.metrics.Remove(target) } diff --git a/pkg/checks/traceroute/check_test.go b/pkg/checks/traceroute/check_test.go index 37bea923..e597b472 100644 --- a/pkg/checks/traceroute/check_test.go +++ b/pkg/checks/traceroute/check_test.go @@ -21,7 +21,6 @@ package traceroute import ( "context" "net" - "sync" "testing" "time" @@ -33,7 +32,7 @@ import ( func TestCheck(t *testing.T) { cases := []struct { name string - c *Traceroute + c *check want map[string]result }{ { @@ -73,13 +72,13 @@ func TestCheck(t *testing.T) { } } -func newForTest(f tracerouteFactory, maxHops int, targets []string) *Traceroute { +func newForTest(f tracerouteFactory, maxHops int, targets []string) *check { t := make([]Target, len(targets)) for i, target := range targets { t[i] = Target{Addr: target} } - return &Traceroute{ - Base: checks.Base{Mu: sync.Mutex{}, DoneChan: make(chan struct{})}, + return &check{ + Base: checks.NewBase(), config: Config{Targets: t, MaxHops: maxHops}, traceroute: f, metrics: newMetrics(), diff --git a/test/framework/checks.go b/test/checks.go similarity index 76% rename from test/framework/checks.go rename to test/checks.go index e8ccca02..3bc44071 100644 --- a/test/framework/checks.go +++ b/test/checks.go @@ -1,4 +1,4 @@ -package framework +package test import ( "testing" @@ -14,8 +14,14 @@ import ( ) type CheckBuilder interface { + // For returns the name of the check. + For() string + // Check returns the check. Check(t *testing.T) checks.Check + // YAML returns the yaml representation of the check. YAML(t *testing.T) []byte + // ExpectedWaitTime returns the expected wait time for the check. + ExpectedWaitTime() time.Duration } // newCheck creates a new check with the given config. @@ -31,9 +37,10 @@ func newCheck(t *testing.T, c checks.Check, config checks.Runtime) checks.Check return c } -type marshalConfig map[string]any +// checkConfig is a map of check names to their configuration. +type checkConfig map[string]checks.Runtime -func newCheckAsYAML(t *testing.T, cfg marshalConfig) []byte { +func newCheckAsYAML(t *testing.T, cfg checkConfig) []byte { t.Helper() out, err := yaml.Marshal(cfg) if err != nil { @@ -85,7 +92,17 @@ func (b *healthCheckBuilder) Check(t *testing.T) checks.Check { // YAML returns the yaml representation of the health check. func (b *healthCheckBuilder) YAML(t *testing.T) []byte { t.Helper() - return newCheckAsYAML(t, marshalConfig{b.cfg.For(): b.cfg}) + return newCheckAsYAML(t, checkConfig{b.cfg.For(): &b.cfg}) +} + +// ExpectedWaitTime returns the expected wait time for the health check. +func (b *healthCheckBuilder) ExpectedWaitTime() time.Duration { + return b.cfg.Interval + b.cfg.Timeout + time.Duration(b.cfg.Retry.Count)*b.cfg.Retry.Delay +} + +// For returns the name of the check. +func (b *healthCheckBuilder) For() string { + return b.cfg.For() } var _ CheckBuilder = (*latencyConfigBuilder)(nil) @@ -130,7 +147,17 @@ func (b *latencyConfigBuilder) Check(t *testing.T) checks.Check { // YAML returns the yaml representation of the latency check. func (b *latencyConfigBuilder) YAML(t *testing.T) []byte { t.Helper() - return newCheckAsYAML(t, marshalConfig{b.cfg.For(): b.cfg}) + return newCheckAsYAML(t, checkConfig{b.cfg.For(): &b.cfg}) +} + +// For returns the name of the check. +func (b *latencyConfigBuilder) For() string { + return b.cfg.For() +} + +// ExpectedWaitTime returns the expected wait time for the health check. +func (b *latencyConfigBuilder) ExpectedWaitTime() time.Duration { + return b.cfg.Interval + b.cfg.Timeout + time.Duration(b.cfg.Retry.Count)*b.cfg.Retry.Delay } var _ CheckBuilder = (*dnsConfigBuilder)(nil) @@ -175,7 +202,17 @@ func (b *dnsConfigBuilder) Check(t *testing.T) checks.Check { // YAML returns the yaml representation of the dns check. func (b *dnsConfigBuilder) YAML(t *testing.T) []byte { t.Helper() - return newCheckAsYAML(t, marshalConfig{b.cfg.For(): b.cfg}) + return newCheckAsYAML(t, checkConfig{b.cfg.For(): &b.cfg}) +} + +// ExpectedWaitTime returns the expected wait time for the health check. +func (b *dnsConfigBuilder) ExpectedWaitTime() time.Duration { + return b.cfg.Interval + b.cfg.Timeout + time.Duration(b.cfg.Retry.Count)*b.cfg.Retry.Delay +} + +// For returns the name of the check. +func (b *dnsConfigBuilder) For() string { + return b.cfg.For() } var _ CheckBuilder = (*tracerouteConfigBuilder)(nil) @@ -226,5 +263,15 @@ func (b *tracerouteConfigBuilder) Check(t *testing.T) checks.Check { // YAML returns the yaml representation of the traceroute check. func (b *tracerouteConfigBuilder) YAML(t *testing.T) []byte { t.Helper() - return newCheckAsYAML(t, marshalConfig{b.cfg.For(): b.cfg}) + return newCheckAsYAML(t, checkConfig{b.cfg.For(): &b.cfg}) +} + +// ExpectedWaitTime returns the expected wait time for the health check. +func (b *tracerouteConfigBuilder) ExpectedWaitTime() time.Duration { + return b.cfg.Interval + b.cfg.Timeout + time.Duration(b.cfg.Retry.Count)*b.cfg.Retry.Delay +} + +// For returns the name of the check. +func (b *tracerouteConfigBuilder) For() string { + return b.cfg.For() } diff --git a/test/e2e.go b/test/e2e.go new file mode 100644 index 00000000..09754e26 --- /dev/null +++ b/test/e2e.go @@ -0,0 +1,461 @@ +package test + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "reflect" + "sync" + "testing" + "time" + + "github.com/caas-team/sparrow/pkg/checks" + "github.com/caas-team/sparrow/pkg/config" + "github.com/caas-team/sparrow/pkg/sparrow" + "github.com/getkin/kin-openapi/openapi3" + "github.com/getkin/kin-openapi/routers" + "github.com/getkin/kin-openapi/routers/gorillamux" +) + +var _ Runner = (*E2E)(nil) + +// E2E is an end-to-end test. +type E2E struct { + t *testing.T + config config.Config + sparrow *sparrow.Sparrow + checks map[string]CheckBuilder + buf bytes.Buffer + path string + mu sync.Mutex + running bool +} + +// WithConfigFile sets the path to the config file. +func (t *E2E) WithConfigFile(path string) *E2E { + t.path = path + return t +} + +// WithChecks sets the checks in the test. +func (t *E2E) WithChecks(builders ...CheckBuilder) *E2E { + for _, b := range builders { + t.checks[b.For()] = b + t.buf.Write(b.YAML(t.t)) + } + return t +} + +// UpdateChecks updates the checks of the test. +func (t *E2E) UpdateChecks(builders ...CheckBuilder) *E2E { + t.checks = map[string]CheckBuilder{} + t.buf.Reset() + for _, b := range builders { + t.checks[b.For()] = b + t.buf.Write(b.YAML(t.t)) + } + + err := t.writeCheckConfig() + if err != nil { + t.t.Fatalf("Failed to write check config: %v", err) + } + + return t +} + +// Run runs the test. +// Runs indefinitely until the context is canceled. +func (t *E2E) Run(ctx context.Context) error { + if t.isRunning() { + t.t.Fatal("E2E.Run must be called once") + } + + if t.path == "" { + t.path = "testdata/checks.yaml" + } + + err := t.writeCheckConfig() + if err != nil { + t.t.Fatalf("Failed to write check config: %v", err) + } + + t.mu.Lock() + t.running = true + t.mu.Unlock() + return t.sparrow.Run(ctx) +} + +// AwaitStartup waits for the provided URL to be ready. +// +// Must be called after the e2e test started with [E2E.Run]. +func (t *E2E) AwaitStartup(u string, failureTimeout time.Duration) *E2E { + t.t.Helper() + // To ensure the goroutine is started before we are checking if the test is running. + const initialDelay = 100 * time.Millisecond + <-time.After(initialDelay) + if !t.isRunning() { + t.t.Fatal("E2E.AwaitStartup must be called after E2E.Run") + } + + const retryInterval = 100 * time.Millisecond + start := time.Now() + deadline := start.Add(failureTimeout) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, u, http.NoBody) + if err != nil { + t.t.Fatalf("Failed to create request: %v", err) + return t + } + + for { + resp, err := http.DefaultClient.Do(req) + if err == nil && resp.StatusCode == http.StatusOK { + t.t.Logf("%s is ready after %v", u, time.Since(start)) + resp.Body.Close() + return t + } + if time.Now().After(deadline) { + t.t.Errorf("%s is not ready [%s (%d)] after %v: %v", u, http.StatusText(resp.StatusCode), resp.StatusCode, failureTimeout, err) + return t + } + <-time.After(retryInterval) + } +} + +// AwaitLoader waits for the loader to reload the configuration. +// +// Must be called after the e2e test started with [E2E.Run]. +func (t *E2E) AwaitLoader() *E2E { + t.t.Helper() + if !t.isRunning() { + t.t.Fatal("E2E.AwaitLoader must be called after E2E.Run") + } + + t.t.Logf("Waiting %s for loader to reload configuration", t.config.Loader.Interval.String()) + <-time.After(t.config.Loader.Interval) + return t +} + +// AwaitChecks waits for all checks to be executed before proceeding. +// +// Must be called after the e2e test started with [E2E.Run]. +func (t *E2E) AwaitChecks() *E2E { + t.t.Helper() + if !t.isRunning() { + t.t.Fatal("E2E.AwaitReadiness must be called after E2E.Run") + } + + wait := 5 * time.Second + for _, check := range t.checks { + wait = max(wait, check.ExpectedWaitTime()) + } + t.t.Logf("Waiting %s for checks to be executed", wait.String()) + <-time.After(wait) + return t +} + +// writeCheckConfig writes the check config to a file at the provided path. +func (t *E2E) writeCheckConfig() error { + const fileMode = 0o755 + err := os.MkdirAll(filepath.Dir(t.path), fileMode) + if err != nil { + return fmt.Errorf("failed to create %q: %w", filepath.Dir(t.path), err) + } + + err = os.WriteFile(t.path, t.buf.Bytes(), fileMode) + if err != nil { + return fmt.Errorf("failed to write %q: %w", t.path, err) + } + return nil +} + +// isRunning returns true if the test is running. +func (t *E2E) isRunning() bool { + t.mu.Lock() + defer t.mu.Unlock() + return t.running +} + +// e2eHttpAsserter is an HTTP asserter for end-to-end tests. +type e2eHttpAsserter struct { + e2e *E2E + url string + response *e2eResponseAsserter + schema *openapi3.T + router routers.Router +} + +type e2eResponseAsserter struct { + want any + asserter func(r *http.Response) error +} + +// HttpAssertion creates a new HTTP assertion for the given URL. +func (t *E2E) HttpAssertion(u string) *e2eHttpAsserter { + return &e2eHttpAsserter{e2e: t, url: u} +} + +// Assert asserts the status code and optional validations against the response. +// Optional validations must be set before calling this method. +// +// Must be called after the e2e test started with [E2E.Run]. +func (a *e2eHttpAsserter) Assert(status int) { + a.e2e.t.Helper() + if !a.e2e.isRunning() { + a.e2e.t.Fatal("e2eHttpAsserter.Assert must be called after E2E.Run") + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, a.url, http.NoBody) + if err != nil { + a.e2e.t.Fatalf("Failed to create request: %v", err) + return + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + a.e2e.t.Errorf("Failed to get %s: %v", a.url, err) + return + } + defer resp.Body.Close() + + if resp.StatusCode != status { + a.e2e.t.Errorf("Want status code %d for %s, got %d", status, a.url, resp.StatusCode) + return + } + a.e2e.t.Logf("Got status code %d for %s", resp.StatusCode, a.url) + + if status == http.StatusOK { + if a.schema != nil && a.router != nil { + if err = a.assertSchema(req, resp); err != nil { + a.e2e.t.Errorf("Response from %q does not match schema: %v", a.url, err) + return + } + } + + if a.response != nil { + err := a.response.asserter(resp) + if err != nil { + a.e2e.t.Errorf("Failed to assert response: %v", err) + } + } + } +} + +// WithSchema fetches the OpenAPI schema and validates the response against it. +func (a *e2eHttpAsserter) WithSchema() *e2eHttpAsserter { + a.e2e.t.Helper() + schema, err := a.fetchSchema() + if err != nil { + a.e2e.t.Fatalf("Failed to fetch OpenAPI schema: %v", err) + } + + router, err := gorillamux.NewRouter(schema) + if err != nil { + a.e2e.t.Fatalf("Failed to create router from OpenAPI schema: %v", err) + } + + a.schema = schema + a.router = router + return a +} + +// WithResult sets the expected result for the response. +// The result is validated against the response body. +func (a *e2eHttpAsserter) WithCheckResult(r checks.Result) *e2eHttpAsserter { + a.e2e.t.Helper() + a.response = &e2eResponseAsserter{ + want: r, + asserter: a.assertCheckResponse, + } + return a +} + +// fetchSchema fetches the OpenAPI schema from the server. +func (a *e2eHttpAsserter) fetchSchema() (*openapi3.T, error) { + ctx := context.Background() + u, err := url.Parse(a.url) + if err != nil { + return nil, fmt.Errorf("failed to parse URL: %w", err) + } + u.Path = "/openapi" + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), http.NoBody) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to GET OpenAPI schema: %w", err) + } + defer resp.Body.Close() + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read OpenAPI schema: %w", err) + } + + loader := openapi3.NewLoader() + schema, err := loader.LoadFromData(data) + if err != nil { + return nil, fmt.Errorf("failed to load OpenAPI schema: %w", err) + } + + if err = schema.Validate(ctx); err != nil { + return nil, fmt.Errorf("OpenAPI schema validation error: %w", err) + } + + return schema, nil +} + +// assertSchema asserts the response body against the OpenAPI schema. +func (a *e2eHttpAsserter) assertSchema(req *http.Request, resp *http.Response) error { + route, _, err := a.router.FindRoute(req) + if err != nil { + return fmt.Errorf("failed to find route: %w", err) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response body: %w", err) + } + resp.Body.Close() + resp.Body = io.NopCloser(bytes.NewReader(data)) + + responseRef := route.Operation.Responses.Status(resp.StatusCode) + if responseRef == nil || responseRef.Value == nil { + return fmt.Errorf("no response defined in OpenAPI schema for status code %d", resp.StatusCode) + } + + mediaType := responseRef.Value.Content.Get("application/json") + if mediaType == nil { + return errors.New("no media type defined in OpenAPI schema for Content-Type 'application/json'") + } + + var body any + if err = json.Unmarshal(data, &body); err != nil { + return fmt.Errorf("failed to unmarshal response body: %w", err) + } + + // Validate the response body against the schema + err = mediaType.Schema.Value.VisitJSON(body) + if err != nil { + return fmt.Errorf("response body does not match schema: %w", err) + } + + return nil +} + +// assertCheckResponse asserts the response body against the expected check result. +func (a *e2eHttpAsserter) assertCheckResponse(resp *http.Response) error { + want, ok := a.response.want.(checks.Result) + if !ok { + a.e2e.t.Fatalf("Invalid response type: %T", a.response.want) + } + + var res checks.Result + if err := json.NewDecoder(resp.Body).Decode(&res); err != nil { + a.e2e.t.Errorf("Failed to decode response body: %v", err) + } + + wantData := want.Data.(map[string]any) + gotData := res.Data.(map[string]any) + assertMapEqual(a.e2e.t, wantData, gotData) + + const deltaTimeThreshold = 5 * time.Minute + if time.Since(res.Timestamp) > deltaTimeThreshold { + a.e2e.t.Errorf("Response timestamp is not recent: %v", res.Timestamp) + } + + return nil +} + +// assertMapEqual asserts the equality of the want and got maps. +// Fails the test if the maps are not equal. +func assertMapEqual(t *testing.T, want, got map[string]any) { + t.Helper() + if len(want) != len(got) { + t.Errorf("Want %d keys (%v), got %d keys (%v)", len(want), want, len(got), got) + } + + for k, w := range want { + g, ok := got[k] + if !ok { + t.Errorf("Missing key %q", k) + } + + if err := assertValueEqual(t, w, g); err != nil { + t.Errorf("got[%q]: %v", k, err) + } + } +} + +// assertValueEqual asserts the equality of the want and got values. +// For values that cannot be compared directly, it uses a type-specific comparison. +// e.g. IP addresses, timestamps, etc. +func assertValueEqual(t *testing.T, want, got any) error { + switch w := want.(type) { + case map[string]any: + gm, ok := got.(map[string]any) + if !ok { + return fmt.Errorf("%v (%T), want %v (%T)", got, got, w, w) + } + assertMapEqual(t, w, gm) + return nil + case time.Time, float32, float64: + // Timestamps and floating-point numbers are time-sensitive and are never equal. + return nil + case int: + // Unmarshaling JSON numbers as int will convert them to float64. + // We need to compare them as float64 to avoid type mismatch errors. + want = float64(w) + case []string: + // Unmarshaling JSON arrays as []string will convert them to []interface{}. + // We need to compare them as []interface{} and cast the elements to string + // to avoid type mismatch errors. + gs, ok := got.([]any) + if !ok { + return fmt.Errorf("%v (%T), want %v (%T)", got, got, w, w) + } + gss := make([]string, len(gs)) + for i, g := range gs { + gss[i] = g.(string) + } + for _, ipStr := range w { + wIP := net.ParseIP(ipStr) + if wIP == nil { + // This is a special case for string slices that might contain IP addresses. + // If the `want` value is not a valid IP address, we skip the IP validation + // and proceed to the default case for a generic equality check. + // + // Using `goto` here avoids introducing an additional boolean flag or + // nesting the logic further, which would make the code harder to read. + // In this case it simplifies the control flow by explicitly directing the + // execution to the default case. + goto defaultCase + } + + for _, gipStr := range gss { + gIP := net.ParseIP(gipStr) + if gIP == nil { + return fmt.Errorf("%q, want an IP address (%s)", gipStr, wIP) + } + } + } + return nil + } + +defaultCase: + if !reflect.DeepEqual(want, got) { + return fmt.Errorf("%v (%T), want %v (%T)", got, got, want, want) + } + return nil +} diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go index 75f84042..9f016659 100644 --- a/test/e2e/main_test.go +++ b/test/e2e/main_test.go @@ -1,41 +1,31 @@ package e2e import ( - "bytes" "context" - "encoding/json" - "errors" - "fmt" - "io" "net/http" "testing" "time" - "github.com/caas-team/sparrow/test/framework" - "github.com/getkin/kin-openapi/openapi3" - "github.com/getkin/kin-openapi/routers" - "github.com/getkin/kin-openapi/routers/gorillamux" + "github.com/caas-team/sparrow/pkg/checks" + "github.com/caas-team/sparrow/test" ) const ( - checkInterval = 20 * time.Second - checkTimeout = 15 * time.Second + checkInterval = 10 * time.Second + checkTimeout = 10 * time.Second ) -func TestSparrow_E2E(t *testing.T) { - if testing.Short() { - t.Skip("skipping e2e tests") - } - +func TestE2E_Sparrow_WithChecks_ConfigureOnce(t *testing.T) { + framework := test.NewFramework(t) tests := []struct { name string - startup framework.ConfigBuilder - checks []framework.CheckBuilder + startup test.ConfigBuilder + checks []test.CheckBuilder wantEndpoints map[string]int }{ { name: "no checks", - startup: *framework.NewConfig(), + startup: *test.NewSparrowConfig(), checks: nil, wantEndpoints: map[string]int{ "http://localhost:8080/v1/metrics/health": http.StatusNotFound, @@ -46,9 +36,9 @@ func TestSparrow_E2E(t *testing.T) { }, { name: "with health check", - startup: *framework.NewConfig(), - checks: []framework.CheckBuilder{ - framework.NewHealthCheck(). + startup: *test.NewSparrowConfig(), + checks: []test.CheckBuilder{ + test.NewHealthCheck(). WithInterval(checkInterval). WithTimeout(checkTimeout). WithTargets("https://www.example.com/", "https://www.google.com/"), @@ -62,17 +52,17 @@ func TestSparrow_E2E(t *testing.T) { }, { name: "with health, latency and dns checks", - startup: *framework.NewConfig(), - checks: []framework.CheckBuilder{ - framework.NewHealthCheck(). + startup: *test.NewSparrowConfig(), + checks: []test.CheckBuilder{ + test.NewHealthCheck(). WithInterval(checkInterval). WithTimeout(checkTimeout). WithTargets("https://www.example.com/"), - framework.NewLatencyCheck(). + test.NewLatencyCheck(). WithInterval(checkInterval). WithTimeout(checkTimeout). WithTargets("https://www.example.com/"), - framework.NewDNSCheck(). + test.NewDNSCheck(). WithInterval(checkInterval). WithTimeout(checkTimeout). WithTargets("www.example.com"), @@ -88,13 +78,7 @@ func TestSparrow_E2E(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - f := framework.New(t) - - e2e := f.E2E(tt.startup.Config(t)) - for _, check := range tt.checks { - e2e = e2e.WithCheck(check) - } - + e2e := framework.E2E(t, tt.startup.Config(t)).WithChecks(tt.checks...) ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) defer cancel() @@ -102,30 +86,10 @@ func TestSparrow_E2E(t *testing.T) { go func() { finish <- e2e.Run(ctx) }() - - // Wait for sparrow to be ready with a readiness probe. - readinessProbe(t, "http://localhost:8080", checkTimeout) - - // Wait for the checks to be executed. - wait := 5 * time.Second - if len(tt.checks) > 0 { - wait = checkInterval + checkTimeout + 5*time.Second - } - t.Logf("Waiting %s for checks to be executed", wait.String()) - <-time.After(wait) - - // Fetch, parse and create a new router from the OpenAPI schema, to be able to validate the responses. - schema, err := fetchOpenAPISchema("http://localhost:8080/openapi") - if err != nil { - t.Fatalf("Failed to fetch OpenAPI schema: %v", err) - } - router, err := gorillamux.NewRouter(schema) - if err != nil { - t.Fatalf("Failed to create router from OpenAPI schema: %v", err) - } + e2e.AwaitStartup("http://localhost:8080", checkTimeout).AwaitChecks() for url, status := range tt.wantEndpoints { - validateResponse(t, router, url, status) + e2e.HttpAssertion(url).WithSchema().Assert(status) } cancel() @@ -134,126 +98,271 @@ func TestSparrow_E2E(t *testing.T) { } } -func fetchOpenAPISchema(url string) (*openapi3.T, error) { - ctx := context.Background() - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, http.NoBody) - if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) - } +const loaderInterval = 5 * time.Second - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, fmt.Errorf("failed to GET OpenAPI schema: %w", err) - } - defer resp.Body.Close() - - data, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read OpenAPI schema: %w", err) - } - - loader := openapi3.NewLoader() - schema, err := loader.LoadFromData(data) - if err != nil { - return nil, fmt.Errorf("failed to load OpenAPI schema: %w", err) - } - - if err = schema.Validate(ctx); err != nil { - return nil, fmt.Errorf("OpenAPI schema validation error: %w", err) - } - - return schema, nil -} - -func validateResponse(t *testing.T, router routers.Router, url string, wantStatus int) { - t.Helper() - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, http.NoBody) - if err != nil { - t.Fatalf("Failed to create request: %v", err) - return - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Errorf("Failed to get %s: %v", url, err) - return - } - defer resp.Body.Close() +func TestE2E_Sparrow_WithChecks_Reconfigure(t *testing.T) { + framework := test.NewFramework(t) - if resp.StatusCode != wantStatus { - t.Errorf("Want status code %d for %s, got %d", wantStatus, url, resp.StatusCode) - return + type result struct { + status int + response checks.Result } - - if wantStatus == http.StatusOK { - if err = validateResponseSchema(router, req, resp); err != nil { - t.Errorf("Response from %q does not match schema: %v", url, err) - return - } - } - - t.Logf("Got status code %d for %s", resp.StatusCode, url) -} - -func validateResponseSchema(router routers.Router, req *http.Request, resp *http.Response) error { - route, _, err := router.FindRoute(req) - if err != nil { - return fmt.Errorf("failed to find route: %w", err) + tests := []struct { + name string + startup test.ConfigBuilder + initialChecks []test.CheckBuilder + wantInitial map[string]result + secondChecks []test.CheckBuilder + wantSecond map[string]result + }{ + { + name: "with health check then latency check", + startup: *test.NewSparrowConfig().WithLoader( + test.NewLoaderConfig(). + WithInterval(loaderInterval). + Build(), + ), + initialChecks: []test.CheckBuilder{ + test.NewHealthCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.example.com/", "https://www.google.com/"), + }, + wantInitial: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + "https://www.google.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + secondChecks: []test.CheckBuilder{ + test.NewLatencyCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.example.com/"), + }, + wantSecond: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + "https://www.google.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": map[string]any{ + "code": http.StatusOK, + "error": nil, + "total": time.Since(time.Now().Add(-100 * time.Millisecond)).Seconds(), + }, + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + }, + { + name: "with health check then dns check", + startup: *test.NewSparrowConfig().WithLoader( + test.NewLoaderConfig(). + WithInterval(loaderInterval). + Build(), + ), + initialChecks: []test.CheckBuilder{ + test.NewHealthCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.example.com/"), + }, + wantInitial: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + secondChecks: []test.CheckBuilder{ + test.NewDNSCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("www.example.com"), + }, + wantSecond: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "www.example.com": map[string]any{ + "resolved": []string{"1.2.3.4"}, + "error": nil, + "total": time.Since(time.Now().Add(-100 * time.Millisecond)).Seconds(), + }, + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + }, + { + name: "with health check then updated health check", + startup: *test.NewSparrowConfig().WithLoader( + test.NewLoaderConfig(). + WithInterval(loaderInterval). + Build(), + ), + initialChecks: []test.CheckBuilder{ + test.NewHealthCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.example.com/"), + }, + wantInitial: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + secondChecks: []test.CheckBuilder{ + test.NewHealthCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.google.com/"), + }, + wantSecond: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.google.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + }, + { + name: "with health check then no checks", + startup: *test.NewSparrowConfig().WithLoader( + test.NewLoaderConfig(). + WithInterval(loaderInterval). + Build(), + ), + initialChecks: []test.CheckBuilder{ + test.NewHealthCheck(). + WithInterval(checkInterval). + WithTimeout(checkTimeout). + WithTargets("https://www.example.com/"), + }, + wantInitial: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + secondChecks: nil, + wantSecond: map[string]result{ + "http://localhost:8080/v1/metrics/health": { + status: http.StatusOK, + response: checks.Result{ + Data: map[string]any{ + "https://www.example.com/": "healthy", + }, + Timestamp: time.Now(), + }, + }, + "http://localhost:8080/v1/metrics/latency": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/dns": {status: http.StatusNotFound}, + "http://localhost:8080/v1/metrics/traceroute": {status: http.StatusNotFound}, + }, + }, } - data, err := io.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("failed to read response body: %w", err) - } - // Reset the response body for potential further use - resp.Body = io.NopCloser(bytes.NewBuffer(data)) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + e2e := framework.E2E(t, tt.startup.Config(t)).WithChecks(tt.initialChecks...) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() - responseRef := route.Operation.Responses.Status(resp.StatusCode) - if responseRef == nil || responseRef.Value == nil { - return fmt.Errorf("no response defined in OpenAPI schema for status code %d", resp.StatusCode) - } + finish := make(chan error, 1) + go func() { + finish <- e2e.Run(ctx) + }() + e2e.AwaitStartup("http://localhost:8080", checkTimeout).AwaitChecks() - mediaType := responseRef.Value.Content.Get("application/json") - if mediaType == nil { - return errors.New("no media type defined in OpenAPI schema for Content-Type 'application/json'") - } + for url, result := range tt.wantInitial { + e2e.HttpAssertion(url). + WithSchema(). + WithCheckResult(result.response). + Assert(result.status) + } - var body any - if err = json.Unmarshal(data, &body); err != nil { - return fmt.Errorf("failed to unmarshal response body: %w", err) - } + e2e.UpdateChecks(tt.secondChecks...).AwaitLoader().AwaitChecks() + for url, result := range tt.wantSecond { + e2e.HttpAssertion(url). + WithSchema(). + WithCheckResult(result.response). + Assert(result.status) + } - // Validate the response body against the schema - err = mediaType.Schema.Value.VisitJSON(body) - if err != nil { - return fmt.Errorf("response body does not match schema: %w", err) + cancel() + <-finish + }) } - - return nil } -func readinessProbe(t *testing.T, url string, timeout time.Duration) { - t.Helper() - const retryInterval = 100 * time.Millisecond - deadline := time.Now().Add(timeout) - - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, http.NoBody) - if err != nil { - t.Fatalf("Failed to create request: %v", err) - return - } +func TestE2E_Sparrow_WithRemoteConfig(t *testing.T) {} - for { - resp, err := http.DefaultClient.Do(req) - if err == nil && resp.StatusCode == http.StatusOK { - t.Log("Sparrow is ready") - resp.Body.Close() - return - } - if time.Now().After(deadline) { - t.Fatalf("Sparrow not ready [%s (%d)] after %v: %v", http.StatusText(resp.StatusCode), resp.StatusCode, timeout, err) - return - } - <-time.After(retryInterval) - } -} +func TestE2E_Sparrow_WithTargetManager(t *testing.T) {} diff --git a/test/framework.go b/test/framework.go new file mode 100644 index 00000000..2de6982f --- /dev/null +++ b/test/framework.go @@ -0,0 +1,58 @@ +package test + +import ( + "context" + "testing" + + "github.com/caas-team/sparrow/pkg/config" + "github.com/caas-team/sparrow/pkg/sparrow" +) + +// Runner is a test runner. +type Runner interface { + // Run runs the test. + Run(ctx context.Context) error +} + +// Framework is a test framework. +// It provides a way to run various tests. +type Framework struct { + t *testing.T +} + +// NewFramework creates a new test framework. +func NewFramework(t *testing.T) *Framework { + t.Helper() + return &Framework{t: t} +} + +// Unit creates a new unit test. +// If the test is not run in short mode, it will be skipped. +func (f *Framework) Unit(t *testing.T, run func(context.Context) error) *Unit { + if !testing.Short() { + f.t.Skip("skipping unit tests") + return nil + } + + return &Unit{t: t, run: run} +} + +// E2E creates a new end-to-end test. +// If the test is run in short mode, it will be skipped. +func (f *Framework) E2E(t *testing.T, cfg *config.Config) *E2E { + if testing.Short() { + f.t.Skip("skipping e2e tests") + return nil + } + + if cfg == nil { + cfg = NewSparrowConfig().Config(f.t) + } + + return &E2E{ + t: t, + config: *cfg, + sparrow: sparrow.New(cfg), + checks: map[string]CheckBuilder{}, + } +} diff --git a/test/framework/framework.go b/test/framework/framework.go deleted file mode 100644 index a1c2047a..00000000 --- a/test/framework/framework.go +++ /dev/null @@ -1,69 +0,0 @@ -package framework - -import ( - "bytes" - "context" - "os" - "testing" - - "github.com/caas-team/sparrow/pkg/config" - "github.com/caas-team/sparrow/pkg/sparrow" -) - -type Framework struct { - t *testing.T -} - -func New(t *testing.T) *Framework { - return &Framework{t: t} -} - -type E2ETest struct { - t *testing.T - sparrow *sparrow.Sparrow - buf bytes.Buffer - path string -} - -func (f *Framework) E2E(cfg *config.Config) *E2ETest { - if cfg == nil { - cfg = NewConfig().Config(f.t) - } - - return &E2ETest{ - t: f.t, - sparrow: sparrow.New(cfg), - } -} - -func (t *E2ETest) WithConfigFile(path string) *E2ETest { - t.path = path - return t -} - -func (t *E2ETest) WithCheck(builder CheckBuilder) *E2ETest { - t.buf.Write(builder.YAML(t.t)) - return t -} - -// Run runs the test. -// Runs indefinitely until the context is canceled. -func (t *E2ETest) Run(ctx context.Context) error { - if t.path == "" { - t.path = "testdata/checks.yaml" - } - - const fileMode = 0o755 - err := os.MkdirAll("testdata", fileMode) - if err != nil { - t.t.Fatalf("failed to create testdata directory: %v", err) - } - - err = os.WriteFile(t.path, t.buf.Bytes(), fileMode) - if err != nil { - t.t.Fatalf("failed to write testdata/checks.yaml: %v", err) - return err - } - - return t.sparrow.Run(ctx) -} diff --git a/test/framework/startup.go b/test/startup.go similarity index 97% rename from test/framework/startup.go rename to test/startup.go index 0b6e36ce..c7712402 100644 --- a/test/framework/startup.go +++ b/test/startup.go @@ -1,4 +1,4 @@ -package framework +package test import ( "context" @@ -20,10 +20,10 @@ import ( type ConfigBuilder struct{ cfg config.Config } -func NewConfig() *ConfigBuilder { +func NewSparrowConfig() *ConfigBuilder { return &ConfigBuilder{ cfg: config.Config{ - SparrowName: "sparrow.de", + SparrowName: "sparrow.telekom.com", Loader: NewLoaderConfig().Build(), Api: NewAPIConfig("localhost:8080"), }, diff --git a/test/unit.go b/test/unit.go new file mode 100644 index 00000000..14c2c43b --- /dev/null +++ b/test/unit.go @@ -0,0 +1,19 @@ +package test + +import ( + "context" + "testing" +) + +var _ Runner = (*Unit)(nil) + +// Unit is a unit test. +type Unit struct { + t *testing.T + run func(context.Context) error +} + +// Run runs the test. +func (t *Unit) Run(ctx context.Context) error { + return t.run(ctx) +}