Skip to content
This repository has been archived by the owner on Jan 19, 2024. It is now read-only.

Commit

Permalink
fix: Queries with no data points (#297)
Browse files Browse the repository at this point in the history
* fix: Queries with no data points

Signed-off-by: Raphael Ludwig <[email protected]>

* refactor: Apply suggestions from code review

Signed-off-by: Raphael Ludwig <[email protected]>
  • Loading branch information
Raffy23 authored and christian-kreuzberger-dtx committed May 11, 2022
1 parent 77671e5 commit 73ee09c
Show file tree
Hide file tree
Showing 9 changed files with 762 additions and 84 deletions.
133 changes: 70 additions & 63 deletions eventhandling/getSliEvent.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ import (
"github.com/keptn-contrib/prometheus-service/utils/prometheus"
"gopkg.in/yaml.v2"
"log"
"math"
"net/url"
"strings"

cloudevents "github.com/cloudevents/sdk-go/v2"
"github.com/keptn-contrib/prometheus-service/utils"

keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/rest"
)

// GetSliEventHandler is responsible for processing get-sli triggered events:
// it retrieves the requested SLI values from Prometheus and reports them in a
// get-sli finished event.
type GetSliEventHandler struct {
// event is the incoming CloudEvent handled by this handler.
event cloudevents.Event
// keptnHandler sends the task started/finished events and is used to look
// up the custom SLI queries of the project.
keptnHandler *keptnv2.Keptn
// kubeClient provides the CoreV1 client used to look up the Prometheus API
// URL for the project.
kubeClient *kubernetes.Clientset
}

type prometheusCredentials struct {
Expand All @@ -47,33 +47,82 @@ func (eh GetSliEventHandler) HandleEvent() error {

// send started event
_, err = eh.keptnHandler.SendTaskStartedEvent(eventData, utils.ServiceName)

if err != nil {
errMsg := fmt.Sprintf("Failed to send task started CloudEvent (%s), aborting...", err.Error())
log.Println(errMsg)
errMsg := fmt.Errorf("failed to send task started CloudEvent: %w", err)
log.Println(errMsg.Error())
return err
}

// create SLI Results
var sliResults []*keptnv2.SLIResult
// helper function to log an error and send an appropriate finished event
sendFinishedErrorEvent := func(err error) error {
log.Printf("sending errored finished event: %s", err.Error())

// 2: try to fetch metrics into sliResults
if sliResults, err = retrieveMetrics(eventData, eh.keptnHandler); err != nil {
// failed to fetch metrics, send a finished event with the error
_, err = eh.keptnHandler.SendTaskFinishedEvent(&keptnv2.EventData{
_, sendError := eh.keptnHandler.SendTaskFinishedEvent(&keptnv2.EventData{
Status: keptnv2.StatusErrored,
Result: keptnv2.ResultFailed,
Message: err.Error(),
}, utils.ServiceName)

return err
// TODO: Maybe log error to console

return sendError
}

// get prometheus API URL for the provided Project from Kubernetes Config Map
prometheusAPIURL, err := getPrometheusAPIURL(eventData.Project, eh.kubeClient.CoreV1())
if err != nil {
return sendFinishedErrorEvent(fmt.Errorf("unable to get prometheus api URL: %w", err))
}

// create a new Prometheus Handler
prometheusHandler := prometheus.NewPrometheusHandler(
prometheusAPIURL,
&eventData.EventData,
eventData.Deployment, // "canary", "primary" or "" (or "direct" or "user_managed")
eventData.Labels,
eventData.GetSLI.CustomFilters,
)

// get SLI queries (from SLI.yaml)
projectCustomQueries, err := getCustomQueries(eh.keptnHandler, eventData.Project, eventData.Stage, eventData.Service)
if err != nil {
return sendFinishedErrorEvent(
fmt.Errorf("unable to retrieve custom queries for project %s: %w", eventData.Project, err),
)
}

// only apply queries if they contain anything
if projectCustomQueries != nil {
prometheusHandler.CustomQueries = projectCustomQueries
}

// retrieve metrics from prometheus
sliResults := retrieveMetrics(prometheusHandler, eventData)

// If we had any problem retrieving an SLI value, we set the result of the overall .finished event
// to Warning; if all of them fail, ResultFailed is set for the event
finalSLIEventResult := keptnv2.ResultPass

if len(sliResults) > 0 {
sliResultsFailed := 0
for _, sliResult := range sliResults {
if !sliResult.Success {
sliResultsFailed++
}
}

if sliResultsFailed > 0 && sliResultsFailed < len(sliResults) {
finalSLIEventResult = keptnv2.ResultWarning
} else if sliResultsFailed == len(sliResults) {
finalSLIEventResult = keptnv2.ResultFailed
}
}

// construct finished event data
getSliFinishedEventData := &keptnv2.GetSLIFinishedEventData{
EventData: keptnv2.EventData{
Status: keptnv2.StatusSucceeded,
Result: keptnv2.ResultPass,
Result: finalSLIEventResult,
},
GetSLI: keptnv2.GetSLIFinished{
IndicatorValues: sliResults,
Expand All @@ -82,9 +131,12 @@ func (eh GetSliEventHandler) HandleEvent() error {
},
}

// send get-sli.finished event with SLI DAta
_, err = eh.keptnHandler.SendTaskFinishedEvent(getSliFinishedEventData, utils.ServiceName)
if getSliFinishedEventData.EventData.Result == keptnv2.ResultFailed {
getSliFinishedEventData.EventData.Message = "unable to retrieve metrics"
}

// send get-sli.finished event with SLI DATA
_, err = eh.keptnHandler.SendTaskFinishedEvent(getSliFinishedEventData, utils.ServiceName)
if err != nil {
errMsg := fmt.Sprintf("Failed to send task finished CloudEvent (%s), aborting...", err.Error())
log.Println(errMsg)
Expand All @@ -94,48 +146,9 @@ func (eh GetSliEventHandler) HandleEvent() error {
return nil
}

func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *keptnv2.Keptn) ([]*keptnv2.SLIResult, error) {
func retrieveMetrics(prometheusHandler *prometheus.Handler, eventData *keptnv2.GetSLITriggeredEventData) []*keptnv2.SLIResult {
log.Printf("Retrieving Prometheus metrics")

clusterConfig, err := rest.InClusterConfig()
if err != nil {
log.Println("could not create Kubernetes cluster config")
return nil, errors.New("could not create Kubernetes client")
}

kubeClient, err := kubernetes.NewForConfig(clusterConfig)
if err != nil {
log.Println("could not create Kubernetes client")
return nil, errors.New("could not create Kubernetes client")
}

// get prometheus API URL for the provided Project from Kubernetes Config Map
prometheusAPIURL, err := getPrometheusAPIURL(eventData.Project, kubeClient.CoreV1())
if err != nil {
return nil, err
}

// Create a new Prometheus Handler
prometheusHandler := prometheus.NewPrometheusHandler(
prometheusAPIURL,
&eventData.EventData,
eventData.Deployment, // "canary", "primary" or "" (or "direct" or "user_managed")
eventData.Labels,
eventData.GetSLI.CustomFilters,
)

// get SLI queries (from SLI.yaml)
projectCustomQueries, err := getCustomQueries(keptnHandler, eventData.Project, eventData.Stage, eventData.Service)
if err != nil {
log.Println("retrieveMetrics: Failed to get custom queries for project " + eventData.Project)
log.Println(err.Error())
return nil, err
}

if projectCustomQueries != nil {
prometheusHandler.CustomQueries = projectCustomQueries
}

var sliResults []*keptnv2.SLIResult

for _, indicator := range eventData.GetSLI.Indicators {
Expand All @@ -148,13 +161,6 @@ func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *
Success: false,
Message: err.Error(),
})
} else if math.IsNaN(sliValue) {
sliResults = append(sliResults, &keptnv2.SLIResult{
Metric: indicator,
Value: 0,
Success: false,
Message: "SLI value is NaN",
})
} else {
sliResults = append(sliResults, &keptnv2.SLIResult{
Metric: indicator,
Expand All @@ -163,7 +169,8 @@ func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *
})
}
}
return sliResults, nil

return sliResults
}

func getCustomQueries(keptnHandler *keptnv2.Keptn, project string, stage string, service string) (map[string]string, error) {
Expand Down
170 changes: 170 additions & 0 deletions eventhandling/getSliEvent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package eventhandling

import (
"encoding/json"
cloudevents "github.com/cloudevents/sdk-go/v2"
"github.com/golang/mock/gomock"
prometheusAPI "github.com/prometheus/client_golang/api/prometheus/v1"
prometheusModel "github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"math/rand"
"testing"

prometheusUtils "github.com/keptn-contrib/prometheus-service/utils/prometheus"
prometheusfake "github.com/keptn-contrib/prometheus-service/utils/prometheus/fake"

keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
)

// eventJSON is a sh.keptn.event.get-sli.triggered CloudEvent fixture. It
// requests the single indicator "throughput" for the service "carts" in stage
// "staging" of project "sockshop"; the tests in this file unmarshal it to
// build the event data passed to retrieveMetrics.
const eventJSON = `
{
"data": {
"deployment": "canary",
"get-sli": {
"end": "2022-04-06T14:36:19.667Z",
"sliProvider": "prometheus",
"start": "2022-04-06T14:35:03.762Z",
"indicators": ["throughput"]
},
"project": "sockshop",
"service": "carts",
"stage": "staging"
},
"gitcommitid": "c8a40997599180a338d72504541c00057550a3dc",
"id": "585cb332-7198-4605-a0ef-28199268b91d",
"shkeptncontext": "37a580f4-96ef-4594-b62a-1235b91ed7f6",
"shkeptnspecversion": "0.2.4",
"source": "lighthouse-service",
"specversion": "1.0",
"time": "2022-04-06T14:36:19.887Z",
"type": "sh.keptn.event.get-sli.triggered"
}
`

// Test_retrieveMetrics checks that a query answered with exactly one sample
// yields a single successful SLI result carrying that sample's value.
func Test_retrieveMetrics(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	// Decode the fixture event and extract its get-sli payload.
	var triggered cloudevents.Event
	require.NoError(t, json.Unmarshal([]byte(eventJSON), &triggered))

	var payload keptnv2.GetSLITriggeredEventData
	require.NoError(t, triggered.DataAs(&payload))

	promAPI := prometheusfake.NewMockPrometheusAPI(ctrl)
	promHandler := &prometheusUtils.Handler{
		Project:       payload.Project,
		Stage:         payload.Stage,
		Service:       payload.Service,
		PrometheusAPI: promAPI,
	}

	// The mocked API answers the one expected query with a single random sample.
	want := rand.Float64()
	samples := prometheusModel.Vector{
		{Value: prometheusModel.SampleValue(want)},
	}
	promAPI.EXPECT().
		Query(gomock.Any(), gomock.Any(), gomock.Any()).
		Return(samples, prometheusAPI.Warnings{}, nil).
		Times(1)

	got := retrieveMetrics(promHandler, &payload)

	expected := &keptnv2.SLIResult{
		Metric:        Throughput,
		Value:         want,
		ComparedValue: 0,
		Success:       true,
		Message:       "",
	}
	assert.Len(t, got, 1)
	assert.Contains(t, got, expected)
}

// Test_retrieveMetricsWithMultipleValues checks that a query answered with more
// than one sample yields a single failed SLI result whose message is
// ErrMultipleValues.
func Test_retrieveMetricsWithMultipleValues(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	// Decode the fixture event and extract its get-sli payload.
	var triggered cloudevents.Event
	require.NoError(t, json.Unmarshal([]byte(eventJSON), &triggered))

	var payload keptnv2.GetSLITriggeredEventData
	require.NoError(t, triggered.DataAs(&payload))

	promAPI := prometheusfake.NewMockPrometheusAPI(ctrl)
	promHandler := &prometheusUtils.Handler{
		Project:       payload.Project,
		Stage:         payload.Stage,
		Service:       payload.Service,
		PrometheusAPI: promAPI,
	}

	// Two samples for one indicator: the result is ambiguous.
	samples := prometheusModel.Vector{
		{Value: prometheusModel.SampleValue(8.12830)},
		{Value: prometheusModel.SampleValue(0.28384)},
	}
	promAPI.EXPECT().
		Query(gomock.Any(), gomock.Any(), gomock.Any()).
		Return(samples, prometheusAPI.Warnings{}, nil).
		Times(1)

	got := retrieveMetrics(promHandler, &payload)

	expected := &keptnv2.SLIResult{
		Metric:        Throughput,
		Value:         0,
		ComparedValue: 0,
		Success:       false,
		Message:       prometheusUtils.ErrMultipleValues.Error(),
	}
	assert.Len(t, got, 1)
	assert.Contains(t, got, expected)
}

// Test_retrieveMetricsWithNoValue checks that a query answered with an empty
// vector yields a single failed SLI result whose message is ErrNoValues.
func Test_retrieveMetricsWithNoValue(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	// Decode the fixture event and extract its get-sli payload.
	var triggered cloudevents.Event
	require.NoError(t, json.Unmarshal([]byte(eventJSON), &triggered))

	var payload keptnv2.GetSLITriggeredEventData
	require.NoError(t, triggered.DataAs(&payload))

	promAPI := prometheusfake.NewMockPrometheusAPI(ctrl)
	promHandler := &prometheusUtils.Handler{
		Project:       payload.Project,
		Stage:         payload.Stage,
		Service:       payload.Service,
		PrometheusAPI: promAPI,
	}

	// The mocked API answers the one expected query without any data points.
	noSamples := prometheusModel.Vector{}
	promAPI.EXPECT().
		Query(gomock.Any(), gomock.Any(), gomock.Any()).
		Return(noSamples, prometheusAPI.Warnings{}, nil).
		Times(1)

	got := retrieveMetrics(promHandler, &payload)

	expected := &keptnv2.SLIResult{
		Metric:        Throughput,
		Value:         0,
		ComparedValue: 0,
		Success:       false,
		Message:       prometheusUtils.ErrNoValues.Error(),
	}
	assert.Len(t, got, 1)
	assert.Contains(t, got, expected)
}
Loading

0 comments on commit 73ee09c

Please sign in to comment.