Skip to content
This repository was archived by the owner on Jan 19, 2024. It is now read-only.

Commit 73ee09c

Browse files
Raffy23 authored and christian-kreuzberger-dtx committed
fix: Queries with no data points (#297)
* fix: Queries with no data points
  Signed-off-by: Raphael Ludwig <[email protected]>
* refactor: Apply suggestions from code review
  Signed-off-by: Raphael Ludwig <[email protected]>
1 parent 77671e5 commit 73ee09c

File tree

9 files changed

+762
-84
lines changed

9 files changed

+762
-84
lines changed

eventhandling/getSliEvent.go

Lines changed: 70 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,23 @@ import (
77
"github.com/keptn-contrib/prometheus-service/utils/prometheus"
88
"gopkg.in/yaml.v2"
99
"log"
10-
"math"
1110
"net/url"
1211
"strings"
1312

1413
cloudevents "github.com/cloudevents/sdk-go/v2"
1514
"github.com/keptn-contrib/prometheus-service/utils"
15+
1616
keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1818
"k8s.io/client-go/kubernetes"
1919
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
20-
"k8s.io/client-go/rest"
2120
)
2221

2322
// GetSliEventHandler is responsible for processing get-sli triggered events
2423
type GetSliEventHandler struct {
2524
event cloudevents.Event
2625
keptnHandler *keptnv2.Keptn
26+
kubeClient *kubernetes.Clientset
2727
}
2828

2929
type prometheusCredentials struct {
@@ -47,33 +47,82 @@ func (eh GetSliEventHandler) HandleEvent() error {
4747

4848
// send started event
4949
_, err = eh.keptnHandler.SendTaskStartedEvent(eventData, utils.ServiceName)
50-
5150
if err != nil {
52-
errMsg := fmt.Sprintf("Failed to send task started CloudEvent (%s), aborting...", err.Error())
53-
log.Println(errMsg)
51+
errMsg := fmt.Errorf("failed to send task started CloudEvent: %w", err)
52+
log.Println(errMsg.Error())
5453
return err
5554
}
5655

57-
// create SLI Results
58-
var sliResults []*keptnv2.SLIResult
56+
// helper function to log an error and send an appropriate finished event
57+
sendFinishedErrorEvent := func(err error) error {
58+
log.Printf("sending errored finished event: %s", err.Error())
5959

60-
// 2: try to fetch metrics into sliResults
61-
if sliResults, err = retrieveMetrics(eventData, eh.keptnHandler); err != nil {
62-
// failed to fetch metrics, send a finished event with the error
63-
_, err = eh.keptnHandler.SendTaskFinishedEvent(&keptnv2.EventData{
60+
_, sendError := eh.keptnHandler.SendTaskFinishedEvent(&keptnv2.EventData{
6461
Status: keptnv2.StatusErrored,
6562
Result: keptnv2.ResultFailed,
6663
Message: err.Error(),
6764
}, utils.ServiceName)
6865

69-
return err
66+
// TODO: Maybe log error to console
67+
68+
return sendError
69+
}
70+
71+
// get prometheus API URL for the provided Project from Kubernetes Config Map
72+
prometheusAPIURL, err := getPrometheusAPIURL(eventData.Project, eh.kubeClient.CoreV1())
73+
if err != nil {
74+
return sendFinishedErrorEvent(fmt.Errorf("unable to get prometheus api URL: %w", err))
75+
}
76+
77+
// create a new Prometheus Handler
78+
prometheusHandler := prometheus.NewPrometheusHandler(
79+
prometheusAPIURL,
80+
&eventData.EventData,
81+
eventData.Deployment, // "canary", "primary" or "" (or "direct" or "user_managed")
82+
eventData.Labels,
83+
eventData.GetSLI.CustomFilters,
84+
)
85+
86+
// get SLI queries (from SLI.yaml)
87+
projectCustomQueries, err := getCustomQueries(eh.keptnHandler, eventData.Project, eventData.Stage, eventData.Service)
88+
if err != nil {
89+
return sendFinishedErrorEvent(
90+
fmt.Errorf("unable to retrieve custom queries for project %s: %w", eventData.Project, err),
91+
)
92+
}
93+
94+
// only apply queries if they contain anything
95+
if projectCustomQueries != nil {
96+
prometheusHandler.CustomQueries = projectCustomQueries
97+
}
98+
99+
// retrieve metrics from prometheus
100+
sliResults := retrieveMetrics(prometheusHandler, eventData)
101+
102+
// If we had any problem retrieving an SLI value, we set the result of the overall .finished event
103+
// to Warning; if all of them fail, ResultFailed is set for the event
104+
finalSLIEventResult := keptnv2.ResultPass
105+
106+
if len(sliResults) > 0 {
107+
sliResultsFailed := 0
108+
for _, sliResult := range sliResults {
109+
if !sliResult.Success {
110+
sliResultsFailed++
111+
}
112+
}
113+
114+
if sliResultsFailed > 0 && sliResultsFailed < len(sliResults) {
115+
finalSLIEventResult = keptnv2.ResultWarning
116+
} else if sliResultsFailed == len(sliResults) {
117+
finalSLIEventResult = keptnv2.ResultFailed
118+
}
70119
}
71120

72121
// construct finished event data
73122
getSliFinishedEventData := &keptnv2.GetSLIFinishedEventData{
74123
EventData: keptnv2.EventData{
75124
Status: keptnv2.StatusSucceeded,
76-
Result: keptnv2.ResultPass,
125+
Result: finalSLIEventResult,
77126
},
78127
GetSLI: keptnv2.GetSLIFinished{
79128
IndicatorValues: sliResults,
@@ -82,9 +131,12 @@ func (eh GetSliEventHandler) HandleEvent() error {
82131
},
83132
}
84133

85-
// send get-sli.finished event with SLI DAta
86-
_, err = eh.keptnHandler.SendTaskFinishedEvent(getSliFinishedEventData, utils.ServiceName)
134+
if getSliFinishedEventData.EventData.Result == keptnv2.ResultFailed {
135+
getSliFinishedEventData.EventData.Message = "unable to retrieve metrics"
136+
}
87137

138+
// send get-sli.finished event with SLI DATA
139+
_, err = eh.keptnHandler.SendTaskFinishedEvent(getSliFinishedEventData, utils.ServiceName)
88140
if err != nil {
89141
errMsg := fmt.Sprintf("Failed to send task finished CloudEvent (%s), aborting...", err.Error())
90142
log.Println(errMsg)
@@ -94,48 +146,9 @@ func (eh GetSliEventHandler) HandleEvent() error {
94146
return nil
95147
}
96148

97-
func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *keptnv2.Keptn) ([]*keptnv2.SLIResult, error) {
149+
func retrieveMetrics(prometheusHandler *prometheus.Handler, eventData *keptnv2.GetSLITriggeredEventData) []*keptnv2.SLIResult {
98150
log.Printf("Retrieving Prometheus metrics")
99151

100-
clusterConfig, err := rest.InClusterConfig()
101-
if err != nil {
102-
log.Println("could not create Kubernetes cluster config")
103-
return nil, errors.New("could not create Kubernetes client")
104-
}
105-
106-
kubeClient, err := kubernetes.NewForConfig(clusterConfig)
107-
if err != nil {
108-
log.Println("could not create Kubernetes client")
109-
return nil, errors.New("could not create Kubernetes client")
110-
}
111-
112-
// get prometheus API URL for the provided Project from Kubernetes Config Map
113-
prometheusAPIURL, err := getPrometheusAPIURL(eventData.Project, kubeClient.CoreV1())
114-
if err != nil {
115-
return nil, err
116-
}
117-
118-
// Create a new Prometheus Handler
119-
prometheusHandler := prometheus.NewPrometheusHandler(
120-
prometheusAPIURL,
121-
&eventData.EventData,
122-
eventData.Deployment, // "canary", "primary" or "" (or "direct" or "user_managed")
123-
eventData.Labels,
124-
eventData.GetSLI.CustomFilters,
125-
)
126-
127-
// get SLI queries (from SLI.yaml)
128-
projectCustomQueries, err := getCustomQueries(keptnHandler, eventData.Project, eventData.Stage, eventData.Service)
129-
if err != nil {
130-
log.Println("retrieveMetrics: Failed to get custom queries for project " + eventData.Project)
131-
log.Println(err.Error())
132-
return nil, err
133-
}
134-
135-
if projectCustomQueries != nil {
136-
prometheusHandler.CustomQueries = projectCustomQueries
137-
}
138-
139152
var sliResults []*keptnv2.SLIResult
140153

141154
for _, indicator := range eventData.GetSLI.Indicators {
@@ -148,13 +161,6 @@ func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *
148161
Success: false,
149162
Message: err.Error(),
150163
})
151-
} else if math.IsNaN(sliValue) {
152-
sliResults = append(sliResults, &keptnv2.SLIResult{
153-
Metric: indicator,
154-
Value: 0,
155-
Success: false,
156-
Message: "SLI value is NaN",
157-
})
158164
} else {
159165
sliResults = append(sliResults, &keptnv2.SLIResult{
160166
Metric: indicator,
@@ -163,7 +169,8 @@ func retrieveMetrics(eventData *keptnv2.GetSLITriggeredEventData, keptnHandler *
163169
})
164170
}
165171
}
166-
return sliResults, nil
172+
173+
return sliResults
167174
}
168175

169176
func getCustomQueries(keptnHandler *keptnv2.Keptn, project string, stage string, service string) (map[string]string, error) {

eventhandling/getSliEvent_test.go

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
package eventhandling
2+
3+
import (
4+
"encoding/json"
5+
cloudevents "github.com/cloudevents/sdk-go/v2"
6+
"github.com/golang/mock/gomock"
7+
prometheusAPI "github.com/prometheus/client_golang/api/prometheus/v1"
8+
prometheusModel "github.com/prometheus/common/model"
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
"math/rand"
12+
"testing"
13+
14+
prometheusUtils "github.com/keptn-contrib/prometheus-service/utils/prometheus"
15+
prometheusfake "github.com/keptn-contrib/prometheus-service/utils/prometheus/fake"
16+
17+
keptnv2 "github.com/keptn/go-utils/pkg/lib/v0_2_0"
18+
)
19+
20+
const eventJSON = `
21+
{
22+
"data": {
23+
"deployment": "canary",
24+
"get-sli": {
25+
"end": "2022-04-06T14:36:19.667Z",
26+
"sliProvider": "prometheus",
27+
"start": "2022-04-06T14:35:03.762Z",
28+
"indicators": ["throughput"]
29+
},
30+
"project": "sockshop",
31+
"service": "carts",
32+
"stage": "staging"
33+
},
34+
"gitcommitid": "c8a40997599180a338d72504541c00057550a3dc",
35+
"id": "585cb332-7198-4605-a0ef-28199268b91d",
36+
"shkeptncontext": "37a580f4-96ef-4594-b62a-1235b91ed7f6",
37+
"shkeptnspecversion": "0.2.4",
38+
"source": "lighthouse-service",
39+
"specversion": "1.0",
40+
"time": "2022-04-06T14:36:19.887Z",
41+
"type": "sh.keptn.event.get-sli.triggered"
42+
}
43+
`
44+
45+
func Test_retrieveMetrics(t *testing.T) {
46+
mockCtrl := gomock.NewController(t)
47+
defer mockCtrl.Finish()
48+
49+
incomingEvent := &cloudevents.Event{}
50+
51+
err := json.Unmarshal([]byte(eventJSON), incomingEvent)
52+
require.NoError(t, err)
53+
54+
eventData := &keptnv2.GetSLITriggeredEventData{}
55+
err = incomingEvent.DataAs(eventData)
56+
require.NoError(t, err)
57+
58+
apiMock := prometheusfake.NewMockPrometheusAPI(mockCtrl)
59+
handler := prometheusUtils.Handler{
60+
Project: eventData.Project,
61+
Stage: eventData.Stage,
62+
Service: eventData.Service,
63+
PrometheusAPI: apiMock,
64+
}
65+
66+
sliValue := rand.Float64()
67+
returnValue := prometheusModel.Vector{
68+
{
69+
Value: prometheusModel.SampleValue(sliValue),
70+
},
71+
}
72+
73+
apiMock.EXPECT().Query(gomock.Any(), gomock.Any(), gomock.Any()).Times(1).Return(
74+
returnValue, prometheusAPI.Warnings{}, nil,
75+
)
76+
77+
sliResults := retrieveMetrics(&handler, eventData)
78+
79+
assert.Len(t, sliResults, 1)
80+
assert.Contains(t, sliResults, &keptnv2.SLIResult{
81+
Metric: Throughput,
82+
Value: sliValue,
83+
ComparedValue: 0,
84+
Success: true,
85+
Message: "",
86+
})
87+
}
88+
89+
func Test_retrieveMetricsWithMultipleValues(t *testing.T) {
90+
mockCtrl := gomock.NewController(t)
91+
defer mockCtrl.Finish()
92+
93+
incomingEvent := &cloudevents.Event{}
94+
95+
err := json.Unmarshal([]byte(eventJSON), incomingEvent)
96+
require.NoError(t, err)
97+
98+
eventData := &keptnv2.GetSLITriggeredEventData{}
99+
err = incomingEvent.DataAs(eventData)
100+
require.NoError(t, err)
101+
102+
apiMock := prometheusfake.NewMockPrometheusAPI(mockCtrl)
103+
handler := prometheusUtils.Handler{
104+
Project: eventData.Project,
105+
Stage: eventData.Stage,
106+
Service: eventData.Service,
107+
PrometheusAPI: apiMock,
108+
}
109+
110+
returnValue := prometheusModel.Vector{
111+
{
112+
Value: prometheusModel.SampleValue(8.12830),
113+
},
114+
{
115+
Value: prometheusModel.SampleValue(0.28384),
116+
},
117+
}
118+
119+
apiMock.EXPECT().Query(gomock.Any(), gomock.Any(), gomock.Any()).Times(1).Return(
120+
returnValue, prometheusAPI.Warnings{}, nil,
121+
)
122+
123+
sliResults := retrieveMetrics(&handler, eventData)
124+
125+
assert.Len(t, sliResults, 1)
126+
assert.Contains(t, sliResults, &keptnv2.SLIResult{
127+
Metric: Throughput,
128+
Value: 0,
129+
ComparedValue: 0,
130+
Success: false,
131+
Message: prometheusUtils.ErrMultipleValues.Error(),
132+
})
133+
}
134+
135+
func Test_retrieveMetricsWithNoValue(t *testing.T) {
136+
mockCtrl := gomock.NewController(t)
137+
defer mockCtrl.Finish()
138+
139+
incomingEvent := &cloudevents.Event{}
140+
141+
err := json.Unmarshal([]byte(eventJSON), incomingEvent)
142+
require.NoError(t, err)
143+
144+
eventData := &keptnv2.GetSLITriggeredEventData{}
145+
err = incomingEvent.DataAs(eventData)
146+
require.NoError(t, err)
147+
148+
apiMock := prometheusfake.NewMockPrometheusAPI(mockCtrl)
149+
handler := prometheusUtils.Handler{
150+
Project: eventData.Project,
151+
Stage: eventData.Stage,
152+
Service: eventData.Service,
153+
PrometheusAPI: apiMock,
154+
}
155+
156+
apiMock.EXPECT().Query(gomock.Any(), gomock.Any(), gomock.Any()).Times(1).Return(
157+
prometheusModel.Vector{}, prometheusAPI.Warnings{}, nil,
158+
)
159+
160+
sliResults := retrieveMetrics(&handler, eventData)
161+
162+
assert.Len(t, sliResults, 1)
163+
assert.Contains(t, sliResults, &keptnv2.SLIResult{
164+
Metric: Throughput,
165+
Value: 0,
166+
ComparedValue: 0,
167+
Success: false,
168+
Message: prometheusUtils.ErrNoValues.Error(),
169+
})
170+
}

0 commit comments

Comments
 (0)