Skip to content

Commit d04eb68

Browse files
authored
Add GUI interface for debugging DSP hyper-parameters (#412)
1. Add GUI interface for debugging DSP hyper-parameters 2. Support workload metrics for gRPC
1 parent ebbf52f commit d04eb68

File tree

10 files changed

+299
-7
lines changed

10 files changed

+299
-7
lines changed

cmd/craned/app/manager.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
_ "github.com/gocrane/crane/pkg/querybuilder-providers/prometheus"
4747
"github.com/gocrane/crane/pkg/server"
4848
serverconfig "github.com/gocrane/crane/pkg/server/config"
49+
"github.com/gocrane/crane/pkg/server/handler/prediction"
4950
"github.com/gocrane/crane/pkg/utils/target"
5051
"github.com/gocrane/crane/pkg/webhooks"
5152
)
@@ -351,6 +352,18 @@ func runAll(ctx context.Context, mgr ctrl.Manager, predictorMgr predictor.Manage
351352
if err != nil {
352353
klog.Exit(err)
353354
}
355+
356+
discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig())
357+
if err != nil {
358+
klog.Exit(err, "Unable to create discover client")
359+
}
360+
361+
scaleKindResolver := scale.NewDiscoveryScaleKindResolver(discoveryClientSet)
362+
scaleClient := scale.New(discoveryClientSet.RESTClient(), mgr.GetRESTMapper(), dynamic.LegacyAPIPathResolverFunc, scaleKindResolver)
363+
selectorFetcher := target.NewSelectorFetcher(mgr.GetScheme(), mgr.GetRESTMapper(), scaleClient, mgr.GetClient())
364+
ctx = context.WithValue(ctx, prediction.PredictorManagerKey, predictorMgr)
365+
ctx = context.WithValue(ctx, prediction.SelectorFetcherKey, selectorFetcher)
366+
354367
craneServer.Run(ctx)
355368
return nil
356369
})

examples/tsp-workload-dsp.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ metadata:
55
namespace: default
66
spec:
77
targetRef:
8+
apiVersion: apps/v1
89
kind: Deployment
910
name: dep-1-100m-500mib
1011
namespace: default

examples/tsp-workload-resource-dsp.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ spec:
88
kind: Deployment
99
name: dep-1-100m-500mib
1010
namespace: default
11+
apiVersion: apps/v1
1112
predictionWindowSeconds: 3600
1213
predictionMetrics:
1314
- resourceIdentifier: workload-cpu

go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ require (
181181
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
182182
golang.org/x/tools v0.1.8 // indirect
183183
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect
184-
google.golang.org/protobuf v1.27.1
185184
)
186185

187186
replace (

go.sum

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,6 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
310310
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
311311
github.com/gobwas/ws v1.1.0-rc.5 h1:QOAag7FoBaBYYHRqzqkhhd8fq5RTubvI4v3Ft/gDVVQ=
312312
github.com/gobwas/ws v1.1.0-rc.5/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
313-
github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4 h1:vGDg3G6y661KAlhjf/8/r8JCjaIi6aV8szCP+MZRU3Y=
314-
github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
315-
github.com/gocrane/api v0.4.1-0.20220520134105-09d430d903ac h1:lBKVVOA4del0Plj80PCE+nglxaJxaXanCv5N6a3laVY=
316-
github.com/gocrane/api v0.4.1-0.20220520134105-09d430d903ac/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
317313
github.com/gocrane/api v0.5.1-0.20220706040335-eaadbb4b99ed h1:aARCU+Hs1ZKTqJFJT/4/or/iGR6qYwMcG99CGmBFJpg=
318314
github.com/gocrane/api v0.5.1-0.20220706040335-eaadbb4b99ed/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk=
319315
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=

pkg/controller/timeseriesprediction/time_series_prediction_controller.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ func (tc *Controller) syncTimeSeriesPrediction(ctx context.Context, tsp *predict
102102

103103
c, err := NewMetricContext(tc.TargetFetcher, tsp, tc.predictorMgr)
104104
if err != nil {
105+
klog.ErrorS(err, "Failed to NewMetricContext.")
105106
return ctrl.Result{}, err
106107
}
107108

@@ -116,7 +117,7 @@ func (tc *Controller) syncTimeSeriesPrediction(ctx context.Context, tsp *predict
116117
c.WithApiConfigs(tsp.Spec.PredictionMetrics)
117118
return
118119
}
119-
// predictor need an interface to query the config and then diff.
120+
// predictor needs an interface to query the config and then diff.
120121
// now just diff the cache in the controller to decide, it can not cover all the cases when users modify the spec
121122
for _, oldMetricConf := range old.Spec.PredictionMetrics {
122123
if !ExistsPredictionMetric(oldMetricConf, tsp.Spec.PredictionMetrics) {

pkg/prediction/dsp/debug.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package dsp
2+
3+
import (
4+
"fmt"
5+
"time"
6+
7+
"k8s.io/klog/v2"
8+
9+
"github.com/gocrane/crane/pkg/common"
10+
"github.com/gocrane/crane/pkg/metricnaming"
11+
"github.com/gocrane/crane/pkg/prediction"
12+
"github.com/gocrane/crane/pkg/prediction/config"
13+
)
14+
15+
func Debug(predictor prediction.Interface, namer metricnaming.MetricNamer, config *config.Config) (*Signal, *Signal, *Signal, error) {
16+
internalConfig, err := makeInternalConfig(config.DSP)
17+
if err != nil {
18+
return nil, nil, nil, err
19+
}
20+
21+
historyTimeSeriesList, err := queryHistoryTimeSeries(predictor.(*periodicSignalPrediction), namer, internalConfig)
22+
if err != nil {
23+
return nil, nil, nil, err
24+
}
25+
26+
queryExpr := namer.BuildUniqueKey()
27+
28+
var signal, history, test, estimate *Signal
29+
var nPeriods int
30+
var chosenEstimator Estimator
31+
for _, ts := range historyTimeSeriesList {
32+
periodLength := findPeriod(ts, internalConfig.historyResolution)
33+
if periodLength == Day || periodLength == Week {
34+
signal = SamplesToSignal(ts.Samples, internalConfig.historyResolution)
35+
signal, nPeriods = signal.Truncate(periodLength)
36+
if nPeriods >= 2 {
37+
chosenEstimator = bestEstimator(queryExpr, internalConfig.estimators, signal, nPeriods, periodLength)
38+
}
39+
if chosenEstimator != nil {
40+
samplesPerPeriod := len(signal.Samples) / nPeriods
41+
history = &Signal{
42+
SampleRate: signal.SampleRate,
43+
Samples: signal.Samples[:(nPeriods-1)*samplesPerPeriod],
44+
}
45+
test = &Signal{
46+
SampleRate: signal.SampleRate,
47+
Samples: signal.Samples[(nPeriods-1)*samplesPerPeriod:],
48+
}
49+
estimate = chosenEstimator.GetEstimation(history, periodLength)
50+
return history, test, estimate, nil
51+
}
52+
}
53+
}
54+
55+
return nil, nil, nil, fmt.Errorf("no prediction result")
56+
}
57+
58+
func queryHistoryTimeSeries(predictor *periodicSignalPrediction, namer metricnaming.MetricNamer, config *internalConfig) ([]*common.TimeSeries, error) {
59+
p := predictor.GetHistoryProvider()
60+
if p == nil {
61+
return nil, fmt.Errorf("history provider not provisioned")
62+
}
63+
64+
end := time.Now().Truncate(config.historyResolution)
65+
start := end.Add(-config.historyDuration - time.Hour)
66+
67+
tsList, err := p.QueryTimeSeries(namer, start, end, config.historyResolution)
68+
if err != nil {
69+
klog.ErrorS(err, "Failed to query history time series.")
70+
return nil, err
71+
}
72+
73+
klog.V(4).InfoS("DSP debug | queryHistoryTimeSeries", "timeSeriesList", tsList, "config", *config)
74+
75+
return preProcessTimeSeriesList(tsList, config)
76+
}

pkg/providers/grpc/grpc.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,16 @@ func grpcMetric(namer metricnaming.MetricNamer) (*pb.Metric, error) {
104104
Name: c.Name,
105105
},
106106
}
107+
case metricquery.WorkloadMetricType:
108+
w := q.GenericQuery.Metric.Workload
109+
m.Info = &pb.Metric_Workload{
110+
Workload: &pb.Workload{
111+
Namespace: w.Namespace,
112+
Name: w.Name,
113+
Kind: w.Kind,
114+
ApiVersion: w.APIVersion,
115+
},
116+
}
107117
default:
108118
return nil, fmt.Errorf("%s not supported", q.GenericQuery.Metric.Type)
109119
}
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
package prediction
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
8+
"github.com/gin-gonic/gin"
9+
"github.com/go-echarts/go-echarts/v2/charts"
10+
"github.com/go-echarts/go-echarts/v2/components"
11+
"github.com/go-echarts/go-echarts/v2/opts"
12+
"github.com/go-echarts/go-echarts/v2/types"
13+
"github.com/gocrane/crane/pkg/prediction/dsp"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
"k8s.io/client-go/rest"
16+
"k8s.io/klog/v2"
17+
18+
craneclientset "github.com/gocrane/api/pkg/generated/clientset/versioned"
19+
"github.com/gocrane/api/prediction/v1alpha1"
20+
21+
"github.com/gocrane/crane/pkg/controller/timeseriesprediction"
22+
predictormgr "github.com/gocrane/crane/pkg/predictor"
23+
"github.com/gocrane/crane/pkg/server/ginwrapper"
24+
"github.com/gocrane/crane/pkg/utils/target"
25+
)
26+
27+
type DebugHandler struct {
28+
craneClient *craneclientset.Clientset
29+
predictorManager predictormgr.Manager
30+
selectorFetcher target.SelectorFetcher
31+
}
32+
33+
// ContextKey is the type of the keys under which this package expects its
// dependencies to be stored in a context.Context. A dedicated type keeps the
// keys from colliding with plain string keys set by other packages.
type ContextKey string

// Keys of the values NewDebugHandler reads from its context. They are
// constants so that they cannot be reassigned at runtime, which would make
// previously stored values unreachable.
const (
	// PredictorManagerKey is the context key of the predictor manager.
	PredictorManagerKey ContextKey = "predictorManager"
	// SelectorFetcherKey is the context key of the selector fetcher.
	SelectorFetcherKey ContextKey = "selectorFetcher"
)
39+
40+
func NewDebugHandler(ctx context.Context) *DebugHandler {
41+
config, err := rest.InClusterConfig()
42+
if err != nil {
43+
klog.Fatalf("Failed to get InClusterConfig, %v.", err)
44+
}
45+
46+
val := ctx.Value(PredictorManagerKey)
47+
if val == nil {
48+
klog.Fatalf("predictorManager not found")
49+
}
50+
predictorManager := val.(predictormgr.Manager)
51+
52+
val = ctx.Value(SelectorFetcherKey)
53+
if val == nil {
54+
klog.Fatalf("selectorFetcher not found")
55+
}
56+
selectorFetcher := val.(target.SelectorFetcher)
57+
58+
return &DebugHandler{
59+
craneClient: craneclientset.NewForConfigOrDie(config),
60+
predictorManager: predictorManager,
61+
selectorFetcher: selectorFetcher,
62+
}
63+
}
64+
65+
func (dh *DebugHandler) Display(c *gin.Context) {
66+
namespace := c.Param("namespace")
67+
name := c.Param("tsp")
68+
if len(namespace) == 0 || len(name) == 0 {
69+
c.Writer.WriteHeader(http.StatusBadRequest)
70+
return
71+
}
72+
73+
tsp, err := dh.craneClient.PredictionV1alpha1().TimeSeriesPredictions(namespace).Get(context.TODO(), name, metav1.GetOptions{})
74+
if err != nil {
75+
ginwrapper.WriteResponse(c, err, nil)
76+
return
77+
}
78+
79+
if len(tsp.Spec.PredictionMetrics) > 0 {
80+
if tsp.Spec.PredictionMetrics[0].Algorithm.AlgorithmType == v1alpha1.AlgorithmTypeDSP && tsp.Spec.PredictionMetrics[0].Algorithm.DSP != nil {
81+
mc, err := timeseriesprediction.NewMetricContext(dh.selectorFetcher, tsp, dh.predictorManager)
82+
if err != nil {
83+
ginwrapper.WriteResponse(c, err, nil)
84+
return
85+
}
86+
87+
internalConf := mc.ConvertApiMetric2InternalConfig(&tsp.Spec.PredictionMetrics[0])
88+
namer := mc.GetMetricNamer(&tsp.Spec.PredictionMetrics[0])
89+
pred := dh.predictorManager.GetPredictor(v1alpha1.AlgorithmTypeDSP)
90+
history, test, estimate, err := dsp.Debug(pred, namer, internalConf)
91+
if err != nil {
92+
ginwrapper.WriteResponse(c, err, nil)
93+
return
94+
}
95+
96+
page := components.NewPage()
97+
page.AddCharts(plot(history, "history", "green", charts.WithTitleOpts(opts.Title{Title: "history"})))
98+
page.AddCharts(plots([]*dsp.Signal{test, estimate}, []string{"actual", "forecasted"},
99+
charts.WithTitleOpts(opts.Title{Title: "actual/forecasted"})))
100+
err = page.Render(c.Writer)
101+
if err != nil {
102+
klog.ErrorS(err, "Failed to display debug time series")
103+
}
104+
105+
return
106+
}
107+
}
108+
109+
c.Writer.WriteHeader(http.StatusBadRequest)
110+
return
111+
}
112+
113+
func plot(s *dsp.Signal, name string, color string, o ...charts.GlobalOpts) *charts.Line {
114+
x := make([]string, 0)
115+
y := make([]opts.LineData, 0)
116+
for i := 0; i < s.Num(); i++ {
117+
x = append(x, fmt.Sprintf("%.1f", float64(i)/s.SampleRate))
118+
y = append(y, opts.LineData{Value: s.Samples[i], Symbol: "none"})
119+
}
120+
121+
line := charts.NewLine()
122+
line.SetGlobalOptions(
123+
charts.WithInitializationOpts(opts.Initialization{Width: "3000px", Theme: types.ThemeRoma}),
124+
charts.WithLegendOpts(
125+
opts.Legend{
126+
Show: true,
127+
Data: name,
128+
}),
129+
charts.WithTooltipOpts(opts.Tooltip{
130+
Show: true,
131+
Trigger: "axis",
132+
TriggerOn: "mousemove",
133+
}),
134+
charts.WithTitleOpts(opts.Title{Title: s.String()}))
135+
if o != nil {
136+
line.SetGlobalOptions(o...)
137+
}
138+
line.SetXAxis(x).AddSeries(name, y, charts.WithLineStyleOpts(opts.LineStyle{Color: color}))
139+
140+
return line
141+
}
142+
143+
func plots(signals []*dsp.Signal, names []string, o ...charts.GlobalOpts) *charts.Line {
144+
if len(signals) < 1 {
145+
return nil
146+
}
147+
s := signals[0]
148+
n := signals[0].Num()
149+
x := make([]string, 0)
150+
y := make([][]opts.LineData, len(signals))
151+
for j := 0; j < len(signals); j++ {
152+
y[j] = make([]opts.LineData, 0)
153+
}
154+
for i := 0; i < n; i++ {
155+
x = append(x, fmt.Sprintf("%.1f", float64(i)/s.SampleRate))
156+
for j := 0; j < len(signals); j++ {
157+
y[j] = append(y[j], opts.LineData{Value: signals[j].Samples[i], Symbol: "none"})
158+
}
159+
}
160+
161+
line := charts.NewLine()
162+
line.SetGlobalOptions(
163+
charts.WithInitializationOpts(opts.Initialization{Width: "3000px", Theme: types.ThemeShine}),
164+
charts.WithLegendOpts(
165+
opts.Legend{
166+
Show: true,
167+
Data: names,
168+
}),
169+
charts.WithTooltipOpts(opts.Tooltip{
170+
Show: true,
171+
Trigger: "axis",
172+
TriggerOn: "mousemove",
173+
}))
174+
if o != nil {
175+
line.SetGlobalOptions(o...)
176+
}
177+
line.SetXAxis(x)
178+
for j := 0; j < len(signals); j++ {
179+
line.AddSeries(names[j], y[j], charts.WithAreaStyleOpts(
180+
opts.AreaStyle{
181+
Opacity: 0.1,
182+
}),
183+
)
184+
}
185+
return line
186+
}

pkg/server/server.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
"github.com/gocrane/crane/pkg/server/config"
1919
"github.com/gocrane/crane/pkg/server/ginwrapper"
20+
"github.com/gocrane/crane/pkg/server/handler/prediction"
2021
"github.com/gocrane/crane/pkg/server/middleware"
2122
clustersrv "github.com/gocrane/crane/pkg/server/service/cluster"
2223
dashboardsrv "github.com/gocrane/crane/pkg/server/service/dashboard"
@@ -54,6 +55,14 @@ func NewServer(cfg *config.Config) (*apiServer, error) {
5455
return server, nil
5556
}
5657

58+
func (s *apiServer) installPredictionDebugAPIs(ctx context.Context) {
59+
debugHandler := prediction.NewDebugHandler(ctx)
60+
debug := s.Group("/api/prediction/debug")
61+
{
62+
debug.GET(":namespace/:tsp", debugHandler.Display)
63+
}
64+
}
65+
5766
func (s *apiServer) installGenericAPIs() {
5867
// install metric handler
5968
if s.config.EnableMetrics {
@@ -136,7 +145,7 @@ func (s *apiServer) Run(ctx context.Context) {
136145
s.installDefaultMiddlewares()
137146
s.installGenericAPIs()
138147
s.initRouter()
139-
148+
s.installPredictionDebugAPIs(ctx)
140149
s.startGracefulShutDownManager(ctx)
141150

142151
go func() {

0 commit comments

Comments
 (0)