From 435a024e7948196efc0637fd2f74ac8ae582f957 Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Wed, 19 Jun 2024 11:04:27 +0800 Subject: [PATCH 1/6] feat: add npd controller --- cmd/katalyst-controller/app/controller/npd.go | 57 ++++ .../app/enablecontrollers.go | 1 + .../app/options/controller.go | 4 + cmd/katalyst-controller/app/options/npd.go | 64 ++++ go.mod | 1 + go.sum | 4 +- pkg/client/control/npd.go | 77 +++++ pkg/config/controller/controller_base.go | 2 + pkg/config/controller/npd.go | 33 ++ pkg/controller/npd/handler.go | 71 +++++ .../npd/indicator-plugin/manager.go | 127 ++++++++ pkg/controller/npd/indicator-plugin/plugin.go | 72 +++++ pkg/controller/npd/npd.go | 275 +++++++++++++++++ pkg/controller/npd/npd_indicator.go | 117 +++++++ pkg/controller/npd/npd_test.go | 290 ++++++++++++++++++ pkg/util/npd.go | 63 ++++ pkg/util/npd_test.go | 144 +++++++++ 17 files changed, 1400 insertions(+), 2 deletions(-) create mode 100644 cmd/katalyst-controller/app/controller/npd.go create mode 100644 cmd/katalyst-controller/app/options/npd.go create mode 100644 pkg/client/control/npd.go create mode 100644 pkg/config/controller/npd.go create mode 100644 pkg/controller/npd/handler.go create mode 100644 pkg/controller/npd/indicator-plugin/manager.go create mode 100644 pkg/controller/npd/indicator-plugin/plugin.go create mode 100644 pkg/controller/npd/npd.go create mode 100644 pkg/controller/npd/npd_indicator.go create mode 100644 pkg/controller/npd/npd_test.go create mode 100644 pkg/util/npd.go create mode 100644 pkg/util/npd_test.go diff --git a/cmd/katalyst-controller/app/controller/npd.go b/cmd/katalyst-controller/app/controller/npd.go new file mode 100644 index 000000000..6511720f1 --- /dev/null +++ b/cmd/katalyst-controller/app/controller/npd.go @@ -0,0 +1,57 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "fmt" + + "k8s.io/klog/v2" + + katalyst "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/controller/npd" +) + +const ( + NPDControllerName = "npd" +) + +func StartNPDController(ctx context.Context, controlCtx *katalyst.GenericContext, + conf *config.Configuration, extraConf interface{}, _ string, +) (bool, error) { + if controlCtx == nil || conf == nil { + err := fmt.Errorf("controlCtx and controllerConf can't be nil") + klog.Error(err) + return false, err + } + + npdController, err := npd.NewNPDController( + ctx, + controlCtx, conf.GenericConfiguration, + conf.GenericControllerConfiguration, + conf.NPDConfig, + extraConf, + ) + if err != nil { + klog.Errorf("failed to new npd controller") + return false, err + } + + go npdController.Run() + return true, nil +} diff --git a/cmd/katalyst-controller/app/enablecontrollers.go b/cmd/katalyst-controller/app/enablecontrollers.go index a3ade38a7..197db0f66 100644 --- a/cmd/katalyst-controller/app/enablecontrollers.go +++ b/cmd/katalyst-controller/app/enablecontrollers.go @@ -51,6 +51,7 @@ func init() { controllerInitializers.Store(controller.VPAControllerName, ControllerStarter{Starter: controller.StartVPAController}) controllerInitializers.Store(controller.KCCControllerName, ControllerStarter{Starter: controller.StartKCCController}) controllerInitializers.Store(controller.SPDControllerName, ControllerStarter{Starter: controller.StartSPDController}) + controllerInitializers.Store(controller.NPDControllerName, ControllerStarter{Starter: controller.StartNPDController}) controllerInitializers.Store(controller.LifeCycleControllerName, ControllerStarter{Starter: controller.StartLifeCycleController}) controllerInitializers.Store(controller.MonitorControllerName, ControllerStarter{Starter: controller.StartMonitorController}) controllerInitializers.Store(controller.OvercommitControllerName, ControllerStarter{Starter: controller.StartOvercommitController}) diff --git a/cmd/katalyst-controller/app/options/controller.go b/cmd/katalyst-controller/app/options/controller.go index 220db579c..3c515c262 100644 --- a/cmd/katalyst-controller/app/options/controller.go +++ b/cmd/katalyst-controller/app/options/controller.go @@ -28,6 +28,7 @@ type ControllersOptions struct { *VPAOptions *KCCOptions *SPDOptions + *NPDOptions *LifeCycleOptions *MonitorOptions *OvercommitOptions @@ -40,6 +41,7 @@ func NewControllersOptions() *ControllersOptions { VPAOptions: NewVPAOptions(), KCCOptions: NewKCCOptions(), SPDOptions: NewSPDOptions(), + NPDOptions: NewNPDOptions(), LifeCycleOptions: NewLifeCycleOptions(), MonitorOptions: NewMonitorOptions(), OvercommitOptions: NewOvercommitOptions(), @@ -52,6 +54,7 @@ func (o *ControllersOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.VPAOptions.AddFlags(fss) o.KCCOptions.AddFlags(fss) o.SPDOptions.AddFlags(fss) + o.NPDOptions.AddFlags(fss) o.LifeCycleOptions.AddFlags(fss) o.MonitorOptions.AddFlags(fss) o.OvercommitOptions.AddFlags(fss) @@ -66,6 +69,7 @@ func (o *ControllersOptions) ApplyTo(c *controllerconfig.ControllersConfiguratio errList = append(errList, o.VPAOptions.ApplyTo(c.VPAConfig)) errList = append(errList, o.KCCOptions.ApplyTo(c.KCCConfig)) errList = append(errList, o.SPDOptions.ApplyTo(c.SPDConfig)) + errList = append(errList, o.NPDOptions.ApplyTo(c.NPDConfig)) errList = append(errList, o.LifeCycleOptions.ApplyTo(c.LifeCycleConfig)) errList = append(errList, 
o.MonitorOptions.ApplyTo(c.MonitorConfig)) errList = append(errList, o.OvercommitOptions.ApplyTo(c.OvercommitConfig)) diff --git a/cmd/katalyst-controller/app/options/npd.go b/cmd/katalyst-controller/app/options/npd.go new file mode 100644 index 000000000..75e513efd --- /dev/null +++ b/cmd/katalyst-controller/app/options/npd.go @@ -0,0 +1,64 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package options + +import ( + cliflag "k8s.io/component-base/cli/flag" + + "github.com/kubewharf/katalyst-core/pkg/config/controller" +) + +type NPDOptions struct { + NPDIndicatorPlugins []string + EnableScopeDuplicated bool + SyncWorkers int +} + +func NewNPDOptions() *NPDOptions { + return &NPDOptions{ + NPDIndicatorPlugins: []string{}, + EnableScopeDuplicated: false, + SyncWorkers: 1, + } +} + +func (o *NPDOptions) AddFlags(fss *cliflag.NamedFlagSets) { + fs := fss.FlagSet("npd") + + fs.StringSliceVar(&o.NPDIndicatorPlugins, "npd-indicator-plugins", o.NPDIndicatorPlugins, + "A list of indicator plugins to be used") + fs.BoolVar(&o.EnableScopeDuplicated, "npd-enable-scope-duplicated", o.EnableScopeDuplicated, + "Whether metrics with the same scope can be updated by multiple plugins") + fs.IntVar(&o.SyncWorkers, "npd-sync-workers", o.SyncWorkers, + "Number of workers to sync npd status") +} + +func (o *NPDOptions) ApplyTo(c *controller.NPDConfig) error { + c.NPDIndicatorPlugins = o.NPDIndicatorPlugins + c.EnableScopeDuplicated = o.EnableScopeDuplicated + c.SyncWorkers = o.SyncWorkers + return nil +} + +func (o *NPDOptions) Config() (*controller.NPDConfig, error) { + c := &controller.NPDConfig{} + if err := o.ApplyTo(c); err != nil { + return nil, err + } + + return c, nil +} diff --git a/go.mod b/go.mod index a93a30b46..53c95ce2d 100644 --- a/go.mod +++ b/go.mod @@ -161,6 +161,7 @@ require ( ) replace ( + github.com/kubewharf/katalyst-api => github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 10f905bca..42cd6a1e7 100644 --- a/go.sum +++ b/go.sum @@ -568,8 +568,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.5.1-0.20240702044746-be552fd7ea7d h1:6CuK3axf2B63zIkEu5XyxbaC+JArE/3Jo3QHvb+Hn0M= -github.com/kubewharf/katalyst-api v0.5.1-0.20240702044746-be552fd7ea7d/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4= github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod 
h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= @@ -920,6 +918,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e h1:+P0uj/zFw5yegDEKDz240A+LuhZqG83VXyUPo3kn8pg= +github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= diff --git a/pkg/client/control/npd.go b/pkg/client/control/npd.go new file mode 100644 index 000000000..966a86e91 --- /dev/null +++ b/pkg/client/control/npd.go @@ -0,0 +1,77 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package control + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + clientset "github.com/kubewharf/katalyst-api/pkg/client/clientset/versioned" +) + +type NodeProfileControl interface { + CreateNPD(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.CreateOptions) (*v1alpha1.NodeProfileDescriptor, error) + UpdateNPDStatus(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.UpdateOptions) (*v1alpha1.NodeProfileDescriptor, error) + DeleteNPD(ctx context.Context, npdName string, opts metav1.DeleteOptions) error +} + +type DummyNPDControl struct{} + +func (d *DummyNPDControl) CreateNPD(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.CreateOptions) (*v1alpha1.NodeProfileDescriptor, error) { + return nil, nil +} + +func (d *DummyNPDControl) UpdateNPDStatus(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.UpdateOptions) (*v1alpha1.NodeProfileDescriptor, error) { + return nil, nil +} + +func (d *DummyNPDControl) DeleteNPD(ctx context.Context, npdName string, opts metav1.DeleteOptions) error { + return nil +} + +type NPDControlImp struct { + client clientset.Interface +} + +func NewNPDControlImp(client clientset.Interface) *NPDControlImp { + return &NPDControlImp{ + client: client, + } +} + +func (n *NPDControlImp) CreateNPD(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.CreateOptions) (*v1alpha1.NodeProfileDescriptor, error) { + if npd == nil { + return nil, fmt.Errorf("npd is nil") + } + + return n.client.NodeV1alpha1().NodeProfileDescriptors().Create(ctx, npd, opts) +} + +func (n *NPDControlImp) UpdateNPDStatus(ctx context.Context, npd *v1alpha1.NodeProfileDescriptor, opts metav1.UpdateOptions) (*v1alpha1.NodeProfileDescriptor, error) { + if npd == nil { + return nil, fmt.Errorf("npd is nil") 
+ } + + return n.client.NodeV1alpha1().NodeProfileDescriptors().UpdateStatus(ctx, npd, opts) +} + +func (n *NPDControlImp) DeleteNPD(ctx context.Context, npdName string, opts metav1.DeleteOptions) error { + return n.client.NodeV1alpha1().NodeProfileDescriptors().Delete(ctx, npdName, opts) +} diff --git a/pkg/config/controller/controller_base.go b/pkg/config/controller/controller_base.go index 6e648196d..2bafd2419 100644 --- a/pkg/config/controller/controller_base.go +++ b/pkg/config/controller/controller_base.go @@ -46,6 +46,7 @@ type ControllersConfiguration struct { *VPAConfig *KCCConfig *SPDConfig + *NPDConfig *LifeCycleConfig *MonitorConfig *OvercommitConfig @@ -63,6 +64,7 @@ func NewControllersConfiguration() *ControllersConfiguration { VPAConfig: NewVPAConfig(), KCCConfig: NewKCCConfig(), SPDConfig: NewSPDConfig(), + NPDConfig: NewNPDConfig(), LifeCycleConfig: NewLifeCycleConfig(), MonitorConfig: NewMonitorConfig(), OvercommitConfig: NewOvercommitConfig(), diff --git a/pkg/config/controller/npd.go b/pkg/config/controller/npd.go new file mode 100644 index 000000000..9b83f8318 --- /dev/null +++ b/pkg/config/controller/npd.go @@ -0,0 +1,33 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +type NPDConfig struct { + NPDIndicatorPlugins []string + + EnableScopeDuplicated bool + + SyncWorkers int +} + +func NewNPDConfig() *NPDConfig { + return &NPDConfig{ + NPDIndicatorPlugins: []string{}, + EnableScopeDuplicated: false, + SyncWorkers: 1, + } +} diff --git a/pkg/controller/npd/handler.go b/pkg/controller/npd/handler.go new file mode 100644 index 000000000..638472d68 --- /dev/null +++ b/pkg/controller/npd/handler.go @@ -0,0 +1,71 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package npd + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +func (nc *NPDController) onNodeAdd(obj interface{}) { + node, ok := obj.(*v1.Node) + if !ok { + klog.Errorf("[npd] cannot convert obj to *v1.node") + return + } + + nc.enqueueNode(node) +} + +func (nc *NPDController) onNodeUpdate(_, newObj interface{}) { + node, ok := newObj.(*v1.Node) + if !ok { + klog.Errorf("[npd] cannot convert obj to *v1.node") + return + } + + nc.enqueueNode(node) +} + +func (nc *NPDController) onNodeDelete(obj interface{}) { + node, ok := obj.(*v1.Node) + if !ok { + klog.Errorf("[npd] cannot convert obj to *v1.node") + return + } + + err := nc.deleteNPD(node.Name) + if err != nil { + klog.Errorf("delete node %v fail: %v", node.Name, err) + } +} + +func (nc *NPDController) enqueueNode(node *v1.Node) { + if node == nil { + klog.Warningf("[npd] enqueue a nil node") + return + } + + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(node) + if err != nil { + klog.Errorf("[npd] couldn't get key for node: %v, err: %v", node.Name, err) + return + } + + nc.nodeQueue.Add(key) +} diff --git a/pkg/controller/npd/indicator-plugin/manager.go b/pkg/controller/npd/indicator-plugin/manager.go new file mode 100644 index 000000000..e209e36d3 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/manager.go @@ -0,0 +1,127 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package indicator_plugin + +import ( + "sync" + + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/util" +) + +const ( + indicatorStatusQueueLen = 1000 +) + +// IndicatorUpdater is used by IndicatorPlugin as a unified implementation +// to trigger indicator updating logic. +type IndicatorUpdater interface { + UpdateNodeMetrics(name string, scopedNodeMetrics []v1alpha1.ScopedNodeMetrics) + UpdatePodMetrics(nodeName string, scopedPodMetrics []v1alpha1.ScopedPodMetrics) +} + +// IndicatorGetter is used by npd controller as indicator notifier to trigger +// update real npd. 
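+//
+// The two interfaces form a producer/consumer pair: a plugin pushes metrics through
+// IndicatorUpdater and the controller consumes them through IndicatorGetter. A minimal
+// sketch of a producer call (illustrative only; the scope name here is hypothetical)
+// looks roughly like:
+//
+//	updater.UpdateNodeMetrics("node-1", []v1alpha1.ScopedNodeMetrics{{
+//		Scope:   "example-scope",
+//		Metrics: []v1alpha1.MetricValue{{MetricName: "cpu", Value: resource.MustParse("2")}},
+//	}})
+//
+// which buffers the status in the manager and, for a node seen for the first time,
+// notifies the controller through the status queue.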
+type IndicatorGetter interface { + GetNodeProfileStatusChan() chan string + GetNodeProfileStatus(name string) *v1alpha1.NodeProfileDescriptorStatus +} + +type IndicatorManager struct { + sync.Mutex + + statusQueue chan string + statusMap map[string]*v1alpha1.NodeProfileDescriptorStatus +} + +var ( + _ IndicatorUpdater = &IndicatorManager{} + _ IndicatorGetter = &IndicatorManager{} +) + +func NewIndicatorManager() *IndicatorManager { + return &IndicatorManager{ + statusMap: make(map[string]*v1alpha1.NodeProfileDescriptorStatus), + statusQueue: make(chan string, indicatorStatusQueueLen), + } +} + +func (im *IndicatorManager) UpdateNodeMetrics(name string, scopedNodeMetrics []v1alpha1.ScopedNodeMetrics) { + im.Lock() + + insert := false + if _, ok := im.statusMap[name]; !ok { + insert = true + im.statusMap[name] = initNodeProfileDescriptorStatus() + } + for _, scopedNodeMetric := range scopedNodeMetrics { + util.InsertNPDScopedNodeMetrics(im.statusMap[name], &scopedNodeMetric) + } + + im.Unlock() + + if insert { + im.statusQueue <- name + } +} + +func (im *IndicatorManager) UpdatePodMetrics(nodeName string, scopedPodMetrics []v1alpha1.ScopedPodMetrics) { + im.Lock() + + insert := false + if _, ok := im.statusMap[nodeName]; !ok { + insert = true + im.statusMap[nodeName] = initNodeProfileDescriptorStatus() + } + for _, scopedPodMetric := range scopedPodMetrics { + util.InsertNPDScopedPodMetrics(im.statusMap[nodeName], &scopedPodMetric) + } + + im.Unlock() + + if insert { + im.statusQueue <- nodeName + } +} + +func (im *IndicatorManager) GetNodeProfileStatusChan() chan string { + return im.statusQueue +} + +func (im *IndicatorManager) GetNodeProfileStatus(name string) *v1alpha1.NodeProfileDescriptorStatus { + im.Lock() + defer func() { + delete(im.statusMap, name) + im.Unlock() + }() + + status, ok := im.statusMap[name] + if !ok { + klog.Warningf("npd status doesn't exist for node: %v", name) + return nil + } + return status +} + +func initNodeProfileDescriptorStatus() *v1alpha1.NodeProfileDescriptorStatus { + return &v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{}, + PodMetrics: []v1alpha1.ScopedPodMetrics{}, + } +} diff --git a/pkg/controller/npd/indicator-plugin/plugin.go b/pkg/controller/npd/indicator-plugin/plugin.go new file mode 100644 index 000000000..d2e2c9c5a --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/plugin.go @@ -0,0 +1,72 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package indicator_plugin + +import ( + "context" + "sync" + + katalystbase "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/controller" +) + +type IndicatorPlugin interface { + Run() + Name() string + + GetSupportedNodeMetricsScope() []string + GetSupportedPodMetricsScope() []string +} + +var pluginInitializers sync.Map + +type InitFunc func(ctx context.Context, conf *controller.NPDConfig, extraConf interface{}, + controlCtx *katalystbase.GenericContext, updater IndicatorUpdater) (IndicatorPlugin, error) + +// RegisterPluginInitializer is used to register user-defined indicator plugins +func RegisterPluginInitializer(name string, initFunc InitFunc) { + pluginInitializers.Store(name, initFunc) +} + +// GetPluginInitializers returns initialized functions of indicator plugins +func GetPluginInitializers() map[string]InitFunc { + plugins := make(map[string]InitFunc) + pluginInitializers.Range(func(key, value any) bool { + plugins[key.(string)] = value.(InitFunc) + return true + }) + return plugins +} + +type DummyIndicatorPlugin struct { + NodeMetricsScopes []string + PodMetricsScopes []string +} + +var _ IndicatorPlugin = DummyIndicatorPlugin{} + +func (d DummyIndicatorPlugin) Run() {} + +func (d DummyIndicatorPlugin) Name() string { return "dummy-indicator-plugin" } + +func (d DummyIndicatorPlugin) GetSupportedNodeMetricsScope() []string { + return d.NodeMetricsScopes +} + +func (d DummyIndicatorPlugin) GetSupportedPodMetricsScope() []string { + return d.PodMetricsScopes +} diff --git a/pkg/controller/npd/npd.go b/pkg/controller/npd/npd.go new file mode 100644 index 000000000..4a4aff16b --- /dev/null +++ b/pkg/controller/npd/npd.go @@ -0,0 +1,275 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package npd + +import ( + "context" + "fmt" + "time" + + "github.com/kubewharf/katalyst-core/pkg/metrics" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + corev1 "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + npdlisters "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" + katalystbase "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/client/control" + "github.com/kubewharf/katalyst-core/pkg/config/controller" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin" +) + +const npdControllerName = "npd" + +type NPDController struct { + ctx context.Context + + conf *controller.NPDConfig + + npdControl control.NodeProfileControl + npdLister npdlisters.NodeProfileDescriptorLister + nodeLister corev1.NodeLister + + indicatorManager indicator_plugin.IndicatorGetter + indicatorPlugins map[string]indicator_plugin.IndicatorPlugin + supportedNodeScopes map[string]struct{} + supportedPodScopes map[string]struct{} + + nodeQueue workqueue.RateLimitingInterface + + metricsEmitter metrics.MetricEmitter + syncedFunc []cache.InformerSynced +} + +func NewNPDController( + ctx context.Context, + controlCtx *katalystbase.GenericContext, + genericConf *generic.GenericConfiguration, + _ *controller.GenericControllerConfiguration, + conf *controller.NPDConfig, + extraConf interface{}, +) (*NPDController, error) { + nodeInformer := controlCtx.KubeInformerFactory.Core().V1().Nodes() + npdInformer := controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors() + + npdController := &NPDController{ + ctx: ctx, + conf: conf, + npdControl: &control.DummyNPDControl{}, + nodeQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "npd"), + metricsEmitter: controlCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(npdControllerName), + } + + npdController.nodeLister = nodeInformer.Lister() + npdController.syncedFunc = append(npdController.syncedFunc, nodeInformer.Informer().HasSynced) + + npdController.npdLister = npdInformer.Lister() + npdController.syncedFunc = append(npdController.syncedFunc, npdInformer.Informer().HasSynced) + + nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: npdController.onNodeAdd, + UpdateFunc: npdController.onNodeUpdate, + DeleteFunc: npdController.onNodeDelete, + }) + + if !genericConf.DryRun { + npdController.npdControl = control.NewNPDControlImp(controlCtx.Client.InternalClient) + } + + if err := npdController.initializeIndicatorPlugins(controlCtx, extraConf); err != nil { + return nil, err + } + + return npdController, nil +} + +func (nc *NPDController) Run() { + defer utilruntime.HandleCrash() + defer nc.nodeQueue.ShutDown() + defer klog.Infof("shutting down %s controller", npdControllerName) + + if !cache.WaitForCacheSync(nc.ctx.Done(), nc.syncedFunc...) 
{
+		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", npdControllerName))
+		return
+	}
+	klog.Infof("caches are synced for %s controller", npdControllerName)
+
+	go wait.Until(nc.nodeWorker, time.Second, nc.ctx.Done())
+
+	for i := 0; i < nc.conf.SyncWorkers; i++ {
+		go wait.Until(nc.syncIndicatorStatus, time.Second, nc.ctx.Done())
+	}
+
+	go wait.Until(nc.cleanNPD, time.Hour, nc.ctx.Done())
+
+	<-nc.ctx.Done()
+}
+
+func (nc *NPDController) initializeIndicatorPlugins(controlCtx *katalystbase.GenericContext, extraConf interface{}) error {
+	indicatorManager := indicator_plugin.NewIndicatorManager()
+	nc.indicatorManager = indicatorManager
+	nc.indicatorPlugins = make(map[string]indicator_plugin.IndicatorPlugin)
+	nc.supportedNodeScopes = make(map[string]struct{})
+	nc.supportedPodScopes = make(map[string]struct{})
+
+	initializers := indicator_plugin.GetPluginInitializers()
+	for _, pluginName := range nc.conf.NPDIndicatorPlugins {
+		if initFunc, ok := initializers[pluginName]; ok {
+			plugin, err := initFunc(nc.ctx, nc.conf, extraConf, controlCtx, indicatorManager)
+			if err != nil {
+				return err
+			}
+
+			klog.Infof("[npd] init indicator plugin: %v", pluginName)
+			nc.indicatorPlugins[pluginName] = plugin
+
+			for _, scope := range plugin.GetSupportedNodeMetricsScope() {
+				if _, ok := nc.supportedNodeScopes[scope]; !ok {
+					nc.supportedNodeScopes[scope] = struct{}{}
+				} else {
+					if nc.conf.EnableScopeDuplicated {
+						klog.Warningf("[npd] node scope %v is supported by multiple plugins, metrics might be overwritten", scope)
+					} else {
+						err := fmt.Errorf("[npd] node scope %v is supported by multiple plugins", scope)
+						klog.Error(err)
+						return err
+					}
+				}
+			}
+
+			for _, scope := range plugin.GetSupportedPodMetricsScope() {
+				if _, ok := nc.supportedPodScopes[scope]; !ok {
+					nc.supportedPodScopes[scope] = struct{}{}
+				} else {
+					if nc.conf.EnableScopeDuplicated {
+						klog.Warningf("[npd] pod scope %v is supported by multiple plugins, metrics might be overwritten", scope)
+					} else {
+						err := fmt.Errorf("[npd] pod scope %v is supported by multiple plugins", scope)
+						klog.Error(err)
+						return err
+					}
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+func (nc *NPDController) nodeWorker() {
+	for nc.processNextNode() {
+	}
+}
+
+func (nc *NPDController) processNextNode() bool {
+	key, quit := nc.nodeQueue.Get()
+	if quit {
+		return false
+	}
+	defer nc.nodeQueue.Done(key)
+
+	err := nc.syncNode(key.(string))
+	if err == nil {
+		nc.nodeQueue.Forget(key)
+		return true
+	}
+
+	utilruntime.HandleError(fmt.Errorf("sync %v fail with %v", key, err))
+	nc.nodeQueue.AddRateLimited(key)
+
+	return true
+}
+
+func (nc *NPDController) syncNode(key string) error {
+	_, nodeName, err := cache.SplitMetaNamespaceKey(key)
+	if err != nil {
+		klog.Errorf("[npd] failed to split key %v: %v", key, err)
+		return err
+	}
+
+	_, err = nc.getOrCreateNPDForNode(nodeName)
+	if err != nil {
+		klog.Errorf("getOrCreateNPDForNode %v fail: %v", nodeName, err)
+		return err
+	}
+
+	return nil
+}
+
+func (nc *NPDController) getOrCreateNPDForNode(nodeName string) (*v1alpha1.NodeProfileDescriptor, error) {
+	npd, err := nc.npdLister.Get(nodeName)
+	if err == nil {
+		return npd, nil
+	}
+
+	if errors.IsNotFound(err) {
+		npd := &v1alpha1.NodeProfileDescriptor{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: nodeName,
+			},
+			Spec: v1alpha1.NodeProfileDescriptorSpec{},
+			Status: v1alpha1.NodeProfileDescriptorStatus{
+				NodeMetrics: []v1alpha1.ScopedNodeMetrics{},
+				PodMetrics:  []v1alpha1.ScopedPodMetrics{},
+			},
+		}
+		return nc.npdControl.CreateNPD(nc.ctx,
npd, metav1.CreateOptions{}) + } else { + err = fmt.Errorf("get npd %v fail: %v", nodeName, err) + return nil, err + } +} + +func (nc *NPDController) cleanNPD() { + npdList, err := nc.npdLister.List(labels.Everything()) + if err != nil { + klog.Errorf("[npd] failed to list npd") + return + } + + for _, npd := range npdList { + _, err := nc.nodeLister.Get(npd.Name) + if err == nil { + continue + } + + if err != nil { + if errors.IsNotFound(err) { + // delete npd + err = nc.deleteNPD(npd.Name) + if err != nil { + klog.Errorf("[npd] delete npd %v fail: %v", npd.Name, err) + } + } else { + klog.Errorf("[npd] get node %v fail: %v", npd.Name, err) + } + } + } +} + +func (nc *NPDController) deleteNPD(nodeName string) error { + return nc.npdControl.DeleteNPD(nc.ctx, nodeName, metav1.DeleteOptions{}) +} diff --git a/pkg/controller/npd/npd_indicator.go b/pkg/controller/npd/npd_indicator.go new file mode 100644 index 000000000..d58acdedc --- /dev/null +++ b/pkg/controller/npd/npd_indicator.go @@ -0,0 +1,117 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package npd + +import ( + apiequality "k8s.io/apimachinery/pkg/api/equality" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util" +) + +const ( + metricsNameSyncNPDStatus = "sync_npd_status" +) + +func (nc *NPDController) syncIndicatorStatus() { + c := nc.indicatorManager.GetNodeProfileStatusChan() + for { + select { + case nodeName, ok := <-c: + if !ok { + klog.Infof("[npd] indicator status chan is closed") + return + } + + nc.syncStatus(nodeName) + case <-nc.ctx.Done(): + klog.Infoln("[npd] stop sync status.") + return + } + } +} + +func (nc *NPDController) syncStatus(nodeName string) { + klog.V(6).Infof("[npd] sync node %v npd status", nodeName) + + status := nc.indicatorManager.GetNodeProfileStatus(nodeName) + if status == nil { + klog.Warningf("[npd] get node %v npd status nil", nodeName) + return + } + + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + npd, err := nc.npdLister.Get(nodeName) + if err != nil { + klog.Errorf("[npd] failed to get npd %v: %v", nodeName, err) + return err + } + + npdCopy := npd.DeepCopy() + nc.mergeIndicatorStatus(npdCopy, *status) + if apiequality.Semantic.DeepEqual(npd.Status, npdCopy.Status) { + return nil + } + + _, err = nc.npdControl.UpdateNPDStatus(nc.ctx, npdCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("[npd] failed to update npd status for node %v: %v", nodeName, err) + return err + } + + klog.V(10).Infof("[npd] node %v npd status update to %+v", nodeName, npdCopy.Status) + _ = nc.metricsEmitter.StoreInt64(metricsNameSyncNPDStatus, 1, metrics.MetricTypeNameCount, metrics.MetricTag{ + Key: "status", Val: "success", + }) + + return nil + }) + if err != nil { + klog.Errorf("[npd] faild to update npd status for node %v: %v", nodeName, err) + _ = 
nc.metricsEmitter.StoreInt64(metricsNameSyncNPDStatus, 1, metrics.MetricTypeNameCount, metrics.MetricTag{
+			Key: "status", Val: "failed",
+		})
+	}
+}
+
+func (nc *NPDController) mergeIndicatorStatus(npd *v1alpha1.NodeProfileDescriptor, expected v1alpha1.NodeProfileDescriptorStatus) {
+	for _, nodeMetric := range expected.NodeMetrics {
+		util.InsertNPDScopedNodeMetrics(&npd.Status, &nodeMetric)
+	}
+
+	for _, podMetric := range expected.PodMetrics {
+		util.InsertNPDScopedPodMetrics(&npd.Status, &podMetric)
+	}
+
+	for i := 0; i < len(npd.Status.NodeMetrics); i++ {
+		if _, ok := nc.supportedNodeScopes[npd.Status.NodeMetrics[i].Scope]; !ok {
+			klog.Infof("skip npd %v node metric with unsupported scope %v", npd.Name, npd.Status.NodeMetrics[i].Scope)
+			npd.Status.NodeMetrics = append(npd.Status.NodeMetrics[:i], npd.Status.NodeMetrics[i+1:]...)
+			// step back so the element shifted into index i is not skipped
+			i--
+		}
+	}
+
+	for i := 0; i < len(npd.Status.PodMetrics); i++ {
+		if _, ok := nc.supportedPodScopes[npd.Status.PodMetrics[i].Scope]; !ok {
+			klog.Infof("skip npd %v pod metric with unsupported scope %v", npd.Name, npd.Status.PodMetrics[i].Scope)
+			npd.Status.PodMetrics = append(npd.Status.PodMetrics[:i], npd.Status.PodMetrics[i+1:]...)
+			// step back so the element shifted into index i is not skipped
+			i--
+		}
+	}
+}
diff --git a/pkg/controller/npd/npd_test.go b/pkg/controller/npd/npd_test.go
new file mode 100644
index 000000000..cd8ba7b4f
--- /dev/null
+++ b/pkg/controller/npd/npd_test.go
@@ -0,0 +1,290 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package npd + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/cache" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + katalystbase "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/controller" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin" +) + +func TestIndicatorUpdater(t *testing.T) { + t.Parallel() + + ctx := context.TODO() + npdConfig := &controller.NPDConfig{ + NPDIndicatorPlugins: []string{"plugin1", "plugin2"}, + SyncWorkers: 1, + EnableScopeDuplicated: false, + } + genericConfig := &generic.GenericConfiguration{} + + nodes := []*v1.Node{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "node2", + }, + }, + } + npd := &v1alpha1.NodeProfileDescriptor{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1alpha1.NodeProfileDescriptorSpec{}, + Status: v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{}, + PodMetrics: []v1alpha1.ScopedPodMetrics{}, + }, + } + controlCtx, err := katalystbase.GenerateFakeGenericContext([]runtime.Object{nodes[0], nodes[1]}, + []runtime.Object{npd}, []runtime.Object{}) + assert.NoError(t, err) + + // register plugins + indicator_plugin.RegisterPluginInitializer("plugin1", func(ctx context.Context, conf *controller.NPDConfig, extraConf interface{}, controlCtx *katalystbase.GenericContext, updater indicator_plugin.IndicatorUpdater) (indicator_plugin.IndicatorPlugin, error) { + return indicator_plugin.DummyIndicatorPlugin{ + NodeMetricsScopes: []string{ + "scope1", "scope2", + }, + PodMetricsScopes: []string{ + "scope1", "scope2", + }, + }, nil + }) + indicator_plugin.RegisterPluginInitializer("plugin2", func(ctx context.Context, conf *controller.NPDConfig, extraConf interface{}, controlCtx *katalystbase.GenericContext, updater indicator_plugin.IndicatorUpdater) (indicator_plugin.IndicatorPlugin, error) { + return indicator_plugin.DummyIndicatorPlugin{ + NodeMetricsScopes: []string{ + "scope3", "scope4", + }, + PodMetricsScopes: []string{ + "scope3", "scope4", + }, + }, nil + }) + + npdController, err := NewNPDController(ctx, controlCtx, genericConfig, nil, npdConfig, nil) + assert.NoError(t, err) + + controlCtx.StartInformer(ctx) + go npdController.Run() + timeout, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + synced := cache.WaitForCacheSync(timeout.Done(), npdController.syncedFunc...) 
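+	// the informer caches must be synced before metrics are pushed through the
+	// indicator manager below, otherwise the controller may not see the npd objects yet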
+ assert.True(t, synced) + + manager := npdController.indicatorManager.(*indicator_plugin.IndicatorManager) + manager.UpdateNodeMetrics("node1", []v1alpha1.ScopedNodeMetrics{ + { + Scope: "scope1", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }) + manager.UpdateNodeMetrics("node1", []v1alpha1.ScopedNodeMetrics{ + { + Scope: "scope2", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + }, + }, + }) + manager.UpdatePodMetrics("node1", []v1alpha1.ScopedPodMetrics{ + { + Scope: "scope1", + PodMetrics: []v1alpha1.PodMetric{ + { + Namespace: "default", + Name: "pod1", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }) + manager.UpdatePodMetrics("node2", []v1alpha1.ScopedPodMetrics{ + { + Scope: "scope4", + PodMetrics: []v1alpha1.PodMetric{ + { + Namespace: "default", + Name: "pod2", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }) + manager.UpdateNodeMetrics("node2", []v1alpha1.ScopedNodeMetrics{ + { + Scope: "scope3", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "memory", + Value: resource.MustParse("8Gi"), + }, + }, + }, + }) + manager.UpdateNodeMetrics("node1", []v1alpha1.ScopedNodeMetrics{ + { + Scope: "unsupported", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "memory", + Value: resource.MustParse("8Gi"), + }, + }, + }, + }) + manager.UpdatePodMetrics("node2", []v1alpha1.ScopedPodMetrics{ + { + Scope: "unsupported", + PodMetrics: []v1alpha1.PodMetric{ + { + Namespace: "default", + Name: "pod2", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }) + + expectedNPD1 := v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{ + { + Scope: "scope1", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + { + Scope: "scope2", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + }, + }, + }, + PodMetrics: []v1alpha1.ScopedPodMetrics{ + { + Scope: "scope1", + PodMetrics: []v1alpha1.PodMetric{ + { + Namespace: "default", + Name: "pod1", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + } + expectedNPD2 := v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{ + { + Scope: "scope3", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "memory", + Value: resource.MustParse("8Gi"), + }, + }, + }, + }, + PodMetrics: []v1alpha1.ScopedPodMetrics{ + { + Scope: "scope4", + PodMetrics: []v1alpha1.PodMetric{ + { + Namespace: "default", + Name: "pod2", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + } + + time.Sleep(time.Second) + node1NPD, err := controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors().Get(ctx, "node1", v12.GetOptions{}) + assert.NoError(t, err) + assert.Equal(t, expectedNPD1, node1NPD.Status) + + node2NPD, err := controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors().Get(ctx, "node2", v12.GetOptions{}) + assert.NoError(t, err) + assert.Equal(t, expectedNPD2, node2NPD.Status) + + err = controlCtx.Client.KubeClient.CoreV1().Nodes().Delete(ctx, "node2", v12.DeleteOptions{}) + assert.NoError(t, err) + 
time.Sleep(time.Second) + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors().Get(ctx, "node2", v12.GetOptions{}) + assert.Error(t, err) + assert.True(t, errors.IsNotFound(err)) +} diff --git a/pkg/util/npd.go b/pkg/util/npd.go new file mode 100644 index 000000000..95115efe7 --- /dev/null +++ b/pkg/util/npd.go @@ -0,0 +1,63 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + +func InsertNPDScopedNodeMetrics( + status *v1alpha1.NodeProfileDescriptorStatus, + scopedNodeMetrics *v1alpha1.ScopedNodeMetrics, +) { + if status == nil || scopedNodeMetrics == nil { + return + } + + if status.NodeMetrics == nil { + status.NodeMetrics = []v1alpha1.ScopedNodeMetrics{} + } + + for i := range status.NodeMetrics { + if status.NodeMetrics[i].Scope == scopedNodeMetrics.Scope { + status.NodeMetrics[i].Metrics = scopedNodeMetrics.Metrics + return + } + } + + status.NodeMetrics = append(status.NodeMetrics, *scopedNodeMetrics) +} + +func InsertNPDScopedPodMetrics( + status *v1alpha1.NodeProfileDescriptorStatus, + scopedPodMetrics *v1alpha1.ScopedPodMetrics, +) { + if status == nil || scopedPodMetrics == nil { + return + } + + if status.PodMetrics == nil { + status.PodMetrics = []v1alpha1.ScopedPodMetrics{} + } + + for i := range status.PodMetrics { + if status.PodMetrics[i].Scope == scopedPodMetrics.Scope { + status.PodMetrics[i].PodMetrics = scopedPodMetrics.PodMetrics + return + } + } + + status.PodMetrics = append(status.PodMetrics, *scopedPodMetrics) +} diff --git a/pkg/util/npd_test.go b/pkg/util/npd_test.go new file mode 100644 index 000000000..17728cefd --- /dev/null +++ b/pkg/util/npd_test.go @@ -0,0 +1,144 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" +) + +func TestInsertNPDScopedNodeMetrics(t *testing.T) { + t.Parallel() + + status := &v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{ + { + Scope: "test", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + } + metrics := &v1alpha1.ScopedNodeMetrics{ + Scope: "test", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + }, + } + expectedStatus := v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{ + { + Scope: "test", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + }, + }, + }, + } + + InsertNPDScopedNodeMetrics(status, metrics) + assert.Equal(t, expectedStatus, *status) + + InsertNPDScopedNodeMetrics(status, nil) + assert.Equal(t, expectedStatus, *status) +} + +func TestInsertNPDScopedPodMetrics(t *testing.T) { + t.Parallel() + + status := &v1alpha1.NodeProfileDescriptorStatus{ + PodMetrics: []v1alpha1.ScopedPodMetrics{ + { + Scope: "test", + PodMetrics: []v1alpha1.PodMetric{ + { + Name: "testPod", + Namespace: "default", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + } + metrics := &v1alpha1.ScopedPodMetrics{ + Scope: "test", + PodMetrics: []v1alpha1.PodMetric{ + { + Name: "testPod", + Namespace: "default", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + { + MetricName: "memory", + Value: resource.MustParse("4Gi"), + }, + }, + }, + }, + } + expectedStatus := v1alpha1.NodeProfileDescriptorStatus{ + PodMetrics: []v1alpha1.ScopedPodMetrics{ + { + Scope: "test", + PodMetrics: []v1alpha1.PodMetric{ + { + Name: "testPod", + Namespace: "default", + Metrics: []v1alpha1.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("2"), + }, + { + MetricName: "memory", + Value: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + }, + } + + InsertNPDScopedPodMetrics(status, metrics) + assert.Equal(t, expectedStatus, *status) + + InsertNPDScopedPodMetrics(status, nil) + assert.Equal(t, expectedStatus, *status) +} From 839501c3b6d63c152f5f57129a41155ebd96210d Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Fri, 21 Jun 2024 10:57:29 +0800 Subject: [PATCH 2/6] fix: run all plugins when npdController run --- pkg/controller/npd/npd.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/controller/npd/npd.go b/pkg/controller/npd/npd.go index 4a4aff16b..f6da24f21 100644 --- a/pkg/controller/npd/npd.go +++ b/pkg/controller/npd/npd.go @@ -119,6 +119,9 @@ func (nc *NPDController) Run() { go wait.Until(nc.nodeWorker, time.Second, nc.ctx.Done()) + for _, plugin := range nc.indicatorPlugins { + go plugin.Run() + } for i := 0; i < nc.conf.SyncWorkers; i++ { go wait.Until(nc.syncIndicatorStatus, time.Second, nc.ctx.Done()) } From d5633b9da38e0f427922c1fd3c4d4b4008bb6962 Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Thu, 20 Jun 2024 15:59:51 +0800 Subject: [PATCH 3/6] add npd loadAware plugin --- cmd/katalyst-controller/app/options/npd.go | 61 +- pkg/client/genericclient.go | 8 + pkg/config/controller/npd.go | 21 + .../npd/indicator-plugin/loadaware/handler.go | 159 +++++ .../npd/indicator-plugin/loadaware/helper.go | 177 +++++ 
.../indicator-plugin/loadaware/loadaware.go | 660 ++++++++++++++++++ .../loadaware/sorter/helper.go | 105 +++ .../indicator-plugin/loadaware/sorter/pod.go | 29 + .../loadaware/sorter/pod_test.go | 65 ++ .../loadaware/sorter/scorer.go | 43 ++ .../npd/indicator-plugin/loadaware/types.go | 82 +++ pkg/controller/npd/npd.go | 8 +- pkg/util/native/object.go | 10 + 13 files changed, 1421 insertions(+), 7 deletions(-) create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/handler.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/helper.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/loadaware.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/types.go diff --git a/cmd/katalyst-controller/app/options/npd.go b/cmd/katalyst-controller/app/options/npd.go index 75e513efd..22dfa2244 100644 --- a/cmd/katalyst-controller/app/options/npd.go +++ b/cmd/katalyst-controller/app/options/npd.go @@ -18,6 +18,7 @@ package options import ( cliflag "k8s.io/component-base/cli/flag" + "time" "github.com/kubewharf/katalyst-core/pkg/config/controller" ) @@ -26,13 +27,15 @@ type NPDOptions struct { NPDIndicatorPlugins []string EnableScopeDuplicated bool SyncWorkers int + *LoadAwarePluginOptions } func NewNPDOptions() *NPDOptions { return &NPDOptions{ - NPDIndicatorPlugins: []string{}, - EnableScopeDuplicated: false, - SyncWorkers: 1, + NPDIndicatorPlugins: []string{}, + EnableScopeDuplicated: false, + SyncWorkers: 1, + LoadAwarePluginOptions: NewLoadAwarePluginOptions(), } } @@ -45,12 +48,36 @@ func (o *NPDOptions) AddFlags(fss *cliflag.NamedFlagSets) { "Whether metrics with the same scope can be updated by multiple plugins") fs.IntVar(&o.SyncWorkers, "npd-sync-workers", o.SyncWorkers, "Number of workers to sync npd status") + + fs.IntVar(&o.Workers, "loadaware-sync-workers", o.Workers, + "num of workers to sync node metrics") + fs.DurationVar(&o.SyncMetricInterval, "loadaware-sync-interval", o.SyncMetricInterval, + "interval of syncing node metrics") + fs.DurationVar(&o.ListMetricTimeout, "loadaware-list-metric-timeout", o.ListMetricTimeout, + "timeout duration when list metrics from metrics server") + + fs.StringVar(&o.PodUsageSelectorNamespace, "loadaware-podusage-selector-namespace", o.PodUsageSelectorNamespace, + "pod namespace used to detect whether podusage should be calculated") + fs.StringVar(&o.PodUsageSelectorKey, "loadaware-podusage-selector-key", o.PodUsageSelectorKey, + "pod label key used to detect whether podusage should be calculated") + fs.StringVar(&o.PodUsageSelectorVal, "loadaware-podusage-selector-val", o.PodUsageSelectorVal, + "pod label value used to detect whether podusage should be calculated") + fs.IntVar(&o.MaxPodUsageCount, "loadaware-max-podusage-count", o.MaxPodUsageCount, + "max podusage count on nodemonitor") } func (o *NPDOptions) ApplyTo(c *controller.NPDConfig) error { c.NPDIndicatorPlugins = o.NPDIndicatorPlugins c.EnableScopeDuplicated = o.EnableScopeDuplicated c.SyncWorkers = o.SyncWorkers + + c.Workers = o.Workers + c.SyncMetricInterval = o.SyncMetricInterval + c.ListMetricTimeout = o.ListMetricTimeout + c.PodUsageSelectorNamespace = o.PodUsageSelectorNamespace + c.PodUsageSelectorKey = 
o.PodUsageSelectorKey + c.PodUsageSelectorVal = o.PodUsageSelectorVal + c.MaxPodUsageCount = o.MaxPodUsageCount return nil } @@ -62,3 +89,31 @@ func (o *NPDOptions) Config() (*controller.NPDConfig, error) { return c, nil } + +type LoadAwarePluginOptions struct { + // number of workers to sync node metrics + Workers int + // time interval of sync node metrics + SyncMetricInterval time.Duration + // timeout of list metrics from apiserver + ListMetricTimeout time.Duration + + // pod selector for checking if pod usage is required + PodUsageSelectorNamespace string + PodUsageSelectorKey string + PodUsageSelectorVal string + + MaxPodUsageCount int +} + +func NewLoadAwarePluginOptions() *LoadAwarePluginOptions { + return &LoadAwarePluginOptions{ + Workers: 3, + SyncMetricInterval: time.Minute * 1, + ListMetricTimeout: time.Second * 10, + PodUsageSelectorNamespace: "", + PodUsageSelectorKey: "", + PodUsageSelectorVal: "", + MaxPodUsageCount: 20, + } +} diff --git a/pkg/client/genericclient.go b/pkg/client/genericclient.go index 6b404341d..0b82fbc3e 100644 --- a/pkg/client/genericclient.go +++ b/pkg/client/genericclient.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/tools/clientcmd" componentbaseconfig "k8s.io/component-base/config" aggregator "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset" + "k8s.io/metrics/pkg/client/clientset/versioned" "k8s.io/metrics/pkg/client/custom_metrics" customclient "k8s.io/metrics/pkg/client/custom_metrics" cmfake "k8s.io/metrics/pkg/client/custom_metrics/fake" @@ -52,6 +53,7 @@ type GenericClientSet struct { DynamicClient dynamic.Interface DiscoveryClient discovery.DiscoveryInterface AggregatorClient aggregator.Interface + MetricClient versioned.Interface CustomClient customclient.CustomMetricsClient ExternalClient externalclient.ExternalMetricsClient @@ -97,6 +99,11 @@ func newForConfig(cfg *rest.Config) (*GenericClientSet, error) { return nil, err } + metricClient, err := versioned.NewForConfig(cfg) + if err != nil { + return nil, err + } + return &GenericClientSet{ cfg: cfg, MetaClient: metaClient, @@ -104,6 +111,7 @@ func newForConfig(cfg *rest.Config) (*GenericClientSet, error) { InternalClient: internalClient, DynamicClient: dynamicClient, DiscoveryClient: discoveryClient, + MetricClient: metricClient, CustomClient: &cmfake.FakeCustomMetricsClient{}, ExternalClient: &emfake.FakeExternalMetricsClient{}, diff --git a/pkg/config/controller/npd.go b/pkg/config/controller/npd.go index 9b83f8318..467359e2e 100644 --- a/pkg/config/controller/npd.go +++ b/pkg/config/controller/npd.go @@ -16,12 +16,16 @@ limitations under the License. 
 package controller
 
+import "time"
+
 type NPDConfig struct {
 	NPDIndicatorPlugins []string
 
 	EnableScopeDuplicated bool
 
 	SyncWorkers int
+
+	*LoadAwarePluginConfig
 }
 
 func NewNPDConfig() *NPDConfig {
@@ -29,5 +33,22 @@ func NewNPDConfig() *NPDConfig {
 		NPDIndicatorPlugins:   []string{},
 		EnableScopeDuplicated: false,
 		SyncWorkers:           1,
+		LoadAwarePluginConfig: &LoadAwarePluginConfig{},
 	}
 }
+
+type LoadAwarePluginConfig struct {
+	// number of workers to sync node metrics
+	Workers int
+	// time interval of sync node metrics
+	SyncMetricInterval time.Duration
+	// timeout of list metrics from apiserver
+	ListMetricTimeout time.Duration
+
+	// pod selector for checking if pod usage is required
+	PodUsageSelectorNamespace string
+	PodUsageSelectorKey       string
+	PodUsageSelectorVal       string
+
+	MaxPodUsageCount int
+}
diff --git a/pkg/controller/npd/indicator-plugin/loadaware/handler.go b/pkg/controller/npd/indicator-plugin/loadaware/handler.go
new file mode 100644
index 000000000..b9884af7b
--- /dev/null
+++ b/pkg/controller/npd/indicator-plugin/loadaware/handler.go
@@ -0,0 +1,159 @@
+package loadaware
+
+import (
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/klog/v2"
+
+	"github.com/kubewharf/katalyst-core/pkg/util/native"
+)
+
+func (p *Plugin) OnNodeAdd(obj interface{}) {
+	node, ok := obj.(*v1.Node)
+	if !ok {
+		klog.Error("cannot convert obj to *v1.Node")
+		return
+	}
+	klog.V(5).Infof("OnNodeAdd node %v add event", node.Name)
+	p.Lock()
+	defer p.Unlock()
+	bucketID := p.getBucketID(node.Name)
+	if pool, ok := p.nodePoolMap[bucketID]; ok {
+		pool.Insert(node.Name)
+	} else {
+		pool = sets.NewString(node.Name)
+		p.nodePoolMap[bucketID] = pool
+	}
+	metricData, exist := p.nodeStatDataMap[node.Name]
+	if !exist {
+		metricData = &NodeMetricData{}
+		p.nodeStatDataMap[node.Name] = metricData
+	}
+	metricData.TotalRes = node.Status.Allocatable.DeepCopy()
+}
+
+func (p *Plugin) OnNodeUpdate(_, obj interface{}) {
+	node, ok := obj.(*v1.Node)
+	if !ok {
+		klog.Error("cannot convert obj to *v1.Node")
+		return
+	}
+	klog.V(5).Infof("OnNodeUpdate node %v update event", node.Name)
+	p.Lock()
+	defer p.Unlock()
+	metricData, exist := p.nodeStatDataMap[node.Name]
+	if !exist {
+		metricData = &NodeMetricData{}
+		p.nodeStatDataMap[node.Name] = metricData
+	}
+	metricData.TotalRes = node.Status.Allocatable.DeepCopy()
+}
+
+func (p *Plugin) OnNodeDelete(obj interface{}) {
+	node, ok := obj.(*v1.Node)
+	if !ok {
+		klog.Error("cannot convert obj to *v1.Node")
+		return
+	}
+	klog.V(5).Infof("OnNodeDelete node %v delete event", node.Name)
+	p.Lock()
+	bucketID := p.getBucketID(node.Name)
+	if pool, ok := p.nodePoolMap[bucketID]; ok {
+		pool.Delete(node.Name)
+	}
+	delete(p.nodeStatDataMap, node.Name)
+	p.Unlock()
+}
+
+func (p *Plugin) OnPodAdd(obj interface{}) {
+	pod, ok := obj.(*v1.Pod)
+	if !ok {
+		klog.Error("cannot convert obj to *v1.Pod")
+		return
+	}
+	klog.V(5).Infof("OnPodAdd pod %v add event", pod.Name)
+	p.Lock()
+	defer p.Unlock()
+	if p.podUsageSelectorKey != "" {
+		if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && value == p.podUsageSelectorVal {
+			klog.Info("start sync pod usage to nodeMonitor")
+			p.enableSyncPodUsage = true
+		}
+	}
+
+	if len(pod.Spec.NodeName) == 0 {
+		return
+	}
+	podName := native.GenerateNamespaceNameKey(pod.Namespace, pod.Name)
+	if existPods, ok := p.nodeToPodsMap[pod.Spec.NodeName]; ok {
+		existPods[podName] = struct{}{}
+	} else {
+		existPods = make(map[string]struct{})
+		existPods[podName] = struct{}{}
+		p.nodeToPodsMap[pod.Spec.NodeName] = existPods
+	}
+}
+
+func 
(p *Plugin) OnPodUpdate(_, newObj interface{}) { + var pod *v1.Pod + switch t := newObj.(type) { + case *v1.Pod: + pod = t + case cache.DeletedFinalStateUnknown: + var ok bool + pod, ok = t.Obj.(*v1.Pod) + if !ok { + klog.Errorf("cannot convert to *v1.Pod: %v", t.Obj) + return + } + default: + klog.Errorf("cannot convert to *v1.Pod: %v", t) + return + } + + klog.V(5).Infof("OnPodUpdate node %v update event", pod.Name) + if len(pod.Spec.NodeName) == 0 { + return + } + p.Lock() + defer p.Unlock() + podName := native.GenerateNamespaceNameKey(pod.Namespace, pod.Name) + if existPods, ok := p.nodeToPodsMap[pod.Spec.NodeName]; ok { + existPods[podName] = struct{}{} + } else { + existPods = make(map[string]struct{}) + existPods[podName] = struct{}{} + p.nodeToPodsMap[pod.Spec.NodeName] = existPods + } +} + +func (p *Plugin) OnPodDelete(obj interface{}) { + pod, ok := obj.(*v1.Pod) + if !ok { + klog.Error("transfer to v1.Pod error") + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + klog.Error("couldn't get object from tombstone %#v", obj) + return + } + pod, ok = tombstone.Obj.(*v1.Pod) + if !ok { + klog.Error("tombstone contained object that is not a pod %#v", obj) + return + } + } + klog.V(5).Infof("OnPodDelete node %v delete event", pod.Name) + p.Lock() + defer p.Unlock() + if p.podUsageSelectorVal != "" { + if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && value == p.podUsageSelectorVal { + klog.Info("stop sync pod usage to nodeMonitor") + p.enableSyncPodUsage = false + } + } + podName := native.GenerateNamespaceNameKey(pod.Namespace, pod.Name) + delete(p.podStatDataMap, podName) + if len(pod.Spec.NodeName) == 0 { + return + } + if existPods, ok := p.nodeToPodsMap[pod.Spec.NodeName]; ok { + delete(existPods, podName) + } +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/helper.go b/pkg/controller/npd/indicator-plugin/loadaware/helper.go new file mode 100644 index 000000000..bdc2c0be4 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/helper.go @@ -0,0 +1,177 @@ +package loadaware + +import ( + "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin/loadaware/sorter" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + quotav1 "k8s.io/apiserver/pkg/quota/v1" + "k8s.io/metrics/pkg/apis/metrics/v1beta1" + "time" +) + +// getUsage transfer cpu Nano to Milli, memory Ki to Mega +func getUsage(src corev1.ResourceList) corev1.ResourceList { + return corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(src.Cpu().MilliValue(), resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(src.Memory().Value(), resource.BinarySI), + } +} + +func calCPUAndMemoryAvg(dataList []corev1.ResourceList) corev1.ResourceList { + cpuSum := int64(0) + memorySum := int64(0) + for _, value := range dataList { + cpuSum += value.Cpu().MilliValue() + memorySum += value.Memory().Value() + } + avgCPU := cpuSum / int64(len(dataList)) + avgMem := memorySum / int64(len(dataList)) + return corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(avgCPU, resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(avgMem, resource.BinarySI), + } +} + +func calCPUAndMemoryMax(dataList []*ResourceListWithTime) corev1.ResourceList { + maxCPU := int64(0) + maxMem := int64(0) + for _, value := range dataList { + if value.Cpu().MilliValue() > maxCPU { + maxCPU = value.Cpu().MilliValue() + } + if value.Memory().Value() > maxMem { + maxMem = 
value.Memory().Value() + } + } + return corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewMilliQuantity(maxCPU, resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(maxMem, resource.BinarySI), + } +} + +func isNodeMetricsExpired(nodeMetric *v1beta1.NodeMetrics, now metav1.Time) bool { + return nodeMetric.Timestamp.Time.Add(NodeMetricExpiredTime).Before(now.Time) +} + +func isPodMetricsExpired(podMetrics *v1beta1.PodMetrics, now metav1.Time) bool { + return podMetrics.Timestamp.Time.Add(NodeMetricExpiredTime).Before(now.Time) +} + +func refreshNodeMetricData(metricData *NodeMetricData, metricInfo *v1beta1.NodeMetrics, now time.Time) { + metricData.lock.Lock() + defer metricData.lock.Unlock() + metricData.LatestUsage = metricInfo.Usage.DeepCopy() + metricData.Latest15MinCache = append(metricData.Latest15MinCache, getUsage(metricInfo.Usage)) + if len(metricData.Latest15MinCache) > Avg15MinPointNumber { + metricData.Latest15MinCache = metricData.Latest15MinCache[1:] + } + // calculate 5 min avg data + metaData5min := metricData.Latest15MinCache + if len(metricData.Latest15MinCache) > Avg5MinPointNumber { + metaData5min = metricData.Latest15MinCache[len(metricData.Latest15MinCache)-Avg5MinPointNumber:] + } + avg5Min := calCPUAndMemoryAvg(metaData5min) + metricData.Avg5Min = avg5Min.DeepCopy() + + // calculate 15 min avg data + avg15Min := calCPUAndMemoryAvg(metricData.Latest15MinCache) + metricData.Avg15Min = avg15Min.DeepCopy() + + // calculate 1 hour max data + if metricData.ifCanInsertLatest1HourCache(now) { + resWithTime := &ResourceListWithTime{ + ResourceList: avg15Min.DeepCopy(), + Ts: now.Unix(), + } + metricData.Latest1HourCache = append(metricData.Latest1HourCache, resWithTime) + if len(metricData.Latest1HourCache) > Max1HourPointNumber { + metricData.Latest1HourCache = metricData.Latest1HourCache[1:] + } + } + max1Hour := calCPUAndMemoryMax(metricData.Latest1HourCache) + metricData.Max1Hour = max1Hour.DeepCopy() + + //calculate 1 day max data + if metricData.ifCanInsertLatest1DayCache(now) { + resWithTime := &ResourceListWithTime{ + ResourceList: max1Hour.DeepCopy(), + Ts: now.Unix(), + } + metricData.Latest1DayCache = append(metricData.Latest1DayCache, resWithTime) + if len(metricData.Latest1DayCache) > Max1DayPointNumber { + metricData.Latest1DayCache = metricData.Latest1DayCache[1:] + } + } + max1Day := calCPUAndMemoryMax(metricData.Latest1DayCache) + metricData.Max1Day = max1Day.DeepCopy() +} + +func refreshPodMetricData(metricData *PodMetricData, metricInfo *v1beta1.PodMetrics) { + metricData.lock.Lock() + defer metricData.lock.Unlock() + podUsage := make(corev1.ResourceList) + for _, containerMetrics := range metricInfo.Containers { + podUsage = quotav1.Add(podUsage, containerMetrics.Usage) + } + metricData.LatestUsage = podUsage.DeepCopy() + //calculate 5 min avg data + metricData.Latest5MinCache = append(metricData.Latest5MinCache, getUsage(podUsage)) + if len(metricData.Latest5MinCache) > Avg5MinPointNumber { + metricData.Latest5MinCache = metricData.Latest5MinCache[len(metricData.Latest5MinCache)-Avg5MinPointNumber:] + } + avg5Min := calCPUAndMemoryAvg(metricData.Latest5MinCache) + metricData.Avg5Min = avg5Min.DeepCopy() +} + +func getTopNPodUsages(podUsages map[string]corev1.ResourceList, maxPodUsageCount int) map[string]corev1.ResourceList { + if len(podUsages) <= maxPodUsageCount { + return podUsages + } + resourceToWeightMap := map[corev1.ResourceName]int64{ + corev1.ResourceCPU: int64(1), + corev1.ResourceMemory: int64(1), + } + var objs 
[]*sorter.Obj + totalResUsage := make(corev1.ResourceList) + for name, usage := range podUsages { + obj := sorter.Obj{ + Name: name, + } + objs = append(objs, &obj) + totalResUsage = quotav1.Add(totalResUsage, usage) + } + sorter.SortPodsByUsage(objs, podUsages, totalResUsage, resourceToWeightMap) + topNPodUsages := make(map[string]corev1.ResourceList) + for i, obj := range objs { + if i >= maxPodUsageCount { + break + } + if podUsage, ok := podUsages[obj.Name]; ok { + topNPodUsages[obj.Name] = podUsage + } + } + return topNPodUsages +} + +func calNodeLoad(resourceName corev1.ResourceName, usage, totalRes corev1.ResourceList) int64 { + if usage == nil || totalRes == nil { + return 0 + } + used := int64(0) + total := int64(0) + if resourceName == corev1.ResourceCPU { + used = usage.Cpu().MilliValue() + total = totalRes.Cpu().MilliValue() + } else { + used = usage.Memory().Value() + total = totalRes.Memory().Value() + } + if total == 0 { + return 0 + } + if used >= total { + return 99 + } + return used * 100 / total +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go b/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go new file mode 100644 index 000000000..33b544499 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go @@ -0,0 +1,660 @@ +package loadaware + +import ( + "context" + "fmt" + "hash/crc32" + "sync" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + listersv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + "k8s.io/metrics/pkg/apis/metrics/v1beta1" + metricsclientset "k8s.io/metrics/pkg/client/clientset/versioned" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" + katalystbase "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/controller" + indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/native" +) + +func init() { + indicator_plugin.RegisterPluginInitializer(LoadAwarePluginName, NewLoadAwarePlugin) +} + +type Plugin struct { + sync.RWMutex + + ctx context.Context + workers int32 + + nodeLister listersv1.NodeLister + npdLister nodev1alpha1.NodeProfileDescriptorLister + metricsClient metricsclientset.Interface + namespaceLister listersv1.NamespaceLister + podLister listersv1.PodLister + npdUpdater indicator_plugin.IndicatorUpdater + + nodePoolMap map[int32]sets.String + nodeStatDataMap map[string]*NodeMetricData + podStatDataMap map[string]*PodMetricData + nodeToPodsMap map[string]map[string]struct{} + + syncMetricInterval time.Duration + listMetricTimeout time.Duration + syncedFunc []cache.InformerSynced + + maxPodUsageCount int + enableSyncPodUsage bool + podUsageSelectorKey string + podUsageSelectorVal string + podUsageSelectorNamespace string + + emitter metrics.MetricEmitter +} + +func NewLoadAwarePlugin(ctx context.Context, conf *controller.NPDConfig, extraConf interface{}, + controlCtx *katalystbase.GenericContext, updater indicator_plugin.IndicatorUpdater) (indicator_plugin.IndicatorPlugin, error) { + p := &Plugin{ + ctx: ctx, + workers: int32(conf.Workers), + + nodeLister: 
controlCtx.KubeInformerFactory.Core().V1().Nodes().Lister(), + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + podLister: controlCtx.KubeInformerFactory.Core().V1().Pods().Lister(), + namespaceLister: controlCtx.KubeInformerFactory.Core().V1().Namespaces().Lister(), + metricsClient: controlCtx.Client.MetricClient, + npdUpdater: updater, + + nodePoolMap: make(map[int32]sets.String), + nodeStatDataMap: make(map[string]*NodeMetricData), + podStatDataMap: make(map[string]*PodMetricData), + nodeToPodsMap: make(map[string]map[string]struct{}), + + emitter: controlCtx.EmitterPool.GetDefaultMetricsEmitter(), + syncMetricInterval: conf.SyncMetricInterval, + listMetricTimeout: conf.ListMetricTimeout, + syncedFunc: []cache.InformerSynced{}, + + maxPodUsageCount: conf.MaxPodUsageCount, + podUsageSelectorNamespace: conf.PodUsageSelectorNamespace, + podUsageSelectorKey: conf.PodUsageSelectorKey, + podUsageSelectorVal: conf.PodUsageSelectorVal, + } + + nodeInformer := controlCtx.KubeInformerFactory.Core().V1().Nodes().Informer() + nodeInformer.AddEventHandler( + cache.ResourceEventHandlerFuncs{ + AddFunc: p.OnNodeAdd, + UpdateFunc: p.OnNodeUpdate, + DeleteFunc: p.OnNodeDelete, + }, + ) + + podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods().Informer() + podInformer.AddEventHandler( + cache.ResourceEventHandlerFuncs{ + AddFunc: p.OnPodAdd, + UpdateFunc: p.OnPodUpdate, + DeleteFunc: p.OnPodDelete, + }, + ) + + nsInformer := controlCtx.KubeInformerFactory.Core().V1().Namespaces().Informer() + nsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{}) + + npdInformer := controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Informer() + npdInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{}) + + p.syncedFunc = []cache.InformerSynced{ + nodeInformer.HasSynced, + podInformer.HasSynced, + nsInformer.HasSynced, + npdInformer.HasSynced, + } + + return p, nil +} + +func (p *Plugin) Run() { + defer utilruntime.HandleCrash() + defer func() { + klog.Infof("Shutting down %s npd plugin", LoadAwarePluginName) + }() + + if !cache.WaitForCacheSync(p.ctx.Done(), p.syncedFunc...) 
{ + utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s npd plugin", LoadAwarePluginName)) + return + } + + klog.Infof("caches are synced for %s controller", LoadAwarePluginName) + + p.Lock() + defer p.Unlock() + + if p.podUsageRequired() { + p.enableSyncPodUsage = true + } + + nodes, err := p.nodeLister.List(labels.Everything()) + if err != nil { + klog.Fatalf("get all nodes from cache error, err:%v", err) + } + //init worker node pool + for _, node := range nodes { + bucketID := p.getBucketID(node.Name) + if pool, ok := p.nodePoolMap[bucketID]; !ok { + p.nodePoolMap[bucketID] = sets.NewString(node.Name) + } else { + pool.Insert(node.Name) + } + } + + p.constructNodeToPodMap() + + //restore npd from api server + p.restoreNPD() + + // start sync node + go wait.Until(p.syncNode, p.syncMetricInterval, p.ctx.Done()) + + go time.AfterFunc(TransferToCRStoreTime, func() { + klog.Infof("start transferMetaToCRStore") + wait.Until(func() { + p.transferMetaToCRStore() + }, TransferToCRStoreTime, p.ctx.Done()) + }) + + go wait.Until(p.podWorker, p.syncMetricInterval, p.ctx.Done()) + + go wait.Until(p.reCleanPodData, 5*time.Minute, p.ctx.Done()) + + go wait.Until(p.checkPodUsageRequired, time.Minute, p.ctx.Done()) +} + +func (p *Plugin) Name() string { + return LoadAwarePluginName +} + +func (p *Plugin) GetSupportedNodeMetricsScope() []string { + return []string{loadAwareMetricsScope, loadAwareMetricMetadataScope} +} + +func (p *Plugin) GetSupportedPodMetricsScope() []string { + return []string{loadAwareMetricsScope} +} + +func (p *Plugin) syncNode() { + // list node metrics + nodeMetricsMap, err := p.listNodeMetrics() + if err != nil { + klog.Errorf("list node metrics fail: %v", err) + return + } + + wg := sync.WaitGroup{} + for i := int32(0); i < p.workers; i++ { + wg.Add(1) + go func(id int32) { + p.worker(id, nodeMetricsMap) + wg.Done() + }(i) + } + wg.Wait() +} + +func (p *Plugin) restoreNPD() { + if p.nodeStatDataMap == nil { + p.nodeStatDataMap = make(map[string]*NodeMetricData) + } + npds, err := p.npdLister.List(labels.Everything()) + if err != nil { + klog.Errorf("get all npd from cache fail: %v", err) + return + } + + for _, npd := range npds { + for i := range npd.Status.NodeMetrics { + if npd.Status.NodeMetrics[i].Scope != loadAwareMetricMetadataScope { + continue + } + + var ( + avg15MinCache = make([]corev1.ResourceList, 0) + max1HourCache = make([]*ResourceListWithTime, 0) + max1DayCache = make([]*ResourceListWithTime, 0) + + avg15MinMap = make(map[metav1.Time]corev1.ResourceList) + max1HourMap = make(map[metav1.Time]*ResourceListWithTime) + max1DayMap = make(map[metav1.Time]*ResourceListWithTime) + ) + + for _, metricValue := range npd.Status.NodeMetrics[i].Metrics { + if metricValue.Window.Duration == 15*time.Minute { + if _, ok := avg15MinMap[metricValue.Timestamp]; !ok { + avg15MinMap[metricValue.Timestamp] = corev1.ResourceList{} + } + avg15MinMap[metricValue.Timestamp][corev1.ResourceName(metricValue.MetricName)] = metricValue.Value + } else if metricValue.Window.Duration == time.Hour { + if _, ok := max1HourMap[metricValue.Timestamp]; !ok { + max1HourMap[metricValue.Timestamp] = &ResourceListWithTime{ + Ts: metricValue.Timestamp.Unix(), + ResourceList: corev1.ResourceList{}, + } + } + max1HourMap[metricValue.Timestamp].ResourceList[corev1.ResourceName(metricValue.MetricName)] = metricValue.Value + } else if metricValue.Window.Duration == 24*time.Hour { + if _, ok := max1DayMap[metricValue.Timestamp]; !ok { + max1DayMap[metricValue.Timestamp] = &ResourceListWithTime{ 
+ Ts: metricValue.Timestamp.Unix(), + ResourceList: corev1.ResourceList{}, + } + } + max1DayMap[metricValue.Timestamp].ResourceList[corev1.ResourceName(metricValue.MetricName)] = metricValue.Value + } else { + klog.Warningf("unkonw metadata metricName: %v, window: %v", metricValue.MetricName, metricValue.Window) + } + } + + for i := range avg15MinMap { + avg15MinCache = append(avg15MinCache, avg15MinMap[i]) + } + for i := range max1HourMap { + max1HourCache = append(max1HourCache, max1HourMap[i]) + } + for i := range max1DayMap { + max1DayCache = append(max1DayCache, max1DayMap[i]) + } + + p.nodeStatDataMap[npd.Name] = &NodeMetricData{ + Latest15MinCache: avg15MinCache, + Latest1HourCache: max1HourCache, + Latest1DayCache: max1DayCache, + } + + break + } + } +} + +func (p *Plugin) listNodeMetrics() (map[string]*v1beta1.NodeMetrics, error) { + timeout, cancel := context.WithTimeout(p.ctx, p.listMetricTimeout) + defer cancel() + + nodeMetricsList, err := p.metricsClient.MetricsV1beta1().NodeMetricses().List(timeout, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + res := make(map[string]*v1beta1.NodeMetrics) + for _, nm := range nodeMetricsList.Items { + res[nm.Name] = nm.DeepCopy() + } + + return res, nil +} + +func (p *Plugin) worker(i int32, nodeMetricsMap map[string]*v1beta1.NodeMetrics) { + p.RLock() + nodeNames, ok := p.nodePoolMap[i] + p.RUnlock() + + if !ok { + return + } + for name := range nodeNames { + nodeMetrics, ok := nodeMetricsMap[name] + if !ok { + klog.Errorf("%s node metrics miss", name) + continue + } + now := metav1.Now() + if isNodeMetricsExpired(nodeMetrics, now) { + klog.Errorf("node %s node metrics expired, metricsTime: %v", name, nodeMetrics.Timestamp.String()) + continue + } + + p.Lock() + nodeMetricData, exist := p.nodeStatDataMap[name] + if !exist { + nodeMetricData = &NodeMetricData{} + p.nodeStatDataMap[name] = nodeMetricData + } + // build podUsage + podUsage := make(map[string]corev1.ResourceList) + if pods, ok := p.nodeToPodsMap[name]; ok { + for podNamespaceName := range pods { + if podMetaData, exist := p.podStatDataMap[podNamespaceName]; exist { + podUsage[podNamespaceName] = podMetaData.Avg5Min.DeepCopy() + } + } + } + p.Unlock() + refreshNodeMetricData(nodeMetricData, nodeMetrics, now.Time) + err := p.updateNPDStatus(nodeMetricData, name, now, podUsage) + if err != nil { + klog.Errorf("createOrUpdateNodeMonitorStatus fail, node: %v, err: %v", name, err) + continue + } + } +} + +func (p *Plugin) podWorker() { + if !p.enableSyncPodUsage { + return + } + nsList, err := p.namespaceLister.List(labels.Everything()) + if err != nil { + klog.Errorf("get all namespaces failed, err:%v", err) + return + } + for _, ns := range nsList { + podMetricsList, err := p.getPodMetrics(ns.Name) + if err != nil { + klog.Errorf("get podMetrics of namespace:%s failed, err:%v", ns.Name, err) + continue + } + p.Lock() + for _, podMetrics := range podMetricsList.Items { + now := metav1.Now() + if isPodMetricsExpired(&podMetrics, now) { + klog.Errorf("podMetrics is expired, podName: %v", podMetrics.Name) + continue + } + namespacedName := native.GenerateNamespaceNameKey(podMetrics.Namespace, podMetrics.Name) + metricData, exist := p.podStatDataMap[namespacedName] + if !exist { + metricData = &PodMetricData{} + p.podStatDataMap[namespacedName] = metricData + } + + refreshPodMetricData(metricData, &podMetrics) + } + p.Unlock() + } +} + +func (p *Plugin) getPodMetrics(namespace string) (*v1beta1.PodMetricsList, error) { + timeout, cancel := context.WithTimeout(p.ctx, 
p.listMetricTimeout) + defer cancel() + mc := p.metricsClient.MetricsV1beta1() + return mc.PodMetricses(namespace).List(timeout, metav1.ListOptions{}) +} + +func (p *Plugin) transferMetaToCRStore() { + copyNodeStatDataMap := p.getNodeStatDataMap() + for nodeName, metricData := range copyNodeStatDataMap { + _, err := p.npdLister.Get(nodeName) + if err != nil { + klog.Errorf("get node %v npd fail: %v", nodeName, err) + continue + } + + status := &v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: make([]v1alpha1.ScopedNodeMetrics, 0), + } + p.updateMetadata(status, metricData) + + p.npdUpdater.UpdateNodeMetrics(nodeName, status.NodeMetrics) + klog.V(10).Infof("update loadAware metadata success, nodeName: %v, data: %v", nodeName, status.NodeMetrics) + } +} + +func (p *Plugin) getNodeStatDataMap() map[string]*NodeMetricData { + p.RLock() + defer p.RUnlock() + meta := make(map[string]*NodeMetricData) + for nodeName, value := range p.nodeStatDataMap { + meta[nodeName] = value + } + return meta +} + +func (p *Plugin) updateNPDStatus(metricData *NodeMetricData, nodeName string, now metav1.Time, podUsages map[string]corev1.ResourceList) error { + _, err := p.npdLister.Get(nodeName) + if err != nil { + err = fmt.Errorf("get node %v npd fail: %v", nodeName, err) + return err + } + + npdStatus := &v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: make([]v1alpha1.ScopedNodeMetrics, 0), + PodMetrics: make([]v1alpha1.ScopedPodMetrics, 0), + } + + // update node metrics + p.updateNodeMetrics(npdStatus, metricData, now) + + // update pod metrics if needed + if p.enableSyncPodUsage { + p.updatePodMetrics(npdStatus, podUsages, p.maxPodUsageCount) + } + + // update by updater + p.npdUpdater.UpdateNodeMetrics(nodeName, npdStatus.NodeMetrics) + if p.enableSyncPodUsage { + p.npdUpdater.UpdatePodMetrics(nodeName, npdStatus.PodMetrics) + } + + klog.V(6).Infof("plugin %v update node %v NPDStatus: %v", LoadAwarePluginName, nodeName, *npdStatus) + return nil +} + +func (p *Plugin) updateMetadata(npdStatus *v1alpha1.NodeProfileDescriptorStatus, metricData *NodeMetricData) { + metricData.lock.RLock() + defer metricData.lock.RUnlock() + + metricMetadata := make([]v1alpha1.MetricValue, 0) + + metricMetadata = append(metricMetadata, p.appendMetricValues(metricData.Latest15MinCache, v1alpha1.AggregatorAvg, 15*time.Minute, time.Minute)...) + metricMetadata = append(metricMetadata, p.appendMetricValuesWithTime(metricData.Latest1HourCache, v1alpha1.AggregatorMax, time.Hour)...) + metricMetadata = append(metricMetadata, p.appendMetricValuesWithTime(metricData.Latest1DayCache, v1alpha1.AggregatorMax, 24*time.Hour)...) + + npdStatus.NodeMetrics = append(npdStatus.NodeMetrics, v1alpha1.ScopedNodeMetrics{ + Scope: loadAwareMetricMetadataScope, + Metrics: metricMetadata, + }) +} + +func (p *Plugin) updateNodeMetrics(npdStatus *v1alpha1.NodeProfileDescriptorStatus, metricData *NodeMetricData, now metav1.Time) { + metricData.lock.RLock() + defer metricData.lock.RUnlock() + + nodeMetrics := make([]v1alpha1.MetricValue, 0) + nodeMetrics = append(nodeMetrics, p.appendMetricValue(metricData.Avg5Min, v1alpha1.AggregatorAvg, 5*time.Minute, &now)...) + nodeMetrics = append(nodeMetrics, p.appendMetricValue(metricData.Avg15Min, v1alpha1.AggregatorAvg, 15*time.Minute, &now)...) + nodeMetrics = append(nodeMetrics, p.appendMetricValue(metricData.Max1Hour, v1alpha1.AggregatorMax, time.Hour, &now)...) + nodeMetrics = append(nodeMetrics, p.appendMetricValue(metricData.Max1Day, v1alpha1.AggregatorMax, 24*time.Hour, &now)...) 
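+
+	// at this point nodeMetrics holds one MetricValue per resource and window: a cpu/memory avg over
+	// 5m and 15m plus a cpu/memory max over 1h and 24h, each stamped with `now`; they are all
+	// published under the loadAware scope below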
+ + npdStatus.NodeMetrics = append(npdStatus.NodeMetrics, v1alpha1.ScopedNodeMetrics{ + Scope: loadAwareMetricsScope, + Metrics: nodeMetrics, + }) +} + +func (p *Plugin) updatePodMetrics(npdStatus *v1alpha1.NodeProfileDescriptorStatus, podUsage map[string]corev1.ResourceList, maxPodUsageCount int) { + podUsage = getTopNPodUsages(podUsage, maxPodUsageCount) + + podMetrics := make([]v1alpha1.PodMetric, 0) + for namespaceName, resourceList := range podUsage { + podMetric, err := p.appendPodMetrics(resourceList, namespaceName) + if err != nil { + klog.Errorf("skip pod: %v, update pod metrics fail: %v", namespaceName, err) + continue + } + podMetrics = append(podMetrics, podMetric) + } + + npdStatus.PodMetrics = append(npdStatus.PodMetrics, v1alpha1.ScopedPodMetrics{ + Scope: loadAwareMetricsScope, + PodMetrics: podMetrics, + }) +} + +func (p *Plugin) appendMetricValue(resourceList corev1.ResourceList, aggregator v1alpha1.Aggregator, window time.Duration, now *metav1.Time) []v1alpha1.MetricValue { + metricValues := make([]v1alpha1.MetricValue, 0) + for resourceName, quantity := range resourceList { + mv := v1alpha1.MetricValue{ + MetricName: resourceName.String(), + Window: &metav1.Duration{Duration: window}, + Aggregator: &aggregator, + Value: quantity, + } + if now != nil { + mv.Timestamp = *now + } + metricValues = append(metricValues, mv) + } + return metricValues +} + +func (p *Plugin) appendMetricValues(resourceLists []corev1.ResourceList, aggregator v1alpha1.Aggregator, window time.Duration, step time.Duration) []v1alpha1.MetricValue { + metricValues := make([]v1alpha1.MetricValue, 0) + ts := time.Now().Add(-1 * window) + for _, resourceList := range resourceLists { + metricValues = append(metricValues, p.appendMetricValue(resourceList, aggregator, window, &metav1.Time{Time: ts})...) + ts = ts.Add(step) + } + + return metricValues +} + +func (p *Plugin) appendMetricValuesWithTime(resourceLists []*ResourceListWithTime, aggregator v1alpha1.Aggregator, window time.Duration) []v1alpha1.MetricValue { + metricValues := make([]v1alpha1.MetricValue, 0) + for _, resourceListWithTime := range resourceLists { + ts := metav1.Time{Time: time.Unix(resourceListWithTime.Ts, 0)} + metricValues = append(metricValues, p.appendMetricValue(resourceListWithTime.ResourceList, aggregator, window, &ts)...) 
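+		// the stored unix timestamp Ts is converted back into a metav1.Time above, so that restoreNPD
+		// can later rebuild Latest1HourCache and Latest1DayCache from the loadAware_metadata scope by
+		// matching each MetricValue's Window and Timestamp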
+ } + return metricValues +} + +func (p *Plugin) appendPodMetrics(resourceList corev1.ResourceList, namespaceName string) (v1alpha1.PodMetric, error) { + namespace, name, err := native.ParseNamespaceNameKey(namespaceName) + if err != nil { + return v1alpha1.PodMetric{}, err + } + + podMetric := v1alpha1.PodMetric{ + Namespace: namespace, + Name: name, + Metrics: []v1alpha1.MetricValue{}, + } + for resourceName, quantity := range resourceList { + podMetric.Metrics = append(podMetric.Metrics, v1alpha1.MetricValue{ + MetricName: resourceName.String(), + Value: quantity, + }) + } + + return podMetric, nil +} + +func (p *Plugin) constructNodeToPodMap() { + pods, err := p.podLister.List(labels.Everything()) + if err != nil { + klog.Errorf("list all pod error, err:%v", err) + return + } + for _, pod := range pods { + if len(pod.Spec.NodeName) > 0 { + if podMap, ok := p.nodeToPodsMap[pod.Spec.NodeName]; ok { + podMap[native.GenerateNamespaceNameKey(pod.Namespace, pod.Name)] = struct{}{} + } else { + p.nodeToPodsMap[pod.Spec.NodeName] = map[string]struct{}{ + native.GenerateNamespaceNameKey(pod.Namespace, pod.Name): {}, + } + } + } + } +} + +func (p *Plugin) podUsageRequired() bool { + pods, err := p.podLister.Pods(p.podUsageSelectorNamespace). + List(labels.SelectorFromSet(map[string]string{p.podUsageSelectorKey: p.podUsageSelectorVal})) + if err != nil { + klog.Errorf("get pod usage pods err: %v", err) + return false + } + return len(pods) > 0 +} + +func (p *Plugin) getBucketID(name string) int32 { + hash := int64(crc32.ChecksumIEEE([]byte(name))) + size := hash % int64(p.workers) + return int32(size) +} + +// reCleanPodData Because the data in the podStatDataMap is pulled from the metrics-server with a specific interval, +// relying solely on pod delete events to remove data from the podStatDataMap can result in data residue. +// Here we need to actively perform cleanup data residue. 
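+// It rebuilds the set of pods currently known to the lister and drops every podStatDataMap entry
+// whose pod no longer exists; Run schedules this cleanup every 5 minutes.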
+func (p *Plugin) reCleanPodData() { + p.Lock() + defer p.Unlock() + pods, err := p.podLister.List(labels.Everything()) + if err != nil { + klog.Errorf("get all pods error, err:=%v", err) + return + } + existPod := make(map[string]struct{}) + for _, pod := range pods { + existPod[native.GenerateNamespaceNameKey(pod.Namespace, pod.Name)] = struct{}{} + } + for podName := range p.podStatDataMap { + if _, ok := existPod[podName]; !ok { + delete(p.podStatDataMap, podName) + } + } +} + +func (p *Plugin) checkPodUsageRequired() { + if p.podUsageRequired() { + podUsageUnrequiredCount = 0 + p.enableSyncPodUsage = true + } else { + podUsageUnrequiredCount++ + if podUsageUnrequiredCount >= 5 { + p.enableSyncPodUsage = false + podUsageUnrequiredCount = 0 + } + } +} + +func (p *Plugin) reportNodeLoadMetric() { + p.RLock() + defer p.RUnlock() + resourceDims := []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} + for _, resourceName := range resourceDims { + resultMap := make(map[int64]*int64) + for _, data := range p.nodeStatDataMap { + data.lock.RLock() + load := calNodeLoad(resourceName, data.LatestUsage, data.TotalRes) + data.lock.RUnlock() + idx := load / 10 + if count, ok := resultMap[idx]; !ok { + i := int64(1) + resultMap[idx] = &i + } else { + *count++ + } + } + for idx, level := range levels { + typeTag := metrics.MetricTag{Key: metricTagType, Val: string(resourceName)} + levelTag := metrics.MetricTag{Key: metricTagLevel, Val: level} + if count, ok := resultMap[int64(idx)]; ok { + _ = p.emitter.StoreFloat64(loadAwareMetricName, float64(*count), metrics.MetricTypeNameRaw, typeTag, levelTag) + } else { + _ = p.emitter.StoreFloat64(loadAwareMetricName, 0, metrics.MetricTypeNameRaw, typeTag, levelTag) + } + } + } +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go new file mode 100644 index 000000000..df85126c2 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go @@ -0,0 +1,105 @@ +package sorter + +import "sort" + +// CompareFn compares p1 and p2 and returns: +// +// -1 if p1 < p2 +// 0 if p1 == p2 +// +1 if p1 > p2 +type CompareFn func(p1, p2 *Obj) int + +// Obj ... +type Obj struct { + Name string +} + +// MultiSorter implements the Sort interface +type MultiSorter struct { + ascending bool + objs []*Obj + cmp []CompareFn +} + +// Sort sorts the objs according to the cmp functions passed to OrderedBy. +func (ms *MultiSorter) Sort(objs []*Obj) { + ms.objs = objs + sort.Sort(ms) +} + +// OrderedBy returns a Sorter sorted using the cmp functions, sorts in ascending order by default +func OrderedBy(cmp ...CompareFn) *MultiSorter { + return &MultiSorter{ + ascending: true, + cmp: cmp, + } +} + +// Ascending ... +func (ms *MultiSorter) Ascending() *MultiSorter { + ms.ascending = true + return ms +} + +// Descending ... +func (ms *MultiSorter) Descending() *MultiSorter { + ms.ascending = false + return ms +} + +// Len is part of sort.Interface. +func (ms *MultiSorter) Len() int { + return len(ms.objs) +} + +// Swap is part of sort.Interface. +func (ms *MultiSorter) Swap(i, j int) { + ms.objs[i], ms.objs[j] = ms.objs[j], ms.objs[i] +} + +// Less is part of sort.Interface. 
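+// Less walks the cmp functions in order and returns as soon as one of them reports a difference,
+// flipping the answer when the sorter is configured as Descending; the final cmp breaks remaining ties.
+// For example, SortPodsByUsage sorts descending by usage via OrderedBy(Reverse(PodUsage(...))).Sort(objs).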
+func (ms *MultiSorter) Less(i, j int) bool { + p1, p2 := ms.objs[i], ms.objs[j] + var k int + for k = 0; k < len(ms.cmp)-1; k++ { + cmpResult := ms.cmp[k](p1, p2) + // p1 is less than p2 + if cmpResult < 0 { + return ms.ascending + } + // p1 is greater than p2 + if cmpResult > 0 { + return !ms.ascending + } + } + cmpResult := ms.cmp[k](p1, p2) + if cmpResult < 0 { + return ms.ascending + } + return !ms.ascending +} + +// cmpBool compares booleans, placing true before false +func cmpBool(a, b bool) int { + if a == b { + return 0 + } + if !b { + return -1 + } + return 1 +} + +// Reverse ... +func Reverse(cmp CompareFn) CompareFn { + return func(p1, p2 *Obj) int { + result := cmp(p1, p2) + if result > 0 { + return -1 + } + if result < 0 { + return 1 + } + return 0 + } +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go new file mode 100644 index 000000000..d615b9652 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go @@ -0,0 +1,29 @@ +package sorter + +import corev1 "k8s.io/api/core/v1" + +// PodUsage compares objs by the actual usage +func PodUsage(podRealUsage map[string]corev1.ResourceList, totalPodUsage corev1.ResourceList, resourceToWeightMap map[corev1.ResourceName]int64) CompareFn { + scorer := ResourceUsageScorer(resourceToWeightMap) + return func(p1, p2 *Obj) int { + p1Usage, p1Found := podRealUsage[p1.Name] + p2Usage, p2Found := podRealUsage[p2.Name] + if !p1Found || !p2Found { + return cmpBool(!p1Found, !p2Found) + } + p1Score := scorer(p1Usage, totalPodUsage) + p2Score := scorer(p2Usage, totalPodUsage) + if p1Score == p2Score { + return 0 + } + if p1Score > p2Score { + return 1 + } + return -1 + } +} + +// SortPodsByUsage ... +func SortPodsByUsage(objs []*Obj, podRealUsage map[string]corev1.ResourceList, nodeAllocatableMap corev1.ResourceList, resourceToWeightMap map[corev1.ResourceName]int64) { + OrderedBy(Reverse(PodUsage(podRealUsage, nodeAllocatableMap, resourceToWeightMap))).Sort(objs) +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go new file mode 100644 index 000000000..a99f5a745 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go @@ -0,0 +1,65 @@ +package sorter + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + quotav1 "k8s.io/apiserver/pkg/quota/v1" +) + +func TestSortPods(t *testing.T) { + t.Parallel() + podRealUsage := map[string]corev1.ResourceList{ + "default/test-1": { + corev1.ResourceCPU: resource.MustParse("80"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-2": { + corev1.ResourceCPU: resource.MustParse("30"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-3": { + corev1.ResourceCPU: resource.MustParse("50"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-4": { + corev1.ResourceCPU: resource.MustParse("70"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-5": { + corev1.ResourceCPU: resource.MustParse("10"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-6": { + corev1.ResourceCPU: resource.MustParse("40"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + "default/test-7": { + corev1.ResourceCPU: resource.MustParse("60"), + corev1.ResourceMemory: resource.MustParse("10Gi"), + }, + } + + 
resourceToWeightMap := map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 1, + corev1.ResourceMemory: 1, + } + var objs []*Obj + totalResUsage := make(corev1.ResourceList) + for name, usage := range podRealUsage { + obj := Obj{ + Name: name, + } + objs = append(objs, &obj) + totalResUsage = quotav1.Add(totalResUsage, usage) + } + SortPodsByUsage(objs, podRealUsage, totalResUsage, resourceToWeightMap) + expectedPodsOrder := []string{"default/test-1", "default/test-4", "default/test-7", "default/test-3", "default/test-6", "default/test-2", "default/test-5"} + var podsOrder []string + for _, v := range objs { + podsOrder = append(podsOrder, v.Name) + } + assert.Equal(t, expectedPodsOrder, podsOrder) +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go new file mode 100644 index 000000000..36ea6260b --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go @@ -0,0 +1,43 @@ +package sorter + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// ResourceUsageScorer ... +func ResourceUsageScorer(resToWeightMap map[corev1.ResourceName]int64) func(requested, allocatable corev1.ResourceList) int64 { + return func(requested, allocatable corev1.ResourceList) int64 { + var nodeScore, weightSum int64 + for resourceName, quantity := range requested { + weight := resToWeightMap[resourceName] + resourceScore := mostRequestedScore(getResourceValue(resourceName, quantity), getResourceValue(resourceName, allocatable[resourceName])) + nodeScore += resourceScore * weight + weightSum += weight + } + if weightSum == 0 { + return 0 + } + return nodeScore / weightSum + } +} + +func mostRequestedScore(requested, capacity int64) int64 { + if capacity == 0 { + return 0 + } + if requested > capacity { + // `requested` might be greater than `capacity` because objs with no + // requests get minimum values. 
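+		// for example, requested=1500 against capacity=1000 is clamped to 1000 below, so the score
+		// saturates at (1000*1000)/1000 = 1000, the top of this 0-1000 scale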
+ requested = capacity + } + + return (requested * 1000) / capacity +} + +func getResourceValue(resourceName corev1.ResourceName, quantity resource.Quantity) int64 { + if resourceName == corev1.ResourceCPU { + return quantity.MilliValue() + } + return quantity.Value() +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/types.go b/pkg/controller/npd/indicator-plugin/loadaware/types.go new file mode 100644 index 000000000..6afa123a3 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/types.go @@ -0,0 +1,82 @@ +package loadaware + +import ( + v1 "k8s.io/api/core/v1" + "sync" + "time" +) + +const ( + Avg5MinPointNumber = 5 + Avg15MinPointNumber = 15 + Max1HourPointNumber = 4 + Max1DayPointNumber = 24 + + NodeMetricExpiredTime = 3 * time.Minute + TransferToCRStoreTime = 5 * time.Minute +) + +const ( + LoadAwarePluginName = "loadAware" + loadAwareMetricsScope = "loadAware" + loadAwareMetricMetadataScope = "loadAware_metadata" + loadAwareMetricName = "node_load" + metricTagType = "type" + metricTagLevel = "level" +) + +var ( + levels = []string{"0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"} + + podUsageUnrequiredCount = 0 +) + +type NodeMetricData struct { + lock sync.RWMutex + LatestUsage v1.ResourceList + TotalRes v1.ResourceList + Avg5Min v1.ResourceList + Avg15Min v1.ResourceList + Max1Hour v1.ResourceList + Max1Day v1.ResourceList + Latest15MinCache []v1.ResourceList //latest 15 1min_avg_data + Latest1HourCache []*ResourceListWithTime //latest 4 15min_max_data + Latest1DayCache []*ResourceListWithTime //latest 24 1hour_max_data +} + +func (md *NodeMetricData) ifCanInsertLatest1HourCache(now time.Time) bool { + if len(md.Latest1HourCache) == 0 { + return true + } + latestData := md.Latest1HourCache[len(md.Latest1HourCache)-1] + lastTime := time.Unix(latestData.Ts, 0) + if now.After(lastTime.Add(15*time.Minute)) || now.Equal(lastTime.Add(15*time.Minute)) { + return true + } + return false +} + +func (md *NodeMetricData) ifCanInsertLatest1DayCache(now time.Time) bool { + if len(md.Latest1DayCache) == 0 { + return true + } + latestData := md.Latest1DayCache[len(md.Latest1DayCache)-1] + lastTime := time.Unix(latestData.Ts, 0) + if now.After(lastTime.Add(1*time.Hour)) || now.Equal(lastTime.Add(1*time.Hour)) { + return true + } + return false +} + +type PodMetricData struct { + lock sync.RWMutex + LatestUsage v1.ResourceList + Avg5Min v1.ResourceList + Latest5MinCache []v1.ResourceList //latest 15 1min_avg_data +} + +// ResourceListWithTime ... 
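+// It pairs a resource usage snapshot with the unix timestamp Ts at which the point was recorded;
+// ifCanInsertLatest1HourCache and ifCanInsertLatest1DayCache compare Ts with the current time to
+// decide whether a new max point may be appended to the 1-hour and 1-day caches.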
+type ResourceListWithTime struct { + v1.ResourceList `json:"R,omitempty"` + Ts int64 `json:"T,omitempty"` +} diff --git a/pkg/controller/npd/npd.go b/pkg/controller/npd/npd.go index f6da24f21..d81d272c5 100644 --- a/pkg/controller/npd/npd.go +++ b/pkg/controller/npd/npd.go @@ -21,13 +21,11 @@ import ( "fmt" "time" - "github.com/kubewharf/katalyst-core/pkg/metrics" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" corev1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -40,6 +38,8 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config/controller" "github.com/kubewharf/katalyst-core/pkg/config/generic" indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin" + _ "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin/loadaware" + "github.com/kubewharf/katalyst-core/pkg/metrics" ) const npdControllerName = "npd" diff --git a/pkg/util/native/object.go b/pkg/util/native/object.go index 0df24c756..bf34c2e7e 100644 --- a/pkg/util/native/object.go +++ b/pkg/util/native/object.go @@ -96,6 +96,16 @@ func ParseNamespaceNameUIDKey(key string) (string, string, string, error) { return names[0], names[1], names[2], nil } +// ParseNamespaceNameKey parse the given key into namespace/name +func ParseNamespaceNameKey(key string) (string, string, error) { + names := strings.Split(key, "/") + if len(names) != 2 { + return "", "", fmt.Errorf("key %s split error", key) + } + + return names[0], names[1], nil +} + // GenerateDynamicResourceByGVR generates dynamic resource by given gvr, the format is such as `resource.version.group`, // which can be input of ParseResourceArg func GenerateDynamicResourceByGVR(gvr schema.GroupVersionResource) string { From 006c1ff9ad85b13bbdbc03b6c2e10bf66e2cd628 Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Mon, 24 Jun 2024 12:06:11 +0800 Subject: [PATCH 4/6] add loadaware scheduler plugin --- cmd/katalyst-scheduler/main.go | 2 + go.mod | 2 +- go.sum | 4 +- pkg/scheduler/plugins/loadaware/cache.go | 76 ++++++ pkg/scheduler/plugins/loadaware/fit.go | 98 ++++++++ pkg/scheduler/plugins/loadaware/handler.go | 100 ++++++++ pkg/scheduler/plugins/loadaware/plugin.go | 73 ++++++ pkg/scheduler/plugins/loadaware/reserve.go | 18 ++ pkg/scheduler/plugins/loadaware/score.go | 260 +++++++++++++++++++++ 9 files changed, 630 insertions(+), 3 deletions(-) create mode 100644 pkg/scheduler/plugins/loadaware/cache.go create mode 100644 pkg/scheduler/plugins/loadaware/fit.go create mode 100644 pkg/scheduler/plugins/loadaware/handler.go create mode 100644 pkg/scheduler/plugins/loadaware/plugin.go create mode 100644 pkg/scheduler/plugins/loadaware/reserve.go create mode 100644 pkg/scheduler/plugins/loadaware/score.go diff --git a/cmd/katalyst-scheduler/main.go b/cmd/katalyst-scheduler/main.go index 20671696e..b7aed5294 100644 --- a/cmd/katalyst-scheduler/main.go +++ b/cmd/katalyst-scheduler/main.go @@ -17,6 +17,7 @@ limitations under the License. 
package main import ( + "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/loadaware" "os" "github.com/spf13/cobra" @@ -40,6 +41,7 @@ func main() { app.WithPlugin(qosawarenoderesources.BalancedAllocationName, qosawarenoderesources.NewBalancedAllocation), app.WithPlugin(noderesourcetopology.TopologyMatchName, noderesourcetopology.New), app.WithPlugin(nodeovercommitment.Name, nodeovercommitment.New), + app.WithPlugin(loadaware.Name, loadaware.NewPlugin), ) if err := runCommand(command); err != nil { diff --git a/go.mod b/go.mod index 53c95ce2d..e364d09a0 100644 --- a/go.mod +++ b/go.mod @@ -161,7 +161,7 @@ require ( ) replace ( - github.com/kubewharf/katalyst-api => github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e + github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334 k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 42cd6a1e7..8499e9491 100644 --- a/go.sum +++ b/go.sum @@ -84,6 +84,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= +github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334 h1:vszHYT1PweqTV09lVUeZW8vE4bKNp6HGskpocSABxXg= +github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -918,8 +920,6 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e h1:+P0uj/zFw5yegDEKDz240A+LuhZqG83VXyUPo3kn8pg= -github.com/zzzzhhb/katalyst-api v0.0.0-20240611072629-9a0ca7a14c1e/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= diff --git a/pkg/scheduler/plugins/loadaware/cache.go b/pkg/scheduler/plugins/loadaware/cache.go new file mode 100644 index 000000000..2078c8a40 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/cache.go @@ -0,0 +1,76 @@ +package loadaware + +import ( + "sync" + "time" + + v1 "k8s.io/api/core/v1" +) + +var cache *Cache + +func init() { + cache = &Cache{ + NodePodInfo: map[string]*NodeCache{}, + } +} + +type Cache struct { + sync.RWMutex + + // key: nodeName, value: NodeCache + NodePodInfo map[string]*NodeCache +} + +type NodeCache struct { + // key: podUID, value: PodInfo + PodInfoMap 
map[string]*PodInfo +} + +type PodInfo struct { + pod *v1.Pod + startTime time.Time +} + +func (c *Cache) addPod(nodeName string, pod *v1.Pod, time time.Time) { + if pod == nil { + return + } + podUID := string(pod.UID) + c.Lock() + defer c.Unlock() + + nodeCache, ok := c.NodePodInfo[nodeName] + if !ok { + nodeCache = &NodeCache{ + PodInfoMap: map[string]*PodInfo{}, + } + } + + _, ok = nodeCache.PodInfoMap[podUID] + if ok { + return + } + + nodeCache.PodInfoMap[podUID] = &PodInfo{ + pod: pod, + startTime: time, + } + c.NodePodInfo[nodeName] = nodeCache +} + +func (c *Cache) removePod(nodeName string, pod *v1.Pod) { + c.Lock() + defer c.Unlock() + nodeCache, ok := c.NodePodInfo[nodeName] + if !ok { + return + } + + delete(nodeCache.PodInfoMap, string(pod.UID)) + if len(nodeCache.PodInfoMap) <= 0 { + delete(c.NodePodInfo, nodeName) + } else { + c.NodePodInfo[nodeName] = nodeCache + } +} diff --git a/pkg/scheduler/plugins/loadaware/fit.go b/pkg/scheduler/plugins/loadaware/fit.go new file mode 100644 index 000000000..ca9800341 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/fit.go @@ -0,0 +1,98 @@ +package loadaware + +import ( + "context" + "fmt" + "math" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/scheduler/framework" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" +) + +func (p *Plugin) Filter(_ context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { + if !p.IsLoadAwareEnabled(pod) { + return nil + } + + node := nodeInfo.Node() + if node == nil { + return framework.NewStatus(framework.Unschedulable, "node not found") + } + + if len(p.args.ResourceToThresholdMap) == 0 { + klog.Warningf("load aware fit missing required args") + return nil + } + + // get npd from informer + npd, err := p.npdLister.Get(node.Name) + if err != nil { + klog.Errorf("get npd of node %v failed, err: %v", node.Name, err) + return nil + } + if npd.Status.NodeMetrics == nil { + klog.Errorf("npd of node %s status NodeUsage is nil", node.Name) + return nil + } + + usageInfo := p.getNodeMetrics(npd, loadAwareMetricScope, 15*time.Minute) + if usageInfo == nil { + klog.Errorf("npd of node %s status NodeUsage miss avg_15min metrics", node.Name) + return nil + } + for resourceName, threshold := range p.args.ResourceToThresholdMap { + if threshold == 0 { + continue + } + total := node.Status.Allocatable[resourceName] + if total.IsZero() { + continue + } + used := usageInfo[resourceName] + usage := int64(math.Round(float64(used.MilliValue()) / float64(total.MilliValue()) * 100)) + klog.V(6).Infof("loadAware fit node: %v resource: %v usage: %v, threshold: %v", node.Name, resourceName, usage, threshold) + if usage > threshold { + return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("node(s) %s usage exceed threshold, usage:%v, threshold: %v ", resourceName, usage, threshold)) + } + } + + return nil +} + +func (p *Plugin) getNodeMetrics(npd *v1alpha1.NodeProfileDescriptor, scope string, window time.Duration) v1.ResourceList { + res := v1.ResourceList{} + + for i := range npd.Status.NodeMetrics { + if npd.Status.NodeMetrics[i].Scope != scope { + continue + } + + for _, metricValue := range npd.Status.NodeMetrics[i].Metrics { + if metricValue.Window.Duration == window { + if isMetricExpired(metricValue.Timestamp, *p.args.NodeMetricsExpiredSeconds) { + klog.Warningf("node %v skip expired metric %v, timestamp: %v, now: %v, expiredSeconds: %v", + npd.Name, 
metricValue.MetricName, metricValue.Timestamp, time.Now(), *p.args.NodeMetricsExpiredSeconds) + continue + } + + res[v1.ResourceName(metricValue.MetricName)] = metricValue.Value + } + } + + break + } + if len(res) <= 0 { + return nil + } + + return res +} + +func isMetricExpired(t metav1.Time, nodeMetricExpirationSeconds int64) bool { + return nodeMetricExpirationSeconds > 0 && time.Since(t.Time) > time.Duration(nodeMetricExpirationSeconds)*time.Second +} diff --git a/pkg/scheduler/plugins/loadaware/handler.go b/pkg/scheduler/plugins/loadaware/handler.go new file mode 100644 index 000000000..4690af6ce --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/handler.go @@ -0,0 +1,100 @@ +package loadaware + +import ( + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/informers" + toolcache "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + v1pod "k8s.io/kubernetes/pkg/api/v1/pod" + + "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions" + "github.com/kubewharf/katalyst-core/pkg/scheduler/eventhandlers" +) + +const ( + LoadAwarePodHandler = "LoadAwarePodHandler" + LoadAwareNPDHandler = "LoadAwareNPDHandler" +) + +func RegisterPodHandler() { + eventhandlers.RegisterEventHandler( + LoadAwarePodHandler, + func(informerFactory informers.SharedInformerFactory, _ externalversions.SharedInformerFactory) { + podInformer := informerFactory.Core().V1().Pods() + podInformer.Informer().AddEventHandler( + toolcache.FilteringResourceEventHandler{ + FilterFunc: func(obj interface{}) bool { + return true + }, + Handler: toolcache.ResourceEventHandlerFuncs{ + AddFunc: OnAdd, + UpdateFunc: OnUpdate, + DeleteFunc: OnDelete, + }, + }, + ) + }) +} + +func (p *Plugin) registerNodeMonitorHandler() { + eventhandlers.RegisterEventHandler( + LoadAwareNPDHandler, + func(_ informers.SharedInformerFactory, internalInformerFactory externalversions.SharedInformerFactory) { + p.npdLister = internalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister() + }, + ) +} + +func OnAdd(obj interface{}) { + pod, ok := obj.(*v1.Pod) + if !ok { + klog.Warningf("transfer obj to pod fail") + return + } + nodeName := pod.Spec.NodeName + if nodeName == "" || v1pod.IsPodTerminal(pod) { + return + } + startTime := time.Now() + if pod.Status.StartTime != nil { + startTime = pod.Status.StartTime.Time + } + + cache.addPod(nodeName, pod, startTime) +} + +func OnUpdate(oldObj, newObj interface{}) { + pod, ok := newObj.(*v1.Pod) + if !ok { + return + } + if v1pod.IsPodTerminal(pod) { + cache.removePod(pod.Spec.NodeName, pod) + } else { + //pod delete and pod may merge a update event + assignTime := time.Now() + if pod.Status.StartTime != nil { + assignTime = pod.Status.StartTime.Time + } + cache.addPod(pod.Spec.NodeName, pod, assignTime) + } +} + +func OnDelete(obj interface{}) { + var pod *v1.Pod + switch t := obj.(type) { + case *v1.Pod: + pod = t + case toolcache.DeletedFinalStateUnknown: + var ok bool + pod, ok = t.Obj.(*v1.Pod) + if !ok { + return + } + default: + return + } + cache.removePod(pod.Spec.NodeName, pod) +} diff --git a/pkg/scheduler/plugins/loadaware/plugin.go b/pkg/scheduler/plugins/loadaware/plugin.go new file mode 100644 index 000000000..51090053f --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/plugin.go @@ -0,0 +1,73 @@ +package loadaware + +import ( + "fmt" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/scheduler/framework" + "time" + + "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" + 
"github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config/validation" + listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/consts" +) + +const ( + Name = "LoadAware" + loadAwareMetricScope = "loadAware" + + DefaultNPDReportInterval = 60 * time.Second + DefaultMilliCPURequest int64 = 250 // 0.25 core + DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB +) + +var ( + _ framework.FilterPlugin = &Plugin{} + _ framework.ScorePlugin = &Plugin{} + _ framework.ReservePlugin = &Plugin{} +) + +type Plugin struct { + handle framework.Handle + args *config.LoadAwareArgs + npdLister listers.NodeProfileDescriptorLister +} + +func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + klog.Infof("new loadAware scheduler plugin") + pluginArgs, ok := args.(*config.LoadAwareArgs) + if !ok { + return nil, fmt.Errorf("want args to be of type LoadAwareArgs, got %T", args) + } + if err := validation.ValidateLoadAwareSchedulingArgs(pluginArgs); err != nil { + klog.Errorf("validate pluginArgs fail, err: %v", err) + return nil, err + } + + p := &Plugin{ + handle: handle, + args: pluginArgs, + } + p.registerNodeMonitorHandler() + RegisterPodHandler() + + return p, nil +} + +func (p *Plugin) Name() string { + return Name +} + +func (p *Plugin) IsLoadAwareEnabled(pod *v1.Pod) bool { + if p.args.PodAnnotationLoadAwareEnable == nil || *p.args.PodAnnotationLoadAwareEnable == "" { + return true + } + + if flag, ok := pod.Annotations[*p.args.PodAnnotationLoadAwareEnable]; ok && flag == consts.PodAnnotationLoadAwareEnableTrue { + return true + } + return false +} diff --git a/pkg/scheduler/plugins/loadaware/reserve.go b/pkg/scheduler/plugins/loadaware/reserve.go new file mode 100644 index 000000000..76e59d6d1 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/reserve.go @@ -0,0 +1,18 @@ +package loadaware + +import ( + "context" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/scheduler/framework" +) + +func (p *Plugin) Reserve(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { + cache.addPod(nodeName, pod, time.Now()) + return nil +} + +func (p *Plugin) Unreserve(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) { + cache.removePod(nodeName, pod) +} diff --git a/pkg/scheduler/plugins/loadaware/score.go b/pkg/scheduler/plugins/loadaware/score.go new file mode 100644 index 000000000..4740c7476 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/score.go @@ -0,0 +1,260 @@ +package loadaware + +import ( + "context" + "fmt" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + "math" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + quotav1 "k8s.io/apiserver/pkg/quota/v1" + resourceapi "k8s.io/kubernetes/pkg/api/v1/resource" + "k8s.io/kubernetes/pkg/scheduler/framework" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" + "github.com/kubewharf/katalyst-api/pkg/consts" +) + +const ( + metric5Min = "avg_5min" + metric15Min = "avg_15min" + metric1Hour = "max_1hour" + metric1Day = "max_1day" +) + +func (p *Plugin) ScoreExtensions() framework.ScoreExtensions { + return nil +} + +func (p *Plugin) Score(_ context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) { + if !p.IsLoadAwareEnabled(pod) { + return 0, nil + } + + nodeInfo, err := 
p.handle.SnapshotSharedLister().NodeInfos().Get(nodeName) + if err != nil { + return 0, framework.NewStatus(framework.Unschedulable, fmt.Sprintf("get node %v from Snapshot: %v", nodeName, err)) + } + node := nodeInfo.Node() + if node == nil { + return 0, framework.NewStatus(framework.Unschedulable, "node not found") + } + npd, err := p.npdLister.Get(nodeName) + if err != nil { + return 0, nil + } + + timeStamp := getLoadAwareScopeUpdateTime(npd) + if p.args.NodeMetricsExpiredSeconds != nil && time.Now().After(timeStamp.Add(time.Duration(*p.args.NodeMetricsExpiredSeconds)*time.Second)) { + return 0, nil + } + + loadAwareUsage := p.getLoadAwareResourceList(npd) + + //estimated the recent assign pod usage + estimatedUsed := estimatedPodUsed(pod, p.args.ResourceToWeightMap, p.args.ResourceToScalingFactorMap) + estimatedAssignedPodUsage := p.estimatedAssignedPodUsage(nodeName, timeStamp) + finalEstimatedUsed := quotav1.Add(estimatedUsed, estimatedAssignedPodUsage) + //add estimated usage to avg_15min_usage + finalNodeUsedOfIndicators := make(map[config.IndicatorType]v1.ResourceList) + for indicator := range p.args.CalculateIndicatorWeight { + if loadAwareUsage != nil { + used := loadAwareUsage[string(indicator)] + if indicator == consts.Usage15MinAvgKey { + used = quotav1.Add(used, finalEstimatedUsed) + } + finalNodeUsedOfIndicators[indicator] = used + } + } + score := loadAwareSchedulingScorer(finalNodeUsedOfIndicators, node.Status.Allocatable, p.args.ResourceToWeightMap, p.args.CalculateIndicatorWeight) + klog.V(6).Infof("loadAware score node: %v resourceUsage: %v, score: %v", node.Name, finalNodeUsedOfIndicators, score) + return score, nil +} + +func (p *Plugin) estimatedAssignedPodUsage(nodeName string, updateTime time.Time) v1.ResourceList { + cache.RLock() + defer cache.RUnlock() + var ( + estimatedUsed = make(map[v1.ResourceName]int64) + result = v1.ResourceList{} + ) + nodeCache, ok := cache.NodePodInfo[nodeName] + if !ok { + return result + } + + for _, podInfo := range nodeCache.PodInfoMap { + if isNeedToEstimatedUsage(podInfo, updateTime) { + estimated := estimatedPodUsed(podInfo.pod, p.args.ResourceToWeightMap, p.args.ResourceToScalingFactorMap) + for resourceName, quantity := range estimated { + if resourceName == v1.ResourceCPU { + estimatedUsed[resourceName] += quantity.MilliValue() + } else { + estimatedUsed[resourceName] += quantity.Value() + } + } + } + } + // transfer map[ResourceName]int64 to ResourceList + for resourceName, value := range estimatedUsed { + if resourceName == v1.ResourceCPU { + result[resourceName] = *resource.NewMilliQuantity(value, resource.DecimalSI) + } else { + result[resourceName] = *resource.NewQuantity(value, resource.DecimalSI) + } + } + return result +} + +func (p *Plugin) getLoadAwareResourceList(npd *v1alpha1.NodeProfileDescriptor) map[string]v1.ResourceList { + if npd == nil { + return nil + } + res := make(map[string]v1.ResourceList) + + for i := range npd.Status.NodeMetrics { + if npd.Status.NodeMetrics[i].Scope == loadAwareMetricScope { + for _, metricValue := range npd.Status.NodeMetrics[i].Metrics { + key := metricWindowToKey(metricValue.Window) + if _, ok := res[key]; !ok { + res[key] = v1.ResourceList{} + } + res[key][v1.ResourceName(metricValue.MetricName)] = metricValue.Value + } + } + } + + return res +} + +func estimatedPodUsed(pod *v1.Pod, resourceWeights map[v1.ResourceName]int64, scalingFactors map[v1.ResourceName]int64) v1.ResourceList { + requests, limits := resourceapi.PodRequestsAndLimits(pod) + estimatedUsed := 
v1.ResourceList{} + for resourceName := range resourceWeights { + value := estimatedUsedByResource(requests, limits, resourceName, scalingFactors[resourceName]) + if resourceName == v1.ResourceCPU { + estimatedUsed[resourceName] = *resource.NewMilliQuantity(value, resource.DecimalSI) + } else { + estimatedUsed[resourceName] = *resource.NewQuantity(value, resource.DecimalSI) + } + } + return estimatedUsed +} + +func isNeedToEstimatedUsage(podInfo *PodInfo, updateTime time.Time) bool { + return podInfo.startTime.After(updateTime) +} + +func getLoadAwareScopeUpdateTime(npd *v1alpha1.NodeProfileDescriptor) time.Time { + // all nodeMetrics in loadAware scope have same timestamp + for i := range npd.Status.NodeMetrics { + if npd.Status.NodeMetrics[i].Scope != loadAwareMetricScope { + continue + } + + if len(npd.Status.NodeMetrics[i].Metrics) <= 0 { + break + } + return npd.Status.NodeMetrics[i].Metrics[0].Timestamp.Time + } + return time.Now() +} + +func estimatedUsedByResource(requests, limits v1.ResourceList, resourceName v1.ResourceName, scalingFactor int64) int64 { + limitQuantity := limits[resourceName] + requestQuantity := requests[resourceName] + var quantity resource.Quantity + if limitQuantity.Cmp(requestQuantity) > 0 { + scalingFactor = 100 + quantity = limitQuantity + } else { + quantity = requestQuantity + } + + if quantity.IsZero() { + switch resourceName { + case v1.ResourceCPU: + return DefaultMilliCPURequest + case v1.ResourceMemory: + return DefaultMemoryRequest + } + return 0 + } + + var estimatedUsed int64 + switch resourceName { + case v1.ResourceCPU: + estimatedUsed = int64(math.Round(float64(quantity.MilliValue()) * float64(scalingFactor) / 100)) + default: + estimatedUsed = int64(math.Round(float64(quantity.Value()) * float64(scalingFactor) / 100)) + } + return estimatedUsed +} + +// first calculate cpu/memory score according to avg_15min, max_1hour, max_1day and its weight +// then calculate final score with cpuScore and memoryScore with its weight +func loadAwareSchedulingScorer(usedOfIndicators map[config.IndicatorType]v1.ResourceList, allocatable v1.ResourceList, resourceWeight map[v1.ResourceName]int64, indicatorRatio map[config.IndicatorType]int64) int64 { + var nodeScore, weightSum int64 + // cpu and memory weight + for resourceName, weight := range resourceWeight { + resourceSumScore := int64(0) + ratioSum := int64(0) + // calculate cpu/memory score by avg_15min, max_1hour, max_1day + for indicatorName, ratio := range indicatorRatio { + alloc, ok := allocatable[resourceName] + if !ok { + continue + } + resList := usedOfIndicators[indicatorName] + if resList == nil { + continue + } + quantity, ok := resList[resourceName] + if !ok { + continue + } + resourceScore := int64(0) + if resourceName == v1.ResourceCPU { + resourceScore = leastUsedScore(quantity.MilliValue(), alloc.MilliValue()) + } else { + resourceScore = leastUsedScore(quantity.Value(), alloc.Value()) + } + resourceSumScore += resourceScore * ratio + ratioSum += ratio + } + nodeScore += (resourceSumScore / ratioSum) * weight + weightSum += weight + } + + return nodeScore / weightSum +} + +func leastUsedScore(used, capacity int64) int64 { + if capacity == 0 { + return 0 + } + if used > capacity { + return 0 + } + return ((capacity - used) * framework.MaxNodeScore) / capacity +} + +func metricWindowToKey(window *metav1.Duration) string { + if window.Duration == 5*time.Minute { + return consts.Usage5MinAvgKey + } + if window.Duration == 15*time.Minute { + return consts.Usage15MinAvgKey + } + if 
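To make the scoring arithmetic concrete, a few example values for the least-used score (framework.MaxNodeScore is 100); loadAwareSchedulingScorer then averages these per indicator using CalculateIndicatorWeight and per resource using ResourceToWeightMap:

	_ = leastUsedScore(8000, 32000)  // (32000-8000)*100/32000 = 75
	_ = leastUsedScore(32000, 32000) // a fully used node scores 0
	_ = leastUsedScore(40000, 32000) // usage above capacity is clamped to 0
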
window.Duration == time.Hour { + return consts.Usage1HourMaxKey + } + if window.Duration == 24*time.Hour { + return consts.Usage1DayMaxKey + } + return "" +} From 0802c8ac3ce643b761fbfce9cb0bfce1a1ff4bb4 Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Thu, 27 Jun 2024 19:45:53 +0800 Subject: [PATCH 5/6] add spd portrait loadaware scheduler plugin --- go.mod | 2 +- go.sum | 4 +- pkg/scheduler/plugins/loadaware/cache.go | 252 +++++++++++++-- pkg/scheduler/plugins/loadaware/cache_test.go | 123 ++++++++ pkg/scheduler/plugins/loadaware/fit.go | 59 ++++ pkg/scheduler/plugins/loadaware/fit_test.go | 291 ++++++++++++++++++ pkg/scheduler/plugins/loadaware/handler.go | 13 +- pkg/scheduler/plugins/loadaware/helper.go | 110 +++++++ pkg/scheduler/plugins/loadaware/plugin.go | 217 ++++++++++++- pkg/scheduler/plugins/loadaware/score.go | 83 ++++- 10 files changed, 1121 insertions(+), 33 deletions(-) create mode 100644 pkg/scheduler/plugins/loadaware/cache_test.go create mode 100644 pkg/scheduler/plugins/loadaware/fit_test.go create mode 100644 pkg/scheduler/plugins/loadaware/helper.go diff --git a/go.mod b/go.mod index e364d09a0..4a7c34c23 100644 --- a/go.mod +++ b/go.mod @@ -161,7 +161,7 @@ require ( ) replace ( - github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334 + github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11 k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 8499e9491..717e5ff1a 100644 --- a/go.sum +++ b/go.sum @@ -84,8 +84,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334 h1:vszHYT1PweqTV09lVUeZW8vE4bKNp6HGskpocSABxXg= -github.com/WangZzzhe/katalyst-api v0.0.0-20240624040716-906659246334/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= +github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11 h1:4RUG7QfX0hBwtHtI3Nll6F4lCP31ThYxkWIu93G6Ei4= +github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/pkg/scheduler/plugins/loadaware/cache.go b/pkg/scheduler/plugins/loadaware/cache.go index 2078c8a40..20c9e8409 100644 --- a/pkg/scheduler/plugins/loadaware/cache.go +++ b/pkg/scheduler/plugins/loadaware/cache.go @@ -1,10 +1,12 @@ package loadaware import ( + "fmt" "sync" "time" v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" ) var cache *Cache @@ -15,16 +17,35 @@ func init() { } } +type SPDLister interface { + GetPodPortrait(pod *v1.Pod) *ResourceUsage +} + type Cache struct { sync.RWMutex + // node predict usage cache is calculated by portrait from SPDLister, + // it is possible to cause errors due 
to the update of portrait data, + // but these errors can be tolerated because the update frequency of the portrait is very low and there will not be too much error between the daily data. + // Reconcile will correct these errors. + + // node predict usage also can be summed when scheduling pod, + // it makes the results more accurate, but it brings more performance loss. + podPortraitLister SPDLister + // key: nodeName, value: NodeCache + // The value of this map is a pointer, the data of different nodes can be modified concurrently without causing concurrent read/write problems. + // NodeCache should be locked to ensure that the data of the same node will not be concurrently written, resulting in data coverage. NodePodInfo map[string]*NodeCache } type NodeCache struct { + sync.RWMutex + // key: podUID, value: PodInfo PodInfoMap map[string]*PodInfo + + PredictUsage *ResourceUsage } type PodInfo struct { @@ -32,45 +53,240 @@ type PodInfo struct { startTime time.Time } +type ResourceUsage struct { + Cpu []float64 + Memory []float64 +} + +func (c *Cache) ReconcilePredictUsage() { + if c.podPortraitLister == nil { + return + } + + startTime := time.Now() + + // use read lock here, only adding node and removing node will be blocked by reconcile. + c.RLock() + defer c.RUnlock() + + for nodeName, nc := range c.NodePodInfo { + nc.Lock() + + var ( + nodePredictUsage = &ResourceUsage{ + Cpu: make([]float64, portraitItemsLength, portraitItemsLength), + Memory: make([]float64, portraitItemsLength, portraitItemsLength)} + err error + ) + for _, podInfo := range nc.PodInfoMap { + podResourceUsage := c.podPortraitLister.GetPodPortrait(podInfo.pod) + err = nodePredictUsage.add(podResourceUsage) + if err != nil { + klog.Error(err) + break + } + klog.V(6).Infof("ReconcilePredictUsage,pod %v cpu resourceUsage: %v, memory resourceUsage: %v", podInfo.pod.Name, podResourceUsage.Cpu, podResourceUsage.Memory) + } + if err != nil { + klog.Errorf("node %v update predict usage fail: %v, keep old predictUsage", nodeName, err) + nc.Unlock() + continue + } + nc.PredictUsage = nodePredictUsage + klog.V(6).Infof("ReconcilePredictUsage, node %v cpu resourceUsage: %v, memory resourceUsage: %v", nodeName, nodePredictUsage.Cpu, nodePredictUsage.Memory) + + nc.Unlock() + } + + klog.Infof("ReconcilePredictUsage, startTime: %v, duration: %v", startTime, time.Now().Sub(startTime)) +} + +func (c *Cache) SetSPDLister(lister SPDLister) { + if c.podPortraitLister != nil { + klog.Warningf("cache podPortraitLister has been set") + return + } + + c.podPortraitLister = lister +} + +func (c *Cache) GetNodePredictUsage(nodeName string) *ResourceUsage { + c.RLock() + nodeCache, ok := c.NodePodInfo[nodeName] + c.RUnlock() + if !ok { + return &ResourceUsage{ + Cpu: make([]float64, portraitItemsLength, portraitItemsLength), + Memory: make([]float64, portraitItemsLength, portraitItemsLength), + } + } + + return nodeCache.getPredictUsageCopy() +} + func (c *Cache) addPod(nodeName string, pod *v1.Pod, time time.Time) { if pod == nil { return } - podUID := string(pod.UID) - c.Lock() - defer c.Unlock() + c.RLock() nodeCache, ok := c.NodePodInfo[nodeName] + c.RUnlock() if !ok { nodeCache = &NodeCache{ PodInfoMap: map[string]*PodInfo{}, } - } - _, ok = nodeCache.PodInfoMap[podUID] - if ok { - return + c.Lock() + c.NodePodInfo[nodeName] = nodeCache + c.Unlock() } - nodeCache.PodInfoMap[podUID] = &PodInfo{ - pod: pod, - startTime: time, - } - c.NodePodInfo[nodeName] = nodeCache + nodeCache.addPod(pod, time, c.podPortraitLister, nodeName) } func (c 
*Cache) removePod(nodeName string, pod *v1.Pod) { - c.Lock() - defer c.Unlock() + c.RLock() nodeCache, ok := c.NodePodInfo[nodeName] + c.RUnlock() if !ok { return } - delete(nodeCache.PodInfoMap, string(pod.UID)) - if len(nodeCache.PodInfoMap) <= 0 { + podNum := nodeCache.removePod(pod, c.podPortraitLister, nodeName) + + c.Lock() + defer c.Unlock() + if podNum <= 0 { delete(c.NodePodInfo, nodeName) - } else { - c.NodePodInfo[nodeName] = nodeCache } } + +func (nc *NodeCache) addPod(pod *v1.Pod, time time.Time, spdLister SPDLister, nodeName string) { + podUID := string(pod.UID) + nc.Lock() + defer nc.Unlock() + + if _, ok := nc.PodInfoMap[podUID]; ok { + return + } + + // update node pod info + nc.PodInfoMap[podUID] = &PodInfo{ + pod: pod, + startTime: time, + } + + // update node usage if spdLister is not nil + if spdLister != nil { + podPortrait := spdLister.GetPodPortrait(pod) + if nc.PredictUsage == nil { + nc.PredictUsage = &ResourceUsage{ + Cpu: make([]float64, portraitItemsLength, portraitItemsLength), + Memory: make([]float64, portraitItemsLength, portraitItemsLength), + } + } + + err := nc.PredictUsage.add(podPortrait) + if err != nil { + klog.Errorf("%s nodeCache add pod %v portrait fail: %v", nodeName, pod.Name, err) + return + } + klog.V(6).Infof("node %v add pod %v portrait, cpu: %v, memory: %v", nodeName, pod.Name, podPortrait.Cpu, podPortrait.Memory) + } +} + +func (nc *NodeCache) removePod(pod *v1.Pod, spdLister SPDLister, nodeName string) int { + podUID := string(pod.UID) + nc.Lock() + defer nc.Unlock() + + delete(nc.PodInfoMap, podUID) + + if spdLister != nil { + podPortrait := spdLister.GetPodPortrait(pod) + + if nc.PredictUsage == nil { + klog.Errorf("remove pod from node %v without predictUsage", nodeName) + } else { + err := nc.PredictUsage.sub(podPortrait) + if err != nil { + klog.Errorf("%s nodeCache remove pod %v portrait fail: %v", nodeName, pod.Name, err) + } + } + } + return len(nc.PodInfoMap) +} + +func (nc *NodeCache) getPredictUsageCopy() *ResourceUsage { + res := &ResourceUsage{ + Cpu: make([]float64, portraitItemsLength, portraitItemsLength), + Memory: make([]float64, portraitItemsLength, portraitItemsLength), + } + + nc.RLock() + defer nc.RUnlock() + for i := range nc.PredictUsage.Cpu { + res.Cpu[i] = nc.PredictUsage.Cpu[i] + res.Memory[i] = nc.PredictUsage.Memory[i] + } + + return res +} + +func (r *ResourceUsage) add(data *ResourceUsage) error { + if len(data.Cpu) != portraitItemsLength || len(data.Memory) != portraitItemsLength { + return fmt.Errorf("portrait data length cpu: %v memory: %v not support", len(data.Cpu), len(data.Memory)) + } + + for i := 0; i < portraitItemsLength; i++ { + r.Cpu[i] += data.Cpu[i] + r.Memory[i] += data.Memory[i] + } + + return nil +} + +func (r *ResourceUsage) sub(data *ResourceUsage) error { + if len(data.Cpu) != portraitItemsLength || len(data.Memory) != portraitItemsLength { + return fmt.Errorf("portrait data length cpu: %v memory: %v not support", len(data.Cpu), len(data.Memory)) + } + + for i := 0; i < portraitItemsLength; i++ { + r.Cpu[i] -= data.Cpu[i] + r.Memory[i] -= data.Memory[i] + + if r.Cpu[i] < 0 { + r.Cpu[i] = 0 + } + if r.Memory[i] < 0 { + r.Memory[i] = 0 + } + } + + return nil +} + +func (r *ResourceUsage) max(resourceName v1.ResourceName) float64 { + var ( + data []float64 + res float64 + ) + + switch resourceName { + case v1.ResourceCPU: + data = r.Cpu + case v1.ResourceMemory: + data = r.Memory + default: + klog.Warningf("unsupported resource %v", resourceName.String()) + return res + } + + for i := range 
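A minimal sketch of how the 24-point hourly portraits combine into a node-level prediction (same package; seriesOf is a hypothetical helper and the numbers are made up):

	seriesOf := func(v float64) []float64 { // hypothetical: 24 identical hourly points
		s := make([]float64, portraitItemsLength)
		for i := range s {
			s[i] = v
		}
		return s
	}

	node := &ResourceUsage{
		Cpu:    make([]float64, portraitItemsLength),
		Memory: make([]float64, portraitItemsLength),
	}
	_ = node.add(&ResourceUsage{Cpu: seriesOf(500), Memory: seriesOf(1 << 30)})
	_ = node.add(&ResourceUsage{Cpu: seriesOf(250), Memory: seriesOf(512 << 20)})

	// The filter and score phases compare the peak of the summed series
	// against the node allocatable.
	_ = node.max(v1.ResourceCPU) // 750 milli-CPU in this sketch
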
data { + if data[i] > res { + res = data[i] + } + } + return res +} diff --git a/pkg/scheduler/plugins/loadaware/cache_test.go b/pkg/scheduler/plugins/loadaware/cache_test.go new file mode 100644 index 000000000..433b7cced --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/cache_test.go @@ -0,0 +1,123 @@ +package loadaware + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/pkg/scheduler/framework" +) + +func TestAddPod(t *testing.T) { + t.Parallel() + + c := &Cache{ + NodePodInfo: map[string]*NodeCache{}, + } + + c.addPod("testNode", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod", + UID: "testPod", + }, + }, time.Now()) + assert.Equal(t, 1, len(c.NodePodInfo["testNode"].PodInfoMap)) + + c.addPod("testNode", nil, time.Now()) + assert.Equal(t, 1, len(c.NodePodInfo["testNode"].PodInfoMap)) + + c.addPod("testNode", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod", + UID: "testPod", + }, + }, time.Now()) + assert.Equal(t, 1, len(c.NodePodInfo["testNode"].PodInfoMap)) + + c.addPod("testNode", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod2", + UID: "testPod2", + }, + }, time.Now()) + assert.Equal(t, 2, len(c.NodePodInfo["testNode"].PodInfoMap)) + + c.addPod("testNode2", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod2", + UID: "testPod2", + }, + }, time.Now()) + assert.Equal(t, 2, len(c.NodePodInfo)) + + c.removePod("testNode2", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod2", + UID: "testPod2", + }, + }) + assert.Equal(t, 1, len(c.NodePodInfo)) + + c.removePod("testNode", &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod2", + UID: "testPod2", + }}) + assert.Equal(t, 1, len(c.NodePodInfo["testNode"].PodInfoMap)) +} + +type testSharedLister struct { + nodes []*v1.Node + nodeInfos []*framework.NodeInfo + nodeInfoMap map[string]*framework.NodeInfo +} + +func (f *testSharedLister) NodeInfos() framework.NodeInfoLister { + return f +} + +func (f *testSharedLister) List() ([]*framework.NodeInfo, error) { + return f.nodeInfos, nil +} + +func (f *testSharedLister) HavePodsWithAffinityList() ([]*framework.NodeInfo, error) { + return nil, nil +} + +func (f *testSharedLister) HavePodsWithRequiredAntiAffinityList() ([]*framework.NodeInfo, error) { + return nil, nil +} + +func (f *testSharedLister) Get(nodeName string) (*framework.NodeInfo, error) { + return f.nodeInfoMap[nodeName], nil +} + +func newTestSharedLister(pods []*v1.Pod, nodes []*v1.Node) *testSharedLister { + nodeInfoMap := make(map[string]*framework.NodeInfo) + nodeInfos := make([]*framework.NodeInfo, 0) + for _, pod := range pods { + nodeName := pod.Spec.NodeName + if _, ok := nodeInfoMap[nodeName]; !ok { + nodeInfoMap[nodeName] = framework.NewNodeInfo() + } + nodeInfoMap[nodeName].AddPod(pod) + } + for _, node := range nodes { + if _, ok := nodeInfoMap[node.Name]; !ok { + nodeInfoMap[node.Name] = framework.NewNodeInfo() + } + nodeInfoMap[node.Name].SetNode(node) + } + + for _, v := range nodeInfoMap { + nodeInfos = append(nodeInfos, v) + } + + return &testSharedLister{ + nodes: nodes, + nodeInfos: nodeInfos, + nodeInfoMap: nodeInfoMap, + } +} diff --git a/pkg/scheduler/plugins/loadaware/fit.go b/pkg/scheduler/plugins/loadaware/fit.go index ca9800341..2b1590a3e 100644 --- a/pkg/scheduler/plugins/loadaware/fit.go +++ b/pkg/scheduler/plugins/loadaware/fit.go @@ -19,6 +19,21 @@ func (p *Plugin) Filter(_ context.Context, _ *framework.CycleState, pod *v1.Pod, return nil } + // fit by 
node metrics + status := p.fitByNPD(nodeInfo) + if status != nil || status.IsUnschedulable() { + return status + } + + if p.enablePortrait() { + // fit by workload portrait + status = p.fitByPortrait(pod, nodeInfo) + } + + return status +} + +func (p *Plugin) fitByNPD(nodeInfo *framework.NodeInfo) *framework.Status { node := nodeInfo.Node() if node == nil { return framework.NewStatus(framework.Unschedulable, "node not found") @@ -64,6 +79,50 @@ func (p *Plugin) Filter(_ context.Context, _ *framework.CycleState, pod *v1.Pod, return nil } +func (p *Plugin) fitByPortrait(pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status { + if pod == nil { + return nil + } + if nodeInfo == nil || nodeInfo.Node() == nil { + return nil + } + + nodePredictUsage, err := p.getNodePredictUsage(pod, nodeInfo.Node().Name) + if err != nil { + klog.Error(err) + return nil + } + + // check if nodePredictUsage is greater than threshold + for _, resourceName := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} { + threshold, ok := p.args.ResourceToThresholdMap[resourceName] + if !ok { + continue + } + total := nodeInfo.Node().Status.Allocatable[resourceName] + if total.IsZero() { + continue + } + var totalValue int64 + if resourceName == v1.ResourceCPU { + totalValue = total.MilliValue() + } else { + totalValue = total.Value() + } + + maxUsage := nodePredictUsage.max(resourceName) + usageRatio := int64(math.Round(maxUsage / float64(totalValue) * 100)) + klog.V(6).Infof("loadAware fit pod %v, node %v, resource %v, threshold: %v, usageRatio: %v, maxUsage: %v, nodeTotal %v", + pod.Name, nodeInfo.Node().Name, resourceName, threshold, usageRatio, maxUsage, totalValue) + + if usageRatio > threshold { + return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("node(s) %s usage exceed threshold, usage:%v, threshold: %v ", resourceName, usageRatio, threshold)) + } + } + + return nil +} + func (p *Plugin) getNodeMetrics(npd *v1alpha1.NodeProfileDescriptor, scope string, window time.Duration) v1.ResourceList { res := v1.ResourceList{} diff --git a/pkg/scheduler/plugins/loadaware/fit_test.go b/pkg/scheduler/plugins/loadaware/fit_test.go new file mode 100644 index 000000000..cc5880777 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/fit_test.go @@ -0,0 +1,291 @@ +package loadaware + +import ( + "context" + "math/rand" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + cache2 "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/scheduler/framework" + "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + "k8s.io/metrics/pkg/apis/metrics/v1beta1" + "k8s.io/utils/pointer" + + "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" + "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + "github.com/kubewharf/katalyst-core/pkg/scheduler/util" +) + +func TestFitByPortrait(t *testing.T) { + t.Parallel() + + util.SetQoSConfig(generic.NewQoSConfiguration()) + + for _, tc := range []struct { + name string + pod *v1.Pod + node *v1.Node + pods []*v1.Pod + portraits []*v1alpha1.ServiceProfileDescriptor + expectRes *framework.Status + }{ + { + name: "", + pod: &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: 
"Deployment", + Name: "deployment1", + }, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "testContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + }, + }, + }, + }, + }, + node: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + pods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod3", + UID: "pod3UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + }, + portraits: []*v1alpha1.ServiceProfileDescriptor{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment1", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: rangeItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment2", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment3", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(8, 16*1024*1024*1024), + }, + }, + }, + }, + }, + expectRes: nil, + }, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(tc.node) + for _, pod := range tc.pods { + nodeInfo.AddPod(pod) + } + fw, err := runtime.NewFramework(nil, nil, + runtime.WithSnapshotSharedLister(newTestSharedLister(tc.pods, []*v1.Node{tc.node}))) + assert.NoError(t, err) + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + handle: fw, + args: makeTestArgs(), + spdLister: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister(), + spdHasSynced: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced, + } + cache.SetSPDLister(p) + + for _, pr := range tc.portraits { + _, err = controlCtx.Client.InternalClient.WorkloadV1alpha1().ServiceProfileDescriptors(pr.Namespace). 
+ Create(context.TODO(), pr, v12.CreateOptions{}) + assert.NoError(t, err) + } + controlCtx.StartInformer(context.TODO()) + + // wait for portrait synced + if !cache2.WaitForCacheSync(context.TODO().Done(), p.spdHasSynced) { + t.Error("wait for portrait informer synced fail") + t.FailNow() + } + + // add pod to cache + for _, pod := range tc.pods { + cache.addPod(tc.node.Name, pod, time.Now()) + } + + status := p.fitByPortrait(tc.pod, nodeInfo) + + if tc.expectRes == nil { + assert.Nil(t, status) + } else { + assert.Equal(t, tc.expectRes.Code(), status.Code()) + } + }) + } +} + +func fixedItems(cpu, memory int64) []v1beta1.PodMetrics { + res := make([]v1beta1.PodMetrics, portraitItemsLength, portraitItemsLength) + + t := time.Now() + for i := 0; i < portraitItemsLength; i++ { + res[i].Timestamp = metav1.Time{Time: t.Add(time.Duration(i) * time.Hour)} + res[i].Containers = []v1beta1.ContainerMetrics{ + { + Name: spdPortraitLoadAwareMetricName, + Usage: map[v1.ResourceName]resource.Quantity{ + cpuUsageMetric: *resource.NewQuantity(cpu, resource.DecimalSI), + memoryUsageMetric: *resource.NewQuantity(memory, resource.BinarySI), + }, + }, + } + } + + return res +} + +func rangeItems(cpu, memory int64) []v1beta1.PodMetrics { + res := make([]v1beta1.PodMetrics, portraitItemsLength, portraitItemsLength) + + t := time.Now() + rand.Seed(t.UnixNano()) + for i := 0; i < portraitItemsLength; i++ { + res[i].Timestamp = metav1.Time{Time: t.Add(time.Duration(i) * time.Hour)} + res[i].Containers = []v1beta1.ContainerMetrics{ + { + Name: spdPortraitLoadAwareMetricName, + Usage: map[v1.ResourceName]resource.Quantity{ + cpuUsageMetric: *resource.NewQuantity(rand.Int63n(cpu), resource.DecimalSI), + memoryUsageMetric: *resource.NewQuantity(rand.Int63n(memory), resource.BinarySI), + }, + }, + } + } + + return res +} + +func makeTestArgs() *config.LoadAwareArgs { + args := &config.LoadAwareArgs{ + EnablePortrait: pointer.Bool(true), + ResourceToTargetMap: map[v1.ResourceName]int64{ + v1.ResourceCPU: 40, + v1.ResourceMemory: 50, + }, + ResourceToThresholdMap: map[v1.ResourceName]int64{ + v1.ResourceCPU: 60, + v1.ResourceMemory: 80, + }, + ResourceToScalingFactorMap: map[v1.ResourceName]int64{ + v1.ResourceCPU: 100, + v1.ResourceMemory: 100, + }, + ResourceToWeightMap: map[v1.ResourceName]int64{ + v1.ResourceCPU: 1, + v1.ResourceMemory: 1, + }, + } + args.PodAnnotationLoadAwareEnable = new(string) + *args.PodAnnotationLoadAwareEnable = "" + + return args +} + +func TestTTT(t *testing.T) { + a := resource.MustParse("4") + cpu := a.MilliValue() + t.Log(cpu) + + a = resource.MustParse("165m") + cpu = a.MilliValue() + t.Log(cpu) +} diff --git a/pkg/scheduler/plugins/loadaware/handler.go b/pkg/scheduler/plugins/loadaware/handler.go index 4690af6ce..a1a7004a5 100644 --- a/pkg/scheduler/plugins/loadaware/handler.go +++ b/pkg/scheduler/plugins/loadaware/handler.go @@ -16,6 +16,7 @@ import ( const ( LoadAwarePodHandler = "LoadAwarePodHandler" LoadAwareNPDHandler = "LoadAwareNPDHandler" + LoadAwareSPDHandler = "LoadAwareSPDHandler" ) func RegisterPodHandler() { @@ -38,7 +39,7 @@ func RegisterPodHandler() { }) } -func (p *Plugin) registerNodeMonitorHandler() { +func (p *Plugin) registerNPDHandler() { eventhandlers.RegisterEventHandler( LoadAwareNPDHandler, func(_ informers.SharedInformerFactory, internalInformerFactory externalversions.SharedInformerFactory) { @@ -47,6 +48,16 @@ func (p *Plugin) registerNodeMonitorHandler() { ) } +func (p *Plugin) registerSPDHandler() { + eventhandlers.RegisterEventHandler( + 
LoadAwareSPDHandler, + func(_ informers.SharedInformerFactory, internalInformerFactory externalversions.SharedInformerFactory) { + p.spdLister = internalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister() + p.spdHasSynced = internalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced + }, + ) +} + func OnAdd(obj interface{}) { pod, ok := obj.(*v1.Pod) if !ok { diff --git a/pkg/scheduler/plugins/loadaware/helper.go b/pkg/scheduler/plugins/loadaware/helper.go new file mode 100644 index 000000000..7f4c00d4b --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/helper.go @@ -0,0 +1,110 @@ +package loadaware + +import ( + "fmt" + "math" + "strings" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-core/pkg/util/native" + "k8s.io/kubernetes/pkg/scheduler/framework" +) + +type Item struct { + Value int64 + Timestamp time.Time +} + +type Items []Item + +func (it Items) Len() int { + return len(it) +} + +func (it Items) Swap(i, j int) { + it[i], it[j] = it[j], it[i] +} + +func (it Items) Less(i, j int) bool { + + location, err := time.LoadLocation("Asia/Shanghai") + if err != nil { + location = time.Local + } + // sort sample timestamp hour + houri := it[i].Timestamp.In(location).Hour() + hourj := it[i].Timestamp.In(location).Hour() + + return houri < hourj +} + +func podToWorkloadByOwner(pod *v1.Pod) (string, string, bool) { + for _, owner := range pod.OwnerReferences { + kind := owner.Kind + switch kind { + // resource portrait time series predicted and stored by deployment, but pod owned by rs + case "ReplicaSet": + names := strings.Split(owner.Name, "-") + if len(names) <= 1 { + klog.Warningf("unexpected rs name: %v", owner.Name) + return "", "", false + } + names = names[0 : len(names)-1] + return strings.Join(names, "-"), "Deployment", true + default: + return owner.Name, kind, true + } + } + + return "", "", false +} + +func cpuTimeSeriesByRequest(podResource v1.ResourceList, scaleFactor float64) []float64 { + timeSeries := make([]float64, portraitItemsLength, portraitItemsLength) + + if podResource.Cpu() != nil && !podResource.Cpu().IsZero() { + cpuUsage := native.MultiplyResourceQuantity(v1.ResourceCPU, *podResource.Cpu(), scaleFactor) + for i := range timeSeries { + timeSeries[i] = float64(cpuUsage.MilliValue()) + } + } + return timeSeries +} + +func memoryTimeSeriesByRequest(podResource v1.ResourceList, scaleFactor float64) []float64 { + timeSeries := make([]float64, portraitItemsLength, portraitItemsLength) + + if podResource.Memory() != nil && !podResource.Memory().IsZero() { + memoryUsage := native.MultiplyResourceQuantity(v1.ResourceMemory, *podResource.Memory(), scaleFactor) + for i := range timeSeries { + timeSeries[i] = float64(memoryUsage.Value()) + } + } + return timeSeries +} + +func targetLoadPacking(targetRatio, usageRatio float64) (int64, error) { + var score int64 + if targetRatio <= 0 || targetRatio >= 100 { + return 0, fmt.Errorf("target %v is not supported", targetRatio) + } + if usageRatio < 0 { + klog.Warningf("usageRatio %v less than zero", usageRatio) + usageRatio = 0 + } + if usageRatio > 100 { + klog.Warningf("usageRatio %v greater than 100", usageRatio) + return framework.MinNodeScore, nil + } + + if usageRatio <= targetRatio { + score = int64(math.Round((100-targetRatio)*usageRatio/targetRatio + targetRatio)) + } else { + score = int64(math.Round(targetRatio * (100 - usageRatio) / (100 - targetRatio))) + } + + return score, nil +} diff --git 
a/pkg/scheduler/plugins/loadaware/plugin.go b/pkg/scheduler/plugins/loadaware/plugin.go index 51090053f..c1da0cd54 100644 --- a/pkg/scheduler/plugins/loadaware/plugin.go +++ b/pkg/scheduler/plugins/loadaware/plugin.go @@ -1,27 +1,40 @@ package loadaware import ( + "context" "fmt" + "sort" + "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + toolscache "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" - "time" "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config/validation" + "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" + workloadlisters "github.com/kubewharf/katalyst-api/pkg/client/listers/workload/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/util/native" ) const ( Name = "LoadAware" loadAwareMetricScope = "loadAware" - DefaultNPDReportInterval = 60 * time.Second - DefaultMilliCPURequest int64 = 250 // 0.25 core - DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB + DefaultMilliCPURequest int64 = 250 // 0.25 core + DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB + + portraitItemsLength = 24 + spdPortraitLoadAwareMetricName = "overcommit-predict" + spdPortraitScope = "ResourcePortraitIndicatorPlugin" + + cpuUsageMetric = "cpu_utilization_usage_seconds_max" + memoryUsageMetric = "memory_utilization_max" ) var ( @@ -31,9 +44,11 @@ var ( ) type Plugin struct { - handle framework.Handle - args *config.LoadAwareArgs - npdLister listers.NodeProfileDescriptorLister + handle framework.Handle + args *config.LoadAwareArgs + npdLister listers.NodeProfileDescriptorLister + spdLister workloadlisters.ServiceProfileDescriptorLister + spdHasSynced toolscache.InformerSynced } func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { @@ -51,9 +66,24 @@ func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, handle: handle, args: pluginArgs, } - p.registerNodeMonitorHandler() + p.registerNPDHandler() + p.registerSPDHandler() RegisterPodHandler() + if p.enablePortrait() { + cache.SetSPDLister(p) + } + + go func() { + wait.Until(func() { + if p.spdHasSynced == nil || !p.spdHasSynced() { + klog.Warningf("portrait has not synced, skip") + return + } + cache.ReconcilePredictUsage() + }, time.Hour, context.TODO().Done()) + }() + return p, nil } @@ -61,6 +91,170 @@ func (p *Plugin) Name() string { return Name } +func (p *Plugin) GetPodPortrait(pod *v1.Pod) *ResourceUsage { + startTime := time.Now() + defer func() { + if klog.V(6).Enabled() { + duration := time.Now().Sub(startTime) + klog.Infof("GetPodPortrait duration: %v", duration.String()) + } + }() + ownerName, _, ok := podToWorkloadByOwner(pod) + if !ok { + return p.portraitByRequest(pod) + } + + // get pod workload spd + spd, err := p.getSPD(ownerName, pod.GetNamespace()) + if err != nil { + return p.portraitByRequest(pod) + } + + // get portrait metrics from spd + podResourceUsage, err := p.getPortraitTimeSeries(spd) + if err != nil { + klog.Errorf("getPortraitTimeSeries fail, namespace: %v, workload: %v, err: %v", pod.GetNamespace(), ownerName, err) + return p.portraitByRequest(pod) + } + + // validate metrics + if len(podResourceUsage.Cpu) != portraitItemsLength { + resourceList := native.SumUpPodRequestResources(pod) + cpuScaleFactor := 
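When the portrait series is missing or malformed, the plugin falls back to a flat, request-based series; a small sketch of that fallback (same package, scaling factor 1.0, i.e. 100%):

	reqs := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("2"),
		v1.ResourceMemory: resource.MustParse("4Gi"),
	}
	cpuSeries := cpuTimeSeriesByRequest(reqs, 1.0)    // 24 points, each 2000 (milli-CPU)
	memSeries := memoryTimeSeriesByRequest(reqs, 1.0) // 24 points, each 4Gi in bytes
	_, _ = cpuSeries, memSeries
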
p.args.ResourceToScalingFactorMap[v1.ResourceCPU] + if cpuScaleFactor == 0 { + cpuScaleFactor = 100 + } + podResourceUsage.Cpu = cpuTimeSeriesByRequest(resourceList, float64(cpuScaleFactor)/100.0) + } + if len(podResourceUsage.Memory) != portraitItemsLength { + resourceList := native.SumUpPodRequestResources(pod) + memScaleFactor := p.args.ResourceToScalingFactorMap[v1.ResourceMemory] + if memScaleFactor == 0 { + memScaleFactor = 100 + } + podResourceUsage.Memory = memoryTimeSeriesByRequest(resourceList, float64(memScaleFactor)/100.0) + } + + return podResourceUsage +} + +func (p *Plugin) getSPD(workloadName, namespace string) (*v1alpha1.ServiceProfileDescriptor, error) { + spd, err := p.spdLister.ServiceProfileDescriptors(namespace).Get(workloadName) + if err != nil { + klog.V(5).Infof("get SPD fail, workloadName: %v, namespace: %v, err: %v", workloadName, namespace, err) + return nil, err + } + if spd == nil { + err = fmt.Errorf("get nil SPD, workloadName: %v, namespace: %v", workloadName, namespace) + klog.V(5).Infof(err.Error()) + return nil, err + } + + return spd, nil +} + +func (p *Plugin) getPortraitTimeSeries(spd *v1alpha1.ServiceProfileDescriptor) (*ResourceUsage, error) { + if spd == nil { + return nil, fmt.Errorf("spd is nil") + } + + var ( + cpuUsages = make([]Item, 0) + memoryUsages = make([]Item, 0) + res = &ResourceUsage{ + Cpu: make([]float64, 0), + Memory: make([]float64, 0), + } + ) + + for i := range spd.Status.AggMetrics { + if spd.Status.AggMetrics[i].Scope != spdPortraitScope { + continue + } + + // podMetric contains metrics from multiple sources at a certain timestamp + for j := range spd.Status.AggMetrics[i].Items { + t := spd.Status.AggMetrics[i].Items[j].Timestamp.Time + for _, metrics := range spd.Status.AggMetrics[i].Items[j].Containers { + if metrics.Name == spdPortraitLoadAwareMetricName { + cpuUsage, ok := metrics.Usage[cpuUsageMetric] + if ok { + cpuUsages = append(cpuUsages, Item{ + Value: cpuUsage.MilliValue(), + Timestamp: t, + }) + } + + memoryUsage, ok := metrics.Usage[memoryUsageMetric] + if ok { + memoryUsages = append(memoryUsages, Item{ + Value: memoryUsage.Value(), + Timestamp: t, + }) + } + } + } + } + } + + if len(cpuUsages) != portraitItemsLength { + klog.Errorf("portrait %v metric more than %v: %v", cpuUsageMetric, portraitItemsLength, len(cpuUsages)) + cpuUsages = make([]Item, 0) + } + if len(memoryUsages) != portraitItemsLength { + klog.Errorf("portrait %v metric more than %v: %v", memoryUsageMetric, portraitItemsLength, len(memoryUsages)) + memoryUsages = make([]Item, 0) + } + + sort.Sort(Items(cpuUsages)) + sort.Sort(Items(memoryUsages)) + for i := range cpuUsages { + res.Cpu = append(res.Cpu, float64(cpuUsages[i].Value)) + } + for i := range memoryUsages { + res.Memory = append(res.Memory, float64(memoryUsages[i].Value)) + } + + return res, nil +} + +func (p *Plugin) portraitByRequest(pod *v1.Pod) *ResourceUsage { + res := &ResourceUsage{} + + resourceList := native.SumUpPodRequestResources(pod) + + cpuScaleFactor := p.args.ResourceToScalingFactorMap[v1.ResourceCPU] + if cpuScaleFactor == 0 { + cpuScaleFactor = 100 + } + memScaleFactor := p.args.ResourceToScalingFactorMap[v1.ResourceMemory] + if memScaleFactor == 0 { + memScaleFactor = 100 + } + cpuSeries := cpuTimeSeriesByRequest(resourceList, float64(cpuScaleFactor)/100.0) + memSeries := memoryTimeSeriesByRequest(resourceList, float64(memScaleFactor)/100.0) + + res.Cpu = cpuSeries + res.Memory = memSeries + return res +} + +func (p *Plugin) getNodePredictUsage(pod *v1.Pod, nodeName 
string) (*ResourceUsage, error) { + nodePredictUsage := cache.GetNodePredictUsage(nodeName) + klog.V(6).Infof("node %v predict usage cpu: %v, memory: %v", nodeName, nodePredictUsage.Cpu, nodePredictUsage.Memory) + + podPredictUsage := p.GetPodPortrait(pod) + klog.V(6).Infof("pod %v predict usage cpu: %v, memory: %v", pod.Name, podPredictUsage.Cpu, podPredictUsage.Memory) + + err := nodePredictUsage.add(podPredictUsage) + if err != nil { + err = fmt.Errorf("sum node %s predict usage fail: %v", nodeName, err) + return nil, err + } + + return nodePredictUsage, nil +} + func (p *Plugin) IsLoadAwareEnabled(pod *v1.Pod) bool { if p.args.PodAnnotationLoadAwareEnable == nil || *p.args.PodAnnotationLoadAwareEnable == "" { return true @@ -71,3 +265,10 @@ func (p *Plugin) IsLoadAwareEnabled(pod *v1.Pod) bool { } return false } + +func (p *Plugin) enablePortrait() bool { + if p.args.EnablePortrait == nil { + return false + } + return *p.args.EnablePortrait +} diff --git a/pkg/scheduler/plugins/loadaware/score.go b/pkg/scheduler/plugins/loadaware/score.go index 4740c7476..dd189dc6e 100644 --- a/pkg/scheduler/plugins/loadaware/score.go +++ b/pkg/scheduler/plugins/loadaware/score.go @@ -3,13 +3,13 @@ package loadaware import ( "context" "fmt" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" "math" "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" quotav1 "k8s.io/apiserver/pkg/quota/v1" resourceapi "k8s.io/kubernetes/pkg/api/v1/resource" "k8s.io/kubernetes/pkg/scheduler/framework" @@ -35,10 +35,19 @@ func (p *Plugin) Score(_ context.Context, _ *framework.CycleState, pod *v1.Pod, return 0, nil } + if p.enablePortrait() { + return p.scoreByPortrait(pod, nodeName) + } + + return p.scoreByNPD(pod, nodeName) +} + +func (p *Plugin) scoreByNPD(pod *v1.Pod, nodeName string) (int64, *framework.Status) { nodeInfo, err := p.handle.SnapshotSharedLister().NodeInfos().Get(nodeName) if err != nil { return 0, framework.NewStatus(framework.Unschedulable, fmt.Sprintf("get node %v from Snapshot: %v", nodeName, err)) } + node := nodeInfo.Node() if node == nil { return 0, framework.NewStatus(framework.Unschedulable, "node not found") @@ -75,18 +84,86 @@ func (p *Plugin) Score(_ context.Context, _ *framework.CycleState, pod *v1.Pod, return score, nil } +func (p *Plugin) scoreByPortrait(pod *v1.Pod, nodeName string) (int64, *framework.Status) { + if pod == nil { + return framework.MinNodeScore, nil + } + nodeInfo, err := p.handle.SnapshotSharedLister().NodeInfos().Get(nodeName) + if err != nil { + return 0, framework.NewStatus(framework.Unschedulable, fmt.Sprintf("get node %v from Snapshot: %v", nodeName, err)) + } + + nodePredictUsage, err := p.getNodePredictUsage(pod, nodeName) + if err != nil { + klog.Error(err) + return framework.MinNodeScore, nil + } + + var ( + scoreSum, weightSum int64 + ) + + for _, resourceName := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} { + targetUsage, ok := p.args.ResourceToTargetMap[resourceName] + if !ok { + continue + } + weight, ok := p.args.ResourceToWeightMap[resourceName] + if !ok { + continue + } + + total := nodeInfo.Node().Status.Allocatable[resourceName] + if total.IsZero() { + continue + } + var totalValue int64 + if resourceName == v1.ResourceCPU { + totalValue = total.MilliValue() + } else { + totalValue = total.Value() + } + + maxUsage := nodePredictUsage.max(resourceName) + usageRatio := maxUsage / float64(totalValue) * 100 + + score, err := targetLoadPacking(float64(targetUsage), 
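A few worked values for the target-load-packing curve applied here (a 40% target is only an example):

	// usage 20%  -> (100-40)*20/40 + 40 = 70
	// usage 40%  -> 100, the peak at the target utilization
	// usage 70%  -> 40*(100-70)/(100-40) = 20
	// usage >100 -> framework.MinNodeScore
	s, _ := targetLoadPacking(40, 20)
	_ = s // 70
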
usageRatio) + if err != nil { + klog.Errorf("pod %v node %v targetLoadPacking fail: %v", pod.Name, nodeName, err) + return framework.MinNodeScore, nil + } + + klog.V(6).Infof("loadAware score pod %v, node %v, resource %v, target: %v, maxUsage: %v, total: %v, usageRatio: %v, score: %v", + pod.Name, nodeInfo.Node().Name, resourceName, targetUsage, maxUsage, totalValue, usageRatio, score) + scoreSum += score + weightSum += weight + } + + if weightSum <= 0 { + err = fmt.Errorf("resource weight is zero, resourceWightMap: %v", p.args.ResourceToWeightMap) + klog.Error(err) + return framework.MinNodeScore, nil + } + score := scoreSum / weightSum + klog.V(6).Infof("loadAware score pod %v, node %v, finalScore: %v", + pod.Name, nodeInfo.Node().Name, score) + return score, nil +} + func (p *Plugin) estimatedAssignedPodUsage(nodeName string, updateTime time.Time) v1.ResourceList { - cache.RLock() - defer cache.RUnlock() var ( estimatedUsed = make(map[v1.ResourceName]int64) result = v1.ResourceList{} ) + cache.RLock() nodeCache, ok := cache.NodePodInfo[nodeName] + cache.RUnlock() if !ok { return result } + nodeCache.RLock() + defer nodeCache.RUnlock() for _, podInfo := range nodeCache.PodInfoMap { if isNeedToEstimatedUsage(podInfo, updateTime) { estimated := estimatedPodUsed(podInfo.pod, p.args.ResourceToWeightMap, p.args.ResourceToScalingFactorMap) From 7bdce6a28986ac9b2ad9b909188884fbd0be4810 Mon Sep 17 00:00:00 2001 From: "wangzhe.21" Date: Tue, 2 Jul 2024 20:12:32 +0800 Subject: [PATCH 6/6] add ut for loadaware scheduler plugin and npd plugin --- cmd/katalyst-controller/app/options/npd.go | 3 +- cmd/katalyst-scheduler/main.go | 2 +- go.mod | 2 +- go.sum | 4 +- .../npd/indicator-plugin/loadaware/handler.go | 24 +- .../loadaware/handler_test.go | 143 ++++ .../npd/indicator-plugin/loadaware/helper.go | 48 +- .../indicator-plugin/loadaware/helper_test.go | 68 ++ .../indicator-plugin/loadaware/loadaware.go | 70 +- .../loadaware/loadaware_test.go | 553 +++++++++++++++ .../loadaware/sorter/helper.go | 16 + .../indicator-plugin/loadaware/sorter/pod.go | 16 + .../loadaware/sorter/pod_test.go | 16 + .../loadaware/sorter/scorer.go | 16 + .../npd/indicator-plugin/loadaware/types.go | 58 +- pkg/scheduler/plugins/loadaware/cache.go | 27 +- pkg/scheduler/plugins/loadaware/cache_test.go | 19 +- pkg/scheduler/plugins/loadaware/fit.go | 16 + pkg/scheduler/plugins/loadaware/fit_test.go | 443 +++++++++++- pkg/scheduler/plugins/loadaware/handler.go | 40 +- pkg/scheduler/plugins/loadaware/helper.go | 21 +- pkg/scheduler/plugins/loadaware/plugin.go | 28 +- .../plugins/loadaware/plugin_test.go | 91 +++ pkg/scheduler/plugins/loadaware/reserve.go | 20 +- .../plugins/loadaware/reserve_test.go | 94 +++ pkg/scheduler/plugins/loadaware/score.go | 40 +- pkg/scheduler/plugins/loadaware/score_test.go | 640 ++++++++++++++++++ 27 files changed, 2374 insertions(+), 144 deletions(-) create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/handler_test.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/helper_test.go create mode 100644 pkg/controller/npd/indicator-plugin/loadaware/loadaware_test.go create mode 100644 pkg/scheduler/plugins/loadaware/plugin_test.go create mode 100644 pkg/scheduler/plugins/loadaware/reserve_test.go create mode 100644 pkg/scheduler/plugins/loadaware/score_test.go diff --git a/cmd/katalyst-controller/app/options/npd.go b/cmd/katalyst-controller/app/options/npd.go index 22dfa2244..e63e20cc8 100644 --- a/cmd/katalyst-controller/app/options/npd.go +++ 
b/cmd/katalyst-controller/app/options/npd.go @@ -17,9 +17,10 @@ limitations under the License. package options import ( - cliflag "k8s.io/component-base/cli/flag" "time" + cliflag "k8s.io/component-base/cli/flag" + "github.com/kubewharf/katalyst-core/pkg/config/controller" ) diff --git a/cmd/katalyst-scheduler/main.go b/cmd/katalyst-scheduler/main.go index b7aed5294..3cb9a7225 100644 --- a/cmd/katalyst-scheduler/main.go +++ b/cmd/katalyst-scheduler/main.go @@ -17,13 +17,13 @@ limitations under the License. package main import ( - "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/loadaware" "os" "github.com/spf13/cobra" "k8s.io/component-base/logs" "github.com/kubewharf/katalyst-core/cmd/katalyst-scheduler/app" + "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/loadaware" "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/nodeovercommitment" "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/noderesourcetopology" "github.com/kubewharf/katalyst-core/pkg/scheduler/plugins/qosawarenoderesources" diff --git a/go.mod b/go.mod index 4a7c34c23..bda26a81e 100644 --- a/go.mod +++ b/go.mod @@ -161,7 +161,7 @@ require ( ) replace ( - github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11 + github.com/kubewharf/katalyst-api => github.com/WangZzzhe/katalyst-api v0.0.0-20240719035252-ac200da4db6c k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 717e5ff1a..564916d68 100644 --- a/go.sum +++ b/go.sum @@ -84,8 +84,8 @@ github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWX github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11 h1:4RUG7QfX0hBwtHtI3Nll6F4lCP31ThYxkWIu93G6Ei4= -github.com/WangZzzhe/katalyst-api v0.0.0-20240626083651-4a90fe53af11/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= +github.com/WangZzzhe/katalyst-api v0.0.0-20240719035252-ac200da4db6c h1:/0fwVknrQEJoRKnT2H0f5xkzCdcDIH4qfNvpPn7QoH8= +github.com/WangZzzhe/katalyst-api v0.0.0-20240719035252-ac200da4db6c/go.mod h1:HHUJnOrDN5xrzKhEspq70ZJL859b09j07pMAl9ACnwU= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/pkg/controller/npd/indicator-plugin/loadaware/handler.go b/pkg/controller/npd/indicator-plugin/loadaware/handler.go index b9884af7b..36e48181c 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/handler.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/handler.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( @@ -72,7 +88,9 @@ func (p *Plugin) OnPodAdd(obj interface{}) { p.Lock() defer p.Unlock() if p.podUsageSelectorKey != "" { - if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && value == p.podUsageSelectorVal { + if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && + value == p.podUsageSelectorVal && + p.podUsageSelectorNamespace == pod.Namespace { klog.Info("start sync pod usage to nodeMonitor") p.enableSyncPodUsage = true } @@ -143,7 +161,9 @@ func (p *Plugin) OnPodDelete(obj interface{}) { p.Lock() defer p.Unlock() if p.podUsageSelectorVal != "" { - if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && value == p.podUsageSelectorVal { + if value, exist := pod.Labels[p.podUsageSelectorKey]; exist && + value == p.podUsageSelectorVal && + p.podUsageSelectorNamespace == pod.Namespace { klog.Info("stop sync pod usage to nodeMonitor") p.enableSyncPodUsage = false } diff --git a/pkg/controller/npd/indicator-plugin/loadaware/handler_test.go b/pkg/controller/npd/indicator-plugin/loadaware/handler_test.go new file mode 100644 index 000000000..dba947711 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/handler_test.go @@ -0,0 +1,143 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package loadaware + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +func TestOnNodeAdd(t *testing.T) { + t.Parallel() + + p := &Plugin{ + workers: 3, + nodePoolMap: map[int32]sets.String{}, + nodeStatDataMap: map[string]*NodeMetricData{}, + } + + testNode1 := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testNode1", + }, + Status: v1.NodeStatus{ + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("16"), + v1.ResourceMemory: resource.MustParse("32Gi"), + }, + }, + } + + p.OnNodeAdd(testNode1) + assert.NotNil(t, p.nodeStatDataMap["testNode1"]) + assert.Equal(t, 2, len(p.nodeStatDataMap["testNode1"].TotalRes)) + + p.OnNodeDelete(testNode1) + assert.Nil(t, p.nodeStatDataMap["testNode1"]) +} + +func TestOnNodeUpdate(t *testing.T) { + t.Parallel() + + p := &Plugin{ + nodeStatDataMap: map[string]*NodeMetricData{}, + } + + testNode1 := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testNode1", + }, + Status: v1.NodeStatus{ + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("16"), + v1.ResourceMemory: resource.MustParse("32Gi"), + }, + }, + } + + p.OnNodeUpdate(nil, testNode1) + assert.NotNil(t, p.nodeStatDataMap["testNode1"]) + assert.Equal(t, 2, len(p.nodeStatDataMap["testNode1"].TotalRes)) +} + +func TestOnPodAdd(t *testing.T) { + t.Parallel() + + p := &Plugin{ + nodeToPodsMap: map[string]map[string]struct{}{}, + podUsageSelectorKey: "app", + podUsageSelectorVal: "testPod", + podUsageSelectorNamespace: "katalyst-system", + } + + testPod1 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testPod1", + Namespace: "katalyst-system", + Labels: map[string]string{ + "app": "testPod", + }, + }, + Spec: v1.PodSpec{ + NodeName: "testNode1", + }, + } + + p.OnPodAdd(testPod1) + assert.NotNil(t, p.nodeToPodsMap["testNode1"]) + assert.Equal(t, 1, len(p.nodeToPodsMap["testNode1"])) + + p.OnPodDelete(testPod1) + assert.Equal(t, 0, len(p.nodeToPodsMap["testNode1"])) + + p.OnPodDelete("") +} + +func TestOnPodUpdate(t *testing.T) { + t.Parallel() + + p := &Plugin{ + nodeToPodsMap: map[string]map[string]struct{}{}, + podUsageSelectorKey: "app", + podUsageSelectorVal: "testPod", + podUsageSelectorNamespace: "katalyst-system", + } + + testPod1 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testPod1", + Namespace: "katalyst-system", + Labels: map[string]string{ + "app": "testPod", + }, + }, + Spec: v1.PodSpec{ + NodeName: "testNode1", + }, + } + + p.OnPodUpdate(nil, testPod1) + assert.NotNil(t, p.nodeToPodsMap["testNode1"]) + assert.Equal(t, 1, len(p.nodeToPodsMap["testNode1"])) + + p.OnPodUpdate(nil, "") +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/helper.go b/pkg/controller/npd/indicator-plugin/loadaware/helper.go index bdc2c0be4..003d89a81 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/helper.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/helper.go @@ -1,13 +1,31 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( - "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin/loadaware/sorter" + "time" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" quotav1 "k8s.io/apiserver/pkg/quota/v1" "k8s.io/metrics/pkg/apis/metrics/v1beta1" - "time" + + "github.com/kubewharf/katalyst-core/pkg/controller/npd/indicator-plugin/loadaware/sorter" ) // getUsage transfer cpu Nano to Milli, memory Ki to Mega @@ -92,7 +110,7 @@ func refreshNodeMetricData(metricData *NodeMetricData, metricInfo *v1beta1.NodeM max1Hour := calCPUAndMemoryMax(metricData.Latest1HourCache) metricData.Max1Hour = max1Hour.DeepCopy() - //calculate 1 day max data + // calculate 1 day max data if metricData.ifCanInsertLatest1DayCache(now) { resWithTime := &ResourceListWithTime{ ResourceList: max1Hour.DeepCopy(), @@ -115,7 +133,7 @@ func refreshPodMetricData(metricData *PodMetricData, metricInfo *v1beta1.PodMetr podUsage = quotav1.Add(podUsage, containerMetrics.Usage) } metricData.LatestUsage = podUsage.DeepCopy() - //calculate 5 min avg data + // calculate 5 min avg data metricData.Latest5MinCache = append(metricData.Latest5MinCache, getUsage(podUsage)) if len(metricData.Latest5MinCache) > Avg5MinPointNumber { metricData.Latest5MinCache = metricData.Latest5MinCache[len(metricData.Latest5MinCache)-Avg5MinPointNumber:] @@ -153,25 +171,3 @@ func getTopNPodUsages(podUsages map[string]corev1.ResourceList, maxPodUsageCount } return topNPodUsages } - -func calNodeLoad(resourceName corev1.ResourceName, usage, totalRes corev1.ResourceList) int64 { - if usage == nil || totalRes == nil { - return 0 - } - used := int64(0) - total := int64(0) - if resourceName == corev1.ResourceCPU { - used = usage.Cpu().MilliValue() - total = totalRes.Cpu().MilliValue() - } else { - used = usage.Memory().Value() - total = totalRes.Memory().Value() - } - if total == 0 { - return 0 - } - if used >= total { - return 99 - } - return used * 100 / total -} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/helper_test.go b/pkg/controller/npd/indicator-plugin/loadaware/helper_test.go new file mode 100644 index 000000000..9459a1fc9 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/helper_test.go @@ -0,0 +1,68 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package loadaware
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+)
+
+func TestGetTopNPodUsages(t *testing.T) {
+	t.Parallel()
+	podRealUsage := map[string]corev1.ResourceList{
+		"default/test-1": {
+			corev1.ResourceCPU:    resource.MustParse("80"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-2": {
+			corev1.ResourceCPU:    resource.MustParse("30"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-3": {
+			corev1.ResourceCPU:    resource.MustParse("50"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-4": {
+			corev1.ResourceCPU:    resource.MustParse("70"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-5": {
+			corev1.ResourceCPU:    resource.MustParse("10"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-6": {
+			corev1.ResourceCPU:    resource.MustParse("40"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+		"default/test-7": {
+			corev1.ResourceCPU:    resource.MustParse("60"),
+			corev1.ResourceMemory: resource.MustParse("10Gi"),
+		},
+	}
+	resultMap := getTopNPodUsages(podRealUsage, 3)
+	expected := []string{"default/test-1", "default/test-4", "default/test-7"}
+	assert.Equal(t, 3, len(resultMap))
+	for _, v := range expected {
+		if _, ok := resultMap[v]; !ok {
+			t.Error(fmt.Errorf("expected pod %s does not exist in result", v))
+		}
+	}
+}
diff --git a/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go b/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go
index 33b544499..c76df08b6 100644
--- a/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go
+++ b/pkg/controller/npd/indicator-plugin/loadaware/loadaware.go
@@ -1,9 +1,26 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
 package loadaware
 
 import (
 	"context"
 	"fmt"
 	"hash/crc32"
+	"sort"
 	"sync"
 	"time"
 
@@ -64,7 +81,8 @@ type Plugin struct {
 }
 
 func NewLoadAwarePlugin(ctx context.Context, conf *controller.NPDConfig, extraConf interface{},
-	controlCtx *katalystbase.GenericContext, updater indicator_plugin.IndicatorUpdater) (indicator_plugin.IndicatorPlugin, error) {
+	controlCtx *katalystbase.GenericContext, updater indicator_plugin.IndicatorUpdater,
+) (indicator_plugin.IndicatorPlugin, error) {
 	p := &Plugin{
 		ctx:     ctx,
 		workers: int32(conf.Workers),
@@ -150,7 +168,7 @@ func (p *Plugin) Run() {
 	if err != nil {
 		klog.Fatalf("get all nodes from cache error, err:%v", err)
 	}
-	//init worker node pool
+	// init worker node pool
 	for _, node := range nodes {
 		bucketID := p.getBucketID(node.Name)
 		if pool, ok := p.nodePoolMap[bucketID]; !ok {
@@ -162,7 +180,7 @@ func (p *Plugin) Run() {
 
 	p.constructNodeToPodMap()
 
-	//restore npd from api server
+	// restore npd from api server
 	p.restoreNPD()
 
 	// start sync node
@@ -230,11 +248,11 @@ func (p *Plugin) restoreNPD() {
 		}
 
 		var (
-			avg15MinCache = make([]corev1.ResourceList, 0)
+			avg15MinCache = make([]*ResourceListWithTime, 0)
 			max1HourCache = make([]*ResourceListWithTime, 0)
 			max1DayCache  = make([]*ResourceListWithTime, 0)
 
-			avg15MinMap = make(map[metav1.Time]corev1.ResourceList)
+			avg15MinMap = make(map[metav1.Time]*ResourceListWithTime)
 			max1HourMap = make(map[metav1.Time]*ResourceListWithTime)
 			max1DayMap  = make(map[metav1.Time]*ResourceListWithTime)
 		)
@@ -242,9 +260,12 @@ func (p *Plugin) restoreNPD() {
 		for _, metricValue := range npd.Status.NodeMetrics[i].Metrics {
 			if metricValue.Window.Duration == 15*time.Minute {
 				if _, ok := avg15MinMap[metricValue.Timestamp]; !ok {
-					avg15MinMap[metricValue.Timestamp] = corev1.ResourceList{}
+					avg15MinMap[metricValue.Timestamp] = &ResourceListWithTime{
+						Ts:           metricValue.Timestamp.Unix(),
+						ResourceList: corev1.ResourceList{},
+					}
 				}
-				avg15MinMap[metricValue.Timestamp][corev1.ResourceName(metricValue.MetricName)] = metricValue.Value
+				avg15MinMap[metricValue.Timestamp].ResourceList[corev1.ResourceName(metricValue.MetricName)] = metricValue.Value
 			} else if metricValue.Window.Duration == time.Hour {
 				if _, ok := max1HourMap[metricValue.Timestamp]; !ok {
 					max1HourMap[metricValue.Timestamp] = &ResourceListWithTime{
@@ -269,15 +290,18 @@ func (p *Plugin) restoreNPD() {
 		for i := range avg15MinMap {
 			avg15MinCache = append(avg15MinCache, avg15MinMap[i])
 		}
+		sort.Sort(ResourceListWithTimeList(avg15MinCache))
 		for i := range max1HourMap {
 			max1HourCache = append(max1HourCache, max1HourMap[i])
 		}
+		sort.Sort(ResourceListWithTimeList(max1HourCache))
 		for i := range max1DayMap {
 			max1DayCache = append(max1DayCache, max1DayMap[i])
 		}
+		sort.Sort(ResourceListWithTimeList(max1DayCache))
 
 		p.nodeStatDataMap[npd.Name] = &NodeMetricData{
-			Latest15MinCache: avg15MinCache,
+			Latest15MinCache: ResourceListWithTimeList(avg15MinCache).ToResourceList(),
 			Latest1HourCache: max1HourCache,
 			Latest1DayCache:  max1DayCache,
 		}
@@ -628,33 +652,3 @@ func (p *Plugin) checkPodUsageRequired() {
 		}
 	}
 }
-
-func (p *Plugin) reportNodeLoadMetric() {
-	p.RLock()
-	defer p.RUnlock()
-	resourceDims := []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory}
-	for _, resourceName := range resourceDims {
-		resultMap := make(map[int64]*int64)
-		for _, data := range p.nodeStatDataMap {
-			data.lock.RLock()
-			load := calNodeLoad(resourceName, data.LatestUsage, data.TotalRes)
-			data.lock.RUnlock()
-			idx := load / 10
-			if count, ok := resultMap[idx]; !ok {
-				i := int64(1)
-				resultMap[idx] = &i
- } else { - *count++ - } - } - for idx, level := range levels { - typeTag := metrics.MetricTag{Key: metricTagType, Val: string(resourceName)} - levelTag := metrics.MetricTag{Key: metricTagLevel, Val: level} - if count, ok := resultMap[int64(idx)]; ok { - _ = p.emitter.StoreFloat64(loadAwareMetricName, float64(*count), metrics.MetricTypeNameRaw, typeTag, levelTag) - } else { - _ = p.emitter.StoreFloat64(loadAwareMetricName, 0, metrics.MetricTypeNameRaw, typeTag, levelTag) - } - } - } -} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/loadaware_test.go b/pkg/controller/npd/indicator-plugin/loadaware/loadaware_test.go new file mode 100644 index 000000000..8e98cabc1 --- /dev/null +++ b/pkg/controller/npd/indicator-plugin/loadaware/loadaware_test.go @@ -0,0 +1,553 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package loadaware + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v12 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/metrics/pkg/apis/metrics/v1beta1" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/controller" +) + +func TestNewLoadAwarePlugin(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + conf := &controller.NPDConfig{ + LoadAwarePluginConfig: &controller.LoadAwarePluginConfig{ + Workers: 3, + PodUsageSelectorNamespace: "katalyst-system", + PodUsageSelectorKey: "app", + PodUsageSelectorVal: "testPod", + MaxPodUsageCount: 10, + }, + } + + updater := &fakeIndicatorUpdater{} + + p, err := NewLoadAwarePlugin(context.TODO(), conf, nil, controlCtx, updater) + assert.NoError(t, err) + assert.NotNil(t, p) +} + +func TestRestoreNPD(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + } + + npds := []*v1alpha1.NodeProfileDescriptor{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "testNode1", + }, + Status: v1alpha1.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha1.ScopedNodeMetrics{ + { + Scope: "testScope", + }, + { + Scope: loadAwareMetricMetadataScope, + Metrics: makeTestMetadata(4, 8*1024*1024*1024), + }, + }, + }, + }, + } + controlCtx.StartInformer(context.TODO()) + for _, npd := range npds { + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). 
+ Create(context.TODO(), npd, v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + p.restoreNPD() + + for _, npd := range npds { + data, ok := p.nodeStatDataMap[npd.Name] + assert.True(t, ok) + + assert.Equal(t, Avg15MinPointNumber, len(data.Latest15MinCache)) + assert.Equal(t, Max1HourPointNumber, len(data.Latest1HourCache)) + assert.Equal(t, Max1DayPointNumber, len(data.Latest1DayCache)) + } +} + +func TestConstructNodeToPodMap(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + nodeToPodsMap: map[string]map[string]struct{}{}, + podLister: controlCtx.KubeInformerFactory.Core().V1().Pods().Lister(), + } + + pods := []*v12.Pod{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "pod1", + Namespace: "default", + }, + Spec: v12.PodSpec{ + NodeName: "testNode1", + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "pod2", + Namespace: "default", + }, + Spec: v12.PodSpec{ + NodeName: "testNode1", + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "pod3", + Namespace: "default", + }, + Spec: v12.PodSpec{ + NodeName: "testNode2", + }, + }, + } + + controlCtx.StartInformer(context.TODO()) + for _, pod := range pods { + _, err = controlCtx.Client.KubeClient.CoreV1().Pods(pod.Namespace). + Create(context.TODO(), pod, v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + + p.constructNodeToPodMap() + assert.Equal(t, 2, len(p.nodeToPodsMap)) + assert.Equal(t, 2, len(p.nodeToPodsMap["testNode1"])) +} + +func TestWorker(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + nodeToPodsMap: map[string]map[string]struct{}{}, + podLister: controlCtx.KubeInformerFactory.Core().V1().Pods().Lister(), + enableSyncPodUsage: false, + npdUpdater: &fakeIndicatorUpdater{}, + workers: 1, + nodePoolMap: map[int32]sets.String{ + 0: sets.NewString("node1", "node2", "node3"), + }, + nodeStatDataMap: map[string]*NodeMetricData{}, + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + } + controlCtx.StartInformer(context.TODO()) + makeTestNodeStatData(p, "node1", 4, 16*1024*1024*1024) + + nodeMetrics := map[string]*v1beta1.NodeMetrics{ + "node1": { + ObjectMeta: v1.ObjectMeta{ + Name: "node1", + }, + Timestamp: v1.Time{Time: time.Now()}, + Usage: v12.ResourceList{ + v12.ResourceCPU: resource.MustParse("4"), + v12.ResourceMemory: resource.MustParse("6Gi"), + }, + }, + "node2": { + ObjectMeta: v1.ObjectMeta{ + Name: "node2", + }, + Timestamp: v1.Time{Time: time.Now()}, + Usage: v12.ResourceList{ + v12.ResourceCPU: resource.MustParse("4"), + v12.ResourceMemory: resource.MustParse("6Gi"), + }, + }, + "node3": { + ObjectMeta: v1.ObjectMeta{ + Name: "node3", + }, + Timestamp: v1.Time{Time: time.Now()}, + Usage: v12.ResourceList{ + v12.ResourceCPU: resource.MustParse("2"), + v12.ResourceMemory: resource.MustParse("5Gi"), + }, + }, + } + + npds := []*v1alpha1.NodeProfileDescriptor{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "node1", + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "node2", + }, + }, + } + for i := range npds { + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). 
+ Create(context.TODO(), npds[i], v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + + p.worker(0, nodeMetrics) +} + +func TestTransferMetaToCRStore(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + updater := &fakeIndicatorUpdater{} + + p := &Plugin{ + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + nodeStatDataMap: map[string]*NodeMetricData{}, + npdUpdater: updater, + } + makeTestNodeStatData(p, "testNode1", 16, 32*1024*1024*1024) + controlCtx.StartInformer(context.TODO()) + + npds := []*v1alpha1.NodeProfileDescriptor{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "testNode1", + }, + }, + } + for _, npd := range npds { + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). + Create(context.TODO(), npd, v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + + p.transferMetaToCRStore() + assert.NotNil(t, updater.data["testNode1"]) + assert.Equal(t, loadAwareMetricMetadataScope, updater.data["testNode1"].NodeMetrics[0].Scope) + assert.Equal(t, 48+30+8, len(updater.data["testNode1"].NodeMetrics[0].Metrics)) +} + +func TestUpdatePodMetrics(t *testing.T) { + t.Parallel() + + p := &Plugin{} + + npdStatus := &v1alpha1.NodeProfileDescriptorStatus{ + PodMetrics: []v1alpha1.ScopedPodMetrics{}, + } + podUsage := map[string]v12.ResourceList{ + "default/testPod1": { + v12.ResourceCPU: resource.MustParse("2"), + v12.ResourceMemory: resource.MustParse("6Gi"), + }, + "default/testPod2": { + v12.ResourceCPU: resource.MustParse("3"), + v12.ResourceMemory: resource.MustParse("8Gi"), + }, + "default/testPod3": { + v12.ResourceCPU: resource.MustParse("1"), + v12.ResourceMemory: resource.MustParse("6Gi"), + }, + "default/testPod4": { + v12.ResourceCPU: resource.MustParse("2"), + v12.ResourceMemory: resource.MustParse("5Gi"), + }, + "default/testPod5": { + v12.ResourceCPU: resource.MustParse("4"), + v12.ResourceMemory: resource.MustParse("6Gi"), + }, + "default/testPod6": { + v12.ResourceCPU: resource.MustParse("7"), + v12.ResourceMemory: resource.MustParse("13Gi"), + }, + } + + p.updatePodMetrics(npdStatus, podUsage, 5) + + assert.Equal(t, 1, len(npdStatus.PodMetrics)) + assert.Equal(t, loadAwareMetricsScope, npdStatus.PodMetrics[0].Scope) + assert.Equal(t, 5, len(npdStatus.PodMetrics[0].PodMetrics)) +} + +func TestCheckPodUsageRequired(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + podLister: controlCtx.KubeInformerFactory.Core().V1().Pods().Lister(), + workers: 3, + podUsageSelectorNamespace: "katalyst-system", + podUsageSelectorKey: "app", + podUsageSelectorVal: "testPod", + } + controlCtx.StartInformer(context.TODO()) + + pods := []*v12.Pod{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "testPod", + Namespace: "katalyst-system", + Labels: map[string]string{ + "app": "testPod", + }, + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "testPod2", + Namespace: "default", + Labels: map[string]string{ + "app": "testPod", + }, + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "testPod3", + Namespace: "katalyst-system", + Labels: map[string]string{ + "app": "testPod3", + }, + }, + }, + } + for _, pod := range pods { + _, err = controlCtx.Client.KubeClient.CoreV1().Pods(pod.Namespace). 
+ Create(context.TODO(), pod, v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + + p.checkPodUsageRequired() + assert.True(t, p.enableSyncPodUsage) + + err = controlCtx.Client.KubeClient.CoreV1().Pods("katalyst-system").Delete(context.TODO(), "testPod", v1.DeleteOptions{}) + assert.NoError(t, err) + time.Sleep(time.Second) + + for i := 0; i < 10; i++ { + p.checkPodUsageRequired() + } + assert.False(t, p.enableSyncPodUsage) +} + +func TestReCleanPodData(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + podLister: controlCtx.KubeInformerFactory.Core().V1().Pods().Lister(), + podStatDataMap: map[string]*PodMetricData{ + "default/testPod1": nil, + "default/testPod2": nil, + "default/testPod3": nil, + }, + } + controlCtx.StartInformer(context.TODO()) + + pods := []*v12.Pod{ + { + ObjectMeta: v1.ObjectMeta{ + Name: "testPod1", + Namespace: "default", + }, + }, + { + ObjectMeta: v1.ObjectMeta{ + Name: "testPod5", + Namespace: "default", + }, + }, + } + for _, pod := range pods { + _, err = controlCtx.Client.KubeClient.CoreV1().Pods(pod.Namespace). + Create(context.TODO(), pod, v1.CreateOptions{}) + assert.NoError(t, err) + } + time.Sleep(time.Second) + + p.reCleanPodData() + assert.Equal(t, 1, len(p.podStatDataMap)) +} + +func TestName(t *testing.T) { + t.Parallel() + p := &Plugin{} + assert.Equal(t, LoadAwarePluginName, p.Name()) +} + +func TestGetSupportedNodeMetricsScope(t *testing.T) { + t.Parallel() + p := Plugin{} + assert.Equal(t, []string{loadAwareMetricsScope, loadAwareMetricMetadataScope}, p.GetSupportedNodeMetricsScope()) + assert.Equal(t, []string{loadAwareMetricsScope}, p.GetSupportedPodMetricsScope()) +} + +func makeTestMetadata(cpu, memory int64) []v1alpha1.MetricValue { + res := make([]v1alpha1.MetricValue, 0) + now := time.Now() + rand.Seed(now.Unix()) + for i := 0; i < Avg15MinPointNumber; i++ { + res = append(res, v1alpha1.MetricValue{ + MetricName: "cpu", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: 15 * time.Minute}, + Value: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + }) + res = append(res, v1alpha1.MetricValue{ + MetricName: "memory", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: 15 * time.Minute}, + Value: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }) + now = now.Add(time.Minute) + } + + for i := 0; i < Max1HourPointNumber; i++ { + res = append(res, v1alpha1.MetricValue{ + MetricName: "cpu", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: time.Hour}, + Value: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + }) + res = append(res, v1alpha1.MetricValue{ + MetricName: "memory", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: time.Hour}, + Value: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }) + now = now.Add(time.Minute) + } + + for i := 0; i < Max1DayPointNumber; i++ { + res = append(res, v1alpha1.MetricValue{ + MetricName: "cpu", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: 24 * time.Hour}, + Value: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + }) + res = append(res, v1alpha1.MetricValue{ + MetricName: "memory", + Timestamp: v1.Time{Time: now}, + Window: &v1.Duration{Duration: 24 * time.Hour}, + Value: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }) + now = now.Add(time.Minute) + } + + return res +} + 
+func makeTestNodeStatData(plugin *Plugin, nodeName string, cpu, memory int64) { + if plugin.nodeStatDataMap == nil { + plugin.nodeStatDataMap = map[string]*NodeMetricData{} + } + if plugin.nodeStatDataMap[nodeName] == nil { + plugin.nodeStatDataMap[nodeName] = &NodeMetricData{} + } + now := time.Now().Add(-2 * time.Hour) + rand.Seed(now.Unix()) + + for i := 0; i < Avg15MinPointNumber; i++ { + plugin.nodeStatDataMap[nodeName].Latest15MinCache = append(plugin.nodeStatDataMap[nodeName].Latest15MinCache, v12.ResourceList{ + v12.ResourceCPU: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + v12.ResourceMemory: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }) + } + + for i := 0; i < Max1HourPointNumber; i++ { + plugin.nodeStatDataMap[nodeName].Latest1HourCache = append(plugin.nodeStatDataMap[nodeName].Latest1HourCache, &ResourceListWithTime{ + Ts: now.Unix(), + ResourceList: v12.ResourceList{ + v12.ResourceCPU: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + v12.ResourceMemory: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }, + }) + now = now.Add(time.Minute) + } + + for i := 0; i < Max1DayPointNumber; i++ { + plugin.nodeStatDataMap[nodeName].Latest1DayCache = append(plugin.nodeStatDataMap[nodeName].Latest1DayCache, &ResourceListWithTime{ + Ts: now.Unix(), + ResourceList: v12.ResourceList{ + v12.ResourceCPU: *resource.NewQuantity(rand.Int63nRange(0, cpu), resource.DecimalSI), + v12.ResourceMemory: *resource.NewQuantity(rand.Int63nRange(0, memory), resource.BinarySI), + }, + }) + now = now.Add(time.Minute) + } +} + +type fakeIndicatorUpdater struct { + data map[string]v1alpha1.NodeProfileDescriptorStatus +} + +func (f *fakeIndicatorUpdater) UpdateNodeMetrics(name string, scopedNodeMetrics []v1alpha1.ScopedNodeMetrics) { + if f.data == nil { + f.data = map[string]v1alpha1.NodeProfileDescriptorStatus{} + } + data, ok := f.data[name] + if !ok { + data = v1alpha1.NodeProfileDescriptorStatus{} + } + data.NodeMetrics = scopedNodeMetrics + f.data[name] = data +} + +func (f *fakeIndicatorUpdater) UpdatePodMetrics(nodeName string, scopedPodMetrics []v1alpha1.ScopedPodMetrics) { + if f.data == nil { + f.data = map[string]v1alpha1.NodeProfileDescriptorStatus{} + } + data, ok := f.data[nodeName] + if !ok { + data = v1alpha1.NodeProfileDescriptorStatus{} + } + data.PodMetrics = scopedPodMetrics + f.data[nodeName] = data +} diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go index df85126c2..18676bfcd 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/helper.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package sorter import "sort" diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go index d615b9652..cc03a5acc 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package sorter import corev1 "k8s.io/api/core/v1" diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go index a99f5a745..c52a08935 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/pod_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package sorter import ( diff --git a/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go b/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go index 36ea6260b..0b03cc58a 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/sorter/scorer.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package sorter import ( diff --git a/pkg/controller/npd/indicator-plugin/loadaware/types.go b/pkg/controller/npd/indicator-plugin/loadaware/types.go index 6afa123a3..75a5f6a95 100644 --- a/pkg/controller/npd/indicator-plugin/loadaware/types.go +++ b/pkg/controller/npd/indicator-plugin/loadaware/types.go @@ -1,9 +1,26 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( - v1 "k8s.io/api/core/v1" "sync" "time" + + v1 "k8s.io/api/core/v1" ) const ( @@ -20,16 +37,9 @@ const ( LoadAwarePluginName = "loadAware" loadAwareMetricsScope = "loadAware" loadAwareMetricMetadataScope = "loadAware_metadata" - loadAwareMetricName = "node_load" - metricTagType = "type" - metricTagLevel = "level" ) -var ( - levels = []string{"0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"} - - podUsageUnrequiredCount = 0 -) +var podUsageUnrequiredCount = 0 type NodeMetricData struct { lock sync.RWMutex @@ -39,9 +49,9 @@ type NodeMetricData struct { Avg15Min v1.ResourceList Max1Hour v1.ResourceList Max1Day v1.ResourceList - Latest15MinCache []v1.ResourceList //latest 15 1min_avg_data - Latest1HourCache []*ResourceListWithTime //latest 4 15min_max_data - Latest1DayCache []*ResourceListWithTime //latest 24 1hour_max_data + Latest15MinCache []v1.ResourceList // latest 15 1min_avg_data + Latest1HourCache []*ResourceListWithTime // latest 4 15min_max_data + Latest1DayCache []*ResourceListWithTime // latest 24 1hour_max_data } func (md *NodeMetricData) ifCanInsertLatest1HourCache(now time.Time) bool { @@ -72,7 +82,7 @@ type PodMetricData struct { lock sync.RWMutex LatestUsage v1.ResourceList Avg5Min v1.ResourceList - Latest5MinCache []v1.ResourceList //latest 15 1min_avg_data + Latest5MinCache []v1.ResourceList // latest 15 1min_avg_data } // ResourceListWithTime ... @@ -80,3 +90,25 @@ type ResourceListWithTime struct { v1.ResourceList `json:"R,omitempty"` Ts int64 `json:"T,omitempty"` } + +type ResourceListWithTimeList []*ResourceListWithTime + +func (r ResourceListWithTimeList) Len() int { + return len(r) +} + +func (r ResourceListWithTimeList) Swap(i, j int) { + r[i], r[j] = r[j], r[i] +} + +func (r ResourceListWithTimeList) Less(i, j int) bool { + return r[i].Ts < r[j].Ts +} + +func (r ResourceListWithTimeList) ToResourceList() []v1.ResourceList { + res := make([]v1.ResourceList, 0) + for i := range r { + res = append(res, r[i].ResourceList) + } + return res +} diff --git a/pkg/scheduler/plugins/loadaware/cache.go b/pkg/scheduler/plugins/loadaware/cache.go index 20c9e8409..5912aa46a 100644 --- a/pkg/scheduler/plugins/loadaware/cache.go +++ b/pkg/scheduler/plugins/loadaware/cache.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package loadaware import ( @@ -9,14 +25,6 @@ import ( "k8s.io/klog/v2" ) -var cache *Cache - -func init() { - cache = &Cache{ - NodePodInfo: map[string]*NodeCache{}, - } -} - type SPDLister interface { GetPodPortrait(pod *v1.Pod) *ResourceUsage } @@ -75,7 +83,8 @@ func (c *Cache) ReconcilePredictUsage() { var ( nodePredictUsage = &ResourceUsage{ Cpu: make([]float64, portraitItemsLength, portraitItemsLength), - Memory: make([]float64, portraitItemsLength, portraitItemsLength)} + Memory: make([]float64, portraitItemsLength, portraitItemsLength), + } err error ) for _, podInfo := range nc.PodInfoMap { diff --git a/pkg/scheduler/plugins/loadaware/cache_test.go b/pkg/scheduler/plugins/loadaware/cache_test.go index 433b7cced..8d172ed77 100644 --- a/pkg/scheduler/plugins/loadaware/cache_test.go +++ b/pkg/scheduler/plugins/loadaware/cache_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( @@ -64,7 +80,8 @@ func TestAddPod(t *testing.T) { ObjectMeta: v12.ObjectMeta{ Name: "testPod2", UID: "testPod2", - }}) + }, + }) assert.Equal(t, 1, len(c.NodePodInfo["testNode"].PodInfoMap)) } diff --git a/pkg/scheduler/plugins/loadaware/fit.go b/pkg/scheduler/plugins/loadaware/fit.go index 2b1590a3e..1bc879299 100644 --- a/pkg/scheduler/plugins/loadaware/fit.go +++ b/pkg/scheduler/plugins/loadaware/fit.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( diff --git a/pkg/scheduler/plugins/loadaware/fit_test.go b/pkg/scheduler/plugins/loadaware/fit_test.go index cc5880777..684bec7f4 100644 --- a/pkg/scheduler/plugins/loadaware/fit_test.go +++ b/pkg/scheduler/plugins/loadaware/fit_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package loadaware import ( @@ -17,13 +33,251 @@ import ( "k8s.io/metrics/pkg/apis/metrics/v1beta1" "k8s.io/utils/pointer" + v1alpha12 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/consts" katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" "github.com/kubewharf/katalyst-core/pkg/config/generic" "github.com/kubewharf/katalyst-core/pkg/scheduler/util" ) +func TestFilter(t *testing.T) { + t.Parallel() + + util.SetQoSConfig(generic.NewQoSConfiguration()) + + for _, tc := range []struct { + name string + pod *v1.Pod + node *v1.Node + pods []*v1.Pod + npd *v1alpha12.NodeProfileDescriptor + portraits []*v1alpha1.ServiceProfileDescriptor + expectRes *framework.Status + }{ + { + name: "filter success", + pod: &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment1", + }, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "testContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + }, + }, + }, + }, + }, + node: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + pods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod3", + UID: "pod3UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + }, + portraits: []*v1alpha1.ServiceProfileDescriptor{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment1", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: rangeItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment2", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment3", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(8, 16*1024*1024*1024), + }, + }, + }, + }, + }, + npd: &v1alpha12.NodeProfileDescriptor{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1alpha12.NodeProfileDescriptorSpec{}, + Status: v1alpha12.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha12.ScopedNodeMetrics{ + { + Scope: loadAwareMetricScope, + Metrics: 
[]v1alpha12.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1088m"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("1090m"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + }, + }, + }, + }, + }, + expectRes: nil, + }, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(tc.node) + for _, pod := range tc.pods { + nodeInfo.AddPod(pod) + } + fw, err := runtime.NewFramework(nil, nil, + runtime.WithSnapshotSharedLister(newTestSharedLister(tc.pods, []*v1.Node{tc.node}))) + assert.NoError(t, err) + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + handle: fw, + args: makeTestArgs(), + spdLister: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister(), + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + spdHasSynced: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced, + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, + } + p.cache.SetSPDLister(p) + + for _, pr := range tc.portraits { + _, err = controlCtx.Client.InternalClient.WorkloadV1alpha1().ServiceProfileDescriptors(pr.Namespace). + Create(context.TODO(), pr, v12.CreateOptions{}) + assert.NoError(t, err) + } + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). + Create(context.TODO(), tc.npd, v12.CreateOptions{}) + assert.NoError(t, err) + controlCtx.StartInformer(context.TODO()) + + // wait for portrait synced + if !cache2.WaitForCacheSync(context.TODO().Done(), p.spdHasSynced) { + t.Error("wait for portrait informer synced fail") + t.FailNow() + } + + // add pod to cache + for _, pod := range tc.pods { + p.cache.addPod(tc.node.Name, pod, time.Now()) + } + + status := p.Filter(context.TODO(), nil, tc.pod, nodeInfo) + + if tc.expectRes == nil { + assert.Nil(t, status) + } else { + assert.Equal(t, tc.expectRes.Code(), status.Code()) + } + }) + } +} + func TestFitByPortrait(t *testing.T) { t.Parallel() @@ -181,8 +435,11 @@ func TestFitByPortrait(t *testing.T) { args: makeTestArgs(), spdLister: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister(), spdHasSynced: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced, + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, } - cache.SetSPDLister(p) + p.cache.SetSPDLister(p) for _, pr := range tc.portraits { _, err = controlCtx.Client.InternalClient.WorkloadV1alpha1().ServiceProfileDescriptors(pr.Namespace). 
@@ -199,7 +456,7 @@ func TestFitByPortrait(t *testing.T) { // add pod to cache for _, pod := range tc.pods { - cache.addPod(tc.node.Name, pod, time.Now()) + p.cache.addPod(tc.node.Name, pod, time.Now()) } status := p.fitByPortrait(tc.pod, nodeInfo) @@ -213,6 +470,169 @@ func TestFitByPortrait(t *testing.T) { } } +func TestFitByNPD(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + node *v1.Node + npd *v1alpha12.NodeProfileDescriptor + expectRes *framework.Status + }{ + { + name: "less than threshold", + node: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + npd: &v1alpha12.NodeProfileDescriptor{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1alpha12.NodeProfileDescriptorSpec{}, + Status: v1alpha12.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha12.ScopedNodeMetrics{ + { + Scope: loadAwareMetricScope, + Metrics: []v1alpha12.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1088m"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("1090m"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + }, + }, + }, + }, + }, + expectRes: nil, + }, + { + name: "more than threshold", + node: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + npd: &v1alpha12.NodeProfileDescriptor{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1alpha12.NodeProfileDescriptorSpec{}, + Status: v1alpha12.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha12.ScopedNodeMetrics{ + { + Scope: loadAwareMetricScope, + Metrics: []v1alpha12.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("21088m"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 15 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("1090m"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &metav1.Duration{Duration: 5 * time.Minute}, + Timestamp: metav1.Time{Time: time.Now()}, + }, + }, + }, + }, + }, + }, + expectRes: framework.NewStatus(framework.Unschedulable, ""), + }, + 
} { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + nodeInfo := framework.NewNodeInfo() + nodeInfo.SetNode(tc.node) + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + args: makeTestArgs(), + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, + } + + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). + Create(context.TODO(), tc.npd, v12.CreateOptions{}) + assert.NoError(t, err) + controlCtx.StartInformer(context.TODO()) + time.Sleep(time.Second) + + status := p.fitByNPD(nodeInfo) + + if tc.expectRes == nil { + assert.Nil(t, status) + } else { + assert.Equal(t, tc.expectRes.Code(), status.Code()) + } + }) + } +} + func fixedItems(cpu, memory int64) []v1beta1.PodMetrics { res := make([]v1beta1.PodMetrics, portraitItemsLength, portraitItemsLength) @@ -273,19 +693,16 @@ func makeTestArgs() *config.LoadAwareArgs { v1.ResourceCPU: 1, v1.ResourceMemory: 1, }, + CalculateIndicatorWeight: map[config.IndicatorType]int64{ + consts.Usage15MinAvgKey: 5, + consts.Usage1HourMaxKey: 3, + consts.Usage1DayMaxKey: 2, + }, + NodeMetricsExpiredSeconds: new(int64), + PodAnnotationLoadAwareEnable: new(string), } - args.PodAnnotationLoadAwareEnable = new(string) + *args.NodeMetricsExpiredSeconds = 300 *args.PodAnnotationLoadAwareEnable = "" return args } - -func TestTTT(t *testing.T) { - a := resource.MustParse("4") - cpu := a.MilliValue() - t.Log(cpu) - - a = resource.MustParse("165m") - cpu = a.MilliValue() - t.Log(cpu) -} diff --git a/pkg/scheduler/plugins/loadaware/handler.go b/pkg/scheduler/plugins/loadaware/handler.go index a1a7004a5..a7e260bc5 100644 --- a/pkg/scheduler/plugins/loadaware/handler.go +++ b/pkg/scheduler/plugins/loadaware/handler.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package loadaware import ( @@ -19,7 +35,7 @@ const ( LoadAwareSPDHandler = "LoadAwareSPDHandler" ) -func RegisterPodHandler() { +func (p *Plugin) registerPodHandler() { eventhandlers.RegisterEventHandler( LoadAwarePodHandler, func(informerFactory informers.SharedInformerFactory, _ externalversions.SharedInformerFactory) { @@ -30,9 +46,9 @@ func RegisterPodHandler() { return true }, Handler: toolcache.ResourceEventHandlerFuncs{ - AddFunc: OnAdd, - UpdateFunc: OnUpdate, - DeleteFunc: OnDelete, + AddFunc: p.OnAdd, + UpdateFunc: p.OnUpdate, + DeleteFunc: p.OnDelete, }, }, ) @@ -58,7 +74,7 @@ func (p *Plugin) registerSPDHandler() { ) } -func OnAdd(obj interface{}) { +func (p *Plugin) OnAdd(obj interface{}) { pod, ok := obj.(*v1.Pod) if !ok { klog.Warningf("transfer obj to pod fail") @@ -73,27 +89,27 @@ func OnAdd(obj interface{}) { startTime = pod.Status.StartTime.Time } - cache.addPod(nodeName, pod, startTime) + p.cache.addPod(nodeName, pod, startTime) } -func OnUpdate(oldObj, newObj interface{}) { +func (p *Plugin) OnUpdate(oldObj, newObj interface{}) { pod, ok := newObj.(*v1.Pod) if !ok { return } if v1pod.IsPodTerminal(pod) { - cache.removePod(pod.Spec.NodeName, pod) + p.cache.removePod(pod.Spec.NodeName, pod) } else { - //pod delete and pod may merge a update event + // pod delete and pod may merge a update event assignTime := time.Now() if pod.Status.StartTime != nil { assignTime = pod.Status.StartTime.Time } - cache.addPod(pod.Spec.NodeName, pod, assignTime) + p.cache.addPod(pod.Spec.NodeName, pod, assignTime) } } -func OnDelete(obj interface{}) { +func (p *Plugin) OnDelete(obj interface{}) { var pod *v1.Pod switch t := obj.(type) { case *v1.Pod: @@ -107,5 +123,5 @@ func OnDelete(obj interface{}) { default: return } - cache.removePod(pod.Spec.NodeName, pod) + p.cache.removePod(pod.Spec.NodeName, pod) } diff --git a/pkg/scheduler/plugins/loadaware/helper.go b/pkg/scheduler/plugins/loadaware/helper.go index 7f4c00d4b..db1ca98bd 100644 --- a/pkg/scheduler/plugins/loadaware/helper.go +++ b/pkg/scheduler/plugins/loadaware/helper.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package loadaware import ( @@ -8,9 +24,9 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/scheduler/framework" "github.com/kubewharf/katalyst-core/pkg/util/native" - "k8s.io/kubernetes/pkg/scheduler/framework" ) type Item struct { @@ -29,14 +45,13 @@ func (it Items) Swap(i, j int) { } func (it Items) Less(i, j int) bool { - location, err := time.LoadLocation("Asia/Shanghai") if err != nil { location = time.Local } // sort sample timestamp hour houri := it[i].Timestamp.In(location).Hour() - hourj := it[i].Timestamp.In(location).Hour() + hourj := it[j].Timestamp.In(location).Hour() return houri < hourj } diff --git a/pkg/scheduler/plugins/loadaware/plugin.go b/pkg/scheduler/plugins/loadaware/plugin.go index c1da0cd54..fddb24300 100644 --- a/pkg/scheduler/plugins/loadaware/plugin.go +++ b/pkg/scheduler/plugins/loadaware/plugin.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( @@ -49,6 +65,7 @@ type Plugin struct { npdLister listers.NodeProfileDescriptorLister spdLister workloadlisters.ServiceProfileDescriptorLister spdHasSynced toolscache.InformerSynced + cache *Cache } func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { @@ -65,13 +82,16 @@ func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, p := &Plugin{ handle: handle, args: pluginArgs, + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, } p.registerNPDHandler() p.registerSPDHandler() - RegisterPodHandler() + p.registerPodHandler() if p.enablePortrait() { - cache.SetSPDLister(p) + p.cache.SetSPDLister(p) } go func() { @@ -80,7 +100,7 @@ func NewPlugin(args runtime.Object, handle framework.Handle) (framework.Plugin, klog.Warningf("portrait has not synced, skip") return } - cache.ReconcilePredictUsage() + p.cache.ReconcilePredictUsage() }, time.Hour, context.TODO().Done()) }() @@ -240,7 +260,7 @@ func (p *Plugin) portraitByRequest(pod *v1.Pod) *ResourceUsage { } func (p *Plugin) getNodePredictUsage(pod *v1.Pod, nodeName string) (*ResourceUsage, error) { - nodePredictUsage := cache.GetNodePredictUsage(nodeName) + nodePredictUsage := p.cache.GetNodePredictUsage(nodeName) klog.V(6).Infof("node %v predict usage cpu: %v, memory: %v", nodeName, nodePredictUsage.Cpu, nodePredictUsage.Memory) podPredictUsage := p.GetPodPortrait(pod) diff --git a/pkg/scheduler/plugins/loadaware/plugin_test.go b/pkg/scheduler/plugins/loadaware/plugin_test.go new file mode 100644 index 000000000..e3a6aff6b --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/plugin_test.go @@ -0,0 +1,91 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package loadaware + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" +) + +func TestIsLoadAwareEnabled(t *testing.T) { + t.Parallel() + + p := &Plugin{ + args: &config.LoadAwareArgs{ + PodAnnotationLoadAwareEnable: new(string), + }, + } + *p.args.PodAnnotationLoadAwareEnable = "" + + testpod := &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Annotations: map[string]string{ + "loadAwareEnable": "false", + }, + }, + } + + assert.True(t, p.IsLoadAwareEnabled(testpod)) + + *p.args.PodAnnotationLoadAwareEnable = "loadAwareEnable" + assert.False(t, p.IsLoadAwareEnabled(testpod)) + + testpod.Annotations["loadAwareEnable"] = "true" + assert.True(t, p.IsLoadAwareEnabled(testpod)) +} + +func TestPortraitByRequest(t *testing.T) { + t.Parallel() + + p := Plugin{ + args: &config.LoadAwareArgs{ + PodAnnotationLoadAwareEnable: new(string), + }, + } + + testpod := &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod", + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "testContainer", + Resources: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("8Gi"), + }, + }, + }, + }, + }, + } + + resourceUsage := p.portraitByRequest(testpod) + assert.Equal(t, len(resourceUsage.Cpu), portraitItemsLength) + assert.Equal(t, len(resourceUsage.Memory), portraitItemsLength) + + assert.Equal(t, resourceUsage.Cpu[0], 4000.0) + assert.Equal(t, resourceUsage.Memory[0], float64(8*1024*1024*1024)) +} diff --git a/pkg/scheduler/plugins/loadaware/reserve.go b/pkg/scheduler/plugins/loadaware/reserve.go index 76e59d6d1..38f4e23ed 100644 --- a/pkg/scheduler/plugins/loadaware/reserve.go +++ b/pkg/scheduler/plugins/loadaware/reserve.go @@ -1,3 +1,19 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package loadaware import ( @@ -9,10 +25,10 @@ import ( ) func (p *Plugin) Reserve(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) *framework.Status { - cache.addPod(nodeName, pod, time.Now()) + p.cache.addPod(nodeName, pod, time.Now()) return nil } func (p *Plugin) Unreserve(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeName string) { - cache.removePod(nodeName, pod) + p.cache.removePod(nodeName, pod) } diff --git a/pkg/scheduler/plugins/loadaware/reserve_test.go b/pkg/scheduler/plugins/loadaware/reserve_test.go new file mode 100644 index 000000000..fc11372b7 --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/reserve_test.go @@ -0,0 +1,94 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package loadaware + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + cache2 "k8s.io/client-go/tools/cache" + + "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" +) + +func TestReserve(t *testing.T) { + t.Parallel() + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + args: makeTestArgs(), + spdLister: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister(), + spdHasSynced: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced, + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, + } + p.cache.SetSPDLister(p) + + testPod := &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "testPod", + Namespace: "default", + OwnerReferences: []v12.OwnerReference{ + { + Name: "reserveDeployment1", + Kind: "Deployment", + }, + }, + }, + } + testNode := "testReserveNode" + testSPD := &v1alpha1.ServiceProfileDescriptor{ + ObjectMeta: v12.ObjectMeta{ + Name: "reserveDeployment1", + Namespace: "default", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(4, 8*1024*1024*1024), + }, + }, + }, + } + _, err = controlCtx.Client.InternalClient.WorkloadV1alpha1().ServiceProfileDescriptors(testSPD.GetNamespace()). 
+ Create(context.TODO(), testSPD, v12.CreateOptions{}) + assert.NoError(t, err) + controlCtx.StartInformer(context.TODO()) + // wait for portrait synced + if !cache2.WaitForCacheSync(context.TODO().Done(), p.spdHasSynced) { + t.Error("wait for portrait informer synced fail") + t.FailNow() + } + + _ = p.Reserve(context.TODO(), nil, testPod, testNode) + resourceUsage := p.cache.GetNodePredictUsage(testNode) + assert.Equal(t, portraitItemsLength, len(resourceUsage.Cpu)) + assert.Equal(t, portraitItemsLength, len(resourceUsage.Memory)) + assert.NotZero(t, resourceUsage.Cpu[0]) + + p.Unreserve(context.TODO(), nil, testPod, testNode) + resourceUsage = p.cache.GetNodePredictUsage(testNode) + assert.Zero(t, resourceUsage.Cpu[0]) +} diff --git a/pkg/scheduler/plugins/loadaware/score.go b/pkg/scheduler/plugins/loadaware/score.go index dd189dc6e..07297f1ab 100644 --- a/pkg/scheduler/plugins/loadaware/score.go +++ b/pkg/scheduler/plugins/loadaware/score.go @@ -1,12 +1,29 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package loadaware import ( "context" "fmt" - "k8s.io/klog/v2" "math" "time" + "k8s.io/klog/v2" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -19,13 +36,6 @@ import ( "github.com/kubewharf/katalyst-api/pkg/consts" ) -const ( - metric5Min = "avg_5min" - metric15Min = "avg_15min" - metric1Hour = "max_1hour" - metric1Day = "max_1day" -) - func (p *Plugin) ScoreExtensions() framework.ScoreExtensions { return nil } @@ -64,11 +74,11 @@ func (p *Plugin) scoreByNPD(pod *v1.Pod, nodeName string) (int64, *framework.Sta loadAwareUsage := p.getLoadAwareResourceList(npd) - //estimated the recent assign pod usage + // estimated the recent assign pod usage estimatedUsed := estimatedPodUsed(pod, p.args.ResourceToWeightMap, p.args.ResourceToScalingFactorMap) estimatedAssignedPodUsage := p.estimatedAssignedPodUsage(nodeName, timeStamp) finalEstimatedUsed := quotav1.Add(estimatedUsed, estimatedAssignedPodUsage) - //add estimated usage to avg_15min_usage + // add estimated usage to avg_15min_usage finalNodeUsedOfIndicators := make(map[config.IndicatorType]v1.ResourceList) for indicator := range p.args.CalculateIndicatorWeight { if loadAwareUsage != nil { @@ -99,9 +109,7 @@ func (p *Plugin) scoreByPortrait(pod *v1.Pod, nodeName string) (int64, *framewor return framework.MinNodeScore, nil } - var ( - scoreSum, weightSum int64 - ) + var scoreSum, weightSum int64 for _, resourceName := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory} { targetUsage, ok := p.args.ResourceToTargetMap[resourceName] @@ -155,9 +163,9 @@ func (p *Plugin) estimatedAssignedPodUsage(nodeName string, updateTime time.Time estimatedUsed = make(map[v1.ResourceName]int64) result = v1.ResourceList{} ) - cache.RLock() - nodeCache, ok := cache.NodePodInfo[nodeName] - cache.RUnlock() + p.cache.RLock() + nodeCache, ok := p.cache.NodePodInfo[nodeName] + p.cache.RUnlock() if !ok { return result } diff --git 
a/pkg/scheduler/plugins/loadaware/score_test.go b/pkg/scheduler/plugins/loadaware/score_test.go new file mode 100644 index 000000000..ce4ba355e --- /dev/null +++ b/pkg/scheduler/plugins/loadaware/score_test.go @@ -0,0 +1,640 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package loadaware + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + v12 "k8s.io/apimachinery/pkg/apis/meta/v1" + cache2 "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/scheduler/framework" + "k8s.io/kubernetes/pkg/scheduler/framework/runtime" + + v1alpha12 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + "github.com/kubewharf/katalyst-core/pkg/scheduler/util" +) + +func TestTargetLoadPacking(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + name string + targetRatio float64 + usageRatio float64 + expectErr bool + expectRes int64 + }{ + { + name: "less than target", + targetRatio: 50, + usageRatio: 10, + expectErr: false, + expectRes: 60, + }, + { + name: "greater than target", + targetRatio: 50, + usageRatio: 60, + expectErr: false, + expectRes: 40, + }, + { + name: "zero target", + targetRatio: 0, + usageRatio: 10, + expectErr: true, + expectRes: 0, + }, + { + name: "target greater than 100", + targetRatio: 200, + usageRatio: 10, + expectErr: true, + expectRes: 0, + }, + { + name: "usage less than 0", + targetRatio: 50, + usageRatio: -1, + expectErr: false, + expectRes: 50, + }, + { + name: "usage greater than 100", + targetRatio: 50, + usageRatio: 101, + expectErr: false, + expectRes: 0, + }, + { + name: "low usage", + targetRatio: 30, + usageRatio: 0.1, + expectErr: false, + expectRes: 30, + }, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + res, err := targetLoadPacking(tc.targetRatio, tc.usageRatio) + if !tc.expectErr { + assert.NoError(t, err) + } else { + assert.Error(t, err) + } + assert.Equal(t, tc.expectRes, res) + }) + } +} + +func TestScore(t *testing.T) { + t.Parallel() + + util.SetQoSConfig(generic.NewQoSConfiguration()) + + for _, tc := range []struct { + name string + pod *v1.Pod + lowNode *v1.Node + highNode *v1.Node + lowNodePods []*v1.Pod + highNodePods []*v1.Pod + spd []*v1alpha1.ServiceProfileDescriptor + npd []*v1alpha12.NodeProfileDescriptor + enablePortrait bool + }{ + { + name: "enablePortrait", + pod: &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment1", + }, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "testContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + 
v1.ResourceMemory: resource.MustParse("16Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + }, + }, + }, + }, + }, + lowNode: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + highNode: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node2", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + lowNodePods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod3", + UID: "pod3UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + }, + highNodePods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod5", + UID: "pod5UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod4", + UID: "pod4UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + }, + spd: []*v1alpha1.ServiceProfileDescriptor{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment1", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: rangeItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment2", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment3", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(8, 16*1024*1024*1024), + }, + }, + }, + }, + }, + npd: []*v1alpha12.NodeProfileDescriptor{}, + enablePortrait: true, + }, + { + name: "unablePortrait", + pod: &v1.Pod{ + ObjectMeta: v12.ObjectMeta{ + Name: "pod1", + UID: "pod1UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment1", + }, + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "testContainer", + Resources: v1.ResourceRequirements{ + Limits: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + Requests: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("16Gi"), + }, + }, + }, + }, + 
}, + }, + lowNode: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + highNode: &v1.Node{ + ObjectMeta: v12.ObjectMeta{ + Name: "node2", + }, + Spec: v1.NodeSpec{}, + Status: v1.NodeStatus{ + Capacity: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourceMemory: resource.MustParse("64Gi"), + }, + }, + }, + lowNodePods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod2", + UID: "pod2UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod3", + UID: "pod3UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment3", + }, + }, + }, + }, + }, + highNodePods: []*v1.Pod{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod5", + UID: "pod5UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "pod4", + UID: "pod4UID", + Namespace: "testNs", + OwnerReferences: []v12.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment2", + }, + }, + }, + }, + }, + spd: []*v1alpha1.ServiceProfileDescriptor{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment1", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: rangeItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment2", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(4, 8*1024*1024*1024), + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "deployment3", + Namespace: "testNs", + }, + Status: v1alpha1.ServiceProfileDescriptorStatus{ + AggMetrics: []v1alpha1.AggPodMetrics{ + { + Scope: spdPortraitScope, + Items: fixedItems(8, 16*1024*1024*1024), + }, + }, + }, + }, + }, + npd: []*v1alpha12.NodeProfileDescriptor{ + { + ObjectMeta: v12.ObjectMeta{ + Name: "node1", + }, + Status: v1alpha12.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha12.ScopedNodeMetrics{ + { + Scope: loadAwareMetricScope, + Metrics: []v1alpha12.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("9088m"), + Window: &v12.Duration{Duration: 15 * time.Minute}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("9035916Ki"), + Window: &v12.Duration{Duration: 15 * time.Minute}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("10090m"), + Window: &v12.Duration{Duration: time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("9035916Ki"), + Window: &v12.Duration{Duration: time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: 
resource.MustParse("12088m"), + Window: &v12.Duration{Duration: 24 * time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("9035916Ki"), + Window: &v12.Duration{Duration: 24 * time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + }, + }, + }, + }, + }, + { + ObjectMeta: v12.ObjectMeta{ + Name: "node2", + }, + Status: v1alpha12.NodeProfileDescriptorStatus{ + NodeMetrics: []v1alpha12.ScopedNodeMetrics{ + { + Scope: loadAwareMetricScope, + Metrics: []v1alpha12.MetricValue{ + { + MetricName: "cpu", + Value: resource.MustParse("1088m"), + Window: &v12.Duration{Duration: 15 * time.Minute}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &v12.Duration{Duration: 15 * time.Minute}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("1090m"), + Window: &v12.Duration{Duration: time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &v12.Duration{Duration: time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "cpu", + Value: resource.MustParse("1088m"), + Window: &v12.Duration{Duration: 24 * time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + { + MetricName: "memory", + Value: resource.MustParse("5035916Ki"), + Window: &v12.Duration{Duration: 24 * time.Hour}, + Timestamp: v12.Time{Time: time.Now()}, + }, + }, + }, + }, + }, + }, + }, + enablePortrait: false, + }, + } { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + lowNodeInfo := framework.NewNodeInfo() + lowNodeInfo.SetNode(tc.lowNode) + for _, pod := range tc.lowNodePods { + lowNodeInfo.AddPod(pod) + } + + highNodeInfo := framework.NewNodeInfo() + highNodeInfo.SetNode(tc.highNode) + for _, pod := range tc.highNodePods { + highNodeInfo.AddPod(pod) + } + + fw, err := runtime.NewFramework(nil, nil, + runtime.WithSnapshotSharedLister(newTestSharedLister(nil, []*v1.Node{tc.lowNode, tc.highNode}))) + assert.NoError(t, err) + + controlCtx, err := katalyst_base.GenerateFakeGenericContext() + assert.NoError(t, err) + + p := &Plugin{ + handle: fw, + args: makeTestArgs(), + spdLister: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Lister(), + npdLister: controlCtx.InternalInformerFactory.Node().V1alpha1().NodeProfileDescriptors().Lister(), + spdHasSynced: controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors().Informer().HasSynced, + cache: &Cache{ + NodePodInfo: map[string]*NodeCache{}, + }, + } + *p.args.EnablePortrait = tc.enablePortrait + p.cache.SetSPDLister(p) + + for _, pr := range tc.spd { + _, err = controlCtx.Client.InternalClient.WorkloadV1alpha1().ServiceProfileDescriptors(pr.Namespace). + Create(context.TODO(), pr, v12.CreateOptions{}) + assert.NoError(t, err) + } + for _, n := range tc.npd { + _, err = controlCtx.Client.InternalClient.NodeV1alpha1().NodeProfileDescriptors(). 
+ Create(context.TODO(), n, v12.CreateOptions{}) + assert.NoError(t, err) + } + + controlCtx.StartInformer(context.TODO()) + + // wait for portrait synced + if !cache2.WaitForCacheSync(context.TODO().Done(), p.spdHasSynced) { + t.Error("wait for portrait informer synced fail") + t.FailNow() + } + + // add pod to cache + for _, pod := range tc.lowNodePods { + p.cache.addPod(tc.lowNode.Name, pod, time.Now()) + } + for _, pod := range tc.highNodePods { + p.cache.addPod(tc.highNode.Name, pod, time.Now()) + } + + lowScore, stat := p.Score(context.TODO(), nil, tc.pod, tc.lowNode.Name) + assert.Nil(t, stat) + assert.NotZero(t, lowScore) + + highScore, stat := p.Score(context.TODO(), nil, tc.pod, tc.highNode.Name) + assert.Nil(t, stat) + assert.NotZero(t, highScore) + + assert.Greater(t, highScore, lowScore) + }) + } +}
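The table in TestTargetLoadPacking above pins down the scoring curve the plugin's targetLoadPacking helper is expected to follow: the score rises linearly from the target value at 0% usage up to 100 at the target utilization, then falls back toward 0 as usage approaches 100%. Below is a minimal standalone sketch that reproduces those expectations; it is not the patch's implementation, the names (targetLoadPackingSketch, package main) are illustrative only, and the real helper in score.go may differ in details such as rounding.

package main

import "fmt"

// targetLoadPackingSketch mirrors the behaviour asserted by the test table:
//   - targetRatio must lie in (0, 100], otherwise an error is returned
//   - negative usage is clamped to 0; usage above 100 scores 0
//   - usage <= target: score = (100-target)*usage/target + target
//   - usage  > target: score = target*(100-usage)/(100-target)
func targetLoadPackingSketch(targetRatio, usageRatio float64) (int64, error) {
	if targetRatio <= 0 || targetRatio > 100 {
		return 0, fmt.Errorf("target ratio %v out of range (0, 100]", targetRatio)
	}
	if usageRatio < 0 {
		usageRatio = 0
	}
	if usageRatio > 100 {
		return 0, nil
	}
	var score float64
	if usageRatio <= targetRatio {
		// below the target: reward packing, peaking at 100 when usage == target
		score = (100-targetRatio)*usageRatio/targetRatio + targetRatio
	} else {
		// above the target: penalize, dropping to 0 as usage approaches 100
		score = targetRatio * (100 - usageRatio) / (100 - targetRatio)
	}
	return int64(score), nil
}

func main() {
	// The "less than target" and "greater than target" cases from the table:
	fmt.Println(targetLoadPackingSketch(50, 10)) // 60 <nil>
	fmt.Println(targetLoadPackingSketch(50, 60)) // 40 <nil>
}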