Skip to content

Commit

Permalink
cluster-api: node template in scale-from-0-nodes scenario with DRA
Browse files Browse the repository at this point in the history
Modify TemplateNodeInfo() to also return a template ResourceSlice.
This supports Dynamic Resource Allocation (DRA) in Cluster Autoscaler: users can set the number of GPUs and the DRA driver name
by adding annotations to the NodeGroup provided by cluster-api.

Signed-off-by: Tsubasa Watanabe <[email protected]>
  • Loading branch information
ttsuuubasa committed Feb 12, 2025
1 parent 08e7250 commit 3fbacf0
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 7 deletions.
8 changes: 7 additions & 1 deletion cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,21 @@ metadata:
capacity.cluster-autoscaler.kubernetes.io/memory: "128G"
capacity.cluster-autoscaler.kubernetes.io/cpu: "16"
capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk: "100Gi"
capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
# Device Plugin
capacity.cluster-autoscaler.kubernetes.io/gpu-type: "nvidia.com/gpu"
# Dynamic Resource Allocation (DRA)
capacity.cluster-autoscaler.kubernetes.io/dra-driver: "gpu.nvidia.com"
# Common to Device Plugin and DRA
capacity.cluster-autoscaler.kubernetes.io/gpu-count: "2"
capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
```
*Note* the `maxPods` annotation will default to `110` if it is not supplied.
This value is inspired by the Kubernetes best practices
[Considerations for large clusters](https://kubernetes.io/docs/setup/best-practices/cluster-large/).

*Note* Choose either the `gpu-type` or the `dra-driver` annotation for GPUs, depending on whether the nodes use the Device Plugin or Dynamic Resource Allocation (DRA). The `gpu-count` annotation is common to both.

#### RBAC changes for scaling from zero

If you are using the opt-in support for scaling from zero as defined by the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
return nil, err
}

nodeInfo := framework.NewNodeInfo(&node, nil, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
resourceSlices, err := ng.scalableResource.InstanceResourceSlices(nodeName)
if err != nil {
return nil, err
}

nodeInfo := framework.NewNodeInfo(&node, resourceSlices, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
return nodeInfo, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1309,12 +1309,19 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
nodeGroupMaxSizeAnnotationKey: "10",
}

// testResourceSlice holds the values a test case expects to find in every
// ResourceSlice attached to the generated template node info.
type testResourceSlice struct {
// driverName is compared against the slice's Spec.Driver.
driverName string
// gpuCount is compared against the number of entries in Spec.Devices.
gpuCount int
// deviceType is compared against each device's "type" string attribute.
deviceType string
}

type testCaseConfig struct {
nodeLabels map[string]string
includeNodes bool
expectedErr error
expectedCapacity map[corev1.ResourceName]int64
expectedNodeLabels map[string]string
nodeLabels map[string]string
includeNodes bool
expectedErr error
expectedCapacity map[corev1.ResourceName]int64
expectedNodeLabels map[string]string
expectedResourceSlice testResourceSlice
}

testCases := []struct {
Expand Down Expand Up @@ -1407,6 +1414,33 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
},
},
},
{
name: "When the NodeGroup can scale from zero and DRA is enabled, it creates ResourceSlice derived from the annotation of DRA driver name and GPU count",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048Mi",
cpuKey: "2",
draDriverKey: "gpu.nvidia.com",
gpuCountKey: "2",
},
config: testCaseConfig{
expectedErr: nil,
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
corev1.ResourceMemory: 2048 * 1024 * 1024,
corev1.ResourcePods: 110,
},
expectedResourceSlice: testResourceSlice{
driverName: "gpu.nvidia.com",
gpuCount: 2,
deviceType: GpuDeviceType,
},
expectedNodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "random value",
},
},
},
}

test := func(t *testing.T, testConfig *testConfig, config testCaseConfig) {
Expand Down Expand Up @@ -1470,6 +1504,18 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
}
}
}
for _, resourceslice := range nodeInfo.LocalResourceSlices {
if resourceslice.Spec.Driver != config.expectedResourceSlice.driverName {
t.Errorf("Expected DRA driver in ResourceSlice to have: %s, but got: %s", config.expectedResourceSlice.driverName, resourceslice.Spec.Driver)
} else if len(resourceslice.Spec.Devices) != config.expectedResourceSlice.gpuCount {
t.Errorf("Expected the number of DRA devices in ResourceSlice to have: %d, but got: %d", config.expectedResourceSlice.gpuCount, len(resourceslice.Spec.Devices))
}
for _, device := range resourceslice.Spec.Devices {
if *device.Basic.Attributes["type"].StringValue != config.expectedResourceSlice.deviceType {
t.Errorf("Expected device type to have: %s, but got: %s", config.expectedResourceSlice.deviceType, *device.Basic.Attributes["type"].StringValue)
}
}
}
}

for _, tc := range testCases {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,21 @@ import (
"context"
"fmt"
"path"
"strconv"
"strings"
"time"

"github.com/pkg/errors"
apiv1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation"
klog "k8s.io/klog/v2"
"k8s.io/utils/ptr"
)

type unstructuredScalableResource struct {
Expand Down Expand Up @@ -297,6 +300,46 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
return capacity, nil
}

// InstanceResourceSlices builds the template ResourceSlices for a node named
// nodeName from the DRA driver name and the GPU capacity annotation of the
// scalable resource.
//
// The GPU capacity annotation is read before the driver is inspected, so an
// error from InstanceGPUCapacityAnnotation is returned even when no driver is
// set. When the driver name is empty or the GPU count is zero, (nil, nil) is
// returned. Otherwise the result holds a single ResourceSlice named
// "<nodeName>-<driver>", whose pool is named after the node and which lists
// one device per GPU ("gpu-0", "gpu-1", ...), each carrying a "type" attribute
// equal to GpuDeviceType.
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
	driverName := r.InstanceDRADriver()
	count, err := r.InstanceGPUCapacityAnnotation()
	if err != nil {
		return nil, err
	}

	// Nothing to advertise without both a driver and a non-zero device count.
	if driverName == "" || count.IsZero() {
		return nil, nil
	}

	// Devices are appended to a nil slice so that a count that yields no
	// iterations leaves Spec.Devices nil.
	var devices []resourceapi.Device
	for idx, total := 0, int(count.Value()); idx < total; idx++ {
		devices = append(devices, resourceapi.Device{
			Name: "gpu-" + strconv.Itoa(idx),
			Basic: &resourceapi.BasicDevice{
				Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
					"type": {
						StringValue: ptr.To(GpuDeviceType),
					},
				},
			},
		})
	}

	templateSlice := &resourceapi.ResourceSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name: nodeName + "-" + driverName,
		},
		Spec: resourceapi.ResourceSliceSpec{
			Driver:   driverName,
			NodeName: nodeName,
			Pool: resourceapi.ResourcePool{
				Name: nodeName,
			},
			Devices: devices,
		},
	}
	return []*resourceapi.ResourceSlice{templateSlice}, nil
}

// InstanceEphemeralDiskCapacityAnnotation returns the result of
// parseEphemeralDiskCapacity applied to the annotations of the underlying
// unstructured object.
func (r unstructuredScalableResource) InstanceEphemeralDiskCapacityAnnotation() (resource.Quantity, error) {
	annotations := r.unstructured.GetAnnotations()
	return parseEphemeralDiskCapacity(annotations)
}
Expand All @@ -321,6 +364,10 @@ func (r unstructuredScalableResource) InstanceMaxPodsCapacityAnnotation() (resou
return parseMaxPodsCapacity(r.unstructured.GetAnnotations())
}

// InstanceDRADriver returns the result of parseDRADriver applied to the
// annotations of the underlying unstructured object.
func (r unstructuredScalableResource) InstanceDRADriver() string {
	annotations := r.unstructured.GetAnnotations()
	return parseDRADriver(annotations)
}

func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
if !found || err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ import (

"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/client-go/tools/cache"
"k8s.io/utils/ptr"
)

const (
Expand Down Expand Up @@ -297,6 +299,32 @@ func TestAnnotations(t *testing.T) {
gpuQuantity := resource.MustParse("1")
maxPodsQuantity := resource.MustParse("42")
expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}}
testNodeName := "test-node"
draDriver := "test-driver"
expectedResourceSlice := &resourceapi.ResourceSlice{
ObjectMeta: metav1.ObjectMeta{
Name: testNodeName + "-" + draDriver,
},
Spec: resourceapi.ResourceSliceSpec{
Driver: draDriver,
NodeName: testNodeName,
Pool: resourceapi.ResourcePool{
Name: testNodeName,
},
Devices: []resourceapi.Device{
{
Name: "gpu-0",
Basic: &resourceapi.BasicDevice{
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
"type": {
StringValue: ptr.To(GpuDeviceType),
},
},
},
},
},
},
}
annotations := map[string]string{
cpuKey: cpuQuantity.String(),
memoryKey: memQuantity.String(),
Expand All @@ -305,6 +333,7 @@ func TestAnnotations(t *testing.T) {
maxPodsKey: maxPodsQuantity.String(),
taintsKey: "key1=value1:NoSchedule,key2=value2:NoExecute",
labelsKey: "key3=value3,key4=value4,key5=value5",
draDriverKey: draDriver,
}

test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) {
Expand Down Expand Up @@ -346,6 +375,14 @@ func TestAnnotations(t *testing.T) {
t.Errorf("expected %v, got %v", maxPodsQuantity, maxPods)
}

if resourceSlices, err := sr.InstanceResourceSlices(testNodeName); err != nil {
t.Fatal(err)
} else {
for _, resourceslice := range resourceSlices {
assert.Equal(t, expectedResourceSlice, resourceslice)
}
}

taints := sr.Taints()
assert.Equal(t, expectedTaints, taints)

Expand Down
10 changes: 10 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const (
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Expand All @@ -54,6 +55,8 @@ const (
DefaultArch = Amd64
// scaleUpFromZeroDefaultEnvVar is the name of the env var for the default architecture
scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
// GpuDeviceType is the value of the "type" attribute set on DRA devices that represent GPUs
GpuDeviceType = "gpu"
)

var (
Expand Down Expand Up @@ -282,6 +285,13 @@ func parseMaxPodsCapacity(annotations map[string]string) (resource.Quantity, err
return parseIntKey(annotations, maxPodsKey)
}

// parseDRADriver returns the value stored under draDriverKey in annotations,
// or the empty string when the key is absent.
func parseDRADriver(annotations map[string]string) string {
	// Indexing a map with a missing key (or indexing a nil map) yields the
	// zero value, which for string is "".
	return annotations[draDriverKey]
}

func clusterNameFromResource(r *unstructured.Unstructured) string {
// Use Spec.ClusterName if defined (only available on v1alpha3+ types)
clusterName, found, err := unstructured.NestedString(r.Object, "spec", "clusterName")
Expand Down

0 comments on commit 3fbacf0

Please sign in to comment.