kubernetes · ttsuuubasa · Feb 3, 2025
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/README.md b/cluster-autoscaler/cloudprovider/clusterapi/README.md
@@ -223,15 +223,21 @@ metadata:
     capacity.cluster-autoscaler.kubernetes.io/memory: "128G"
     capacity.cluster-autoscaler.kubernetes.io/cpu: "16"
     capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk: "100Gi"
+    capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
+    // Device Plugin
     capacity.cluster-autoscaler.kubernetes.io/gpu-type: "nvidia.com/gpu"
+    // Dynamic Resource Allocation (DRA)
+    capacity.cluster-autoscaler.kubernetes.io/dra-driver: "gpu.nvidia.com"
+    // Common in Device Plugin and DRA
     capacity.cluster-autoscaler.kubernetes.io/gpu-count: "2"
-    capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
 ```
 
 *Note* the `maxPods` annotation will default to `110` if it is not supplied.
 This value is inspired by the Kubernetes best practices
 [Considerations for large clusters](https://kubernetes.io/docs/setup/best-practices/cluster-large/).
 
+*Note* User should select the annotation for GPU either `gpu-type` or `dra-driver` depends on whether using Device Plugin or Dynamic Resource Allocation(DRA). `gpu-count` is a common parameter in both.
+
 #### RBAC changes for scaling from zero
 
 If you are using the opt-in support for scaling from zero as defined by the

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
@@ -283,7 +283,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
 		return nil, err
 	}
 
-	nodeInfo := framework.NewNodeInfo(&node, nil, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
+	resourceSlices, err := ng.scalableResource.InstanceResourceSlices(nodeName)
+	if err != nil {
+		return nil, err
+	}
+
+	nodeInfo := framework.NewNodeInfo(&node, resourceSlices, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
 	return nodeInfo, nil
 }
 

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
@@ -1309,12 +1309,19 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 		nodeGroupMaxSizeAnnotationKey: "10",
 	}
 
+	type testResourceSlice struct {
+		driverName string
+		gpuCount   int
+		deviceType string
+	}
+
 	type testCaseConfig struct {
-		nodeLabels         map[string]string
-		includeNodes       bool
-		expectedErr        error
-		expectedCapacity   map[corev1.ResourceName]int64
-		expectedNodeLabels map[string]string
+		nodeLabels            map[string]string
+		includeNodes          bool
+		expectedErr           error
+		expectedCapacity      map[corev1.ResourceName]int64
+		expectedNodeLabels    map[string]string
+		expectedResourceSlice testResourceSlice
 	}
 
 	testCases := []struct {
@@ -1407,6 +1414,33 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "When the NodeGroup can scale from zero and DRA is enabled, it creates ResourceSlice derived from the annotation of DRA driver name and GPU count",
+			nodeGroupAnnotations: map[string]string{
+				memoryKey:    "2048Mi",
+				cpuKey:       "2",
+				draDriverKey: "gpu.nvidia.com",
+				gpuCountKey:  "2",
+			},
+			config: testCaseConfig{
+				expectedErr: nil,
+				expectedCapacity: map[corev1.ResourceName]int64{
+					corev1.ResourceCPU:    2,
+					corev1.ResourceMemory: 2048 * 1024 * 1024,
+					corev1.ResourcePods:   110,
+				},
+				expectedResourceSlice: testResourceSlice{
+					driverName: "gpu.nvidia.com",
+					gpuCount:   2,
+					deviceType: GpuDeviceType,
+				},
+				expectedNodeLabels: map[string]string{
+					"kubernetes.io/os":       "linux",
+					"kubernetes.io/arch":     "amd64",
+					"kubernetes.io/hostname": "random value",
+				},
+			},
+		},
 	}
 
 	test := func(t *testing.T, testConfig *testConfig, config testCaseConfig) {
@@ -1470,6 +1504,18 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 				}
 			}
 		}
+		for _, resourceslice := range nodeInfo.LocalResourceSlices {
+			if resourceslice.Spec.Driver != config.expectedResourceSlice.driverName {
+				t.Errorf("Expected DRA driver in ResourceSlice to have: %s, but got: %s", config.expectedResourceSlice.driverName, resourceslice.Spec.Driver)
+			} else if len(resourceslice.Spec.Devices) != config.expectedResourceSlice.gpuCount {
+				t.Errorf("Expected the number of DRA devices in ResourceSlice to have: %d, but got: %d", config.expectedResourceSlice.gpuCount, len(resourceslice.Spec.Devices))
+			}
+			for _, device := range resourceslice.Spec.Devices {
+				if *device.Basic.Attributes["type"].StringValue != config.expectedResourceSlice.deviceType {
+					t.Errorf("Expected device type to have: %s, but got: %s", config.expectedResourceSlice.deviceType, *device.Basic.Attributes["type"].StringValue)
+				}
+			}
+		}
 	}
 
 	for _, tc := range testCases {

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
@@ -20,18 +20,21 @@ import (
 	"context"
 	"fmt"
 	"path"
+	"strconv"
 	"strings"
 	"time"
 
 	"github.com/pkg/errors"
 	apiv1 "k8s.io/api/core/v1"
 	corev1 "k8s.io/api/core/v1"
+	resourceapi "k8s.io/api/resource/v1beta1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/validation"
 	klog "k8s.io/klog/v2"
+	"k8s.io/utils/ptr"
 )
 
 type unstructuredScalableResource struct {
@@ -297,6 +300,46 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
 	return capacity, nil
 }
 
+func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
+	driver := r.InstanceDRADriver()
+	gpuCount, err := r.InstanceGPUCapacityAnnotation()
+	if err != nil {
+		return nil, err
+	}
+
+	var result []*resourceapi.ResourceSlice
+	if driver != "" && !gpuCount.IsZero() {
+		resourceslice := &resourceapi.ResourceSlice{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: nodeName + "-" + driver,
+			},
+			Spec: resourceapi.ResourceSliceSpec{
+				Driver:   driver,
+				NodeName: nodeName,
+				Pool: resourceapi.ResourcePool{
+					Name: nodeName,
+				},
+			},
+		}
+		for i := 0; i < int(gpuCount.Value()); i++ {
+			device := resourceapi.Device{
+				Name: "gpu-" + strconv.Itoa(i),
+				Basic: &resourceapi.BasicDevice{
+					Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
+						"type": {
+							StringValue: ptr.To(GpuDeviceType),
+						},
+					},
+				},
+			}
+			resourceslice.Spec.Devices = append(resourceslice.Spec.Devices, device)
+		}
+		result = append(result, resourceslice)
+		return result, nil
+	}
+	return nil, nil
+}
+
 func (r unstructuredScalableResource) InstanceEphemeralDiskCapacityAnnotation() (resource.Quantity, error) {
 	return parseEphemeralDiskCapacity(r.unstructured.GetAnnotations())
 }
@@ -321,6 +364,10 @@ func (r unstructuredScalableResource) InstanceMaxPodsCapacityAnnotation() (resou
 	return parseMaxPodsCapacity(r.unstructured.GetAnnotations())
 }
 
+func (r unstructuredScalableResource) InstanceDRADriver() string {
+	return parseDRADriver(r.unstructured.GetAnnotations())
+}
+
 func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
 	infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
 	if !found || err != nil {

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go
@@ -24,10 +24,12 @@ import (
 
 	"github.com/stretchr/testify/assert"
 	v1 "k8s.io/api/core/v1"
+	resourceapi "k8s.io/api/resource/v1beta1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/client-go/tools/cache"
+	"k8s.io/utils/ptr"
 )
 
 const (
@@ -297,6 +299,32 @@ func TestAnnotations(t *testing.T) {
 	gpuQuantity := resource.MustParse("1")
 	maxPodsQuantity := resource.MustParse("42")
 	expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}}
+	testNodeName := "test-node"
+	draDriver := "test-driver"
+	expectedResourceSlice := &resourceapi.ResourceSlice{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: testNodeName + "-" + draDriver,
+		},
+		Spec: resourceapi.ResourceSliceSpec{
+			Driver:   draDriver,
+			NodeName: testNodeName,
+			Pool: resourceapi.ResourcePool{
+				Name: testNodeName,
+			},
+			Devices: []resourceapi.Device{
+				{
+					Name: "gpu-0",
+					Basic: &resourceapi.BasicDevice{
+						Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
+							"type": {
+								StringValue: ptr.To(GpuDeviceType),
+							},
+						},
+					},
+				},
+			},
+		},
+	}
 	annotations := map[string]string{
 		cpuKey:          cpuQuantity.String(),
 		memoryKey:       memQuantity.String(),
@@ -305,6 +333,7 @@ func TestAnnotations(t *testing.T) {
 		maxPodsKey:      maxPodsQuantity.String(),
 		taintsKey:       "key1=value1:NoSchedule,key2=value2:NoExecute",
 		labelsKey:       "key3=value3,key4=value4,key5=value5",
+		draDriverKey:    draDriver,
 	}
 
 	test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) {
@@ -346,6 +375,14 @@ func TestAnnotations(t *testing.T) {
 			t.Errorf("expected %v, got %v", maxPodsQuantity, maxPods)
 		}
 
+		if resourceSlices, err := sr.InstanceResourceSlices(testNodeName); err != nil {
+			t.Fatal(err)
+		} else {
+			for _, resourceslice := range resourceSlices {
+				assert.Equal(t, expectedResourceSlice, resourceslice)
+			}
+		}
+
 		taints := sr.Taints()
 		assert.Equal(t, expectedTaints, taints)
 

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
@@ -40,6 +40,7 @@ const (
 	maxPodsKey      = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
 	taintsKey       = "capacity.cluster-autoscaler.kubernetes.io/taints"
 	labelsKey       = "capacity.cluster-autoscaler.kubernetes.io/labels"
+	draDriverKey    = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
 	// UnknownArch is used if the Architecture is Unknown
 	UnknownArch SystemArchitecture = ""
 	// Amd64 is used if the Architecture is x86_64
@@ -54,6 +55,8 @@ const (
 	DefaultArch = Amd64
 	// scaleUpFromZeroDefaultEnvVar is the name of the env var for the default architecture
 	scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
+	// GpuDeviceType is used if DRA device is GPU
+	GpuDeviceType = "gpu"
 )
 
 var (
@@ -282,6 +285,13 @@ func parseMaxPodsCapacity(annotations map[string]string) (resource.Quantity, err
 	return parseIntKey(annotations, maxPodsKey)
 }
 
+func parseDRADriver(annotations map[string]string) string {
+	if val, found := annotations[draDriverKey]; found {
+		return val
+	}
+	return ""
+}
+
 func clusterNameFromResource(r *unstructured.Unstructured) string {
 	// Use Spec.ClusterName if defined (only available on v1alpha3+ types)
 	clusterName, found, err := unstructured.NestedString(r.Object, "spec", "clusterName")