Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@ package schedulerinterface
import (
"context"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

// BatchScheduler manages submitting RayCluster pods to a third-party scheduler.
Expand All @@ -23,11 +20,6 @@ type BatchScheduler interface {
// For most batch schedulers, this results in the creation of a PodGroup.
DoBatchSchedulingOnSubmission(ctx context.Context, object metav1.Object) error

// AddMetadataToPod enriches the pod with metadata necessary to tie it to the scheduler.
// For example, setting labels for queues / priority, and setting schedulerName.
// This function will be removed once Rayjob Volcano scheduler integration is completed.
AddMetadataToPod(ctx context.Context, rayCluster *rayv1.RayCluster, groupName string, pod *corev1.Pod)

// AddMetadataToChildResource enriches the child resource (batchv1.Job, rayv1.RayCluster) with metadata necessary to tie it to the scheduler.
// For example, setting labels for queues / priority, and setting schedulerName.
AddMetadataToChildResource(ctx context.Context, parent metav1.Object, child metav1.Object, groupName string)
Expand Down Expand Up @@ -63,9 +55,6 @@ func (d *DefaultBatchScheduler) DoBatchSchedulingOnSubmission(_ context.Context,
return nil
}

func (d *DefaultBatchScheduler) AddMetadataToPod(_ context.Context, _ *rayv1.RayCluster, _ string, _ *corev1.Pod) {
}

func (d *DefaultBatchScheduler) AddMetadataToChildResource(_ context.Context, _ metav1.Object, _ metav1.Object, _ string) {
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface"
)

Expand All @@ -38,23 +37,33 @@ func (k *KaiScheduler) DoBatchSchedulingOnSubmission(_ context.Context, _ metav1
return nil
}

func (k *KaiScheduler) AddMetadataToPod(ctx context.Context, app *rayv1.RayCluster, _ string, pod *corev1.Pod) {
func (k *KaiScheduler) AddMetadataToChildResource(ctx context.Context, parent metav1.Object, child metav1.Object, _ string) {
logger := ctrl.LoggerFrom(ctx).WithName("kai-scheduler")
pod.Spec.SchedulerName = k.Name()
addSchedulerNameToObject(child, k.Name())

queue, ok := app.Labels[QueueLabelName]
parentLabel := parent.GetLabels()
queue, ok := parentLabel[QueueLabelName]
if !ok || queue == "" {
logger.Info("Queue label missing from RayCluster; pods will remain pending",
logger.Info("Queue label missing from parent; child will remain pending",
"requiredLabel", QueueLabelName)
return
}
if pod.Labels == nil {
pod.Labels = make(map[string]string)

childLabels := child.GetLabels()
if childLabels == nil {
childLabels = make(map[string]string)
}
pod.Labels[QueueLabelName] = queue
childLabels[QueueLabelName] = queue
child.SetLabels(childLabels)
}

func (k *KaiScheduler) AddMetadataToChildResource(_ context.Context, _ metav1.Object, _ metav1.Object, _ string) {
func addSchedulerNameToObject(obj metav1.Object, schedulerName string) {
switch obj := obj.(type) {
case *corev1.Pod:
obj.Spec.SchedulerName = schedulerName
case *corev1.PodTemplateSpec:
obj.Spec.SchedulerName = schedulerName
}
}

func (kf *KaiSchedulerFactory) New(_ context.Context, _ *rest.Config, _ client.Client) (schedulerinterface.BatchScheduler, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func createTestPod() *corev1.Pod {
}
}

func TestAddMetadataToPod_WithQueueLabel(t *testing.T) {
func TestAddMetadataToChildResource_WithQueueLabel(t *testing.T) {
a := assert.New(t)
scheduler := &KaiScheduler{}
ctx := context.Background()
Expand All @@ -52,8 +52,8 @@ func TestAddMetadataToPod_WithQueueLabel(t *testing.T) {
})
pod := createTestPod()

// Call AddMetadataToPod
scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)
// Call AddMetadataToChildResource
scheduler.AddMetadataToChildResource(ctx, rayCluster, pod, "test-group")

// Assert scheduler name is set to kai-scheduler
a.Equal("kai-scheduler", pod.Spec.SchedulerName)
Expand All @@ -63,7 +63,7 @@ func TestAddMetadataToPod_WithQueueLabel(t *testing.T) {
a.Equal("test-queue", pod.Labels[QueueLabelName])
}

func TestAddMetadataToPod_WithoutQueueLabel(t *testing.T) {
func TestAddMetadataToChildResource_WithoutQueueLabel(t *testing.T) {
a := assert.New(t)
scheduler := &KaiScheduler{}
ctx := context.Background()
Expand All @@ -72,8 +72,8 @@ func TestAddMetadataToPod_WithoutQueueLabel(t *testing.T) {
rayCluster := createTestRayCluster(map[string]string{})
pod := createTestPod()

// Call AddMetadataToPod
scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)
// Call AddMetadataToChildResource
scheduler.AddMetadataToChildResource(ctx, rayCluster, pod, "test-group")

// Assert scheduler name is still set (always required)
a.Equal("kai-scheduler", pod.Spec.SchedulerName)
Expand All @@ -85,7 +85,7 @@ func TestAddMetadataToPod_WithoutQueueLabel(t *testing.T) {
}
}

func TestAddMetadataToPod_WithEmptyQueueLabel(t *testing.T) {
func TestAddMetadataToChildResource_WithEmptyQueueLabel(t *testing.T) {
a := assert.New(t)
scheduler := &KaiScheduler{}
ctx := context.Background()
Expand All @@ -96,8 +96,8 @@ func TestAddMetadataToPod_WithEmptyQueueLabel(t *testing.T) {
})
pod := createTestPod()

// Call AddMetadataToPod
scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)
// Call AddMetadataToChildResource
scheduler.AddMetadataToChildResource(ctx, rayCluster, pod, "test-group")

// Assert scheduler name is still set
a.Equal("kai-scheduler", pod.Spec.SchedulerName)
Expand All @@ -109,7 +109,7 @@ func TestAddMetadataToPod_WithEmptyQueueLabel(t *testing.T) {
}
}

func TestAddMetadataToPod_PreservesExistingPodLabels(t *testing.T) {
func TestAddMetadataToChildResource_PreservesExistingPodLabels(t *testing.T) {
a := assert.New(t)
scheduler := &KaiScheduler{}
ctx := context.Background()
Expand All @@ -126,8 +126,8 @@ func TestAddMetadataToPod_PreservesExistingPodLabels(t *testing.T) {
"app": "ray",
}

// Call AddMetadataToPod
scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)
// Call AddMetadataToChildResource
scheduler.AddMetadataToChildResource(ctx, rayCluster, pod, "test-group")

// Assert scheduler name is set
a.Equal("kai-scheduler", pod.Spec.SchedulerName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,21 +93,32 @@ func (k *KubeScheduler) DoBatchSchedulingOnSubmission(ctx context.Context, objec
return nil
}

// AddMetadataToPod adds essential labels and annotations to the Ray pod
// AddMetadataToChildResource adds essential labels and annotations to the child resource.
// the scheduler needs these labels and annotations in order to do the scheduling properly
func (k *KubeScheduler) AddMetadataToPod(_ context.Context, rayCluster *rayv1.RayCluster, _ string, pod *corev1.Pod) {
// when gang scheduling is enabled, extra labels need to be added to all pods
if k.isGangSchedulingEnabled(rayCluster) {
pod.Labels[kubeSchedulerPodGroupLabelKey] = rayCluster.Name
func (k *KubeScheduler) AddMetadataToChildResource(_ context.Context, parent metav1.Object, child metav1.Object, _ string) {
// when gang scheduling is enabled, extra labels need to be added to all child resources
if k.isGangSchedulingEnabled(parent) {
labels := child.GetLabels()
if labels == nil {
labels = make(map[string]string)
}
labels[kubeSchedulerPodGroupLabelKey] = parent.GetName()
child.SetLabels(labels)
}
pod.Spec.SchedulerName = k.Name()
addSchedulerNameToObject(child, k.Name())
}

func (k *KubeScheduler) AddMetadataToChildResource(_ context.Context, _ metav1.Object, _ metav1.Object, _ string) {
func addSchedulerNameToObject(obj metav1.Object, schedulerName string) {
switch obj := obj.(type) {
case *corev1.Pod:
obj.Spec.SchedulerName = schedulerName
case *corev1.PodTemplateSpec:
obj.Spec.SchedulerName = schedulerName
}
}

func (k *KubeScheduler) isGangSchedulingEnabled(app *rayv1.RayCluster) bool {
_, exist := app.Labels[utils.RayGangSchedulingEnabled]
func (k *KubeScheduler) isGangSchedulingEnabled(obj metav1.Object) bool {
_, exist := obj.GetLabels()[utils.RayGangSchedulingEnabled]
return exist
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func TestCreatePodGroupWithMultipleHosts(t *testing.T) {
a.Equal(int32(5), podGroup.Spec.MinMember)
}

func TestAddMetadataToPod(t *testing.T) {
func TestAddMetadataToChildResource(t *testing.T) {
tests := []struct {
name string
enableGang bool
Expand Down Expand Up @@ -150,7 +150,7 @@ func TestAddMetadataToPod(t *testing.T) {
}

scheduler := &KubeScheduler{}
scheduler.AddMetadataToPod(context.TODO(), &cluster, "worker", pod)
scheduler.AddMetadataToChildResource(context.TODO(), &cluster, pod, "worker")

if tt.enableGang {
a.Equal(cluster.Name, pod.Labels[kubeSchedulerPodGroupLabelKey])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,19 +228,6 @@ func (v *VolcanoBatchScheduler) AddMetadataToChildResource(_ context.Context, pa
addSchedulerName(child, v.Name())
}

// This function will be removed in interface migration PR
func (v *VolcanoBatchScheduler) AddMetadataToPod(_ context.Context, app *rayv1.RayCluster, groupName string, pod *corev1.Pod) {
pod.Annotations[volcanoschedulingv1beta1.KubeGroupNameAnnotationKey] = getAppPodGroupName(app)
pod.Annotations[volcanobatchv1alpha1.TaskSpecKey] = groupName
if queue, ok := app.ObjectMeta.Labels[QueueNameLabelKey]; ok {
pod.Labels[QueueNameLabelKey] = queue
}
if priorityClassName, ok := app.ObjectMeta.Labels[utils.RayPriorityClassName]; ok {
pod.Spec.PriorityClassName = priorityClassName
}
pod.Spec.SchedulerName = v.Name()
}

func (vf *VolcanoBatchSchedulerFactory) New(_ context.Context, _ *rest.Config, cli client.Client) (schedulerinterface.BatchScheduler, error) {
if err := volcanoschedulingv1beta1.AddToScheme(cli.Scheme()); err != nil {
return nil, fmt.Errorf("failed to add volcano to scheme with error %w", err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,32 +122,6 @@ func (y *YuniKornScheduler) isGangSchedulingEnabled(obj metav1.Object) bool {
return exist
}

// AddMetadataToPod adds essential labels and annotations to the Ray pod
// the yunikorn scheduler needs these labels and annotations in order to do the scheduling properly
func (y *YuniKornScheduler) AddMetadataToPod(ctx context.Context, rayCluster *rayv1.RayCluster, groupName string, pod *corev1.Pod) {
logger := ctrl.LoggerFrom(ctx).WithName(SchedulerName)
// the applicationID and queue name must be provided in the labels
populateLabelsFromObject(rayCluster, pod, RayApplicationIDLabelName, YuniKornPodApplicationIDLabelName)
populateLabelsFromObject(rayCluster, pod, RayApplicationQueueLabelName, YuniKornPodQueueLabelName)
addSchedulerNameToObject(pod, y.Name())

// when gang scheduling is enabled, extra annotations need to be added to all pods
if y.isGangSchedulingEnabled(rayCluster) {
// populate the taskGroups info to each pod
err := propagateTaskGroupsAnnotation(rayCluster, pod)
if err != nil {
logger.Error(err, "failed to add gang scheduling related annotations to pod, "+
"gang scheduling will not be enabled for this workload",
"name", pod.Name, "namespace", pod.Namespace)
return
}

// set the task group name based on the head or worker group name
// the group name for the head and each of the worker group should be different
pod.Annotations[YuniKornTaskGroupNameAnnotationName] = groupName
}
}

func (y *YuniKornScheduler) AddMetadataToChildResource(ctx context.Context, parent metav1.Object, child metav1.Object, groupName string) {
logger := ctrl.LoggerFrom(ctx).WithName(SchedulerName)

Expand Down
4 changes: 2 additions & 2 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ func (r *RayClusterReconciler) createHeadPod(ctx context.Context, instance rayv1
// call the scheduler plugin if so
if r.options.BatchSchedulerManager != nil {
if scheduler, err := r.options.BatchSchedulerManager.GetScheduler(); err == nil {
scheduler.AddMetadataToPod(ctx, &instance, utils.RayNodeHeadGroupLabelValue, &pod)
scheduler.AddMetadataToChildResource(ctx, &instance, &pod, utils.RayNodeHeadGroupLabelValue)
} else {
return err
}
Expand All @@ -951,7 +951,7 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray
pod := r.buildWorkerPod(ctx, instance, worker)
if r.options.BatchSchedulerManager != nil {
if scheduler, err := r.options.BatchSchedulerManager.GetScheduler(); err == nil {
scheduler.AddMetadataToPod(ctx, &instance, worker.GroupName, &pod)
scheduler.AddMetadataToChildResource(ctx, &instance, &pod, worker.GroupName)
} else {
return err
}
Expand Down
Loading