[Feature][RayCluster]: Deprecate the RayCluster .Status.State field (#…
rueian authored Sep 3, 2024
1 parent 2ac9c44 commit 5231dbf
Showing 8 changed files with 24 additions and 19 deletions.
2 changes: 2 additions & 0 deletions ray-operator/apis/ray/v1/raycluster_types.go
@@ -121,6 +121,8 @@ type RayClusterStatus struct {
 	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
 	// Important: Run "make" to regenerate code after modifying this file
 	// Status reflects the status of the cluster
+	//
+	// Deprecated: the State field is replaced by the Conditions field.
 	State ClusterState `json:"state,omitempty"`
 	// DesiredCPU indicates total desired CPUs for the cluster
 	DesiredCPU resource.Quantity `json:"desiredCPU,omitempty"`
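
For code that still reads the deprecated field, a minimal migration sketch follows, assuming the standard k8s.io/apimachinery condition helpers; RayClusterProvisioned is used as an example condition type and is an assumption, since this commit does not show which condition types the operator publishes.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

// isProvisioned reads readiness from the Conditions slice instead of the
// deprecated State field. RayClusterProvisioned is an assumed condition type.
func isProvisioned(cluster *rayv1.RayCluster) bool {
	// Before: cluster.Status.State == rayv1.Ready
	return meta.IsStatusConditionTrue(cluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
}

func main() {
	fmt.Println(isProvisioned(&rayv1.RayCluster{}))
}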
2 changes: 2 additions & 0 deletions ray-operator/apis/ray/v1alpha1/raycluster_types.go
@@ -133,6 +133,8 @@ type RayClusterStatus struct {
 	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
 	// Important: Run "make" to regenerate code after modifying this file
 	// Status reflects the status of the cluster
+	//
+	// Deprecated: the State field is replaced by the Conditions field.
 	State ClusterState `json:"state,omitempty"`
 	// Reason provides more information about current State
 	Reason string `json:"reason,omitempty"`
13 changes: 7 additions & 6 deletions ray-operator/controllers/ray/raycluster_controller.go
@@ -364,10 +364,11 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request
 // this field should be used to determine whether to update this CR or not.
 func (r *RayClusterReconciler) inconsistentRayClusterStatus(ctx context.Context, oldStatus rayv1.RayClusterStatus, newStatus rayv1.RayClusterStatus) bool {
 	logger := ctrl.LoggerFrom(ctx)
-	if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason {
+
+	if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 		logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf(
 			"old State: %s, new State: %s, old Reason: %s, new Reason: %s",
-			oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason))
+			oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 		return true
 	}
 	if oldStatus.ReadyWorkerReplicas != newStatus.ReadyWorkerReplicas ||
@@ -1199,7 +1200,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
 	newInstance.Status.DesiredTPU = totalResources[corev1.ResourceName("google.com/tpu")]
 
 	if utils.CheckAllPodsRunning(ctx, runtimePods) {
-		newInstance.Status.State = rayv1.Ready
+		newInstance.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	}
 
 	// Check if the head node is running and ready by checking the head pod's status.
@@ -1244,7 +1245,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
 	}
 
 	if newInstance.Spec.Suspend != nil && *newInstance.Spec.Suspend && len(runtimePods.Items) == 0 {
-		newInstance.Status.State = rayv1.Suspended
+		newInstance.Status.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	}
 
 	if err := r.updateEndpoints(ctx, newInstance); err != nil {
@@ -1258,11 +1259,11 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
 	timeNow := metav1.Now()
 	newInstance.Status.LastUpdateTime = &timeNow
 
-	if instance.Status.State != newInstance.Status.State {
+	if instance.Status.State != newInstance.Status.State { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 		if newInstance.Status.StateTransitionTimes == nil {
 			newInstance.Status.StateTransitionTimes = make(map[rayv1.ClusterState]*metav1.Time)
 		}
-		newInstance.Status.StateTransitionTimes[newInstance.Status.State] = &timeNow
+		newInstance.Status.StateTransitionTimes[newInstance.Status.State] = &timeNow //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	}
 
 	return newInstance, nil
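
The hunks above only silence the staticcheck deprecation warnings (SA1019) while the State comparison remains in place. As a sketch of a Conditions-aware inconsistency check that could eventually replace the State and Reason comparison, and not code from this commit, one option is apimachinery's semantic equality, which compares metav1.Time values by instant rather than by internal representation:

package controllers

import (
	"k8s.io/apimachinery/pkg/api/equality"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// inconsistentConditions reports whether two condition slices differ; it
// could serve as an extra clause in inconsistentRayClusterStatus once
// callers migrate off State and Reason.
func inconsistentConditions(oldConds, newConds []metav1.Condition) bool {
	return !equality.Semantic.DeepEqual(oldConds, newConds)
}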
10 changes: 5 additions & 5 deletions ray-operator/controllers/ray/raycluster_controller_unit_test.go
@@ -1552,7 +1552,7 @@ func TestReconcile_UpdateClusterState(t *testing.T) {
 	cluster := rayv1.RayCluster{}
 	err := fakeClient.Get(ctx, namespacedName, &cluster)
 	assert.Nil(t, err, "Fail to get RayCluster")
-	assert.Empty(t, cluster.Status.State, "Cluster state should be empty")
+	assert.Empty(t, cluster.Status.State, "Cluster state should be empty") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 
 	testRayClusterReconciler := &RayClusterReconciler{
 		Client: fakeClient,
@@ -1562,13 +1562,13 @@
 
 	state := rayv1.Ready
 	newTestRayCluster := testRayCluster.DeepCopy()
-	newTestRayCluster.Status.State = state
+	newTestRayCluster.Status.State = state //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	err = testRayClusterReconciler.updateRayClusterStatus(ctx, testRayCluster, newTestRayCluster)
 	assert.Nil(t, err, "Fail to update cluster state")
 
 	err = fakeClient.Get(ctx, namespacedName, &cluster)
 	assert.Nil(t, err, "Fail to get RayCluster after updating state")
-	assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated")
+	assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 }
 
 func TestInconsistentRayClusterStatus(t *testing.T) {
@@ -1612,7 +1612,7 @@ func TestInconsistentRayClusterStatus(t *testing.T) {
 
 	// Case 1: `State` is different => return true
 	newStatus := oldStatus.DeepCopy()
-	newStatus.State = rayv1.Suspended
+	newStatus.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus))
 
 	// Case 2: `Reason` is different => return true
@@ -1905,7 +1905,7 @@ func TestStateTransitionTimes_NoStateChange(t *testing.T) {
 	}
 
 	preUpdateTime := metav1.Now()
-	testRayCluster.Status.State = rayv1.Ready
+	testRayCluster.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	testRayCluster.Status.StateTransitionTimes = map[rayv1.ClusterState]*metav1.Time{rayv1.Ready: &preUpdateTime}
 	newInstance, err := r.calculateStatus(ctx, testRayCluster, nil)
 	assert.Nil(t, err)
4 changes: 2 additions & 2 deletions ray-operator/controllers/ray/rayjob_controller.go
@@ -165,8 +165,8 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
 
 	// Check the current status of RayCluster before submitting.
 	if clientURL := rayJobInstance.Status.DashboardURL; clientURL == "" {
-		if rayClusterInstance.Status.State != rayv1.Ready {
-			logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State)
+		if rayClusterInstance.Status.State != rayv1.Ready { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
+			logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 			return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err
 		}
 
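
The gate above still requeues on the deprecated State field. Below is a hedged sketch of the equivalent wait expressed against Conditions, written as a polling client rather than a reconciler requeue; the RayClusterProvisioned condition type and the getCluster callback are assumptions for illustration.

package client

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/api/meta"
	"k8s.io/apimachinery/pkg/util/wait"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

// waitForProvisioned polls until the assumed RayClusterProvisioned condition
// is true, mirroring the State != rayv1.Ready requeue in the controller.
func waitForProvisioned(ctx context.Context, getCluster func(context.Context) (*rayv1.RayCluster, error)) error {
	return wait.PollUntilContextTimeout(ctx, 2*time.Second, 5*time.Minute, true,
		func(ctx context.Context) (bool, error) {
			cluster, err := getCluster(ctx)
			if err != nil {
				// Treat lookup errors as transient and keep polling.
				return false, nil
			}
			return meta.IsStatusConditionTrue(cluster.Status.Conditions, string(rayv1.RayClusterProvisioned)), nil
		})
}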
8 changes: 4 additions & 4 deletions ray-operator/controllers/ray/rayjob_controller_test.go
@@ -239,7 +239,7 @@ var _ = Context("RayJob in K8sJobMode", func() {
 
 	It("Make RayCluster.Status.State to be rayv1.Ready", func() {
 		// The RayCluster is not 'Ready' yet because Pods are not running and ready.
-		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready))
+		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 
 		updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
 		updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
@@ -364,7 +364,7 @@ var _ = Context("RayJob in K8sJobMode", func() {
 
 	It("Make RayCluster.Status.State to be rayv1.Ready", func() {
 		// The RayCluster is not 'Ready' yet because Pods are not running and ready.
-		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready))
+		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 
 		updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
 		updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
@@ -526,7 +526,7 @@ var _ = Context("RayJob in K8sJobMode", func() {
 
 	It("Make RayCluster.Status.State to be rayv1.Ready", func() {
 		// The RayCluster is not 'Ready' yet because Pods are not running and ready.
-		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready))
+		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 
 		updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
 		updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
@@ -619,7 +619,7 @@ var _ = Context("RayJob in K8sJobMode", func() {
 
 	It("Make RayCluster.Status.State to be rayv1.Ready (attempt 2)", func() {
 		// The RayCluster is not 'Ready' yet because Pods are not running and ready.
-		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready))
+		Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 
 		updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
 		updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace)
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/suite_helpers_test.go
@@ -46,7 +46,7 @@ func getClusterState(ctx context.Context, namespace string, clusterName string)
 		if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, &cluster); err != nil {
 			log.Fatal(err)
 		}
-		return cluster.Status.State
+		return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 	}
 }
 
2 changes: 1 addition & 1 deletion ray-operator/test/support/ray.go
@@ -75,7 +75,7 @@ func GetRayCluster(t Test, namespace, name string) *rayv1.RayCluster {
 }
 
 func RayClusterState(cluster *rayv1.RayCluster) rayv1.ClusterState {
-	return cluster.Status.State
+	return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
 }
 
 func RayClusterDesiredWorkerReplicas(cluster *rayv1.RayCluster) int32 {
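
A Conditions-based companion to the RayClusterState helper could look like the sketch below. meta.FindStatusCondition is the standard apimachinery helper; adding such a function to the test support package is an assumption, not part of this commit.

package support

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

// RayClusterCondition returns the named condition from the cluster status,
// or nil if it is absent, so tests can assert on Conditions instead of the
// deprecated State field.
func RayClusterCondition(cluster *rayv1.RayCluster, conditionType string) *metav1.Condition {
	return meta.FindStatusCondition(cluster.Status.Conditions, conditionType)
}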
