diff --git a/ray-operator/apis/ray/v1/raycluster_types.go b/ray-operator/apis/ray/v1/raycluster_types.go index a317a3b479f..79b8db3b88e 100644 --- a/ray-operator/apis/ray/v1/raycluster_types.go +++ b/ray-operator/apis/ray/v1/raycluster_types.go @@ -121,6 +121,8 @@ type RayClusterStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file // Status reflects the status of the cluster + // + // Deprecated: the State field is replaced by the Conditions field. State ClusterState `json:"state,omitempty"` // DesiredCPU indicates total desired CPUs for the cluster DesiredCPU resource.Quantity `json:"desiredCPU,omitempty"` diff --git a/ray-operator/apis/ray/v1alpha1/raycluster_types.go b/ray-operator/apis/ray/v1alpha1/raycluster_types.go index 1d848e5ebf1..3018540c865 100644 --- a/ray-operator/apis/ray/v1alpha1/raycluster_types.go +++ b/ray-operator/apis/ray/v1alpha1/raycluster_types.go @@ -133,6 +133,8 @@ type RayClusterStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file // Status reflects the status of the cluster + // + // Deprecated: the State field is replaced by the Conditions field. State ClusterState `json:"state,omitempty"` // Reason provides more information about current State Reason string `json:"reason,omitempty"` diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index 05dc7a8fa28..c6df3b62624 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -364,10 +364,11 @@ func (r *RayClusterReconciler) rayClusterReconcile(ctx context.Context, request // this field should be used to determine whether to update this CR or not. func (r *RayClusterReconciler) inconsistentRayClusterStatus(ctx context.Context, oldStatus rayv1.RayClusterStatus, newStatus rayv1.RayClusterStatus) bool { logger := ctrl.LoggerFrom(ctx) - if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason { + + if oldStatus.State != newStatus.State || oldStatus.Reason != newStatus.Reason { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 logger.Info("inconsistentRayClusterStatus", "detect inconsistency", fmt.Sprintf( "old State: %s, new State: %s, old Reason: %s, new Reason: %s", - oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason)) + oldStatus.State, newStatus.State, oldStatus.Reason, newStatus.Reason)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 return true } if oldStatus.ReadyWorkerReplicas != newStatus.ReadyWorkerReplicas || @@ -1199,7 +1200,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra newInstance.Status.DesiredTPU = totalResources[corev1.ResourceName("google.com/tpu")] if utils.CheckAllPodsRunning(ctx, runtimePods) { - newInstance.Status.State = rayv1.Ready + newInstance.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } // Check if the head node is running and ready by checking the head pod's status. 
@@ -1244,7 +1245,7 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra } if newInstance.Spec.Suspend != nil && *newInstance.Spec.Suspend && len(runtimePods.Items) == 0 { - newInstance.Status.State = rayv1.Suspended + newInstance.Status.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } if err := r.updateEndpoints(ctx, newInstance); err != nil { @@ -1258,11 +1259,11 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra timeNow := metav1.Now() newInstance.Status.LastUpdateTime = &timeNow - if instance.Status.State != newInstance.Status.State { + if instance.Status.State != newInstance.Status.State { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 if newInstance.Status.StateTransitionTimes == nil { newInstance.Status.StateTransitionTimes = make(map[rayv1.ClusterState]*metav1.Time) } - newInstance.Status.StateTransitionTimes[newInstance.Status.State] = &timeNow + newInstance.Status.StateTransitionTimes[newInstance.Status.State] = &timeNow //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } return newInstance, nil diff --git a/ray-operator/controllers/ray/raycluster_controller_unit_test.go b/ray-operator/controllers/ray/raycluster_controller_unit_test.go index dce70c38171..df3105ad7a8 100644 --- a/ray-operator/controllers/ray/raycluster_controller_unit_test.go +++ b/ray-operator/controllers/ray/raycluster_controller_unit_test.go @@ -1552,7 +1552,7 @@ func TestReconcile_UpdateClusterState(t *testing.T) { cluster := rayv1.RayCluster{} err := fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err, "Fail to get RayCluster") - assert.Empty(t, cluster.Status.State, "Cluster state should be empty") + assert.Empty(t, cluster.Status.State, "Cluster state should be empty") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 testRayClusterReconciler := &RayClusterReconciler{ Client: fakeClient, @@ -1562,13 +1562,13 @@ func TestReconcile_UpdateClusterState(t *testing.T) { state := rayv1.Ready newTestRayCluster := testRayCluster.DeepCopy() - newTestRayCluster.Status.State = state + newTestRayCluster.Status.State = state //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 err = testRayClusterReconciler.updateRayClusterStatus(ctx, testRayCluster, newTestRayCluster) assert.Nil(t, err, "Fail to update cluster state") err = fakeClient.Get(ctx, namespacedName, &cluster) assert.Nil(t, err, "Fail to get RayCluster after updating state") - assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated") + assert.Equal(t, cluster.Status.State, state, "Cluster state should be updated") //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } func TestInconsistentRayClusterStatus(t *testing.T) { @@ -1612,7 +1612,7 @@ func TestInconsistentRayClusterStatus(t *testing.T) { // Case 1: `State` is different => return true newStatus := oldStatus.DeepCopy() - newStatus.State = rayv1.Suspended + newStatus.State = rayv1.Suspended //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 assert.True(t, r.inconsistentRayClusterStatus(ctx, oldStatus, *newStatus)) // Case 2: `Reason` is different => return true @@ -1905,7 +1905,7 @@ func TestStateTransitionTimes_NoStateChange(t *testing.T) { } preUpdateTime := metav1.Now() - testRayCluster.Status.State = rayv1.Ready + testRayCluster.Status.State = rayv1.Ready //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 
testRayCluster.Status.StateTransitionTimes = map[rayv1.ClusterState]*metav1.Time{rayv1.Ready: &preUpdateTime} newInstance, err := r.calculateStatus(ctx, testRayCluster, nil) assert.Nil(t, err) diff --git a/ray-operator/controllers/ray/rayjob_controller.go b/ray-operator/controllers/ray/rayjob_controller.go index ecf770ea5a8..a921c341890 100644 --- a/ray-operator/controllers/ray/rayjob_controller.go +++ b/ray-operator/controllers/ray/rayjob_controller.go @@ -165,8 +165,8 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request) // Check the current status of RayCluster before submitting. if clientURL := rayJobInstance.Status.DashboardURL; clientURL == "" { - if rayClusterInstance.Status.State != rayv1.Ready { - logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State) + if rayClusterInstance.Status.State != rayv1.Ready { //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 + logger.Info("Wait for the RayCluster.Status.State to be ready before submitting the job.", "RayCluster", rayClusterInstance.Name, "State", rayClusterInstance.Status.State) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 return ctrl.Result{RequeueAfter: RayJobDefaultRequeueDuration}, err } diff --git a/ray-operator/controllers/ray/rayjob_controller_test.go b/ray-operator/controllers/ray/rayjob_controller_test.go index e6f44a5bc80..d866be957d2 100644 --- a/ray-operator/controllers/ray/rayjob_controller_test.go +++ b/ray-operator/controllers/ray/rayjob_controller_test.go @@ -239,7 +239,7 @@ var _ = Context("RayJob in K8sJobMode", func() { It("Make RayCluster.Status.State to be rayv1.Ready", func() { // The RayCluster is not 'Ready' yet because Pods are not running and ready. - Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) @@ -364,7 +364,7 @@ var _ = Context("RayJob in K8sJobMode", func() { It("Make RayCluster.Status.State to be rayv1.Ready", func() { // The RayCluster is not 'Ready' yet because Pods are not running and ready. - Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) @@ -526,7 +526,7 @@ var _ = Context("RayJob in K8sJobMode", func() { It("Make RayCluster.Status.State to be rayv1.Ready", func() { // The RayCluster is not 'Ready' yet because Pods are not running and ready. - Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) @@ -619,7 +619,7 @@ var _ = Context("RayJob in K8sJobMode", func() { It("Make RayCluster.Status.State to be rayv1.Ready (attempt 2)", func() { // The RayCluster is not 'Ready' yet because Pods are not running and ready. 
- Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) + Expect(rayCluster.Status.State).NotTo(Equal(rayv1.Ready)) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 updateHeadPodToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) updateWorkerPodsToRunningAndReady(ctx, rayJob.Status.RayClusterName, namespace) diff --git a/ray-operator/controllers/ray/suite_helpers_test.go b/ray-operator/controllers/ray/suite_helpers_test.go index f4175d4a314..54ceb56be50 100644 --- a/ray-operator/controllers/ray/suite_helpers_test.go +++ b/ray-operator/controllers/ray/suite_helpers_test.go @@ -46,7 +46,7 @@ func getClusterState(ctx context.Context, namespace string, clusterName string) if err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, &cluster); err != nil { log.Fatal(err) } - return cluster.Status.State + return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } } diff --git a/ray-operator/test/support/ray.go b/ray-operator/test/support/ray.go index dc58dd0c767..61081053e21 100644 --- a/ray-operator/test/support/ray.go +++ b/ray-operator/test/support/ray.go @@ -75,7 +75,7 @@ func GetRayCluster(t Test, namespace, name string) *rayv1.RayCluster { } func RayClusterState(cluster *rayv1.RayCluster) rayv1.ClusterState { - return cluster.Status.State + return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288 } func RayClusterDesiredWorkerReplicas(cluster *rayv1.RayCluster) int32 {
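Taken together, the patch deprecates `RayClusterStatus.State` in both API versions, names "the Conditions field" as its replacement in the doc comment, and suppresses staticcheck's deprecation warning (SA1019) at every surviving read or write, with each suppression linking back to https://github.com/ray-project/kuberay/pull/2288. Consumers that branch on `Status.State`, as the RayJob controller does above before submitting a job, are expected to migrate to `Status.Conditions`. A minimal migration sketch follows; it assumes `RayClusterStatus` carries a `Conditions []metav1.Condition` slice and a `RayClusterProvisioned` condition type, neither of which appears in this diff, so verify both against the condition types your KubeRay version actually publishes.

```go
// Package migration is a hypothetical consumer-side sketch, not KubeRay code.
package migration

import (
	"k8s.io/apimachinery/pkg/api/meta"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

// isClusterReady answers the question the RayJob controller asks above
// ("is this RayCluster ready for job submission?") from Status.Conditions
// instead of the deprecated Status.State field.
func isClusterReady(cluster *rayv1.RayCluster) bool {
	// meta.IsStatusConditionTrue is the standard apimachinery helper for
	// looking up a condition by type in a []metav1.Condition slice.
	// RayClusterProvisioned is an assumed condition type; substitute
	// whatever your operator version sets.
	return meta.IsStatusConditionTrue(cluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
}
```

Conditions also subsume `Reason`: each `metav1.Condition` carries its own `Reason` and `Message`, so callers get the explanation alongside the state rather than from a separate top-level field.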
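For readers unfamiliar with the suppression idiom: `nolint` directives in golangci-lint are line-scoped and linter-scoped, which is why every remaining use of `Status.State` carries its own annotation rather than a file-wide exclusion. A standalone illustration of the syntax used throughout the patch:

```go
package main

import (
	"fmt"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

func main() {
	cluster := &rayv1.RayCluster{}

	// staticcheck (SA1019) flags every use of a Deprecated identifier.
	// The directive must share the line with the flagged expression;
	// text after the second "//" is free-form justification, here the
	// PR that introduced the deprecation.
	fmt.Println(cluster.Status.State) //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
}
```

Keeping the suppressions line by line leaves SA1019 active everywhere else, so any new use of the deprecated field must either migrate to Conditions or justify itself explicitly.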