@@ -94,21 +94,66 @@ func (r *RayCluster) validateWorkerGroups() *field.Error {
9494}
9595
9696func (r * RayCluster ) ValidateRayClusterSpec () * field.Error {
97- if r . Annotations [ RayFTEnabledAnnotationKey ] == "false" && r . Spec .GcsFaultToleranceOptions != nil {
97+ if len ( r . Spec . HeadGroupSpec . Template . Spec .Containers ) == 0 {
9898 return field .Invalid (
99- field .NewPath ("spec" ).Child ("gcsFaultToleranceOptions " ),
100- r .Spec .GcsFaultToleranceOptions ,
101- fmt . Sprintf ( "GcsFaultToleranceOptions should be nil when %s annotation is set to false" , RayFTEnabledAnnotationKey ) ,
99+ field .NewPath ("spec" ).Child ("headGroupSpec" ). Child ( "template" ). Child ( "spec" ). Child ( "containers " ),
100+ r .Spec .HeadGroupSpec . Template . Spec . Containers ,
101+ "headGroupSpec should have at least one container" ,
102102 )
103103 }
104- if r .Annotations [RayFTEnabledAnnotationKey ] != "true" && len (r .Spec .HeadGroupSpec .Template .Spec .Containers ) > 0 {
104+
105+ for _ , workerGroup := range r .Spec .WorkerGroupSpecs {
106+ if len (workerGroup .Template .Spec .Containers ) == 0 {
107+ return field .Invalid (
108+ field .NewPath ("spec" ).Child ("workerGroupSpecs" ),
109+ r .Spec .WorkerGroupSpecs ,
110+ "workerGroupSpec should have at least one container" ,
111+ )
112+ }
113+ }
114+
115+ if r .Annotations [RayFTEnabledAnnotationKey ] != "" && r .Spec .GcsFaultToleranceOptions != nil {
116+ return field .Invalid (
117+ field .NewPath ("metadata" ).Child ("annotations" ).Child (RayFTEnabledAnnotationKey ),
118+ r .Annotations [RayFTEnabledAnnotationKey ],
119+ fmt .Sprintf ("%s annotation and GcsFaultToleranceOptions are both set. " +
120+ "Please use only GcsFaultToleranceOptions to configure GCS fault tolerance" , RayFTEnabledAnnotationKey ),
121+ )
122+ }
123+
124+ if ! IsGCSFaultToleranceEnabled (* r ) {
105125 if EnvVarExists (RAY_REDIS_ADDRESS , r .Spec .HeadGroupSpec .Template .Spec .Containers [RayContainerIndex ].Env ) {
106126 return field .Invalid (
107- field .NewPath ("spec" ).Child ("headGroupSpec" ).Child ("template" ).Child ("spec" ).Child ("containers" ).Index (0 ).Child ("env" ),
108- RAY_REDIS_ADDRESS ,
109- fmt .Sprintf ("%s should not be set when %s is disabled" , RAY_REDIS_ADDRESS , RayFTEnabledAnnotationKey ),
127+ field .NewPath ("spec" ).Child ("headGroupSpec" ).Child ("template" ).Child ("spec" ).Child ("containers" ).Index (RayContainerIndex ).Child ("env" ),
128+ r .Spec .HeadGroupSpec .Template .Spec .Containers [RayContainerIndex ].Env ,
129+ fmt .Sprintf ("%s is set which implicitly enables GCS fault tolerance, " +
130+ "but GcsFaultToleranceOptions is not set. Please set GcsFaultToleranceOptions " +
131+ "to enable GCS fault tolerance" , RAY_REDIS_ADDRESS ),
110132 )
111133 }
112134 }
135+
136+ if r .Spec .GcsFaultToleranceOptions != nil {
137+ if redisPassword := r .Spec .HeadGroupSpec .RayStartParams ["redis-password" ]; redisPassword != "" {
138+ return field .Invalid (
139+ field .NewPath ("spec" ).Child ("headGroupSpec" ).Child ("rayStartParams" ),
140+ r .Spec .HeadGroupSpec .RayStartParams ,
141+ "cannot set `redis-password` in rayStartParams when GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead" ,
142+ )
143+ }
144+
145+ headContainer := r .Spec .HeadGroupSpec .Template .Spec .Containers [RayContainerIndex ]
146+ if EnvVarExists (REDIS_PASSWORD , headContainer .Env ) {
147+ return field .Invalid (
148+ field .NewPath ("spec" ).Child ("headGroupSpec" ).Child ("template" ).Child ("spec" ).Child ("containers" ).Index (RayContainerIndex ).Child ("env" ),
149+ headContainer .Env ,
150+ "cannot set `REDIS_PASSWORD` env var in head Pod when GcsFaultToleranceOptions is enabled - use GcsFaultToleranceOptions.RedisPassword instead" ,
151+ )
152+ }
153+ }
154+
155+ // TODO (kevin85421): If GcsFaultToleranceOptions is set, users should use `GcsFaultToleranceOptions.RedisAddress` instead of `RAY_REDIS_ADDRESS`.
156+ // TODO (kevin85421): If GcsFaultToleranceOptions is set, users should use `GcsFaultToleranceOptions.ExternalStorageNamespace` instead of
157+ // the annotation `ray.io/external-storage-namespace`.
113158 return nil
114159}
0 commit comments