@@ -305,7 +305,7 @@ func TestBuildPod(t *testing.T) {
305
305
// Test head pod
306
306
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
307
307
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
308
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
308
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
309
309
310
310
// Check environment variables
311
311
rayContainer := pod .Spec .Containers [utils .RayContainerIndex ]
@@ -360,7 +360,7 @@ func TestBuildPod(t *testing.T) {
360
360
podName = cluster .Name + utils .DashSymbol + string (rayv1 .WorkerNode ) + utils .DashSymbol + worker .GroupName + utils .DashSymbol + utils .FormatInt32 (0 )
361
361
fqdnRayIP := utils .GenerateFQDNServiceName (ctx , * cluster , cluster .Namespace )
362
362
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
363
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
363
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
364
364
365
365
// Check resources
366
366
rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
@@ -423,7 +423,7 @@ func TestBuildPod_WithNoCPULimits(t *testing.T) {
423
423
// Test head pod
424
424
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
425
425
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
426
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
426
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
427
427
expectedCommandArg := splitAndSort ("ulimit -n 65536; ray start --head --block --dashboard-agent-listen-port=52365 --memory=1073741824 --num-cpus=2 --metrics-export-port=8080 --dashboard-host=0.0.0.0" )
428
428
actualCommandArg := splitAndSort (pod .Spec .Containers [0 ].Args [0 ])
429
429
if ! reflect .DeepEqual (expectedCommandArg , actualCommandArg ) {
@@ -435,7 +435,7 @@ func TestBuildPod_WithNoCPULimits(t *testing.T) {
435
435
podName = cluster .Name + utils .DashSymbol + string (rayv1 .WorkerNode ) + utils .DashSymbol + worker .GroupName + utils .DashSymbol + utils .FormatInt32 (0 )
436
436
fqdnRayIP := utils .GenerateFQDNServiceName (ctx , * cluster , cluster .Namespace )
437
437
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
438
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
438
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
439
439
expectedCommandArg = splitAndSort ("ulimit -n 65536; ray start --block --dashboard-agent-listen-port=52365 --memory=1073741824 --num-cpus=2 --num-gpus=3 --address=raycluster-sample-head-svc.default.svc.cluster.local:6379 --port=6379 --metrics-export-port=8080" )
440
440
actualCommandArg = splitAndSort (pod .Spec .Containers [0 ].Args [0 ])
441
441
if ! reflect .DeepEqual (expectedCommandArg , actualCommandArg ) {
@@ -459,7 +459,7 @@ func TestBuildPod_WithOverwriteCommand(t *testing.T) {
459
459
460
460
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
461
461
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
462
- headPod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
462
+ headPod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
463
463
headContainer := headPod .Spec .Containers [utils .RayContainerIndex ]
464
464
assert .Equal (t , headContainer .Command , []string {"I am head" })
465
465
assert .Equal (t , headContainer .Args , []string {"I am head again" })
@@ -468,7 +468,7 @@ func TestBuildPod_WithOverwriteCommand(t *testing.T) {
468
468
podName = cluster .Name + utils .DashSymbol + string (rayv1 .WorkerNode ) + utils .DashSymbol + worker .GroupName + utils .DashSymbol + utils .FormatInt32 (0 )
469
469
fqdnRayIP := utils .GenerateFQDNServiceName (ctx , * cluster , cluster .Namespace )
470
470
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
471
- workerPod := BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
471
+ workerPod := BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
472
472
workerContainer := workerPod .Spec .Containers [utils .RayContainerIndex ]
473
473
assert .Equal (t , workerContainer .Command , []string {"I am worker" })
474
474
assert .Equal (t , workerContainer .Args , []string {"I am worker again" })
@@ -480,7 +480,7 @@ func TestBuildPod_WithAutoscalerEnabled(t *testing.T) {
480
480
cluster .Spec .EnableInTreeAutoscaling = & trueFlag
481
481
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
482
482
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
483
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .GetCRDType ("" ), "" )
483
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .GetCRDType ("" ), "" )
484
484
485
485
actualResult := pod .Labels [utils .RayClusterLabelKey ]
486
486
expectedResult := cluster .Name
@@ -537,7 +537,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) {
537
537
cluster .Spec .EnableInTreeAutoscaling = & trueFlag
538
538
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
539
539
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
540
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .RayServiceCRD , "" )
540
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .RayServiceCRD , "" )
541
541
542
542
val , ok := pod .Labels [utils .RayClusterServingServiceLabelKey ]
543
543
assert .True (t , ok , "Expected serve label is not present" )
@@ -548,7 +548,7 @@ func TestBuildPod_WithCreatedByRayService(t *testing.T) {
548
548
podName = cluster .Name + utils .DashSymbol + string (rayv1 .WorkerNode ) + utils .DashSymbol + worker .GroupName + utils .DashSymbol + utils .FormatInt32 (0 )
549
549
fqdnRayIP := utils .GenerateFQDNServiceName (ctx , * cluster , cluster .Namespace )
550
550
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
551
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .RayServiceCRD , fqdnRayIP )
551
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .RayServiceCRD , fqdnRayIP )
552
552
553
553
val , ok = pod .Labels [utils .RayClusterServingServiceLabelKey ]
554
554
assert .True (t , ok , "Expected serve label is not present" )
@@ -567,7 +567,7 @@ func TestBuildPod_WithGcsFtEnabled(t *testing.T) {
567
567
// Build a head Pod.
568
568
podName := strings .ToLower (cluster .Name + utils .DashSymbol + string (rayv1 .HeadNode ) + utils .DashSymbol + utils .FormatInt32 (0 ))
569
569
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
570
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
570
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
571
571
572
572
// Check environment variable "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S"
573
573
rayContainer := pod .Spec .Containers [utils .RayContainerIndex ]
@@ -585,7 +585,7 @@ func TestBuildPod_WithGcsFtEnabled(t *testing.T) {
585
585
cluster .Spec .HeadGroupSpec .Template .Spec .Containers [utils .RayContainerIndex ].Env = append (cluster .Spec .HeadGroupSpec .Template .Spec .Containers [utils .RayContainerIndex ].Env ,
586
586
corev1.EnvVar {Name : utils .RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S , Value : "60" })
587
587
podTemplateSpec = DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
588
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
588
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
589
589
rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
590
590
591
591
// Check environment variable "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S"
@@ -602,7 +602,7 @@ func TestBuildPod_WithGcsFtEnabled(t *testing.T) {
602
602
podName = cluster .Name + utils .DashSymbol + string (rayv1 .WorkerNode ) + utils .DashSymbol + worker .GroupName + utils .DashSymbol + utils .FormatInt32 (0 )
603
603
fqdnRayIP := utils .GenerateFQDNServiceName (ctx , * cluster , cluster .Namespace )
604
604
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
605
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
605
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
606
606
607
607
// Check the default value of "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S"
608
608
rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
@@ -619,11 +619,52 @@ func TestBuildPod_WithGcsFtEnabled(t *testing.T) {
619
619
corev1.EnvVar {Name : utils .RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S , Value : "120" })
620
620
worker = cluster .Spec .WorkerGroupSpecs [0 ]
621
621
podTemplateSpec = DefaultWorkerPodTemplate (ctx , * cluster , worker , podName , fqdnRayIP , "6379" )
622
- pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
622
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .WorkerNode , cluster . Spec . GcsFaultToleranceOptions , worker .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), fqdnRayIP )
623
623
624
624
// Check that the explicitly set value ("120") of "RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S" is kept instead of the default
625
625
rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
626
626
checkContainerEnv (t , rayContainer , utils .RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S , "120" )
627
+
628
+ // Test 5 with a minimal GcsFaultToleranceOptions
629
+ cluster = instance .DeepCopy ()
630
+ cluster .UID = "mycluster"
631
+ cluster .Annotations = map [string ]string {}
632
+ cluster .Spec .GcsFaultToleranceOptions = & rayv1.GcsFaultToleranceOptions {
633
+ RedisAddress : "redis://127.0.0.1:6379" ,
634
+ }
635
+ podTemplateSpec = DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
636
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
637
+ rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
638
+
639
+ if pod .Annotations [utils .RayFTEnabledAnnotationKey ] != "true" {
640
+ t .Fatalf ("Ray pod has unexpected %s annotation: %v" , utils .RayFTEnabledAnnotationKey , pod .Annotations [utils .RayFTEnabledAnnotationKey ])
641
+ }
642
+ if pod .Annotations [utils .RayExternalStorageNSAnnotationKey ] != string (cluster .UID ) {
643
+ t .Fatalf ("Ray pod has unexpected %s annotation: %v" , utils .RayExternalStorageNSAnnotationKey , pod .Annotations [utils .RayExternalStorageNSAnnotationKey ])
644
+ }
645
+ checkContainerEnv (t , rayContainer , utils .RAY_REDIS_ADDRESS , "redis://127.0.0.1:6379" )
646
+ checkContainerEnv (t , rayContainer , utils .RAY_EXTERNAL_STORAGE_NS , string (cluster .UID ))
647
+
648
+ // Test 6 with a full GcsFaultToleranceOptions
649
+ cluster = instance .DeepCopy ()
650
+ cluster .UID = "mycluster"
651
+ cluster .Annotations = map [string ]string {}
652
+ cluster .Spec .GcsFaultToleranceOptions = & rayv1.GcsFaultToleranceOptions {
653
+ ExternalStorageNamespace : "myns" ,
654
+ RedisAddress : "redis://127.0.0.1:6379" ,
655
+ }
656
+ podTemplateSpec = DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
657
+ pod = BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , nil , utils .GetCRDType ("" ), "" )
658
+ rayContainer = pod .Spec .Containers [utils .RayContainerIndex ]
659
+
660
+ if pod .Annotations [utils .RayFTEnabledAnnotationKey ] != "true" {
661
+ t .Fatalf ("Ray pod has unexpected %s annotation: %v" , utils .RayFTEnabledAnnotationKey , pod .Annotations [utils .RayFTEnabledAnnotationKey ])
662
+ }
663
+ if pod .Annotations [utils .RayExternalStorageNSAnnotationKey ] != "myns" {
664
+ t .Fatalf ("Ray pod has unexpected %s annotation: %v" , utils .RayExternalStorageNSAnnotationKey , pod .Annotations [utils .RayExternalStorageNSAnnotationKey ])
665
+ }
666
+ checkContainerEnv (t , rayContainer , utils .RAY_EXTERNAL_STORAGE_NS , "myns" )
667
+ checkContainerEnv (t , rayContainer , utils .RAY_REDIS_ADDRESS , "redis://127.0.0.1:6379" )
627
668
}
628
669
629
670
// Check that autoscaler container overrides work as expected.
@@ -690,7 +731,7 @@ func TestBuildPodWithAutoscalerOptions(t *testing.T) {
690
731
SecurityContext : & customSecurityContext ,
691
732
}
692
733
podTemplateSpec := DefaultHeadPodTemplate (ctx , * cluster , cluster .Spec .HeadGroupSpec , podName , "6379" )
693
- pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .GetCRDType ("" ), "" )
734
+ pod := BuildPod (ctx , podTemplateSpec , rayv1 .HeadNode , cluster .Spec .GcsFaultToleranceOptions , cluster . Spec . HeadGroupSpec .RayStartParams , "6379" , & trueFlag , utils .GetCRDType ("" ), "" )
694
735
expectedContainer := * autoscalerContainer .DeepCopy ()
695
736
expectedContainer .Image = customAutoscalerImage
696
737
expectedContainer .ImagePullPolicy = customPullPolicy
0 commit comments