Skip to content

Add KubeRay e2e Test for custom idleTimeoutSeconds with v2 Autoscaler #2725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ray-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ test-e2e: WHAT ?= ./test/e2e
test-e2e: manifests fmt vet ## Run e2e tests.
go test -timeout 30m -v $(WHAT)

# Run the autoscaler e2e suite with `go test` (verbose, 30-minute timeout) after
# the manifests, fmt and vet prerequisites. WHAT is the package path handed to
# `go test`; `?=` only sets it when it is not already defined, so it defaults to
# ./test/e2eautoscaler and can be overridden by the caller.
test-e2e-autoscaler: WHAT ?= ./test/e2eautoscaler
test-e2e-autoscaler: manifests fmt vet ## Run e2e autoscaler tests.
go test -timeout 30m -v $(WHAT)

test-sampleyaml: WHAT ?= ./test/sampleyaml
test-sampleyaml: manifests fmt vet
Expand Down
67 changes: 67 additions & 0 deletions ray-operator/test/e2eautoscaler/raycluster_autoscaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package e2eautoscaler
import (
"fmt"
"testing"
"time"

"github.com/onsi/gomega"
corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
Expand Down Expand Up @@ -355,3 +356,69 @@ func TestRayClusterAutoscalerMinReplicasUpdate(t *testing.T) {
})
}
}

func TestRayClusterAutoscalerV2IdleTimeout(t *testing.T) {
// Only test with the V2 Autoscaler
name := "Create a RayCluster with autoscaler v2 enabled"
tc := tests["Create a RayCluster with autoscaler v2 enabled"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
tc := tests["Create a RayCluster with autoscaler v2 enabled"]
tc, ok := tests[name]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can make the test fail if !ok


test := With(t)
g := gomega.NewWithT(t)

// Create a namespace
namespace := test.NewTestNamespace()

// Minimum Ray Version for custom idleTimeoutSeconds
idleTimeoutMinRayVersion := "2.40.0"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't have a convention for stating the minimum requirement in tests, so you can just use the current version by GetRayVersion().


customIdleTimeoutSeconds := int32(30)
defaultIdleTimeoutSeconds := int32(60)

test.T().Run(name, func(_ *testing.T) {
rayClusterSpecAC := rayv1ac.RayClusterSpec().
WithEnableInTreeAutoscaling(true).
WithRayVersion(idleTimeoutMinRayVersion).
WithHeadGroupSpec(rayv1ac.HeadGroupSpec().
WithRayStartParams(map[string]string{"num-cpus": "0"}).
WithTemplate(tc.HeadPodTemplateGetter())).
WithWorkerGroupSpecs(
rayv1ac.WorkerGroupSpec().
WithReplicas(1).
WithMinReplicas(0).
WithMaxReplicas(4).
WithGroupName("no-idle-timeout-group").
WithRayStartParams(map[string]string{"num-cpus": "1"}).
WithTemplate(tc.WorkerPodTemplateGetter()),
rayv1ac.WorkerGroupSpec().
WithReplicas(1).
WithMinReplicas(0).
WithMaxReplicas(4).
WithIdleTimeoutSeconds(customIdleTimeoutSeconds).
WithGroupName("custom-idle-timeout-group").
WithRayStartParams(map[string]string{"num-cpus": "1"}).
WithTemplate(tc.WorkerPodTemplateGetter()),
)
rayClusterAC := rayv1ac.RayCluster("ray-cluster", namespace.Name).WithSpec((rayClusterSpecAC))

rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions)
g.Expect(err).NotTo(gomega.HaveOccurred())
test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name)

// Wait for RayCluster to become ready and verify the number of available worker replicas.
g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium).
Should(gomega.WithTransform(RayClusterState, gomega.Equal(rayv1.Ready)))
g.Expect(GetRayCluster(test, rayCluster.Namespace, rayCluster.Name)).To(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(2))))

headPod, err := GetHeadPod(test, rayCluster)
g.Expect(err).NotTo(gomega.HaveOccurred())
test.T().Logf("Found head pod %s/%s", headPod.Namespace, headPod.Name)

// After customIdleTimeoutSeconds, the replica in the worker group with custom idleTimeoutSeconds set should be scaled down.
g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), time.Duration(customIdleTimeoutSeconds)*time.Second).
Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(1))))

// After the default idleTimeoutSeconds, all worker replicas should be scaled down.
g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), time.Duration(defaultIdleTimeoutSeconds)*time.Second).
Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0))))
})
}
Loading