Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6c8f3db

Browse files
committedDec 16, 2024·
Adds RayCluster.apply()
- Adds RayCluster.apply() implementation - Adds e2e tests for apply - Adds unit tests for apply - Exclude unit tests code from coverage - Add coverage to cluster.py - Adding coverage for the case of an openshift cluster
1 parent be9763a commit 6c8f3db

File tree

9 files changed

+678
-37
lines changed

9 files changed

+678
-37
lines changed
 

‎CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ pytest -v src/codeflare_sdk
7676

7777
### Local e2e Testing
7878

79-
- Please follow the [e2e documentation](https://github.com/project-codeflare/codeflare-sdk/blob/main/docs/e2e.md)
79+
- Please follow the [e2e documentation](https://github.com/project-codeflare/codeflare-sdk/blob/main/docs/sphinx/user-docs/e2e.rst)
8080

8181
#### Code Coverage
8282

‎src/codeflare_sdk/common/kueue/test_kueue.py

+124-7
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
from ..utils.unit_test_support import get_local_queue, createClusterConfig
14+
from ..utils.unit_test_support import get_local_queue, create_cluster_config
1515
from unittest.mock import patch
1616
from codeflare_sdk.ray.cluster.cluster import Cluster, ClusterConfiguration
1717
import yaml
1818
import os
1919
import filecmp
2020
from pathlib import Path
21-
from .kueue import list_local_queues
21+
from .kueue import list_local_queues, local_queue_exists, add_queue_label
2222

2323
parent = Path(__file__).resolve().parents[4] # project directory
2424
aw_dir = os.path.expanduser("~/.codeflare/resources/")
@@ -46,7 +46,7 @@ def test_cluster_creation_no_aw_local_queue(mocker):
4646
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
4747
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
4848
)
49-
config = createClusterConfig()
49+
config = create_cluster_config()
5050
config.name = "unit-test-cluster-kueue"
5151
config.write_to_file = True
5252
config.local_queue = "local-queue-default"
@@ -59,7 +59,7 @@ def test_cluster_creation_no_aw_local_queue(mocker):
5959
)
6060

6161
# With resources loaded in memory, no Local Queue specified.
62-
config = createClusterConfig()
62+
config = create_cluster_config()
6363
config.name = "unit-test-cluster-kueue"
6464
config.write_to_file = False
6565
cluster = Cluster(config)
@@ -79,7 +79,7 @@ def test_aw_creation_local_queue(mocker):
7979
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
8080
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
8181
)
82-
config = createClusterConfig()
82+
config = create_cluster_config()
8383
config.name = "unit-test-aw-kueue"
8484
config.appwrapper = True
8585
config.write_to_file = True
@@ -93,7 +93,7 @@ def test_aw_creation_local_queue(mocker):
9393
)
9494

9595
# With resources loaded in memory, no Local Queue specified.
96-
config = createClusterConfig()
96+
config = create_cluster_config()
9797
config.name = "unit-test-aw-kueue"
9898
config.appwrapper = True
9999
config.write_to_file = False
@@ -114,7 +114,7 @@ def test_get_local_queue_exists_fail(mocker):
114114
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
115115
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
116116
)
117-
config = createClusterConfig()
117+
config = create_cluster_config()
118118
config.name = "unit-test-aw-kueue"
119119
config.appwrapper = True
120120
config.write_to_file = True
@@ -169,6 +169,123 @@ def test_list_local_queues(mocker):
169169
assert lqs == []
170170

171171

172+
def test_local_queue_exists_found(mocker):
173+
# Mock Kubernetes client and list_namespaced_custom_object method
174+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
175+
mock_api_instance = mocker.Mock()
176+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_api_instance)
177+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
178+
179+
# Mock return value for list_namespaced_custom_object
180+
mock_api_instance.list_namespaced_custom_object.return_value = {
181+
"items": [
182+
{"metadata": {"name": "existing-queue"}},
183+
{"metadata": {"name": "another-queue"}},
184+
]
185+
}
186+
187+
# Call the function
188+
namespace = "test-namespace"
189+
local_queue_name = "existing-queue"
190+
result = local_queue_exists(namespace, local_queue_name)
191+
192+
# Assertions
193+
assert result is True
194+
mock_api_instance.list_namespaced_custom_object.assert_called_once_with(
195+
group="kueue.x-k8s.io",
196+
version="v1beta1",
197+
namespace=namespace,
198+
plural="localqueues",
199+
)
200+
201+
202+
def test_local_queue_exists_not_found(mocker):
203+
# Mock Kubernetes client and list_namespaced_custom_object method
204+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
205+
mock_api_instance = mocker.Mock()
206+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_api_instance)
207+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
208+
209+
# Mock return value for list_namespaced_custom_object
210+
mock_api_instance.list_namespaced_custom_object.return_value = {
211+
"items": [
212+
{"metadata": {"name": "another-queue"}},
213+
{"metadata": {"name": "different-queue"}},
214+
]
215+
}
216+
217+
# Call the function
218+
namespace = "test-namespace"
219+
local_queue_name = "non-existent-queue"
220+
result = local_queue_exists(namespace, local_queue_name)
221+
222+
# Assertions
223+
assert result is False
224+
mock_api_instance.list_namespaced_custom_object.assert_called_once_with(
225+
group="kueue.x-k8s.io",
226+
version="v1beta1",
227+
namespace=namespace,
228+
plural="localqueues",
229+
)
230+
231+
232+
import pytest
233+
from unittest import mock # If you're also using mocker from pytest-mock
234+
235+
236+
def test_add_queue_label_with_valid_local_queue(mocker):
237+
# Mock the kubernetes.client.CustomObjectsApi and its response
238+
mock_api_instance = mocker.patch("kubernetes.client.CustomObjectsApi")
239+
mock_api_instance.return_value.list_namespaced_custom_object.return_value = {
240+
"items": [
241+
{"metadata": {"name": "valid-queue"}},
242+
]
243+
}
244+
245+
# Mock other dependencies
246+
mocker.patch("codeflare_sdk.common.kueue.local_queue_exists", return_value=True)
247+
mocker.patch(
248+
"codeflare_sdk.common.kueue.get_default_kueue_name",
249+
return_value="default-queue",
250+
)
251+
252+
# Define input item and parameters
253+
item = {"metadata": {}}
254+
namespace = "test-namespace"
255+
local_queue = "valid-queue"
256+
257+
# Call the function
258+
add_queue_label(item, namespace, local_queue)
259+
260+
# Assert that the label is added to the item
261+
assert item["metadata"]["labels"] == {"kueue.x-k8s.io/queue-name": "valid-queue"}
262+
263+
264+
def test_add_queue_label_with_invalid_local_queue(mocker):
265+
# Mock the kubernetes.client.CustomObjectsApi and its response
266+
mock_api_instance = mocker.patch("kubernetes.client.CustomObjectsApi")
267+
mock_api_instance.return_value.list_namespaced_custom_object.return_value = {
268+
"items": [
269+
{"metadata": {"name": "valid-queue"}},
270+
]
271+
}
272+
273+
# Mock the local_queue_exists function to return False
274+
mocker.patch("codeflare_sdk.common.kueue.local_queue_exists", return_value=False)
275+
276+
# Define input item and parameters
277+
item = {"metadata": {}}
278+
namespace = "test-namespace"
279+
local_queue = "invalid-queue"
280+
281+
# Call the function and expect a ValueError
282+
with pytest.raises(
283+
ValueError,
284+
match="local_queue provided does not exist or is not in this namespace",
285+
):
286+
add_queue_label(item, namespace, local_queue)
287+
288+
172289
# Make sure to always keep this function last
173290
def test_cleanup():
174291
os.remove(f"{aw_dir}unit-test-cluster-kueue.yaml")

‎src/codeflare_sdk/common/utils/unit_test_support.py

+55-11
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,34 @@
2626
aw_dir = os.path.expanduser("~/.codeflare/resources/")
2727

2828

29-
def createClusterConfig():
29+
def create_cluster_config(num_workers=2, write_to_file=False):
3030
config = ClusterConfiguration(
3131
name="unit-test-cluster",
3232
namespace="ns",
33-
num_workers=2,
33+
num_workers=num_workers,
3434
worker_cpu_requests=3,
3535
worker_cpu_limits=4,
3636
worker_memory_requests=5,
3737
worker_memory_limits=6,
3838
appwrapper=True,
39-
write_to_file=False,
39+
write_to_file=write_to_file,
4040
)
4141
return config
4242

4343

44-
def createClusterWithConfig(mocker):
45-
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
46-
mocker.patch(
47-
"kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
48-
return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
49-
)
50-
cluster = Cluster(createClusterConfig())
44+
def create_cluster(mocker, num_workers=2, write_to_file=False):
45+
cluster = Cluster(create_cluster_config(num_workers, write_to_file))
5146
return cluster
5247

5348

54-
def createClusterWrongType():
49+
def patch_cluster_with_dynamic_client(mocker, cluster, dynamic_client=None):
50+
mocker.patch.object(cluster, "get_dynamic_client", return_value=dynamic_client)
51+
mocker.patch.object(cluster, "down", return_value=None)
52+
mocker.patch.object(cluster, "config_check", return_value=None)
53+
# mocker.patch.object(cluster, "_throw_for_no_raycluster", return_value=None)
54+
55+
56+
def create_cluster_wrong_type():
5557
config = ClusterConfiguration(
5658
name="unit-test-cluster",
5759
namespace="ns",
@@ -383,6 +385,48 @@ def mocked_ingress(port, cluster_name="unit-test-cluster", annotations: dict = N
383385
return mock_ingress
384386

385387

388+
# Global dictionary to maintain state in the mock
389+
cluster_state = {}
390+
391+
392+
# The mock side_effect function for server_side_apply
393+
def mock_server_side_apply(resource, body=None, name=None, namespace=None, **kwargs):
394+
# Simulate the behavior of server_side_apply:
395+
# Update a mock state that represents the cluster's current configuration.
396+
# Stores the state in a global dictionary for simplicity.
397+
398+
global cluster_state
399+
400+
if not resource or not body or not name or not namespace:
401+
raise ValueError("Missing required parameters for server_side_apply")
402+
403+
# Extract worker count from the body if it exists
404+
try:
405+
worker_count = (
406+
body["spec"]["workerGroupSpecs"][0]["replicas"]
407+
if "spec" in body and "workerGroupSpecs" in body["spec"]
408+
else None
409+
)
410+
except KeyError:
411+
worker_count = None
412+
413+
# Apply changes to the cluster_state mock
414+
cluster_state[name] = {
415+
"namespace": namespace,
416+
"worker_count": worker_count,
417+
"body": body,
418+
}
419+
420+
# Return a response that mimics the behavior of a successful apply
421+
return {
422+
"status": "success",
423+
"applied": True,
424+
"name": name,
425+
"namespace": namespace,
426+
"worker_count": worker_count,
427+
}
428+
429+
386430
@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"})
387431
def create_cluster_all_config_params(mocker, cluster_name, is_appwrapper) -> Cluster:
388432
mocker.patch(

‎src/codeflare_sdk/common/widgets/test_widgets.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import codeflare_sdk.common.widgets.widgets as cf_widgets
1616
import pandas as pd
1717
from unittest.mock import MagicMock, patch
18-
from ..utils.unit_test_support import get_local_queue, createClusterConfig
18+
from ..utils.unit_test_support import get_local_queue, create_cluster_config
1919
from codeflare_sdk.ray.cluster.cluster import Cluster
2020
from codeflare_sdk.ray.cluster.status import (
2121
RayCluster,
@@ -38,7 +38,7 @@ def test_cluster_up_down_buttons(mocker):
3838
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
3939
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
4040
)
41-
cluster = Cluster(createClusterConfig())
41+
cluster = Cluster(create_cluster_config())
4242

4343
with patch("ipywidgets.Button") as MockButton, patch(
4444
"ipywidgets.Checkbox"

‎src/codeflare_sdk/ray/cluster/cluster.py

+87-8
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
import requests
5353

5454
from kubernetes import config
55+
from kubernetes.dynamic import DynamicClient
56+
from kubernetes import client as k8s_client
57+
from kubernetes.client.rest import ApiException
58+
5559
from kubernetes.client.rest import ApiException
5660
import warnings
5761

@@ -84,6 +88,14 @@ def __init__(self, config: ClusterConfiguration):
8488
if is_notebook():
8589
cluster_up_down_buttons(self)
8690

91+
def get_dynamic_client(self): # pragma: no cover
92+
"""Return a dynamic client, optionally mocked in tests."""
93+
return DynamicClient(get_api_client())
94+
95+
def config_check(self):
96+
"""Return a dynamic client, optionally mocked in tests."""
97+
return config_check()
98+
8799
@property
88100
def _client_headers(self):
89101
k8_client = get_api_client()
@@ -95,9 +107,7 @@ def _client_headers(self):
95107

96108
@property
97109
def _client_verify_tls(self):
98-
if not _is_openshift_cluster or not self.config.verify_tls:
99-
return False
100-
return True
110+
return _is_openshift_cluster and self.config.verify_tls
101111

102112
@property
103113
def job_client(self):
@@ -121,7 +131,6 @@ def create_resource(self):
121131
Called upon cluster object creation, creates an AppWrapper yaml based on
122132
the specifications of the ClusterConfiguration.
123133
"""
124-
125134
if self.config.namespace is None:
126135
self.config.namespace = get_current_namespace()
127136
if self.config.namespace is None:
@@ -130,7 +139,6 @@ def create_resource(self):
130139
raise TypeError(
131140
f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
132141
)
133-
134142
return build_ray_cluster(self)
135143

136144
# creates a new cluster with the provided or default spec
@@ -139,10 +147,11 @@ def up(self):
139147
Applies the Cluster yaml, pushing the resource request onto
140148
the Kueue localqueue.
141149
"""
142-
150+
print(
151+
"WARNING: The up() function is planned for deprecation in favor of apply()."
152+
)
143153
# check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
144154
self._throw_for_no_raycluster()
145-
146155
namespace = self.config.namespace
147156

148157
try:
@@ -176,6 +185,52 @@ def up(self):
176185
except Exception as e: # pragma: no cover
177186
return _kube_api_error_handling(e)
178187

188+
# Applies a new cluster with the provided or default spec
189+
def apply(self, force=False):
190+
"""
191+
Applies the Cluster yaml using server-side apply.
192+
If 'force' is set to True, conflicts will be forced.
193+
"""
194+
# check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
195+
self._throw_for_no_raycluster()
196+
namespace = self.config.namespace
197+
198+
try:
199+
self.config_check()
200+
api_instance = client.CustomObjectsApi(get_api_client())
201+
crds = self.get_dynamic_client().resources
202+
api_instance = crds.get(
203+
api_version="workload.codeflare.dev/v1beta2", kind="AppWrapper"
204+
)
205+
if self.config.appwrapper:
206+
if self.config.write_to_file:
207+
with open(self.resource_yaml) as f:
208+
aw = yaml.load(f, Loader=yaml.FullLoader)
209+
api_instance.server_side_apply(
210+
group="workload.codeflare.dev",
211+
version="v1beta2",
212+
namespace=namespace,
213+
plural="appwrappers",
214+
body=aw,
215+
)
216+
else:
217+
api_instance.server_side_apply(
218+
group="workload.codeflare.dev",
219+
version="v1beta2",
220+
namespace=namespace,
221+
plural="appwrappers",
222+
body=self.resource_yaml,
223+
)
224+
print(f"AppWrapper: '{self.config.name}' has successfully been created")
225+
else:
226+
api_instance = crds.get(api_version="ray.io/v1", kind="RayCluster")
227+
self._component_resources_apply(namespace, api_instance)
228+
print(
229+
f"Ray Cluster: '{self.config.name}' has successfully been applied"
230+
)
231+
except Exception as e: # pragma: no cover
232+
return _kube_api_error_handling(e)
233+
179234
def _throw_for_no_raycluster(self):
180235
api_instance = client.CustomObjectsApi(get_api_client())
181236
try:
@@ -204,7 +259,7 @@ def down(self):
204259
resource_name = self.config.name
205260
self._throw_for_no_raycluster()
206261
try:
207-
config_check()
262+
self.config_check()
208263
api_instance = client.CustomObjectsApi(get_api_client())
209264
if self.config.appwrapper:
210265
api_instance.delete_namespaced_custom_object(
@@ -507,6 +562,16 @@ def _component_resources_up(
507562
else:
508563
_create_resources(self.resource_yaml, namespace, api_instance)
509564

565+
def _component_resources_apply(
566+
self, namespace: str, api_instance: client.CustomObjectsApi
567+
):
568+
if self.config.write_to_file:
569+
with open(self.resource_yaml) as f:
570+
ray_cluster = yaml.safe_load(f)
571+
_apply_resources(ray_cluster, namespace, api_instance)
572+
else:
573+
_apply_resources(self.resource_yaml, namespace, api_instance)
574+
510575
def _component_resources_down(
511576
self, namespace: str, api_instance: client.CustomObjectsApi
512577
):
@@ -744,6 +809,20 @@ def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsA
744809
)
745810

746811

812+
def _apply_resources(
813+
yamls, namespace: str, api_instance: client.CustomObjectsApi, force=False
814+
):
815+
api_instance.server_side_apply(
816+
field_manager="cluster-manager",
817+
group="ray.io",
818+
version="v1",
819+
namespace=namespace,
820+
plural="rayclusters",
821+
body=yamls,
822+
force_conflicts=force, # Allow forcing conflicts if needed
823+
)
824+
825+
747826
def _check_aw_exists(name: str, namespace: str) -> bool:
748827
try:
749828
config_check()

‎src/codeflare_sdk/ray/cluster/test_cluster.py

+232-6
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,18 @@
1919
list_all_queued,
2020
)
2121
from codeflare_sdk.common.utils.unit_test_support import (
22-
createClusterWithConfig,
22+
create_cluster,
2323
arg_check_del_effect,
2424
ingress_retrieval,
2525
arg_check_apply_effect,
2626
get_local_queue,
27-
createClusterConfig,
27+
create_cluster_config,
2828
get_ray_obj,
2929
get_obj_none,
3030
get_ray_obj_with_status,
3131
get_aw_obj_with_status,
32+
patch_cluster_with_dynamic_client,
33+
route_list_retrieval,
3234
)
3335
from codeflare_sdk.ray.cluster.cluster import _is_openshift_cluster
3436
from pathlib import Path
@@ -67,11 +69,189 @@ def test_cluster_up_down(mocker):
6769
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
6870
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
6971
)
70-
cluster = cluster = createClusterWithConfig(mocker)
72+
cluster = create_cluster(mocker)
7173
cluster.up()
7274
cluster.down()
7375

7476

77+
def test_cluster_apply_scale_up_scale_down(mocker):
78+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
79+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
80+
mock_dynamic_client = mocker.Mock()
81+
mocker.patch(
82+
"kubernetes.dynamic.DynamicClient.resources", new_callable=mocker.PropertyMock
83+
)
84+
mocker.patch(
85+
"codeflare_sdk.ray.cluster.cluster.Cluster.create_resource",
86+
return_value="./tests/test_cluster_yamls/ray/default-ray-cluster.yaml",
87+
)
88+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
89+
mocker.patch(
90+
"kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
91+
return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
92+
)
93+
94+
# Initialize test
95+
initial_num_workers = 1
96+
scaled_up_num_workers = 2
97+
98+
# Step 1: Create cluster with initial workers
99+
cluster = create_cluster(mocker, initial_num_workers)
100+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
101+
mocker.patch(
102+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
103+
return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"),
104+
)
105+
cluster.apply()
106+
107+
# Step 2: Scale up the cluster
108+
cluster = create_cluster(mocker, scaled_up_num_workers)
109+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
110+
cluster.apply()
111+
112+
# Step 3: Scale down the cluster
113+
cluster = create_cluster(mocker, initial_num_workers)
114+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
115+
cluster.apply()
116+
117+
# Tear down
118+
cluster.down()
119+
120+
121+
def test_cluster_apply_with_file(mocker):
122+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
123+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
124+
mock_dynamic_client = mocker.Mock()
125+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster")
126+
mocker.patch(
127+
"kubernetes.dynamic.DynamicClient.resources", new_callable=mocker.PropertyMock
128+
)
129+
mocker.patch(
130+
"codeflare_sdk.ray.cluster.cluster.Cluster.create_resource",
131+
return_value="./tests/test_cluster_yamls/ray/default-ray-cluster.yaml",
132+
)
133+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
134+
mocker.patch(
135+
"kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
136+
return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
137+
)
138+
139+
# Step 1: Create cluster with initial workers
140+
cluster = create_cluster(mocker, 1, write_to_file=True)
141+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
142+
mocker.patch(
143+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
144+
return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"),
145+
)
146+
cluster.apply()
147+
# Tear down
148+
cluster.down()
149+
150+
151+
def test_cluster_apply_with_appwrapper(mocker):
152+
# Mock Kubernetes client and dynamic client methods
153+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
154+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
155+
mocker.patch(
156+
"codeflare_sdk.ray.cluster.cluster._check_aw_exists",
157+
return_value=True,
158+
)
159+
mock_dynamic_client = mocker.Mock()
160+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster")
161+
mocker.patch(
162+
"kubernetes.dynamic.DynamicClient.resources", new_callable=mocker.PropertyMock
163+
)
164+
mocker.patch(
165+
"codeflare_sdk.ray.cluster.cluster.Cluster.create_resource",
166+
return_value="./tests/test_cluster_yamls/ray/default-ray-cluster.yaml",
167+
)
168+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
169+
170+
# Create a cluster configuration with appwrapper set to False
171+
cluster = create_cluster(mocker, 1, write_to_file=False)
172+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
173+
174+
# Mock listing RayCluster to simulate it doesn't exist
175+
mocker.patch(
176+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
177+
return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"),
178+
)
179+
# Call the apply method
180+
cluster.apply()
181+
182+
# Assertions
183+
print("Cluster applied without AppWrapper.")
184+
185+
186+
def test_cluster_apply_without_appwrapper_write_to_file(mocker):
187+
# Mock Kubernetes client and dynamic client methods
188+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
189+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
190+
mocker.patch(
191+
"codeflare_sdk.ray.cluster.cluster._check_aw_exists",
192+
return_value=True,
193+
)
194+
mock_dynamic_client = mocker.Mock()
195+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster")
196+
mocker.patch(
197+
"kubernetes.dynamic.DynamicClient.resources", new_callable=mocker.PropertyMock
198+
)
199+
mocker.patch(
200+
"codeflare_sdk.ray.cluster.cluster.Cluster.create_resource",
201+
return_value="./tests/test_cluster_yamls/ray/default-ray-cluster.yaml",
202+
)
203+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
204+
205+
# Create a cluster configuration with appwrapper set to False
206+
cluster = create_cluster(mocker, 1, write_to_file=True)
207+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
208+
cluster.config.appwrapper = False
209+
210+
# Mock listing RayCluster to simulate it doesn't exist
211+
mocker.patch(
212+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
213+
return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"),
214+
)
215+
# Call the apply method
216+
cluster.apply()
217+
218+
# Assertions
219+
print("Cluster applied without AppWrapper.")
220+
221+
222+
def test_cluster_apply_without_appwrapper(mocker):
223+
# Mock Kubernetes client and dynamic client methods
224+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
225+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
226+
mock_dynamic_client = mocker.Mock()
227+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster")
228+
mocker.patch(
229+
"kubernetes.dynamic.DynamicClient.resources", new_callable=mocker.PropertyMock
230+
)
231+
mocker.patch(
232+
"codeflare_sdk.ray.cluster.cluster.Cluster.create_resource",
233+
return_value="./tests/test_cluster_yamls/ray/default-ray-cluster.yaml",
234+
)
235+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
236+
237+
# Create a cluster configuration with appwrapper set to False
238+
cluster = create_cluster(mocker, 1, write_to_file=False)
239+
cluster.config.appwrapper = None
240+
patch_cluster_with_dynamic_client(mocker, cluster, mock_dynamic_client)
241+
242+
# Mock listing RayCluster to simulate it doesn't exist
243+
mocker.patch(
244+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
245+
return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"),
246+
)
247+
248+
# Call the apply method
249+
cluster.apply()
250+
251+
# Assertions
252+
print("Cluster applied without AppWrapper.")
253+
254+
75255
def test_cluster_up_down_no_mcad(mocker):
76256
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._throw_for_no_raycluster")
77257
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
@@ -98,7 +278,7 @@ def test_cluster_up_down_no_mcad(mocker):
98278
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
99279
return_value={"items": []},
100280
)
101-
config = createClusterConfig()
281+
config = create_cluster_config()
102282
config.name = "unit-test-cluster-ray"
103283
config.appwrapper = False
104284
cluster = Cluster(config)
@@ -117,7 +297,7 @@ def test_cluster_uris(mocker):
117297
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
118298
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
119299
)
120-
cluster = cluster = createClusterWithConfig(mocker)
300+
cluster = create_cluster(mocker)
121301
mocker.patch(
122302
"kubernetes.client.NetworkingV1Api.list_namespaced_ingress",
123303
return_value=ingress_retrieval(
@@ -147,6 +327,52 @@ def test_cluster_uris(mocker):
147327
== "Dashboard not available yet, have you run cluster.up()?"
148328
)
149329

330+
mocker.patch(
331+
"codeflare_sdk.ray.cluster.cluster._is_openshift_cluster", return_value=True
332+
)
333+
mocker.patch(
334+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
335+
return_value={
336+
"items": [
337+
{
338+
"metadata": {
339+
"name": "ray-dashboard-unit-test-cluster",
340+
},
341+
"spec": {
342+
"host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org",
343+
"tls": {}, # Indicating HTTPS
344+
},
345+
}
346+
]
347+
},
348+
)
349+
cluster = create_cluster(mocker)
350+
assert (
351+
cluster.cluster_dashboard_uri()
352+
== "http://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org"
353+
)
354+
mocker.patch(
355+
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
356+
return_value={
357+
"items": [
358+
{
359+
"metadata": {
360+
"name": "ray-dashboard-unit-test-cluster",
361+
},
362+
"spec": {
363+
"host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org",
364+
"tls": {"termination": "passthrough"}, # Indicating HTTPS
365+
},
366+
}
367+
]
368+
},
369+
)
370+
cluster = create_cluster(mocker)
371+
assert (
372+
cluster.cluster_dashboard_uri()
373+
== "https://ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org"
374+
)
375+
150376

151377
def test_ray_job_wrapping(mocker):
152378
import ray
@@ -159,7 +385,7 @@ def ray_addr(self, *args):
159385
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
160386
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
161387
)
162-
cluster = cluster = createClusterWithConfig(mocker)
388+
cluster = create_cluster(mocker)
163389
mocker.patch(
164390
"ray.job_submission.JobSubmissionClient._check_connection_and_version_with_url",
165391
return_value="None",

‎src/codeflare_sdk/ray/cluster/test_config.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
from codeflare_sdk.common.utils.unit_test_support import (
16-
createClusterWrongType,
16+
create_cluster_wrong_type,
1717
get_local_queue,
1818
create_cluster_all_config_params,
1919
)
@@ -55,6 +55,7 @@ def test_default_appwrapper_creation(mocker):
5555
assert cluster.resource_yaml == expected_aw
5656

5757

58+
@pytest.mark.filterwarnings("ignore::UserWarning")
5859
def test_config_creation_all_parameters(mocker):
5960
from codeflare_sdk.ray.cluster.config import DEFAULT_RESOURCE_MAPPING
6061

@@ -98,6 +99,7 @@ def test_config_creation_all_parameters(mocker):
9899
)
99100

100101

102+
@pytest.mark.filterwarnings("ignore::UserWarning")
101103
def test_all_config_params_aw(mocker):
102104
create_cluster_all_config_params(mocker, "aw-all-params", True)
103105
assert filecmp.cmp(
@@ -109,11 +111,12 @@ def test_all_config_params_aw(mocker):
109111

110112
def test_config_creation_wrong_type():
111113
with pytest.raises(TypeError) as error_info:
112-
createClusterWrongType()
114+
create_cluster_wrong_type()
113115

114116
assert len(str(error_info.value).splitlines()) == 4
115117

116118

119+
@pytest.mark.filterwarnings("ignore::UserWarning")
117120
def test_cluster_config_deprecation_conversion(mocker):
118121
config = ClusterConfiguration(
119122
name="test",

‎tests/e2e/cluster_apply_kind_test.py

+156
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
from codeflare_sdk import Cluster, ClusterConfiguration
2+
import pytest
3+
from kubernetes import client
4+
5+
from support import (
6+
initialize_kubernetes_client,
7+
create_namespace,
8+
delete_namespace,
9+
get_ray_cluster,
10+
)
11+
12+
13+
@pytest.mark.kind
14+
class TestRayClusterApply:
15+
def setup_method(self):
16+
initialize_kubernetes_client(self)
17+
18+
def teardown_method(self):
19+
delete_namespace(self)
20+
21+
def test_cluster_apply(self):
22+
self.setup_method()
23+
create_namespace(self)
24+
25+
cluster_name = "test-cluster-apply"
26+
namespace = self.namespace
27+
28+
# Initial configuration with 1 worker
29+
initial_config = ClusterConfiguration(
30+
name=cluster_name,
31+
namespace=namespace,
32+
num_workers=1,
33+
head_cpu_requests="500m",
34+
head_cpu_limits="1",
35+
head_memory_requests="1Gi",
36+
head_memory_limits="2Gi",
37+
worker_cpu_requests="500m",
38+
worker_cpu_limits="1",
39+
worker_memory_requests="1Gi",
40+
worker_memory_limits="2Gi",
41+
write_to_file=True,
42+
verify_tls=False,
43+
)
44+
45+
# Create the cluster
46+
cluster = Cluster(initial_config)
47+
cluster.apply()
48+
49+
# Wait for the cluster to be ready
50+
cluster.wait_ready()
51+
status = cluster.status()
52+
assert status["ready"], f"Cluster {cluster_name} is not ready: {status}"
53+
54+
# Verify the cluster is created
55+
ray_cluster = get_ray_cluster(cluster_name, namespace)
56+
assert ray_cluster is not None, "Cluster was not created successfully"
57+
assert (
58+
ray_cluster["spec"]["workerGroupSpecs"][0]["replicas"] == 1
59+
), "Initial worker count does not match"
60+
61+
# Update configuration with 3 workers
62+
updated_config = ClusterConfiguration(
63+
name=cluster_name,
64+
namespace=namespace,
65+
num_workers=2,
66+
head_cpu_requests="500m",
67+
head_cpu_limits="1",
68+
head_memory_requests="1Gi",
69+
head_memory_limits="2Gi",
70+
worker_cpu_requests="500m",
71+
worker_cpu_limits="1",
72+
worker_memory_requests="1Gi",
73+
worker_memory_limits="2Gi",
74+
write_to_file=True,
75+
verify_tls=False,
76+
)
77+
78+
# Apply the updated configuration
79+
cluster.config = updated_config
80+
cluster.apply()
81+
82+
# Wait for the updated cluster to be ready
83+
cluster.wait_ready()
84+
updated_status = cluster.status()
85+
assert updated_status[
86+
"ready"
87+
], f"Cluster {cluster_name} is not ready after update: {updated_status}"
88+
89+
# Verify the cluster is updated
90+
updated_ray_cluster = get_ray_cluster(cluster_name, namespace)
91+
assert (
92+
updated_ray_cluster["spec"]["workerGroupSpecs"][0]["replicas"] == 2
93+
), "Worker count was not updated"
94+
95+
# Clean up
96+
cluster.down()
97+
ray_cluster = get_ray_cluster(cluster_name, namespace)
98+
assert ray_cluster is None, "Cluster was not deleted successfully"
99+
100+
def test_apply_invalid_update(self):
101+
self.setup_method()
102+
create_namespace(self)
103+
104+
cluster_name = "test-cluster-apply-invalid"
105+
namespace = self.namespace
106+
107+
# Initial configuration
108+
initial_config = ClusterConfiguration(
109+
name=cluster_name,
110+
namespace=namespace,
111+
num_workers=1,
112+
head_cpu_requests="500m",
113+
head_cpu_limits="1",
114+
head_memory_requests="1Gi",
115+
head_memory_limits="2Gi",
116+
worker_cpu_requests="500m",
117+
worker_cpu_limits="1",
118+
worker_memory_requests="1Gi",
119+
worker_memory_limits="2Gi",
120+
write_to_file=True,
121+
verify_tls=False,
122+
)
123+
124+
# Create the cluster
125+
cluster = Cluster(initial_config)
126+
cluster.apply()
127+
128+
# Wait for the cluster to be ready
129+
cluster.wait_ready()
130+
status = cluster.status()
131+
assert status["ready"], f"Cluster {cluster_name} is not ready: {status}"
132+
133+
# Update with an invalid configuration (e.g., immutable field change)
134+
invalid_config = ClusterConfiguration(
135+
name=cluster_name,
136+
namespace=namespace,
137+
num_workers=2,
138+
head_cpu_requests="1",
139+
head_cpu_limits="2", # Changing CPU limits (immutable)
140+
head_memory_requests="1Gi",
141+
head_memory_limits="2Gi",
142+
worker_cpu_requests="500m",
143+
worker_cpu_limits="1",
144+
worker_memory_requests="1Gi",
145+
worker_memory_limits="2Gi",
146+
write_to_file=True,
147+
verify_tls=False,
148+
)
149+
150+
# Try to apply the invalid configuration and expect failure
151+
cluster.config = invalid_config
152+
with pytest.raises(RuntimeError, match="Immutable fields detected"):
153+
cluster.apply()
154+
155+
# Clean up
156+
cluster.down()

‎tests/e2e/support.py

+16
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,22 @@
1111
)
1212

1313

14+
def get_ray_cluster(cluster_name, namespace):
15+
api = client.CustomObjectsApi()
16+
try:
17+
return api.get_namespaced_custom_object(
18+
group="ray.io",
19+
version="v1",
20+
namespace=namespace,
21+
plural="rayclusters",
22+
name=cluster_name,
23+
)
24+
except client.exceptions.ApiException as e:
25+
if e.status == 404:
26+
return None
27+
raise
28+
29+
1430
def get_ray_image():
1531
default_ray_image = "quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06"
1632
return os.getenv("RAY_IMAGE", default_ray_image)

0 commit comments

Comments
 (0)
Please sign in to comment.