From 4b0b55f7bf3f77ab85cc8f46caad98ec7af720e4 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 17 Nov 2023 16:41:46 +0000 Subject: [PATCH 1/5] Fixed creating ingresses without admin access --- src/codeflare_sdk/cluster/cluster.py | 42 +++++++-- src/codeflare_sdk/cluster/config.py | 2 +- src/codeflare_sdk/utils/generate_yaml.py | 107 ++++++++--------------- tests/test-case-no-mcad.yamls | 1 + tests/test-case-prio.yaml | 1 + tests/test-case.yaml | 1 + tests/unit_test.py | 55 +++--------- tests/unit_test_support.py | 2 +- 8 files changed, 90 insertions(+), 121 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index f5f226a01..129d045ae 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -189,7 +189,7 @@ def create_app_wrapper(self): local_interactive = self.config.local_interactive image_pull_secrets = self.config.image_pull_secrets dispatch_priority = self.config.dispatch_priority - ingress_domain = self.config.ingress_domain + domain_name = self.config.domain_name ingress_options = self.config.ingress_options return generate_appwrapper( name=name, @@ -214,7 +214,7 @@ def create_app_wrapper(self): dispatch_priority=dispatch_priority, priority_val=priority_val, openshift_oauth=self.config.openshift_oauth, - ingress_domain=ingress_domain, + domain_name=domain_name, ingress_options=ingress_options, ) @@ -468,7 +468,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True): + def from_k8_cluster_object(rc, mcad=True, domain_name=None): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -508,6 +508,7 @@ def from_k8_cluster_object(rc, mcad=True): ]["image"], local_interactive=local_interactive, mcad=mcad, + domain_name=domain_name, ) return Cluster(cluster_config) @@ -644,7 +645,10 @@ def get_cluster(cluster_name: str, namespace: str = "default"): for rc in rcs["items"]: if rc["metadata"]["name"] == cluster_name: mcad = _check_aw_exists(cluster_name, namespace) - return Cluster.from_k8_cluster_object(rc, mcad=mcad) + domain_name = _extract_domain_name(cluster_name, namespace) + return Cluster.from_k8_cluster_object( + rc, mcad=mcad, domain_name=domain_name + ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" ) @@ -663,14 +667,40 @@ def _check_aw_exists(name: str, namespace: str) -> bool: ) except Exception as e: # pragma: no cover return _kube_api_error_handling(e, print_error=False) - for aw in aws["items"]: if aw["metadata"]["name"] == name: return True return False -# Cant test this until get_current_namespace is fixed +def _extract_domain_name(name: str, namespace: str) -> str: + try: + config_check() + api_instance = client.CustomObjectsApi(api_config_handler()) + aws = api_instance.list_namespaced_custom_object( + group="workload.codeflare.dev", + version="v1beta1", + namespace=namespace, + plural="appwrappers", + ) + except Exception as e: # pragma: no cover + return _kube_api_error_handling(e, print_error=False) + for aw in aws["items"]: + if aw["metadata"]["name"] == name: + host = aw["spec"]["resources"]["GenericItems"][1]["generictemplate"][ + "spec" + ]["rules"][0]["host"] + + dot_index = host.find(".") + if dot_index != -1: + domain_name = host[dot_index + 1 :] + return domain_name + else: + print("Host is not configured correctly.") + return None + + +# Cant test this until get_current_namespace is fixed and placed in this function over using `self` def _get_ingress_domain(self): # pragma: no cover try: config_check() diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py index 0311d0e32..f36a6c644 100644 --- a/src/codeflare_sdk/cluster/config.py +++ b/src/codeflare_sdk/cluster/config.py @@ -54,4 +54,4 @@ class ClusterConfiguration: dispatch_priority: str = None openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator ingress_options: dict = field(default_factory=dict) - ingress_domain: str = None + domain_name: str = None diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 3ffbefb57..cd5f29cac 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -29,8 +29,6 @@ from base64 import b64encode from urllib3.util import parse_url -from kubernetes import client, config - from .kube_api_helpers import _get_api_host @@ -56,26 +54,23 @@ def gen_dashboard_ingress_name(cluster_name): return f"ray-dashboard-{cluster_name}" -# Check if the ingress api cluster resource exists +# Check if the routes api exists def is_openshift_cluster(): try: config_check() - api_instance = client.CustomObjectsApi(api_config_handler()) - api_instance.get_cluster_custom_object( - "config.openshift.io", "v1", "ingresses", "cluster" - ) - - return True - except client.ApiException as e: # pragma: no cover - if e.status == 404 or e.status == 403: - return False + for api in client.ApisApi(api_config_handler()).get_api_versions().groups: + for v in api.versions: + if "route.openshift.io/v1" in v.group_version: + return True else: - print(f"Error detecting cluster type defaulting to Kubernetes: {e}") return False + except client.ApiException as e: # pragma: no cover + print(f"Error detecting cluster type defaulting to Kubernetes: {e}") + return False def update_dashboard_ingress( - ingress_item, cluster_name, namespace, ingress_options, ingress_domain + ingress_item, cluster_name, namespace, ingress_options, domain_name ): # pragma: no cover metadata = ingress_item.get("generictemplate", {}).get("metadata") spec = ingress_item.get("generictemplate", {}).get("spec") @@ -123,34 +118,26 @@ def update_dashboard_ingress( "name" ] = f"{cluster_name}-head-svc" else: + if is_openshift_cluster(): + spec["ingressClassName"] = "openshift-default" + else: + spec["ingressClassName"] = "nginx" + metadata["name"] = f"ray-dashboard-{cluster_name}" metadata["namespace"] = namespace spec["rules"][0]["http"]["paths"][0]["backend"]["service"][ "name" ] = f"{cluster_name}-head-svc" - if is_openshift_cluster(): - try: - config_check() - api_client = client.CustomObjectsApi(api_config_handler()) - ingress = api_client.get_cluster_custom_object( - "config.openshift.io", "v1", "ingresses", "cluster" - ) - del spec["ingressClassName"] - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e) - domain = ingress["spec"]["domain"] - elif ingress_domain is None: - raise ValueError( - "ingress_domain is invalid. For Kubernetes Clusters please specify an ingress domain" - ) + if domain_name is None: + raise ValueError("domain_name is invalid. Please specify an ingress domain") else: - domain = ingress_domain + domain = domain_name del metadata["annotations"] spec["rules"][0]["host"] = f"ray-dashboard-{cluster_name}-{namespace}.{domain}" def update_rayclient_ingress( - ingress_item, cluster_name, namespace, ingress_domain + ingress_item, cluster_name, namespace, domain_name ): # pragma: no cover metadata = ingress_item.get("generictemplate", {}).get("metadata") spec = ingress_item.get("generictemplate", {}).get("spec") @@ -162,38 +149,27 @@ def update_rayclient_ingress( "name" ] = f"{cluster_name}-head-svc" - if is_openshift_cluster(): - try: - config_check() - api_client = client.CustomObjectsApi(api_config_handler()) - ingress = api_client.get_cluster_custom_object( - "config.openshift.io", "v1", "ingresses", "cluster" - ) + if domain_name is not None: + if is_openshift_cluster(): ingressClassName = "openshift-default" annotations = { "nginx.ingress.kubernetes.io/rewrite-target": "/", "nginx.ingress.kubernetes.io/ssl-redirect": "true", "route.openshift.io/termination": "passthrough", } - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e) - domain = ingress["spec"]["domain"] - elif ingress_domain is None: - raise ValueError( - "ingress_domain is invalid. For Kubernetes Clusters please specify an ingress domain" - ) + else: + ingressClassName = "nginx" + annotations = { + "nginx.ingress.kubernetes.io/rewrite-target": "/", + "nginx.ingress.kubernetes.io/ssl-redirect": "true", + "nginx.ingress.kubernetes.io/ssl-passthrough": "true", + } else: - domain = ingress_domain - ingressClassName = "nginx" - annotations = { - "nginx.ingress.kubernetes.io/rewrite-target": "/", - "nginx.ingress.kubernetes.io/ssl-redirect": "true", - "nginx.ingress.kubernetes.io/ssl-passthrough": "true", - } + raise ValueError("domain_name is invalid. Please specify a domain") metadata["annotations"] = annotations spec["ingressClassName"] = ingressClassName - spec["rules"][0]["host"] = f"rayclient-{cluster_name}-{namespace}.{domain}" + spec["rules"][0]["host"] = f"rayclient-{cluster_name}-{namespace}.{domain_name}" def update_names(yaml, item, appwrapper_name, cluster_name, namespace): @@ -396,7 +372,7 @@ def update_ca_secret(ca_secret_item, cluster_name, namespace): data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365) -def enable_local_interactive(resources, cluster_name, namespace, ingress_domain): +def enable_local_interactive(resources, cluster_name, namespace, domain_name): rayclient_ingress_item = resources["resources"].get("GenericItems")[2] ca_secret_item = resources["resources"].get("GenericItems")[3] item = resources["resources"].get("GenericItems")[0] @@ -422,23 +398,12 @@ def enable_local_interactive(resources, cluster_name, namespace, ingress_domain) command = command.replace("deployment-name", cluster_name) - if is_openshift_cluster(): - # We can try get the domain through checking ingresses.config.openshift.io - try: - config_check() - api_client = client.CustomObjectsApi(api_config_handler()) - ingress = api_client.get_cluster_custom_object( - "config.openshift.io", "v1", "ingresses", "cluster" - ) - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e) - domain = ingress["spec"]["domain"] - elif ingress_domain is None: + if domain_name is None: raise ValueError( - "ingress_domain is invalid. For Kubernetes Clusters please specify an ingress domain" + "domain_name is invalid. For Kubernetes Clusters please specify an ingress domain" ) else: - domain = ingress_domain + domain = domain_name command = command.replace("server-name", domain) update_rayclient_ingress(rayclient_ingress_item, cluster_name, namespace, domain) @@ -618,7 +583,7 @@ def generate_appwrapper( dispatch_priority: str, priority_val: int, openshift_oauth: bool, - ingress_domain: str, + domain_name: str, ingress_options: dict, ): user_yaml = read_template(template) @@ -659,10 +624,10 @@ def generate_appwrapper( head_gpus, ) update_dashboard_ingress( - ingress_item, cluster_name, namespace, ingress_options, ingress_domain + ingress_item, cluster_name, namespace, ingress_options, domain_name ) if local_interactive: - enable_local_interactive(resources, cluster_name, namespace, ingress_domain) + enable_local_interactive(resources, cluster_name, namespace, domain_name) else: disable_raycluster_tls(resources["resources"]) diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index b8993a7f0..484636bc2 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -145,6 +145,7 @@ metadata: name: ray-dashboard-unit-test-cluster-ray namespace: ns spec: + ingressClassName: nginx rules: - host: ray-dashboard-unit-test-cluster-ray-ns.apps.cluster.awsroute.org http: diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 6051104ac..70b68e971 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -178,6 +178,7 @@ spec: name: ray-dashboard-prio-test-cluster namespace: ns spec: + ingressClassName: nginx rules: - host: ray-dashboard-prio-test-cluster-ns.apps.cluster.awsroute.org http: diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 7c649c5ff..920459c40 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -175,6 +175,7 @@ spec: name: ray-dashboard-unit-test-cluster namespace: ns spec: + ingressClassName: nginx rules: - host: ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org http: diff --git a/tests/unit_test.py b/tests/unit_test.py index c33b95ab9..11697d269 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -331,13 +331,10 @@ def test_default_cluster_creation(mocker): "codeflare_sdk.cluster.cluster.get_current_namespace", return_value="opendatahub", ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) default_config = ClusterConfiguration( name="unit-test-default-cluster", image="quay.io/project-codeflare/ray:latest-py39-cu118", + domain_name="apps.cluster.awsroute.org", ) cluster = Cluster(default_config) @@ -469,14 +466,14 @@ def arg_check_list_effect(group, version, plural, name, *args): return {"spec": {"domain": "test"}} -""" -def test_get_ingress_domain(self, mocker): +""" We need to fix get_current_namespace in order to reuse this test. +def test_get_ingress_domain(mocker): mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", side_effect=arg_check_list_effect, ) - domain = _get_ingress_domain(self) + domain = _get_ingress_domain() assert domain == "test" """ @@ -734,10 +731,6 @@ def test_print_appwrappers(capsys): def test_ray_details(mocker, capsys): - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) ray1 = RayCluster( name="raytest1", status=RayClusterStatus.READY, @@ -765,6 +758,7 @@ def test_ray_details(mocker, capsys): name="raytest2", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", + domain_name="apps.cluster.awsroute.org", ) ) captured = capsys.readouterr() @@ -1771,14 +1765,14 @@ def get_aw_obj(group, version, namespace, plural): def test_get_cluster(mocker): mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) mocker.patch( "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", side_effect=get_ray_obj, ) + mocker.patch( + "codeflare_sdk.cluster.cluster._extract_domain_name", + return_value="apps.cluster.awsroute.org", + ) cluster = get_cluster("quicktest") cluster_config = cluster.config assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" @@ -1790,6 +1784,7 @@ def test_get_cluster(mocker): assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2 assert cluster_config.num_gpus == 0 assert cluster_config.instascale + assert cluster_config.domain_name == "apps.cluster.awsroute.org" assert ( cluster_config.image == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" @@ -1878,10 +1873,6 @@ def test_list_queue(mocker, capsys): def test_cluster_status(mocker): mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) fake_aw = AppWrapper( "test", AppWrapperStatus.FAILED, can_run=True, job_state="unused" ) @@ -1904,6 +1895,7 @@ def test_cluster_status(mocker): name="test", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", + domain_name="apps.cluster.awsroute.org", ) ) mocker.patch("codeflare_sdk.cluster.cluster._app_wrapper_status", return_value=None) @@ -1969,10 +1961,6 @@ def test_cluster_status(mocker): def test_wait_ready(mocker, capsys): - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) mocker.patch( "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", return_value=ingress_retrieval(8265), @@ -2000,6 +1988,7 @@ def test_wait_ready(mocker, capsys): name="test", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", + domain_name="apps.cluster.awsroute.org", ) ) try: @@ -2483,21 +2472,6 @@ def secret_ca_retreival(secret_name, namespace): return client.models.V1Secret(data=data) -def test_is_openshift_cluster(mocker): - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch.object( - client.CustomObjectsApi, - "get_cluster_custom_object", - side_effect=client.ApiException(status=404), - ) - assert is_openshift_cluster() == False - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) - assert is_openshift_cluster() == True - - def test_generate_tls_cert(mocker): """ test the function codeflare_sdk.utils.generate_ca_cert generates the correct outputs @@ -2823,10 +2797,6 @@ def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): "codeflare_sdk.cluster.cluster.get_current_namespace", return_value="opendatahub", ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": ""}}, - ) write_user_appwrapper = MagicMock() mocker.patch( "codeflare_sdk.utils.generate_yaml.write_user_appwrapper", write_user_appwrapper @@ -2836,6 +2806,7 @@ def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): "test_cluster", openshift_oauth=True, image="quay.io/project-codeflare/ray:latest-py39-cu118", + domain_name="apps.cluster.awsroute.org", ) ) user_yaml = write_user_appwrapper.call_args.args[0] diff --git a/tests/unit_test_support.py b/tests/unit_test_support.py index 85f1a76d3..801c988c3 100644 --- a/tests/unit_test_support.py +++ b/tests/unit_test_support.py @@ -46,7 +46,7 @@ def createClusterConfig(): instascale=True, machine_types=["cpu.small", "gpu.large"], image_pull_secrets=["unit-test-pull-secret"], - ingress_domain="apps.cluster.awsroute.org", + domain_name="apps.cluster.awsroute.org", image="quay.io/project-codeflare/ray:latest-py39-cu118", ) return config From 1278f5c75950af5ac3f7821bfdd6c61686e5fb6a Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 20 Nov 2023 11:03:20 +0000 Subject: [PATCH 2/5] Fixed unit tests --- tests/unit_test.py | 51 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/unit_test.py b/tests/unit_test.py index 11697d269..574898a1f 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -261,7 +261,28 @@ def test_config_creation(): assert config.local_interactive == False +def sample_no_routes(): + api_versions = client.V1APIGroupList( + api_version="v1", + groups=[ + { + "name": "route.openshift.io", + "preferred_version": { + "group_version": "route.openshift.io/v1", + "version": "v1", + }, + "versions": [ + {"group_version": "route.openshift.io/v1", "version": "v1"} + ], + } + ], + ) + + return api_versions + + def test_cluster_creation(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") cluster = createClusterWithConfig(mocker) assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster.yaml" assert cluster.app_wrapper_name == "unit-test-cluster" @@ -286,6 +307,7 @@ def test_create_app_wrapper_raises_error_with_no_image(): def test_cluster_creation_no_mcad(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, @@ -304,6 +326,7 @@ def test_cluster_creation_no_mcad(mocker): def test_cluster_creation_priority(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", @@ -327,6 +350,7 @@ def test_cluster_creation_priority(mocker): def test_default_cluster_creation(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.cluster.cluster.get_current_namespace", return_value="opendatahub", @@ -409,6 +433,7 @@ def arg_check_del_effect(group, version, namespace, plural, name, *args): def test_cluster_up_down(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", @@ -432,6 +457,7 @@ def test_cluster_up_down(mocker): def test_cluster_up_down_no_mcad(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", @@ -538,6 +564,7 @@ def test_delete_openshift_oauth_objects(mocker): def test_cluster_uris(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "codeflare_sdk.cluster.cluster._get_ingress_domain", @@ -637,6 +664,7 @@ def ingress_retrieval(port, annotations=None): def test_ray_job_wrapping(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") cluster = cluster = createClusterWithConfig(mocker) cluster.config.image = "quay.io/project-codeflare/ray:latest-py39-cu118" mocker.patch( @@ -731,6 +759,7 @@ def test_print_appwrappers(capsys): def test_ray_details(mocker, capsys): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") ray1 = RayCluster( name="raytest1", status=RayClusterStatus.READY, @@ -1764,6 +1793,7 @@ def get_aw_obj(group, version, namespace, plural): def test_get_cluster(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", @@ -1872,6 +1902,7 @@ def test_list_queue(mocker, capsys): def test_cluster_status(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") fake_aw = AppWrapper( "test", AppWrapperStatus.FAILED, can_run=True, job_state="unused" @@ -1961,6 +1992,7 @@ def test_cluster_status(mocker): def test_wait_ready(mocker, capsys): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", return_value=ingress_retrieval(8265), @@ -2021,6 +2053,7 @@ def test_wait_ready(mocker, capsys): def test_jobdefinition_coverage(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", return_value={"spec": {"domain": ""}}, @@ -2037,7 +2070,8 @@ def test_job_coverage(): abstract.logs() -def test_DDPJobDefinition_creation(): +def test_DDPJobDefinition_creation(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") ddp = createTestDDP() assert ddp.script == "test.py" assert ddp.m == None @@ -2061,6 +2095,7 @@ def test_DDPJobDefinition_dry_run(mocker: MockerFixture): that the attributes of the returned object are of the correct type, and that the values from cluster and job definition are correctly passed. """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( "codeflare_sdk.cluster.cluster.Cluster.cluster_dashboard_uri", @@ -2097,7 +2132,7 @@ def test_DDPJobDefinition_dry_run_no_cluster(mocker): that the attributes of the returned object are of the correct type, and that the values from cluster and job definition are correctly passed. """ - + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.job.jobs.get_current_namespace", return_value="opendatahub", @@ -2136,6 +2171,7 @@ def test_DDPJobDefinition_dry_run_no_resource_args(mocker): Test that the dry run correctly gets resources from the cluster object when the job definition does not specify resources. """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch.object(Cluster, "job_client") mocker.patch( "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", @@ -2175,6 +2211,7 @@ def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args(mocker): that the attributes of the returned object are of the correct type, and that the values from cluster and job definition are correctly passed. """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.job.jobs.get_current_namespace", @@ -2229,6 +2266,7 @@ def test_DDPJobDefinition_submit(mocker: MockerFixture): Tests that the submit method returns the correct type: DDPJob And that the attributes of the returned object are of the correct type """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mock_schedule = MagicMock() mocker.patch.object(Runner, "schedule", mock_schedule) mock_schedule.return_value = "fake-dashboard-url" @@ -2259,6 +2297,7 @@ def test_DDPJobDefinition_submit(mocker: MockerFixture): def test_DDPJob_creation(mocker: MockerFixture): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch.object(Cluster, "job_client") mock_schedule = MagicMock() mocker.patch.object(Runner, "schedule", mock_schedule) @@ -2284,6 +2323,7 @@ def test_DDPJob_creation(mocker: MockerFixture): def test_DDPJob_creation_no_cluster(mocker: MockerFixture): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") ddp_def = createTestDDP() ddp_def.image = "fake-image" mocker.patch( @@ -2309,6 +2349,7 @@ def test_DDPJob_creation_no_cluster(mocker: MockerFixture): def test_DDPJob_status(mocker: MockerFixture): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") # Setup the neccesary mock patches mock_status = MagicMock() mocker.patch.object(Runner, "status", mock_status) @@ -2323,6 +2364,7 @@ def test_DDPJob_status(mocker: MockerFixture): def test_DDPJob_logs(mocker: MockerFixture): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mock_log = MagicMock() mocker.patch.object(Runner, "log_lines", mock_log) # Setup the neccesary mock patches @@ -2369,7 +2411,8 @@ def parse_j(cmd): return f"{worker}x{gpu}" -def test_AWManager_creation(): +def test_AWManager_creation(mocker): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") testaw = AWManager(f"{aw_dir}test.yaml") assert testaw.name == "test" assert testaw.namespace == "ns" @@ -2410,6 +2453,7 @@ def arg_check_aw_del_effect(group, version, namespace, plural, name, *args): def test_AWManager_submit_remove(mocker, capsys): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") testaw = AWManager(f"{aw_dir}test.yaml") testaw.remove() captured = capsys.readouterr() @@ -2790,6 +2834,7 @@ def test_replace_openshift_oauth(mocker: MockerFixture): def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.utils.generate_yaml._get_api_host", return_value="foo.com" ) From 5b39a405d5919e1a63c0d8bee1ce6148d351953f Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 27 Nov 2023 11:42:05 +0000 Subject: [PATCH 3/5] Changed behaviour to create routes instead of ingresses on OpenShift --- src/codeflare_sdk/cluster/cluster.py | 163 +++++++++++------- src/codeflare_sdk/cluster/config.py | 2 +- .../templates/base-template.yaml | 38 +++- src/codeflare_sdk/utils/generate_yaml.py | 150 +++++++++++----- tests/unit_test.py | 92 ++-------- tests/unit_test_support.py | 2 +- 6 files changed, 265 insertions(+), 182 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 129d045ae..2493df030 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -32,6 +32,7 @@ generate_appwrapper, ) from ..utils.kube_api_helpers import _kube_api_error_handling +from ..utils.generate_yaml import is_openshift_cluster from ..utils.openshift_oauth import ( create_openshift_oauth_objects, delete_openshift_oauth_objects, @@ -189,7 +190,7 @@ def create_app_wrapper(self): local_interactive = self.config.local_interactive image_pull_secrets = self.config.image_pull_secrets dispatch_priority = self.config.dispatch_priority - domain_name = self.config.domain_name + ingress_domain = self.config.ingress_domain ingress_options = self.config.ingress_options return generate_appwrapper( name=name, @@ -214,7 +215,7 @@ def create_app_wrapper(self): dispatch_priority=dispatch_priority, priority_val=priority_val, openshift_oauth=self.config.openshift_oauth, - domain_name=domain_name, + ingress_domain=ingress_domain, ingress_options=ingress_options, ) @@ -415,25 +416,48 @@ def cluster_dashboard_uri(self) -> str: """ Returns a string containing the cluster's dashboard URI. """ - try: - config_check() - api_instance = client.NetworkingV1Api(api_config_handler()) - ingresses = api_instance.list_namespaced_ingress(self.config.namespace) - except Exception as e: # pragma no cover - return _kube_api_error_handling(e) + config_check() + if is_openshift_cluster(): + try: + api_instance = client.CustomObjectsApi(api_config_handler()) + routes = api_instance.list_namespaced_custom_object( + group="route.openshift.io", + version="v1", + namespace=self.config.namespace, + plural="routes", + ) + except Exception as e: # pragma: no cover + return _kube_api_error_handling(e) + + for route in routes["items"]: + if route["metadata"][ + "name" + ] == f"ray-dashboard-{self.config.name}" or route["metadata"][ + "name" + ].startswith( + f"{self.config.name}-ingress" + ): + protocol = "https" if route["spec"].get("tls") else "http" + return f"{protocol}://{route['spec']['host']}" + else: + try: + api_instance = client.NetworkingV1Api(api_config_handler()) + ingresses = api_instance.list_namespaced_ingress(self.config.namespace) + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) - for ingress in ingresses.items: - annotations = ingress.metadata.annotations - protocol = "http" - if ( - ingress.metadata.name == f"ray-dashboard-{self.config.name}" - or ingress.metadata.name.startswith(f"{self.config.name}-ingress") - ): - if annotations == None: - protocol = "http" - elif "route.openshift.io/termination" in annotations: - protocol = "https" - return f"{protocol}://{ingress.spec.rules[0].host}" + for ingress in ingresses.items: + annotations = ingress.metadata.annotations + protocol = "http" + if ( + ingress.metadata.name == f"ray-dashboard-{self.config.name}" + or ingress.metadata.name.startswith(f"{self.config.name}-ingress") + ): + if annotations == None: + protocol = "http" + elif "route.openshift.io/termination" in annotations: + protocol = "https" + return f"{protocol}://{ingress.spec.rules[0].host}" return "Dashboard ingress not available yet, have you run cluster.up()?" def list_jobs(self) -> List: @@ -468,7 +492,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True, domain_name=None): + def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -508,7 +532,7 @@ def from_k8_cluster_object(rc, mcad=True, domain_name=None): ]["image"], local_interactive=local_interactive, mcad=mcad, - domain_name=domain_name, + ingress_domain=ingress_domain, ) return Cluster(cluster_config) @@ -533,6 +557,14 @@ def _component_resources_up( plural="rayclusters", body=resource, ) + elif resource["kind"] == "Ingress": + api_instance.create_namespaced_custom_object( + group="networking.k8s.io", + version="v1", + namespace=namespace, + plural="ingresses", + body=resource, + ) elif resource["kind"] == "Route": api_instance.create_namespaced_custom_object( group="route.openshift.io", @@ -562,6 +594,15 @@ def _component_resources_down( plural="rayclusters", name=self.app_wrapper_name, ) + elif resource["kind"] == "Ingress": + name = resource["metadata"]["name"] + api_instance.delete_namespaced_custom_object( + group="networking.k8s.io", + version="v1", + namespace=namespace, + plural="ingresses", + name=name, + ) elif resource["kind"] == "Route": name = resource["metadata"]["name"] api_instance.delete_namespaced_custom_object( @@ -629,7 +670,7 @@ def get_current_namespace(): # pragma: no cover return None -def get_cluster(cluster_name: str, namespace: str = "default"): +def get_cluster(cluster_name: str, namespace: str = "default", ingress_domain=None): try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -645,9 +686,8 @@ def get_cluster(cluster_name: str, namespace: str = "default"): for rc in rcs["items"]: if rc["metadata"]["name"] == cluster_name: mcad = _check_aw_exists(cluster_name, namespace) - domain_name = _extract_domain_name(cluster_name, namespace) return Cluster.from_k8_cluster_object( - rc, mcad=mcad, domain_name=domain_name + rc, mcad=mcad, ingress_domain=ingress_domain ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" @@ -673,49 +713,42 @@ def _check_aw_exists(name: str, namespace: str) -> bool: return False -def _extract_domain_name(name: str, namespace: str) -> str: - try: - config_check() - api_instance = client.CustomObjectsApi(api_config_handler()) - aws = api_instance.list_namespaced_custom_object( - group="workload.codeflare.dev", - version="v1beta1", - namespace=namespace, - plural="appwrappers", - ) - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e, print_error=False) - for aw in aws["items"]: - if aw["metadata"]["name"] == name: - host = aw["spec"]["resources"]["GenericItems"][1]["generictemplate"][ - "spec" - ]["rules"][0]["host"] - - dot_index = host.find(".") - if dot_index != -1: - domain_name = host[dot_index + 1 :] - return domain_name - else: - print("Host is not configured correctly.") - return None - - # Cant test this until get_current_namespace is fixed and placed in this function over using `self` def _get_ingress_domain(self): # pragma: no cover - try: - config_check() - api_client = client.NetworkingV1Api(api_config_handler()) - if self.config.namespace != None: - namespace = self.config.namespace - else: - namespace = get_current_namespace() - ingresses = api_client.list_namespaced_ingress(namespace) - except Exception as e: # pragma: no cover - return _kube_api_error_handling(e) + config_check() + + if self.config.namespace != None: + namespace = self.config.namespace + else: + namespace = get_current_namespace() domain = None - for ingress in ingresses.items: - if ingress.spec.rules[0].http.paths[0].backend.service.port.number == 10001: - domain = ingress.spec.rules[0].host + + if is_openshift_cluster(): + try: + api_instance = client.CustomObjectsApi(api_config_handler()) + + routes = api_instance.list_namespaced_custom_object( + group="route.openshift.io", + version="v1", + namespace=namespace, + plural="routes", + ) + except Exception as e: # pragma: no cover + return _kube_api_error_handling(e) + + for route in routes["items"]: + if route["spec"]["port"]["targetPort"] == "client": + domain = route["spec"]["host"] + else: + try: + api_client = client.NetworkingV1Api(api_config_handler()) + ingresses = api_client.list_namespaced_ingress(namespace) + except Exception as e: # pragma: no cover + return _kube_api_error_handling(e) + + for ingress in ingresses.items: + if ingress.spec.rules[0].http.paths[0].backend.service.port.number == 10001: + domain = ingress.spec.rules[0].host return domain diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py index f36a6c644..0311d0e32 100644 --- a/src/codeflare_sdk/cluster/config.py +++ b/src/codeflare_sdk/cluster/config.py @@ -54,4 +54,4 @@ class ClusterConfiguration: dispatch_priority: str = None openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator ingress_options: dict = field(default_factory=dict) - domain_name: str = None + ingress_domain: str = None diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index c98f53c99..8e6fd0e9e 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -289,7 +289,7 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: ray-dashboard-raytest + name: ray-dashboard-deployment-ingress namespace: default annotations: annotations-example:annotations-example @@ -306,12 +306,28 @@ spec: pathType: Prefix path: / host: ray-dashboard-raytest. + - replicas: 1 + generictemplate: + kind: Route + apiVersion: route.openshift.io/v1 + metadata: + name: ray-dashboard-deployment-route + namespace: default + labels: + # allows me to return name of service that Ray operator creates + odh-ray-cluster-service: deployment-name-head-svc + spec: + to: + kind: Service + name: deployment-name-head-svc + port: + targetPort: dashboard - replicas: 1 generictemplate: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: rayclient-deployment-name + name: rayclient-deployment-ingress namespace: default annotations: annotations-example:annotations-example @@ -330,6 +346,24 @@ spec: path: '' pathType: ImplementationSpecific host: rayclient-raytest. + - replicas: 1 + generictemplate: + apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: rayclient-deployment-route + namespace: default + labels: + # allows me to return name of service that Ray operator creates + odh-ray-cluster-service: deployment-name-head-svc + spec: + port: + targetPort: client + tls: + termination: passthrough + to: + kind: Service + name: deployment-name-head-svc - replicas: 1 generictemplate: apiVersion: v1 diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index cd5f29cac..d09e4911e 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -69,8 +69,49 @@ def is_openshift_cluster(): return False +def update_dashboard_route(route_item, cluster_name, namespace): + metadata = route_item.get("generictemplate", {}).get("metadata") + metadata["name"] = gen_dashboard_ingress_name(cluster_name) + metadata["namespace"] = namespace + metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc" + spec = route_item.get("generictemplate", {}).get("spec") + spec["to"]["name"] = f"{cluster_name}-head-svc" + + +# ToDo: refactor the update_x_route() functions +def update_rayclient_route(route_item, cluster_name, namespace): + metadata = route_item.get("generictemplate", {}).get("metadata") + metadata["name"] = f"rayclient-{cluster_name}" + metadata["namespace"] = namespace + metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc" + spec = route_item.get("generictemplate", {}).get("spec") + spec["to"]["name"] = f"{cluster_name}-head-svc" + + +def update_dashboard_exposure( + ingress_item, route_item, cluster_name, namespace, ingress_options, ingress_domain +): + if is_openshift_cluster(): + update_dashboard_route(route_item, cluster_name, namespace) + else: + update_dashboard_ingress( + ingress_item, cluster_name, namespace, ingress_options, ingress_domain + ) + + +def update_rayclient_exposure( + client_route_item, client_ingress_item, cluster_name, namespace, ingress_domain +): + if is_openshift_cluster(): + update_rayclient_route(client_route_item, cluster_name, namespace) + else: + update_rayclient_ingress( + client_ingress_item, cluster_name, namespace, ingress_domain + ) + + def update_dashboard_ingress( - ingress_item, cluster_name, namespace, ingress_options, domain_name + ingress_item, cluster_name, namespace, ingress_options, ingress_domain ): # pragma: no cover metadata = ingress_item.get("generictemplate", {}).get("metadata") spec = ingress_item.get("generictemplate", {}).get("spec") @@ -118,26 +159,24 @@ def update_dashboard_ingress( "name" ] = f"{cluster_name}-head-svc" else: - if is_openshift_cluster(): - spec["ingressClassName"] = "openshift-default" - else: - spec["ingressClassName"] = "nginx" - - metadata["name"] = f"ray-dashboard-{cluster_name}" + spec["ingressClassName"] = "nginx" + metadata["name"] = gen_dashboard_ingress_name(cluster_name) metadata["namespace"] = namespace spec["rules"][0]["http"]["paths"][0]["backend"]["service"][ "name" ] = f"{cluster_name}-head-svc" - if domain_name is None: - raise ValueError("domain_name is invalid. Please specify an ingress domain") + if ingress_domain is None: + raise ValueError( + "ingress_domain is invalid. Please specify an ingress domain" + ) else: - domain = domain_name + domain = ingress_domain del metadata["annotations"] spec["rules"][0]["host"] = f"ray-dashboard-{cluster_name}-{namespace}.{domain}" def update_rayclient_ingress( - ingress_item, cluster_name, namespace, domain_name + ingress_item, cluster_name, namespace, ingress_domain ): # pragma: no cover metadata = ingress_item.get("generictemplate", {}).get("metadata") spec = ingress_item.get("generictemplate", {}).get("spec") @@ -149,27 +188,19 @@ def update_rayclient_ingress( "name" ] = f"{cluster_name}-head-svc" - if domain_name is not None: - if is_openshift_cluster(): - ingressClassName = "openshift-default" - annotations = { - "nginx.ingress.kubernetes.io/rewrite-target": "/", - "nginx.ingress.kubernetes.io/ssl-redirect": "true", - "route.openshift.io/termination": "passthrough", - } - else: - ingressClassName = "nginx" - annotations = { - "nginx.ingress.kubernetes.io/rewrite-target": "/", - "nginx.ingress.kubernetes.io/ssl-redirect": "true", - "nginx.ingress.kubernetes.io/ssl-passthrough": "true", - } + if ingress_domain is not None: + ingressClassName = "nginx" + annotations = { + "nginx.ingress.kubernetes.io/rewrite-target": "/", + "nginx.ingress.kubernetes.io/ssl-redirect": "true", + "nginx.ingress.kubernetes.io/ssl-passthrough": "true", + } else: - raise ValueError("domain_name is invalid. Please specify a domain") + raise ValueError("ingress_domain is invalid. Please specify a domain") metadata["annotations"] = annotations spec["ingressClassName"] = ingressClassName - spec["rules"][0]["host"] = f"rayclient-{cluster_name}-{namespace}.{domain_name}" + spec["rules"][0]["host"] = f"rayclient-{cluster_name}-{namespace}.{ingress_domain}" def update_names(yaml, item, appwrapper_name, cluster_name, namespace): @@ -372,9 +403,10 @@ def update_ca_secret(ca_secret_item, cluster_name, namespace): data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365) -def enable_local_interactive(resources, cluster_name, namespace, domain_name): - rayclient_ingress_item = resources["resources"].get("GenericItems")[2] - ca_secret_item = resources["resources"].get("GenericItems")[3] +def enable_local_interactive(resources, cluster_name, namespace, ingress_domain): + rayclient_ingress_item = resources["resources"].get("GenericItems")[3] + rayclient_route_item = resources["resources"].get("GenericItems")[4] + ca_secret_item = resources["resources"].get("GenericItems")[5] item = resources["resources"].get("GenericItems")[0] update_ca_secret(ca_secret_item, cluster_name, namespace) # update_ca_secret_volumes @@ -398,15 +430,21 @@ def enable_local_interactive(resources, cluster_name, namespace, domain_name): command = command.replace("deployment-name", cluster_name) - if domain_name is None: + if ingress_domain is None: raise ValueError( - "domain_name is invalid. For Kubernetes Clusters please specify an ingress domain" + "ingress_domain is invalid. For creating the client route/ingress please specify an ingress domain" ) else: - domain = domain_name + domain = ingress_domain command = command.replace("server-name", domain) - update_rayclient_ingress(rayclient_ingress_item, cluster_name, namespace, domain) + update_rayclient_exposure( + rayclient_route_item, + rayclient_ingress_item, + cluster_name, + namespace, + ingress_domain, + ) item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][ "initContainers" @@ -449,7 +487,9 @@ def disable_raycluster_tls(resources): updated_items = [] for i in resources["GenericItems"][:]: - if "rayclient-deployment-name" in i["generictemplate"]["metadata"]["name"]: + if "rayclient-deployment-ingress" in i["generictemplate"]["metadata"]["name"]: + continue + if "rayclient-deployment-route" in i["generictemplate"]["metadata"]["name"]: continue if "ca-secret-deployment-name" in i["generictemplate"]["metadata"]["name"]: continue @@ -458,6 +498,26 @@ def disable_raycluster_tls(resources): resources["GenericItems"] = updated_items +def delete_route_or_ingress(resources): + if is_openshift_cluster(): + client_to_remove_name = "rayclient-deployment-ingress" + dashboard_to_remove_name = "ray-dashboard-deployment-ingress" + else: + client_to_remove_name = "rayclient-deployment-route" + dashboard_to_remove_name = "ray-dashboard-deployment-route" + + updated_items = [] + for i in resources["GenericItems"][:]: + if dashboard_to_remove_name in i["generictemplate"]["metadata"]["name"]: + continue + elif client_to_remove_name in i["generictemplate"]["metadata"]["name"]: + continue + + updated_items.append(i) + + resources["GenericItems"] = updated_items + + def write_user_appwrapper(user_yaml, output_file_name): # Create the directory if it doesn't exist directory_path = os.path.dirname(output_file_name) @@ -583,7 +643,7 @@ def generate_appwrapper( dispatch_priority: str, priority_val: int, openshift_oauth: bool, - domain_name: str, + ingress_domain: str, ingress_options: dict, ): user_yaml = read_template(template) @@ -591,6 +651,7 @@ def generate_appwrapper( resources = user_yaml.get("spec", "resources") item = resources["resources"].get("GenericItems")[0] ingress_item = resources["resources"].get("GenericItems")[1] + route_item = resources["resources"].get("GenericItems")[2] update_names(user_yaml, item, appwrapper_name, cluster_name, namespace) update_labels(user_yaml, instascale, instance_types) update_priority(user_yaml, item, dispatch_priority, priority_val) @@ -623,14 +684,23 @@ def generate_appwrapper( head_memory, head_gpus, ) - update_dashboard_ingress( - ingress_item, cluster_name, namespace, ingress_options, domain_name + update_dashboard_exposure( + ingress_item, + route_item, + cluster_name, + namespace, + ingress_options, + ingress_domain, ) if local_interactive: - enable_local_interactive(resources, cluster_name, namespace, domain_name) + enable_local_interactive( + resources["resources"], cluster_name, namespace, ingress_domain + ) else: disable_raycluster_tls(resources["resources"]) + delete_route_or_ingress(resources["resources"]) + if openshift_oauth: enable_openshift_oauth(user_yaml, cluster_name, namespace) diff --git a/tests/unit_test.py b/tests/unit_test.py index 574898a1f..50b0b4d9b 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -261,26 +261,6 @@ def test_config_creation(): assert config.local_interactive == False -def sample_no_routes(): - api_versions = client.V1APIGroupList( - api_version="v1", - groups=[ - { - "name": "route.openshift.io", - "preferred_version": { - "group_version": "route.openshift.io/v1", - "version": "v1", - }, - "versions": [ - {"group_version": "route.openshift.io/v1", "version": "v1"} - ], - } - ], - ) - - return api_versions - - def test_cluster_creation(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") cluster = createClusterWithConfig(mocker) @@ -358,7 +338,7 @@ def test_default_cluster_creation(mocker): default_config = ClusterConfiguration( name="unit-test-default-cluster", image="quay.io/project-codeflare/ray:latest-py39-cu118", - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", ) cluster = Cluster(default_config) @@ -403,6 +383,14 @@ def arg_check_apply_effect(group, version, namespace, plural, body, *args): for resource in yamls: if resource["kind"] == "RayCluster": assert body == resource + elif plural == "ingresses": + assert group == "networking.k8s.io" + assert version == "v1" + with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: + yamls = yaml.load_all(f, Loader=yaml.FullLoader) + for resource in yamls: + if resource["kind"] == "Ingress": + assert body == resource elif plural == "routes": assert group == "route.openshift.io" assert version == "v1" @@ -787,7 +775,7 @@ def test_ray_details(mocker, capsys): name="raytest2", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", ) ) captured = capsys.readouterr() @@ -1799,11 +1787,9 @@ def test_get_cluster(mocker): "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", side_effect=get_ray_obj, ) - mocker.patch( - "codeflare_sdk.cluster.cluster._extract_domain_name", - return_value="apps.cluster.awsroute.org", + cluster = get_cluster( + cluster_name="quicktest", ingress_domain="apps.cluster.awsroute.org" ) - cluster = get_cluster("quicktest") cluster_config = cluster.config assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" assert ( @@ -1814,7 +1800,7 @@ def test_get_cluster(mocker): assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2 assert cluster_config.num_gpus == 0 assert cluster_config.instascale - assert cluster_config.domain_name == "apps.cluster.awsroute.org" + assert cluster_config.ingress_domain == "apps.cluster.awsroute.org" assert ( cluster_config.image == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" @@ -1926,7 +1912,7 @@ def test_cluster_status(mocker): name="test", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", ) ) mocker.patch("codeflare_sdk.cluster.cluster._app_wrapper_status", return_value=None) @@ -2020,7 +2006,7 @@ def test_wait_ready(mocker, capsys): name="test", namespace="ns", image="quay.io/project-codeflare/ray:latest-py39-cu118", - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", ) ) try: @@ -2566,6 +2552,7 @@ def test_enable_local_interactive(mocker): cluster_name = "test-enable-local" namespace = "default" ingress_domain = "mytest.domain" + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=False ) @@ -2599,7 +2586,7 @@ def test_enable_local_interactive(mocker): worker_group_spec = aw_spec["resources"]["GenericItems"][0]["generictemplate"][ "spec" ]["workerGroupSpecs"] - ca_secret = aw_spec["resources"]["GenericItems"][3]["generictemplate"] + ca_secret = aw_spec["resources"]["GenericItems"][5]["generictemplate"] # At a minimal, make sure the following items are presented in the appwrapper spec.resources. # 1. headgroup has the initContainers command to generated TLS cert from the mounted CA cert. # Note: In this particular command, the DNS.5 in [alt_name] must match the exposed local_client_url: rayclient-{cluster_name}.{namespace}.{ingress_domain} @@ -2655,7 +2642,7 @@ def test_enable_local_interactive(mocker): assert ca_secret["metadata"]["namespace"] == namespace # 5. Rayclient ingress - Kind - rayclient_ingress = aw_spec["resources"]["GenericItems"][2]["generictemplate"] + rayclient_ingress = aw_spec["resources"]["GenericItems"][3]["generictemplate"] paths = [ { "backend": { @@ -2681,47 +2668,6 @@ def test_enable_local_interactive(mocker): "host": f"rayclient-{cluster_name}-{namespace}.{ingress_domain}", "http": {"paths": paths}, } - # 5.1 Rayclient ingress - OCP - user_yaml = read_template(template) - aw_spec = user_yaml.get("spec", None) - cluster_name = "test-ocp-enable-local" - namespace = "default" - ocp_cluster_domain = {"spec": {"domain": "mytest.ocp.domain"}} - ingress_domain = ocp_cluster_domain["spec"]["domain"] - mocker.patch( - "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value=ocp_cluster_domain, - ) - paths = [ - { - "backend": { - "service": { - "name": f"{cluster_name}-head-svc", - "port": {"number": 10001}, - } - }, - "path": "", - "pathType": "ImplementationSpecific", - } - ] - enable_local_interactive(aw_spec, cluster_name, namespace, ingress_domain) - rayclient_ocp_ingress = aw_spec["resources"]["GenericItems"][2]["generictemplate"] - assert rayclient_ocp_ingress["kind"] == "Ingress" - assert rayclient_ocp_ingress["metadata"]["annotations"] == { - "nginx.ingress.kubernetes.io/rewrite-target": "/", - "nginx.ingress.kubernetes.io/ssl-redirect": "true", - "route.openshift.io/termination": "passthrough", - } - assert rayclient_ocp_ingress["metadata"]["name"] == f"rayclient-{cluster_name}" - assert rayclient_ocp_ingress["metadata"]["namespace"] == namespace - assert rayclient_ocp_ingress["spec"]["ingressClassName"] == "openshift-default" - assert rayclient_ocp_ingress["spec"]["rules"][0] == { - "host": f"rayclient-{cluster_name}-{namespace}.{ingress_domain}", - "http": {"paths": paths}, - } def test_create_openshift_oauth(mocker: MockerFixture): @@ -2851,7 +2797,7 @@ def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): "test_cluster", openshift_oauth=True, image="quay.io/project-codeflare/ray:latest-py39-cu118", - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", ) ) user_yaml = write_user_appwrapper.call_args.args[0] diff --git a/tests/unit_test_support.py b/tests/unit_test_support.py index 801c988c3..85f1a76d3 100644 --- a/tests/unit_test_support.py +++ b/tests/unit_test_support.py @@ -46,7 +46,7 @@ def createClusterConfig(): instascale=True, machine_types=["cpu.small", "gpu.large"], image_pull_secrets=["unit-test-pull-secret"], - domain_name="apps.cluster.awsroute.org", + ingress_domain="apps.cluster.awsroute.org", image="quay.io/project-codeflare/ray:latest-py39-cu118", ) return config From 2e8801324e57830ab52ff3f9db34a8e0605a2f11 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Tue, 28 Nov 2023 16:41:06 +0000 Subject: [PATCH 4/5] updated resources reference in local_interactive --- src/codeflare_sdk/utils/generate_yaml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index d09e4911e..95c17cc28 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -693,9 +693,7 @@ def generate_appwrapper( ingress_domain, ) if local_interactive: - enable_local_interactive( - resources["resources"], cluster_name, namespace, ingress_domain - ) + enable_local_interactive(resources, cluster_name, namespace, ingress_domain) else: disable_raycluster_tls(resources["resources"]) From 6b5e93f3a12a03a86f7871bacbf3d174d20fd88c Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 20 Dec 2023 15:14:00 +0000 Subject: [PATCH 5/5] Removed get cluster fix --- src/codeflare_sdk/cluster/cluster.py | 9 +++------ tests/unit_test.py | 7 ++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 2493df030..11cf5fdb9 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -492,7 +492,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): + def from_k8_cluster_object(rc, mcad=True): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -532,7 +532,6 @@ def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): ]["image"], local_interactive=local_interactive, mcad=mcad, - ingress_domain=ingress_domain, ) return Cluster(cluster_config) @@ -670,7 +669,7 @@ def get_current_namespace(): # pragma: no cover return None -def get_cluster(cluster_name: str, namespace: str = "default", ingress_domain=None): +def get_cluster(cluster_name: str, namespace: str = "default"): try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -686,9 +685,7 @@ def get_cluster(cluster_name: str, namespace: str = "default", ingress_domain=No for rc in rcs["items"]: if rc["metadata"]["name"] == cluster_name: mcad = _check_aw_exists(cluster_name, namespace) - return Cluster.from_k8_cluster_object( - rc, mcad=mcad, ingress_domain=ingress_domain - ) + return Cluster.from_k8_cluster_object(rc, mcad=mcad) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" ) diff --git a/tests/unit_test.py b/tests/unit_test.py index 50b0b4d9b..7ad0d08d7 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -1787,9 +1787,11 @@ def test_get_cluster(mocker): "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", side_effect=get_ray_obj, ) - cluster = get_cluster( - cluster_name="quicktest", ingress_domain="apps.cluster.awsroute.org" + mocker.patch( + "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", + return_value=True, ) + cluster = get_cluster(cluster_name="quicktest") cluster_config = cluster.config assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" assert ( @@ -1800,7 +1802,6 @@ def test_get_cluster(mocker): assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2 assert cluster_config.num_gpus == 0 assert cluster_config.instascale - assert cluster_config.ingress_domain == "apps.cluster.awsroute.org" assert ( cluster_config.image == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103"