From 0dae82ad7ce8c8ee77871e857de98ae99c36c745 Mon Sep 17 00:00:00 2001 From: Paloma Rebuelta <82956400+paloma-rebuelta@users.noreply.github.com> Date: Fri, 12 Jan 2024 16:40:34 +0000 Subject: [PATCH] Editing files to add resource requests and limits (#3202) Signed-off-by: Author Name paloma.rebuelta@6point6.co.uk Signed-off-by: Paloma Rebuelta --- docs/source/getting_started/changelog.md | 5 ++- elyra/pipeline/airflow/processor_airflow.py | 2 ++ elyra/pipeline/kfp/processor_kfp.py | 5 +++ elyra/pipeline/pipeline.py | 34 +++++++++++++++++-- elyra/pipeline/validation.py | 10 ++++-- .../templates/airflow/airflow_template.jinja2 | 8 ++++- .../generic_properties_template.jinja2 | 20 ++++++++--- .../kubeflow/v1/python_dsl_template.jinja2 | 6 ++++ elyra/tests/pipeline/kfp/conftest.py | 21 +++++++++++- .../tests/pipeline/kfp/test_processor_kfp.py | 6 +++- .../pipeline/test_pipeline_constructor.py | 34 +++++++++++++++++++ .../src/FileSubmissionDialog.tsx | 32 ++++++++++++++--- .../src/PipelineEditorWidget.tsx | 6 ++++ .../src/SubmitFileButtonExtension.tsx | 6 +++- packages/pipeline-editor/src/utils.ts | 6 +++- packages/pipeline-editor/style/index.css | 6 ++++ 16 files changed, 188 insertions(+), 19 deletions(-) diff --git a/docs/source/getting_started/changelog.md b/docs/source/getting_started/changelog.md index 0e4c77a9b..eb8045b72 100644 --- a/docs/source/getting_started/changelog.md +++ b/docs/source/getting_started/changelog.md @@ -2,8 +2,11 @@ A summary of new feature highlights is located on the [GitHub release page](https://github.com/elyra-ai/elyra/releases). -## Release 3.15.0 - 03/28/2023 +## Release 4.0.0 + +- Add functionality to select resource limits from the GUI [#3202](https://github.com/elyra-ai/elyra/pull/3202) +## Release 3.15.0 - 03/28/2023 ## Release 3.15.0rc0 - 03/28/2023 diff --git a/elyra/pipeline/airflow/processor_airflow.py b/elyra/pipeline/airflow/processor_airflow.py index b81d8d42a..c7449c6f8 100644 --- a/elyra/pipeline/airflow/processor_airflow.py +++ b/elyra/pipeline/airflow/processor_airflow.py @@ -342,6 +342,8 @@ def _cc_pipeline(self, pipeline: Pipeline, pipeline_name: str, pipeline_instance "image_pull_policy": image_pull_policy, "cpu_request": operation.cpu, "mem_request": operation.memory, + "cpu_limit": operation.cpu_limit, + "memory_limit": operation.memory_limit, "gpu_limit": operation.gpu, "operator_source": operation.filename, } diff --git a/elyra/pipeline/kfp/processor_kfp.py b/elyra/pipeline/kfp/processor_kfp.py index 7277e770a..065e2e9eb 100644 --- a/elyra/pipeline/kfp/processor_kfp.py +++ b/elyra/pipeline/kfp/processor_kfp.py @@ -843,6 +843,11 @@ def _generate_workflow_tasks( "size": operation.memory, "units": "G", } + workflow_task["task_modifiers"]["cpu_limit"] = operation.cpu_limit + workflow_task["task_modifiers"]["memory_limit"] = { + "size": operation.memory_limit, + "units": "G", + } gpu_vendor = "nvidia.com/gpu" if operation.gpu_vendor: gpu_vendor = operation.gpu_vendor diff --git a/elyra/pipeline/pipeline.py b/elyra/pipeline/pipeline.py index 9a1b3877b..2fa57b6e0 100644 --- a/elyra/pipeline/pipeline.py +++ b/elyra/pipeline/pipeline.py @@ -246,6 +246,8 @@ def __init__( outputs: List of files produced by this operation to be included in a child operation(s) cpu: number of cpus requested to run the operation memory: amount of memory requested to run the operation (in Gi) + cpu_limit: limit of number of cpus to run the operation + memory_limit: limit of amount of memory to run the operation (in Gi) gpu: number of gpus requested to run the operation parameters: a list of names of pipeline parameters that should be passed to this operation gpu_vendor: gpu resource type, eg. nvidia.com/gpu, amd.com/gpu etc. @@ -261,11 +263,29 @@ def __init__( if not component_props.get("runtime_image"): raise ValueError("Invalid pipeline operation: Missing field 'operation runtime image'.") if component_props.get("cpu") and not self._validate_range(component_props.get("cpu"), min_value=1): - raise ValueError("Invalid pipeline operation: CPU must be a positive value or None") + raise ValueError("Invalid pipeline operation: CPU request must be a positive value or None") + if component_props.get("cpu_limit") and not self._validate_range(component_props.get("cpu_limit"), min_value=1): + raise ValueError("Invalid pipeline operation: CPU limit must be a positive value or None") if component_props.get("gpu") and not self._validate_range(component_props.get("gpu"), min_value=0): raise ValueError("Invalid pipeline operation: GPU must be a positive value or None") if component_props.get("memory") and not self._validate_range(component_props.get("memory"), min_value=1): - raise ValueError("Invalid pipeline operation: Memory must be a positive value or None") + raise ValueError("Invalid pipeline operation: Memory request must be a positive value or None") + if component_props.get("memory_limit") and not self._validate_range( + component_props.get("memory_limit"), min_value=1 + ): + raise ValueError("Invalid pipeline operation: Memory limit must be a positive value or None") + if ( + component_props.get("memory_limit") + and component_props.get("memory") + and component_props.get("memory_limit") < component_props.get("memory") + ): + raise ValueError("Invalid pipeline operation: Memory limit must be equal or larger than memory request") + if ( + component_props.get("cpu_limit") + and component_props.get("cpu") + and component_props.get("cpu_limit") < component_props.get("cpu") + ): + raise ValueError("Invalid pipeline operation: CPU limit must be equal or larger than CPU request") # Re-build object to include default values self._component_props["filename"] = component_props.get("filename") @@ -273,7 +293,9 @@ def __init__( self._component_props["dependencies"] = Operation._scrub_list(component_props.get("dependencies", [])) self._component_props["include_subdirectories"] = component_props.get("include_subdirectories", False) self._component_props["cpu"] = component_props.get("cpu") + self._component_props["cpu_limit"] = component_props.get("cpu_limit") self._component_props["memory"] = component_props.get("memory") + self._component_props["memory_limit"] = component_props.get("memory_limit") self._component_props["gpu"] = component_props.get("gpu") self._component_props["gpu_vendor"] = component_props.get("gpu_vendor") self._component_props["parameters"] = component_props.get(PIPELINE_PARAMETERS, []) @@ -316,10 +338,18 @@ def env_vars(self) -> ElyraPropertyList[EnvironmentVariable]: def cpu(self) -> Optional[str]: return self._component_props.get("cpu") + @property + def cpu_limit(self) -> Optional[str]: + return self._component_props.get("cpu_limit") + @property def memory(self) -> Optional[str]: return self._component_props.get("memory") + @property + def memory_limit(self) -> Optional[str]: + return self._component_props.get("memory_limit") + @property def gpu(self) -> Optional[str]: return self._component_props.get("gpu") diff --git a/elyra/pipeline/validation.py b/elyra/pipeline/validation.py index d29682172..a3d66eed6 100644 --- a/elyra/pipeline/validation.py +++ b/elyra/pipeline/validation.py @@ -431,7 +431,13 @@ async def _validate_generic_node_properties( # If not running locally, we check resource and image name if pipeline_runtime != "local": self._validate_container_image_name(node.id, node_label, image_name, response=response) - for resource_name in ["cpu", "gpu", "memory"]: + for resource_name in [ + "cpu", + "gpu", + "memory", + "cpu_limit", + "memory_limit", + ]: resource_value = node.get_component_parameter(resource_name) if resource_value: self._validate_resource_value( @@ -638,7 +644,7 @@ def _validate_resource_value( Validates the value for hardware resources requested :param node_id: the unique ID of the node :param node_label: the given node name or user customized name/label of the node - :param resource_name: the name of the resource e.g. cpu, gpu. memory + :param resource_name: the name of the resource e.g. cpu, cpu_limit, gpu, memory, memory_limit :param resource_value: the value of the resource :param response: ValidationResponse containing the issue list to be updated """ diff --git a/elyra/templates/airflow/airflow_template.jinja2 b/elyra/templates/airflow/airflow_template.jinja2 index 2f08b929c..fc68a55f6 100644 --- a/elyra/templates/airflow/airflow_template.jinja2 +++ b/elyra/templates/airflow/airflow_template.jinja2 @@ -47,7 +47,7 @@ op_{{ operation.id|regex_replace }} = KubernetesPodOperator(name='{{ operation.n arguments=["{{ operation.argument_list }}"], task_id='{{ operation.notebook|regex_replace }}', env_vars={{ operation.pipeline_envs }}, - {% if operation.cpu_request or operation.mem_request or operation.gpu_limit %} + {% if operation.cpu_request or operation.mem_request or operation.cpu_limit or operation.memory_limit or operation.gpu_limit %} resources = { {% if operation.cpu_request %} 'request_cpu': '{{ operation.cpu_request }}', @@ -55,6 +55,12 @@ op_{{ operation.id|regex_replace }} = KubernetesPodOperator(name='{{ operation.n {% if operation.mem_request %} 'request_memory': '{{ operation.mem_request }}G', {% endif %} + {% if operation.cpu_limit %} + 'limit_cpu': '{{ operation.cpu_limit }}', + {% endif %} + {% if operation.memory_limit %} + 'limit_memory': '{{ operation.memory_limit }}G', + {% endif %} {% if operation.gpu_limit %} 'limit_gpu': '{{ operation.gpu_limit }}', {% endif %} diff --git a/elyra/templates/components/generic_properties_template.jinja2 b/elyra/templates/components/generic_properties_template.jinja2 index c500c4d6c..004b6088e 100644 --- a/elyra/templates/components/generic_properties_template.jinja2 +++ b/elyra/templates/components/generic_properties_template.jinja2 @@ -37,14 +37,26 @@ }, "cpu": { "type": "integer", - "title": "CPU", - "description": "For CPU-intensive workloads, you can choose more than 1 CPU (e.g. 1.5).", + "title": "CPU request", + "description": "For CPU-intensive workloads, you can request more than 1 CPU (e.g. 1.5, this is optional).", + "minimum": 0 + }, + "cpu_limit": { + "type": "integer", + "title": "CPU limit", + "description": "The maximum CPU that can be allocated to this node. This should be equal or higher than the request", "minimum": 0 }, "memory": { "type": "integer", - "title": "RAM(GB)", - "description": "The total amount of RAM specified.", + "title": "RAM request (GB)", + "description": "The total amount of RAM requested (optional).", + "minimum": 0 + }, + "memory_limit": { + "type": "integer", + "title": "RAM limit (GB)", + "description": "The maximum amount of RAM allowed. This should be equal or higher than the request", "minimum": 0 }, "gpu": { diff --git a/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 index 4ff84e884..5f1d0eda3 100644 --- a/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 +++ b/elyra/templates/kubeflow/v1/python_dsl_template.jinja2 @@ -66,6 +66,12 @@ def generated_pipeline( {% if workflow_task.task_modifiers.mem_request and workflow_task.task_modifiers.mem_request.size %} {{ task_name }}.container.set_memory_request(memory="{{ workflow_task.task_modifiers.mem_request.size }}{{ workflow_task.task_modifiers.mem_request.units }}") {% endif %} +{% if workflow_task.task_modifiers.cpu_limit %} + {{ task_name }}.container.set_cpu_limit(cpu="{{ workflow_task.task_modifiers.cpu_limit }}") +{% endif %} +{% if workflow_task.task_modifiers.memory_limit and workflow_task.task_modifiers.memory_limit.size %} + {{ task_name }}.container.set_memory_limit(memory="{{ workflow_task.task_modifiers.memory_limit.size }}{{ workflow_task.task_modifiers.memory_limit.units }}") +{% endif %} {% if workflow_task.task_modifiers.gpu_limit and workflow_task.task_modifiers.gpu_limit.size %} {{ task_name }}.container.add_resource_limit(resource_name="{{ workflow_task.task_modifiers.gpu_limit.vendor }}", value="{{ workflow_task.task_modifiers.gpu_limit.size }}") {% endif %} diff --git a/elyra/tests/pipeline/kfp/conftest.py b/elyra/tests/pipeline/kfp/conftest.py index 401b48760..239c7a78f 100644 --- a/elyra/tests/pipeline/kfp/conftest.py +++ b/elyra/tests/pipeline/kfp/conftest.py @@ -177,7 +177,14 @@ def metadata_dependencies(metadata_managers, request): # Create a Pipeline object from the pipeline file, applying the customization # options customization_options = {} - for supported_option in ["with_cos_object_prefix", "resources_cpu", "resources_gpu", "resources_memory"]: + for supported_option in [ + "with_cos_object_prefix", + "resources_cpu", + "resources_cpu_limit", + "resources_gpu", + "resources_memory", + "resources_memory_limit", + ]: customization_options[supported_option] = request.param.get(supported_option) pipeline_object = get_pipeline_object( pipeline_filename=request.param["pipeline_file"], @@ -361,6 +368,8 @@ def get_pipeline_object( - "resources_cpu" (number, applied to all generic nodes) - "resources_gpu" (number, applied to all generic nodes) - "resources_memory" (number, applied to all generic nodes) + - "resources_cpu_limit" (number, applied to all generic nodes) + - "resources_memory_limit" (number, applied to all generic nodes) """ assert pipeline_filename is not None, "A pipeline filename is required." @@ -409,6 +418,10 @@ def get_pipeline_object( node["app_data"]["component_parameters"]["cpu"] = customization_options["resources_cpu"] else: node["app_data"]["component_parameters"].pop("cpu", None) + if customization_options.get("resources_cpu_limit") is not None: + node["app_data"]["component_parameters"]["cpu_limit"] = customization_options["resources_cpu_limit"] + else: + node["app_data"]["component_parameters"].pop("cpu_limit", None) if customization_options.get("resources_gpu") is not None: node["app_data"]["component_parameters"]["gpu"] = customization_options["resources_gpu"] else: @@ -417,6 +430,12 @@ def get_pipeline_object( node["app_data"]["component_parameters"]["memory"] = customization_options["resources_memory"] else: node["app_data"]["component_parameters"].pop("memory", None) + if customization_options.get("resources_memory_limit") is not None: + node["app_data"]["component_parameters"]["memory_limit"] = customization_options[ + "resources_memory_limit" + ] + else: + node["app_data"]["component_parameters"].pop("memory_limit", None) # Parse JSON and return Pipeline instance return PipelineParser().parse(pipeline_json=pipeline_json) diff --git a/elyra/tests/pipeline/kfp/test_processor_kfp.py b/elyra/tests/pipeline/kfp/test_processor_kfp.py index f57040b78..8cee71fef 100644 --- a/elyra/tests/pipeline/kfp/test_processor_kfp.py +++ b/elyra/tests/pipeline/kfp/test_processor_kfp.py @@ -943,7 +943,7 @@ def test_generate_pipeline_dsl_compile_pipeline_dsl_one_generic_node_pipeline_te op = list(pipeline.operations.values())[0] - if op.gpu or op.cpu or op.memory: + if op.gpu or op.cpu or op.memory or op.cpu_limit or op.memory_limit: assert node_template["container"].get("resources") is not None if op.gpu: assert node_template["container"]["resources"]["limits"]["nvidia.com/gpu"] == str(op.gpu) @@ -951,6 +951,10 @@ def test_generate_pipeline_dsl_compile_pipeline_dsl_one_generic_node_pipeline_te assert node_template["container"]["resources"]["requests"]["cpu"] == str(op.cpu) if op.memory: assert node_template["container"]["resources"]["requests"]["memory"] == f"{op.memory}G" + if op.cpu_limit: + assert node_template["container"]["resources"]["limits"]["cpu"] == str(op.cpu_limit) + if op.memory_limit: + assert node_template["container"]["resources"]["limits"]["memory"] == f"{op.memory_limit}G" @pytest.fixture(autouse=False) diff --git a/elyra/tests/pipeline/test_pipeline_constructor.py b/elyra/tests/pipeline/test_pipeline_constructor.py index c47a907c8..78d7888f1 100644 --- a/elyra/tests/pipeline/test_pipeline_constructor.py +++ b/elyra/tests/pipeline/test_pipeline_constructor.py @@ -494,6 +494,40 @@ def test_fail_creating_operation_with_negative_cpu_resources(): ) +def test_fail_creating_operation_with_more_requests_than_limit_cpu(): + component_parameters = { + "filename": "elyra/pipeline/tests/resources/archive/test.ipynb", + "cpu": "4", + "cpu_limit": "2", + "runtime_image": "tensorflow/tensorflow:latest", + } + with pytest.raises(ValueError): + GenericOperation( + id="test-id", + type="execution-node", + classifier="execute-notebook-node", + name="test", + component_props=component_parameters, + ) + + +def test_fail_creating_operation_with_more_requests_than_limit_memory(): + component_parameters = { + "filename": "elyra/pipeline/tests/resources/archive/test.ipynb", + "memory": "4", + "memory_limit": "2", + "runtime_image": "tensorflow/tensorflow:latest", + } + with pytest.raises(ValueError): + GenericOperation( + id="test-id", + type="execution-node", + classifier="execute-notebook-node", + name="test", + component_props=component_parameters, + ) + + def test_fail_creating_operation_with_0_memory_resources(): component_parameters = { "filename": "elyra/pipeline/tests/resources/archive/test.ipynb", diff --git a/packages/pipeline-editor/src/FileSubmissionDialog.tsx b/packages/pipeline-editor/src/FileSubmissionDialog.tsx index 764cfbb73..1e6b94f1b 100644 --- a/packages/pipeline-editor/src/FileSubmissionDialog.tsx +++ b/packages/pipeline-editor/src/FileSubmissionDialog.tsx @@ -86,13 +86,24 @@ export const FileSubmissionDialog: React.FC = ({
- +
- For CPU-intensive workloads, you can choose more than 1 CPU (e.g. - 1.5). + For CPU-intensive workloads, you can request more than 1 CPU (e.g. + 1.5, this is optional).
+
+ +
+ The maximum CPU that can be allocated to this node. This should be + equal or higher than the request +
+ +
@@ -102,15 +113,26 @@ export const FileSubmissionDialog: React.FC = ({
- +
- The total amount of RAM specified. + The total amount of RAM requested (optional).
+
+ +
+ The maximum amount of RAM allowed. This should be equal or higher + than the request +
+ +

= ({ if (node.app_data.component_parameters.cpu === null) { delete node.app_data.component_parameters.cpu; } + if (node.app_data.component_parameters.cpu_limit === null) { + delete node.app_data.component_parameters.cpu_limit; + } if (node.app_data.component_parameters.memory === null) { delete node.app_data.component_parameters.memory; } + if (node.app_data.component_parameters.memory_limit === null) { + delete node.app_data.component_parameters.memory_limit; + } if (node.app_data.component_parameters.gpu === null) { delete node.app_data.component_parameters.gpu; } diff --git a/packages/pipeline-editor/src/SubmitFileButtonExtension.tsx b/packages/pipeline-editor/src/SubmitFileButtonExtension.tsx index 5f3097938..71c2ad926 100644 --- a/packages/pipeline-editor/src/SubmitFileButtonExtension.tsx +++ b/packages/pipeline-editor/src/SubmitFileButtonExtension.tsx @@ -128,8 +128,10 @@ export class SubmitFileButtonExtension< runtime_config, framework, cpu, + cpu_limit, gpu, memory, + memory_limit, dependency_include, dependencies, ...envObject @@ -145,8 +147,10 @@ export class SubmitFileButtonExtension< dependency_include ? dependencies.split(',') : undefined, envObject, cpu, + cpu_limit, gpu, - memory + memory, + memory_limit ); PipelineService.submitPipeline( diff --git a/packages/pipeline-editor/src/utils.ts b/packages/pipeline-editor/src/utils.ts index 30a4bded7..7eb04ae2e 100644 --- a/packages/pipeline-editor/src/utils.ts +++ b/packages/pipeline-editor/src/utils.ts @@ -37,8 +37,10 @@ export default class Utils { dependencies: string[] | undefined, envObject: { [key: string]: string }, cpu?: number, + cpu_limit?: number, gpu?: number, - memory?: number + memory?: number, + memory_limit?: number ): any { const generated_uuid = uuid4(); @@ -71,8 +73,10 @@ export default class Utils { env_vars: envVars, dependencies, cpu, + cpu_limit, gpu, memory, + memory_limit, include_subdirectories: false }, ui_data: { diff --git a/packages/pipeline-editor/style/index.css b/packages/pipeline-editor/style/index.css index 579764a9a..a184ef98c 100644 --- a/packages/pipeline-editor/style/index.css +++ b/packages/pipeline-editor/style/index.css @@ -864,6 +864,8 @@ input.elyra-Dialog-checkbox.jp-mod-styled { div#root_component_parameters_cpu, div#root_component_parameters_memory, +div#root_component_parameters_cpu_limit, +div#root_component_parameters_memory_limit, div#root_component_parameters_gpu, div#root_component_parameters_gpu_vendor { width: 50%; @@ -871,13 +873,17 @@ div#root_component_parameters_gpu_vendor { input#root_component_parameters_cpu, input#root_component_parameters_memory, +input#root_component_parameters_cpu_limit, +input#root_component_parameters_memory_limit, input#root_component_parameters_gpu, input#root_component_parameters_gpu_vendor { width: 100%; } #cpu, +#cpu_limit, #gpu, +#memory_limit, #memory { position: absolute; vertical-align: baseline;