Merge branch 'master' into fix-lint-ignore-comments

fmigneault · web-flow · commit 7afde6f1ec04 · 2021-07-07T11:14:28.000-04:00
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -21,10 +21,13 @@ Fixes:
 
 Changes:
 --------
-- No change.
+- Add support for array type as job inputs
+  (relates to `#233 <https://github.com/crim-ca/weaver/issues/233>`_).
 
 Fixes:
 ------
+- Fix the format of the output file URL. When the prefix ``/`` was not present,
+  the URL was incorrectly handled by not prepending the required base URL location.
 - Fix backward compatibility of pre-deployed processes that did not define ``jobControlOptions`` that is now required.
   Missing definition are substituted in-place by default ``["execute-async"]`` mode.
 
@@ -36,6 +39,8 @@ Changes:
 - Add reference link to ReadTheDocs URL of `Weaver` in API landing page.
 - Add references to `OGC-API Processes` requirements and recommendations for eventual conformance listing
   (relates to `#231 <https://github.com/crim-ca/weaver/issues/231>`_).
+- In order to align with OpenAPI ``boolean`` type definitions, non-explicit ``boolean`` values will not be
+  automatically converted to ``bool`` anymore. They will require explicit ``false|true``.
 - Add ``datetime`` query parameter for job searches queries
   (relates to `#236 <https://github.com/crim-ca/weaver/issues/236>`_).
 - Add ``limit`` query parameter validation and integration for jobs in retrieve queries
diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py
@@ -8,8 +8,10 @@
     - :mod:`tests.processes.wps_package`.
 """
 import contextlib
+import json
 import logging
 import os
+from inspect import cleandoc
 
 import colander
 import pytest
@@ -23,6 +25,8 @@
     mocked_aws_s3,
     mocked_aws_s3_bucket_test_file,
     mocked_execute_process,
+    mocked_http_file,
+    mocked_reference_test_file,
     mocked_sub_requests
 )
 from weaver.execute import EXECUTE_MODE_ASYNC, EXECUTE_RESPONSE_DOCUMENT, EXECUTE_TRANSMISSION_MODE_REFERENCE
@@ -37,7 +41,11 @@
     IANA_NAMESPACE,
     get_cwl_file_format
 )
-from weaver.processes.constants import CWL_REQUIREMENT_APP_BUILTIN, CWL_REQUIREMENT_INIT_WORKDIR
+from weaver.processes.constants import (
+    CWL_REQUIREMENT_APP_BUILTIN,
+    CWL_REQUIREMENT_APP_DOCKER,
+    CWL_REQUIREMENT_INIT_WORKDIR
+)
 from weaver.utils import get_any_value
 
 EDAM_PLAIN = EDAM_NAMESPACE + ":" + EDAM_MAPPING[CONTENT_TYPE_TEXT_PLAIN]
@@ -69,6 +77,7 @@ def setUpClass(cls):
             "weaver.wps": True,
             "weaver.wps_path": "/ows/wps",
             "weaver.wps_restapi_path": "/",
+            "weaver.wps_output_dir": "/tmp",  # nosec: B108 # don't care hardcoded for test
         }
         super(WpsPackageAppTest, cls).setUpClass()
 
@@ -925,6 +934,178 @@ def test_valid_io_min_max_occurs_as_str_or_int(self):
                     "Field '{}' of input '{}'({}) is expected to be '{}' but was '{}'" \
                     .format(field, process_input, i, proc_in_exp, proc_in_res)
 
+    @mocked_aws_credentials
+    @mocked_aws_s3
+    @mocked_http_file
+    def test_execute_job_with_array_input(self):
+        """
+        Validate that a job can receive arrays as inputs and process them as expected.
+
+        A CWL ``CommandLineTool`` that runs an embedded Python script is deployed, then executed with literal
+        arrays, an array of file references (``file://``, mocked HTTP and mocked S3), and single-value
+        counterparts of each. The script writes the transformed inputs as JSON to ``tmp.txt``, which is read
+        back from ``weaver.wps_output_dir`` to verify the values that reached the application.
+        """
+        cwl = {
+            "cwlVersion": "v1.0",
+            "class": "CommandLineTool",
+            "baseCommand": ["python3", "script.py"],
+            "inputs":
+            {
+                "test_int_array": {"type": {"type": "array", "items": "int"}, "inputBinding": {"position": 1}},
+                "test_float_array": {"type": {"type": "array", "items": "float"}},
+                "test_string_array": {"type": {"type": "array", "items": "string"}},
+                "test_reference_array": {"type": {"type": "array", "items": "File"}},
+                "test_int_value": "int",
+                "test_float_value": "float",
+                "test_string_value": "string",
+                "test_reference_http_value": "File",
+                "test_reference_file_value": "File",
+                "test_reference_s3_value": "File"
+            },
+            "requirements": {
+                CWL_REQUIREMENT_APP_DOCKER: {
+                    "dockerPull": "python:3.7-alpine"
+                },
+                CWL_REQUIREMENT_INIT_WORKDIR: {
+                    "listing": [
+                        {
+                            "entryname": "script.py",
+                            "entry": cleandoc("""
+                                import json
+                                import os
+                                input = $(inputs)
+                                for key, value in input.items():
+                                    if isinstance(value, list):
+                                        # bool is a subclass of int and must be tested first to be reachable
+                                        if all(isinstance(val, bool) for val in value):
+                                            value = map(lambda v: not v, value)
+                                        elif all(isinstance(val, int) for val in value):
+                                            value = map(lambda v: v+1, value)
+                                        elif all(isinstance(val, float) for val in value):
+                                            value = map(lambda v: v+0.5, value)
+                                        elif all(isinstance(val, str) for val in value):
+                                            value = map(lambda v: v.upper(), value)
+                                        elif all(isinstance(val, dict) for val in value):
+                                            def tmp(value):
+                                                path_ = value.get('path')
+                                                if path_ and os.path.exists(path_):
+                                                    with open (path_, 'r') as file_:
+                                                        filedata = file_.read()
+                                                return filedata.upper()
+                                            value = map(tmp, value)
+                                        input[key] = ";".join(map(str, value))
+                                    elif isinstance(value, dict):
+                                        path_ = value.get('path')
+                                        if path_ and os.path.exists(path_):
+                                            with open (path_, 'r') as file_:
+                                                filedata = file_.read()
+                                            input[key] = filedata.upper()
+                                    elif isinstance(value, str):
+                                        input[key] = value.upper()
+                                    elif isinstance(value, bool):
+                                        input[key] = not value
+                                    elif isinstance(value, int):
+                                        input[key] = value+1
+                                    elif isinstance(value, float):
+                                        input[key] = value+0.5
+                                json.dump(input, open("./tmp.txt","w"))
+                                """)
+                        }
+                    ]
+                }
+            },
+            "outputs": [{"id": "output_test", "type": "File", "outputBinding": {"glob": "tmp.txt"}}],
+        }
+        body = {
+            "processDescription": {
+                "process": {
+                    "id": self._testMethodName,
+                    "title": "some title",
+                    "abstract": "this is a test",
+                },
+            },
+            "deploymentProfileName": "http://www.opengis.net/profiles/eoc/wpsApplication",
+            "executionUnit": [{"unit": cwl}],
+        }
+        try:
+            desc, _ = self.deploy_process(body)
+        except colander.Invalid as exc:
+            # report the schema error itself, otherwise the cause of the failure is lost
+            self.fail("Process deployment failed schema validation: {}".format(exc))
+
+        assert desc["process"] is not None
+
+        test_bucket_ref = mocked_aws_s3_bucket_test_file(
+            "wps-process-test-bucket",
+            "input_file_s3.txt",
+            "This is a generated file for s3 test"
+        )
+
+        test_http_ref = mocked_reference_test_file(
+            "input_file_http.txt",
+            "http",
+            "This is a generated file for http test"
+        )
+
+        test_file_ref = mocked_reference_test_file(
+            "input_file_ref.txt",
+            "file",
+            "This is a generated file for file test"
+        )
+
+        exec_body = {
+            "mode": EXECUTE_MODE_ASYNC,
+            "response": EXECUTE_RESPONSE_DOCUMENT,
+            "inputs":
+            [
+                {"id": "test_int_array", "value": [10, 20, 30, 40, 50]},
+                {"id": "test_float_array", "value": [10.03, 20.03, 30.03, 40.03, 50.03]},
+                {"id": "test_string_array", "value": ["this", "is", "a", "test"]},
+                {"id": "test_reference_array",
+                 "value": [{"href": test_file_ref},
+                           {"href": test_http_ref},
+                           {"href": test_bucket_ref}
+                           ]
+                 },
+                {"id": "test_int_value", "value": 2923},
+                {"id": "test_float_value", "value": 389.73},
+                {"id": "test_string_value", "value": "stringtest"},
+                {"id": "test_reference_http_value", "href": test_http_ref},
+                {"id": "test_reference_file_value", "href": test_file_ref},
+                {"id": "test_reference_s3_value", "href": test_bucket_ref}
+            ],
+            "outputs": [
+                {"id": "output_test", "type": "File"},
+            ]
+        }
+
+        with contextlib.ExitStack() as stack_exec:
+            for mock_exec in mocked_execute_process():
+                stack_exec.enter_context(mock_exec)
+            proc_url = "/processes/{}/jobs".format(self._testMethodName)
+            resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5,
+                                       data=exec_body, headers=self.json_headers, only_local=True)
+            assert resp.status_code in [200, 201], "Failed with: [{}]\nReason:\n{}".format(resp.status_code, resp.json)
+            status_url = resp.json.get("location")
+
+        results = self.monitor_job(status_url)
+
+        # keep only the path portion of the output reference to locate the file under the WPS output directory
+        job_output_file = results.get("output_test")["href"].split("/", 3)[-1]
+        tmpfile = "{}/{}".format(self.settings["weaver.wps_output_dir"], job_output_file)
+
+        try:
+            # context manager guarantees the file handle is released even if JSON parsing fails
+            with open(tmpfile, "r") as out_file:
+                processed_values = json.load(out_file)
+        except FileNotFoundError:
+            self.fail("Output file [{}] was not found where it was expected to resume test".format(tmpfile))
+        except Exception as exception:
+            self.fail("An error occurred during the reading of the file: {}".format(exception))
+        assert processed_values["test_int_array"] == "11;21;31;41;51"
+        assert processed_values["test_float_array"] == "10.53;20.53;30.53;40.53;50.53"
+        assert processed_values["test_string_array"] == "THIS;IS;A;TEST"
+        assert processed_values["test_reference_array"] == ("THIS IS A GENERATED FILE FOR FILE TEST;"
+                                                            "THIS IS A GENERATED FILE FOR HTTP TEST;"
+                                                            "THIS IS A GENERATED FILE FOR S3 TEST")
+        assert processed_values["test_int_value"] == 2924
+        assert processed_values["test_float_value"] == 390.23
+        assert processed_values["test_string_value"] == "STRINGTEST"
+        assert processed_values["test_reference_s3_value"] == "THIS IS A GENERATED FILE FOR S3 TEST"
+        assert processed_values["test_reference_http_value"] == "THIS IS A GENERATED FILE FOR HTTP TEST"
+        assert processed_values["test_reference_file_value"] == "THIS IS A GENERATED FILE FOR FILE TEST"
+
     # FIXME: test not working
     #   same payloads sent directly to running weaver properly raise invalid schema -> bad request error
     #   somehow they don't work within this test (not raised)...
diff --git a/tests/utils.py b/tests/utils.py
@@ -30,7 +30,7 @@
 from weaver.datatype import Service
 from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_XML
 from weaver.store.mongodb import MongodbJobStore, MongodbProcessStore, MongodbServiceStore
-from weaver.utils import get_path_kvp, get_url_without_query, get_weaver_url, null
+from weaver.utils import fetch_file, get_path_kvp, get_url_without_query, get_weaver_url, null
 from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning
 
 if TYPE_CHECKING:
@@ -46,6 +46,7 @@
     MockPatch = mock._patch  # noqa
 
 MOCK_AWS_REGION = "us-central-1"
+MOCK_HTTP_REF = "http://mock.localhost"
 
 
 def ignore_warning_regex(func, warning_message_regex, warning_categories=DeprecationWarning):
@@ -371,11 +372,11 @@ def mocked_app_request(method, url=None, **req_kwargs):
 
         url, func, req_kwargs = _parse_for_app_req(method, url, **req_kwargs)
         redirects = req_kwargs.pop("allow_redirects", True)
-        if not url.startswith("mock://"):
-            _resp = func(url, expect_errors=True, **req_kwargs)
-        else:
+        if url.startswith("mock://"):
             path = get_url_without_query(url.replace("mock://", ""))
             _resp = mocked_file_response(path, url)
+        else:
+            _resp = func(url, expect_errors=True, **req_kwargs)
         if redirects:
             # must handle redirects manually with TestApp
             while 300 <= _resp.status_code < 400:
@@ -384,7 +385,7 @@ def mocked_app_request(method, url=None, **req_kwargs):
         return _resp
 
     # permit schema validation against 'mock' scheme during test only
-    mock_file_regex = mock.PropertyMock(return_value=colander.Regex(r"^(file|mock://)?(?:/|[/?]\S+)$"))
+    mock_file_regex = mock.PropertyMock(return_value=colander.Regex(r"^((file|mock)://)?(?:/|[/?]\S+)$"))
     with contextlib.ExitStack() as stack:
         stack.enter_context(mock.patch("requests.request", side_effect=mocked_app_request))
         stack.enter_context(mock.patch("requests.Session.request", side_effect=mocked_app_request))
@@ -549,3 +550,40 @@ def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test fi
         tmp_file.flush()
         s3.upload_file(Bucket=bucket_name, Filename=tmp_file.name, Key=file_name)
     return "s3://{}/{}".format(bucket_name, file_name)
+
+
+def mocked_http_file(test_func):
+    # type: (Callable[[...], Any]) -> Callable
+    """
+    Decorator that mocks :func:`fetch_file` as referenced by ``weaver.processes.wps_package``, for tests only.
+
+    While the decorated function runs, any file reference that starts with :data:`MOCK_HTTP_REF` has that
+    prefix removed before the call is delegated to the real :func:`fetch_file`. Other references are
+    forwarded to it unchanged.
+
+    :param test_func: test function to run with the mock applied.
+    :returns: wrapped function that preserves the name and docstring of :paramref:`test_func`.
+
+    .. seealso::
+        - :func:`mocked_reference_test_file`
+    """
+    import functools  # local import keeps this helper self-contained
+
+    def mocked_file_request(file_reference, file_outdir, **kwargs):
+        if file_reference and file_reference.startswith(MOCK_HTTP_REF):
+            # strip only the leading mock prefix, never later occurrences within the path
+            file_reference = file_reference.replace(MOCK_HTTP_REF, "", 1)
+        file_path = fetch_file(file_reference, file_outdir, **kwargs)
+        return file_path
+
+    @functools.wraps(test_func)
+    def wrapped(*args, **kwargs):
+        with mock.patch("weaver.processes.wps_package.fetch_file", side_effect=mocked_file_request):
+            return test_func(*args, **kwargs)
+    return wrapped
+
+
+def mocked_reference_test_file(file_name, href_type, file_content="This is a generated file for href test"):
+    # type: (str,str,str) -> str
+    """
+    Generates a test file from dummy data and returns a reference to it for ``http`` or ``file`` href types.
+
+    :param file_name: name of the file to create inside a new temporary directory.
+    :param href_type: ``"file"`` to obtain a ``file://`` reference. Any other value produces a reference
+        prefixed by :data:`MOCK_HTTP_REF`.
+    :param file_content: text written to the generated file.
+    :returns: reference to the generated file.
+
+    .. note::
+        The temporary directory is not removed by this function.
+
+    .. seealso::
+        - :func:`mocked_http_file`
+    """
+    tmpdir = tempfile.mkdtemp()
+    path = os.path.join(tmpdir, file_name)
+    with open(path, "w") as tmp_file:
+        tmp_file.write(file_content)
+    if href_type == "file":
+        return "file://{}".format(path)
+    # the generated path is absolute, so 'os.path.join' would discard the mock HTTP prefix entirely
+    return "{}/{}".format(MOCK_HTTP_REF, path.lstrip("/"))
diff --git a/weaver/processes/constants.py b/weaver/processes/constants.py
@@ -33,6 +33,7 @@
     CWL_REQUIREMENT_APP_WPS1,
 ])
 CWL_REQUIREMENT_INIT_WORKDIR = "InitialWorkDirRequirement"
+CWL_REQUIREMENT_APP_DOCKER = "DockerRequirement"
 
 # CWL package types and extensions
 PACKAGE_SIMPLE_TYPES = frozenset(["string", "boolean", "float", "int", "integer", "long", "double"])
diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py
@@ -118,7 +118,14 @@ def execute_process(self, job_id, url, headers=None):
                 input_values = process_value if isinstance(process_value, list) else [process_value]
 
                 # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file://
-                input_values = [val[7:] if str(val).startswith("file://") else val for val in input_values]
+                input_values = [
+                    # when value is an array of dict that each contain a file reference
+                    (get_any_value(val)[7:] if str(get_any_value(val)).startswith("file://") else get_any_value(val))
+                    if isinstance(val, dict) else
+                    # when value is directly a single dict with file reference
+                    (val[7:] if str(val).startswith("file://") else val)
+                    for val in input_values
+                ]
 
                 # need to use ComplexDataInput structure for complex input
                 # need to use literal String for anything else than complex
diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py
@@ -179,6 +179,20 @@ def __init__(self, *args, v_prefix=False, rc_suffix=True, **kwargs):
 
 
 class ExtendedBoolean(colander.Boolean):
+
+    def __init__(self, *args, true_choices=None, false_choices=None, fase_choices=None, **kwargs):
+        """
+        Initializes the boolean type with only ``"true"`` and ``"false"`` accepted by default.
+
+        The arguments :paramref:`true_choices` and :paramref:`false_choices`
+        are defined as ``"true"`` and ``"false"`` since :mod:`colander` converts the value to string lowercase
+        to compare with other truthy/falsy values it should accept. Do NOT add other values like ``"1"``
+        to avoid conflict with ``Integer`` type for schemas that support both variants.
+
+        :param true_choices: values considered ``True``. Defaults to ``("true", )``.
+        :param false_choices: values considered ``False``. Defaults to ``("false", )``.
+        :param fase_choices:
+            misspelled alias of :paramref:`false_choices`, kept only for backward compatibility
+            and ignored when :paramref:`false_choices` is provided.
+        """
+        # choices must be tuples: with a plain string, the membership test performs substring matching
+        if true_choices is None:
+            true_choices = ("true", )
+        if false_choices is None:
+            false_choices = ("false", ) if fase_choices is None else fase_choices
+        super(ExtendedBoolean, self).__init__(
+            *args, true_choices=true_choices, false_choices=false_choices, **kwargs
+        )
+
     def serialize(self, node, cstruct):  # pylint: disable=W0221
         result = super(ExtendedBoolean, self).serialize(node, cstruct)
         if result is not colander.null:
diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py
@@ -105,7 +105,9 @@ def get_results(job, container, value_key=None, ogc_api=False):
         if rtype == "href":
             # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.)
             if value.startswith("/"):
-                value = wps_url + str(value).lstrip("/")
+                value = str(value).lstrip("/")
+            if "://" not in value:
+                value = wps_url + value
         elif ogc_api:
             out_key = "value"
         elif value_key:
diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py