changes strictly for parametrisation of two-model runs

mo-nikosbaltas · mo-nikosbaltas · commit 274e9be82501 · 2026-03-04T11:44:39.000Z
diff --git a/CMEW/app/configure_standardise/bin/create_request_file.py b/CMEW/app/configure_standardise/bin/create_request_file.py
@@ -2,25 +2,31 @@
 # (C) Crown Copyright 2024-2026, Met Office.
 # The LICENSE.md file contains full licensing details.
 """
-Generate CDDS request configuration.
+Generate CDDS request configuration for CMEW.
 
-THIS VERSION USES ONLY environment variables populated from rose-suite.conf.
+Two-run only:
 
-Supported modes:
+Reference run (REF_*):
+  - REF_MODEL_ID
+  - REF_SUITE_ID
+  - REF_CALENDAR
+  - REF_VARIANT_LABEL
 
-1) "Legacy" serial/unit-test mode (RUN_LABEL may be unset):
-   - REQUIRES two-run configuration in the environment (REF_* and eval vars).
-   - If RUN_LABEL is unset, defaults to generating the EVAL request
-     (uses SUITE_ID / MODEL_ID / CALENDAR / VARIANT_LABEL).
+Evaluation run (non-REF):
+  - MODEL_ID
+  - SUITE_ID
+  - CALENDAR
+  - VARIANT_LABEL
 
-2) Two-model "multi-run" mode via task parameterisation:
-   - RUN_LABEL is set to a suite_id (e.g. u-bv526 or u-cw673).
-   - If RUN_LABEL == REF_SUITE_ID -> use REF_* variables.
-   - If RUN_LABEL == SUITE_ID     -> use non-REF variables.
-   - Any other RUN_LABEL is an error (no silent fallback).
+Selection rule:
+- RUN_LABEL must be set and must match either REF_SUITE_ID or SUITE_ID.
+- If RUN_LABEL == REF_SUITE_ID -> use REF_* metadata.
+- If RUN_LABEL == SUITE_ID     -> use non-REF metadata.
 
 Naming requirement:
-- ALWAYS set workflow_basename = suite_id, so CDDS paths are cdds_<suite_id>.
+- ALWAYS set workflow_basename = suite_id so CDDS paths are cdds_<suite_id>.
+
+Required environment variables are read directly via os.environ[...].
 """
 
 from __future__ import annotations
@@ -31,106 +37,75 @@
 
 
 def create_request() -> configparser.ConfigParser:
-    # ---------------------------------------------------------------------
-    # 0) Enforce "two-run legacy": require BOTH ref and eval env variables
-    # ---------------------------------------------------------------------
-    required = [
-        "START_YEAR",
-        "NUMBER_OF_YEARS",
-        "INSTITUTION_ID",
-        "ROOT_PROC_DIR",
-        "ROOT_DATA_DIR",
-        "VARIABLES_PATH",
-        # Reference run
-        "REF_MODEL_ID",
-        "REF_SUITE_ID",
-        "REF_CALENDAR",
-        "REF_VARIANT_LABEL",
-        # Evaluation run
-        "MODEL_ID",
-        "SUITE_ID",
-        "CALENDAR",
-        "VARIANT_LABEL",
-    ]
-    missing = [
-        k for k in required if not (os.environ.get(k, "") or "").strip()
-    ]
-    if missing:
-        raise KeyError(
-            "Two-run legacy is required; missing environment variables: "
-            + ", ".join(missing)
-        )
-
-    # ---------------------------------------------------------------------
-    # 1) Time window
-    # ---------------------------------------------------------------------
-    start_year = int(os.environ.get("START_YEAR", "").strip())
-    number_of_years = int(os.environ.get("NUMBER_OF_YEARS", "").strip())
+    # Required time window
+    start_year = int(os.environ["START_YEAR"])
+    number_of_years = int(os.environ["NUMBER_OF_YEARS"])
     end_year = start_year + number_of_years
 
-    # ---------------------------------------------------------------------
-    # 2) Resolve which run we're generating (Option B)
-    #    - RUN_LABEL may be unset -> default to SUITE_ID (eval)
-    # ---------------------------------------------------------------------
-    run_label = (os.environ.get("RUN_LABEL", "") or "").strip()
-    ref_suite_id = (os.environ.get("REF_SUITE_ID", "") or "").strip()
-    suite_id = (os.environ.get("SUITE_ID", "") or "").strip()
+    # Required global metadata
+    institution_id = os.environ["INSTITUTION_ID"]
+    variables_path = os.environ["VARIABLES_PATH"]
+    root_proc_dir = os.environ["ROOT_PROC_DIR"]
+    root_data_dir = os.environ["ROOT_DATA_DIR"]
+
+    # Enforce two-run config exists (KeyError if missing)
+    ref_model_id = os.environ["REF_MODEL_ID"]
+    ref_suite_id = os.environ["REF_SUITE_ID"]
+    ref_calendar = os.environ["REF_CALENDAR"]
+    ref_variant_label = os.environ["REF_VARIANT_LABEL"]
+
+    model_id = os.environ["MODEL_ID"]
+    suite_id = os.environ["SUITE_ID"]
+    calendar = os.environ["CALENDAR"]
+    variant_label = os.environ["VARIANT_LABEL"]
+
+    # Must be set in two-run mode
+    run_label = os.environ["RUN_LABEL"].strip()
 
-    if not run_label:
-        run_label = suite_id
+    # Optional experiment IDs (default to "amip" if not provided)
+    ref_experiment_id = (
+        os.environ.get("REF_EXPERIMENT_ID", "amip").strip() or "amip"
+    )
+    experiment_id = os.environ.get("EXPERIMENT_ID", "amip").strip() or "amip"
 
-    # ---------------------------------------------------------------------
-    # 3) Resolve per-run metadata from env only
-    # ---------------------------------------------------------------------
     if run_label == ref_suite_id:
-        meta_model_id = (os.environ.get("REF_MODEL_ID", "") or "").strip()
-        meta_suite_id = ref_suite_id
-        meta_calendar = (os.environ.get("REF_CALENDAR", "") or "").strip()
-        meta_variant_label = (
-            os.environ.get("REF_VARIANT_LABEL", "") or ""
-        ).strip()
-        meta_experiment_id = (
-            os.environ.get("REF_EXPERIMENT_ID", "amip") or "amip"
-        ).strip()
+        chosen_model_id = ref_model_id
+        chosen_suite_id = ref_suite_id
+        chosen_calendar = ref_calendar
+        chosen_variant_label = ref_variant_label
+        chosen_experiment_id = ref_experiment_id
     elif run_label == suite_id:
-        meta_model_id = (os.environ.get("MODEL_ID", "") or "").strip()
-        meta_suite_id = suite_id
-        meta_calendar = (os.environ.get("CALENDAR", "") or "").strip()
-        meta_variant_label = (
-            os.environ.get("VARIANT_LABEL", "") or ""
-        ).strip()
-        meta_experiment_id = (
-            os.environ.get("EXPERIMENT_ID", "amip") or "amip"
-        ).strip()
+        chosen_model_id = model_id
+        chosen_suite_id = suite_id
+        chosen_calendar = calendar
+        chosen_variant_label = variant_label
+        chosen_experiment_id = experiment_id
     else:
         raise KeyError(
-            "RUN_LABEL must match one of the configured suite IDs. "
-            f"Got RUN_LABEL='{run_label}'. "
-            f"Expected REF_SUITE_ID='{ref_suite_id}' or SUITE_ID='{suite_id}'."
+            "RUN_LABEL must match REF_SUITE_ID or SUITE_ID. "
+            f"Got RUN_LABEL='{run_label}', REF_SUITE_ID='{ref_suite_id}',"
+            f" SUITE_ID='{suite_id}'."
         )
 
-    # Naming requirement: always suite_id
-    workflow_basename = meta_suite_id
+    # Requirement: ALWAYS use suite_id for basename (so cdds_<suite_id>)
+    workflow_basename = chosen_suite_id
 
-    # Avoid ConfigParser interpolation problems (e.g. '%' in URLs)
+    # Avoid ConfigParser interpolation issues (e.g. '%' in URLs)
     request = configparser.RawConfigParser()
 
-    # ---------------------------------------------------------------------
-    # 4) Populate request sections
-    # ---------------------------------------------------------------------
     request["metadata"] = {
         "base_date": "1850-01-01T00:00:00",
         "branch_method": "no parent",
-        "calendar": meta_calendar,
-        "experiment_id": meta_experiment_id or "amip",
-        "institution_id": (os.environ.get("INSTITUTION_ID", "") or "").strip(),
+        "calendar": chosen_calendar,
+        "experiment_id": chosen_experiment_id,
+        "institution_id": institution_id,
         "license": "GCModelDev model data is licensed under the Open Government License v3 (https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/)",  # noqa: E501
         "mip": "ESMVal",
         "mip_era": "GCModelDev",
-        "model_id": meta_model_id,
+        "model_id": chosen_model_id,
         "model_type": "AGCM AER",
         "sub_experiment_id": "none",
-        "variant_label": meta_variant_label,
+        "variant_label": chosen_variant_label,
     }
 
     request["common"] = {
@@ -141,22 +116,20 @@ def create_request() -> configparser.ConfigParser:
         ),
         "mode": "relaxed",
         "package": "round-1",
-        "root_proc_dir": (os.environ.get("ROOT_PROC_DIR", "") or "").strip(),
-        "root_data_dir": (os.environ.get("ROOT_DATA_DIR", "") or "").strip(),
+        "root_proc_dir": root_proc_dir,
+        "root_data_dir": root_data_dir,
         "workflow_basename": workflow_basename,
     }
 
     request["data"] = {
         "end_date": f"{end_year}-01-01T00:00:00",
         "mass_data_class": "crum",
         "model_workflow_branch": "trunk",
-        "model_workflow_id": meta_suite_id,
+        "model_workflow_id": chosen_suite_id,
         "model_workflow_revision": "not used except with data request",
         "start_date": f"{start_year}-01-01T00:00:00",
         "streams": "apm",
-        "variable_list_file": (
-            os.environ.get("VARIABLES_PATH", "") or ""
-        ).strip(),
+        "variable_list_file": variables_path,
     }
 
     request["misc"] = {"atmos_timestep": "1200"}
@@ -174,15 +147,12 @@ def write_request(
     request: configparser.ConfigParser, target_path: Path
 ) -> None:
     target_path.parent.mkdir(parents=True, exist_ok=True)
-    with open(target_path, mode="w", encoding="utf-8") as file_handle:
-        request.write(file_handle)
+    with open(target_path, mode="w", encoding="utf-8") as fh:
+        request.write(fh)
 
 
 def main() -> None:
-    request_path = (os.environ.get("REQUEST_PATH", "") or "").strip()
-    if not request_path:
-        raise KeyError("REQUEST_PATH must be set")
-    target_path = Path(request_path)
+    target_path = Path(os.environ["REQUEST_PATH"])
     request = create_request()
     write_request(request, target_path)
 
diff --git a/CMEW/app/configure_standardise/bin/test_create_request_file.py b/CMEW/app/configure_standardise/bin/test_create_request_file.py
@@ -6,11 +6,8 @@
 
 
 def test_create_request(monkeypatch):
-    # This create_request_file.py enforces "two-run legacy" even in unit tests,
-    # so we must set BOTH eval and ref environment variables.
-    #
-    # RUN_LABEL is intentionally NOT set here; the code defaults to SUITE_ID
-    # (i.e. generates the EVAL request).
+    # Two-run only: must set BOTH eval and ref environment variables
+    # and must set RUN_LABEL to select which request to generate.
 
     # Shared / common env
     monkeypatch.setenv("START_YEAR", "1993")
@@ -32,6 +29,9 @@ def test_create_request(monkeypatch):
     monkeypatch.setenv("REF_SUITE_ID", "u-bv526")
     monkeypatch.setenv("REF_VARIANT_LABEL", "r5i1p1f3")
 
+    # Select EVAL branch explicitly
+    monkeypatch.setenv("RUN_LABEL", "u-az513")
+
     config = create_request()
     actual = {
         section: dict(config.items(section)) for section in config.sections()
@@ -74,9 +74,7 @@ def test_create_request(monkeypatch):
             "streams": "apm",
             "variable_list_file": "/path/to/variables.txt",
         },
-        "misc": {
-            "atmos_timestep": "1200",
-        },
+        "misc": {"atmos_timestep": "1200"},
         "conversion": {
             "mip_convert_plugin": "UKESM1",
             "skip_archive": "True",