|
| 1 | +#!/usr/bin/env python |
| 2 | +# (C) Crown Copyright 2026, Met Office. |
| 3 | +# The LICENSE.md file contains full licensing details. |
| 4 | +""" |
| 5 | +Process and copy the dataset namelist files to a shared directory. |
| 6 | +
|
| 7 | +Namelist files are created by rose from the sections in |
| 8 | +CMEW/app/add_datasets/rose-app.conf. These may be edited in the GUI. |
| 9 | +This application reads the namelist files, |
| 10 | +converts the contents to a dictionary of datasets and their facets, |
| 11 | +then writes those dictionaries to YAML files in the share directory. |
| 12 | +""" |
| 13 | +import os |
| 14 | +import yaml |
| 15 | + |
| 16 | + |
def extract_sections_from_naml(naml_fp):
    """
    Read sections from a namelist file and return them as a list of strings.

    A section starts with a header line beginning with "&" and ends with a
    line containing only "/". Header and terminator lines are dropped and
    the remaining lines of each section are concatenated, without any
    separator, into a single string.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of str
        A list of strings, each containing the content of a section in the
        namelist file minus the headers and separating characters.
        Sections without any content are omitted.
    """
    # Read the namelist file
    with open(naml_fp, "r") as file:
        content = file.read()

    extracted_datasets = []
    section_lines = []

    # Parse line by line so that a missing final newline, trailing blank
    # lines or whitespace around the "/" terminator cannot leak into (or
    # create) a section.
    for line in content.splitlines():
        marker = line.strip()
        if marker == "/":
            # End of the current section
            section = "".join(section_lines)
            if section.strip():
                extracted_datasets.append(section)
            section_lines = []
        elif not marker.startswith("&"):
            # Keep everything except the "&<name>" header line
            section_lines.append(line)

    # Keep any content that follows the last terminator
    trailing = "".join(section_lines)
    if trailing.strip():
        extracted_datasets.append(trailing)

    return extracted_datasets
| 63 | + |
| 64 | + |
def convert_str_to_facets(section):
    """
    Convert a section of a namelist file to a dictionary of its facets.

    Parameters
    ----------
    section: str
        A string containing the amended content of a section of namelist file.
        The content is expected to be in the format of key=value pairs,
        without a header and separated by commas. Because the string is
        split on every comma, values containing commas are not supported.

    Returns
    -------
    section_dict: dict
        A dictionary containing the facets of the dataset. Keys and values
        are stripped of surrounding whitespace and double quotes are
        removed from the values.

    Raises
    ------
    ValueError
        If a non-empty facet does not contain an "=" character.
    """
    # Initialise a dictionary to hold the facets of the dataset
    section_dict = {}

    for facet in section.split(","):
        # Skip empty facets, e.g. the one after the trailing comma
        if not facet.strip():
            continue

        # Split on the first "=" only, so values may themselves contain "="
        key, separator, value = facet.partition("=")
        if not separator:
            raise ValueError(
                f"Expected a key=value pair but found {facet.strip()!r}"
            )

        # Values are output with quotes around them
        section_dict[key.strip()] = value.replace('"', "").strip()

    return section_dict
| 100 | + |
| 101 | + |
def add_common_facets(dataset_dict, project="CMIP6"):
    """
    Add start year, end year and project to a dataset dictionary.

    The time window is taken from the ``START_YEAR`` and ``NUMBER_OF_YEARS``
    environment variables; the end year is the last year of that window.
    The dictionary is updated in place and also returned.

    Parameters
    ----------
    dataset_dict: dict
        A dictionary containing the facets of a dataset.
    project: str
        A string indicating the project to which the dataset belongs.
        Default is "CMIP6".

    Returns
    -------
    dataset_dict: dict
        The input dataset dictionary with the common facets added.
    """
    # Work out the inclusive time window from the environment
    first_year = int(os.environ["START_YEAR"])
    year_count = int(os.environ["NUMBER_OF_YEARS"])
    last_year = first_year + year_count - 1

    # Attach the shared facets to the caller's dictionary
    dataset_dict.update(
        start_year=first_year,
        end_year=last_year,
        project=project,
    )
    return dataset_dict
| 131 | + |
| 132 | + |
def process_naml_file(naml_fp):
    """
    Extract the datasets and their facets from a namelist file.

    Each section of the file is converted to a dictionary of facets, to
    which the common facets (start year, end year, project) are added.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of dict
        A list of dictionaries, each containing the facets of one dataset.
    """
    return [
        add_common_facets(convert_str_to_facets(section_text))
        for section_text in extract_sections_from_naml(naml_fp)
    ]
| 154 | + |
| 155 | + |
| 156 | +# Note: I've stolen this with a slight rename from update_recipe_file.py |
| 157 | +# Eventually the plan is to move it to a common directory |
def write_dict_to_yaml(dict_to_write, target_path):
    """Write the contents of a dictionary to a YAML file at ``target_path``.

    The output uses block style (not flow style) with keys sorted.

    Parameters
    ----------
    dict_to_write: dict
        Dictionary containing the content to write.

    target_path: str
        Location at which to write the content.
    """
    # Formatting options passed through to the YAML dumper
    dump_options = {"default_flow_style": False, "sort_keys": True}

    with open(target_path, "w") as yaml_file:
        yaml.dump(dict_to_write, yaml_file, **dump_options)
| 176 | + |
| 177 | + |
| 178 | +# If the above function does stay here, there's no reason to have this |
| 179 | +# whole function just to create a target path then call the above |
def write_datasets_to_yaml(datasets, name, target_dir):
    """
    Write a list of dataset dictionaries to a YAML file in the directory.

    The file is written to ``<target_dir>/<name>.yml``.

    Parameters
    ----------
    datasets: list of dict
        A list of dictionaries, each containing the facets of a dataset.
    name: str
        The name of the YAML file to which the datasets are to be written,
        without the ".yml" extension.
    target_dir: str
        The directory in which the YAML file is to be written.
    """
    yaml_filename = f"{name}.yml"
    write_dict_to_yaml(datasets, os.path.join(target_dir, yaml_filename))
| 195 | + |
| 196 | + |
def dict_namelists_in_work_dir():
    """
    Look for namelist files in the work directory of the current app.

    The work directory is read from the ``CYLC_TASK_WORK_DIR`` environment
    variable.

    Returns
    -------
    filepaths: dict
        A dictionary of namelist file basenames and their file paths
        based on the filenames ending ".nl". Entries are added in sorted
        filename order.

    Raises
    ------
    KeyError
        If ``CYLC_TASK_WORK_DIR`` is not set in the environment.
    """
    # Namelist files are written to the work directory of the add_datasets
    # app. Index the environment directly so that an unset variable fails
    # here, rather than silently listing the current working directory.
    work_dir = os.environ["CYLC_TASK_WORK_DIR"]

    filepaths = {}

    # Grab all the namelist files, in case we add more in future; sort the
    # listing because os.listdir returns entries in arbitrary order
    for filename in sorted(os.listdir(work_dir)):
        # Use the name of the file minus ".nl" for the key
        stem, extension = os.path.splitext(filename)
        if extension == ".nl":
            # Use the filepath for the value
            filepaths[stem] = os.path.join(work_dir, filename)

    return filepaths
| 226 | + |
| 227 | + |
if __name__ == "__main__":
    # The target (shared) directory comes from the environment and is
    # created on demand
    target_dir = os.environ["DATASETS_LIST_DIR"]
    os.makedirs(target_dir, exist_ok=True)

    # Convert every namelist file found in the work directory into a YAML
    # file of the same basename in the target directory
    namelist_paths = dict_namelists_in_work_dir()
    for basename in namelist_paths:
        write_datasets_to_yaml(
            process_naml_file(namelist_paths[basename]),
            basename,
            target_dir,
        )
0 commit comments