Skip to content

Commit 63d01c8

Browse files
authored
Merge branch 'main' into 371-update-the-pr-checklist
2 parents 1b66f8f + 5e863ad commit 63d01c8

18 files changed

+1039
-105
lines changed
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python
2+
# (C) Crown Copyright 2026, Met Office.
3+
# The LICENSE.md file contains full licensing details.
4+
"""
5+
Process and copy the dataset namelist files to a shared directory.
6+
7+
Namelist files are created by rose from the sections in
8+
CMEW/app/add_datasets/rose-app.conf. These may be edited in the GUI.
9+
This application reads the namelist files,
10+
converts the contents to a dictionary of datasets and their facets,
11+
then writes those dictionaries to YAML files in the share directory.
12+
"""
13+
import os
14+
import yaml
15+
16+
17+
def extract_sections_from_naml(naml_fp):
    """
    Read sections from a namelist file and return them as a list of strings.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of str
        A list of strings, each containing the content of a section in the
        namelist file minus the headers and separating characters.
    """
    # Read the namelist file
    with open(naml_fp, "r") as file:
        content = file.read()

    # Namelist sections are separated by a line containing only "/"
    datasets = content.split("\n/\n")

    # Initialise a list to hold the extracted datasets
    extracted_datasets = []

    for dataset in datasets:
        if dataset:  # There is an empty dataset at the end
            # The first line of a section is its "&name" header. Derive
            # the name per section rather than assuming every section
            # shares the first section's header name.
            first_line = dataset.split("\n")[0]
            name = first_line.replace("&", "")

            # Replace newlines with just commas
            dataset = dataset.replace(",\n", ",")

            # Remove remaining new lines
            dataset = dataset.replace("\n", "")

            # Remove the header
            dataset = dataset.replace(f"&{name}", "")

            # Add the dataset to the list
            extracted_datasets.append(dataset)

    return extracted_datasets
63+
64+
65+
def convert_str_to_facets(section):
    """
    Convert a section of a naml file to a dictionary of its facets.

    Parameters
    ----------
    section: str
        A string containing the amended content of a section of namelist file.
        The content is expected to be in the format of key=value pairs,
        without a header and separated by commas.

    Returns
    -------
    section_dict: dict
        A dictionary containing the facets of the dataset.
    """
    # Initialise a dictionary to hold the facets of the dataset
    section_dict = {}

    # Separate the facets in the string to loop over
    facets = section.split(",")
    for facet in facets:
        if facet:  # There's an empty facet at the end

            # The facets in the string are key=value pairs. Split on the
            # first "=" only, so values that themselves contain "=" do
            # not raise a ValueError.
            key, value = facet.split("=", 1)

            # Values are output with quotes around them
            value = value.replace('"', "")

            # Add the key: value pair to the dictionary
            section_dict[key.strip()] = value.strip()

    return section_dict
100+
101+
102+
def add_common_facets(dataset_dict, project="CMIP6"):
    """
    Add start year, end year and project to a dataset dictionary.

    Parameters
    ----------
    dataset_dict: dict
        A dictionary containing the facets of a dataset.
    project: str
        A string indicating the project to which the dataset belongs.
        Default is "CMIP6".

    Returns
    -------
    dataset_dict: dict
        The input dataset dictionary with the common facets added.
    """
    # Read the time window from environment. START_YEAR is read once and
    # reused so the window stays consistent even if the environment
    # changed between reads.
    start_year = int(os.environ["START_YEAR"])
    # The window is inclusive of both endpoint years, hence the -1.
    end_year = start_year + int(os.environ["NUMBER_OF_YEARS"]) - 1

    # Add the start year, end year and project to the dataset dictionary
    dataset_dict["start_year"] = start_year
    dataset_dict["end_year"] = end_year
    dataset_dict["project"] = project

    return dataset_dict
131+
132+
133+
def process_naml_file(naml_fp):
    """
    Extract the datasets and their facets from a namelist file.

    Parameters
    ----------
    naml_fp: str
        The file path to the namelist file containing the datasets.

    Returns
    -------
    datasets: list of dict
        A list of dictionaries, each containing the facets of one dataset.
    """
    # Parse each section into a facet dictionary, then stamp the common
    # facets (time window and project) onto every dataset.
    return [
        add_common_facets(convert_str_to_facets(section))
        for section in extract_sections_from_naml(naml_fp)
    ]
154+
155+
156+
# Note: I've stolen this with a slight rename from update_recipe_file.py
157+
# Eventually the plan is to move it to a common directory
158+
def write_dict_to_yaml(dict_to_write, target_path):
    """Write the contents of a dictionary to a YAML file at ``target_path``.

    Parameters
    ----------
    dict_to_write: dict
        Dictionary containing the content to write.
    target_path: str
        Location at which to write the content.
    """
    with open(target_path, "w") as file_handle:
        # Block style with sorted keys keeps the output stable and
        # human-readable for diffing between runs.
        yaml.dump(
            dict_to_write,
            file_handle,
            default_flow_style=False,
            sort_keys=True,
        )
176+
177+
178+
# If the above function does stay here, there's no reason to have this
179+
# whole function just to create a target path then call the above
180+
def write_datasets_to_yaml(datasets, name, target_dir):
    """
    Write a list of dataset dictionaries to a YAML file in the directory.

    Parameters
    ----------
    datasets: list of dict
        A list of dictionaries, each containing the facets of a dataset.
    name: str
        The name of the YAML file to which the datasets are to be written.
    target_dir: str
        The directory in which the YAML file is to be written.
    """
    # Build "<target_dir>/<name>.yml" and delegate the actual writing.
    write_dict_to_yaml(datasets, os.path.join(target_dir, f"{name}.yml"))
195+
196+
197+
def dict_namelists_in_work_dir():
    """
    Look for namelist files in the work directory of the current app.

    Returns
    -------
    filepaths: dict
        A dictionary of namelist file basenames and their file paths
        based on the filenames ending ".nl".
    """
    # Namelist files are written to the work directory of the add_datasets
    # app, which Cylc exposes via this environment variable.
    work_dir = os.getenv("CYLC_TASK_WORK_DIR")

    # Map each namelist's basename (minus the trailing ".nl") to its full
    # path; grab all the namelist files, in case we add more in future.
    return {
        entry[:-3]: os.path.join(work_dir, entry)
        for entry in os.listdir(work_dir)
        if entry.endswith(".nl")
    }
226+
227+
228+
if __name__ == "__main__":
    # Read the target (shared) directory from the environment and make
    # sure it exists before writing anything into it.
    target_dir = os.environ["DATASETS_LIST_DIR"]
    os.makedirs(target_dir, exist_ok=True)

    # For every namelist file in the work directory, extract its datasets
    # and write them to a YAML file of the same basename in the target.
    for basename, nl_fp in dict_namelists_in_work_dir().items():
        write_datasets_to_yaml(process_naml_file(nl_fp), basename, target_dir)

0 commit comments

Comments
 (0)