-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Calibration superfluous edges #2033
base: master
Are you sure you want to change the base?
Changes from 2 commits
c3ee496
114776c
b193539
8187177
66978c0
99a9c8b
0d3019c
9cb95f7
31a840d
a599460
098fa58
95865ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -15,7 +15,7 @@ | |||||||||
get_diagnostic_cube_name_from_probability_name, | ||||||||||
) | ||||||||||
from improver.utilities.cube_manipulation import MergeCubes | ||||||||||
|
||||||||||
from improver.utilities.load import load_cubelist | ||||||||||
|
||||||||||
def split_forecasts_and_truth( | ||||||||||
cubes: List[Cube], truth_attribute: str | ||||||||||
|
@@ -266,3 +266,47 @@ def add_warning_comment(forecast: Cube) -> Cube: | |||||||||
"however, no calibration has been applied." | ||||||||||
) | ||||||||||
return forecast | ||||||||||
|
||||||||||
from datetime import datetime, timedelta | ||||||||||
|
||||||||||
def get_cube_from_directory(directory, cycle_point=None, max_days_offset=None, date_format='%Y%m%dT%H%MZ'): | ||||||||||
""" | ||||||||||
loads and merges all netCDF files in a directory | ||||||||||
|
||||||||||
To switch on the max offset filter, both cycle_point and max_days_offset | ||||||||||
need to be provided | ||||||||||
Args: | ||||||||||
directory (pathlib.Path): | ||||||||||
The path to the directory. | ||||||||||
cycle_point (str): | ||||||||||
The cycle point of the forecast, used to filter files | ||||||||||
max_days_offset (int): | ||||||||||
Maximum number of days before cycle_point to consider files, | ||||||||||
Defined as a postive int that is subtracted from the cycle_point | ||||||||||
Comment on lines
+292
to
+293
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
date_format (str): | ||||||||||
format of the cyclepoint and datetime in the filename, used by | ||||||||||
datetime.strptime | ||||||||||
|
||||||||||
Returns: | ||||||||||
Cube | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add something more descriptive here? E.g. |
||||||||||
""" | ||||||||||
files = [*map(str, directory.glob("*.nc"))] | ||||||||||
if len(files) == 0: | ||||||||||
# This is probably too serious - is there a quiet way to handle this? | ||||||||||
raise ValueError(f"No files found in {directory}") | ||||||||||
SamGriffithsMO marked this conversation as resolved.
Show resolved
Hide resolved
SamGriffithsMO marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
|
||||||||||
if max_days_offset and cycle_point: | ||||||||||
# Ignore files if they are older than max_days_offset days from cycle_point | ||||||||||
cycle_point = datetime.strptime(cycle_point, date_format) | ||||||||||
earliest_time = cycle_point - timedelta(days=max_days_offset) | ||||||||||
for filename in files.copy(): | ||||||||||
file_datetime = filename.split('/')[-1].split('-')[0] | ||||||||||
if datetime.strptime(file_datetime, date_format) < earliest_time: | ||||||||||
files.remove(filename) | ||||||||||
|
||||||||||
if len(files) < 2: | ||||||||||
raise ValueError(f"Not enough files found in {directory}") | ||||||||||
|
||||||||||
# Check for a lower limit on number of files? - 2 | ||||||||||
SamGriffithsMO marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
cubes = load_cubelist(files) | ||||||||||
return MergeCubes()(cubes) |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -13,9 +13,14 @@ | |||||||||||||||||||||
@cli.clizefy | ||||||||||||||||||||||
@cli.with_output | ||||||||||||||||||||||
def process( | ||||||||||||||||||||||
*cubes: cli.inputcube, | ||||||||||||||||||||||
forecast_directory: cli.inputpath, | ||||||||||||||||||||||
truth_directory: cli.inputpath, | ||||||||||||||||||||||
land_sea_mask: cli.inputcube = None, | ||||||||||||||||||||||
Comment on lines
+16
to
+18
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about:
Suggested change
Comment on lines
+16
to
+18
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or actually (enabling continued CLI behaviour)
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Caveat to this. I'm not a fan of the use of the clize typing mechanism within IMPROVER. However, this might facilitate maintaining existing CLI functionality and behaviour while also facilitating separating 'load' from the plugin itself (useful on our journey towards our long-term goals). |
||||||||||||||||||||||
*, | ||||||||||||||||||||||
distribution, | ||||||||||||||||||||||
truth_attribute, | ||||||||||||||||||||||
cycle_point: str = None, | ||||||||||||||||||||||
max_days_offset: int = None, | ||||||||||||||||||||||
point_by_point=False, | ||||||||||||||||||||||
use_default_initial_guess=False, | ||||||||||||||||||||||
units=None, | ||||||||||||||||||||||
|
@@ -32,13 +37,12 @@ def process( | |||||||||||||||||||||
The estimated coefficients are output as a cube. | ||||||||||||||||||||||
|
||||||||||||||||||||||
Args: | ||||||||||||||||||||||
cubes (list of iris.cube.Cube): | ||||||||||||||||||||||
A list of cubes containing the historical forecasts and | ||||||||||||||||||||||
corresponding truth used for calibration. They must have the same | ||||||||||||||||||||||
cube name and will be separated based on the truth attribute. | ||||||||||||||||||||||
Optionally this may also contain a single land-sea mask cube on the | ||||||||||||||||||||||
same domain as the historic forecasts and truth (where land points | ||||||||||||||||||||||
are set to one and sea points are set to zero). | ||||||||||||||||||||||
forecast_directory (posix.Path): | ||||||||||||||||||||||
The path to a directory containing the historical forecasts | ||||||||||||||||||||||
truth_directory (posix.Path): | ||||||||||||||||||||||
The path to a directory containing the truths to be used | ||||||||||||||||||||||
land_sea_mask (iris.cube.Cube): | ||||||||||||||||||||||
Optional land-sea mask cube, used as a static additonal predictor. | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||
distribution (str): | ||||||||||||||||||||||
The distribution that will be used for minimising the | ||||||||||||||||||||||
Continuous Ranked Probability Score when estimating the EMOS | ||||||||||||||||||||||
|
@@ -88,12 +92,13 @@ def process( | |||||||||||||||||||||
coefficient is stored in a separate cube. | ||||||||||||||||||||||
""" | ||||||||||||||||||||||
|
||||||||||||||||||||||
from improver.calibration import split_forecasts_and_truth | ||||||||||||||||||||||
from improver.calibration.ensemble_calibration import ( | ||||||||||||||||||||||
EstimateCoefficientsForEnsembleCalibration, | ||||||||||||||||||||||
) | ||||||||||||||||||||||
from improver.calibration import get_cube_from_directory | ||||||||||||||||||||||
|
||||||||||||||||||||||
forecast, truth, land_sea_mask = split_forecasts_and_truth(cubes, truth_attribute) | ||||||||||||||||||||||
forecast = get_cube_from_directory(forecast_directory, cycle_point=cycle_point, max_days_offset=max_days_offset) | ||||||||||||||||||||||
truth = get_cube_from_directory(truth_directory, cycle_point=cycle_point, max_days_offset=max_days_offset) | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I appreciate this is still in draft so likely you are doing a minimal change for testing. However, if/when you come closer to pulling this out of draft, we would want to ensure feature parity between the CLI and the plugin, whetehr that means changing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had thought about this issue, but hadn't considered a metaplugin, which I think might be the way to go. Thanks :) |
||||||||||||||||||||||
|
||||||||||||||||||||||
plugin = EstimateCoefficientsForEnsembleCalibration( | ||||||||||||||||||||||
distribution, | ||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that this would be more intuitively named as e.g.
training_dataset_length
ortraining_dataset_days
, as I think that's what this is defining.