diff --git a/.binder/requirements.txt b/.binder/requirements.txt index e00dd4ac..dd993204 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -333,7 +333,7 @@ tinycss2==1.2.1 # via nbconvert tomli==2.0.1 # via jupyterlab -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client diff --git a/README.md b/README.md index 41a0fe19..360c8aca 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,10 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl | | **Description** | **Selection of Included Functions** | |----------------------- |----------------- |-------------- | -| **[Continuous](https://scores.readthedocs.io/en/latest/included.html#continuous)** |Scores for evaluating single-valued continuous forecasts. |Mean Absolute Error (MAE), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Additive Bias, Multiplicative Bias, Pearson's Correlation Coefficient, Flip-Flop Index, Quantile loss, Murphy score. | +| **[Continuous](https://scores.readthedocs.io/en/latest/included.html#continuous)** |Scores for evaluating single-valued continuous forecasts. |Mean Absolute Error (MAE), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Additive Bias, Multiplicative Bias, Pearson's Correlation Coefficient, Flip-Flop Index, Quantile Loss, Murphy Score. | | **[Probability](https://scores.readthedocs.io/en/latest/included.html#probability)** |Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events. |Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Function (CDF), Threshold weighted CRPS for CDF, CRPS for ensembles, Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams). | -| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Ratio (FAR), Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, FIxed Risk Multicategorical (FIRM) Score. | +| **[Categorical](https://scores.readthedocs.io/en/latest/included.html#categorical)** |Scores for evaluating forecasts based on categories. |Probability of Detection (POD), False Alarm Ratio (FAR), Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, Symmetric Extremal Dependence Index, FIxed Risk Multicategorical (FIRM) Score. | +| **[Spatial](https://scores.readthedocs.io/en/latest/included.html#spatial)** |Scores that take into account spatial structure. |Fractions Skill Score. | | **[Statistical Tests](https://scores.readthedocs.io/en/latest/included.html#statistical-tests)** |Tools to conduct statistical tests and generate confidence intervals. |Diebold Mariano. | | **[Processing Tools](https://scores.readthedocs.io/en/latest/included.html#processing-tools-for-preparing-data)** |Tools to pre-process data. |Data matching, Discretisation, Cumulative Density Function Manipulation. 
| diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 5620e108..10c1d8f9 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -1,4 +1,4 @@ -## Handling Security Concerns +# Handling Security Concerns The scores package is not intended to function as a security boundary, checker or gatekeeper on untrusted or malicious data inputs. It should not be passed user-generated data or be used directly in web applications. diff --git a/docs/api.md b/docs/api.md index 39114282..d2d8d343 100644 --- a/docs/api.md +++ b/docs/api.md @@ -7,7 +7,6 @@ :backlinks: none ``` - ## scores.continuous ```{eval-rst} .. automodule:: scores.continuous @@ -54,6 +53,18 @@ :members: ``` +## scores.spatial +```{eval-rst} +.. autofunction:: scores.spatial.fss_2d +.. autofunction:: scores.spatial.fss_2d_binary +.. autofunction:: scores.spatial.fss_2d_single_field +``` + +## scores.stats +```{eval-rst} +.. autofunction:: scores.stats.statistical_tests.diebold_mariano +``` + ## scores.processing ```{eval-rst} .. autofunction:: scores.processing.isotonic_fit @@ -72,14 +83,10 @@ .. autofunction:: scores.processing.cdf.cdf_envelope ``` -## scores.stats -```{eval-rst} -.. autofunction:: scores.stats.statistical_tests.diebold_mariano -``` - ## scores.pandas ```{eval-rst} .. autofunction:: scores.pandas.continuous.mse .. autofunction:: scores.pandas.continuous.rmse .. autofunction:: scores.pandas.continuous.mae -``` \ No newline at end of file +``` + diff --git a/docs/conf.py b/docs/conf.py index 23dc7aed..f111935f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,7 +9,7 @@ project = "scores" copyright = "Licensed under Apache 2.0 - https://www.apache.org/licenses/LICENSE-2.0" -release = "0.8.4" +release = "0.8.5" version = __version__ @@ -38,8 +38,11 @@ html_theme = "sphinx_book_theme" html_theme_options = { "repository_url": "https://github.com/nci/scores", + "canonical_url": "https://scores.readthedocs.io/en/latest/", "use_repository_button": True, + "show_toc_level": 3, } +html_baseurl = "https://scores.readthedocs.io/en/latest/" # -- nbsphinx --------------------------------------------------------------- # This is processed by Jinja2 and inserted after each notebook diff --git a/docs/included.md b/docs/included.md index 2c082669..92a5bcc6 100644 --- a/docs/included.md +++ b/docs/included.md @@ -26,7 +26,7 @@ - [Griffiths et al. (2019)](https://doi.org/10.1002/met.1732); [Griffiths et al. (2021)](https://doi.org/10.1071/ES21010) * - - - Flip-Flop Index - proportion exceeding + - Flip-Flop Index - Proportion Exceeding - [API](https://scores.readthedocs.io/en/latest/api.html#scores.continuous.flip_flop_index_proportion_exceeding) - @@ -72,15 +72,15 @@ - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Murphy_Diagrams.html) - - [Ehm et al. (2016) - Theorem 1](https://www.jstor.org/stable/24775351); [Taggart (2022) - Theorem 5.3](https://doi.org/10.1214/21-ejs1957) + [Ehm et al. (2016) - Theorem 1](https://doi.org/10.1111/rssb.12154); [Taggart (2022) - Theorem 5.3](https://doi.org/10.1214/21-ejs1957) * - - - Murphy Score (Mean Elementary Score) - theta values + - Murphy Score (Mean Elementary Score) - Theta Values - [API](https://scores.readthedocs.io/en/latest/api.html#scores.continuous.murphy_thetas) - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Murphy_Diagrams.html) - - [Ehm et al. (2016) - Corollary 2 (p.521)](https://www.jstor.org/stable/24775351); [Taggart (2022) - Corollary 5.6](https://doi.org/10.1214/21-ejs1957) + [Ehm et al. 
(2016) - Corollary 2 (p.521)](https://doi.org/10.1111/rssb.12154); [Taggart (2022) - Corollary 5.6](https://doi.org/10.1214/21-ejs1957) * - Pearson's Correlation Coefficient - [API](https://scores.readthedocs.io/en/latest/api.html#scores.continuous.correlation) - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Pearsons_Correlation.html) @@ -175,15 +175,15 @@ - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Murphy_Diagrams.html) - - [Ehm et al. (2016) - Theorem 1](https://www.jstor.org/stable/24775351); [Taggart (2022) - Theorem 5.3](https://doi.org/10.1214/21-ejs1957) + [Ehm et al. (2016) - Theorem 1](https://doi.org/10.1111/rssb.12154); [Taggart (2022) - Theorem 5.3](https://doi.org/10.1214/21-ejs1957) * - - - Murphy Score (Mean Elementary Score) - theta values + - Murphy Score (Mean Elementary Score) - Theta Values - [API](https://scores.readthedocs.io/en/latest/api.html#scores.probability.murphy_thetas) - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Murphy_Diagrams.html) - - [Ehm et al. (2016) - Corollary 2 (p.521)](https://www.jstor.org/stable/24775351); [Taggart (2022) - Corollary 5.6](https://doi.org/10.1214/21-ejs1957) + [Ehm et al. (2016) - Corollary 2 (p.521)](https://doi.org/10.1111/rssb.12154); [Taggart (2022) - Corollary 5.6](https://doi.org/10.1214/21-ejs1957) * - Receiver (Relative) Operating Characteristic (ROC) - [API](https://scores.readthedocs.io/en/latest/api.html#scores.probability.roc_curve_data) - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/ROC.html) @@ -215,6 +215,14 @@ [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - [Accuracy (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ACC) +* - + - Base Rate + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.categorical.BasicContingencyManager.base_rate) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) + - + [Hogan and Mason (2011)](https://doi.org/10.1002/9781119960003.ch3) * - - Bias Score (Frequency Bias) - @@ -246,7 +254,7 @@ - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - - [Equitable Threat score (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ETS) + [Hogan et al. (2010)](https://doi.org/10.1175/2009WAF2222350.1); [Equitable Threat score (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ETS) * - - F1 Score - @@ -271,6 +279,14 @@ [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - [False alarm ratio (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#FAR) +* - + - Forecast Rate + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.categorical.BasicContingencyManager.forecast_rate) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) + - + [Hogan and Mason (2011)](https://doi.org/10.1002/9781119960003.ch3) * - - Fraction Correct (Accuracy) - @@ -294,7 +310,7 @@ - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - - [Equitable Threat score (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ETS) + [Hogan et al. 
(2010)](https://doi.org/10.1175/2009WAF2222350.1); [Equitable Threat score (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#ETS) * - - Hanssen and Kuipers' Discriminant (Peirce's Skill Score, True Skill Statistic) - @@ -342,7 +358,7 @@ - [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - - [Hanssen and Kuipers discriminant (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#HK) + [Peirce (1884)](https://doi.org/10.1126/science.ns-4.93.453.b); [Hanssen and Kuipers discriminant (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#HK) * - - Precision (Success Ratio) - @@ -375,6 +391,14 @@ [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) - [Wikipedia](https://en.wikipedia.org/wiki/Precision_and_recall); [Probability of detection (WWRP/WGNE Joint Working Group on Forecast Verification Research)](https://www.cawcr.gov.au/projects/verification/#POD) +* - + - Symmetric Extremal Dependence Index (SEDI) + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.categorical.BasicContingencyManager.symmetric_extremal_dependence_index) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Binary_Contingency_Scores.html) + - + [Ferro and Stephenson (2011)](https://doi.org/10.1175/WAF-D-10-05030.1) * - - Sensitivity (Hit Rate, Probability of Detection (POD), True Positive Rate, Recall) - @@ -473,6 +497,45 @@ — ``` +## Spatial + +```{list-table} +:header-rows: 1 + +* - Name (Alphabetical order) + - API + - Tutorial + - Reference(s) +* - Fractions Skill Score (FSS) + - + - + - +* - + - FSS - 2D + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.spatial.fss_2d) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Fractions_Skill_Score.html) + - + [Roberts and Lean (2008)](https://doi.org/10.1175/2007mwr2123.1); [Mittermaier (2021)](https://doi.org/10.1175/mwr-d-18-0106.1) +* - + - FSS - 2D Binary + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.spatial.fss_2d_binary) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Fractions_Skill_Score.html) + - + — +* - + - FSS - 2D Single Field + - + [API](https://scores.readthedocs.io/en/latest/api.html#scores.spatial.fss_2d_single_field) + - + [Tutorial](https://scores.readthedocs.io/en/latest/tutorials/Fractions_Skill_Score.html) + - + [Roberts and Lean (2008)](https://doi.org/10.1175/2007mwr2123.1); [Faggian et al. (2015)](https://doi.org/10.54302/mausam.v66i3.555) +``` + ## Statistical Tests ```{list-table} diff --git a/docs/related_works.md b/docs/related_works.md index c46a2132..50a56745 100644 --- a/docs/related_works.md +++ b/docs/related_works.md @@ -3,21 +3,31 @@ Here are some related packages that may be of interest. - **`xskillscore`** - - Open source Python package that provides verification metrics of deterministic (and probabilistic from properscoring) forecasts with xarray. - - [Software Repository](https://github.com/xarray-contrib/xskillscore); [Documentation](https://xskillscore.readthedocs.io/en/latest/); [Reference](https://doi.org/10.5281/zenodo.5173153) + - Open source Python package that provides verification metrics of deterministic (and probabilistic from [`properscoring`](https://github.com/properscoring/properscoring)) forecasts with xarray. 
+ - [Software Repository](https://github.com/xarray-contrib/xskillscore); [Documentation](https://xskillscore.readthedocs.io/en/latest/) + - Reference: [Bell et al. (2021)](https://doi.org/10.5281/zenodo.5173153) - **`climpred`** - Open source Python package for verification of weather and climate forecasts. - - [Software Repository](https://github.com/pangeo-data/climpred); [Documentation](https://climpred.readthedocs.io/en/stable/); [Reference](https://doi.org/10.21105/joss.02781) + - [Software Repository](https://github.com/pangeo-data/climpred); [Documentation](https://climpred.readthedocs.io/en/stable/) + - Reference: [Brady and Spring (2021)](https://doi.org/10.21105/joss.02781) - **`Verif`** - Open source command-line tool for forecast verification. Generates verification plots. Can evaluate deterministic and probabilistic predictions. - - [Software Repository](https://github.com/WFRT/verif); [Wiki](https://github.com/WFRT/verif/wiki); [Reference](https://doi.org/10.1175/bams-d-22-0253.1) + - [Software Repository](https://github.com/WFRT/verif); [Wiki](https://github.com/WFRT/verif/wiki) + - Reference: [Nipen et al. (2023)](https://doi.org/10.1175/bams-d-22-0253.1) - **`METplus`** - - `METplus` includes a database and visualisation system with python wrappers to utilise the `MET` package. Verification scores in `MET` are implemented in C++. - - [Website](https://dtcenter.org/community-code/metplus); [Software Repository](https://github.com/dtcenter/METplus); [Documentation](https://metplus.readthedocs.io/en/latest/); [Reference](https://doi.org/10.1175/bams-d-19-0093.1) - - - - \ No newline at end of file + - `METplus` includes a database and visualisation system, with Python and shell script wrappers to utilise the [`MET`](https://github.com/dtcenter/MET) package. Verification scores in [`MET`](https://github.com/dtcenter/MET) are implemented in C++. + - [Software Repository](https://github.com/dtcenter/METplus); [Documentation](https://metplus.readthedocs.io/en/latest/); [Website](https://dtcenter.org/community-code/metplus) + - Reference: [Brown et al. (2021)](https://doi.org/10.1175/bams-d-19-0093.1) + +- **`Pysteps`** + - Open source Python library for short-term ensemble prediction systems, focusing on probabilistic nowcasting of radar precipitation fields. Includes a significant verification submodule. + - [Software Repository](https://github.com/pySTEPS/pysteps); [Documentation](https://pysteps.readthedocs.io/en/stable/); [Website](https://pysteps.github.io/) + - References: [Pulkkinen et al. (2019)](https://doi.org/10.5194/gmd-12-4185-2019); [Imhoff et al. (2023)](https://doi.org/10.1002/qj.4461) + +- **`PyForecastTools`** + - Open source Python module for model validation, forecast verification and plot generation. Uses dmarray data structures from [`SpacePy`](https://github.com/spacepy/spacepy). 
+ - [Software Repository](https://github.com/drsteve/PyForecastTools); [Documentation](https://drsteve.github.io/PyForecastTools/) + - Reference: [Morley and Burrell (2020)](https://doi.org/10.5281/zenodo.3764117) \ No newline at end of file diff --git a/src/scores/__init__.py b/src/scores/__init__.py index 78c68e50..02b0e64c 100644 --- a/src/scores/__init__.py +++ b/src/scores/__init__.py @@ -13,7 +13,7 @@ import scores.sample_data import scores.stats.statistical_tests # noqa: F401 -__version__ = "0.8.4" +__version__ = "0.8.5" __all__ = [ "scores.categorical", diff --git a/src/scores/categorical/contingency_impl.py b/src/scores/categorical/contingency_impl.py index 981bb9d5..e7f682bd 100644 --- a/src/scores/categorical/contingency_impl.py +++ b/src/scores/categorical/contingency_impl.py @@ -1,12 +1,12 @@ """ -This foundation class provides an underpinning data structure which can be used for +This foundation class provides an underpinning data structure which can be used for contingency tables of various kinds, also known as a confusion matrix. It allows the careful setting out of when and where forecasts closely match observations. -The binary contingency table captures true positives (hits), true negatives (correct negatives), -false positives (false alarms) and false negatives (misses). +The binary contingency table captures true positives (hits), true negatives (correct negatives), +false positives (false alarms) and false negatives (misses). The process of deriving a contingency table relies on the forecast data, the observation data, and a matching or event operator. The event operator will produce the category from the @@ -78,13 +78,13 @@ def __str__(self): final = "\n".join([heading, tablerepr]) return final - def get_counts(self): + def get_counts(self) -> dict: """ Return the contingency table counts (tp, fp, tn, fn) """ return self.counts - def get_table(self): + def get_table(self) -> xr.DataArray: """ Return the contingency table as an xarray object """ @@ -103,7 +103,6 @@ def accuracy(self) -> xr.DataArray: \\text{accuracy} = \\frac{\\text{true positives} + \\text{true negatives}}{\\text{total count}} Notes: - - Range: 0 to 1, where 1 indicates a perfect score. - "True positives" is the same at "hits". - "False negatives" is the same as "misses". @@ -116,6 +115,54 @@ def accuracy(self) -> xr.DataArray: ratio = correct_count / count_dictionary["total_count"] return ratio + def base_rate(self) -> xr.DataArray: + """ + The observed event frequency. + + Returns: + xr.DataArray: An xarray object containing the base rate. + + .. math:: + \\text{base rate} = \\frac{\\text{true positives} + \\text{false negatives}}{\\text{total count}} + + Notes: + - Range: 0 to 1, where 1 indicates the event occurred every time. + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + + References: + Hogan, R. J. & Mason, I. B. (2011). Deterministic forecasts of binary events. + In I. T. Jolliffe & D. B. Stephenson (Eds.), Forecast verification: A practitioner's guide in atmospheric science (2nd ed., + pp. 39-51). https://doi.org/10.1002/9781119960003.ch3 + """ + cd = self.counts + br = (cd["tp_count"] + cd["fn_count"]) / cd["total_count"] + return br + + def forecast_rate(self) -> xr.DataArray: + """ + The forecast event frequency. + + Returns: + xr.DataArray: An xarray object containing the forecast rate. + + .. 
math:: + \\text{forecast rate} = \\frac{\\text{true positives} + \\text{false positives}}{\\text{total count}} + + Notes: + - Range: 0 to 1, where 1 indicates the event was forecast every time. + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + + References: + Hogan, R. J. & Mason, I. B. (2011). Deterministic forecasts of binary events. + In I. T. Jolliffe & D. B. Stephenson (Eds.), Forecast verification: A practitioner's guide in atmospheric science (2nd ed., + pp. 39-51). https://doi.org/10.1002/9781119960003.ch3 + """ + cd = self.counts + br = (cd["tp_count"] + cd["fp_count"]) / cd["total_count"] + return br + def fraction_correct(self) -> xr.DataArray: """ Identical to accuracy. @@ -129,10 +176,9 @@ def fraction_correct(self) -> xr.DataArray: \\text{fraction correct} = \\frac{\\text{true positives} + \\text{true negatives}}{\\text{total count}} Notes: - - Range: 0 to 1, where 1 indicates a perfect score. - "True positives" is the same as "hits". - - "False negatives" is the same as "misses". + - "True negatives" is the same as "correct negatives". References: https://www.cawcr.gov.au/projects/verification/#ACC @@ -141,6 +187,8 @@ def fraction_correct(self) -> xr.DataArray: def frequency_bias(self) -> xr.DataArray: """ + Identical to bias scores. + How did the forecast frequency of "yes" events compare to the observed frequency of "yes" events? Returns: @@ -150,11 +198,10 @@ def frequency_bias(self) -> xr.DataArray: \\text{frequency bias} = \\frac{\\text{true positives} + \\text{false positives}}{\\text{true positives} + \\text{false negatives}} Notes: - - - Range: 0 to ∞ (infinity), where 1 indicates a perfect score - - "True positives" is the same as "hits" - - "False positives" is the same as "false alarms" - - "False negatives" is the same as "misses" + - Range: 0 to ∞ (infinity), where 1 indicates a perfect score. + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#BIAS @@ -167,6 +214,8 @@ def frequency_bias(self) -> xr.DataArray: def bias_score(self) -> xr.DataArray: """ + Identical to frequence bias. + How did the forecast frequency of "yes" events compare to the observed frequency of "yes" events? Returns: @@ -176,11 +225,10 @@ def bias_score(self) -> xr.DataArray: \\text{frequency bias} = \\frac{\\text{true positives} + \\text{false positives}}{\\text{true positives} + \\text{false negatives}} Notes: - - - Range: 0 to ∞ (infinity), where 1 indicates a perfect score - - "True positives" is the same as "hits" - - "False positives" is the same as "false alarms" - - "False negatives" is the same as "misses" + - Range: 0 to ∞ (infinity), where 1 indicates a perfect score. + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#BIAS @@ -189,7 +237,7 @@ def bias_score(self) -> xr.DataArray: def hit_rate(self) -> xr.DataArray: """ - Identical to probability_of_detection + Identical to true positive rate, probability of detection (POD), sensitivity and recall. Calculates the proportion of the observed events that were correctly forecast. @@ -200,10 +248,9 @@ def hit_rate(self) -> xr.DataArray: \\text{true positives} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} Notes: - - Range: 0 to 1. Perfect score: 1. 
- - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#POD @@ -212,7 +259,7 @@ def hit_rate(self) -> xr.DataArray: def probability_of_detection(self) -> xr.DataArray: """ - Identical to hit rate + Probability of detection (POD) is identical to hit rate, true positive rate, sensitivity and recall. Calculates the proportion of the observed events that were correctly forecast. @@ -238,20 +285,20 @@ def probability_of_detection(self) -> xr.DataArray: def true_positive_rate(self) -> xr.DataArray: """ - Identical to probability_of_detection + Identical to hit rate, probability of detection (POD), sensitivity and recall. What proportion of the observed events where correctly forecast? Returns: - An xarray object containing the true positive rate + xr.DataArray: An xarray object containing the true positive rate .. math:: \\text{true positive rate} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} Notes: - Range: 0 to 1. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#POD @@ -260,8 +307,8 @@ def true_positive_rate(self) -> xr.DataArray: def false_alarm_ratio(self) -> xr.DataArray: """ - What fraction of the predicted "yes" events actually did not occur (i.e., - were false alarms)? + The false alarm ratio (FAR) calculates the fraction of the predicted "yes" events + which did not eventuate (i.e., were false alarms). Returns: xr.DataArray: An xarray object containing the false alarm ratio @@ -271,9 +318,9 @@ def false_alarm_ratio(self) -> xr.DataArray: Notes: - Range: 0 to 1. Perfect score: 0. - - Not to be confused with the False Alarm Rate - - "False positives" is the same as "false alarms" - - "True positives" is the same as "hits" + - Not to be confused with the False Alarm Rate. + - "False positives" is the same as "false alarms". + - "True positives" is the same as "hits". References: https://www.cawcr.gov.au/projects/verification/#FAR @@ -285,7 +332,7 @@ def false_alarm_ratio(self) -> xr.DataArray: def false_alarm_rate(self) -> xr.DataArray: """ - Identical to probability_of_false_detection + Identical to probability of false detection. What fraction of the non-events were incorrectly predicted? @@ -297,9 +344,9 @@ def false_alarm_rate(self) -> xr.DataArray: Notes: - Range: 0 to 1. Perfect score: 0. - - Not to be confused with the false alarm ratio - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - Not to be confused with the false alarm ratio. + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". References: https://www.cawcr.gov.au/projects/verification/#POFD @@ -312,20 +359,20 @@ def false_alarm_rate(self) -> xr.DataArray: def probability_of_false_detection(self) -> xr.DataArray: """ - Identical to false_alarm_rate + Probability of false detection (POFD) is identical to the false alarm rate. What fraction of the non-events were incorrectly predicted? Returns: - An xarray object containing the probability of false detection + xr.DataArray: An xarray object containing the probability of false detection .. 
math:: \\text{probability of false detection} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} Notes: - Range: 0 to 1. Perfect score: 0. - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". References: https://www.cawcr.gov.au/projects/verification/#POFD @@ -335,18 +382,20 @@ def probability_of_false_detection(self) -> xr.DataArray: def success_ratio(self) -> xr.DataArray: """ + Identical to precision. + What proportion of the forecast events actually eventuated? Returns: - An xarray object containing the success ratio + xr.DataArray: An xarray object containing the success ratio .. math:: \\text{success ratio} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives}} Notes: - Range: 0 to 1. Perfect score: 1. - - "True positives" is the same as "hits" - - "False positives" is the same as "misses" + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". References: https://www.cawcr.gov.au/projects/verification/#SR @@ -358,7 +407,7 @@ def success_ratio(self) -> xr.DataArray: def threat_score(self) -> xr.DataArray: """ - Identical to critical_success_index + Identical to critical success index. Returns: xr.DataArray: An xarray object containing the threat score .. math:: \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} Notes: - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. - - "True positives" is the same as "hits" - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + - "False negatives" is the same as "misses". References: https://www.cawcr.gov.au/projects/verification/#CSI @@ -383,14 +432,14 @@ def threat_score(self) -> xr.DataArray: def critical_success_index(self) -> xr.DataArray: """ - Identical to threat_score + Identical to threat score. + + Returns: + xr.DataArray: An xarray object containing the critical success index .. math:: \\text{threat score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives} + \\text{false negatives}} - Returns: - An xarray object containing the critical success index - Notes: - Range: 0 to 1, 0 indicates no skill. Perfect score: 1. - Often known as CSI. @@ -405,25 +454,27 @@ def critical_success_index(self) -> xr.DataArray: def peirce_skill_score(self) -> xr.DataArray: """ - Identical to Hanssen and Kuipers discriminant and the true skill statistic + Identical to Hanssen and Kuipers discriminant and the true skill statistic. How well did the forecast separate the "yes" events from the "no" events? Returns: - An xarray object containing the Peirce Skill Score + xr.DataArray: An xarray object containing the Peirce Skill Score .. math:: \\text{Peirce skill score} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} Notes: - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". 
+ - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". References: - https://www.cawcr.gov.au/projects/verification/#HK + - https://www.cawcr.gov.au/projects/verification/#HK + - Peirce, C.S., 1884. The numerical measure of the success of predictions. \ + Science, ns-4(93), pp.453-454. https://doi.org/10.1126/science.ns-4.93.453.b """ cd = self.counts component_a = cd["tp_count"] / (cd["tp_count"] + cd["fn_count"]) @@ -433,7 +484,7 @@ def peirce_skill_score(self) -> xr.DataArray: def true_skill_statistic(self) -> xr.DataArray: """ - Identical to Peirce's skill score and to Hanssen and Kuipers discriminant + Identical to Peirce's skill score and to Hanssen and Kuipers discriminant. How well did the forecast separate the "yes" events from the "no" events? @@ -445,10 +496,10 @@ def true_skill_statistic(self) -> xr.DataArray: Notes: - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". References: https://www.cawcr.gov.au/projects/verification/#HK @@ -457,7 +508,7 @@ def true_skill_statistic(self) -> xr.DataArray: def hanssen_and_kuipers_discriminant(self) -> xr.DataArray: """ - Identical to Peirce's skill score and to true skill statistic + Identical to Peirce's skill score and to the true skill statistic. How well did the forecast separate the "yes" events from the "no" events? @@ -467,14 +518,14 @@ def hanssen_and_kuipers_discriminant(self) -> xr.DataArray: .. math:: \\text{HK} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} - \\frac{\\text{false positives}}{\\text{false positives} + \\text{true negatives}} - Where :math:`\\text{HK}` is Hansen and Kuipers Discriminant + where :math:`\\text{HK}` is Hansen and Kuipers Discriminant Notes: - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" - - "False positives" is the same as "false alarms" - - "True negatives" is the same as "correct negatives" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". References: https://www.cawcr.gov.au/projects/verification/#HK @@ -483,20 +534,20 @@ def hanssen_and_kuipers_discriminant(self) -> xr.DataArray: def sensitivity(self) -> xr.DataArray: """ - Identical to probability of detection and hit_rate + Identical to hit rate, probability of detection (POD), true positive rate and recall. Calculates the proportion of the observed events that were correctly forecast. Returns: - An xarray object containing the probability of detection + xr.DataArray: An xarray object containing the probability of detection .. math:: \\text{sensitivity} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} Notes: - Range: 0 to 1. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". 
References: - https://www.cawcr.gov.au/projects/verification/#POD @@ -551,20 +602,20 @@ def true_negative_rate(self) -> xr.DataArray: def recall(self) -> xr.DataArray: """ - Identical to probability of detection. + Identical to hit rate, probability of detection (POD), true positive rate and sensitivity. Calculates the proportion of the observed events that were correctly forecast. Returns: - An xarray object containing the probability of detection + xr.DataArray: An xarray object containing the probability of detection .. math:: \\text{recall} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} Notes: - Range: 0 to 1. Perfect score: 1. - - "True positives" is the same as "hits" - - "False negatives" is the same as "misses" + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". References: - https://www.cawcr.gov.au/projects/verification/#POD @@ -572,40 +623,84 @@ def recall(self) -> xr.DataArray: """ return self.probability_of_detection() - def precision(self): + def precision(self) -> xr.DataArray: """ Identical to the Success Ratio. - https://en.wikipedia.org/wiki/Precision_and_recall + What proportion of the forecast events actually eventuated? + + Returns: + xr.DataArray: An xarray object containing the precision score + + .. math:: + \\text{precision} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false positives}} + + Notes: + - Range: 0 to 1. Perfect score: 1. + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + + References: + - https://www.cawcr.gov.au/projects/verification/#SR + - https://en.wikipedia.org/wiki/Precision_and_recall + + """ return self.success_ratio() - def f1_score(self): + def f1_score(self) -> xr.DataArray: """ Calculates the F1 score. - https://en.wikipedia.org/wiki/F-score + + Returns: + xr.DataArray: An xarray object containing the F1 score + + .. math:: + \\text{F1} = \\frac{2 \\cdot \\text{true positives}}{(2 \\cdot \\text{true positives}) + \\text{false positives} + \\text{false negatives}} + + Notes: + - "True positives" is the same as "hits". + - "False positives" is the same as "false alarms". + - "False negatives" is the same as "misses". + + References: + - https://en.wikipedia.org/wiki/F-score """ cd = self.counts f1 = 2 * cd["tp_count"] / (2 * cd["tp_count"] + cd["fp_count"] + cd["fn_count"]) return f1 - def equitable_threat_score(self): + def equitable_threat_score(self) -> xr.DataArray: """ - Calculates the Equitable threat score (also known as the Gilbert skill score). + Identical to the Gilbert Skill Score. + + Calculates the Equitable threat score. How well did the forecast "yes" events correspond to the observed "yes" events (accounting for hits due to chance)? - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. + Returns: + xr.DataArray: An xarray object containing the equitable threat score + + .. math:: + \\text{ETS} = \\frac{\\text{true positives} - \\text{true positives} _\\text{random}}\ + {\\text{true positives} + \\text{false negatives} + \\text{false positives} - \\text{true positives} _\\text{random}} + where - References: + .. math:: + \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} - Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + Notes: + - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. 
+ - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms" - Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. - Equitability revisited: Why the “equitable threat score” is not equitable. - Weather and Forecasting, 25(2), pp.710-726. https://doi.org/10.1175/2009WAF2222350.1 + References: + - Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + - Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. \ + Equitability revisited: Why the “equitable threat score” is not equitable. \ + Weather and Forecasting, 25(2), pp.710-726. https://doi.org/10.1175/2009WAF2222350.1 """ cd = self.counts hits_random = (cd["tp_count"] + cd["fn_count"]) * (cd["tp_count"] + cd["fp_count"]) / cd["total_count"] @@ -613,34 +708,57 @@ def equitable_threat_score(self): return ets - def gilberts_skill_score(self): + def gilberts_skill_score(self) -> xr.DataArray: """ - Calculates the Gilbert skill score (also known as the Equitable threat score). + Identical to the equitable threat scores + + Calculates the Gilbert skill score. How well did the forecast "yes" events correspond to the observed "yes" events (accounting for hits due to chance)? - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. + Returns: + xr.DataArray: An xarray object containing the Gilberts Skill Score - References: + .. math:: + \\text{GSS} = \\frac{\\text{true positives} - \\text{true positives} _\\text{random}}\ + {\\text{true positives} + \\text{false negatives} + \\text{false positives} - \\text{true positivies} _\\text{random}} - Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + where - Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. - Equitability revisited: Why the “equitable threat score” is not equitable. - Weather and Forecasting, 25(2), pp.710-726. https://doi.org/10.1175/2009WAF2222350.1 + .. math:: + \\text{true_positives}_{\\text{random}} = \\frac{(\\text{true positives} + \\text{false negatives}) (\\text{true positives} + \\text{false positives})}{\\text{total count}} + + Notes: + - Range: -1/3 to 1, 0 indicates no skill. Perfect score: 1. + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses" + - "False positives" is the same as "false alarms". + + References: + - Gilbert, G.K., 1884. Finley’s tornado predictions. American Meteorological Journal, 1(5), pp.166–172. + - Hogan, R.J., Ferro, C.A., Jolliffe, I.T. and Stephenson, D.B., 2010. \ + Equitability revisited: Why the “equitable threat score” is not equitable. \ + Weather and Forecasting, 25(2), pp.710-726. https://doi.org/10.1175/2009WAF2222350.1 """ return self.equitable_threat_score() - def heidke_skill_score(self): + def heidke_skill_score(self) -> xr.DataArray: """ - Calculates the Heidke skill score (also known as Cohen's kappa). + Identical to Cohen's Kappa + + Calculates the Heidke skill score. What was the accuracy of the forecast relative to that of random chance? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + Returns: + xr.DataArray: An xarray object containing the Heidke skill score + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. 
- https://en.wikipedia.org/wiki/Cohen%27s_kappa + References: + - https://en.wikipedia.org/wiki/Cohen%27s_kappa """ cd = self.counts exp_correct = (1 / cd["total_count"]) * ( @@ -650,47 +768,109 @@ def heidke_skill_score(self): hss = ((cd["tp_count"] + cd["tn_count"]) - exp_correct) / (cd["total_count"] - exp_correct) return hss - def cohens_kappa(self): + def cohens_kappa(self) -> xr.DataArray: """ - Calculates the Cohen's kappa (also known as the Heidke skill score). + Identical to the Heidke skill score. + + Calculates Cohen's kappa. What was the accuracy of the forecast relative to that of random chance? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + Returns: + xr.DataArray: An xarray object containing the Cohen's Kappa score - https://en.wikipedia.org/wiki/Cohen%27s_kappa + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + + References: + - https://en.wikipedia.org/wiki/Cohen%27s_kappa """ return self.heidke_skill_score() - def odds_ratio(self): + def odds_ratio(self) -> xr.DataArray: """ Calculates the odds ratio What is the ratio of the odds of a "yes" forecast being correct, to the odds of a "yes" forecast being wrong? - Odds ratio - Range: 0 to ∞, 1 indicates no skill. Perfect score: ∞. + Returns: + xr.DataArray: An xarray object containing the odds ratio + + .. math:: + \\begin{aligned} + \\text{odds ratio} &= + \\left[\\frac{\\text{POD}}{\\text{1 - POD}}\\right] + \\div + \\left[\\frac{\\text{POFD}}{\\text{1 - POFD}}\\right] + \\\\ &= + \\frac{\\text{true positives} \\cdot \\text{true negatives}}{\\text{false positives} \\cdot \\text{false negatives}} + \\end{aligned} + + where + + .. math:: + \\text{POD} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} + + and + + .. math:: + \\text{POFD} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} + + + Notes: + - Range: 0 to ∞, 1 indicates no skill. Perfect score: ∞. + - POD = Probability of Detection + - POFD = Probability of False Detection + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". - Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. - Weather and Forecasting, 15(2), pp.221-232. + References: + - Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. \ + Weather and Forecasting, 15(2), pp.221-232. \ + https://doi.org/10.1175/1520-0434(2000)015%3C0221:UOTORF%3E2.0.CO;2 """ odds_r = (self.probability_of_detection() / (1 - self.probability_of_detection())) / ( self.probability_of_false_detection() / (1 - self.probability_of_false_detection()) ) return odds_r - def odds_ratio_skill_score(self): + def odds_ratio_skill_score(self) -> xr.DataArray: """ - Calculates the odds ratio skill score (also known as Yule's Q). + Identical to Yule's Q. + + Calculates the odds ratio skill score. - Note - the term 'skill score' is often used to describe the relative performance - of one source of predictoins over another - e.g. the relative performance of an - upgraded model on its predecessor, or the relative performance to a benchmark such - as climatology. The odds ratio skill score is not that kind of skill score. What was the improvement of the forecast over random chance? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. 
- Weather and Forecasting, 15(2), pp.221-232. + + Returns: + xr.DataArray: An xarray object containing the odds ratio skill score + + .. math:: + + \\begin{aligned} + \\text{ORSS} &= \\frac{\\text{OR} - 1}{\\text{OR} + 1} + \\\\ &= \\frac{\\text{true positives} \\cdot \\text{true negatives} + - \\text{false positives} \\cdot \\text{false negatives}}{ + \\text{true positives} \\cdot \\text{true negatives} + + \\text{false positives} \\cdot \\text{false negatives}} + \\end{aligned} + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + - ORSS = Odds Ratio Skill Score + - OR = Odds ratio, see: :meth:`odds_ratio` + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". + + References: + - Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. \ + Weather and Forecasting, 15(2), pp.221-232. \ + https://doi.org/10.1175/1520-0434(2000)015%3C0221:UOTORF%3E2.0.CO;2 """ cd = self.counts orss = (cd["tp_count"] * cd["tn_count"] - cd["fn_count"] * cd["fp_count"]) / ( @@ -698,16 +878,92 @@ def odds_ratio_skill_score(self): ) return orss - def yules_q(self): + def yules_q(self) -> xr.DataArray: """ - Calculates the Yule's Q (also known as the odds ratio skill score). + Identical to the odds ratio skill score. + + Calculates the Yule's Q. + What was the improvement of the forecast over random chance? - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. - Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. - Weather and Forecasting, 15(2), pp.221-232. + + Returns: + xr.DataArray: An xarray object containing Yule's Q + + .. math:: + + \\begin{aligned} + \\text{Yule's Q} &= \\frac{\\text{OR} - 1}{\\text{OR} + 1} + \\\\ &= \\frac{\\text{true positives} \\cdot \\text{true negatives} + - \\text{false positives} \\cdot \\text{false negatives}}{ + \\text{true positives} \\cdot \\text{true negatives} + + \\text{false positives} \\cdot \\text{false negatives}} + \\end{aligned} + + Notes: + - Range: -1 to 1, 0 indicates no skill. Perfect score: 1. + - OR = Odds ratio, see: :meth:`odds_ratio`. + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". + - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". + + References: + Stephenson, D.B., 2000. Use of the “odds ratio” for diagnosing forecast skill. \ + Weather and Forecasting, 15(2), pp.221-232. \ + https://doi.org/10.1175/1520-0434(2000)015%3C0221:UOTORF%3E2.0.CO;2 """ return self.odds_ratio_skill_score() + def symmetric_extremal_dependence_index(self) -> xr.DataArray: + """ + Calculates the Symmetric Extremal Dependence Index (SEDI). + + Returns: + xr.DataArray: An xarray object containing the SEDI score + + .. math:: + \\frac{\\ln(\\text{POFD}) - \\ln(\\text{POD}) + \\ln(\\text{1-POD}) - \\ln(\\text{1 -POFD})} + {\\ln(\\text{POFD}) + \\ln(\\text{POD}) + \\ln(\\text{1-POD}) + \\ln(\\text{1 -POFD})} + + where + + .. math:: + \\text{POD} = \\frac{\\text{true positives}}{\\text{true positives} + \\text{false negatives}} + + and + + .. math:: + \\text{POFD} = \\frac{\\text{false positives}}{\\text{true negatives} + \\text{false positives}} + + + Notes: + - POD = Probability of Detection + - POFD = Probability of False Detection + - "True positives" is the same as "hits". + - "False negatives" is the same as "misses". 
+ - "False positives" is the same as "false alarms". + - "True negatives" is the same as "correct negatives". + - Range: -1 to 1, Perfect score: 1. + + + References: + Ferro, C.A.T. and Stephenson, D.B., 2011. Extremal dependence indices: Improved verification + measures for deterministic forecasts of rare binary events. Weather and Forecasting, 26(5), pp.699-713. + https://doi.org/10.1175/WAF-D-10-05030.1 + """ + score = ( + np.log(self.probability_of_false_detection()) + - np.log(self.probability_of_detection()) + + np.log(1 - self.probability_of_detection()) + - np.log(1 - self.probability_of_false_detection()) + ) / ( + np.log(self.probability_of_false_detection()) + + np.log(self.probability_of_detection()) + + np.log(1 - self.probability_of_detection()) + + np.log(1 - self.probability_of_false_detection()) + ) + return score + class BinaryContingencyManager(BasicContingencyManager): """ @@ -764,7 +1020,7 @@ def transform( *, reduce_dims: Optional[FlexibleDimensionTypes] = None, preserve_dims: Optional[FlexibleDimensionTypes] = None, - ): + ) -> BasicContingencyManager: """ Calculate and compute the contingency table according to the specified dimensions """ @@ -776,7 +1032,7 @@ def _get_counts( *, reduce_dims: Optional[FlexibleDimensionTypes] = None, preserve_dims: Optional[FlexibleDimensionTypes] = None, - ): + ) -> dict: """ Generates the uncomputed count values """ @@ -868,7 +1124,7 @@ def make_event_tables( def make_contingency_manager( self, forecast: FlexibleArrayType, observed: FlexibleArrayType, *, event_threshold=None, op_fn=None - ): + ) -> BinaryContingencyManager: """ Using this function requires a careful understanding of the structure of the data and the use of the operator function. The default operator is a simple greater-than diff --git a/src/scores/continuous/standard_impl.py b/src/scores/continuous/standard_impl.py index 1e9b330b..a39b09ef 100644 --- a/src/scores/continuous/standard_impl.py +++ b/src/scores/continuous/standard_impl.py @@ -91,16 +91,12 @@ def rmse( weights: Optional[xr.DataArray] = None, is_angular: bool = False, ) -> FlexibleArrayType: - """Calculate the Root Mean Squared Error from xarray or pandas objects. + """Calculate the Root Mean Squared Error - A detailed explanation is on [Wikipedia](https://en.wikipedia.org/wiki/Root-mean-square_deviation) - - - Dimensional reduction is not supported for pandas and the user should - convert their data to xarray to formulate the call to the metric. - At most one of `reduce_dims` and `preserve_dims` may be specified. - Specifying both will result in an exception. + A detailed explanation is on https://en.wikipedia.org/wiki/Root-mean-square_deviation + .. math :: + \\sqrt{\\frac{1}{n} \\sum_{i=1}^n (\\text{forecast}_i - \\text{observed}_i)^2} Args: fcst: Forecast @@ -151,12 +147,10 @@ def mae( ) -> FlexibleArrayType: """Calculates the mean absolute error from forecast and observed data. - A detailed explanation is on [Wikipedia](https://en.wikipedia.org/wiki/Mean_absolute_error) + A detailed explanation is on https://en.wikipedia.org/wiki/Mean_absolute_error - Dimensional reduction is not supported for pandas and the user should - convert their data to xarray to formulate the call to the metric. - At most one of reduce_dims and preserve_dims may be specified. - Specifying both will result in an exception. + .. math :: + \\frac{1}{n} \\sum_{i=1}^n | \\text{forecast}_i - \\text{observed}_i | Args: fcst: Forecast or predicted variables in xarray or pandas. 
diff --git a/src/scores/fast/__init__.py b/src/scores/fast/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/scores/fast/fss/__init__.py b/src/scores/fast/fss/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/scores/fast/fss/backend.py b/src/scores/fast/fss/backend.py new file mode 100644 index 00000000..9c1e55c5 --- /dev/null +++ b/src/scores/fast/fss/backend.py @@ -0,0 +1,152 @@ +""" +Backend implementation of the FSS algorithm. Currently calculations are +performed mainly by the `numpy` backend. +""" + +from dataclasses import dataclass, field +from typing import Tuple + +import numpy as np +import numpy.typing as npt + +from scores.fast.fss.typing import FssComputeMethod, FssDecomposed +from scores.utils import BinaryOperator, CompatibilityError, DimensionError + + +@dataclass +class FssBackend: # pylint: disable=too-many-instance-attributes + """ + Abstract base class for computing fss. + + required methods: + - :py:meth:`_compute_fss_components` + - :py:meth:`_compute_integral_field` + """ + + compute_method = FssComputeMethod.INVALID + + fcst: npt.NDArray[np.float64] + obs: npt.NDArray[np.float64] + # KW_ONLY in dataclasses only supported for python >=3.10 + # _: KW_ONLY + event_threshold: np.float64 + threshold_operator: BinaryOperator + window_size: Tuple[int, int] + zero_padding: bool + + # internal buffers + _obs_pop: npt.NDArray[np.int64] = field(init=False) + _fcst_pop: npt.NDArray[np.int64] = field(init=False) + _obs_img: npt.NDArray[np.int64] = field(init=False) + _fcst_img: npt.NDArray[np.int64] = field(init=False) + + def __post_init__(self): + """ + Post-initialization checks go here. + """ + self._check_dims() + self._check_compatibility() + + def _check_compatibility(self): # pragma: no cover + raise CompatibilityError( + f"Unable to run `{self.compute_method}`. Are you missing extra/optional dependencies in your install?" + ) + + def _check_dims(self): + if self.fcst.shape != self.obs.shape: + raise DimensionError("fcst and obs shapes do not match") + if ( + self.window_size[0] > self.fcst.shape[0] + or self.window_size[1] > self.fcst.shape[1] + or self.window_size[0] < 1 + or self.window_size[1] < 1 + ): + raise DimensionError( + "invalid window size, `window_size` must be smaller than input data shape and greater than 0" + ) + + def _apply_event_threshold(self): + """ + Default implementation of converting the input fields into binary fields by comparing + against a event threshold; using numpy. This is a relatively cheap operation, so a specific + implementation in derived backends is optional. + """ + _op = self.threshold_operator.get() + self._obs_pop = _op(self.obs, self.event_threshold) + self._fcst_pop = _op(self.fcst, self.event_threshold) + return self + + def _compute_integral_field(self): + """ + Computes the rolling windowed sums over the entire observation & forecast fields. The + resulting "integral field (or image)" can be cached in `self._obs_img` for observations, and + `self._fcst_img` for forecast. + + See also: https://en.wikipedia.org/wiki/Summed-area_table + + Note: for non-`python` backends e.g. `rust`, its often the case that the integral field is + computed & cached in the native language. In which case this method can just be overriden + with a `return self`. 
+ """ + raise NotImplementedError(f"_compute_integral_field not implemented for {self.compute_method}") + + def _compute_fss_components(self) -> FssDecomposed: + """ + FSS is roughly defined as + ``` + fss = 1 - sum_w((p_o - p_f)^2) / (sum_w(p_o^2) + sum_w(p_f^2)) + + where, + p_o: observation populace > event_threshold, in one window + p_f: forecast populace > event_threshold, in one window + sum_w: sum over all windows + ```` + + In order to accumulate scores over non spatial dimensions at a higher level operation, we + need to keep track of the de-composed sums as they need to be accumulated separately. + + The "fss components", hence, consist of: + - `power_diff :: float = sum_w((p_o - p_f)^2)` + - `power_obs :: float = sum_w(p_o^2)` + - `power_fcst :: float = sum_w(p_f^2)` + + **WARNING:** returning sums of powers can result in overflows on aggregation. It is advisable + to return the means instead, as it is equivilent with minimal computational trade-off. + + Returns: + Tuple[float, float, float]: (power_fcst, power_obs, power_diff) as described above + """ + raise NotImplementedError(f"`_compute_fss_components` not implemented for {self.compute_method}") + + def compute_fss_decomposed(self) -> FssDecomposed: + """ + Calls the main pipeline to compute the fss score. + + Returns the decomposed scores for aggregation. This is because aggregation cannot be + done on the final score directly. + + Note: FSS computations of stand-alone 2-D fields should use :py:meth:`compute_fss` instead. + """ + # fmt: off + return ( + self._apply_event_threshold() # pylint: disable=protected-access + ._compute_integral_field() + ._compute_fss_components() + ) + # fmt: on + + def compute_fss(self) -> np.float64: + """ + Uses the components from :py:method:`compute_fss_decomposed` to compute the final + Fractions Skill Score. + """ + (fcst, obs, diff) = self.compute_fss_decomposed() + denom = fcst + obs + fss: np.float = 0.0 + + if denom > 0.0: + fss = 1.0 - diff / denom + + fss_clamped = max(min(fss, 1.0), 0.0) + + return fss_clamped diff --git a/src/scores/fast/fss/fss_backends.py b/src/scores/fast/fss/fss_backends.py new file mode 100644 index 00000000..c062142c --- /dev/null +++ b/src/scores/fast/fss/fss_backends.py @@ -0,0 +1,20 @@ +""" +Convenience module to retrieve compute backend +""" +from scores.fast.fss.fss_numba import FssNumba +from scores.fast.fss.fss_numpy import FssNumpy +from scores.fast.fss.typing import FssComputeMethod + + +def get_compute_backend(compute_method: FssComputeMethod): # pragma: no cover + """ + Returns the appropriate compute backend class constructor. + """ + # Note: compute methods other than NUMPY currently not implemented + if compute_method == FssComputeMethod.NUMPY: + return FssNumpy + if compute_method == FssComputeMethod.NUMBA: + # Note: currently not implemented fore demonstration purposes only + # this should throw a compatiblity error. 
+ return FssNumba + raise NotImplementedError(f"Compute method {compute_method} not implemented.") diff --git a/src/scores/fast/fss/fss_numba.py b/src/scores/fast/fss/fss_numba.py new file mode 100644 index 00000000..232cdc7e --- /dev/null +++ b/src/scores/fast/fss/fss_numba.py @@ -0,0 +1,39 @@ +""" +Backend for computing Fractions Skill Score (FSS) using numba + +""" +from dataclasses import dataclass + +from scores.fast.fss.backend import FssBackend +from scores.fast.fss.typing import FssComputeMethod + +_COMPATIBLE = True +try: + import numba # pylint: disable=unused-import +except ImportError: # pragma: no cover + _COMPATIBLE = False + + +@dataclass +class FssNumba(FssBackend): # pragma: no cover + """ + Implementation of numba backend for computing FSS + + This is currently a stub. + """ + + compute_method = FssComputeMethod.NUMBA + + def _check_compatibility(self): + """ + Currently not implemented. + """ + if _COMPATIBLE: + return + super()._check_compatibility() + + def _compute_integral_field(self): + super()._compute_integral_field() + + def _compute_fss_components(self): + super()._compute_integral_field() diff --git a/src/scores/fast/fss/fss_numpy.py b/src/scores/fast/fss/fss_numpy.py new file mode 100644 index 00000000..57cda822 --- /dev/null +++ b/src/scores/fast/fss/fss_numpy.py @@ -0,0 +1,143 @@ +""" +Backend for computing Fractions Skill Score (FSS) using numpy +""" + +from dataclasses import dataclass + +import numpy as np + +from scores.fast.fss.backend import FssBackend +from scores.fast.fss.typing import FssComputeMethod, FssDecomposed, f8x3 + +_COMPATIBLE = True + + +@dataclass +class FssNumpy(FssBackend): + """ + Implementation of numpy backend for computing FSS + """ + + compute_method = FssComputeMethod.NUMPY + + def _check_compatibility(self): # pragma: no cover + """ + Numpy should be compatible by default. + """ + # false branch - should always return + # kept here to preserve the pattern consistency with other backends + if _COMPATIBLE: + return + # unreachable + super()._check_compatibility() + + def _compute_integral_field(self): # pylint: disable=too-many-locals + obs_partial_sums = self._obs_pop.cumsum(1).cumsum(0) + fcst_partial_sums = self._fcst_pop.cumsum(1).cumsum(0) + + # pad first row/column with 0s, addressses off-by-one window sizes for the first row/column + # due to the way meshgrid indexing works. + zero_row = np.zeros((fcst_partial_sums.shape[0], 1)) + obs_partial_sums = np.hstack([zero_row, obs_partial_sums]) + fcst_partial_sums = np.hstack([zero_row, fcst_partial_sums]) + + zero_col = np.zeros((1, fcst_partial_sums.shape[1])) + obs_partial_sums = np.vstack([zero_col, obs_partial_sums]) + fcst_partial_sums = np.vstack([zero_col, fcst_partial_sums]) + + if self.zero_padding: + # ------------------------------ + # 0-padding + # ------------------------------ + # .................. + # .................. + # ..+------------+.. + # ..| |.. + # ..| |.. + # ..| |.. + # ..+------------+.. + # .................. + # .................. + # + # ".." represents 0 padding + # the rectangle represents the + # FSS computation region. 
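+            #
+            # Clamping the cumulative-sum indices at the (zero-padded) border below is
+            # equivalent to treating everything outside the field as a non-event, so
+            # window positions near the edges are kept rather than discarded.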
+ # ------------------------------ + (w_h, w_w) = (self.window_size[0], self.window_size[1]) + half_w_h = int(w_h / 2) + half_w_w = int(w_w / 2) + im_h = fcst_partial_sums.shape[0] + im_w = fcst_partial_sums.shape[1] + + # get remaining window size, incase window size is odd + rem_h = w_h - half_w_h + rem_w = w_w - half_w_w + + # Zero padding is equivilent to clamping the coordinate values at the border + # r = rows, c = cols, tl = top-left, br = bottom-right + r_tl = np.clip(np.arange(-half_w_h, im_h - half_w_h), 0, im_h - 1) + c_tl = np.clip(np.arange(-half_w_w, im_w - half_w_w), 0, im_w - 1) + mesh_tl = np.meshgrid(r_tl, c_tl, indexing="ij") + + r_br = np.clip(np.arange(rem_h, im_h + rem_h), 1, im_h - 1) + c_br = np.clip(np.arange(rem_w, im_w + rem_w), 1, im_w - 1) + mesh_br = np.meshgrid(r_br, c_br, indexing="ij") + + else: + # ------------------------------ + # interior border + # ------------------------------ + # data at the border is trim + # -ed and used for padding + # +---------------+ + # | +-----------+ | + # | |///////////| | + # | |///////////| | + # | |///////////| | + # | +-----------+ | + # +---------------+ + # + # "//" represents the effective + # FSS computation region + # ------------------------------ + im_h = fcst_partial_sums.shape[0] - self.window_size[0] + im_w = fcst_partial_sums.shape[1] - self.window_size[1] + mesh_tl = np.mgrid[0:im_h, 0:im_w] + mesh_br = (mesh_tl[0] + self.window_size[0], mesh_tl[1] + self.window_size[1]) + + # ---------------------------------- + # Computing window area from sum + # area table + # ---------------------------------- + # A B + # tl.0,tl.1 tl.0,br.1 + # +----------+ + # | | + # | | + # | | + # +----------+ + # br.0,tl.1 br.0,br.1 + # C D + # + # area = D - B - C + A + # ----------------------------------- + + obs_a = obs_partial_sums[mesh_tl[0], mesh_tl[1]] + obs_b = obs_partial_sums[mesh_tl[0], mesh_br[1]] + obs_c = obs_partial_sums[mesh_br[0], mesh_tl[1]] + obs_d = obs_partial_sums[mesh_br[0], mesh_br[1]] + self._obs_img = obs_d - obs_b - obs_c + obs_a + + fcst_a = fcst_partial_sums[mesh_tl[0], mesh_tl[1]] + fcst_b = fcst_partial_sums[mesh_tl[0], mesh_br[1]] + fcst_c = fcst_partial_sums[mesh_br[0], mesh_tl[1]] + fcst_d = fcst_partial_sums[mesh_br[0], mesh_br[1]] + self._fcst_img = fcst_d - fcst_b - fcst_c + fcst_a + + return self + + def _compute_fss_components(self) -> FssDecomposed: + diff = np.nanmean(np.power(self._obs_img - self._fcst_img, 2)) + fcst = np.nanmean(np.power(self._fcst_img, 2)) + obs = np.nanmean(np.power(self._obs_img, 2)) + return np.void((fcst, obs, diff), dtype=f8x3) diff --git a/src/scores/fast/fss/typing.py b/src/scores/fast/fss/typing.py new file mode 100644 index 00000000..ef4e89c5 --- /dev/null +++ b/src/scores/fast/fss/typing.py @@ -0,0 +1,31 @@ +""" +Specific type definitions for Fss backend +""" +from enum import Enum +from typing import TYPE_CHECKING, Union + +import numpy as np +import numpy.typing as npt + +# Note: soft keyword `type` only support from >=3.10 +# "tuple" of 64 bit floats +f8x3 = np.dtype("f8, f8, f8") +# Note: `TypeAlias` on variables not available for python <3.10 +if TYPE_CHECKING: # pragma: no cover + FssDecomposed = Union[np.ArrayLike, np.DtypeLike] +else: # pragma: no cover + FssDecomposed = npt.NDArray[f8x3] + + +class FssComputeMethod(Enum): + """ + Choice of compute backend for FSS. 
+ - Standard + - NUMPY (default) + - Optimized + - NUMBA (currently unavailable) + """ + + INVALID = -1 + NUMPY = 1 # (default) + NUMBA = 2 diff --git a/src/scores/pandas/continuous.py b/src/scores/pandas/continuous.py index e2d521ae..19c2659c 100644 --- a/src/scores/pandas/continuous.py +++ b/src/scores/pandas/continuous.py @@ -46,10 +46,10 @@ def rmse( ) -> PandasType: """Calculate the Root Mean Squared Error from xarray or pandas objects. - A detailed explanation is on [Wikipedia](https://en.wikipedia.org/wiki/Root-mean-square_deviation) + A detailed explanation is on https://en.wikipedia.org/wiki/Root-mean-square_deviation - Dimensional reduction is not supported for pandas and the user should - convert their data to xarray to formulate the call to the base metric, `scores.continuous.rmse`. + .. math :: + \\sqrt{\\frac{1}{n} \\sum_{i=1}^n (\\text{forecast}_i - \\text{observed}_i)^2} Args: fcst: Forecast or predicted variables in pandas. @@ -60,6 +60,10 @@ def rmse( accounts for circularity. Angular `fcst` and `obs` data should be in degrees rather than radians. + Notes: + Dimensional reduction is not supported for pandas and users wishing this extra functionality + should convert their data to xarray to formulate the call to `scores.continuous.rmse`. + Returns: An object containing a single floating point number representing the root mean squared @@ -77,11 +81,13 @@ def mae( ) -> PandasType: """Calculates the mean absolute error from forecast and observed data. - A detailed explanation is on [Wikipedia](https://en.wikipedia.org/wiki/Mean_absolute_error) + A detailed explanation is on https://en.wikipedia.org/wiki/Mean_absolute_error + .. math :: + \\frac{1}{n} \\sum_{i=1}^n | \\text{forecast}_i - \\text{observed}_i | Dimensional reduction is not supported for pandas and the user should - convert their data to xarray to formulate the call to the base metric, `scores.continuous.mae`. + convert their data to xarray to formulate the call to `scores.continuous.mae`. Args: fcst: Forecast or predicted variables in pandas. diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py index caf04958..57a2eddd 100644 --- a/src/scores/probability/crps_impl.py +++ b/src/scores/probability/crps_impl.py @@ -174,35 +174,39 @@ def crps_cdf( is used as an index for predictive CDF values. Various techniques are used to interpolate CDF values between indexed thresholds. - Given - - a predictive CDF `fcst` indexed at thresholds by variable x, - - an observation in CDF form `obs_cdf` (i.e., obs_cdf(x) = 0 if x < obs and 1 if x >= obs), - - a `threshold_weight` array indexed by variable x, + Given: + - a predictive CDF `fcst` indexed at thresholds by variable x + - an observation in CDF form `obs_cdf` (i.e., :math:`\\text{obs_cdf}(x) = 0` if :math:`x < \\text{obs}` and 1 if :math:`x >= \\text{obs}`) + - a `threshold_weight` array indexed by variable x The threshold-weighted CRPS is given by: - `CRPS = integral(threshold_weight(x) * (fcst(x) - obs_cdf(x))**2)`, over all thresholds x. - The usual CRPS is the threshold-weighted CRPS with `threshold_weight(x) = 1` for all x. - This can be decomposed into an over-forecast penalty - `integral(threshold_weight(x) * (fcst(x) - obs_cdf(x))**2)`, over all thresholds x where x >= obs - and an under-forecast penalty - `integral(threshold_weight(x) * (fcst(x) - obs_cdf(x))**2)`, over all thresholds x where x <= obs. 
+ - :math:`twCRPS = \\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times (\\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x. + - The usual CRPS is the threshold-weighted CRPS with :math:`\\text{threshold_weight}(x) = 1` for all x. + + This can be decomposed into an over-forecast penalty: + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x where x >= obs + + and an under-forecast penalty: + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{(fcst}(x) - \\text{obs_cdf}(x)^2]\\text{d}x}`, over all thresholds x where x <= obs. Note that the function `crps_cdf` is designed so that the `obs` argument contains actual observed values. `crps_cdf` will convert `obs` into CDF form in order to calculate the CRPS. - To calculate CRPS, integration is applied over the set of thresholds x taken from + To calculate CRPS, integration is applied over the set of thresholds x taken from: - `fcst[threshold_dim].values`, - `obs.values`. - `threshold_weight[threshold_dim].values` if applicable. - `additional_thresholds` if applicable. - With NaN values excluded. There are two methods of integration: - - "exact" gives the exact integral under that assumption that that `fcst` is - continuous and piecewise linear between its specified values, and that - `threshold_weight` (if supplied) is piecewise constant and right-continuous + - (with NaN values excluded) + + There are two methods of integration: + - "exact" gives the exact integral under that assumption that that `fcst` is + continuous and piecewise linear between its specified values, and that + `threshold_weight` (if supplied) is piecewise constant and right-continuous between its specified values. - - "trapz" simply uses a trapezoidal rule using the specified values, and so is - an approximation of the CRPS. To get an accurate approximation, the density + - "trapz" simply uses a trapezoidal rule using the specified values, and so is + an approximation of the CRPS. To get an accurate approximation, the density of threshold values can be increased by supplying `additional_thresholds`. Both methods of calculating CRPS may require adding additional values to the @@ -219,35 +223,38 @@ def crps_cdf( fcst: array of forecast CDFs, with the threshold dimension given by `threshold_dim`. obs: array of observations, not in CDF form. threshold_dim: name of the dimension in `fcst` that indexes the thresholds. - threshold_weight: weight to be applied along `threshold_dim` to calculat - threshold-weighted CRPS. Must contain `threshold_dim` as a dimension, and may - also include other dimensions from `fcst`. If `weight=None`, a weight of 1 + threshold_weight: weight to be applied along `threshold_dim` to calculate + threshold-weighted CRPS. Must contain `threshold_dim` as a dimension, and may + also include other dimensions from `fcst`. If `weight=None`, a weight of 1 is applied everywhere, which gives the usual CRPS. - additional_thresholds: additional thresholds values to add to `fcst` and (if + additional_thresholds: additional thresholds values to add to `fcst` and (if applicable) `threshold_weight` prior to calculating CRPS. - propagate_nans: If `True`, propagate NaN values along `threshold_dim` in `fcst` - and `threshold_weight` prior to calculating CRPS. This will result in CRPS - being NaN for these cases. 
If `False`, NaN values in `fcst` and `weight` will - be replaced, wherever possible, with non-NaN values using the fill method + propagate_nans: If `True`, propagate NaN values along `threshold_dim` in `fcst` + and `threshold_weight` prior to calculating CRPS. This will result in CRPS + being NaN for these cases. If `False`, NaN values in `fcst` and `weight` will + be replaced, wherever possible, with non-NaN values using the fill method specified by `fcst_fill_method` and `threshold_weight_fill_method`. - fcst_fill_method: how to fill values in `fcst` when NaNs have been introduced - (by including additional thresholds) or are specified to be removed (by - setting `propagate_nans=False`). Select one of: - - "linear": use linear interpolation, then replace any leading or - trailing NaNs using linear extrapolation. Afterwards, all values are - clipped to the closed interval [0, 1]. - - "step": apply forward filling, then replace any leading NaNs with 0. - - "forward": first apply forward filling, then remove any leading NaNs by - back filling. - - "backward": first apply back filling, then remove any trailing NaNs by - forward filling. - In most cases, "linear" is likely the appropriate choice. + fcst_fill_method: how to fill values in `fcst` when NaNs have been introduced + (by including additional thresholds) or are specified to be removed (by + setting `propagate_nans=False`). Select one of: + + - "linear": use linear interpolation, then replace any leading or \ + trailing NaNs using linear extrapolation. Afterwards, all values are \ + clipped to the closed interval [0, 1]. + - "step": apply forward filling, then replace any leading NaNs with 0. + - "forward": first apply forward filling, then remove any leading NaNs by \ + back filling. + - "backward": first apply back filling, then remove any trailing NaNs by \ + forward filling. + - (In most cases, "linear" is likely the appropriate choice.) + threshold_weight_fill_method: how to fill values in `threshold_weight` when NaNs have been introduced (by including additional thresholds) or are specified - to be removed (by setting `propagate_nans=False`). Select one of "linear", - "step", "forward" or "backward". If the weight function is continuous, - "linear" is probably the best choice. If it is an increasing step function, + to be removed (by setting `propagate_nans=False`). Select one of "linear", + "step", "forward" or "backward". If the weight function is continuous, + "linear" is probably the best choice. If it is an increasing step function, "forward" may be best. + integration_method (str): one of "exact" or "trapz". preserve_dims (Tuple[str]): dimensions to preserve in the output. All other dimensions are collapsed by taking the mean. @@ -288,11 +295,11 @@ def crps_cdf( - `scores.probability.crps_for_ensemble` References: - - Matheson, J. E., and R. L. Winkler, 1976: Scoring rules for continuous probability distributions. - Manage. Sci.,22, 1087–1095. - - Gneiting, T., & Ranjan, R. (2011). Comparing Density Forecasts Using Threshold- and - Quantile-Weighted Scoring Rules. - Journal of Business & Economic Statistics, 29(3), 411–422. http://www.jstor.org/stable/23243806 + - Matheson, J. E., and R. L. Winkler, 1976: Scoring rules for continuous probability distributions. \ + Management Science, 22(10), 1087–1095. https://doi.org/10.1287/mnsc.22.10.1087 + - Gneiting, T., & Ranjan, R. (2011). Comparing Density Forecasts Using Threshold- and \ + Quantile-Weighted Scoring Rules. 
\ + Journal of Business & Economic Statistics, 29(3), 411–422. https://doi.org/10.1198/jbes.2010.08110 """ dims = scores.utils.gather_dimensions( @@ -454,21 +461,22 @@ def crps_cdf_brier_decomposition( fcst (xr.DataArray): DataArray of CDF values with threshold dimension `threshold_dim`. obs (xr.DataArray): DataArray of observations, not in CDF form. threshold_dim (str): name of the threshold dimension in `fcst`. - additional_thresholds (Optional[Iterable[float]]): additional thresholds + additional_thresholds (Optional[Iterable[float]]): additional thresholds \ at which to calculate the mean Brier score. fcst_fill_method (Literal["linear", "step", "forward", "backward"]): How to fill NaN values in `fcst` that arise from new user-supplied thresholds or thresholds derived from observations. - - "linear": use linear interpolation, and if needed also extrapolate linearly. - Clip to 0 and 1. Needs at least two non-NaN values for interpolation, + + - "linear": use linear interpolation, and if needed also extrapolate linearly. \ + Clip to 0 and 1. Needs at least two non-NaN values for interpolation, \ so returns NaNs where this condition fails. - - "step": use forward filling then set remaining leading NaNs to 0. + - "step": use forward filling then set remaining leading NaNs to 0. \ Produces a step function CDF (i.e. piecewise constant). - - "forward": use forward filling then fill any remaining leading NaNs with + - "forward": use forward filling then fill any remaining leading NaNs with \ backward filling. - - "backward": use backward filling then fill any remaining trailing NaNs with + - "backward": use backward filling then fill any remaining trailing NaNs with \ forward filling. - dims: dimensions to preserve in the output. The dimension `threshold_dim` is always + dims: dimensions to preserve in the output. The dimension `threshold_dim` is always \ preserved, even if not specified here. Returns: @@ -566,13 +574,17 @@ def adjust_fcst_for_crps( ) -> xr.DataArray: """ This function takes a forecast cumulative distribution functions (CDF) `fcst`. + If `fcst` is not decreasing outside of specified tolerance, it returns `fcst`. - Otherwise, the CDF envelope for `fcst` is computed, and the CDF from among + + Otherwise, the CDF envelope for `fcst` is computed, and the CDF from among: - `fcst`, - the upper envelope, and - the lower envelope + that has the higher (i.e. worse) CRPS is returned. In the event of a tie, preference is given in the order `fcst` then upper. + See `scores.probability.functions.cdf_envelope` for details about the CDF envelope. The use case for this is when, either due to rounding or poor forecast process, the @@ -582,21 +594,26 @@ def adjust_fcst_for_crps( Whether a CDF is decreasing outside specified tolerance is determined as follows. For each CDF in `fcst`, the sum of incremental decreases along the threshold dimension - is calculated. For example, if the CDF values are - [0, 0.4, 0.3, 0.9, 0.88, 1] + is calculated. For example, if the CDF values are + + `[0, 0.4, 0.3, 0.9, 0.88, 1]` + then the sum of incremental decreases is -0.12. This CDF decreases outside specified tolerance if 0.12 > `decreasing_tolerance`. 
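+
+    As an illustrative (non-API) aside, this quantity is simply the sum of the negative
+    first differences of the CDF values:
+
+    .. code-block:: python
+
+        import numpy as np
+
+        cdf_vals = np.array([0, 0.4, 0.3, 0.9, 0.88, 1])
+        sum_of_decreases = np.diff(cdf_vals).clip(max=0).sum()  # approximately -0.12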
The adjusted array of forecast CDFs is determined as follows: - - any NaN values in `fcst` are propagated along `threshold_dim` so that in each case + - any NaN values in `fcst` are propagated along `threshold_dim` so that in each case \ the entire CDF is NaN; - any CDFs in `fcst` that are decreasing within specified tolerance are unchanged; - - any CDFs in `fcst` that are decreasing outside specified tolerance are replaced with - whichever of the upper or lower CDF envelope gives the highest CRPS, unless the original + - any CDFs in `fcst` that are decreasing outside specified tolerance are replaced with \ + whichever of the upper or lower CDF envelope gives the highest CRPS, unless the original \ values give a higher CRPS in which case original values are kept. + See `scores.probability.functions.cdf_envelope` for a description of the 'CDF envelope'. + If propagating NaNs is not desired, the user may first fill NaNs in `fcst` using `scores.probability.functions.fill_cdf`. + The CRPS for each forecast case is calculated using `crps`, with a weight of 1. Args: @@ -604,8 +621,7 @@ def adjust_fcst_for_crps( threshold_dim: name of the threshold dimension in `fcst`. obs: DataArray of observations. decreasing_tolerance: nonnegative tolerance value. - additional_thresholds: optional additional thresholds passed on to `crps` when - calculating CRPS. + additional_thresholds: optional additional thresholds passed on to `crps` when calculating CRPS. fcst_fill_method: `fcst` fill method passed on to `crps` when calculating CRPS. integration_method: integration method passed on to `crps` when calculating CRPS. @@ -765,22 +781,26 @@ def crps_for_ensemble( An ensemble of forecasts can also be thought of as a random sample from the predictive distribution. - Given an observation y, and ensemble member values {x_i} (for 1 <= i <= M), the CRPS is + Given an observation y, and ensemble member values :math:`x_i` (for 1 <= i <= M), the CRPS is calculated by the formula - CRPS({x_i}, y) = (1 / M) * sum(|x_i - y|) - (1 / 2 * K) * sum(|x_i - x_j|), + + + .. math:: + CRPS(x_i, y) = (1 / M) * sum(|x_i - y|) - (1 / 2 * K) * sum(|x_i - x_j|) + where the first sum is iterated over 1 <= i <= M and the second sum is iterated over 1 <= i <= M and 1 <= j <= M. - The value of the constant K in this formula depends on the method. - - If `method="ecdf"` then K = M ** 2. In this case the CRPS value returned is - the exact CRPS value for the emprical cumulation distribution function + The value of the constant K in this formula depends on the method: + - If `method="ecdf"` then :math:`K = M ** 2`. In this case the CRPS value returned is \ + the exact CRPS value for the emprical cumulation distribution function \ constructed using the ensemble values. - - If `method="fair"` then K = M * (M - 1). In this case the CRPS value returned - is the approximated CRPS where the ensemble values can be interpreted as a - random sample from the underlying predictive distribution. This interpretation - stems from the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2, where X and X' - are independent samples of the predictive distribution F, Y is the observation - (possibly unknown) and E denotes the expectation. This choice of K gives an + - If `method="fair"` then :math:`K = M * (M - 1)`. In this case the CRPS value returned \ + is the approximated CRPS where the ensemble values can be interpreted as a \ + random sample from the underlying predictive distribution. 
This interpretation \ + stems from the formula :math:`\\text{CRPS}(F, Y) = E|X - Y| - E|X - X'|/2`, where X and X' \ + are independent samples of the predictive distribution F, Y is the observation \ + (possibly unknown) and E denotes the expectation. This choice of K gives an \ unbiased estimate for the second expectation. Args: @@ -803,13 +823,14 @@ def crps_for_ensemble( `scores.probability.crps_cdf` References: - - C. Ferro (2014), "Fair scores for ensemble forecasts", Q J R Meteorol Soc - 140(683):1917-1923. - - T. Gneiting T and A. Raftery (2007), "Strictly proper scoring rules, prediction, - and estimation", J Am Stat Assoc, 102(477):359-37. - - M. Zamo and P. Naveau (2018), "Estimation of the Continuous Ranked Probability - Score with Limited Information and Applications to Ensemble Weather Forecasts", - Math Geosci 50:209-234, https://doi.org/10.1007/s11004-017-9709-7 + - C. Ferro (2014), "Fair scores for ensemble forecasts", Quarterly Journal of the \ + Royal Meteorol Society, 140(683):1917-1923. https://doi.org/10.1002/qj.2270 + - T. Gneiting T and A. Raftery (2007), "Strictly proper scoring rules, prediction, \ + and estimation", Journal of the American Statistical Association, 102(477):359-378. \ + https://doi.org/10.1198/016214506000001437 + - M. Zamo and P. Naveau (2018), "Estimation of the Continuous Ranked Probability \ + Score with Limited Information and Applications to Ensemble Weather Forecasts", \ + Mathematical Geosciences 50:209-234, https://doi.org/10.1007/s11004-017-9709-7 """ if method not in ["ecdf", "fair"]: raise ValueError("`method` must be one of 'ecdf' or 'fair'") diff --git a/src/scores/processing/cdf/cdf_functions.py b/src/scores/processing/cdf/cdf_functions.py index 54945eb4..c4290dda 100644 --- a/src/scores/processing/cdf/cdf_functions.py +++ b/src/scores/processing/cdf/cdf_functions.py @@ -151,13 +151,11 @@ def observed_cdf( def integrate_square_piecewise_linear(function_values: xr.DataArray, threshold_dim: str) -> xr.DataArray: """Calculates integral values and collapses `threshold_dim`. - Calculates integral(F(t)^2), where - - If t in a threshold value in `threshold_dim` then F(t) is in `function_values`, + Calculates :math:`\\int{(F(t)^2)}`, where: + - If t is a threshold value in `threshold_dim` then F(t) is in `function_values`, - F is piecewise linear between each of the t values in `threshold_dim`. - Returns value of the integral with `threshold_dim` collapsed and other dimensions preserved. - Returns NaN if there are less than two non-NaN function_values. - This function assumes that + This function assumes that: - `threshold_dim` is a dimension of `function_values` - coordinates of `threshold_dim` are increasing. @@ -166,7 +164,12 @@ def integrate_square_piecewise_linear(function_values: xr.DataArray, threshold_d threshold_dim (xr.DataArray): dimension along which to integrate. Returns: - xr.DataArray: Integral values and `threshold_dim` collapsed. + + xr.DataArray: Integral values and `threshold_dim` collapsed: + + - Returns value of the integral with `threshold_dim` collapsed and other dimensions preserved. + - Returns NaN if there are less than two non-NaN function_values. + """ # notation: Since F is piecewise linear we have @@ -240,15 +243,17 @@ def fill_cdf( method: Literal["linear", "step", "forward", "backward"], min_nonnan: int, ) -> xr.DataArray: - """Fills NaNs in a CDF of a real-valued random variable along `threshold_dim` with appropriate values between 0 and 1. 
+ """ + Fills NaNs in a CDF of a real-valued random variable along `threshold_dim` with appropriate values between 0 and 1. Args: cdf (xr.DataArray): CDF values, where P(Y <= threshold) = cdf_value for each threshold in `threshold_dim`. threshold_dim (str): the threshold dimension in the CDF, along which filling is performed. - method (Literal["linear", "step", "forward", "backward"]): one of - - "linear": use linear interpolation, and if needed also extrapolate linearly. Clip to 0 and 1. + method (Literal["linear", "step", "forward", "backward"]): one of: + + - "linear": use linear interpolation, and if needed also extrapolate linearly. Clip to 0 and 1. \ Needs at least two non-NaN values for interpolation, so returns NaNs where this condition fails. - - "step": use forward filling then set remaining leading NaNs to 0. + - "step": use forward filling then set remaining leading NaNs to 0. \ Produces a step function CDF (i.e. piecewise constant). - "forward": use forward filling then fill any remaining leading NaNs with backward filling. - "backward": use backward filling then fill any remaining trailing NaNs with forward filling. @@ -310,8 +315,8 @@ def decreasing_cdfs(cdf: xr.DataArray, threshold_dim: str, tolerance: float) -> This is sometimes violated due to rounding issues or bad forecast process. `decreasing_cdfs` checks CDF values decrease beyond specified tolerance; that is, whenever the sum of the incremental decreases exceeds tolerarance. - For example, if the CDF values are - [0, 0.4, 0.3, 0.9, 0.88, 1] + + For example, if the CDF values are `[0, 0.4, 0.3, 0.9, 0.88, 1]` then the sum of incremental decreases is -0.12. Given a specified positive `tolerance`, the CDF values decrease beyond tolerance if the sum of incremental decreases < -`tolerance`. @@ -358,9 +363,10 @@ def cdf_envelope( The following example shows values from an original CDF that has a decreasing subsequence (and so is not a true CDF). The resulting "upper" and "lower" CDFs minimally adjust "original" so that "lower" <= "original" <= "upper". - "original": [0, .5, .2, .8, 1] - "upper": [0, .5, .5, .8, 1] - "lower": [0, .2, .2, .8, 1] + + - "original": [0, .5, .2, .8, 1] + - "upper": [0, .5, .5, .8, 1] + - "lower": [0, .2, .2, .8, 1] This function does not perform checks that `0 <= cdf <= 1`. @@ -369,13 +375,15 @@ def cdf_envelope( threshold_dim (str): dimension in fcst_cdf that contains the threshold ordinates. Returns: - An xarray DataArray consisting of three CDF arrays indexed along the `"cdf_type"` dimension + xr.DataArray: An xarray DataArray consisting of three CDF arrays indexed along the `"cdf_type"` dimension with the following indices: + - "original": same data as `cdf`. - - "upper": minimally adjusted "original" CDF that is nondecreasing and + - "upper": minimally adjusted "original" CDF that is nondecreasing and \ satisfies "upper" >= "original". - - "lower": minimally adjusted "original" CDF that is nondecreasing and + - "lower": minimally adjusted "original" CDF that is nondecreasing and \ satisfies "lower" <= "original". + NaN values in `cdf` are maintained in "original", "upper" and "lower". Raises: diff --git a/src/scores/processing/discretise.py b/src/scores/processing/discretise.py index f68ee507..25de0ac0 100644 --- a/src/scores/processing/discretise.py +++ b/src/scores/processing/discretise.py @@ -113,17 +113,17 @@ def binary_discretise( for a value to fall in the 'event' category (i.e. assigned to 1). 
Allowed modes are: - - '>=' values in `data` greater than or equal to the + - '>=' values in `data` greater than or equal to the \ corresponding threshold are assigned as 1. - - '>' values in `data` greater than the corresponding threshold + - '>' values in `data` greater than the corresponding threshold \ are assigned as 1. - - '<=' values in `data` less than or equal to the corresponding + - '<=' values in `data` less than or equal to the corresponding \ threshold are assigned as 1. - - '<' values in `data` less than the corresponding threshold + - '<' values in `data` less than the corresponding threshold \ are assigned as 1. - - '==' values in `data` equal to the corresponding threshold + - '==' values in `data` equal to the corresponding threshold \ are assigned as 1 - - '!=' values in `data` not equal to the corresponding threshold + - '!=' values in `data` not equal to the corresponding threshold \ are assigned as 1. abs_tolerance: If supplied, values in data that are diff --git a/src/scores/spatial/__init__.py b/src/scores/spatial/__init__.py new file mode 100644 index 00000000..b4f5dd25 --- /dev/null +++ b/src/scores/spatial/__init__.py @@ -0,0 +1,7 @@ +""" +Import the functions from the implementations into the public API +""" + +from scores.spatial.fss_impl import fss_2d, fss_2d_binary, fss_2d_single_field + +__all__ = ["fss_2d", "fss_2d_binary", "fss_2d_single_field"] diff --git a/src/scores/spatial/fss_impl.py b/src/scores/spatial/fss_impl.py new file mode 100644 index 00000000..e8d29468 --- /dev/null +++ b/src/scores/spatial/fss_impl.py @@ -0,0 +1,386 @@ +""" +This module contains methods related to the Fractions Skill Score (FSS). + +For an explanation of the FSS, and implementation considerations, see references below. + +Currently uses the default score defined Robert and Leans (2008) :sup:`[1,2]`. Fine grained controls are considered in `#353 `_. + +The default computation is performed using`numpy` :sup:`[3]` and summed-area tables, with future optimization options considered in: +- `#269 `_. +- `#270 `_. + +References: +1. Roberts, N. M., and H. W. Lean, 2008: Scale-Selective Verification of Rainfall Accumulations from + High-Resolution Forecasts of Convective Events. Monthly Weather Review, 136, 78–97, + https://doi.org/10.1175/2007mwr2123.1. +2. Mittermaier, M. P., 2021: A “Meta” Analysis of the Fractions Skill Score: The Limiting Case and + Implications for Aggregation. Monthly Weather Review, 149, 3491–3504, + https://doi.org/10.1175/mwr-d-18-0106.1. +3. FAGGIAN, N., B. ROUX, P. STEINLE, and B. EBERT, 2015: Fast calculation of the fractions skill + score. MAUSAM, 66, 457–466, https://doi.org/10.54302/mausam.v66i3.555. + +.. _GITHUB269: https://github.com/nci/scores/issues/269 +.. _GITHUB270: https://github.com/nci/scores/issues/270 +.. 
_GITHUB353: https://github.com/nci/scores/issues/353 +""" +import warnings +from typing import Callable, Optional, Tuple + +import numpy as np +import numpy.typing as npt +import xarray as xr + +from scores.fast.fss.fss_backends import get_compute_backend +from scores.fast.fss.typing import FssComputeMethod, FssDecomposed +from scores.typing import FlexibleDimensionTypes, XarrayLike +from scores.utils import ( + DimensionError, + DimensionWarning, + FieldTypeError, + NumpyThresholdOperator, + gather_dimensions, + left_identity_operator, +) + + +def fss_2d( # pylint: disable=too-many-locals,too-many-arguments + fcst: xr.DataArray, + obs: xr.DataArray, + *, # Force keywords arguments to be keyword-only + event_threshold: np.float64, + window_size: Tuple[int, int], + spatial_dims: Tuple[str, str], + zero_padding: bool = False, + reduce_dims: Optional[FlexibleDimensionTypes] = None, + preserve_dims: Optional[FlexibleDimensionTypes] = None, + threshold_operator: Callable = np.greater, + compute_method: FssComputeMethod = FssComputeMethod.NUMPY, + dask: str = "forbidden", # see: `xarray.apply_ufunc` for options +) -> xr.DataArray: + """ + Uses :py:func:`fss_2d_single_field` to compute the Fractions Skill Score (FSS) for each 2D + spatial field in the DataArray and then aggregates them over the output of gather dimensions. + + The aggregation method is the intended extension of the score defined by Robert and Leans (2008) + for multiple forecasts :sup:`[1,2]` + + .. note:: + This method takes in a ``threshold_operator`` to compare the input + fields against the ``event_threshold`` which defaults to ``numpy.greater``, + and is compatible with lightweight numpy operators. If you would + like to do more advanced binary discretization consider doing this + separately and using :py:func:`scores.spatial.fss_2d_binary` instead, which takes in a + pre-discretized binary field. + + Optionally aggregates the output along other dimensions if `reduce_dims` or + (mutually exclusive) ``preserve_dims`` are specified. + + For implementation for a single 2-D field, + see: :py:func:`fss_2d_single_field`. This offers the user the ability to use + their own aggregation methods, rather than ``xarray.ufunc`` + + .. warning:: + ``dask`` option is not fully tested + + Args: + fcst: An array of forecasts + obs: An array of observations (same spatial shape as ``fcst``) + event_threshold: A scalar to compare ``fcst`` and ``obs`` fields to generate a + binary "event" field. + window_size: A pair of positive integers ``(height, width)`` of the sliding + window_size; the window size must be greater than 0 and fit within + the shape of ``obs`` and ``fcst``. + spatial_dims: A pair of dimension names ``(x, y)`` where ``x`` and ``y`` are + the spatial dimensions to slide the window along to compute the FSS. + e.g. ``("lat", "lon")``. + zero_padding: If set to true, applies a 0-valued window border around the + data field before computing the FSS. If set to false (default), it + uses the edges (thickness = window size) of the input fields as the border. + One can think of it as: + - zero_padding = False => inner border + - zero_padding = True => outer border with 0 values + reduce_dims: Optional set of additional dimensions to aggregate over + (mutually exclusive to ``preserve_dims``). + preserve_dims: Optional set of dimensions to keep (all other dimensions + are reduced); By default all dimensions except ``spatial_dims`` + are preserved (mutually exclusive to ``reduce_dims``). 
+ threshold_operator: The threshold operator used to generate the binary + event field. E.g. ``np.greater``. Note: this may depend on the backend + ``compute method`` and not all operators may be supported. Generally, + ``np.greater``, ``np.less`` and their counterparts. Defaults to "np.greater". + compute_method: currently only supports :py:obj:`FssComputeMethod.NUMPY` + see: :py:class:`scores.fast.fss.typing.FssComputeMethod` + dask: See ``xarray.apply_ufunc`` for options + + Returns: + An ``xarray.DataArray`` containing the FSS computed over the ``spatial_dims`` + + The resultant array will have the score grouped against the remaining + dimensions, unless ``reduce_dims``/``preserve_dims`` are specified; in which + case, they will be aggregated over the specified dimensions accordingly. + + For an exact usage please refer to ``FSS.ipynb`` in the tutorials. + + Raises: + DimensionError: Various errors are thrown if the input dimensions + do not conform. e.g. if the window size is larger than the input + arrays, or if the the spatial dimensions in the args are missing in + the input arrays. + + DimensionWarning: If ``spatial_dims`` are attempting to be preserved e.g. in ``preserve_dims`` + + References: + 1. Roberts, N. M., and H. W. Lean, 2008: Scale-Selective Verification of Rainfall + Accumulations from High-Resolution Forecasts of Convective Events. Monthly Weather + Review, 136, 78–97, https://doi.org/10.1175/2007mwr2123.1. + 2. Mittermaier, M. P., 2021: A “Meta” Analysis of the Fractions Skill Score: The Limiting + Case and Implications for Aggregation. Monthly Weather Review, 149, 3491–3504, + https://doi.org/10.1175/mwr-d-18-0106.1. + """ + + def _spatial_dims_exist(_dims): + s_spatial_dims = set(spatial_dims) + s_dims = set(_dims) + return ( + len(s_spatial_dims - s_dims) == 0 + and len(spatial_dims) == 2 # all spatial dims present # number of spatial dims = 2 + ) + + if not _spatial_dims_exist(fcst.dims): + raise DimensionError(f"missing spatial dims {spatial_dims} in fcst") + if not _spatial_dims_exist(obs.dims): + raise DimensionError(f"missing spatial dims {spatial_dims} in obs") + for s_dim in spatial_dims: + if obs[s_dim].shape != fcst[s_dim].shape: + raise DimensionError( + f"The spatial extent of fcst and obs do not match for the following spatial dimensions: {spatial_dims}." + ) + + # wrapper defined for convenience, since it's too long for a lambda. + def _fss_wrapper(da_fcst: xr.DataArray, da_obs: xr.DataArray) -> FssDecomposed: + fss_backend = get_compute_backend(compute_method) + fb_obj = fss_backend( + da_fcst, + da_obs, + event_threshold=event_threshold, + window_size=window_size, + zero_padding=zero_padding, + threshold_operator=NumpyThresholdOperator(threshold_operator), + ) + return fb_obj.compute_fss_decomposed() + + # apply ufunc to get the decomposed fractions skill score aggregated over + # the 2D spatial dims. + da_fss = xr.apply_ufunc( + _fss_wrapper, + fcst, + obs, + input_core_dims=[list(spatial_dims), list(spatial_dims)], + vectorize=True, + dask=dask, # pragma: no cover + ) + + # gather additional dimensions to aggregate over. + dims = gather_dimensions( + fcst.dims, + obs.dims, + reduce_dims=reduce_dims, + preserve_dims=preserve_dims, + ) + + if not (spatial_dims[0] in dims and spatial_dims[1] in dims): + # at least one spatial dim is trying to be preserved. + warnings.warn( + f"At least one of the provided spatial dims = {spatial_dims} is attempting " + "to be preserved. 
Please make sure `preserve_dims` and `reduce_dims` " + "are configured to not preserve `spatial_dims`, in order to suppress this " + "warning.", + DimensionWarning, + ) + + # trim any spatial dimensions as they shouldn't appear. + dims = set(dims) - set(spatial_dims) + + # apply ufunc again but this time to compute the fss, reducing + # any non-spatial dimensions. + da_fss = xr.apply_ufunc( + _aggregate_fss_decomposed, + da_fss, + input_core_dims=[list(dims)], + vectorize=True, + dask=dask, # pragma: no cover + ) + + return da_fss + + +def fss_2d_binary( # pylint: disable=too-many-locals,too-many-arguments + fcst: xr.DataArray, + obs: xr.DataArray, + *, # Force keywords arguments to be keyword-only + window_size: Tuple[int, int], + spatial_dims: Tuple[str, str], + zero_padding: bool = False, + reduce_dims: Optional[FlexibleDimensionTypes] = None, + preserve_dims: Optional[FlexibleDimensionTypes] = None, + compute_method: FssComputeMethod = FssComputeMethod.NUMPY, + check_boolean: bool = True, + dask: str = "forbidden", # see: `xarray.apply_ufunc` for options +) -> xr.DataArray: + """ + Computes the Fractions Skill Score (FSS) using a binary field. + + Takes in a binary (True or False) field of fcst and obs events. For example + the output of a binary threshold applied to a continuous field or an event + operator. + + Optionally the user can set ``check_boolean`` to ``True`` to check that the + field is boolean before any computations. Note: this asserts that the + underlying fields are of type ``np.bool_`` but does not coerce them. If + the user is confident that the input field is binary (but not necessarily + boolean), they may set this flag to ``False``, to allow for more flexible binary + configurations such as 0 and 1; this should give the same results. + + Uses ``fss_2d`` from the ``scores.spatial`` module to perform fss + computation, but without the threshold operation. + + .. seealso:: + :py:func:`scores.spatial.fss_2d` for more details and the continuous + version. As well as detailed argument definitions. + """ + + if check_boolean and not (fcst.dtype == np.bool_ and obs.dtype == np.bool_): + raise FieldTypeError("Input field is not boolean") + + # Note: this s a dummy value that will be discarded by the + # `left_identity_operator` mainly used to circumvent passing in "None" to + # threshold operators which can have undefined behaviour. + _phantom_event_threshold = -999 + + return fss_2d( # pylint: disable=too-many-locals,too-many-arguments + fcst, + obs, + event_threshold=_phantom_event_threshold, + window_size=window_size, + spatial_dims=spatial_dims, + zero_padding=zero_padding, + reduce_dims=reduce_dims, + preserve_dims=preserve_dims, + threshold_operator=left_identity_operator, + compute_method=compute_method, + dask=dask, # see: `xarray.apply_ufunc` for options + ) + + +def fss_2d_single_field( + fcst: npt.NDArray[np.float64], + obs: npt.NDArray[np.float64], + *, + event_threshold: np.float64, + window_size: Tuple[int, int], + zero_padding: bool = False, + threshold_operator: Callable = np.greater, + compute_method: FssComputeMethod = FssComputeMethod.NUMPY, +) -> np.float64: + """ + Calculates the Fractions Skill Score (FSS) :sup:`[1]` for a given forecast and observed + 2-D field. + + The FSS is computed by counting the squared sum of forecast and observation + events in a given window size. This is repeated over all possible window + positions that can fit in the input arrays. A common method to do this is + via a sliding window. 
+ + While the various compute backends may have their own implementations; most + of them use some form of prefix sum algorithm to compute this quickly. + + For 2-D fields this data structure is known as the "Summed Area + Table" :sup:`[2]`. + + Once the squared sums are computed, the final FSS value can be derived by + accumulating the squared sums :sup:`[3]`. + + + The caller is responsible for making sure the input fields are in the 2-D + spatial domain. (Although it should work for any `np.array` as long as it's + 2D, and `window_size` is appropriately sized.) + + Args: + fcst: An array of forecasts + obs: An array of observations (same spatial shape as ``fcst``) + event_threshold: A scalar to compare ``fcst`` and ``obs`` fields to generate a + binary "event" field. + window_size: A pair of positive integers ``height, width)`` of the sliding + window; the window dimensions must be greater than 0 and fit within + the shape of ``obs`` and ``fcst``. + threshold_operator: The threshold operator used to generate the binary + event field. E.g. ``np.greater``. Note: this may depend on the backend + ``compute method`` and not all operators may be supported. Generally, + ``np.greater``, ``np.less`` and their counterparts. Defaults to "np.greater". + compute_method: currently only supports ``FssComputeMethod.NUMPY`` + see: :py:class:``FssComputeMethod`` + + Returns: + A float representing the accumulated FSS. + + Raises: + DimensionError: Various errors are thrown if the input dimensions + do not conform. e.g. if the window size is larger than the input + arrays, or if the the spatial dimensions of the input arrays do + not match. + + References: + 1. Roberts, N. M., and H. W. Lean, 2008: Scale-Selective Verification of Rainfall + Accumulations from High-Resolution Forecasts of Convective Events. Monthly Weather Review + 136, 78–97, https://doi.org/10.1175/2007mwr2123.1. + 2. https://en.wikipedia.org/wiki/Summed-area_table + 3. FAGGIAN, N., B. ROUX, P. STEINLE, and B. EBERT, 2015: Fast calculation of the fractions + skill score. MAUSAM, 66, 457–466, https://doi.org/10.54302/mausam.v66i3.555. + """ + fss_backend = get_compute_backend(compute_method) + fb_obj = fss_backend( + fcst, + obs, + window_size=window_size, + zero_padding=zero_padding, + event_threshold=event_threshold, + threshold_operator=NumpyThresholdOperator(threshold_operator), + ) + fss_score = fb_obj.compute_fss() + + return fss_score + + +def _aggregate_fss_decomposed(fss_d: FssDecomposed) -> np.float64: + """ + Aggregates the results of decomposed fss scores. + """ + # can't do ufuncs over custom void types currently... 
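+    # Average each decomposed component (fcst, obs, diff) over all 2-D fields, then
+    # recombine them as FSS = 1 - diff / (fcst + obs), clamped to [0, 1].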
+ fcst_sum = 0.0 + obs_sum = 0.0 + diff_sum = 0.0 + + if isinstance(fss_d, np.ndarray): + l = fss_d.size + + if l < 1: + return 0.0 + + with np.nditer(fss_d) as it: + for elem in it: + (fcst_, obs_, diff_) = elem.item() + fcst_sum += fcst_ / l + obs_sum += obs_ / l + diff_sum += diff_ / l + else: + (fcst_sum, obs_sum, diff_sum) = fss_d + + fss = 0.0 + denom = obs_sum + fcst_sum + + if denom > 0.0: + fss = 1.0 - diff_sum / denom + + fss_clamped = max(min(fss, 1.0), 0.0) + + return fss_clamped diff --git a/src/scores/utils.py b/src/scores/utils.py index d27c94a2..4f98860b 100644 --- a/src/scores/utils.py +++ b/src/scores/utils.py @@ -4,7 +4,8 @@ import warnings from collections.abc import Hashable, Iterable -from typing import Optional, Union +from dataclasses import dataclass, field +from typing import Callable, Dict, Generic, Optional, TypeVar, Union import numpy as np import pandas as pd @@ -39,13 +40,149 @@ """ -class DimensionError(Exception): +class DimensionError(ValueError): """ Custom exception used when attempting to operate over xarray DataArray or Dataset objects that do not have compatible dimensions. """ +class FieldTypeError(TypeError): + """ + Custom exception used when incompatible field types are used for a particular + operation. + """ + + +class DimensionWarning(UserWarning): + """ + Custom warning raised when dimensional arguments are ambiguous, but the + ambiguity is implicitly resolved by the underlying algorithm. The warning + can be nullified if the user resolves the ambiguity, by explicitly providing + supporting arguments. + + For example, in Fractions Skill Score (FSS) the user must provide spatial + dimensions used to compute the score. If the user also attempts to preserve + these dimensions, e.g. due to the default behaviour of `gather_dimensions`, + this will cause ambiguity. However, the underlying algorithm itself + necessitates that the spatial dimensions be reduced and hence takes + priority. This warning is raised to alert the user of this behaviour, as + well as actions that can be taken to suppress it. + """ + + +class CompatibilityError(ImportError): + """ + Custom exception used when attempting to access advanced functionality that + require extra/optional dependencies. + """ + + +T = TypeVar("T") + + +@dataclass +class BinaryOperator(Generic[T]): + """ + Generic datatype to represent binary operators. For specific event operators, + refer to ``scores.categorical.contingency.EventOperator``. + + Note: This operator should not be called directly or on every computation. + Rather, it is intended to perform validation before passing in the "real" + operator ``self.op`` via an unwrapping call to ``BinaryOperator.get`` + + Bad: + + .. code-block:: python + x = np.rand((1000, 1000)) + threshold = 0.5 + + for it in np.nditer(x): + # validation check is done every loop - BAD + numpy_op = NumpyThresholdOperator(np.less).get() + binary_item = numpy_op(it, threshold) + do_stuff(binary_item) + + Ok: + + .. code-block:: python + x = np.rand((1000, 1000)) + threshold = 0.5 + + # validation check is done one-time here - GOOD + numpy_op = NumpyThresholdOperator(np.less).get() + + for it in np.nditer(x): + binary_item = numpy_op(it, threshold) + do_stuff(binary_item) # some elementry processing function + + # EVEN BETTER: + # basic numpy operators are already vectorized so this will work fine. 
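+        # (`numpy_op` here is the raw `np.less` returned by `.get()`, so it broadcasts
+        # over the whole array in a single call.)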
+ binary_items = numpy_op(x, threshold) + vec_do_stuff(binary_items) # vectorized version + + Key takeaway - unwrap the operator using ``.get()`` as early as possible + """ + + op: Callable[[T, T], T] + valid_ops: Dict[str, Callable[[T, T], T]] = field(init=False) + + def __post_init__(self): + """ + Post initialization checks go here + """ + self._validate() + + def _validate(self): + """ + Default operator is valid + """ + # functions are objects so this should work in theory + if not self.op in self.valid_ops.values(): + # intentional list comprehension, for display reasons + raise ValueError( + "Invalid operator specified. Allowed operators: " + f"{[k for k in self.valid_ops.keys()]}" # pylint: disable=unnecessary-comprehension + ) + return self + + def get(self): + """ + Return the underlying operator + """ + return self.op + + +def left_identity_operator(x, _): + """ + A binary operator that takes in two inputs but only returns the first. + """ + return x + + +@dataclass +class NumpyThresholdOperator(BinaryOperator): + """ + Generic numpy threshold operator to avoid function call over-head, + for light-weight comparisons. For specific event operators, refer to + ``scores.processing.discretise`` + + Important: The input field must be the first operand and the threshold + should be the second operand otherwise some operators may have unintended + behaviour. + """ + + def __post_init__(self): + self.valid_ops = { + "numpy.greater": np.greater, + "numpy.greater_equal": np.greater_equal, + "numpy.less": np.less, + "numpy.less_equal": np.less_equal, + "scores.utils.left_identity_operator": left_identity_operator, + } + super().__post_init__() + + def gather_dimensions( # pylint: disable=too-many-branches fcst_dims: Iterable[Hashable], obs_dims: Iterable[Hashable], @@ -98,7 +235,7 @@ def gather_dimensions( # pylint: disable=too-many-branches if specified_dims == "all": if "all" in all_data_dims: - warnings.warn(WARN_ALL_DATA_CONFLICT_MSG) + warnings.warn(WARN_ALL_DATA_CONFLICT_MSG, DimensionWarning) elif specified_dims is not None: if isinstance(specified_dims, str): specified_dims = [specified_dims] diff --git a/tests/categorical/test_contingency.py b/tests/categorical/test_contingency.py index 4f62a30b..20e335d0 100644 --- a/tests/categorical/test_contingency.py +++ b/tests/categorical/test_contingency.py @@ -170,6 +170,8 @@ def test_categorical_table(): assert actual_table.sel(contingency="total_count") == 18 # Confirm calculations of metrics are correct + assert table.base_rate() == (9 + 1) / 18 + assert table.forecast_rate() == (9 + 2) / 18 assert table.accuracy() == (9 + 6) / 18 assert table.probability_of_detection() == 9 / (9 + 1) assert table.false_alarm_rate() == 2 / (2 + 6) @@ -185,6 +187,9 @@ def test_categorical_table(): assert table.heidke_skill_score() == (9 + 6 - (83 / 9)) / (18 - (83 / 9)) assert table.odds_ratio() == (9 / (9 + 1)) / (1 - (9 / (9 + 1))) / ((2 / (6 + 2)) / (1 - (2 / (6 + 2)))) assert table.odds_ratio_skill_score() == (9 * 6 - 1 * 2) / (9 * 6 + 1 * 2) + assert table.symmetric_extremal_dependence_index() == (np.log(0.25) - np.log(0.9) + np.log(0.1) - np.log(0.75)) / ( + np.log(0.25) + np.log(0.9) + np.log(0.1) + np.log(0.75) + ) # These methods are redirects to each other assert table.critical_success_index() == table.threat_score() @@ -216,19 +221,48 @@ def test_dimension_broadcasting(): should be compared to the observation, and the final accuracy should have an accuracy score at each time. 
- The actual calculated accuracy at each time is not tested here, this test is about - dimension handling not score calculation maths. + In this example, the same forecast values are repeated for each of 5 lead times + A single observations array is provided + Broadcasting rules mean the accuracy should be calculated at each lead time, + comparing the observations against each lead time """ base_forecasts = [simple_forecast + i * 0.5 for i in range(5)] complex_forecast = xr.concat(base_forecasts, dim="time") + complex_obs = xr.concat([simple_obs, simple_obs + 1], dim="source") match = scores.categorical.ThresholdEventOperator(default_event_threshold=1.3, default_op_fn=operator.gt) + + # Check dimension broadcasting for forecasts table = match.make_contingency_manager(complex_forecast, simple_obs) withtime = table.transform(preserve_dims="time") accuracy = withtime.accuracy() assert accuracy.dims == ("time",) assert len(accuracy.time) == 5 + assert accuracy[0] == (9 + 6) / 18 + assert accuracy[1] == (10 + 4) / 18 + assert accuracy[2] == (10 + 1) / 18 + assert accuracy[3] == (10 + 0) / 18 + assert accuracy[4] == (10 + 0) / 18 + + # Check dimension broadcasting against observations + table = match.make_contingency_manager(simple_forecast, complex_obs) + withsource = table.transform(preserve_dims="source") + accuracy = withsource.accuracy() + assert accuracy.dims == ("source",) + assert len(accuracy.source) == 2 + + # Check dimension broadcasting against forecasts and observations together + table = match.make_contingency_manager(complex_forecast, complex_obs) + preserved = table.transform(preserve_dims=["time", "source"]) + accuracy_time_source = preserved.accuracy() + assert accuracy_time_source.dims == ("time", "source") + assert len(accuracy_time_source.time) == 5 + assert len(accuracy_time_source.source) == 2 + + # The first "source" should match the previous calculations + xr.testing.assert_allclose(accuracy_time_source.sel(source=0), withtime.accuracy()) + def test_nan_handling(): """ diff --git a/tests/spatial/__init__.py b/tests/spatial/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/spatial/fss_test_data.py b/tests/spatial/fss_test_data.py new file mode 100644 index 00000000..138d70e4 --- /dev/null +++ b/tests/spatial/fss_test_data.py @@ -0,0 +1,185 @@ +""" +Test data for Fractions Skill Score (FSS) +""" + +import numpy as np +import pandas as pd +import xarray as xr + + +def generate(obs_pdf, fcst_pdf, *, seed=42): + """ + Generates random 2-D array for obs/fcst representing a 2-D grid + Args: + obs_pdf: (mu, stddev) + fcst_pdf: (mu, stddev) + + where, + mu := mean + stddev := standard deviation + in, + Normal distribution ~ N(mu, stddev) + """ + np.random.seed(seed) + h = 40 + w = 60 + obs = np.random.normal(loc=obs_pdf[0], scale=obs_pdf[1], size=(h, w)) + fcst = np.random.normal(loc=fcst_pdf[0], scale=fcst_pdf[1], size=(h, w)) + return (obs, fcst) + + +_PERIODS = 10 +_TIME_SERIES = pd.date_range( + start="2022-11-20T01:00:00.000000000", + freq="h", + periods=_PERIODS, +) + +EXPECTED_TEST_FSS_2D_REDUCE_TIME = xr.DataArray( + data=[0.941163, 0.906025], + coords={"lead_time": [1, 2]}, +) +EXPECTED_TEST_FSS_2D_PRESERVE_TIME = xr.DataArray( + data=[0.937116, 0.915296, 0.922467, 0.913091, 0.941321, 0.9137, 0.920675, 0.920409, 0.93085, 0.925647], + coords={"time": _TIME_SERIES}, +) +EXPECTED_TEST_FSS_2D_PRESERVE_ALL = xr.DataArray( + data=[ + [0.968599, 0.937148, 0.941111, 0.927991, 0.950237, 0.937238, 0.947837, 0.932682, 0.933721, 0.939457], + [0.907544, 
0.891293, 0.903114, 0.897708, 0.931717, 0.892274, 0.893427, 0.906921, 0.92814, 0.910913], + ], + coords={"lead_time": [1, 2], "time": _TIME_SERIES}, +) + +FSS_CURATED_THRESHOLD = 2 +FSS_CURATED_WINDOW_SIZE_3X3 = 3 +FSS_CURATED_WINDOW_SIZE_4X4 = 4 + +# fmt: off +FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS = np.array([ + [5, 1, 1, 2, 3], + [1, 1, 0, 1, 1], + [1, 3, 1, 3, 1], + [4, 3, 2, 1, 0], +]) + +FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST = np.array([ + [4, 0, 1, 3, 4], + [1, 2, 0, 3, 0], + [1, 4, 3, 4, 2], + [5, 4, 3, 3, 2], +]) + +FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_OBS = np.array([ + [0, 0, 0, 0, 0, 0, 0,], + [0, 5, 1, 1, 2, 3, 0,], + [0, 1, 1, 0, 1, 1, 0,], + [0, 1, 3, 1, 3, 1, 0,], + [0, 4, 3, 2, 1, 0, 0,], + [0, 0, 0, 0, 0, 0, 0,], +]) + +FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_FCST = np.array([ + [0, 0, 0, 0, 0, 0, 0], + [0, 4, 0, 1, 3, 4, 0], + [0, 1, 2, 0, 3, 0, 0], + [0, 1, 4, 3, 4, 2, 0], + [0, 5, 4, 3, 3, 2, 0], + [0, 0, 0, 0, 0, 0, 0], +]) + +FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_OBS = np.array([ + [5, 1, 1, 2, 3, 4], + [1, 1, 0, 1, 1, 4], + [1, 3, 1, 3, 1, 4], + [4, 3, 2, 1, 0, 4], + [1, 1, 1, 1, 1, 4], +]) + +FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_FCST = np.array([ + [4, 0, 1, 3, 4, 3], + [1, 2, 0, 3, 0, 3], + [1, 4, 3, 4, 2, 2], + [5, 4, 3, 3, 2, 2], + [2, 2, 2, 2, 2, 2], +]) + +FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_OBS = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 5, 1, 1, 2, 3, 4, 0, 0], + [0, 0, 1, 1, 0, 1, 1, 4, 0, 0], + [0, 0, 1, 3, 1, 3, 1, 4, 0, 0], + [0, 0, 4, 3, 2, 1, 0, 4, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 4, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], +]) + +FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_FCST = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 4, 0, 1, 3, 4, 3, 0, 0], + [0, 0, 1, 2, 0, 3, 0, 3, 0, 0], + [0, 0, 1, 4, 3, 4, 2, 2, 0, 0], + [0, 0, 5, 4, 3, 3, 2, 2, 0, 0], + [0, 0, 2, 2, 2, 2, 2, 2, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], +]) +# fmt: on + + +def _fss_naive_small_data(fcst, obs, ws, th): # pylint: disable=too-many-locals + """ + Naive `O(N^2 * W^2)` implementation for FSS calculations + + .. 
warning:: + To be used for testing and small data sizes only + """ + (h, w) = fcst.shape + sum_sq_fcst = 0 + sum_sq_obs = 0 + sum_sq_diff = 0 + for i in range(0, h - ws + 1): + for j in range(0, w - ws + 1): + sum_fcst = 0 + sum_obs = 0 + for k in range(i, i + ws): + for l in range(j, j + ws): + sum_fcst += 1 if fcst[k][l] > th else 0 + sum_obs += 1 if obs[k][l] > th else 0 + sum_sq_fcst += sum_fcst * sum_fcst + sum_sq_obs += sum_obs * sum_obs + sum_sq_diff += (sum_fcst - sum_obs) * (sum_fcst - sum_obs) + fss = 1 - sum_sq_diff / (sum_sq_fcst + sum_sq_obs) + return fss + + +EXPECTED_FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW = _fss_naive_small_data( + FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS, + FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST, + FSS_CURATED_WINDOW_SIZE_3X3, + FSS_CURATED_THRESHOLD, +) + +EXPECTED_FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED = _fss_naive_small_data( + FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_OBS, + FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_FCST, + FSS_CURATED_WINDOW_SIZE_3X3, + FSS_CURATED_THRESHOLD, +) + +EXPECTED_FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW = _fss_naive_small_data( + FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_OBS, + FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_FCST, + FSS_CURATED_WINDOW_SIZE_4X4, + FSS_CURATED_THRESHOLD, +) + +EXPECTED_FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED = _fss_naive_small_data( + FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_OBS, + FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_FCST, + FSS_CURATED_WINDOW_SIZE_4X4, + FSS_CURATED_THRESHOLD, +) diff --git a/tests/spatial/test_fss.py b/tests/spatial/test_fss.py new file mode 100644 index 00000000..8bed60c2 --- /dev/null +++ b/tests/spatial/test_fss.py @@ -0,0 +1,415 @@ +""" +Contains unit tests for scores.probability.fss_impl +""" +import numpy as np +import pytest +import xarray as xr + +from scores import sample_data as sd +from scores.spatial.fss_impl import ( + _aggregate_fss_decomposed, + fss_2d, + fss_2d_binary, + fss_2d_single_field, +) +from scores.utils import DimensionError, FieldTypeError +from tests.spatial import fss_test_data as ftd + + +def test_nan_2d_single_field_curated_nan_cross(): + """ + Check that NaNs are handled as expected. + + This also applies to ``fss_2d`` and ``fss_2d_binary`` because they both use + ``fss_2d_single_field``. + """ + window_size = (ftd.FSS_CURATED_WINDOW_SIZE_3X3, ftd.FSS_CURATED_WINDOW_SIZE_3X3) + fcst = np.array(np.copy(ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST), dtype=np.float64) + obs = np.array(np.copy(ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS), dtype=np.float64) + # run a cross (+) through the dataset with NaN + fcst[2, :] = np.array(np.nan) + fcst[:, 2] = np.array(np.nan) + obs[2, :] = np.array(np.nan) + obs[:, 2] = np.array(np.nan) + res_1 = fss_2d_single_field( + obs, fcst, event_threshold=ftd.FSS_CURATED_THRESHOLD, window_size=window_size, zero_padding=False + ) + + fcst = np.copy(ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST) + obs = np.copy(ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS) + # run a cross (+) through the dataset with zero - this should + # give the same result. 
+ fcst[2, :] = 0 + fcst[:, 2] = 0 + obs[2, :] = 0 + obs[:, 2] = 0 + res_2 = fss_2d_single_field( + obs, fcst, event_threshold=ftd.FSS_CURATED_THRESHOLD, window_size=window_size, zero_padding=False + ) + np.testing.assert_allclose(res_1, res_2) + + +def test_fss_2d_single_field_curated_1x1_window(): + """ + Check that a 1x1 window performs as expected + """ + window_size = (1, 1) + fcst = ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST + obs = ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS + th = ftd.FSS_CURATED_THRESHOLD + obs_bin = np.array(obs > th, dtype=np.int64) + fcst_bin = np.array(fcst > th, dtype=np.int64) + denom = np.sum(obs_bin * obs_bin + fcst_bin * fcst_bin) + numer = np.sum((obs_bin - fcst_bin) * (obs_bin - fcst_bin)) + expected = 1.0 - numer / denom + res = fss_2d_single_field(obs, fcst, event_threshold=th, window_size=window_size, zero_padding=False) + np.testing.assert_allclose(res, expected) + + +def test_fss_2d_single_field_curated_window_equal_data_size(): + """ + Check that a data = window size performs as expected + """ + window_size = (4, 5) + fcst = ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST + obs = ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS + th = ftd.FSS_CURATED_THRESHOLD + obs_bin = np.sum(obs > th) + fcst_bin = np.sum(fcst > th) + denom = obs_bin * obs_bin + fcst_bin * fcst_bin + numer = (obs_bin - fcst_bin) * (obs_bin - fcst_bin) + expected = 1.0 - numer / denom + res = fss_2d_single_field(obs, fcst, event_threshold=th, window_size=window_size, zero_padding=False) + np.testing.assert_allclose(res, expected) + + +@pytest.mark.parametrize( + ("obs", "fcst", "window_size", "event_threshold", "expected"), + [ + ( + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS, + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST, + (ftd.FSS_CURATED_WINDOW_SIZE_3X3, ftd.FSS_CURATED_WINDOW_SIZE_3X3), + ftd.FSS_CURATED_THRESHOLD, + ftd.EXPECTED_FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW, + ), + ( + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_OBS, + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_FCST, + (ftd.FSS_CURATED_WINDOW_SIZE_4X4, ftd.FSS_CURATED_WINDOW_SIZE_4X4), + ftd.FSS_CURATED_THRESHOLD, + ftd.EXPECTED_FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW, + ), + ], +) +def test_fss_2d_single_field_curated(obs, fcst, window_size, event_threshold, expected): + """ + Odd/even sized window with hand-crafted data + """ + res = fss_2d_single_field(obs, fcst, event_threshold=event_threshold, window_size=window_size, zero_padding=False) + np.testing.assert_allclose(res, expected) + + +@pytest.mark.parametrize( + ("obs", "obs_padded", "fcst", "fcst_padded", "window_size", "event_threshold", "expected"), + [ + ( + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_OBS, + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_OBS, + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_FCST, + ftd.FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED_FCST, + (ftd.FSS_CURATED_WINDOW_SIZE_3X3, ftd.FSS_CURATED_WINDOW_SIZE_3X3), + ftd.FSS_CURATED_THRESHOLD, + ftd.EXPECTED_FSS_CURATED_TEST_4X5_DATA_3X3_WINDOW_ZERO_PADDED, + ), + ( + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_OBS, + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_OBS, + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_FCST, + ftd.FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED_FCST, + (ftd.FSS_CURATED_WINDOW_SIZE_4X4, ftd.FSS_CURATED_WINDOW_SIZE_4X4), + ftd.FSS_CURATED_THRESHOLD, + ftd.EXPECTED_FSS_CURATED_TEST_5X6_DATA_4X4_WINDOW_ZERO_PADDED, + ), + ], +) +def test_fss_2d_single_field_curated_zero_padded( + obs, obs_padded, fcst, fcst_padded, window_size, event_threshold, expected +): + """ 
+ Odd/even sized window with hand-crafted data, with zero padding. + + Also tests that pre-padded inputs match zero-padding from the algorithm. + """ + # test in-built zero-padding + res_1 = fss_2d_single_field(fcst, obs, event_threshold=event_threshold, window_size=window_size, zero_padding=True) + np.testing.assert_allclose(res_1, expected) + + # test pre-zero-padded input data + res_2 = fss_2d_single_field( + fcst_padded, obs_padded, event_threshold=event_threshold, window_size=window_size, zero_padding=False + ) + np.testing.assert_allclose(res_2, expected) + + # assert that both results are the same + np.testing.assert_allclose(res_1, res_2) + + +@pytest.mark.parametrize( + ("obs_pdf", "fcst_pdf", "window_size", "event_threshold", "expected"), + [ + ((0.0, 1.0), (0.0, 1.0), (10, 10), 0.5, 0.974733), + ((0.0, 1.0), (0.0, 2.0), (5, 5), 0.5, 0.88913), + ((0.0, 1.0), (1.0, 1.0), (10, 10), 0.25, 0.81069), + ], +) +def test_fss_2d_single_field(obs_pdf, fcst_pdf, window_size, event_threshold, expected): + """ + Integration test to check that fss is generally working for a single field + """ + # half the meaning of life, in order to maintain some mystery + seed = 21 + (obs, fcst) = ftd.generate(obs_pdf, fcst_pdf, seed=seed) + res = fss_2d_single_field(fcst, obs, event_threshold=event_threshold, window_size=window_size, zero_padding=False) + np.testing.assert_allclose(res, expected, rtol=1e-5) + + +@pytest.mark.parametrize( + ("obs_pdf", "fcst_pdf", "window_size", "event_threshold", "expected"), + [ + ((0.0, 1.0), (0.0, 1.0), (10, 10), 0.5, 0.972075), + ((0.0, 1.0), (0.0, 2.0), (5, 5), 0.5, 0.886119), + ((0.0, 1.0), (1.0, 1.0), (10, 10), 0.25, 0.811639), + ], +) +def test_fss_2d_single_field_zero_pad(obs_pdf, fcst_pdf, window_size, event_threshold, expected): + """ + Integration test to check that fss is generally working for a single field, + but with zero padding instead of interior border. + """ + # half the meaning of life, in order to maintain some mystery + seed = 21 + (obs, fcst) = ftd.generate(obs_pdf, fcst_pdf, seed=seed) + res = fss_2d_single_field(fcst, obs, event_threshold=event_threshold, window_size=window_size, zero_padding=True) + np.testing.assert_allclose(res, expected, rtol=1e-5) + + +@pytest.mark.filterwarnings("ignore::UserWarning") +@pytest.mark.parametrize( + ("window_size", "event_threshold", "reduce_dims", "preserve_dims", "expected"), + [ + ((2, 2), 2.0, None, None, xr.DataArray(0.968515)), + ((2, 2), 8.0, None, None, xr.DataArray(0.779375)), + ((5, 5), 2.0, None, None, xr.DataArray(0.993892)), + ((5, 5), 8.0, None, None, xr.DataArray(0.942294)), # output_dims: scalar + ((2, 2), 5.0, ["time"], None, ftd.EXPECTED_TEST_FSS_2D_REDUCE_TIME), # output_dims: 1 x lead_time + ((2, 2), 5.0, None, ["time"], ftd.EXPECTED_TEST_FSS_2D_PRESERVE_TIME), # output_dims: 1 x time + ( + (2, 2), + 5.0, + None, + ["time", "lead_time"], + ftd.EXPECTED_TEST_FSS_2D_PRESERVE_ALL, + ), # output_dims: time x lead_time + ], +) +def test_fss_2d(window_size, event_threshold, reduce_dims, preserve_dims, expected): + """ + Tests various combinations of window_size/event_threshold/preserve_dims/reduce_dims + + Note: does not include error/assertion testing this will be done separately. 
+ """ + da_fcst = sd.continuous_forecast(large_size=False, lead_days=True) + da_obs = sd.continuous_observations(large_size=False) + res = fss_2d( + da_fcst, + da_obs, + event_threshold=event_threshold, + window_size=window_size, + spatial_dims=["lat", "lon"], + reduce_dims=reduce_dims, + preserve_dims=preserve_dims, + ) + xr.testing.assert_allclose(res, expected) + + +@pytest.mark.filterwarnings("ignore::UserWarning") +@pytest.mark.parametrize( + ("window_size", "event_threshold", "reduce_dims", "preserve_dims", "expected"), + [ + ((2, 2), 2.0, None, None, xr.DataArray(0.968515)), + ((2, 2), 8.0, None, None, xr.DataArray(0.779375)), + ((5, 5), 2.0, None, None, xr.DataArray(0.993892)), + ((5, 5), 8.0, None, None, xr.DataArray(0.942294)), # output_dims: scalar + ((2, 2), 5.0, ["time"], None, ftd.EXPECTED_TEST_FSS_2D_REDUCE_TIME), # output_dims: 1 x lead_time + ((2, 2), 5.0, None, ["time"], ftd.EXPECTED_TEST_FSS_2D_PRESERVE_TIME), # output_dims: 1 x time + ( + (2, 2), + 5.0, + None, + ["time", "lead_time"], + ftd.EXPECTED_TEST_FSS_2D_PRESERVE_ALL, + ), # output_dims: time x lead_time + ], +) +def test_fss_2d_binary(window_size, event_threshold, reduce_dims, preserve_dims, expected): + """ + Tests various combinations of window_size/event_threshold/preserve_dims/reduce_dims + + Note: does not include error/assertion testing this will be done separately. + """ + da_fcst = sd.continuous_forecast(large_size=False, lead_days=True) + da_obs = sd.continuous_observations(large_size=False) + res = fss_2d_binary( + np.greater(da_fcst, event_threshold), + np.greater(da_obs, event_threshold), + window_size=window_size, + spatial_dims=["lat", "lon"], + reduce_dims=reduce_dims, + preserve_dims=preserve_dims, + check_boolean=False, + ) + xr.testing.assert_allclose(res, expected) + + +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_fss_2d_binary_bool_check(): + da_fcst = sd.continuous_forecast(large_size=False, lead_days=True) + da_obs = sd.continuous_observations(large_size=False) + with pytest.raises(FieldTypeError): + fss_2d_binary( + da_fcst, np.greater(da_obs, 0.5), window_size=(5, 5), spatial_dims=["lat", "lon"], check_boolean=True + ) + with pytest.raises(FieldTypeError): + fss_2d_binary( + np.greater(da_fcst, 0.5), da_obs, window_size=(5, 5), spatial_dims=["lat", "lon"], check_boolean=True + ) + + +def test_missing_spatial_dimensions(): + """ + Test for missing spatial dimensions in input data + """ + + # missing in forecast + da_fcst = sd.continuous_observations(large_size=False) + da_obs = sd.continuous_observations(large_size=False) + da_fcst = da_fcst.rename({"lat": "bat"}) + with pytest.raises(DimensionError): + fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=(5, 5), + spatial_dims=["lat", "lon"], + ) + + # missing in obs + da_fcst = sd.continuous_observations(large_size=False) + da_obs = sd.continuous_observations(large_size=False) + da_obs = da_fcst.rename({"lat": "mat"}) + with pytest.raises(DimensionError): + fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=(5, 5), + spatial_dims=["lat", "lon"], + ) + + +@pytest.mark.parametrize(("large_obs"), [(True), (False)]) +def test_invalid_input_dimensions(large_obs): + """ + Compares a large input with a small input - should raise an error + """ + # NOTE: large continuous forecast takes too long, so using continuous_observations + # to generate the forecast dataset as well. This test only really cares about the + # input spatial dimensions being compatible. 
+ da_fcst = sd.continuous_observations(large_size=large_obs) + da_obs = sd.continuous_observations(large_size=not large_obs) + with pytest.raises(DimensionError): + fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=(5, 5), + spatial_dims=["lat", "lon"], + ) + + with pytest.raises(DimensionError): + fss_2d_single_field( + da_fcst[0].values, + da_obs[0].values, + event_threshold=5.0, + window_size=(5, 5), + ) + + with pytest.raises(DimensionError): + fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=(5, 5), + spatial_dims=["lat"], + ) + + +@pytest.mark.parametrize(("window_size"), [(50, 10), (-1, 5), (5, -1), (-1, -1), (10, 50), (50, 50)]) +def test_invalid_window_size(window_size): + """ + Check for various invalid window_size sizes (note: assumes small dataset is used) + """ + da_fcst = sd.continuous_forecast(large_size=False, lead_days=True) + da_obs = sd.continuous_observations(large_size=False) + with pytest.raises(DimensionError): + fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=window_size, + spatial_dims=["lat", "lon"], + ) + + +def test_zero_denom_fss(): + """ + Force denominator of the input field to be 0.0 to test against divide by zero + """ + da_fcst = sd.continuous_forecast(large_size=False, lead_days=True) + da_obs = sd.continuous_observations(large_size=False) + da_fcst[:] = 0.0 + da_obs[:] = 0.0 + res = fss_2d( + da_fcst, + da_obs, + event_threshold=5.0, + window_size=(5, 5), + spatial_dims=["lat", "lon"], + ) + assert res == xr.DataArray(0.0) + + +def test_zero_denom_fss_single_field(): + """ + Force denominator of the input field to be 0.0 to test against divide by zero + """ + # half the meaning of life, in order to maintain some mystery + seed = 21 + (obs, fcst) = ftd.generate((0, 0), (0, 0), seed=seed) + res = fss_2d_single_field(fcst, obs, event_threshold=1.0, window_size=(5, 5)) + assert res == 0.0 + + +def test_empty_score_aggregation(): + """ + Test for the scenario where the aggregation logic is being performed on an empty array. + + Theoretically this shouldn't be possible... but in practicality may happen. + + Note: `ufuncs` by design (which are typically implemented in C) + should obfuscate this and should not be reachable, think for loop: + `for (int i = 0; i < 0; i++) {...}` - this shouldn't reach any inner computations. + """ + fss = _aggregate_fss_decomposed(np.empty(shape=(0, 0))) + assert fss == 0.0 diff --git a/tests/test_utils.py b/tests/test_utils.py index e07b66ad..ad840f29 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -747,3 +747,12 @@ def test_check_binary_raises(da): def test_check_binary_doesnt_raise(da): """test check_binary doesn't raise""" check_binary(da, "my name") + + +def test_invalid_numpy_operator(): + """ + Test exception is raised for specifying an invalid operator + + """ + with pytest.raises(ValueError): + utils.NumpyThresholdOperator(sorted) diff --git a/tutorials/Binary_Contingency_Scores.ipynb b/tutorials/Binary_Contingency_Scores.ipynb index 8e4fe16b..bada0de6 100644 --- a/tutorials/Binary_Contingency_Scores.ipynb +++ b/tutorials/Binary_Contingency_Scores.ipynb @@ -33,6 +33,8 @@ " - allows the data to be aggregated and transformed (for example, reducing the latitude and longitude dimensions while preserving the height dimension in order to assess the accuracy at different vertical levels).\n", "3. Calculate the desired metrics from the contingency tables produced in step two. 
`scores` contains the following binary categorical scores:\n", " - accuracy (fraction correct)\n", + " - base rate\n", + " - forecast rate\n", " - frequency bias (bias score)\n", " - hit rate (true positive rate, probability of detection, sensitivity, recall)\n", " - false alarm ratio\n", @@ -46,6 +48,7 @@ " - Heidke skill score (Cohen's kappa)\n", " - odds ratio\n", " - odds ratio skill score (Yule's Q)\n", + " - Symmetric Extremal Dependence Index (SEDI)\n", "\n", "\n", "`scores` provides support for all three steps, plus convenient functions for working efficiently. Most of the `scores` APIs are functional in nature, however introducing some classes (see below) makes binary categorical metrics much easier to calculate and work with. This notebook starts with simple examples and builds up to more complex ones." @@ -3149,7 +3152,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/tutorials/Fractions_Skill_Score.ipynb b/tutorials/Fractions_Skill_Score.ipynb new file mode 100644 index 00000000..16e56a1b --- /dev/null +++ b/tutorials/Fractions_Skill_Score.ipynb @@ -0,0 +1,2380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee0d9b1c-9e0a-4fe1-891a-a74450c1dd3b", + "metadata": {}, + "source": [ + "# Fractions Skill Score (FSS)\n", + "\n", + "The original paper, and formal definitions can be found here: \n", + "\n", + "Roberts, N. M., & Lean, H. W. (2008). Scale-Selective Verification of Rainfall Accumulations from High-Resolution Forecasts of Convective Events. Monthly Weather Review, 136(1), 78-97. https://doi.org/10.1175/2007MWR2123.1\n", + "\n", + "\n", + "For an explanation of the FSS, and implementation considerations, see: \n", + "\n", + "Faggian, N., Roux, B., Steinle, P., & Ebert, B. (2015). Fast calculation of the fractions skill score. MAUSAM, 66(3), 457–466. https://doi.org/10.54302/mausam.v66i3.555" + ] + }, + { + "cell_type": "markdown", + "id": "2162e15f-0bbc-4b5a-856d-f70d550c779e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## FSS for a single 2D field\n", + "FSS is computed over 2D arrays representing the observations & forecasts in the spatial domain. The user has to make sure that the input dimensions correspond to the spatial domain e.g. `lat x lon`. 
Generally the computation involves sliding a window over the input field(s) and applying a threshold over the fcst and obs values.\n", + "\n", + "The resulting binary field is summed over each window to give the populace (number of ones/\"true\" values in the window).\n", + "\n", + "The resulting 2-D field of rolling sums represents the [\"_Integral Image_\"][integral_image] of the respective forecast and observation fields, which is then aggregated over all the sliding windows to compute the fractions skill score.\n", + "\n", + "The FSS is then roughly defined as:\n", + "$$\n", + " fss = 1 - \\frac{\\sum_{w}(p_o - p_f)^2}{\\sum_{w}p_o^2 + \\sum_{w}p_f^2}\n", + "$$\n", + "\n", + "where,\n", + "\n", + "- $p_o$ is the observation populace > threshold, in one window\n", + "- $p_f$ is the forecast populace > threshold, in one window\n", + "- $\\sum_{w}$ is the sum over all windows\n", + "\n", + "The implementation details are beyond the scope of this tutorial; please refer to [Fast calculation of the Fractions Skill Score][fss_ref] for more information.\n", + "\n", + "In summary, computation of a single field requires the following parameters:\n", + "\n", + "- forecast 2-D field (in spatial domain)\n", + "- observations 2-D field (in spatial domain)\n", + "- window size (width x height): The size of the sliding window\n", + "- threshold: To compare the input fields against to generate a binary field\n", + "- compute method: (optional) currently only `numpy` is supported\n", + "\n", + "[fss_ref]: http://dx.doi.org/10.54302/mausam.v66i3.555\n", + "[integral_image]: https://en.wikipedia.org/wiki/Summed-area_table" + ] + }, + { + "cell_type": "markdown", + "id": "d466d1f0-1ce2-4517-8a76-dbabbcf14363", + "metadata": {}, + "source": [ + "### 1. Setup ###\n", + "\n", + "First, let's create some random data for our forecast and observation fields. Let's also try out a few scenarios:\n", + "```\n", + "scenario 1: obs distribution = fcst distribution = N(0, 1)\n", + "scenario 2: fcst distribution biased = N(1, 1)\n", + "scenario 3: fcst distribution variant = N(0, 2)\n", + "\n", + "where N(mu, sigma) = normal distribution with mean = mu and standard deviation = sigma\n", + "```\n", + "\n", + "> Note: in practice, some fields like rainfall cannot be negative. The data is synthesized purely to show the steps required to compute the score."
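To make the sliding-window computation described above concrete, here is a minimal, self-contained sketch of the same idea in plain NumPy. It is an illustration only, not the `scores` implementation: the function name is made up, and returning `0.0` when neither field contains any events is an assumed convention (it matches the zero-denominator behaviour exercised by `test_zero_denom_fss` in this change). It thresholds both fields, builds the window populations with a summed-area table (integral image), and then applies the FSS formula above over all interior windows.

```python
import numpy as np


def fss_single_field_sketch(fcst, obs, event_threshold, window_size):
    """Illustrative FSS for a single 2-D field using a summed-area table."""
    win_h, win_w = window_size

    # Step 1: threshold both fields to obtain binary (0/1) event fields.
    fcst_bin = (fcst > event_threshold).astype(np.int64)
    obs_bin = (obs > event_threshold).astype(np.int64)

    def window_populace(binary):
        # Summed-area table with a leading zero row/column,
        # so that sat[i, j] == binary[:i, :j].sum().
        sat = np.pad(binary, ((1, 0), (1, 0))).cumsum(axis=0).cumsum(axis=1)
        # Population of every full win_h x win_w window interior to the field.
        return (
            sat[win_h:, win_w:]
            - sat[:-win_h, win_w:]
            - sat[win_h:, :-win_w]
            + sat[:-win_h, :-win_w]
        )

    p_f = window_populace(fcst_bin)
    p_o = window_populace(obs_bin)

    denom = np.sum(p_o**2) + np.sum(p_f**2)
    if denom == 0:
        return 0.0  # assumed convention: no events in either field
    return 1.0 - np.sum((p_o - p_f) ** 2) / denom
```

For real use, prefer `scores.spatial.fss_2d_single_field` and `scores.spatial.fss_2d`, which the rest of this tutorial demonstrates; with the default interior-window behaviour they should closely agree with this sketch on the same inputs.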
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "36904d44-2047-4b9b-ae73-c7a49abd205b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " observations: shape=(400, 600), mean=0.00, stddev=1.00\n", + " forecast scenario 1: shape=(400, 600), mean=-0.00, stddev=1.00\n", + " forecast scenario 2: shape=(400, 600), mean=1.00, stddev=1.00\n", + " forecast scenario 3: shape=(400, 600), mean=-0.01, stddev=2.00\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "# specify input spatial dimensions\n", + "num_cols = 600\n", + "num_rows = 400\n", + "\n", + "# set seed for reproducibility\n", + "np.random.seed(42)\n", + "\n", + "# generate random fields\n", + "obs = np.random.normal(loc=0.0, scale=1.0, size = (num_rows, num_cols))\n", + "fcst_1 = np.random.normal(loc=0.0, scale=1.0, size = (num_rows, num_cols))\n", + "fcst_2 = np.random.normal(loc=1.0, scale=1.0, size = (num_rows, num_cols))\n", + "fcst_3 = np.random.normal(loc=0.0, scale=2.0, size = (num_rows, num_cols))\n", + "\n", + "# print out the different scenarios\n", + "_summarize = lambda x, field: print(\n", + " \"{: >20}: shape={}, mean={:.2f}, stddev={:.2f}\".format(\n", + " field, x.shape, np.mean(x), np.std(x)\n", + "))\n", + "_summarize(obs, \"observations\")\n", + "_summarize(fcst_1, \"forecast scenario 1\")\n", + "_summarize(fcst_2, \"forecast scenario 2\")\n", + "_summarize(fcst_3, \"forecast scenario 3\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "65de79c5-eb7d-49e1-bb74-469d6ef4295e", + "metadata": {}, + "source": [ + "### 2. Define inputs ###\n", + "\n", + "We need to now specify the threshold, window size and compute method. For now, lets choose a single window, and threshold. While the current `fss` method doesn't allow for more than 1 threshold and window definition per call, we'll see how calculate multiple thresholds/windows in a later step." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ddfb3f97-89f1-4b8d-ba7a-1227d06b8757", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "window_size: (100, 100)\n", + "threshold: 0.5\n", + "compute_method: FssComputeMethod.NUMPY\n" + ] + } + ], + "source": [ + "window_size = (100, 100) # height * width or row size * col size\n", + "threshold = 0.5 # arbitrarily chosen\n", + "\n", + "# Note: the compute method is a specialized argument, it doesn't need to be specified but is shown here for illustration purposes.\n", + "# If you do not specify a compute method it will default to `NUMPY` which is the only method currently supported and is sufficient\n", + "# for most purposes.\n", + "from scores.fast.fss.typing import FssComputeMethod\n", + "compute_method = FssComputeMethod.NUMPY\n", + "\n", + "print(\"window_size:\", window_size)\n", + "print(\"threshold:\", threshold)\n", + "print(\"compute_method:\", compute_method)" + ] + }, + { + "cell_type": "markdown", + "id": "4e12430b-f3f9-43b6-bc60-9b1e0b77eb63", + "metadata": {}, + "source": [ + "### 3. 
Run FSS ###\n", + "\n", + "Since we only have spatial dims it is sufficient to use `scores.spatial.fss_2d_single_field` for this purpose.\n", + "\n", + "Here are reasons why a user may prefer to use this API:\n", + "\n", + "- The user can choose their own aggregation method\n", + "- The user wants more choice on how to parallelize the computations.\n", + "- The user wants to compute multiple windows/thresholds without the overhead of `xarray`.\n", + "\n", + "In practice however, the aggregated approach `scores.spatial.fss_2d` in the next part can potentially be simpler to use, especially for more common data types used in the field i.e. `netCDF/xarray`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bcf90715-41bb-4c11-8cf0-2b3d50696088", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " | fss score\n", + " | ---------\n", + "scenario 1 (same distribution) | 0.9998031509754236\n", + "scenario 2 (biased fcst) | 0.7436622752127978\n", + "scenario 3 (variant fcst) | 0.9658938686575778\n" + ] + } + ], + "source": [ + "from scores.spatial import fss_2d_single_field\n", + "\n", + "# compile scenarios\n", + "scenarios = {\n", + " \"scenario 1 (same distribution)\": [obs, fcst_1],\n", + " \"scenario 2 (biased fcst)\": [obs, fcst_2],\n", + " \"scenario 3 (variant fcst)\": [obs, fcst_3],\n", + "}\n", + "result = []\n", + "\n", + "# run through each scenario and compute FSS with inputs defined above\n", + "for s, v in scenarios.items():\n", + " _obs, _fcst = v\n", + " _fss = fss_2d_single_field(\n", + " _fcst,\n", + " _obs,\n", + " event_threshold=threshold,\n", + " window_size=window_size,\n", + " compute_method=compute_method,\n", + " )\n", + " result.append((s, _fss))\n", + "\n", + "# tabulate results\n", + "print(f\"{' '*30} | fss score\")\n", + "print(f\"{' '*30} | ---------\")\n", + "_ = [print(\"{:<30} | {}\".format(s, v)) for (s, v) in result]" + ] + }, + { + "cell_type": "markdown", + "id": "99f1dcb6-9b48-4394-9aa7-b1f1911897fb", + "metadata": {}, + "source": [ + "As apparent above, with the same distribution, we get a score close to 1, this is because FSS is invariant to the exact location within the sliding window where the binary fields match, and so only the total count is used. With a biased distribution the score dips a lot, which is expected with a threshold of 0.5 and a bias of 1.0. Whereas for a variant forecast, we still get a reasonable score, this is also expected since the variation isn't too large and the distributions still overlap quite a bit." + ] + }, + { + "cell_type": "markdown", + "id": "011e1c13-4f25-4a19-9b76-593e2968f348", + "metadata": {}, + "source": [ + "### 4. Multiple inputs ###\n", + "\n", + "Suppose now that we want to collate data for multiple thresholds/windows. There are several ways of doing this, including vectorization. 
The following will show one way of doing it that, while more verbose, would hopefully help decompose the operations required to create the final accumulated dataset.\n", + "\n", + "Now that we understand how the argument mapping works, let's re-create the mapping and run the fss, we'll also store the results in a `W x T x S` array before converting it to xarray for displaying.\n", + "```\n", + "W x T x S where,\n", + "W = number of windows\n", + "T = number of thresholds\n", + "S = number of scenarios\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "34b44931-accc-4024-a51a-bf766811234e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (window_size: 5, threshold: 5, scenario: 3)> Size: 600B\n",
+       "array([[[0.99750859, 0.87817287, 0.9975254 ],\n",
+       "        [0.99610935, 0.8174165 , 0.99049278],\n",
+       "        [0.99434647, 0.74372786, 0.96275916],\n",
+       "        [0.99147751, 0.66198854, 0.90506356],\n",
+       "        [0.98687133, 0.57380178, 0.80923115]],\n",
+       "\n",
+       "       [[0.99937889, 0.87861151, 0.99939787],\n",
+       "        [0.99896402, 0.81789448, 0.99288328],\n",
+       "        [0.9984607 , 0.74403561, 0.96545096],\n",
+       "        [0.99769463, 0.66265241, 0.90811018],\n",
+       "        [0.9967834 , 0.57450354, 0.81262522]],\n",
+       "\n",
+       "       [[0.99973932, 0.87865374, 0.99974328],\n",
+       "        [0.99955596, 0.8179917 , 0.993333  ],\n",
+       "        [0.99935278, 0.74368841, 0.96571281],\n",
+       "        [0.99896659, 0.66250326, 0.90828967],\n",
+       "        [0.99866979, 0.57469957, 0.81327941]],\n",
+       "\n",
+       "       [[0.99985211, 0.87883698, 0.99986294],\n",
+       "        [0.99975723, 0.81818986, 0.99351877],\n",
+       "        [0.99966926, 0.74359123, 0.96582882],\n",
+       "        [0.99942888, 0.66249762, 0.90833126],\n",
+       "        [0.99928146, 0.57517972, 0.81375657]],\n",
+       "\n",
+       "       [[0.99990219, 0.87901309, 0.99991892],\n",
+       "        [0.99984583, 0.8184777 , 0.99364081],\n",
+       "        [0.99980315, 0.74366228, 0.96589387],\n",
+       "        [0.99964815, 0.66239176, 0.90830291],\n",
+       "        [0.99954721, 0.57541375, 0.81392659]]])\n",
+       "Coordinates:\n",
+       "  * window_size  (window_size) <U9 180B '[20 20]' '[40 40]' ... '[100 100]'\n",
+       "  * threshold    (threshold) <U4 80B '0.0' '0.25' '0.5' '0.75' '1.0'\n",
+       "  * scenario     (scenario) int64 24B 0 1 2\n",
+       "Attributes:\n",
+       "    description:  Fractions skill score
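The `window_size x threshold x scenario` array shown above can be assembled with a plain nested loop over `fss_2d_single_field`. The sketch below is an illustrative assumption rather than the original notebook cell: the variable names, the loop structure and the window list (which is only partly visible in the printed coordinates) are all made up, and it reuses `obs`, `fcst_1`, `fcst_2` and `fcst_3` from the Setup section.

```python
import numpy as np
import xarray as xr

from scores.spatial import fss_2d_single_field

# Illustrative parameter grid; the full window list used above is partly elided
# in the printed coordinates, so only a subset is spelled out here.
window_sizes = [(20, 20), (40, 40), (100, 100)]
thresholds = [0.0, 0.25, 0.5, 0.75, 1.0]
scenarios = [fcst_1, fcst_2, fcst_3]  # forecasts from the Setup section

values = np.empty((len(window_sizes), len(thresholds), len(scenarios)))
for i, ws in enumerate(window_sizes):
    for j, th in enumerate(thresholds):
        for k, fcst in enumerate(scenarios):
            # fss_2d_single_field returns a single scalar score per field pair
            values[i, j, k] = fss_2d_single_field(
                fcst, obs, event_threshold=th, window_size=ws
            )

fss_multi = xr.DataArray(
    values,
    dims=["window_size", "threshold", "scenario"],
    coords={
        "window_size": [str(np.array(ws)) for ws in window_sizes],
        "threshold": [str(th) for th in thresholds],
        "scenario": list(range(len(scenarios))),
    },
    attrs={"description": "Fractions skill score"},
)
```

Because `fss_2d_single_field` works on plain 2-D arrays and returns a float, a loop like this is also a natural place to plug in whatever parallelisation suits the data volume (for example `multiprocessing` or `dask`) — one of the reasons given earlier for using this lower-level API.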
" + ], + "text/plain": [ + " Size: 600B\n", + "array([[[0.99750859, 0.87817287, 0.9975254 ],\n", + " [0.99610935, 0.8174165 , 0.99049278],\n", + " [0.99434647, 0.74372786, 0.96275916],\n", + " [0.99147751, 0.66198854, 0.90506356],\n", + " [0.98687133, 0.57380178, 0.80923115]],\n", + "\n", + " [[0.99937889, 0.87861151, 0.99939787],\n", + " [0.99896402, 0.81789448, 0.99288328],\n", + " [0.9984607 , 0.74403561, 0.96545096],\n", + " [0.99769463, 0.66265241, 0.90811018],\n", + " [0.9967834 , 0.57450354, 0.81262522]],\n", + "\n", + " [[0.99973932, 0.87865374, 0.99974328],\n", + " [0.99955596, 0.8179917 , 0.993333 ],\n", + " [0.99935278, 0.74368841, 0.96571281],\n", + " [0.99896659, 0.66250326, 0.90828967],\n", + " [0.99866979, 0.57469957, 0.81327941]],\n", + "\n", + " [[0.99985211, 0.87883698, 0.99986294],\n", + " [0.99975723, 0.81818986, 0.99351877],\n", + " [0.99966926, 0.74359123, 0.96582882],\n", + " [0.99942888, 0.66249762, 0.90833126],\n", + " [0.99928146, 0.57517972, 0.81375657]],\n", + "\n", + " [[0.99990219, 0.87901309, 0.99991892],\n", + " [0.99984583, 0.8184777 , 0.99364081],\n", + " [0.99980315, 0.74366228, 0.96589387],\n", + " [0.99964815, 0.66239176, 0.90830291],\n", + " [0.99954721, 0.57541375, 0.81392659]]])\n", + "Coordinates:\n", + " * window_size (window_size) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (lead_time_idx: 6)> Size: 48B\n",
+       "array([0.94464596, 0.89355836, 0.86178198, 0.84077059, 0.82559444,\n",
+       "       0.81435967])\n",
+       "Coordinates:\n",
+       "  * lead_time_idx  (lead_time_idx) int64 48B 0 1 2 3 4 5
" + ], + "text/plain": [ + " Size: 48B\n", + "array([0.94464596, 0.89355836, 0.86178198, 0.84077059, 0.82559444,\n", + " 0.81435967])\n", + "Coordinates:\n", + " * lead_time_idx (lead_time_idx) int64 48B 0 1 2 3 4 5" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fss_out" + ] + }, + { + "cell_type": "markdown", + "id": "5767cf98-db72-4194-ae70-ae8ef74cfe1c", + "metadata": {}, + "source": [ + "**Result:** We should now see a dataset of FSS values with only `lead_time_idx` as the preserved dimension. If we look at the data, the FSS decreases as the leadtime increases, because we made our simulated data distribution shift away from the obs as the lead time increased. So this roughly looks like what we'd expect." + ] + }, + { + "cell_type": "markdown", + "id": "9f85ad8d-882b-470e-93e5-3fde82af937b", + "metadata": {}, + "source": [ + "**Exercise - multiple windows/thresholds:** An exercise is left to the reader to implement a similar method to support multiple input args for `fss_2d`. See: \"4. Multiple Inputs\" in the previous section for an idea. Note that this version would be slightly trickier since the result of `fss_2d` is not necessarily a scalar, however, there are still many approaches to solving this." + ] + }, + { + "cell_type": "markdown", + "id": "5ec66a0d-1c18-49d5-a4dc-da30cd43ce15", + "metadata": {}, + "source": [ + "#### 2a. Binary input ####\n", + "\n", + "We can also run FSS with binary inputs. In this part we'll repeat the above computation, except we'll use a pre-discretized input, e.g. using\n", + "`scores.processing.discretise.comparative_discretise` and passing the result into `scores.spatial.fss_2d_binary`. Note that the default threshold operator for `fss_2d` is `np.greater`. So for a fair comparison, we'll have to use the \">\" mode using `comparative_discretise`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c1e9ff7e-22c1-4a15-b2b1-cc1b6b99aa16", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (lead_time_idx: 6)> Size: 48B\n",
+       "array([0.94464596, 0.89355836, 0.86178198, 0.84077059, 0.82559444,\n",
+       "       0.81435967])\n",
+       "Coordinates:\n",
+       "  * lead_time_idx  (lead_time_idx) int64 48B 0 1 2 3 4 5
" + ], + "text/plain": [ + " Size: 48B\n", + "array([0.94464596, 0.89355836, 0.86178198, 0.84077059, 0.82559444,\n", + " 0.81435967])\n", + "Coordinates:\n", + " * lead_time_idx (lead_time_idx) int64 48B 0 1 2 3 4 5" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scores.spatial import fss_2d_binary\n", + "from scores.processing.discretise import comparative_discretise\n", + "\n", + "threshold = 0.5\n", + "window_size = (100, 100)\n", + "\n", + "# pre-discretise the inputs\n", + "da_fcst_binary = comparative_discretise(da_fcst, threshold, \">\")\n", + "da_obs_binary = comparative_discretise(da_obs, threshold, \">\")\n", + "\n", + "fss_out = fss_2d_binary(\n", + " da_fcst_binary,\n", + " da_obs_binary,\n", + " window_size=window_size,\n", + " spatial_dims=[\"lat_idx\", \"lon_idx\"],\n", + " preserve_dims=[\"lead_time_idx\"],\n", + " check_boolean=False,\n", + ")\n", + "\n", + "fss_out" + ] + }, + { + "cell_type": "markdown", + "id": "af6e2a48-5ca2-45a7-9e0d-58a823a9c8c0", + "metadata": {}, + "source": [ + "**Result:** We see that this is the same result as using `fss_2d`" + ] + }, + { + "cell_type": "markdown", + "id": "8b6e0c07-289b-427e-b366-7f03e75a08bb", + "metadata": {}, + "source": [ + "#### 2b. Zero-padding ####\n", + "\n", + "By default `fss_2d` does a sliding window on \"full\" windows (i.e. all grid points in the window are interior to the input field). This is different to a convolutional kernel which slides the window from outside the domain, and hence would require zero-padding to avoid boundary conditions. This would result in \"partial\" windows. \n", + "\n", + "Most Fourier based sliding window algorithms (e.g. Fast Fourier Transform) provide some sort of option for padding. Without going into too much detail, the algorithm used here does not necessitate this as it doesn't deal with the frequency domain. Regardless, there are various reasons to include zero-padding in the computation. The option is hence given with the optional `zero_padding` argument set to `True` as shown below: " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c310767c-77f0-4552-be78-3bb8a0e25adf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (lead_time_idx: 6)> Size: 48B\n",
+       "array([0.94448153, 0.89335672, 0.86154282, 0.84051093, 0.82539875,\n",
+       "       0.81418548])\n",
+       "Coordinates:\n",
+       "  * lead_time_idx  (lead_time_idx) int64 48B 0 1 2 3 4 5
" + ], + "text/plain": [ + " Size: 48B\n", + "array([0.94448153, 0.89335672, 0.86154282, 0.84051093, 0.82539875,\n", + " 0.81418548])\n", + "Coordinates:\n", + " * lead_time_idx (lead_time_idx) int64 48B 0 1 2 3 4 5" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scores.spatial import fss_2d\n", + "\n", + "threshold = 0.5\n", + "window_size = (100, 100)\n", + "\n", + "fss_out = fss_2d(\n", + " da_fcst,\n", + " da_obs,\n", + " event_threshold=threshold,\n", + " window_size=window_size,\n", + " spatial_dims=[\"lat_idx\", \"lon_idx\"],\n", + " preserve_dims=[\"lead_time_idx\"],\n", + " zero_padding=True, # set zero-padding to true\n", + ")\n", + "\n", + "fss_out\n" + ] + }, + { + "cell_type": "markdown", + "id": "3b47f235-9c47-4582-91bd-487b9ee8f688", + "metadata": {}, + "source": [ + "**Result:** Because zero-padding uses partial windows in its aggregation, we get slightly different results. However, on quick inspection its easy to see that they still match to a few decimal places. Note however, that the algorithms do tend to differ more for smaller input fields." + ] + }, + { + "cell_type": "markdown", + "id": "e9baad9a-4c3b-4e75-86b6-8ee6b844913e", + "metadata": {}, + "source": [ + "## Things to try next\n", + "\n", + "- Graph FSS over multiple thresholds and window sizes.\n", + "- Derive theoretical convergence of FSS, for a given threshold. Verify using simulations.\n", + "- Apply FSS over other distributions (e.g. uniform/beta). Validate the results." + ] + }, + { + "cell_type": "markdown", + "id": "6588edd3-7008-4d6c-858f-3f35fdbaa1fe", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "1. [Roberts, N.M. and H.W. Lean, 2008: Scale-selective verification of rainfall accumulations from high-resolution forecasts of convective events. Mon. Wea. Rev., 136, 78-97.][1]\n", + "2. [Faggian, Nathan & Roux, Belinda & Steinle, Peter & Ebert, Elizabeth. (2015). Fast calculation of the Fractions Skill Score. Mausam. 66. 457-466. 10.54302/mausam.v66i3.555.][2]\n", + "3. [Summed-area table][3]\n", + "\n", + "[1]: https://doi.org/10.1175/2007MWR2123.1\n", + "[2]: http://dx.doi.org/10.54302/mausam.v66i3.555\n", + "[3]: https://en.wikipedia.org/wiki/Summed-area_table" + ] + }, + { + "cell_type": "markdown", + "id": "4227292c-769a-4062-b43f-c3325bc43e0d", + "metadata": {}, + "source": [ + "## Appendix\n", + "\n", + "**Accumulated FSS Algorithm**\n", + "\n", + "```\n", + "Let's assume our data set is of the form lat x lon x time x lead_time => N x M x T x L\n", + "where,\n", + " lat x lon = spatial dims\n", + " \n", + "1. [Map] Map the input field into a list of fields to be aggregated over. i.e.\n", + "\n", + " lat x lon x time x lead_time -> [lat x lon] x lead_time,\n", + " where,\n", + " [lat x lon] is a list of T spatial fields.\n", + " keep_dim = lead_time; spatial_dims = lat, lon; collapse_dim = time\n", + "\n", + "2. [Compute] For each lead time compute the decomposed fss score for [lat x lon] (len = T).\n", + " \n", + " The decomposed FSS score is a tuple containing the components required\n", + " to formulate the inal score:\n", + " \n", + " (obs_sum / T, fcst_sum / T, (obs_sum - fcst_sum) / T)\n", + "\n", + " The division by T is optional, but good to have for larger datasets to\n", + " avoid integer overflow issues when aggregating.\n", + "\n", + "3. 
[Reduce] Sum the decomposed score across the accumulated dimension.\n", + "\n", + " fss <- [0; 1 x L]\n", + " for each leadtime, lt {\n", + " obs_sum <- sum_T(fss_decomposed[lt][0])\n", + " fcst_sum <- sum_T(fss_decomposed[lt][1])\n", + " diff_sum <- sum_T(fss_decomposed[lt][2])\n", + " fss[lt] <- dff_sum / (obs_sum + fcst_sum)\n", + " }\n", + " return fss :: size := 1 x L\n", + "\n", + "\n", + "The reason why we need to keep track of the decomposed score instead of simply\n", + "aggregating the final score is because:\n", + "\n", + "1/x + 1/y != 1 / (x+y)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f876c6b8-ca76-43a6-9119-bce993dc3e0a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/Tutorial_Gallery.ipynb b/tutorials/Tutorial_Gallery.ipynb index 14bdc196..46bca667 100644 --- a/tutorials/Tutorial_Gallery.ipynb +++ b/tutorials/Tutorial_Gallery.ipynb @@ -81,6 +81,20 @@ "- [Receiver Operating Characteristic (ROC)](./ROC.ipynb)\n" ] }, + { + "cell_type": "markdown", + "id": "7def466e-1cde-4e6e-8245-41b5aa7a04c7", + "metadata": { + "tags": [ + "nbsphinx-gallery" + ] + }, + "source": [ + "## Spatial\n", + "\n", + "- [Fractions Skill Score](./Fractions_Skill_Score.ipynb)" + ] + }, { "cell_type": "markdown", "id": "c8765192", @@ -109,6 +123,12 @@ "- [Isotonic Regression and Reliability Diagrams](./Isotonic_Regression_And_Reliability_Diagrams.ipynb)" ] }, + { + "cell_type": "markdown", + "id": "1035fdb8-418a-40f8-b15e-bab32192b9f1", + "metadata": {}, + "source": [] + }, { "cell_type": "markdown", "id": "6e1ed380",