ESMValGroup · bouweandela · Sep 1, 2025 · Jul 18, 2025 · Jul 22, 2025 · Jul 23, 2025
diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst
@@ -311,8 +311,10 @@ A list of the datasets for which a CMORizers is available is provided in the fol
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | ESACCI-AEROSOL               | abs550aer, od550aer, od550aerStderr, od550lt1aer, od870aer, od870aerStderr (aero)                    |   2  | NCL             |
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
+| ESACCI-BIOMASS               | agb (Lyr, frequency=yr)                                                                              |   2  | Python          |
++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | ESACCI-CLOUD                 | clivi, clt, cltStderr, clwvi, lwp, rlut, rlutcs, rsut, rsutcs, rsdt, rlus, rsus, rsuscs (Amon),      |   2  | Python          |
-|                              | clt, clwvi, cod (day)                                                                                |   2  |                 |
+|                              | clt, clwvi, cod (day)                                                                                |      |                 |
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | ESACCI-FIRE                  | burntArea (Lmon)                                                                                     |   2  | NCL             |
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+

@@ -0,0 +1,18 @@
+attributes:
+  project_id: 'OBS6'
+  dataset_id: 'ESACCI-BIOMASS'
+  tier: 2
+  modeling_realm: sat
+  institution: 'GAMMA Remote Sensing'
+  reference: 'esacci-biomass'
+  source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps/'
+  title: 'ESA CCI Biomass'
+  version: 'v6.0'
+  comment: ''
+variables:
+  agb:
+    mip: Lyr
+    frequency: yr
+    long_name: 'Above-ground biomass'
+    raw: agb
+    filename: ESACCI-BIOMASS-L4-AGB-MERGED-10000m-fv6.0.nc
@@ -451,6 +451,19 @@ datasets:
       Other years are not considered since they are not complete.
       Put all files in input_dir_path (no subdirectories with years).
 
+  ESACCI-BIOMASS:
+    tier: 2
+    source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps/v6.0/netcdf
+    last_access: 2025-07-16
+    info: |
+        Download and processing instructions:
+        Use the following CLI to download all the files:
+        esmvaltool data download ESACCI-BIOMASS
+        The underlying downloader is located here:
+        /ESMValTool/esmvaltool/cmorizers/data/downloaders/datasets/esacci_biomass.py
+        and it will download the file regridded to 10 km available on CEDA (2007, 2010, 2015-2022)
+        under a single directory as follows: ${RAWOBS}/Tier2/ESACCI-BIOMASS
+
   ESACCI-CLOUD:
     tier: 2
     source: https://public.satproj.klima.dwd.de/data/ESA_Cloud_CCI/CLD_PRODUCTS/v3.0/

@@ -0,0 +1,45 @@
+"""Script to download ESACCI-BIOMASS agb data from the CEDA."""
+
+from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader
+
+
+def download_dataset(
+    config,
+    dataset,
+    dataset_info,
+    start_date,
+    end_date,
+    overwrite,
+):
+    """Download dataset.
+
+    Parameters
+    ----------
+    config : dict
+        ESMValTool's user configuration
+    dataset : str
+        Name of the dataset
+    dataset_info : dict
+         Dataset information from the datasets.yml file
+    start_date : datetime
+        Start of the interval to download
+    end_date : datetime
+        End of the interval to download
+    overwrite : bool
+        Overwrite already downloaded files
+    """
+    # Initialize the downloader
+    downloader = CCIDownloader(
+        config=config,
+        dataset=dataset,
+        dataset_info=dataset_info,
+        overwrite=overwrite,
+    )
+    downloader.ftp_name = "biomass"
+    downloader.connect()
+
+    # Set current working directory to the main directory with the files
+    downloader.set_cwd("/agb/maps/v6.0/netcdf")
+
+    # Download 10 km file
+    downloader.download_file("ESACCI-BIOMASS-L4-AGB-MERGED-10000m-fv6.0.nc")
@@ -0,0 +1,174 @@
+"""ESMValTool CMORizer for ESACCI-BIOMASS above-ground biomass (agb) data.
+
+Tier
+    Tier 2: other freely-available dataset.
+
+Source
+    ftp://anon-ftp.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps
+
+Last access
+    20250716
+
+Download and processing instructions
+    Download 10 km file:
+      v6.0/netcdf/ESACCI-BIOMASS-L4-AGB-MERGED-10000m-fv6.0.nc
+    Put file in ${RAWOBS}/Tier2/ESACCI-BIOMASS
+"""
+
+import datetime
+import glob
+import logging
+import os
+from copy import deepcopy
+
+import iris
+import numpy as np
+from dask import array as da
+from esmvalcore.cmor.table import CMOR_TABLES
+from esmvalcore.preprocessor import extract_time
+
+from esmvaltool.cmorizers.data.utilities import (
+    flip_dim_coord,
+    save_variable,
+    set_global_atts,
+)
+
+# Configure logging
+# NOTE(review): basicConfig() runs at import time and configures the root
+# logger when it has no handlers yet -- confirm this is wanted in a module
+# that is imported by the CMORizer framework rather than run as a script.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger used by the functions in this file.
+logger = logging.getLogger(__name__)
+
+
+def _extract_variable(in_files, var, cfg, out_dir):
+    logger.info(
+        "CMORizing variable '%s' from input files '%s'",
+        var["short_name"],
+        ", ".join(in_files),
+    )
+    attributes = deepcopy(cfg["attributes"])
+    attributes["mip"] = var["mip"]
+    attributes["raw"] = var["raw"]
+    attributes["frequency"] = var["frequency"]
+    cmor_table = CMOR_TABLES[attributes["project_id"]]
+    definition = cmor_table.get_variable(var["mip"], var["short_name"])
+
+    # load all input files (1 year) into 1 cube
+    cube_list = iris.load(in_files, var["raw"])
+
+    drop_attrs = ["valid_max", "valid_min"]
+
+    for cube in cube_list:
+        # set correct names
+        cube.var_name = definition.short_name
+        cube.standard_name = definition.standard_name
+        cube.long_name = definition.long_name
+
+        for attr in drop_attrs:
+            if attr in cube.attributes:
+                cube.attributes.pop(attr)
+
+        cube.coord("time").points = (
+            cube.coord("time").core_points().astype("float64")
+        )
+
+        # fix units
+        cube.convert_units(definition.units)
+
+        # set global attributes
+        set_global_atts(cube, attributes)
+
+        # roll longitude (-180...180 --> 0...360)
+        cube.coord("longitude").points = cube.coord("longitude").points + 180.0
+        nlon = len(cube.coord("longitude").points)
+        cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=2)
+
+        # remove rouding errors introduced by da.roll
+        loncoord = cube.coord("longitude")
+        latcoord = cube.coord("latitude")
+        loncoord.points = np.round(loncoord.core_points(), 3)
+        latcoord.points = np.round(latcoord.core_points(), 3)
+
+        # flip latitudes
+        flip_dim_coord(cube, "latitude")
+
+        # fix coordinates
+        cube = _fix_coordinates(cube, definition)
+        cube.coord("latitude").attributes = None
+        cube.coord("longitude").attributes = None
+
+        # save each year to a separate output file
+        timecoord = cube.coord("time")
+        for time in timecoord.units.num2date(timecoord.points):
+            # extract current year
+            outcube = extract_time(cube, time.year, 1, 1, time.year, 12, 31)
+            # adjust time bounds to (year-01-01 00:00, year+1-01-01 00:00)
+            out_timecoord = outcube.coord("time")
+            start_date = datetime.datetime(time.year, 1, 1)
+            end_date = datetime.datetime(time.year + 1, 1, 1)
+            out_timecoord.bounds = np.array(
+                [
+                    out_timecoord.units.date2num(start_date),
+                    out_timecoord.units.date2num(end_date),
+                ]
+            )
+            # write output to file
+            logger.debug("Saving cube\n%s", outcube)
+            logger.debug("Setting time dimension to UNLIMITED while saving!")
+            save_variable(
+                outcube,
+                var["short_name"],
+                out_dir,
+                attributes,
+                unlimited_dimensions=["time"],
+            )
+
+    logger.info("Finished CMORizing %s", ", ".join(in_files))
+
+
+def _fix_coordinates(cube, definition):
+    """Fix coordinates."""
+    axis2def = {"T": "time", "X": "longitude", "Y": "latitude"}
+    axes = ["T", "X", "Y"]
+
+    for axis in axes:
+        coord_def = definition.coordinates.get(axis2def[axis])
+        if coord_def:
+            coord = cube.coord(axis=axis)
+            if axis == "T":
+                coord.convert_units("days since 1850-1-1 00:00:00.0")
+                coord.points = coord.core_points().astype("float64")
+                if coord.bounds is not None:
+                    coord.bounds = None
+
+            if len(coord.points) > 1:
+                if coord.bounds is not None:
+                    coord.bounds = None
+                coord.guess_bounds()
+            coord.standard_name = coord_def.standard_name
+            coord.var_name = coord_def.out_name
+            coord.long_name = coord_def.long_name
+
+    return cube
+
+
+def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
+    """Cmorize data."""
+    glob_attrs = cfg["attributes"]
+
+    logger.info(
+        "Starting cmorization for tier%s OBS files: %s",
+        glob_attrs["tier"],
+        glob_attrs["dataset_id"],
+    )
+    logger.info("Input data from: %s", in_dir)
+    logger.info("Output will be written to: %s", out_dir)
+    logger.info("CMORizing ESACCI-BIOMASS version %s", glob_attrs["version"])
+
+    for short_name, var in cfg["variables"].items():
+        filepattern = os.path.join(in_dir, var["filename"])
+        in_files = glob.glob(filepattern)
+        if "short_name" not in var:
+            var["short_name"] = short_name
+        if not in_files:
+            logger.info("%d: no data not found for variable %s", short_name)
+        else:
+            _extract_variable(in_files, var, cfg, out_dir)
diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml
@@ -202,6 +202,26 @@ diagnostics:
     scripts: null
 
 
+  ESACCI-BIOMASS:
+    description: ESACCI-BIOMASS check
+    variables:
+      agb_2007:
+        start_year: 2007
+        end_year: 2007
+        short_name: agb
+      agb_2010:
+        start_year: 2010
+        end_year: 2010
+        short_name: agb
+      agb_2015-2022:
+        start_year: 2015
+        end_year: 2022
+        short_name: agb
+    additional_datasets:
+      - {dataset: ESACCI-BIOMASS, project: OBS6, mip: Lyr, tier: 2, type: sat, version: v6.0, frequency: yr}
+    scripts: null
+
+
   ESACCI-CLOUD:
     description: ESACCI-CLOUD check
     variables:

diff --git a/esmvaltool/references/esacci-biomass.bibtex b/esmvaltool/references/esacci-biomass.bibtex
@@ -0,0 +1,10 @@
+@misc{esacci-biomass,
+	doi = {10.5285/95913ffb6467447ca72c4e9d8cf30501},
+	url = {https://dx.doi.org/10.5285/95913ffb6467447ca72c4e9d8cf30501},
+	year = {2025},
+	month = {apr},
+        day = {17},
+	publisher = {NERC EDS Centre for Environmental Data Analysis},
+	author = {Santoro, M. and Cartus, O.},
+	title = {ESA Biomass Climate Change Initiative (Biomass{\_}cci): Global datasets of forest above-ground biomass for the years 2007, 2010, 2015, 2016, 2017, 2018, 2019, 2020, 2021 and 2022, v6.0},
+}