126 commits
ae96af7
fresh start for multiple output steps
dietervdb-meteo Oct 30, 2025
4e884b5
minor fixes
dietervdb-meteo Oct 30, 2025
692a67c
add example configs
dietervdb-meteo Oct 31, 2025
38f95e8
Add observation-informed interpolator
OpheliaMiralles Oct 31, 2025
0e257c9
Precommit
OpheliaMiralles Oct 31, 2025
69e4aef
Add training part
OpheliaMiralles Oct 31, 2025
2c8c57c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
7dc7921
Precommit
OpheliaMiralles Oct 31, 2025
bbcf274
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
41776ce
Fix bad alignment
OpheliaMiralles Oct 31, 2025
6638392
Remove the underscores
OpheliaMiralles Oct 31, 2025
9411fa4
Fix
OpheliaMiralles Oct 31, 2025
5fd2c2a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
9071eb2
Merge branch 'main' into obsinterpolator
OpheliaMiralles Nov 3, 2025
27ca361
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Nov 4, 2025
28c9e8b
rename time step scaler
dietervdb-meteo Nov 4, 2025
ccffab3
enable boundary rollout
dietervdb-meteo Nov 4, 2025
5027007
update training/config
dietervdb-meteo Nov 5, 2025
a231168
first try to fix test
dietervdb-meteo Nov 5, 2025
6b8b26b
Revert "first try to fix test"
dietervdb-meteo Nov 5, 2025
2a3dd47
update more configs
dietervdb-meteo Nov 5, 2025
65af1ca
fix aicon test
dietervdb-meteo Nov 5, 2025
2cf14cf
fix loss function tests
dietervdb-meteo Nov 5, 2025
2dc0f9f
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Nov 5, 2025
5a7382c
Merge branch 'main' into obsinterpolator
OpheliaMiralles Nov 11, 2025
fe555fb
Merge branch 'main' into obsinterpolator
OpheliaMiralles Nov 12, 2025
8be3cec
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Nov 12, 2025
19d1d48
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Nov 13, 2025
81decd8
Merge branch 'main' into obsinterpolator
OpheliaMiralles Nov 17, 2025
3e431bc
change time dimension reduction in loss
dietervdb-meteo Nov 19, 2025
46458a1
Add lead time decay weights
OpheliaMiralles Nov 19, 2025
b8740a3
fix: basemodel.predict_step (#672)
japols Nov 13, 2025
99e4a2a
fix(models): assert no dropout (#638)
JPXKQX Nov 14, 2025
b8a2b0a
feat(training)!: remove support for EDA (#651)
JPXKQX Nov 14, 2025
e3cb040
fix: bug for mlflow offline logging (#675)
anaprietonem Nov 17, 2025
94b5362
fix(graphs,normalisation): add assert when dividing by 0 (#676)
JPXKQX Nov 17, 2025
a017784
feat(graphs): add LimitedAreaMask for stretched hidden nodes (#671)
JPXKQX Nov 17, 2025
e48b281
chore: Release main (#619)
DeployDuck Nov 18, 2025
a78aee2
feat(models): multibackend all_to_all wrapper (#95)
cathalobrien Nov 19, 2025
41184bf
fix(models): processor chunking (#629)
japols Nov 19, 2025
4a74379
fix: small pytorch boxcox inefficiency (#683)
elkir Nov 19, 2025
0d9dec0
fix!: cond layer norm (#658)
ssmmnn11 Nov 19, 2025
da0e358
fresh start for multiple output steps
dietervdb-meteo Oct 30, 2025
aae9092
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 31, 2025
dede8eb
Rebase on right commit
OpheliaMiralles Nov 19, 2025
557dab6
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Nov 19, 2025
cf462c0
Remove output scalers
OpheliaMiralles Nov 19, 2025
2a05162
Leftover output step
OpheliaMiralles Nov 20, 2025
6e1d70d
Update training/src/anemoi/training/losses/scalers/time_step.py
OpheliaMiralles Nov 20, 2025
3ae2412
Update training/src/anemoi/training/losses/scalers/time_step.py
OpheliaMiralles Nov 20, 2025
1b793d2
Add schema for LeadTimeDecayScaler
OpheliaMiralles Nov 20, 2025
275e937
fix schema
dietervdb-meteo Nov 20, 2025
444308a
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Nov 20, 2025
d98806b
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Nov 21, 2025
8e5c611
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Nov 25, 2025
3dbf705
Merge branch 'main' into obsinterpolator
OpheliaMiralles Nov 25, 2025
9269c27
Merge branch 'main' into obsinterpolator
OpheliaMiralles Dec 12, 2025
748a2f3
fix: added safeguard for contiguous memory after expand()
MicheleCattaneo Dec 15, 2025
762c0ca
Trial for the multi-output interpolator
Dec 15, 2025
0282f35
Merge remote-tracking branch 'origin/obsinterpolator' into feat/multi…
Dec 19, 2025
8bbbbd2
Merge remote-tracking branch 'origin/main' into feat/multi-output-steps
Dec 19, 2025
59b38f5
Add test, obs-informed interpolator and fix tests
Dec 22, 2025
d8248b8
Merge remote-tracking branch 'origin/main' into feat/multi-output-steps
OpheliaMiralles Dec 22, 2025
7ba8209
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 22, 2025
97a2b83
Fix tests bis
OpheliaMiralles Dec 22, 2025
c7955ac
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 22, 2025
7909d90
Remove hardcoding of schemas for model multi_out
OpheliaMiralles Dec 22, 2025
f9237cb
Same for datamodule
OpheliaMiralles Dec 22, 2025
81351fe
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 22, 2025
b6eac3f
Remove redundant losses tests
OpheliaMiralles Dec 22, 2025
4a3511d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 22, 2025
77c3958
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Jan 2, 2026
46a8ed7
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Jan 5, 2026
a626019
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Jan 5, 2026
2e92082
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Jan 5, 2026
e4877cb
temporary fix
dietervdb-meteo Jan 6, 2026
88c4437
Try to align dims
OpheliaMiralles Jan 6, 2026
b073398
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Jan 6, 2026
884f21d
Try to fix plots
OpheliaMiralles Jan 6, 2026
3f819c6
One last trial before giving up
OpheliaMiralles Jan 6, 2026
edcb4d5
Merge remote-tracking branch 'origin/main' into feat/multi-output-steps
OpheliaMiralles Jan 6, 2026
e735338
More plot fixes
OpheliaMiralles Jan 7, 2026
05fa26f
Diffusion model adaptation
OpheliaMiralles Jan 7, 2026
742ac97
Trial to fix diffusion tests
OpheliaMiralles Jan 8, 2026
6a96403
Fix diffusion
OpheliaMiralles Jan 8, 2026
1e56454
Fix ensemble diffusion integration tests
OpheliaMiralles Jan 8, 2026
b10c448
Fix unit test
OpheliaMiralles Jan 8, 2026
bfb9722
refactor: delegate init of `AnemoiModelEncProcDecHierarchical` to par…
dietervdb-meteo Jan 9, 2026
2062b1d
Merge branch 'main' into feat/multi-output-steps
OpheliaMiralles Jan 9, 2026
85c8fdb
fix rollout base class refactor
dietervdb-meteo Jan 9, 2026
570f4ed
clean-up
dietervdb-meteo Jan 9, 2026
3e166de
further clean-up
dietervdb-meteo Jan 9, 2026
9e28961
add multi-out test
dietervdb-meteo Jan 9, 2026
c9470b7
fix unit tests
dietervdb-meteo Jan 9, 2026
9cfaf36
revert masks
dietervdb-meteo Jan 12, 2026
69fcfdd
fix lam plots
dietervdb-meteo Jan 13, 2026
b4f5a0f
adapt ens rollout to multi-out
dietervdb-meteo Jan 14, 2026
4a5d43b
update docstrings
dietervdb-meteo Jan 14, 2026
85512aa
update docstring
dietervdb-meteo Jan 14, 2026
fe963a5
fix plots
dietervdb-meteo Jan 14, 2026
667a1dc
add comment
dietervdb-meteo Jan 14, 2026
6a8027f
enable ens multi-out
dietervdb-meteo Jan 16, 2026
a0ed6aa
add integration test multi-out-ensemble
dietervdb-meteo Jan 16, 2026
59e8023
fix dimension ordering
dietervdb-meteo Jan 19, 2026
af24f94
support multi-out for diffusion
dietervdb-meteo Jan 19, 2026
46be8a3
enable multi-out tendency diffusion
dietervdb-meteo Jan 19, 2026
0fd6857
add multi-out diffusion tests
dietervdb-meteo Jan 19, 2026
594a3db
Merge remote-tracking branch origin/main into local/multi-merge-test
dietervdb-meteo Jan 20, 2026
55c183f
fix ensemble-plot-mixin test
dietervdb-meteo Jan 20, 2026
d7356f4
fix: add edge_dim to init
dietervdb-meteo Jan 20, 2026
d18f1fa
tmp fix: select last output step
dietervdb-meteo Jan 21, 2026
7c4dd9a
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Jan 21, 2026
48df0cc
Merge branch 'main' into feat/multi-output-steps
dietervdb-meteo Jan 21, 2026
d194588
add warning for tendency diffusion
dietervdb-meteo Jan 21, 2026
4c83a1a
overwrite multistep config entries for interpolator
dietervdb-meteo Jan 21, 2026
6ba6a79
Merge remote-tracking branch 'origin/main' into feat/multi-output-steps
OpheliaMiralles Jan 22, 2026
096b2c5
diffusion tendency forecasting for multi output steps
ssmmnn11 Jan 22, 2026
af0f54e
test fixes
ssmmnn11 Jan 22, 2026
2a826a9
Tentative merge with multi dataset
OpheliaMiralles Jan 22, 2026
6d73628
Add timeincrement
OpheliaMiralles Jan 22, 2026
e4befb4
Adapt config to new structure
OpheliaMiralles Jan 22, 2026
de45d5e
scaler fix
ssmmnn11 Jan 22, 2026
7b0d192
Merge remote-tracking branch 'origin/feat/multi-output-steps-diffusio…
OpheliaMiralles Jan 22, 2026
60234d7
Manage stat tendencies
OpheliaMiralles Jan 22, 2026
67b77a1
Fix tests
OpheliaMiralles Jan 23, 2026
1162391
Merge branch 'main' into feat/obsinterpolator
OpheliaMiralles Jan 23, 2026
1 change: 1 addition & 0 deletions .gitignore
@@ -128,6 +128,7 @@ _dev/
_api/
./outputs
*tmp_data/
*/uv.lock

# Project specific
10 changes: 8 additions & 2 deletions models/src/anemoi/models/data_indices/collection.py
@@ -30,9 +30,15 @@ class IndexCollection:
def __init__(self, data_config, name_to_index) -> None:
self.config = OmegaConf.to_container(data_config, resolve=True)
self.name_to_index = dict(sorted(name_to_index.items(), key=operator.itemgetter(1)))
self.forcing = [] if data_config.forcing is None else OmegaConf.to_container(data_config.forcing, resolve=True)
self.forcing = (
[]
if data_config.get("forcing", None) is None
else OmegaConf.to_container(data_config.forcing, resolve=True)
)
self.diagnostic = (
[] if data_config.diagnostic is None else OmegaConf.to_container(data_config.diagnostic, resolve=True)
[]
if data_config.get("diagnostic", None) is None
else OmegaConf.to_container(data_config.diagnostic, resolve=True)
)
self.target = (
[] if data_config.get("target", None) is None else OmegaConf.to_container(data_config.target, resolve=True)
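A note on why the lookups above moved to `.get`: schema-backed OmegaConf configs are typically in struct mode, where plain attribute access on an absent key raises instead of returning `None`. A minimal sketch of the difference, with invented config keys and values (assumes OmegaConf >= 2.1, where `.get` with an explicit default works under struct mode):

```python
from omegaconf import OmegaConf

# Illustrative config only; not an actual Anemoi data_config.
data_config = OmegaConf.create({"forcing": ["cos_latitude"], "target": None})
OmegaConf.set_struct(data_config, True)  # schema-validated configs behave like this

# data_config.diagnostic  # would raise ConfigAttributeError: key not in struct
print(data_config.get("diagnostic", None))  # None -- no exception

forcing = (
    []
    if data_config.get("forcing", None) is None
    else OmegaConf.to_container(data_config.forcing, resolve=True)
)
print(forcing)  # ['cos_latitude']
```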
32 changes: 22 additions & 10 deletions models/src/anemoi/models/interface/__init__.py
@@ -16,6 +16,7 @@
from torch_geometric.data import HeteroData

from anemoi.models.preprocessing import Processors
from anemoi.models.preprocessing import StepwiseProcessors
from anemoi.models.utils.config import get_multiple_datasets_config
from anemoi.utils.config import DotDict

@@ -103,25 +104,36 @@ def _build_processors_for_dataset(
tuple
(pre_processors, post_processors, pre_processors_tendencies, post_processors_tendencies)
"""

# Build processors for the dataset
processors = [
[name, instantiate(processor, data_indices=data_indices, statistics=statistics)]
for name, processor in processors_configs.items()
]
def build_processors(statistics: dict) -> list:
return [
[name, instantiate(processor, data_indices=data_indices, statistics=statistics)]
for name, processor in processors_configs.items()
]

processors = build_processors(statistics)
pre_processors = Processors(processors)
post_processors = Processors(processors, inverse=True)

# Build tendencies processors if provided
pre_processors_tendencies = None
post_processors_tendencies = None
if statistics_tendencies is not None:
processors_tendencies = [
[name, instantiate(processor, data_indices=data_indices, statistics=statistics_tendencies)]
for name, processor in processors_configs.items()
]
pre_processors_tendencies = Processors(processors_tendencies)
post_processors_tendencies = Processors(processors_tendencies, inverse=True)
assert isinstance(statistics_tendencies, dict), "Tendency statistics must be a dict with per-step entries."
lead_times = statistics_tendencies.get("lead_times")
assert isinstance(lead_times, list), "Tendency statistics must include 'lead_times'."
assert all(
lead_time in statistics_tendencies for lead_time in lead_times
), "Missing tendency statistics for one or more output steps."
pre_processors_tendencies = StepwiseProcessors(lead_times)
post_processors_tendencies = StepwiseProcessors(lead_times)
for lead_time in lead_times:
step_stats = statistics_tendencies[lead_time]
if step_stats is not None:
step_processors = build_processors(step_stats)
pre_processors_tendencies.set(lead_time, Processors(step_processors))
post_processors_tendencies.set(lead_time, Processors(step_processors, inverse=True))

return pre_processors, post_processors, pre_processors_tendencies, post_processors_tendencies

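The hunk above relies on a `StepwiseProcessors` container imported from `anemoi.models.preprocessing`, whose implementation is not part of this diff. A minimal sketch consistent with how it is used here (constructed from the lead times, filled via `set`, iterated per output step); the interface is inferred from the call sites, so the real class may differ:

```python
import torch.nn as nn


class StepwiseProcessors(nn.Module):
    """Sketch of a per-lead-time processor container (inferred interface)."""

    def __init__(self, lead_times: list) -> None:
        super().__init__()
        self.lead_times = list(lead_times)
        self.processors = nn.ModuleDict()  # ModuleDict keys must be strings

    def set(self, lead_time, processors: nn.Module) -> None:
        self.processors[str(lead_time)] = processors

    def __getitem__(self, lead_time) -> nn.Module:
        return self.processors[str(lead_time)]

    def __iter__(self):
        # yields None for output steps that were never set
        for lead_time in self.lead_times:
            key = str(lead_time)
            yield self.processors[key] if key in self.processors else None

    def __len__(self) -> int:
        return len(self.lead_times)
```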
6 changes: 6 additions & 0 deletions models/src/anemoi/models/models/base.py
@@ -67,6 +67,7 @@
model_config.graph.hidden
) # assumed to be all the same because this is how we construct the graphs
self.multi_step = model_config.training.multistep_input
self.multi_out = model_config.training.multistep_output
self.num_channels = model_config.model.num_channels

self.node_attributes = torch.nn.ModuleDict()
@@ -99,6 +100,7 @@ def _calculate_shapes_and_indices(self, data_indices: dict) -> None:
self._internal_output_idx = {}
self.input_dim = {}
self.input_dim_latent = {}
self.output_dim = {}

for dataset_name, dataset_indices in data_indices.items():
self.num_input_channels[dataset_name] = len(dataset_indices.model.input)
@@ -107,6 +109,7 @@ def _calculate_shapes_and_indices(self, data_indices: dict) -> None:
self._internal_input_idx[dataset_name] = dataset_indices.model.input.prognostic
self._internal_output_idx[dataset_name] = dataset_indices.model.output.prognostic
self.input_dim[dataset_name] = self._calculate_input_dim(dataset_name)
self.output_dim[dataset_name] = self._calculate_output_dim(dataset_name)
self.input_dim_latent[dataset_name] = self._calculate_input_dim_latent(dataset_name)

def _calculate_input_dim(self, dataset_name: str) -> int:
@@ -200,6 +203,9 @@ def _get_consistent_dim(self, x: dict[str, Tensor], dim: int) -> int:

return dim_sizes[0]

def _calculate_output_dim(self, dataset_name: str) -> int:
return self.multi_out * self.num_output_channels[dataset_name]

@abstractmethod
def _build_networks(self, model_config: DotDict) -> None:
"""Builds the networks for the model."""
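The effect of the new `_calculate_output_dim` in `base.py` is that the decoder's flattened output width scales with the number of output steps; the time axis is recovered later by an einops split. A toy check of the arithmetic (all numbers invented):

```python
multi_out = 3             # output steps produced in a single forward pass
num_output_channels = 80  # per-step output variables for one dataset

output_dim = multi_out * num_output_channels  # decoder out_channels_dst
assert output_dim == 240

# Downstream, the "(time vars)" group in einops.rearrange splits the
# 240 features back into time=3 steps of 80 variables each.
```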
@@ -12,6 +12,7 @@
import warnings
from typing import Callable
from typing import Optional
from typing import Sequence
from typing import Union

import einops
@@ -128,13 +129,14 @@ def _build_networks(self, model_config: DotDict) -> None:
in_channels_src=self.num_channels,
in_channels_dst=self.input_dim[dataset_name],
hidden_dim=self.num_channels,
out_channels_dst=self.num_output_channels[dataset_name],
out_channels_dst=self.output_dim[dataset_name],
edge_dim=self.decoder_graph_provider[dataset_name].edge_dim,
)

def _calculate_input_dim(self, dataset_name: str) -> int:
input_dim = super()._calculate_input_dim(dataset_name)
input_dim += self.num_output_channels[dataset_name] # input + noised targets
input_dim += self._calculate_output_dim(dataset_name)  # input + noised targets
return input_dim

def _create_noise_conditioning_mlp(self) -> nn.Sequential:
@@ -159,7 +161,7 @@ def _assemble_input(self, x, y_noised, bse, grid_shard_shapes=None, model_comm_g
x_data_latent = torch.cat(
(
einops.rearrange(x, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
einops.rearrange(y_noised, "batch ensemble grid vars -> (batch ensemble grid) vars"),
einops.rearrange(y_noised, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
node_attributes_data,
),
dim=-1, # feature dimension
@@ -173,18 +175,21 @@ def _assemble_output(self, x_out, x_skip, batch_size, ensemble_size, dtype):
def _assemble_output(self, x_out, x_skip, batch_size, ensemble_size, dtype):
x_out = einops.rearrange(
x_out,
"(batch ensemble grid) vars -> batch ensemble grid vars",
"(batch ensemble grid) (time vars) -> batch time ensemble grid vars",
batch=batch_size,
ensemble=ensemble_size,
time=self.multi_out,
).to(dtype=dtype)

return x_out

def _make_noise_emb(self, noise_emb: torch.Tensor, repeat: int) -> torch.Tensor:
out = einops.repeat(
noise_emb, "batch ensemble noise_level vars -> batch ensemble (repeat noise_level) vars", repeat=repeat
noise_emb,
"batch time ensemble noise_level vars -> batch time ensemble (repeat noise_level) vars",
repeat=repeat,
)
out = einops.rearrange(out, "batch ensemble grid vars -> (batch ensemble grid) vars")
out = einops.rearrange(out, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)")
return out

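The recurring pattern in these hunks is folding the new time axis into the feature axis before the graph network and unfolding it afterwards. A standalone demonstration of the round trip, with invented sizes:

```python
import einops
import torch

batch, time, ensemble, grid, nvar = 2, 3, 4, 10, 5
y = torch.randn(batch, time, ensemble, grid, nvar)

# fold: one row per (batch, ensemble, grid) node; features carry (time vars)
flat = einops.rearrange(
    y, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"
)
assert flat.shape == (batch * ensemble * grid, time * nvar)

# unfold: recover the time axis after the network
out = einops.rearrange(
    flat,
    "(batch ensemble grid) (time vars) -> batch time ensemble grid vars",
    batch=batch,
    ensemble=ensemble,
    time=time,  # einops infers grid and vars from the axes given
)
assert torch.equal(out, y)
```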
def _generate_noise_conditioning(
@@ -648,7 +653,7 @@ def sample(

# Initialize output with noise
batch_size, ensemble_size, grid_size = x_data.shape[0], x_data.shape[2], x_data.shape[-2]
shape = (batch_size, ensemble_size, grid_size, self.num_output_channels)
shape = (batch_size, self.multi_out, ensemble_size, grid_size, self.num_output_channels)
y_init[dataset_name] = torch.randn(shape, device=x_data.device, dtype=sigmas.dtype) * sigmas[0]

# Build diffusion sampler config dict from all inference defaults
@@ -725,6 +730,12 @@ def __init__(
statistics=statistics,
graph_data=graph_data,
)
if self.multi_out > 1:
warnings.warn(
"The currently implemented normalization of the tendencies when the model has more than one output step is unconventional. Using"
"more than one output step with tendency diffusion models is currently highly experimental and results should be "
"cautiously interpreted."
)

def _calculate_input_dim(self, dataset_name: str) -> int:
input_dim = super()._calculate_input_dim(dataset_name)
@@ -746,6 +757,10 @@ def _assemble_input(
grid_shard_shapes = grid_shard_shapes[dataset_name] if grid_shard_shapes is not None else None

x_skip = self.residual[dataset_name](x, grid_shard_shapes, model_comm_group)
x_skip = x_skip.unsqueeze(1).expand(-1, self.multi_out, -1, -1, -1)
x_skip = einops.rearrange(x_skip, "batch time ensemble grid vars -> (batch ensemble) grid (time vars)")
# Get node attributes
node_attributes_data = self.node_attributes(self._graph_name_data, batch_size=bse)

# Shard node attributes if grid sharding is enabled
if grid_shard_shapes is not None:
@@ -758,7 +773,7 @@
x_data_latent = torch.cat(
(
einops.rearrange(x, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
einops.rearrange(y_noised, "batch ensemble grid vars -> (batch ensemble grid) vars"),
einops.rearrange(y_noised, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
node_attributes_data,
),
dim=-1, # feature dimension
@@ -953,7 +968,7 @@ def _after_sampling(
model_comm_group: Optional[ProcessGroup] = None,
grid_shard_shapes: Optional[list] = None,
gather_out: bool = True,
post_processors_tendencies: Optional[nn.Module] = None,
post_processors_tendencies: Optional[Sequence[Optional[nn.Module]]] = None,
**kwargs,
) -> torch.Tensor:
"""Process sampled tendency to get state prediction.
@@ -968,14 +983,23 @@

# truncate x_t0 if needed
x_t0 = self.apply_reference_state_truncation(x_t0, grid_shard_shapes, model_comm_group)

# Convert tendency to state
out = self.add_tendency_to_state(
x_t0,
out,
post_processors,
post_processors_tendencies,
)
assert post_processors_tendencies is not None, "Per-step tendency processors must be provided."
assert (
len(post_processors_tendencies) == out.shape[1]
), "Per-step tendency processors must match the number of output steps."
states = []
for step, post_proc in enumerate(post_processors_tendencies):
out_step = out[:, step : step + 1]
x_t0_step = x_t0[:, step : step + 1]
state_step = self.add_tendency_to_state(
x_t0_step,
out_step,
post_processors,
post_proc,
)
states.append(state_step)
out = torch.cat(states, dim=1)

# Gather if needed
if gather_out and model_comm_group is not None:
@@ -1004,7 +1028,8 @@ def apply_reference_state_truncation(

for dataset_name, in_x in x.items():
x_skip = self.residual[dataset_name](in_x, grid_shard_shapes[dataset_name], model_comm_group)
# x_skip.shape: (bs, ens, latlon, nvar)
x_skip = x_skip.unsqueeze(1).expand(-1, self.multi_out, -1, -1, -1)
# x_skip.shape: (bs, time, ens, latlon, nvar)
x_skips[dataset_name] = x_skip[..., self.data_indices[dataset_name].model.input.prognostic]

return x_skips
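The `_after_sampling` loop above converts each predicted tendency slice to a state using the post-processor belonging to that output step. A stripped-down sketch of the same control flow; `add_tendency_to_state` is stubbed out and every shape is invented:

```python
import torch


def add_tendency_to_state(x_t0_step, tendency_step, post_proc):
    # Stub: the real method denormalizes the tendency via post_proc and
    # adds it to the (truncated) reference state; here we just add.
    return x_t0_step + tendency_step


multi_out = 3
out = torch.randn(2, multi_out, 1, 10, 5)   # (bs, time, ens, grid, vars)
x_t0 = torch.randn(2, multi_out, 1, 10, 5)  # reference state per output step
post_processors_tendencies = [None] * multi_out  # one processor per step

assert len(post_processors_tendencies) == out.shape[1]
states = []
for step, post_proc in enumerate(post_processors_tendencies):
    # slicing with step:step+1 keeps the singleton time axis, so each
    # state_step stays 5-D and can be concatenated back along dim=1
    state_step = add_tendency_to_state(
        x_t0[:, step : step + 1], out[:, step : step + 1], post_proc
    )
    states.append(state_step)
out = torch.cat(states, dim=1)
assert out.shape == (2, multi_out, 1, 10, 5)
```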
11 changes: 8 additions & 3 deletions models/src/anemoi/models/models/encoder_processor_decoder.py
@@ -93,7 +93,7 @@ def _build_networks(self, model_config: DotDict) -> None:
in_channels_src=self.num_channels,
in_channels_dst=self.input_dim[dataset_name],
hidden_dim=self.num_channels,
out_channels_dst=self.num_output_channels[dataset_name],
out_channels_dst=self.output_dim[dataset_name],
edge_dim=self.decoder_graph_provider[dataset_name].edge_dim,
)

@@ -143,21 +143,26 @@ def _assemble_output(
x_out = (
einops.rearrange(
x_out,
"(batch ensemble grid) vars -> batch ensemble grid vars",
"(batch ensemble grid) (time vars) -> batch time ensemble grid vars",
batch=batch_size,
ensemble=ensemble_size,
time=self.multi_out,
)
.to(dtype=dtype)
.clone()
)

# residual connection (just for the prognostic variables)
assert dataset_name is not None, "dataset_name must be provided for multi-dataset case"
x_out[..., self._internal_output_idx[dataset_name]] += x_skip[..., self._internal_input_idx[dataset_name]]
x_out[..., self._internal_output_idx[dataset_name]] += (
x_skip[..., self._internal_input_idx[dataset_name]].unsqueeze(1).expand(-1, self.multi_out, -1, -1, -1)
)

for bounding in self.boundings[dataset_name]:
# bounding performed in the order specified in the config file
x_out = bounding(x_out)
# TODO(dieter): verify if this is needed or can be solved alternatively
x_out = x_out.contiguous() # necessary after expand()
return x_out

def _assert_valid_sharding(
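On the `.contiguous()` guard and its TODO: `expand()` returns a zero-stride view, which breaks operations that need real memory, such as `view()` or in-place writes into overlapping elements. A quick illustration with invented shapes:

```python
import torch

x = torch.randn(2, 1, 4, 3)
v = x.expand(-1, 5, -1, -1)  # view only: stride 0 along the new time axis

print(v.is_contiguous())  # False
print(v.stride())         # e.g. (12, 0, 3, 1)

try:
    v.view(2 * 5, 4, 3)   # view() cannot merge a zero-stride axis
except RuntimeError as err:
    print("view failed:", err)

c = v.contiguous()        # materializes a real copy
print(c.is_contiguous())  # True
c.view(2 * 5, 4, 3)       # now fine
```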
12 changes: 10 additions & 2 deletions models/src/anemoi/models/models/ens_encoder_processor_decoder.py
@@ -117,16 +117,24 @@
):
ensemble_size = batch_ens_size // batch_size
x_out = (
einops.rearrange(x_out, "(bs e n) f -> bs e n f", bs=batch_size, e=ensemble_size).to(dtype=dtype).clone()
einops.rearrange(x_out, "(bs e) t n f -> bs t e n f", bs=batch_size, e=ensemble_size, t=self.multi_out)
.to(dtype=dtype)
.clone()
)

# residual connection (just for the prognostic variables)
assert dataset_name is not None, "dataset_name must be provided for multi-dataset case"
x_out[..., self._internal_output_idx[dataset_name]] += x_skip[..., self._internal_input_idx[dataset_name]]
x_out[..., self._internal_output_idx[dataset_name]] += einops.rearrange(
x_skip[..., self._internal_input_idx[dataset_name]].unsqueeze(1).expand(-1, self.multi_out, -1, -1),
"(batch ensemble) time grid var -> batch time ensemble grid var",
batch=batch_size,
).to(dtype=dtype)

for bounding in self.boundings[dataset_name]:
# bounding performed in the order specified in the config file
x_out = bounding(x_out)
# TODO(dieter): verify if this is needed or can be solved alternatively
x_out = x_out.contiguous() # necessary after expand()
return x_out

def forward(
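In the ensemble variant the skip tensor still has a flattened `(batch ensemble)` leading axis, so the residual is expanded over time and unflattened before being added to the prognostic channels. A toy version of that hunk, with invented indices and sizes:

```python
import einops
import torch

bs, ens, t, grid = 2, 4, 3, 10
nvar_in, nvar_out = 6, 8
prognostic_in = [0, 1, 2]   # prognostic positions in the input features
prognostic_out = [0, 1, 2]  # prognostic positions in the output features

x_out = torch.zeros(bs, t, ens, grid, nvar_out)
x_skip = torch.randn(bs * ens, grid, nvar_in)  # flattened (batch ensemble)

residual = einops.rearrange(
    x_skip[..., prognostic_in].unsqueeze(1).expand(-1, t, -1, -1),
    "(batch ensemble) time grid var -> batch time ensemble grid var",
    batch=bs,
)
x_out[..., prognostic_out] += residual
assert x_out.shape == (bs, t, ens, grid, nvar_out)
```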
8 changes: 5 additions & 3 deletions models/src/anemoi/models/models/interpolator.py
@@ -99,7 +99,7 @@ def _assemble_input(
x_data_latent = torch.cat(
(
einops.rearrange(x, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
einops.rearrange(target_forcing, "batch ensemble grid vars -> (batch ensemble grid) (vars)"),
einops.rearrange(target_forcing, "batch time ensemble grid vars -> (batch ensemble grid) (time vars)"),
node_attributes_data,
),
dim=-1, # feature dimension
@@ -116,7 +116,7 @@ def _assemble_output(self, x_out, x_skip, batch_size, ensemble_size, dtype, data
x_out = (
einops.rearrange(
x_out,
"(batch ensemble grid) vars -> batch ensemble grid vars",
"(batch ensemble grid) (time vars) -> batch time ensemble grid vars",
batch=batch_size,
ensemble=ensemble_size,
)
@@ -127,7 +127,9 @@
# residual connection (just for the prognostic variables)
if x_skip is not None:
# residual connection (just for the prognostic variables)
x_out[..., self._internal_output_idx[dataset_name]] += x_skip[..., self._internal_input_idx[dataset_name]]
x_out[..., self._internal_output_idx[dataset_name]] += (
x_skip[..., self._internal_input_idx[dataset_name]].unsqueeze(1).expand(-1, self.multi_out, -1, -1, -1)
)

for bounding in self.boundings[dataset_name]:
# bounding performed in the order specified in the config file