Merge branch 'ecmwf:develop' into pr/aw_rescale
havardhhaugen authored Nov 15, 2024
2 parents cc4f38b + 76d3ef6 commit 5df91e1
Showing 14 changed files with 337 additions and 108 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@ Keep it human-readable, your future self will thank you!
## [Unreleased](https://github.com/ecmwf/anemoi-training/compare/0.2.2...HEAD)

### Fixed
- Rename loss_scaling to variable_loss_scaling [#138](https://github.com/ecmwf/anemoi-training/pull/138)
- Refactored callbacks [#60](https://github.com/ecmwf/anemoi-training/pull/60)
- Updated docs [#115](https://github.com/ecmwf/anemoi-training/pull/115)
- Fix enabling LearningRateMonitor [#119](https://github.com/ecmwf/anemoi-training/pull/119)
@@ -21,6 +22,8 @@ Keep it human-readable, your future self will thank you!
### Added
- Included more loss functions and allowed configuration [#70](https://github.com/ecmwf/anemoi-training/pull/70)
- Fix that applies the metric_ranges in the post-processed variable space [#116](https://github.com/ecmwf/anemoi-training/pull/116)
- Allow updates to scalars [#137](https://github.com/ecmwf/anemoi-training/pull/137)
- Add `without` subsetting in ScaleTensor
- Sub-hour datasets [#63](https://github.com/ecmwf/anemoi-training/pull/63)
- Add synchronisation workflow [#92](https://github.com/ecmwf/anemoi-training/pull/92)
- Feat: Anemoi Profiler compatible with mlflow and using PyTorch (Kineto) Profiler for memory report [#38](https://github.com/ecmwf/anemoi-training/pull/38)
2 changes: 1 addition & 1 deletion docs/modules/losses.rst
@@ -66,7 +66,7 @@ define whether to include them in the loss function by setting
Currently, the following scalars are available for use:

- ``variable``: Scale by the feature/variable weights as defined in the
config ``config.training.loss_scaling``.
config ``config.training.variable_loss_scaling``.
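For example, the ``variable`` scalar is enabled through the loss's ``scalars`` list in the training config. A minimal sketch, based on the ``default.yaml`` excerpt later in this diff (values are illustrative, not recommendations):

```yaml
training_loss:
  _target_: anemoi.training.losses.mse.WeightedMSELoss
  # 'variable' must be listed here for variable_loss_scaling to be applied
  scalars: ['variable']
  ignore_nans: False

variable_loss_scaling:
  default: 1
  pl:
    q: 0.6
```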

********************
Validation Metrics
4 changes: 2 additions & 2 deletions docs/user-guide/training.rst
@@ -172,8 +172,8 @@ by setting ``config.data.normaliser``, such that:

It is possible to change the weighting given to each of the variables in
the loss function by changing
``config.training.loss_scaling.pl.<pressure level variable>`` and
``config.training.loss_scaling.sfc.<surface variable>``.
``config.training.variable_loss_scaling.pl.<pressure level variable>``
and ``config.training.variable_loss_scaling.sfc.<surface variable>``.
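As a sketch, a Hydra-style override of the per-variable weights might look like the following (``q`` appears in the default config shown in this diff; ``2t`` and the numeric values are illustrative assumptions):

```yaml
training:
  variable_loss_scaling:
    default: 1
    pl:
      q: 0.6   # down-weight specific humidity on pressure levels
    sfc:
      2t: 3.0  # up-weight a surface variable (illustrative)
```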

It is also possible to change the scaling given to the pressure levels
using ``config.training.pressure_level_scaler``. For almost all
7 changes: 5 additions & 2 deletions src/anemoi/training/config/training/default.yaml
@@ -46,7 +46,8 @@ training_loss:
# loss class to initialise
_target_: anemoi.training.losses.mse.WeightedMSELoss
# Scalars to include in loss calculation
# Available scalars include, 'variable'
# Available scalars include:
# - 'variable': See `variable_loss_scaling` for more information
scalars: ['variable']
ignore_nans: False

@@ -85,7 +86,9 @@ lr:
# in order to keep a constant global_lr
# global_lr = local_lr * num_gpus_per_node * num_nodes / gpus_per_model

loss_scaling:
# Variable loss scaling
# 'variable' must be included in `scalars` in the losses for this to be applied.
variable_loss_scaling:
default: 1
pl:
q: 0.6 #1
17 changes: 9 additions & 8 deletions src/anemoi/training/losses/huber.py
@@ -73,8 +73,8 @@ def forward(
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
scalar_indices: tuple[int, ...] | None = None,
without_scalars: list[str] | list[int] | None = None,
) -> torch.Tensor:
"""Calculates the lat-weighted Huber loss.
@@ -86,10 +86,11 @@ def forward(
Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights
scalar_indices: tuple[int,...], optional
Indices to subset the calculated scalar with, by default None
without_scalars: list[str] | list[int] | None, optional
list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
By default None
Returns
-------
@@ -98,6 +99,6 @@
"""
out = self.huber(pred, target)

if feature_scale:
out = self.scale_by_variable_scaling(out, feature_indices)
out = self.scale(out, scalar_indices, without_scalars=without_scalars)

return self.scale_by_node_weights(out, squash)
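The new `scalar_indices` / `without_scalars` pair replaces the old `feature_indices` / `feature_scale` arguments: instead of a boolean switch, individual scalars can now be excluded by name. A minimal pure-Python sketch of that exclusion-by-name behaviour (the real logic lives in ScaleTensor and also supports excluding by dimension; the function and values here are illustrative, not the library API):

```python
def apply_scalars(value, scalars, without_scalars=None):
    """Multiply `value` by every registered scalar, skipping excluded names.

    `scalars` maps scalar name -> multiplicative factor;
    `without_scalars` is an optional list of names to leave out.
    """
    excluded = set(without_scalars or [])
    for name, factor in scalars.items():
        if name not in excluded:
            value *= factor
    return value

# All scalars applied: 2.0 * 3.0 * 0.5 = 3.0
print(apply_scalars(2.0, {"variable": 3.0, "tendency": 0.5}))
# Excluding 'tendency': 2.0 * 3.0 = 6.0
print(apply_scalars(2.0, {"variable": 3.0, "tendency": 0.5},
                    without_scalars=["tendency"]))
```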
17 changes: 8 additions & 9 deletions src/anemoi/training/losses/logcosh.py
@@ -67,8 +67,8 @@ def forward(
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
scalar_indices: tuple[int, ...] | None = None,
without_scalars: list[str] | list[int] | None = None,
) -> torch.Tensor:
"""Calculates the lat-weighted LogCosh loss.
@@ -80,10 +80,11 @@ def forward(
Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights
scalar_indices: tuple[int,...], optional
Indices to subset the calculated scalar with, by default None
without_scalars: list[str] | list[int] | None, optional
list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
By default None
Returns
-------
@@ -92,7 +93,5 @@
"""
out = LogCosh.apply(pred - target)

if feature_scale:
out = self.scale(out, feature_indices)
out = self.scale(out, scalar_indices, without_scalars=without_scalars)
return self.scale_by_node_weights(out, squash)
18 changes: 9 additions & 9 deletions src/anemoi/training/losses/mae.py
@@ -53,8 +53,8 @@ def forward(
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
scalar_indices: tuple[int, ...] | None = None,
without_scalars: list[str] | list[int] | None = None,
) -> torch.Tensor:
"""Calculates the lat-weighted MAE loss.
@@ -66,18 +66,18 @@ def forward(
Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights
scalar_indices: tuple[int,...], optional
Indices to subset the calculated scalar with, by default None
without_scalars: list[str] | list[int] | None, optional
list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
By default None
Returns
-------
torch.Tensor
Weighted MAE loss
"""
out = torch.abs(pred - target)

if feature_scale:
out = self.scale(out, feature_indices)
out = self.scale(out, scalar_indices, without_scalars=without_scalars)
return self.scale_by_node_weights(out, squash)
17 changes: 8 additions & 9 deletions src/anemoi/training/losses/mse.py
@@ -51,8 +51,8 @@ def forward(
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
scalar_indices: tuple[int, ...] | None = None,
without_scalars: list[str] | list[int] | None = None,
) -> torch.Tensor:
"""Calculates the lat-weighted MSE loss.
@@ -64,18 +64,17 @@ def forward(
Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights
scalar_indices: tuple[int,...], optional
Indices to subset the calculated scalar with, by default None
without_scalars: list[str] | list[int] | None, optional
list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
By default None
Returns
-------
torch.Tensor
Weighted MSE loss
"""
out = torch.square(pred - target)

if feature_scale:
out = self.scale(out, feature_indices)
out = self.scale(out, scalar_indices, without_scalars=without_scalars)
return self.scale_by_node_weights(out, squash)
17 changes: 9 additions & 8 deletions src/anemoi/training/losses/rmse.py
@@ -50,8 +50,8 @@ def forward(
pred: torch.Tensor,
target: torch.Tensor,
squash: bool = True,
feature_indices: torch.Tensor | None = None,
feature_scale: bool = True,
scalar_indices: tuple[int, ...] | None = None,
without_scalars: list[str] | list[int] | None = None,
) -> torch.Tensor:
"""Calculates the lat-weighted RMSE loss.
@@ -63,10 +63,11 @@ def forward(
Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
squash : bool, optional
Average last dimension, by default True
feature_indices:
feature indices (relative to full model output) of the features passed in pred and target
feature_scale:
If True, scale the loss by the feature_weights
scalar_indices: tuple[int,...], optional
Indices to subset the calculated scalar with, by default None
without_scalars: list[str] | list[int] | None, optional
list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
By default None
Returns
-------
@@ -77,7 +78,7 @@
pred=pred,
target=target,
squash=squash,
feature_indices=feature_indices,
feature_scale=feature_scale,
scalar_indices=scalar_indices,
without_scalars=without_scalars,
)
return torch.sqrt(mse)