diff --git a/CHANGELOG.md b/CHANGELOG.md
index 791f5977..21ef6ff3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Keep it human-readable, your future self will thank you!
 ## [Unreleased](https://github.com/ecmwf/anemoi-training/compare/0.2.2...HEAD)
 
 ### Fixed
+- Rename loss_scaling to variable_loss_scaling [#138](https://github.com/ecmwf/anemoi-training/pull/138)
 - Refactored callbacks. [#60](https://github.com/ecmwf/anemoi-training/pulls/60)
 - Updated docs [#115](https://github.com/ecmwf/anemoi-training/pull/115)
 - Fix enabling LearningRateMonitor [#119](https://github.com/ecmwf/anemoi-training/pull/119)
@@ -21,6 +22,8 @@ Keep it human-readable, your future self will thank you!
 ### Added
 - Included more loss functions and allowed configuration [#70](https://github.com/ecmwf/anemoi-training/pull/70)
 - Fix that applies the metric_ranges in the post-processed variable space [#116](https://github.com/ecmwf/anemoi-training/pull/116)
+- Allow updates to scalars [#137](https://github.com/ecmwf/anemoi-training/pulls/137)
+  - Add without subsetting in ScaleTensor
 - Sub-hour datasets [#63](https://github.com/ecmwf/anemoi-training/pull/63)
 - Add synchronisation workflow [#92](https://github.com/ecmwf/anemoi-training/pull/92)
 - Feat: Anemoi Profiler compatible with mlflow and using Pytorch (Kineto) Profiler for memory report [38](https://github.com/ecmwf/anemoi-training/pull/38/)
diff --git a/docs/modules/losses.rst b/docs/modules/losses.rst
index 07ce18f2..32ad9783 100644
--- a/docs/modules/losses.rst
+++ b/docs/modules/losses.rst
@@ -66,7 +66,7 @@ define whether to include them in the loss function by setting
 Currently, the following scalars are available for use:
 
 - ``variable``: Scale by the feature/variable weights as defined in the
-  config ``config.training.loss_scaling``.
+  config ``config.training.variable_loss_scaling``.
 
 ********************
  Validation Metrics
diff --git a/docs/user-guide/training.rst b/docs/user-guide/training.rst
index 695720b4..5be08222 100644
--- a/docs/user-guide/training.rst
+++ b/docs/user-guide/training.rst
@@ -172,8 +172,8 @@ by setting ``config.data.normaliser``, such that:
 
 It is possible to change the weighting given to each of the variables in
 the loss function by changing
-``config.training.loss_scaling.pl.`` and
-``config.training.loss_scaling.sfc.``.
+``config.training.variable_loss_scaling.pl.``
+and ``config.training.variable_loss_scaling.sfc.``.
 
 It is also possible to change the scaling given to the pressure levels
 using ``config.training.pressure_level_scaler``. For almost all
diff --git a/src/anemoi/training/config/training/default.yaml b/src/anemoi/training/config/training/default.yaml
index 4da69b34..ce8715c2 100644
--- a/src/anemoi/training/config/training/default.yaml
+++ b/src/anemoi/training/config/training/default.yaml
@@ -46,7 +46,8 @@ training_loss:
   # loss class to initialise
   _target_: anemoi.training.losses.mse.WeightedMSELoss
   # Scalars to include in loss calculation
-  # Available scalars include, 'variable'
+  # Available scalars include:
+  # - 'variable': See `variable_loss_scaling` for more information
   scalars: ['variable']
   ignore_nans: False
 
@@ -85,7 +86,9 @@ lr:
   # in order to keep a constant global_lr
   # global_lr = local_lr * num_gpus_per_node * num_nodes / gpus_per_model
 
-loss_scaling:
+# Variable loss scaling
+# 'variable' must be included in `scalars` in the losses for this to be applied.
+variable_loss_scaling:
   default: 1
   pl:
     q: 0.6 #1
diff --git a/src/anemoi/training/losses/huber.py b/src/anemoi/training/losses/huber.py
index e42105c7..ed5b8d25 100644
--- a/src/anemoi/training/losses/huber.py
+++ b/src/anemoi/training/losses/huber.py
@@ -73,8 +73,8 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted Huber loss.
 
@@ -86,10 +86,11 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
         -------
@@ -98,6 +99,6 @@ def forward(
 
         """
         out = self.huber(pred, target)
-        if feature_scale:
-            out = self.scale_by_variable_scaling(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
+
         return self.scale_by_node_weights(out, squash)
diff --git a/src/anemoi/training/losses/logcosh.py b/src/anemoi/training/losses/logcosh.py
index 6112d472..6f916177 100644
--- a/src/anemoi/training/losses/logcosh.py
+++ b/src/anemoi/training/losses/logcosh.py
@@ -67,8 +67,8 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted LogCosh loss.
 
@@ -80,10 +80,11 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
         -------
@@ -92,7 +93,5 @@ def forward(
 
         """
         out = LogCosh.apply(pred - target)
-
-        if feature_scale:
-            out = self.scale(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
         return self.scale_by_node_weights(out, squash)
diff --git a/src/anemoi/training/losses/mae.py b/src/anemoi/training/losses/mae.py
index bea16ac2..b2112d98 100644
--- a/src/anemoi/training/losses/mae.py
+++ b/src/anemoi/training/losses/mae.py
@@ -53,8 +53,8 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted MAE loss.
@@ -66,10 +66,12 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
+
 
         Returns
         -------
@@ -77,7 +79,5 @@ def forward(
             Weighted MAE loss
         """
         out = torch.abs(pred - target)
-
-        if feature_scale:
-            out = self.scale(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
         return self.scale_by_node_weights(out, squash)
diff --git a/src/anemoi/training/losses/mse.py b/src/anemoi/training/losses/mse.py
index 87365f8c..c30f8b9d 100644
--- a/src/anemoi/training/losses/mse.py
+++ b/src/anemoi/training/losses/mse.py
@@ -51,8 +51,8 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted MSE loss.
 
@@ -64,10 +64,11 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
         -------
@@ -75,7 +76,5 @@ def forward(
             Weighted MSE loss
         """
         out = torch.square(pred - target)
-
-        if feature_scale:
-            out = self.scale(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
         return self.scale_by_node_weights(out, squash)
diff --git a/src/anemoi/training/losses/rmse.py b/src/anemoi/training/losses/rmse.py
index 34b913a9..6c97344a 100644
--- a/src/anemoi/training/losses/rmse.py
+++ b/src/anemoi/training/losses/rmse.py
@@ -50,8 +50,8 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted RMSE loss.
 
@@ -63,10 +63,11 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
         -------
@@ -77,7 +78,7 @@ def forward(
             pred=pred,
             target=target,
             squash=squash,
-            feature_indices=feature_indices,
-            feature_scale=feature_scale,
+            scalar_indices=scalar_indices,
+            without_scalars=without_scalars,
         )
         return torch.sqrt(mse)
diff --git a/src/anemoi/training/losses/utils.py b/src/anemoi/training/losses/utils.py
index 5e7c98a8..e98e0bfe 100644
--- a/src/anemoi/training/losses/utils.py
+++ b/src/anemoi/training/losses/utils.py
@@ -74,6 +74,7 @@ def __getitem__(self, dimension: int) -> int:
         return self.func(dimension)
 
 
+# TODO(Harrison Cook): Consider moving this to subclass from a pytorch object and allow for device moving completely
 class ScaleTensor:
     """Dynamically resolved tensor scaling class.
 
@@ -99,7 +100,7 @@ class ScaleTensor:
     """
 
     tensors: dict[str, TENSOR_SPEC]
-    _specified_dimensions: list[tuple[int]]
+    _specified_dimensions: dict[str, tuple[int]]
 
     def __init__(
         self,
@@ -120,13 +121,10 @@ def __init__(
             Kwargs form of {name: (dimension, tensor)} to add to the scalars
         """
         self.tensors = {}
-        self._specified_dimensions = []
+        self._specified_dimensions = {}
 
-        scalars = scalars or {}
-        scalars.update(named_tensors)
-
-        for name, tensor_spec in scalars.items():
-            self.add_scalar(*tensor_spec, name=name)
+        named_tensors.update(scalars or {})
+        self.add(named_tensors)
 
         for tensor_spec in tensors:
             self.add_scalar(*tensor_spec)
@@ -144,8 +142,10 @@ def get_dim_shape(dimension: int) -> int:
             if isinstance(dim_assign, tuple) and dimension in dim_assign:
                 return tensor.shape[list(dim_assign).index(dimension)]
 
+        unique_dims = {dim for dim_assign in self._specified_dimensions.values() for dim in dim_assign}
         error_msg = (
-            f"Could not find shape of dimension {dimension} with tensors in dims {list(self.tensors.keys())}"
+            f"Could not find shape of dimension {dimension}. "
+            f"Tensors are only specified for dimensions {list(unique_dims)}."
         )
         raise IndexError(error_msg)
 
@@ -175,8 +175,8 @@ def validate_scalar(self, dimension: int | tuple[int], scalar: torch.Tensor) ->
 
             if self.shape[dim] != scalar.shape[scalar_dim]:
                 error_msg = (
-                    f"Scalar shape {scalar.shape} at dimension {scalar_dim}"
-                    f"does not match shape of scalar at dimension {dim}. Expected {self.shape[dim]}",
+                    f"Incoming scalar shape {scalar.shape} at dimension {scalar_dim} "
+                    f"does not match shape of saved scalar. Expected {self.shape[dim]}"
                 )
                 raise ValueError(error_msg)
 
@@ -190,7 +190,7 @@ def add_scalar(
         """Add new scalar to be applied along `dimension`.
 
         Dimension can be a single int even for a multi-dimensional scalar,
-        in this case the dimensions are assigned as a range from the given int.
+        in this case the dimensions are assigned as a range starting from the given int.
         Negative indexes are also valid, and will be resolved against the tensor's ndim.
 
         Parameters
@@ -210,6 +210,15 @@ def add_scalar(
                 dimension = (dimension,)
             else:
                 dimension = tuple(dimension + i for i in range(len(scalar.shape)))
+        else:
+            dimension = tuple(dimension)
+
+        if name is None:
+            name = str(uuid.uuid4())
+
+        if name in self.tensors:
+            msg = f"Scalar {name!r} already exists in scalars."
+            raise ValueError(msg)
 
         try:
             self.validate_scalar(dimension, scalar)
@@ -217,15 +226,79 @@ def add_scalar(
             error_msg = f"Validating tensor {name!r} raised an error."
             raise ValueError(error_msg) from e
 
-        if name is None:
-            name = str(uuid.uuid4())
+        self.tensors[name] = (dimension, scalar)
+        self._specified_dimensions[name] = dimension
 
-        if name in self.tensors:
-            self._specified_dimensions.remove(self.tensors[name][0])
-            self.tensors[name] = (dimension, self.tensors[name][1] * scalar)
+    def update_scalar(self, name: str, scalar: torch.Tensor, *, override: bool = False) -> None:
+        """Update an existing scalar maintaining original dimensions.
+
+        If `override` is False, the scalar must be valid against the original dimensions.
+        If `override` is True, the scalar will be updated regardless of validity against original scalar.
+
+        Parameters
+        ----------
+        name : str
+            Name of the scalar to update
+        scalar : torch.Tensor
+            New scalar tensor
+        override : bool, optional
+            Whether to override the scalar ignoring dimension compatibility, by default False
+        """
+        if name not in self.tensors:
+            msg = f"Scalar {name!r} not found in scalars."
+            raise ValueError(msg)
+
+        dimension = self.tensors[name][0]
+
+        if not override:
+            self.validate_scalar(dimension, scalar)
+
+        original_scalar = self.tensors.pop(name)
+        original_dimension = self._specified_dimensions.pop(name)
+
+        try:
+            self.add_scalar(dimension, scalar, name=name)
+        except ValueError:
+            self.tensors[name] = original_scalar
+            self._specified_dimensions[name] = original_dimension
+            raise
+
+    def add(self, new_scalars: dict[str, TENSOR_SPEC] | list[TENSOR_SPEC] | None = None, **kwargs) -> None:
+        """Add multiple scalars to the existing scalars.
+
+        Parameters
+        ----------
+        new_scalars : dict[str, TENSOR_SPEC] | list[TENSOR_SPEC] | None, optional
+            Scalars to add, see `add_scalar` for more info, by default None
+        **kwargs:
+            Kwargs form of {name: (dimension, tensor)} to add to the scalars
+        """
+        if isinstance(new_scalars, list):
+            for tensor_spec in new_scalars:
+                self.add_scalar(*tensor_spec)
         else:
-            self.tensors[name] = (dimension, scalar)
-            self._specified_dimensions.append(dimension)
+            kwargs.update(new_scalars or {})
+            for name, tensor_spec in kwargs.items():
+                self.add_scalar(*tensor_spec, name=name)
+
+    def update(self, updated_scalars: dict[str, torch.Tensor] | None = None, override: bool = False, **kwargs) -> None:
+        """Update multiple scalars in the existing scalars.
+
+        If `override` is False, the scalar must be valid against the original dimensions.
+        If `override` is True, the scalar will be updated regardless of shape.
+
+        Parameters
+        ----------
+        updated_scalars : dict[str, torch.Tensor] | None, optional
+            Scalars to update, referenced by name, by default None
+        override : bool, optional
+            Whether to override the scalar ignoring dimension compatibility, by default False
+        **kwargs:
+            Kwargs form of {name: tensor} to update in the scalars
+        """
+        kwargs.update(updated_scalars or {})
+        for name, tensor in kwargs.items():
+            self.update_scalar(name, tensor, override=override)
 
     def subset(self, scalars: str | Sequence[str]) -> ScaleTensor:
         """Get subset of the scalars, filtering by name.
@@ -246,6 +319,23 @@ def subset(self, scalars: str | Sequence[str]) -> ScaleTensor:
             scalars = [scalars]
         return ScaleTensor(**{name: self.tensors[name] for name in scalars})
 
+    def without(self, scalars: str | Sequence[str]) -> ScaleTensor:
+        """Get subset of the scalars, filtering out by name.
+
+        Parameters
+        ----------
+        scalars : str | Sequence[str]
+            Name/s of the scalars to exclude
+
+        Returns
+        -------
+        ScaleTensor
+            Subset of self
+        """
+        if isinstance(scalars, str):
+            scalars = [scalars]
+        return ScaleTensor(**{name: tensor for name, tensor in self.tensors.items() if name not in scalars})
+
     def subset_by_dim(self, dimensions: int | Sequence[int]) -> ScaleTensor:
         """Get subset of the scalars, filtering by dimension.
 
@@ -274,6 +364,32 @@ def subset_by_dim(self, dimensions: int | Sequence[int]) -> ScaleTensor:
 
         return ScaleTensor(**subset_scalars)
 
+    def without_by_dim(self, dimensions: int | Sequence[int]) -> ScaleTensor:
+        """Get subset of the scalars, filtering out by dimension.
+
+        Parameters
+        ----------
+        dimensions : int | Sequence[int]
+            Dimensions to exclude scalars of
+
+        Returns
+        -------
+        ScaleTensor
+            Subset of self
+        """
+        subset_scalars: dict[str, TENSOR_SPEC] = {}
+
+        if isinstance(dimensions, int):
+            dimensions = (dimensions,)
+
+        for name, (dim, scalar) in self.tensors.items():
+            if isinstance(dim, int):
+                dim = (dim,)
+            if len(set(dimensions).intersection(dim)) == 0:
+                subset_scalars[name] = (dim, scalar)
+
+        return ScaleTensor(**subset_scalars)
+
     def resolve(self, ndim: int) -> ScaleTensor:
         """Resolve relative indexes in scalars by associating against ndim.
 
@@ -313,7 +429,7 @@ def scale(self, tensor: torch.Tensor) -> torch.Tensor:
         torch.Tensor
             Scaled tensor
         """
-        return tensor * self.get_scalar(tensor.ndim)
+        return tensor * self.get_scalar(tensor.ndim, device=tensor.device)
 
     def get_scalar(self, ndim: int, device: str | None = None) -> torch.Tensor:
         """Get completely resolved scalar tensor.
@@ -364,13 +480,19 @@ def to(self, *args, **kwargs) -> None:
 
     def __mul__(self, tensor: torch.Tensor) -> torch.Tensor:
         return self.scale(tensor)
 
+    def __rmul__(self, tensor: torch.Tensor) -> torch.Tensor:
+        return self.scale(tensor)
+
     def __repr__(self):
-        return f"ScalarTensor:\n - With {list(self.tensors.keys())}\n - With dims: {self._specified_dimensions}"
+        return (
+            f"ScalarTensor:\n - With tensors : {list(self.tensors.keys())}\n"
+            f" - In dimensions : {list(self._specified_dimensions.values())}"
+        )
 
     def __contains__(self, dimension: int | tuple[int] | str) -> bool:
         """Check if either scalar by name or dimension by int/tuple is being scaled."""
         if isinstance(dimension, tuple):
-            return dimension in self._specified_dimensions
+            return dimension in self._specified_dimensions.values()
 
         if isinstance(dimension, str):
             return dimension in self.tensors
diff --git a/src/anemoi/training/losses/weightedloss.py b/src/anemoi/training/losses/weightedloss.py
index 81829b66..0deccc9d 100644
--- a/src/anemoi/training/losses/weightedloss.py
+++ b/src/anemoi/training/losses/weightedloss.py
@@ -42,6 +42,7 @@ def __init__(
 
         Registers:
         - self.node_weights: torch.Tensor of shape (N, )
+        - self.scalar: ScaleTensor modified with `add_scalar` and `update_scalar`
 
         Parameters
        ----------
@@ -64,10 +65,16 @@ def __init__(
     def add_scalar(self, dimension: int | tuple[int], scalar: torch.Tensor, *, name: str | None = None) -> None:
         self.scalar.add_scalar(dimension=dimension, scalar=scalar, name=name)
 
+    @functools.wraps(ScaleTensor.update_scalar, assigned=("__doc__", "__annotations__"))
+    def update_scalar(self, name: str, scalar: torch.Tensor, *, override: bool = False) -> None:
+        self.scalar.update_scalar(name=name, scalar=scalar, override=override)
+
     def scale(
         self,
         x: torch.Tensor,
-        feature_indices: torch.Tensor | None = None,
+        scalar_indices: tuple[int, ...] | None = None,
+        *,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Scale a tensor by the variable_scaling.
 
@@ -75,23 +82,32 @@ def scale(
         ----------
         x : torch.Tensor
             Tensor to be scaled, shape (bs, ensemble, lat*lon, n_outputs)
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None.
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
        -------
         torch.Tensor
             Scaled error tensor
         """
-        # Use feature_weights if available
         if len(self.scalar) == 0:
             return x
 
-        scalar = self.scalar.get_scalar(x.ndim).to(x)
+        scale_tensor = self.scalar
+        if without_scalars is not None and len(without_scalars) > 0:
+            if isinstance(without_scalars[0], str):
+                scale_tensor = self.scalar.without(without_scalars)
+            else:
+                scale_tensor = self.scalar.without_by_dim(without_scalars)
+
+        scalar = scale_tensor.get_scalar(x.ndim).to(x)
 
-        if feature_indices is None:
+        if scalar_indices is None:
             return x * scalar
-        return x * scalar[..., feature_indices]
+        return x * scalar[scalar_indices]
 
     def scale_by_node_weights(self, x: torch.Tensor, squash: bool = True) -> torch.Tensor:
         """Scale a tensor by the node_weights.
@@ -132,8 +148,9 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        *,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted scaled loss.
 
@@ -145,10 +162,11 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
 
         Returns
         -------
@@ -157,8 +175,8 @@ def forward(
 
         """
         out = pred - target
-        if feature_scale:
-            out = self.scale(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
+
         return self.scale_by_node_weights(out, squash)
 
     @property
@@ -168,7 +186,19 @@ def name(self) -> str:
 
 
 class FunctionalWeightedLoss(BaseWeightedLoss):
-    """WeightedLoss which a user can subclass and provide `calculate_difference`."""
+    """WeightedLoss which a user can subclass and provide `calculate_difference`.
+
+    `calculate_difference` should calculate the difference between the prediction and target.
+    All scaling and weighting is handled by the parent class.
+
+    Example:
+    --------
+    ```python
+    class MyLoss(FunctionalWeightedLoss):
+        def calculate_difference(self, pred, target):
+            return pred - target
+    ```
+    """
 
     def __init__(
         self,
@@ -186,8 +216,9 @@ def forward(
         pred: torch.Tensor,
         target: torch.Tensor,
         squash: bool = True,
-        feature_indices: torch.Tensor | None = None,
-        feature_scale: bool = True,
+        *,
+        scalar_indices: tuple[int, ...] | None = None,
+        without_scalars: list[str] | list[int] | None = None,
     ) -> torch.Tensor:
         """Calculates the lat-weighted scaled loss.
 
@@ -199,10 +230,12 @@ def forward(
             Target tensor, shape (bs, ensemble, lat*lon, n_outputs)
         squash : bool, optional
             Average last dimension, by default True
-        feature_indices:
-            feature indices (relative to full model output) of the features passed in pred and target
-        feature_scale:
-            If True, scale the loss by the feature_weights
+        scalar_indices: tuple[int,...], optional
+            Indices to subset the calculated scalar with, by default None
+        without_scalars: list[str] | list[int] | None, optional
+            list of scalars to exclude from scaling. Can be list of names or dimensions to exclude.
+            By default None
+
 
         Returns
         -------
@@ -211,6 +244,5 @@ def forward(
 
         """
         out = self.calculate_difference(pred, target)
-        if feature_scale:
-            out = self.scale(out, feature_indices)
+        out = self.scale(out, scalar_indices, without_scalars=without_scalars)
         return self.scale_by_node_weights(out, squash)
diff --git a/src/anemoi/training/train/forecaster.py b/src/anemoi/training/train/forecaster.py
index 9dba8316..654ae40a 100644
--- a/src/anemoi/training/train/forecaster.py
+++ b/src/anemoi/training/train/forecaster.py
@@ -96,7 +96,7 @@ def __init__(
 
         self.logger_enabled = config.diagnostics.log.wandb.enabled or config.diagnostics.log.mlflow.enabled
 
-        variable_scaling = self.get_feature_weights(config, data_indices)
+        variable_scaling = self.get_variable_scaling(config, data_indices)
 
         _, self.val_metric_ranges = self.get_val_metric_ranges(config, data_indices)
 
@@ -254,13 +254,13 @@ def get_val_metric_ranges(config: DictConfig, data_indices: IndexCollection) ->
         return metric_ranges, metric_ranges_validation
 
     @staticmethod
-    def get_feature_weights(
+    def get_variable_scaling(
         config: DictConfig,
         data_indices: IndexCollection,
     ) -> torch.Tensor:
-        loss_scaling = (
+        variable_loss_scaling = (
             np.ones((len(data_indices.internal_data.output.full),), dtype=np.float32)
-            * config.training.loss_scaling.default
+            * config.training.variable_loss_scaling.default
         )
         pressure_level = instantiate(config.training.pressure_level_scaler)
 
@@ -275,20 +275,22 @@
             split = key.split("_")
             if len(split) > 1 and split[-1].isdigit():
                 # Apply pressure level scaling
-                if split[0] in config.training.loss_scaling.pl:
-                    loss_scaling[idx] = config.training.loss_scaling.pl[split[0]] * pressure_level.scaler(
+                if split[0] in config.training.variable_loss_scaling.pl:
+                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.pl[
+                        split[0]
+                    ] * pressure_level.scaler(
                         int(split[-1]),
                     )
                 else:
                     LOGGER.debug("Parameter %s was not scaled.", key)
             else:
                 # Apply surface variable scaling
-                if key in config.training.loss_scaling.sfc:
-                    loss_scaling[idx] = config.training.loss_scaling.sfc[key]
+                if key in config.training.variable_loss_scaling.sfc:
+                    variable_loss_scaling[idx] = config.training.variable_loss_scaling.sfc[key]
                 else:
                     LOGGER.debug("Parameter %s was not scaled.", key)
 
-        return torch.from_numpy(loss_scaling)
+        return torch.from_numpy(variable_loss_scaling)
 
     @staticmethod
     def get_node_weights(config: DictConfig, graph_data: HeteroData) -> torch.Tensor:
@@ -462,8 +464,7 @@ def calculate_val_metrics(
                 metrics[f"{metric_name}/{mkey}/{rollout_step + 1}"] = metric(
                     y_pred_postprocessed[..., indices],
                     y_postprocessed[..., indices],
-                    feature_indices=indices,
-                    feature_scale=mkey == "all",
+                    scalar_indices=[..., indices],
                 )
 
         return metrics
diff --git a/tests/train/test_loss_scaling.py b/tests/train/test_loss_scaling.py
index 034325e5..8dd3772a 100644
--- a/tests/train/test_loss_scaling.py
+++ b/tests/train/test_loss_scaling.py
@@ -29,7 +29,7 @@ def fake_data(request: SubRequest) -> tuple[DictConfig, IndexCollection]:
             },
         },
         "training": {
-            "loss_scaling": {
+            "variable_loss_scaling": {
                 "default": 1,
                 "sfc": {
                     "z": 0.1,
@@ -128,12 +128,15 @@
     ],
     indirect=["fake_data"],
 )
-def test_loss_scaling_vals(fake_data: tuple[DictConfig, IndexCollection], expected_scaling: torch.Tensor) -> None:
+def test_variable_loss_scaling_vals(
+    fake_data: tuple[DictConfig, IndexCollection],
+    expected_scaling: torch.Tensor,
+) -> None:
     config, data_indices = fake_data
 
-    loss_scaling = GraphForecaster.get_feature_weights(config, data_indices)
+    variable_loss_scaling = GraphForecaster.get_variable_scaling(config, data_indices)
 
-    assert torch.allclose(loss_scaling, expected_scaling)
+    assert torch.allclose(variable_loss_scaling, expected_scaling)
 
 
 @pytest.mark.parametrize("fake_data", [linear_scaler], indirect=["fake_data"])
diff --git a/tests/train/test_scaler.py b/tests/train/test_scalar.py
similarity index 66%
rename from tests/train/test_scaler.py
rename to tests/train/test_scalar.py
index f3bad0d2..9a37e353 100644
--- a/tests/train/test_scaler.py
+++ b/tests/train/test_scalar.py
@@ -49,6 +49,39 @@ def test_scale_contains_subset_by_dim_indexing() -> None:
     assert "test" not in scale
 
 
+def test_add_existing_scalar() -> None:
+    scale = ScaleTensor(test=(0, torch.tensor([2.0])))
+    with pytest.raises(ValueError, match=r".*already exists.*"):
+        scale.add_scalar(0, torch.tensor(3.0), name="test")
+
+
+def test_update_scalar() -> None:
+    scale = ScaleTensor(test=(0, torch.ones(2)))
+    scale.update_scalar("test", torch.tensor([3.0]))
+    torch.testing.assert_close(scale.tensors["test"][1], torch.tensor([3.0]))
+
+
+def test_update_missing_scalar() -> None:
+    scale = ScaleTensor(test=(0, torch.ones(2)))
+    with pytest.raises(ValueError, match=r".*not found in scalars.*"):
+        scale.update_scalar("test_missing", torch.tensor([3.0]))
+    assert "test" in scale
+    assert (0,) in scale
+
+
+def test_update_scalar_wrong_dim() -> None:
+    scale = ScaleTensor(test=(0, torch.ones((2, 3))))
+    with pytest.raises(ValueError, match=r".*does not match shape of saved scalar.*"):
+        scale.update_scalar("test", torch.ones((2, 2)))
+    assert "test" in scale
+    assert 0 in scale
+
+
+def test_update_scalar_wrong_dim_allow_override() -> None:
+    scale = ScaleTensor(test=(0, torch.ones((2, 3))))
+    assert scale.update_scalar("test", torch.ones((2, 2)), override=True) is None
+
+
 @pytest.mark.parametrize(
     ("scalars", "input_tensor", "output"),
     [
@@ -133,3 +166,35 @@ def test_scale_tensor_two_dim(
     output = torch.tensor(output, dtype=torch.float32)
 
     torch.testing.assert_close(scale.scale(input_tensor), output)
+
+
+def test_scalar_subset() -> None:
+    scale = ScaleTensor(test=(0, torch.tensor([2.0])), wow=(0, torch.tensor([3.0])))
+    subset = scale.subset("test")
+    assert "test" in subset
+    assert "wow" not in subset
+    assert 0 in subset
+
+
+def test_scalar_subset_without() -> None:
+    scale = ScaleTensor(test=(0, torch.tensor([2.0])), wow=(0, torch.tensor([3.0])))
+    subset = scale.without("test")
+    assert "test" not in subset
+    assert "wow" in subset
+    assert 0 in subset
+
+
+def test_scalar_subset_by_dim() -> None:
+    scale = ScaleTensor(test=(0, torch.tensor([2.0])), wow=(1, torch.tensor([3.0])))
+    subset = scale.subset_by_dim(0)
+    assert "test" in subset
+    assert "wow" not in subset
+    assert 0 in subset
+
+
+def test_scalar_subset_by_dim_without() -> None:
+    scale = ScaleTensor(test=(0, torch.tensor([2.0])), wow=(1, torch.tensor([3.0])))
+    subset = scale.without_by_dim(0)
+    assert "test" not in subset
+    assert "wow" in subset
+    assert 0 not in subset
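
For orientation, here is a minimal usage sketch of the `ScaleTensor` workflow this patch introduces (`add`, `update_scalar`, `without`, `without_by_dim`). It is not part of the diff itself: the scalar names, shapes, and values below are hypothetical, and only the import path and method signatures shown in the patch are assumed.

```python
import torch

from anemoi.training.losses.utils import ScaleTensor

# Register two named scalars: one on the relative feature dimension (-1),
# one on an absolute dimension. Names and values here are made up.
scale = ScaleTensor()
scale.add(
    variable=(-1, torch.tensor([0.5, 1.0, 2.0])),  # last dim, n_outputs = 3
    node=(2, torch.ones(4)),                       # dim 2, lat*lon = 4
)

# Re-adding an existing name now raises ValueError; update_scalar replaces
# the tensor in place, validating its shape unless override=True.
scale.update_scalar("variable", torch.tensor([1.0, 1.0, 0.1]))

x = torch.randn(1, 2, 4, 3)  # (bs, ensemble, lat*lon, n_outputs)
scaled = scale.scale(x)      # apply every registered scalar

# Drop scalars by name or by dimension, mirroring the `without_scalars`
# argument the loss functions now accept.
no_node = scale.without("node").scale(x)
no_dim_two = scale.without_by_dim(2).scale(x)
```

Note that `without` and `without_by_dim` return new `ScaleTensor` subsets rather than mutating the original, which is what lets the losses honour a per-call `without_scalars` argument without disturbing the registered scalars.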