Merge pull request #347 from laserkelvin/loss-broadcasting-fix

Kelvin Lee · web-flow · commit 3d66b454aac0 · 2025-03-17T17:08:53.000-07:00
Loss broadcasting fix
diff --git a/matsciml/models/base.py b/matsciml/models/base.py
@@ -1103,9 +1103,18 @@ def _compute_losses(
             loss_func_signature = signature(loss_func.forward).parameters
             # TODO refactor this once outputs are homogenized
             if isinstance(predictions, dict):
-                kwargs = {"input": predictions[key], "target": target_val}
+                model_outputs = predictions[key]
             else:
-                kwargs = {"input": getattr(predictions, key), "target": target_val}
+                model_outputs = getattr(predictions, key)
+            # to ensure broadcasting works correctly
+            if model_outputs.shape != target_val.shape:
+                try:
+                    model_outputs = model_outputs.reshape(target_val.shape)
+                except RuntimeError as e:
+                    raise RuntimeError(
+                        f"Unable to reconcile prediction/label shapes; preds: {model_outputs.shape}, labels: {target_val.shape}"
+                    ) from e
+            kwargs = {"input": model_outputs, "target": target_val}
             if not isinstance(kwargs["input"], torch.Tensor):
                 raise KeyError(f"Expected model to produce output with key {key}.")
             # pack atoms per graph information too
@@ -1575,7 +1584,16 @@ def _compute_losses(
             else:
                 coefficient = self.loss_coeff[key]
 
-            losses[key] = self.loss_func(predictions[key], target_val) * (
+            model_outputs = predictions[key]
+            # attempt to reshape model outputs for correct broadcasting
+            if model_outputs.shape != target_val.shape:
+                try:
+                    model_outputs = model_outputs.reshape(target_val.shape)
+                except RuntimeError as e:
+                    raise RuntimeError(
+                        f"Unable to reconile shapes for preds/labels; preds: {model_outputs.shape}, labels: {target_val.shape}."
+                    ) from e
+            losses[key] = self.loss_func(model_outputs, target_val) * (
                 coefficient / predictions[key].numel()
             )
         total_loss: torch.Tensor = sum(losses.values())