[BugFix] Correct log-prob size in transformer wrapper

vmoens · vmoens · commit 95d4b0486bfe · 2025-03-17T11:46:29.000Z
ghstack-source-id: 5226bb4d25bbaaf139b24cf96d096f1d732013d3
Pull Request resolved: pytorch/rl#2854
diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -4941,7 +4941,7 @@ def set(self, name: str, spec: TensorSpec) -> Composite:
                     spec.shape = self.shape
                 else:
                     raise ValueError(
-                        f"The shape of the spec {type(spec).__name__} and the Composite {type(self).__name__} mismatch: the first "
+                        f"The shapes of the spec {type(spec).__name__} and the {type(self).__name__} mismatch: the first "
                         f"{self.ndim} dimensions should match but got spec.shape={spec.shape} and "
                         f"Composite.shape={self.shape}."
                     )
diff --git a/torchrl/modules/llm/transformers_policy.py b/torchrl/modules/llm/transformers_policy.py
@@ -53,11 +53,11 @@ def log_probs_from_scores(td: TensorDictBase) -> TensorDictBase:
     - "tokens_out", "scores"
 
     """
-    # TODO: how do we avoid getting these?
     tokens_out = td["tokens_out", "sequences"]
     seq_len = tokens_out.shape[1]
 
     del td["tokens_out", "past_key_values"]
+
     scores = dict(td["tokens_out", "scores"].items())
     scores = torch.stack(
         [scores[str(k)] for k in range(len(scores))], 1
@@ -90,15 +90,18 @@ def log_probs_from_logits(td: TensorDictBase) -> TensorDictBase:
     - "forward", "past_key_values"
     - "forward"
     """
-    # TODO: how do we avoid getting these?
+    tokens_out = td["tokens_response", "input_ids"]
+    seq_len = tokens_out.shape[-1]
+
     del td["forward", "past_key_values"]
+
     scores = td["forward", "logits"]
+    scores = scores[..., -seq_len:, :]
     logits = scores - scores.logsumexp(dim=-1, keepdim=True)
     td["logits"] = scores
     del td["forward"]
     scores.shape[1]
-    tokens = td["tokens_in", "input_ids"]
-    log_probs = logits.gather(-1, tokens.unsqueeze(-1))
+    log_probs = logits.gather(-1, tokens_out.unsqueeze(-1))
     td["log_probs"] = log_probs
     return td
 

Original file line number	Diff line number	Diff line change
`@@ -4941,7 +4941,7 @@ def set(self, name: str, spec: TensorSpec) -> Composite:`
`4941`	`4941`	`spec.shape = self.shape`
`4942`	`4942`	`else:`
`4943`	`4943`	`raise ValueError(`
`4944`		`- f"The shape of the spec {type(spec).__name__} and the Composite {type(self).__name__} mismatch: the first "`
	`4944`	`+ f"The shapes of the spec {type(spec).__name__} and the {type(self).__name__} mismatch: the first "`
`4945`	`4945`	`f"{self.ndim} dimensions should match but got spec.shape={spec.shape} and "`
`4946`	`4946`	`f"Composite.shape={self.shape}."`
`4947`	`4947`	`)`