From a6d2c96de548286d1c14d186ab9558645c7367d1 Mon Sep 17 00:00:00 2001
From: Vincent Moens
Date: Thu, 17 Apr 2025 13:53:42 +0100
Subject: [PATCH] Update

[ghstack-poisoned]
---
 torchrl/envs/transforms/llm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchrl/envs/transforms/llm.py b/torchrl/envs/transforms/llm.py
index 1a2d2d2db23..e7fc5db94d4 100644
--- a/torchrl/envs/transforms/llm.py
+++ b/torchrl/envs/transforms/llm.py
@@ -765,7 +765,7 @@ def _step(
         kl = curr_log_prob - log_prob
         if reward is None:
             reward = 0
-        next_tensordict.set(self.out_keys[0], reward + self.coef * kl)
+        next_tensordict.set(self.out_keys[0], reward - self.coef * kl)
         return next_tensordict
 
     def forward(self, tensordict: TensorDictBase) -> TensorDictBase: