Skip to content

Commit a48f9c7

Browse files
authored
Adds a no-grad context for the reference model in DPO (#473)
1 parent 2d7ca7e commit a48f9c7

File tree

1 file changed

+13
-11
lines changed

1 file changed

+13
-11
lines changed

ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb

+13-11
Original file line number | Diff line number | Diff line change
@@ -2149,16 +2149,18 @@
2149 2149
" labels=batch[\"rejected\"],\n",
2150 2150
" selection_mask=batch[\"rejected_mask\"]\n",
2151 2151
" )\n",
2152-
" ref_chosen_log_probas = compute_logprobs(\n",
2153-
" logits=reference_model(batch[\"chosen\"]),\n",
2154-
" labels=batch[\"chosen\"],\n",
2155-
" selection_mask=batch[\"chosen_mask\"]\n",
2156-
" )\n",
2157-
" ref_rejected_log_probas = compute_logprobs(\n",
2158-
" logits=reference_model(batch[\"rejected\"]),\n",
2159-
" labels=batch[\"rejected\"],\n",
2160-
" selection_mask=batch[\"rejected_mask\"]\n",
2161-
" )\n",
2152+
" \n",
2153+
" with torch.no_grad():\n",
2154+
" ref_chosen_log_probas = compute_logprobs(\n",
2155+
" logits=reference_model(batch[\"chosen\"]),\n",
2156+
" labels=batch[\"chosen\"],\n",
2157+
" selection_mask=batch[\"chosen_mask\"]\n",
2158+
" )\n",
2159+
" ref_rejected_log_probas = compute_logprobs(\n",
2160+
" logits=reference_model(batch[\"rejected\"]),\n",
2161+
" labels=batch[\"rejected\"],\n",
2162+
" selection_mask=batch[\"rejected_mask\"]\n",
2163+
" )\n",
2162 2164
" loss, chosen_rewards, rejected_rewards = compute_dpo_loss(\n",
2163 2165
" model_chosen_logprobs=policy_chosen_log_probas,\n",
2164 2166
" model_rejected_logprobs=policy_rejected_log_probas,\n",
@@ -3090,7 +3092,7 @@
3090 3092
"name": "python",
3091 3093
"nbconvert_exporter": "python",
3092 3094
"pygments_lexer": "ipython3",
3093-
"version": "3.11.4"
3095+
"version": "3.10.6"
3094 3096
}
3095 3097
},
3096 3098
"nbformat": 4,

0 commit comments

Comments
 (0)