Commit 9ce0be3

casinca and rasbt authored

potential little fixes appendix-D4 .ipynb (#427)

* Update appendix-D.ipynb
  - lr missing argument for passing peak_lr to the optimizer
  - filling 1 step gap for gradient clipping

* adjustments

Co-authored-by: rasbt <[email protected]>
1 parent ba3137f commit 9ce0be3

File tree

1 file changed: +13 −6 lines changed

appendix-D/01_main-chapter-code/appendix-D.ipynb (+13 −6)
@@ -552,6 +552,8 @@
     "source": [
      "from previous_chapters import evaluate_model, generate_and_print_sample\n",
      "\n",
+     "BOOK_VERSION = True\n",
+     "\n",
      "\n",
      "def train_model(model, train_loader, val_loader, optimizer, device,\n",
      "                n_epochs, eval_freq, eval_iter, start_context, tokenizer,\n",
@@ -595,9 +597,14 @@
      "        loss.backward()\n",
      "\n",
      "        # Apply gradient clipping after the warmup phase to avoid exploding gradients\n",
-     "        if global_step > warmup_steps:\n",
-     "            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
-     "        \n",
+     "\n",
+     "        if BOOK_VERSION:\n",
+     "            if global_step > warmup_steps:\n",
+     "                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
+     "        else:\n",
+     "            if global_step >= warmup_steps:  # the book originally used global_step > warmup_steps, which led to a skipped clipping step after warmup\n",
+     "                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
+     "        \n",
      "        optimizer.step()\n",
      "        tokens_seen += input_batch.numel()\n",
      "\n",
@@ -691,8 +698,8 @@
      "model = GPTModel(GPT_CONFIG_124M)\n",
      "model.to(device)\n",
      "\n",
-     "peak_lr = 5e-4\n",
-     "optimizer = torch.optim.AdamW(model.parameters(), weight_decay=0.1)\n",
+     "peak_lr = 0.001  # this was originally set to 5e-4 in the book by mistake\n",
+     "optimizer = torch.optim.AdamW(model.parameters(), lr=peak_lr, weight_decay=0.1)  # the book accidentally omitted the lr assignment\n",
      "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
      "\n",
      "n_epochs = 15\n",
@@ -817,7 +824,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
