6 changes: 5 additions & 1 deletion chapters/en/chapter5/evaluation.mdx
@@ -291,6 +291,10 @@ the `"audio"` column), ignoring the rest (like the target transcriptions, which
then iterate over this transformed dataset, appending the model outputs to a list to save the predictions. The
following code cell will take approximately five minutes when running on a GPU with half-precision, peaking at 12 GB of memory:

<Tip>
The Whisper model doesn't support Dhivehi, but since Sinhala is quite similar, we can decode with Sinhala instead by passing the language in `generate_kwargs`.
</Tip>

```python
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset
@@ -302,7 +306,7 @@ for prediction in tqdm(
    pipe(
        KeyDataset(common_voice_test, "audio"),
        max_new_tokens=128,
-       generate_kwargs={"task": "transcribe"},
+       generate_kwargs={"task": "transcribe", "language": "Sinhala"},
        batch_size=32,
    ),
    total=len(common_voice_test),
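For context, a minimal sketch of the full inference loop with the language override in place, assuming `pipe` and `common_voice_test` are set up as earlier in the chapter:

```python
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset

all_predictions = []

# Stream the test set through the pipeline; Dhivehi has no Whisper language
# token, so decoding is forced to the closely related Sinhala instead.
for prediction in tqdm(
    pipe(
        KeyDataset(common_voice_test, "audio"),
        max_new_tokens=128,
        generate_kwargs={"task": "transcribe", "language": "Sinhala"},
        batch_size=32,
    ),
    total=len(common_voice_test),
):
    all_predictions.append(prediction["text"])
```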
2 changes: 2 additions & 0 deletions chapters/en/chapter5/fine-tuning.mdx
@@ -448,6 +448,8 @@ training_args = Seq2SeqTrainingArguments(
    metric_for_best_model="wer",
    greater_is_better=False,
    push_to_hub=True,
    dataloader_num_workers=1,
    dataloader_prefetch_factor=1,
)
```

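For context, a minimal sketch of where the two new dataloader arguments sit, assuming the other `Seq2SeqTrainingArguments` values stay as defined earlier in the chapter (the output directory below is illustrative):

```python
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="./whisper-small-dv",  # illustrative; use the directory from the chapter
    # ... other hyperparameters as in the chapter ...
    metric_for_best_model="wer",
    greater_is_better=False,
    push_to_hub=True,
    dataloader_num_workers=1,  # load batches in one background worker process
    dataloader_prefetch_factor=1,  # each worker keeps one batch prefetched
)
```

Note that `dataloader_prefetch_factor` only takes effect when `dataloader_num_workers` is greater than zero, since prefetching happens in the worker processes.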
6 changes: 6 additions & 0 deletions chapters/en/chapter6/fine-tuning.mdx
@@ -260,6 +260,12 @@ model from SpeechBrain.
Create a function `create_speaker_embedding()` that takes an input audio waveform and outputs a 512-element vector
containing the corresponding speaker embedding.

<Tip warning={true}>

If you are using a recent version of SpeechBrain, replace the `EncoderClassifier` import in the code below with: `from speechbrain.inference.speaker import EncoderClassifier`.

</Tip>

```py
import os
import torch
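For context, a sketch of how the updated import slots into the speaker-embedding helper, assuming the x-vector setup used in the chapter:

```py
import os
import torch

# New import path for recent SpeechBrain releases (see the Tip above);
# older releases use `from speechbrain.pretrained import EncoderClassifier`.
from speechbrain.inference.speaker import EncoderClassifier

spk_model_name = "speechbrain/spkrec-xvect-voxceleb"

device = "cuda" if torch.cuda.is_available() else "cpu"
speaker_model = EncoderClassifier.from_hparams(
    source=spk_model_name,
    run_opts={"device": device},
    savedir=os.path.join("/tmp", spk_model_name),
)


def create_speaker_embedding(waveform):
    # Encode the waveform into a 512-dimensional x-vector and L2-normalize it.
    with torch.no_grad():
        speaker_embeddings = speaker_model.encode_batch(torch.tensor(waveform))
        speaker_embeddings = torch.nn.functional.normalize(speaker_embeddings, dim=2)
        speaker_embeddings = speaker_embeddings.squeeze().cpu().numpy()
    return speaker_embeddings
```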