22
 # Model Configuration
 model:
-  model_name: "unsloth/Qwen3-14B"  # Options: Qwen3-4B, Qwen3-8B, Qwen3-14B, Qwen3-32B
+  model_name: "unsloth/Qwen1.5-3B"  # Options: Qwen3-4B, Qwen3-8B, Qwen3-14B, Qwen3-32B
   max_seq_length: 2048  # Sequence length (adjust based on your needs)
   dtype: null  # Auto-detect best dtype
   load_in_4bit: true  # Use 4-bit quantization for memory efficiency
@@ -87,19 +87,19 @@ wandb:
 hardware:
   # For different GPU memory configurations
   gpu_16gb:
-    model_name: "unsloth/Qwen3-14B"
+    model_name: "unsloth/Qwen1.5-3B"
     per_device_train_batch_size: 2
     gradient_accumulation_steps: 4
     max_seq_length: 2048

   gpu_24gb:
-    model_name: "unsloth/Qwen3-14B"
+    model_name: "unsloth/Qwen1.5-3B"
     per_device_train_batch_size: 4
     gradient_accumulation_steps: 2
     max_seq_length: 4096

   gpu_40gb:
-    model_name: "unsloth/Qwen3-32B"
+    model_name: "unsloth/Qwen1.5-3B"
     per_device_train_batch_size: 2
     gradient_accumulation_steps: 4
     max_seq_length: 4096
@@ -109,4 +109,4 @@ evaluation:
   eval_steps: 100  # Evaluation frequency
   eval_dataset: null  # Evaluation dataset path
   metric_for_best_model: "loss"  # Metric to track for best model
-  load_best_model_at_end: true  # Load best model at end of training
+  load_best_model_at_end: true  # Load best model at end of training
0 commit comments