Skip to content

Commit 34c5dee

Browse files
author
Danqing Wang (MPK)
committed
address comment
1 parent 8419a74 commit 34c5dee

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

examples/models/llama/export_llama_lib.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
"static_llama",
9595
"qwen2_5",
9696
"phi-4-mini",
97-
"smollm",
97+
"smollm2",
9898
]
9999
TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]
100100

examples/models/smollm/convert_weights.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ def smollm_tune_to_meta(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.
4242
new_key = get_mapped_key(key, inverted_mapping_dict)
4343
converted_state_dict[new_key] = value
4444

45-
# Input and output embeddings are tied.
46-
converted_state_dict["output.weight"] = converted_state_dict[
47-
"tok_embeddings.weight"
48-
]
49-
5045
return converted_state_dict
5146

5247

@@ -68,7 +63,7 @@ def main():
6863
checkpoint_dir=args.input_dir,
6964
checkpoint_files=["model.safetensors"],
7065
output_dir=".",
71-
model_type="MISTRAL",
66+
model_type="LLAMA",
7267
)
7368

7469
print("Loading checkpoint...")

0 commit comments

Comments (0)