Ensure weight tying is maintained for embed_tokens and lm_head #2803
base: main
@@ -52,6 +52,7 @@
     SAFETENSORS_WEIGHTS_NAME,
     TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
     WEIGHTS_NAME,
+    ModulesToSaveWrapper,
     PeftType,
     TaskType,
     _get_batch_size,
@@ -1848,6 +1849,27 @@ def __init__(
         super().__init__(model, peft_config, adapter_name, **kwargs)
         self.base_model_prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation

+        # Condition to check if embedding layer (`embed_tokens`) is added
+        # in `modules_to_save` and we want to ensure the `lm_head`
+        # does not diverge from the `embed_tokens` layer
+        if (
+            peft_config.task_type == "CAUSAL_LM"
+            and hasattr(model.get_input_embeddings(), "modules_to_save")
+            and getattr(peft_config, "ensure_weight_tieing")
+        ):
+            module_keys = BaseTuner._get_tied_modules_to_save(self, model)
+            tied_module = getattr(model.get_input_embeddings().modules_to_save, adapter_name)
+
+            _set_trainable(
+                model,
+                adapter_name,
+                inference_mode=peft_config.inference_mode,
+                module_names=module_keys,
+                strict_module_check=True,
+                wrapper_cls=ModulesToSaveWrapper,
+                tied_module=tied_module,
+            )
+
     def forward(
         self,
         input_ids=None,
@@ -506,10 +506,10 @@ class ModulesToSaveWrapper(AuxiliaryTrainingWrapper):
     # All names of layers that may contain adapter (trainable) weights
     adapter_layer_names: tuple[str, ...] = ("modules_to_save",)

-    def __init__(self, module_to_save, adapter_name):
-        super().__init__(module_to_save, adapter_name)
+    def __init__(self, module_to_save, adapter_name, tied_module=None):
+        super().__init__(module_to_save, adapter_name, tied_module=tied_module)

-    def init_modules(self, adapter_name):
+    def init_modules(self, adapter_name, **kwargs):
         # we treat each adapter separately, so we have multiple adapters, same (copied) module for each
         self.modules_to_save = torch.nn.ModuleDict({})
@@ -546,9 +546,17 @@ def update(self, adapter_name, **kwargs):
                 context_manager = deepspeed.zero.GatheredParameters(self.original_module.parameters(), modifier_rank=0)
                 break

+        tied_module = kwargs.get("tied_module", None)
+
         if adapter_name not in self.modules_to_save:
             with context_manager:
-                self.modules_to_save[adapter_name] = copy.deepcopy(self.original_module)
+                if tied_module:
+                    new_linear = torch.nn.Linear(*tied_module.weight.shape, bias=False)
+                    new_linear.weight = tied_module.weight
+
+                    self.modules_to_save[adapter_name] = new_linear
+                else:
+                    self.modules_to_save[adapter_name] = copy.deepcopy(self.original_module)

         if hasattr(self.modules_to_save[adapter_name], "_hf_hook"):
             old_hook = self.modules_to_save[adapter_name]._hf_hook
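As a standalone illustration of why the `tied_module` branch keeps the two layers in sync: assigning the tied module's weight Parameter to a freshly created Linear shares storage rather than copying it. This is plain PyTorch with made-up sizes, not PEFT internals:

```python
import torch

# Stand-in for the embed_tokens copy stored in modules_to_save (sizes are arbitrary).
vocab_size, hidden_size = 128, 16
embed_copy = torch.nn.Embedding(vocab_size, hidden_size)

# Mirrors `torch.nn.Linear(*tied_module.weight.shape, bias=False)` from the diff;
# re-assigning `.weight` afterwards makes the Linear use the embedding's Parameter,
# which also gives it the (vocab_size, hidden_size) weight expected of an lm_head.
new_linear = torch.nn.Linear(*embed_copy.weight.shape, bias=False)
new_linear.weight = embed_copy.weight  # share the Parameter object, no copy

assert new_linear.weight is embed_copy.weight
with torch.no_grad():
    embed_copy.weight[0, 0] = 42.0
print(new_linear.weight[0, 0].item())  # 42.0, an update to one is visible through the other
```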