Changes from all commits
30 commits
4c6d15f
Tests and inital implementation for embed_tokens
romitjain Oct 29, 2025
4b91220
Minor fixes
romitjain Oct 30, 2025
46b803e
Fixed all tests and made updates to logic
romitjain Oct 31, 2025
37b1e06
Nit
romitjain Oct 31, 2025
8388aa8
Added contigious check for export
romitjain Nov 4, 2025
cd6c6d0
Apply suggestion from @BenjaminBossan
romitjain Nov 4, 2025
0cb44e8
Addressed PR comments
romitjain Nov 5, 2025
628ce10
Update src/peft/tuners/lora/model.py
romitjain Nov 7, 2025
602ce10
Update src/peft/tuners/lora/model.py
romitjain Nov 7, 2025
e2d0345
Apply suggestions from code review
romitjain Nov 7, 2025
7880032
Removed redundant change
romitjain Nov 7, 2025
f73af50
Merge branch 'enh/tie-target-modules' of github.com:romitjain/peft in…
romitjain Nov 7, 2025
46cca1e
Handling target_modules as str
romitjain Nov 7, 2025
2267a48
Update src/peft/tuners/tuners_utils.py
romitjain Nov 10, 2025
5d5b8e4
Updated regex matching
romitjain Nov 12, 2025
c7cfe40
Apply suggestion from @BenjaminBossan
romitjain Nov 13, 2025
8294ec7
Added find layer by tensor
romitjain Nov 13, 2025
7370a21
Merge branch 'main' of github.com:romitjain/peft into enh/tie-target-…
romitjain Nov 13, 2025
1da895f
Fixed tests
romitjain Nov 14, 2025
d86ff7d
Nit
romitjain Nov 18, 2025
dc03dd4
Small fix to ensure correct layer name gets saved for target modules
romitjain Nov 19, 2025
c79a64c
Merge branch 'main' of github.com:huggingface/peft into enh/tie-targe…
romitjain Nov 20, 2025
0715451
Merge branch 'main' of github.com:huggingface/peft into enh/tie-targe…
romitjain Dec 15, 2025
dbb0096
Apply suggestions from code review
romitjain Dec 15, 2025
06d4b7f
Merge branch 'enh/tie-target-modules' of github.com:romitjain/peft in…
romitjain Dec 15, 2025
67a71d6
Updated matching logic
romitjain Dec 15, 2025
8889558
Merge branch 'main' of github.com:romitjain/peft into enh/tie-target-…
romitjain Jan 5, 2026
9f7702f
Merge branch 'main' of github.com:huggingface/peft into enh/tie-targe…
romitjain Jan 12, 2026
4d5d681
Merge branch 'main' into enh/tie-target-modules
romitjain Jan 16, 2026
ba4d81f
Merge branch 'main' into enh/tie-target-modules
romitjain Jan 29, 2026
9 changes: 9 additions & 0 deletions src/peft/peft_model.py
@@ -329,6 +329,15 @@ def save_mutated_as_lora(peft_config, path_initial_model_for_weight_conversion,
output_state_dict = save_mutated_as_lora(
peft_config, path_initial_model_for_weight_conversion, output_state_dict, kwargs
)

# Before exporting the parameters, we need to make sure all the tensors are contiguous, as saving
# non-contiguous parameters is not supported. Tensors can become non-contiguous
# if they are a transpose view of another tensor. This can happen
# during adapter tying or parameter sharing.
for k, v in output_state_dict.items():
if not v.is_contiguous():
output_state_dict[k] = v.contiguous()

safe_save_file(
output_state_dict,
os.path.join(output_dir, SAFETENSORS_WEIGHTS_NAME),
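For context, the check above can be reproduced with plain PyTorch; a minimal sketch (illustrative shapes) of why a transposed view, such as one produced by weight tying, needs `.contiguous()` before saving:

```python
import torch

# A transposed view shares storage with the original tensor and is not contiguous,
# which is what happens when an adapter weight is a .t() view of another parameter.
weight = torch.randn(4, 8)
tied_view = weight.t()
print(tied_view.is_contiguous())  # False, so it cannot be serialized as-is

# .contiguous() materializes a dense copy that can be saved.
print(tied_view.contiguous().is_contiguous())  # True
```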
10 changes: 8 additions & 2 deletions src/peft/tuners/lora/config.py
@@ -455,6 +455,11 @@ class LoraConfig(PeftConfig):
`target_parameters`. As an example, for Llama4, you can pass:
`target_parameters=['feed_forward.experts.gate_up_proj', 'feed_forward.experts.down_proj']`. Passing a
string for regex matching is not implemented yet.
ensure_weight_tying (`bool`, *optional*):
Whether to tie weights or not after peft initialization. This will ensure that the adapters added to the
tied layers are also tied. This is only applicable for layers passed via `modules_to_save` and
`target_modules`.

"""

r: int = field(default=8, metadata={"help": "Lora attention dimension"})
@@ -760,8 +765,8 @@ class LoraConfig(PeftConfig):
"help": (
"Whether to tie weights or not after peft initialization. "
"This will ensure that the adapters added to the tied layers "
"are also tied. This is applicable for layers passed via "
"`modules_to_save` and `trainable_token_indices`."
"are also tied. This is only applicable for layers passed via "
"`modules_to_save`, `target_modules` and `trainable_token_indices`."
)
},
)
@@ -786,6 +791,7 @@ def __post_init__(self):

if self.ensure_weight_tying:
self.modules_to_tie = None
self.target_modules_to_tie = None

if isinstance(self.target_parameters, str):
raise TypeError("`target_parameters` must be a list of strings or None.")
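For context, a usage sketch of the `ensure_weight_tying` flag described above. The checkpoint and module names are illustrative assumptions; any model whose `lm_head` is weight-tied to `embed_tokens` would work the same way:

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Assumes a checkpoint with tie_word_embeddings=True (illustrative choice).
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B")

config = LoraConfig(
    r=8,
    target_modules=["q_proj", "v_proj", "embed_tokens", "lm_head"],
    ensure_weight_tying=True,  # keep the adapters on the tied embedding/head tied as well
)
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()
```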
34 changes: 34 additions & 0 deletions src/peft/tuners/lora/layer.py
@@ -150,6 +150,19 @@ def update_layer(
adapter_name: str,
r: int,
lora_alpha: int,
lora_dropout,
Contributor Author: I think this was added by a merge commit, let me take a look and remove this if not required.

Collaborator: Yeah, this looks like a wrong merge. #2960 refactored this code to only include lora_alpha and config instead of all those params + kwargs. lora_dropout through use_bdlora should be removed.

init_lora_weights,
use_rslora,
use_dora: bool = False,
use_alora: bool = False,
use_qalora: bool = False,
lora_bias: bool = False,
arrow_config: ArrowConfig = None,
qalora_group_size: int = 32,
inference_mode: bool = False,
tied_adapter: Optional[dict[str, nn.Parameter]] = None,
lora_ga_config=None,
use_bdlora=None,
config: LoraConfig,
**kwargs,
) -> None:
@@ -190,6 +203,17 @@ def update_layer(
# Actual trainable parameters
self.lora_A[adapter_name] = nn.Linear(self.in_features, r, bias=False)
self.lora_B[adapter_name] = nn.Linear(r, self.out_features, bias=lora_bias)

# Tying adapters is only implemented for Linear layers
# where the source is the embedding layer.
# Currently, this is the most prevalent way of tying layers (weight tying)
if tied_adapter:
lora_A_params = tied_adapter["lora_A"]
lora_B_params = tied_adapter["lora_B"]

self.lora_A[adapter_name].weight = torch.nn.Parameter(lora_A_params)
self.lora_B[adapter_name].weight = torch.nn.Parameter(lora_B_params)

self.lora_bias[adapter_name] = lora_bias

if use_rslora:
@@ -743,6 +767,16 @@ def __init__(
adapter_name,
r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
use_alora=use_alora,
lora_bias=lora_bias,
arrow_config=arrow_config,
tied_adapter=kwargs.pop("tied_adapter", None),
lora_ga_config=lora_ga_config,
use_bdlora=use_bdlora,
Comment on lines +770 to +779
Collaborator: Remove, merge artifact.

config=config,
**kwargs,
)
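Aside from the diff, a minimal PyTorch sketch (not PEFT's actual classes, illustrative shapes) of the sharing mechanism the tying above relies on: modules that reference the same storage stay in sync, so an update through one is visible through the other:

```python
import torch
from torch import nn

# Stand-ins for an embedding-side adapter weight and the tied head-side adapter.
emb_side = nn.Linear(16, 4, bias=False)
head_side = nn.Linear(16, 4, bias=False)

# Point the head-side module at the embedding-side Parameter; both now share storage.
head_side.weight = emb_side.weight

with torch.no_grad():
    emb_side.weight.add_(1.0)

print(torch.equal(emb_side.weight, head_side.weight))  # True, the update is shared
```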
106 changes: 97 additions & 9 deletions src/peft/tuners/lora/model.py
@@ -15,6 +15,7 @@

import math
import operator
import re
import warnings
from contextlib import contextmanager
from dataclasses import replace
@@ -27,11 +28,7 @@
from torch import nn

from peft.import_utils import is_bnb_4bit_available, is_bnb_available
from peft.tuners.tuners_utils import (
BaseTuner,
BaseTunerLayer,
replicate_layers,
)
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, find_parameter_name_by_tensor, replicate_layers
from peft.utils import (
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
AuxiliaryTrainingWrapper,
@@ -202,13 +199,25 @@ def _create_and_replace(
r = lora_config.rank_pattern.get(r_key, lora_config.r)
alpha = lora_config.alpha_pattern.get(alpha_key, lora_config.lora_alpha)

# Checks if the target is marked as a tied layer
# If true, we add the reference to lora adapters of embedding layer in `tied_adapter`
is_tied = target_name in (getattr(lora_config, "target_modules_to_tie", []) or [])
tied_adapter = {}
if is_tied:
tied_module = self.model.get_input_embeddings()
emb_A = tied_module.lora_embedding_A[adapter_name]
emb_B = tied_module.lora_embedding_B[adapter_name]

tied_adapter = {"lora_A": emb_B.t(), "lora_B": emb_A.t()}

kwargs = {
"r": r,
"lora_alpha": alpha,
"target_name": current_key,
"loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False),
"loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False),
"parameter_name": parameter_name,
"tied_adapter": tied_adapter,
}

# for torchao merging, we need the get_apply_tensor_subclass from the quantization config
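The transposes in `tied_adapter` line up the embedding-adapter shapes with what the tied `Linear` head expects. A small shape check, assuming PEFT's convention of `lora_embedding_A` with shape `(r, vocab)` and `lora_embedding_B` with shape `(dim, r)` (sizes are illustrative):

```python
import torch

vocab, dim, r = 32000, 2048, 8  # illustrative sizes

emb_A = torch.zeros(r, vocab)  # assumed shape of lora_embedding_A for Embedding(vocab, dim)
emb_B = torch.zeros(dim, r)    # assumed shape of lora_embedding_B

# The tied head is Linear(dim, vocab); its adapters need
# lora_A.weight of shape (r, dim) and lora_B.weight of shape (vocab, r).
assert emb_B.t().shape == (r, dim)
assert emb_A.t().shape == (vocab, r)
```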
@@ -249,6 +258,7 @@
if adapter_name not in self.active_adapters:
# adding an additional adapter: it is not automatically trainable
new_module.requires_grad_(False)

self._replace_module(parent, target_name, new_module, target)

def _replace_module(self, parent, child_name, new_module, child):
@@ -857,8 +867,86 @@ def subtract_mutated_init(self, output_state_dict: dict[str, torch.Tensor], adap

return tensors_lora

def _add_modules_to_tie(self, peft_config, tied_weight_keys):
modules_to_save = set(getattr(peft_config, "modules_to_save", []) or [])
missing_keys = set(tied_weight_keys) - modules_to_save
def _add_modules_to_save_to_tie(self, peft_config: LoraConfig, tied_weight_keys: list[str]):
"""
Add the embedding layer to `modules_to_save` and remove the rest of the tied layers from `modules_to_save`.
Maintain a separate set of layers to be tied in `peft_config.modules_to_tie`.

Args:
peft_config (LoraConfig) -- The configuration of the Lora model.
tied_weight_keys (list[str]) -- Contains the layers tied to the embedding layer.
"""
tied_weight_keys = set(tied_weight_keys)
peft_config.modules_to_tie = tied_weight_keys

modules_to_save = getattr(peft_config, "modules_to_save", []) or []

embed_layer_name = find_parameter_name_by_tensor(self.model, self.model.get_input_embeddings())
# find_parameter_name_by_tensor returns the parameter name, so we need to strip the weight from the name
if embed_layer_name.endswith(".weight"):
embed_layer_name = embed_layer_name.removesuffix(".weight")
Comment on lines +886 to +887
Collaborator: I'm not sure if it is ever the case that embed_layer_name ends with .weight. When does that happen? Do we have a test for that?

prefix, sep, suffix = embed_layer_name.partition(".")
if sep and "model" in prefix:
embed_layer_name = suffix

if embed_layer_name not in modules_to_save:
modules_to_save.append(embed_layer_name)
Comment on lines +892 to +893
Collaborator: I think that it's not a problem but a reader might wonder.
Suggested change (add an explanatory comment above these lines):
# the name suffix may already be included but we assume that adding the same module twice is not a problem
if embed_layer_name not in modules_to_save:
modules_to_save.append(embed_layer_name)


# Iterate over `tied_weight_keys`, which are
# fully qualified keys, and remove matching keys from
# `modules_to_save`. It will only remove the first match
# in `modules_to_save`, which should be safe, because `tied_weight_keys`
# is a unique set of keys
Comment on lines +895 to +899
Collaborator: I like this comment but it would be vastly improved if it would explain to what end we're doing this. Could you add the reason to this comment to inform the reader about why we're doing this?

for key in tied_weight_keys:
for m in modules_to_save:
if re.match(rf"(^|.*\.){m}($|\..*)", key):
modules_to_save.remove(m)
break

peft_config.modules_to_save = modules_to_save
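To make the matching above concrete, a standalone sketch of the same pattern (module and key names are illustrative; iterating over a copy of the list is only for this standalone example):

```python
import re

tied_weight_keys = ["lm_head.weight"]          # fully qualified tied keys (illustrative)
modules_to_save = ["embed_tokens", "lm_head"]  # short names as given in the config

for key in tied_weight_keys:
    for m in list(modules_to_save):
        # "lm_head" matches "lm_head.weight"; "embed_tokens" does not.
        if re.match(rf"(^|.*\.){m}($|\..*)", key):
            modules_to_save.remove(m)
            break

print(modules_to_save)  # ['embed_tokens']; the tied head is handled via tying instead
```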

def _add_targets_to_tie(self, peft_config: LoraConfig, tied_weight_keys: list[str]):
"""
Add the embedding layer to `target_modules` and remove the rest of the tied layers from `target_modules`.
Maintain a separate set of layers to be tied in `peft_config.target_modules_to_tie`.

Args:
peft_config (LoraConfig) -- The configuration of the Lora model.
tied_weight_keys (list[str]) -- Contains the layers tied to the embedding layer.
"""
tied_weight_keys = set(tied_weight_keys)
peft_config.target_modules_to_tie = tied_weight_keys

raw_target_modules = getattr(peft_config, "target_modules", None)
Contributor Author: @BenjaminBossan Please review this logic. I know this is a bit hacky! I am open to suggestions.

Member: Hmm yeah, this is rough. We can't really operate on the string like this, as there are too many possible ways that the regex could be formed. I wonder if we should just leave it be and deal with the tied module edge case in inject_adapter directly. I haven't fully thought this through, perhaps you already tried that and there is a caveat that I'm missing?

Contributor Author: #2879 (comment)
It should be possible, it would just make the flow very convoluted.

Contributor Author: I redid this a bit. We just need to make sure that embed_tokens is present in the target_modules.


embed_layer_name = find_parameter_name_by_tensor(self.model, self.model.get_input_embeddings())
# find_parameter_name_by_tensor returns the parameter name, so we need to strip the weight from the name
if embed_layer_name.endswith(".weight"):
embed_layer_name = embed_layer_name.removesuffix(".weight")
Comment on lines +924 to +925
Collaborator: Same comment as above.

prefix, sep, suffix = embed_layer_name.partition(".")
if sep and "model" in prefix:
embed_layer_name = suffix
Comment on lines +926 to +928
Collaborator:
Why do we split the fully-qualified name and make it a less precise name only to add it to the raw_target_modules where it could have been a fully-qualified name? If there's a good reason then it should be documented in a comment. If it is just escaping, use re.escape(embed_layer_name) and use that result in the new raw_target_modules string.
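A minimal sketch of the `re.escape` variant suggested here (not part of the diff; names are illustrative assumptions):

```python
import re

embed_layer_name = "model.embed_tokens"  # fully qualified name, illustrative
raw_target_modules = r".*_proj$"         # user-supplied regex, illustrative

# Escape the literal name so its dots are not treated as regex wildcards.
combined = rf"(?:{raw_target_modules}|.*{re.escape(embed_layer_name)}$)"
print(combined)  # (?:.*_proj$|.*model\.embed_tokens$)
```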


if isinstance(raw_target_modules, str):
Collaborator:
I think that target_modules="all-linear" qualifies for this case (technically) but it probably doesn't reach this code path since it doesn't match the embedding layer. Still, I think we should have a test that makes sure that LoraConfig(target_modules="all-linear", ensure_weight_tying=True) doesn't break (if it doesn't exist already, I may have missed it!)
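A sketch of the test suggested here; the checkpoint is an assumption (any causal LM with tied input/output embeddings and plain nn.Linear layers should do):

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# facebook/opt-125m is assumed here to tie its output projection to the input embeddings.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

# "all-linear" combined with ensure_weight_tying should not raise.
config = LoraConfig(target_modules="all-linear", ensure_weight_tying=True)
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()
```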

# Given the way weight tying is handled for adapters, we always want to add
# lora adapters to the input embedding layer (embed_tokens)
# instead of the output embedding layer.
raw_target_modules = rf"(?:{raw_target_modules}|.*{embed_layer_name}$)"
peft_config.target_modules = raw_target_modules
return

peft_config.modules_to_tie = missing_keys
target_modules = set(raw_target_modules or [])
target_modules.add(embed_layer_name)

# Iterate over `tied_weight_keys`, which are
# fully qualified keys, and remove matching keys from
# `target_modules`. It will only remove the first match
# in `target_modules`, which should be safe, because `tied_weight_keys`
Comment on lines +941 to +944
Collaborator: Same as above, misses the 'why'.

# is a unique set of keys
for key in tied_weight_keys:
for m in target_modules:
if re.match(rf"(^|.*\.){m}($|\..*)", key):
target_modules.remove(m)
break

peft_config.target_modules = target_modules