Commit 8e83d81

Merge branch 'huggingface:main' into adaption_prompt_edits
2 parents: 0a84e1a + b34d8a2

8 files changed: +141 -12 lines changed


setup.py

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@
 from setuptools import find_packages, setup
 
 
-VERSION = "0.14.1.dev0"
+VERSION = "0.15.0"
 
 extras = {}
 extras["quality"] = [
@@ -87,7 +87,7 @@
 )
 
 # Release checklist
-# 1. Change the version in __init__.py and setup.py to the release version, e.g. from "0.6.0.dev0" to "0.6.0"
+# 1. Change the version in __init__.py and setup.py to the release version, e.g. from "0.6.1.dev0" to "0.7.0"
 # 2. Check if there are any deprecations that need to be addressed for this release by searching for "# TODO" in the code
 # 3. Commit these changes with the message: "Release: VERSION", create a PR and merge it.
 # 4. Add a tag in git to mark the release: "git tag -a VERSION -m 'Adds tag VERSION for pypi' "
@@ -107,4 +107,4 @@
 # twine upload dist/* -r pypi
 # 9. Add release notes to the tag on https://github.com/huggingface/peft/releases once everything is looking hunky-dory.
 #    Check the notes here: https://docs.google.com/document/d/1k-sOIfykuKjWcOIALqjhFKz4amFEp-myeJUJEzNgjoU/edit?usp=sharing
-# 10. Update the version in __init__.py, setup.py to the bumped minor version + ".dev0" (e.g. from "0.6.0" to "0.7.0.dev0")
+# 10. Update the version in __init__.py, setup.py to the bumped patch version + ".dev0" (e.g. from "0.7.0" to "0.7.1.dev0")

src/peft/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.14.1.dev0"
+__version__ = "0.15.0"
 
 from .auto import (
     MODEL_TYPE_TO_PEFT_MODEL_MAPPING,
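
For anyone verifying the bump locally, a quick sanity check (a minimal sketch; it assumes this release is the version installed in the current environment):

import peft

# Should print "0.15.0" once this commit is part of the installed package.
print(peft.__version__)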

src/peft/tuners/tuners_utils.py

Lines changed: 12 additions & 1 deletion
@@ -452,9 +452,13 @@ def inject_adapter(
         # quite a lot. See: https://github.com/huggingface/diffusers/issues/9297
         # As there is a small chance for undiscovered bugs, we apply this optimization only if the list of
         # target_modules is sufficiently big.
+        # We also exclude IA³ from this optimization. This is because IA³ has both target_modules and
+        # feedforward_modules, which are coupled (the latter must be a subset). It would be possible to change the
+        # logic to keep both in sync, but it's not quite trivial and probably not worth the effort. See #2429.
         if (
             isinstance(peft_config.target_modules, (list, set))
-            and len(peft_config.target_modules) >= MIN_TARGET_MODULES_FOR_OPTIMIZATION
+            and (len(peft_config.target_modules) >= MIN_TARGET_MODULES_FOR_OPTIMIZATION)
+            and (peft_config.peft_type != PeftType.IA3)
         ):
             names_no_target = [
                 name
@@ -469,6 +473,13 @@ def inject_adapter(
             if not key:
                 continue
             # Check for modules_to_save in case
+            #
+            # Note that this is redundant with PeftModel.set_additional_trainable_models but might be necessary
+            # when calling inject_adapter without a PEFT model. This is outdated as it only focuses on
+            # ModulesToSaveWrapper and ignores other potentially configured AuxiliaryTrainingWrapper instances.
+            #
+            # TODO: determine if there's a good reason for this and refactor to support AuxiliaryTrainingWrapper,
+            # or remove if superfluous.
             if _check_for_modules_to_save and any(
                 key.endswith(module_to_save) for module_to_save in peft_config.modules_to_save
             ):
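
To see why IA³ is excluded, here is a minimal sketch of the coupling using plain Python sets; the module names and the subset check below are illustrative stand-ins, not the actual PEFT validation code:

# IA³ requires feedforward_modules to be a subset of target_modules. If the optimization
# rewrote target_modules into a minimal suffix form, a literal subset check could fail after
# a save/load roundtrip, because feedforward_modules would still hold the full names.
target_modules = {f"blocks.{i}.query" for i in range(25)}
feedforward_modules = {f"blocks.{i}.query" for i in range(25)}
assert feedforward_modules.issubset(target_modules)  # holds before the optimization

minimized_target_modules = {"query"}  # roughly what the suffix-based minimization would produce
assert not feedforward_modules.issubset(minimized_target_modules)  # the coupling is now broken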

src/peft/utils/other.py

Lines changed: 10 additions & 0 deletions
@@ -499,6 +499,10 @@ def update(self, adapter_name, **kwargs):
             add_hook_to_module(self.modules_to_save[adapter_name], new_hook)
 
         self.original_module.requires_grad_(False)
+
+        # note that there currently cannot be more than one active adapter for the same layer with modules to save
+        # since there would be no clear way to decide which adapter's weights are the correct ones. therefore we
+        # assume that there is only one active adapter. this precondition is enforced by _set_adapter.
         if adapter_name == self.active_adapter:
             self.modules_to_save[adapter_name].requires_grad_(True)
 
@@ -550,6 +554,10 @@ def adapter_state_dict_load_map(self, adapter_name):
         return {k: f"modules_to_save.{adapter_name}.{k}" for k in self.adapter_state_dict(adapter_name)}
 
     def adapter_state_dict(self, adapter_name):
+        if adapter_name not in self._adapters:
+            # In case of multiple adapters, each bringing their own modules to save, each
+            # ModulesToSaveWrapper will be queried but not every wrapper is obliged to serve the same adapters.
+            return {}
         return self.modules_to_save[adapter_name].state_dict()
 
     def unload_and_optionally_merge_module(
@@ -732,6 +740,7 @@ def _set_trainable(
     found_modules = set()
     # disable removal of duplicates to support targeting tied weights
     key_list = [key for key, _ in model.named_modules(remove_duplicate=False)]
+
     for key in key_list:
         target_module_found = any(key.endswith(target_key) for target_key in module_names)
         if target_module_found:
@@ -776,6 +785,7 @@ def check_adapter_name(adapter_name):
         # if the adapter is found in this module, set it as the active adapter, else disable the adapters of this
         # module
         if adapter_name in module._adapters:
+            module.enable_adapters(True)
             module.set_adapter(adapter_name)
         else:
             module.enable_adapters(False)
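
A minimal sketch of the situation the early return handles, using a hypothetical toy wrapper rather than the real ModulesToSaveWrapper: two wrappers each host a different adapter, yet a model-level save may query every wrapper for every adapter name.

class ToyWrapper:
    """Hypothetical stand-in for ModulesToSaveWrapper holding per-adapter module copies."""

    def __init__(self, adapters):
        self._adapters = set(adapters)
        self.modules_to_save = {name: {"weight": f"copy for {name}"} for name in adapters}

    def adapter_state_dict(self, adapter_name):
        if adapter_name not in self._adapters:
            # this wrapper was never configured for that adapter, so it contributes nothing
            return {}
        return self.modules_to_save[adapter_name]


wrapper_layer_0 = ToyWrapper(["adapter_1"])  # e.g. wraps 0.post_attention_layernorm
wrapper_layer_1 = ToyWrapper(["adapter_2"])  # e.g. wraps 1.post_attention_layernorm

# Without the early return, the toy lookup below would raise a KeyError; with it, the
# wrapper that doesn't know the adapter simply contributes an empty state dict.
assert wrapper_layer_0.adapter_state_dict("adapter_2") == {}
assert wrapper_layer_1.adapter_state_dict("adapter_2") == {"weight": "copy for adapter_2"}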

src/peft/utils/save_and_load.py

Lines changed: 1 addition & 4 deletions
@@ -21,7 +21,6 @@
 import torch
 from huggingface_hub import file_exists, hf_hub_download
 from huggingface_hub.errors import EntryNotFoundError, LocalEntryNotFoundError
-from packaging import version
 from safetensors.torch import load_file as safe_load_file
 
 from peft.mapping import PEFT_TYPE_TO_PREFIX_MAPPING
@@ -468,15 +467,13 @@ def renamed_dora_weights(k):
     return load_result
 
 
+# TODO: remove this function, use vanilla torch.load as soon as torch < 2.6.0 is no longer supported
 def torch_load(*args, weights_only=True, **kwargs):
     """Call torch.load and handle weights_only.
 
     Defaults to weights_only=True to anticipate upcoming switch on the PyTorch side.
 
     """
-    # TODO: weights_only was added in 1.13, remove if 1.12 no longer needs to be supported
-    if version.parse(torch.__version__) < version.parse("1.13"):
-        return torch.load(*args, **kwargs)
     return torch.load(*args, weights_only=weights_only, **kwargs)
 
 
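A brief usage sketch of the simplified helper; it assumes torch_load is imported from peft.utils.save_and_load and the checkpoint filename is a placeholder:

from peft.utils.save_and_load import torch_load

# weights_only defaults to True, matching the direction torch.load is moving in.
state_dict = torch_load("adapter_model.bin", map_location="cpu")

# Callers that really need to unpickle arbitrary objects can still opt out explicitly.
objects = torch_load("adapter_model.bin", map_location="cpu", weights_only=False)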

tests/test_custom_models.py

Lines changed: 4 additions & 3 deletions
@@ -1236,6 +1236,7 @@ def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
         outputs_base = model(**X)
         if issubclass(config_cls, (FourierFTConfig, TrainableTokensConfig)):
             config_kwargs = config_kwargs.copy()
+            # override the default value and make PEFT operation a no-op
             config_kwargs["init_weights"] = True
         config = config_cls(
             base_model_name_or_path=model_id,
@@ -1255,9 +1256,9 @@ def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
         model.train()
         # EmbConv1D is slow to learn for some reason
         lr = 0.01 if model_id != "EmbConv1D" else 1.0
-        if isinstance(config_cls, LNTuningConfig):
-            # LayerNorm tuning is slow to learn
-            lr = 1.0
+        if isinstance(config, TrainableTokensConfig):
+            # TrainableTokens is only changing a small subset, so we need a higher lr to see the difference
+            lr = 2.0
         optimizer = torch.optim.SGD(model.parameters(), lr=lr)
 
         # train at least 3 steps for all parameters to be updated (probably this is required because of symmetry

tests/test_other.py

Lines changed: 79 additions & 0 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 
 import pytest
 import torch
@@ -107,6 +108,84 @@ def test_get_peft_model_revision_warning(tmp_path):
     _ = get_peft_model(base_model, lora_config, revision=overwrite_revision)
 
 
+def test_load_multiple_adapters_different_modules_to_save(tmp_path):
+    # This tests the error described in #2422 where loading multiple adapters with different modules_to_save
+    # attributes fails (due to a regression from #2376).
+
+    model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
+
+    def peft_config(**kwargs):
+        return LoraConfig(target_modules="all-linear", **kwargs)
+
+    original_model = copy.deepcopy(model)
+
+    peft_config_0 = peft_config(modules_to_save=["0.post_attention_layernorm"])
+    peft_config_1 = peft_config(modules_to_save=["0.post_attention_layernorm"])
+    peft_config_2 = peft_config(modules_to_save=["1.post_attention_layernorm"])
+
+    # Save adapter 0, nothing fancy, should be equal to the base model weights
+    peft_model = get_peft_model(copy.deepcopy(original_model), peft_config_0)
+    peft_model.save_pretrained(tmp_path / "adapter_0")
+
+    # Save adapter 1, modules to save weights are modified randomly, should be unique to adapter 1
+    peft_model = get_peft_model(copy.deepcopy(original_model), peft_config_1)
+    peft_model.model.model.layers[0].post_attention_layernorm.weight.data = torch.rand_like(
+        peft_model.model.model.layers[0].post_attention_layernorm.weight.data
+    )
+    adapter_1_saved = peft_model.model.model.layers[0].post_attention_layernorm.weight.data.clone()
+    peft_model.save_pretrained(tmp_path / "adapter_1")
+
+    # Save adapter 2, modules to save weights are modified randomly, should be unique to adapter 2
+    peft_model = get_peft_model(copy.deepcopy(original_model), peft_config_2)
+    peft_model.model.model.layers[1].post_attention_layernorm.weight.data = torch.rand_like(
+        peft_model.model.model.layers[1].post_attention_layernorm.weight.data
+    )
+    adapter_2_saved = peft_model.model.model.layers[1].post_attention_layernorm.weight.data.clone()
+    peft_model.save_pretrained(tmp_path / "adapter_2")
+
+    del peft_model
+
+    combined_model = PeftModel.from_pretrained(original_model, tmp_path / "adapter_0", adapter_name="adapter_0")
+    combined_model.load_adapter(tmp_path / "adapter_1", adapter_name="adapter_1")
+    combined_model.load_adapter(tmp_path / "adapter_2", adapter_name="adapter_2")
+
+    # For adapter 0 we expect every modules_to_save layer mentioned in this test to be equal to the original model
+    # since we didn't modify it for adapter 0 and only adapter 0 is active.
+    combined_model.set_adapter("adapter_0")
+    assert torch.allclose(
+        combined_model.model.model.layers[0].post_attention_layernorm.weight,
+        original_model.model.layers[0].post_attention_layernorm.weight,
+    )
+    assert torch.allclose(
+        combined_model.model.model.layers[1].post_attention_layernorm.weight,
+        original_model.model.layers[1].post_attention_layernorm.weight,
+    )
+
+    # For adapter 1 we expect the modified modules_to_save layer 0.post_attention_layernorm to be modified; the
+    # other modules_to_save layers mentioned above should be untouched.
+    combined_model.set_adapter("adapter_1")
+    assert torch.allclose(
+        combined_model.model.model.layers[0].post_attention_layernorm.weight,
+        adapter_1_saved,
+    )
+    assert torch.allclose(
+        combined_model.model.model.layers[1].post_attention_layernorm.weight,
+        original_model.model.layers[1].post_attention_layernorm.weight,
+    )
+
+    # For adapter 2 we expect its modules_to_save layer (1.post_attention_layernorm) to be modified but the other
+    # modules_to_save weights should be kept original.
+    combined_model.set_adapter("adapter_2")
+    assert torch.allclose(
+        combined_model.model.model.layers[0].post_attention_layernorm.weight,
+        original_model.model.layers[0].post_attention_layernorm.weight,
+    )
+    assert torch.allclose(
+        combined_model.model.model.layers[1].post_attention_layernorm.weight,
+        adapter_2_saved,
+    )
+
+
 class TestModulesToSaveAttributeAccess:
     """Test attribute access on the ModulesToSaveWrapper class.

tests/test_tuners_utils.py

Lines changed: 31 additions & 0 deletions
@@ -37,6 +37,7 @@
     IA3Config,
     LoHaConfig,
     LoraConfig,
+    PeftModel,
     PromptTuningConfig,
     VeraConfig,
     get_layer_status,
@@ -1502,6 +1503,36 @@ def __init__(self):
         # target modules should *not* be simplified to "query" as that would match "single_transformers_blocks" too
         assert model.peft_config["default"].target_modules != {"query"}
 
+    def test_find_minimal_target_modules_does_not_error_with_ia3(self, tmp_path):
+        # See #2429
+        # There is an issue with the compression of the target_modules attribute when using IA³. There, we
+        # additionally have the feedforward_modules attribute, which must be a subset of target_modules. When
+        # target_modules is shrunk, the subset check will fail. This test ensures that this doesn't happen.
+        n_layers = MIN_TARGET_MODULES_FOR_OPTIMIZATION + 1
+
+        class InnerModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.query = nn.Linear(10, 10)
+
+        class OuterModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.blocks = nn.ModuleList([InnerModule() for _ in range(n_layers)])
+
+        target_modules = [f"blocks.{i}.query" for i in range(n_layers)]
+        feedforward_modules = [f"blocks.{i}.query" for i in range(n_layers)]
+        # the subset check happens here
+        config = IA3Config(target_modules=target_modules, feedforward_modules=feedforward_modules)
+        # the optimization step happens here, after the subset check, so at first we're fine, but we will run
+        # into an issue after a save/load roundtrip
+        model = get_peft_model(OuterModule(), config)
+        model.save_pretrained(tmp_path)
+        del model
+
+        # does not raise
+        PeftModel.from_pretrained(OuterModule(), tmp_path)
+
 
 class TestRankAndAlphaPattern:
     @pytest.fixture
