
Commit 8476c23

SP1029 authored and efraimdahl committed
FIX Improved handling of conv groups (huggingface#2567)
More generalized handling of the groups argument in LoRA/DoRA conv layers (previous solution: huggingface#2403).
1 parent f339af2 commit 8476c23

File tree

4 files changed (+129 −15 lines):
src/peft/tuners/lora/dora.py
src/peft/tuners/lora/layer.py
tests/test_custom_models.py
tests/test_initialization.py

4 files changed

+129
-15
lines changed

src/peft/tuners/lora/dora.py
Lines changed: 4 additions & 2 deletions

@@ -49,7 +49,8 @@ def update_layer(self, *, base_layer, lora_A, lora_B, scaling, place_on_cpu=False):
 
         weight = dequantize_module_weight(base_layer)
         if weight.data.ndim >= 3:  # For handling LoRAs applied to Conv layers.
-            lora_weight = torch.mm(lora_B.flatten(start_dim=1), lora_A.flatten(start_dim=1))
+            r = lora_A.shape[0]
+            lora_weight = torch.mm(lora_B.view([-1, r]), lora_A.view([r, -1]))
             lora_weight = lora_weight.reshape(weight.shape)
         else:
             lora_weight = lora_B @ lora_A
@@ -145,7 +146,8 @@ def forward(self, x, *, lora_A, lora_B, scaling, base_layer, base_result=None):
         output.
         """
         weight = base_layer.weight
-        lora_weight = torch.mm(lora_B.weight.flatten(start_dim=1), lora_A.weight.flatten(start_dim=1))
+        r = lora_A.weight.shape[0]
+        lora_weight = torch.mm(lora_B.weight.view([-1, r]), lora_A.weight.view([r, -1]))
         lora_weight = lora_weight.reshape(weight.shape)
         magnitude = self.weight
         weight_norm = self.get_weight_norm(weight, lora_weight.detach(), scaling)
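The switch from `flatten(start_dim=1)` to `view` matters because `lora_B` can now itself be a grouped convolution: its weight then has shape `(out_features, r // groups, 1, 1)`, so flattening from dim 1 yields an inner dimension of `r // groups` and the matrix product with `lora_A` is no longer conformable. Reshaping through `view([-1, r])` folds the group dimension into the rows instead. A minimal shape check of this arithmetic (a standalone sketch with made-up layer sizes, not code from the commit):

```python
import torch
import torch.nn as nn

# Illustrative sizes: a grouped conv targeted by LoRA/DoRA.
in_features, out_features, k, groups, r = 16, 32, 3, 2, 8

base = nn.Conv2d(in_features, out_features, k, padding=1, groups=groups)
lora_A = nn.Conv2d(in_features, r, k, padding=1, bias=False)
lora_B = nn.Conv2d(r, out_features, 1, groups=groups, bias=False)

# lora_B.weight has shape (32, 4, 1, 1): flatten(start_dim=1) would give
# (32, 4), whose inner dim no longer matches r=8; view([-1, r]) gives (16, 8).
lora_weight = torch.mm(lora_B.weight.view([-1, r]), lora_A.weight.view([r, -1]))
lora_weight = lora_weight.reshape(base.weight.shape)
print(lora_weight.shape)  # torch.Size([32, 8, 3, 3]) == base.weight.shape
```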

src/peft/tuners/lora/layer.py
Lines changed: 8 additions & 1 deletion

@@ -1078,6 +1078,13 @@ def __init__(
         if base_layer.groups > 1:
             warnings.warn("LoRA adapter added to ConvNd layer with groups > 1. Merging is not supported.")
 
+        if r % base_layer.groups != 0:
+            raise ValueError(
+                f"Targeting a {base_layer.__class__.__name__} with groups={base_layer.groups} and rank {r}. "
+                "Currently, support is limited to conv layers where the rank is divisible by groups. "
+                "Either choose a different rank or do not target this specific layer."
+            )
+
         self._active_adapter = adapter_name
         self._kernel_dim = base_layer.weight.dim()
 
@@ -1123,7 +1130,7 @@ def update_layer(
         out_kernel = out_stride = (1,) * (self._kernel_dim - 2)
         self.lora_A[adapter_name] = conv_layer(self.in_features, r, kernel_size, stride, padding, bias=False)
         self.lora_B[adapter_name] = conv_layer(
-            r, self.out_features // base_layer.groups, out_kernel, out_stride, bias=lora_bias
+            r, self.out_features, out_kernel, out_stride, groups=base_layer.groups, bias=lora_bias
         )
         self.lora_bias[adapter_name] = lora_bias
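Two changes here. The `__init__` guard rejects ranks not divisible by `base_layer.groups`, since the grouped `lora_B` needs `r // groups` input channels per group. In `update_layer`, the old code shrank `lora_B`'s output channels to `out_features // groups`, which produced mis-shaped activations; the new code keeps the full `out_features` and passes `groups` through instead. A quick sanity check of the resulting decomposition (a sketch with illustrative sizes, not the PEFT implementation):

```python
import torch
import torch.nn as nn

in_features, out_features, groups, r = 16, 32, 2, 8
base = nn.Conv2d(in_features, out_features, 3, padding=1, groups=groups)

# lora_A mirrors the base layer's spatial hyperparameters; lora_B is a 1x1
# conv that inherits the base layer's groups (this requires r % groups == 0).
lora_A = nn.Conv2d(in_features, r, 3, padding=1, bias=False)
lora_B = nn.Conv2d(r, out_features, 1, groups=groups, bias=False)

x = torch.randn(2, in_features, 20, 20)
assert lora_B(lora_A(x)).shape == base(x).shape  # both (2, 32, 20, 20)
```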

tests/test_custom_models.py
Lines changed: 44 additions & 12 deletions

@@ -118,7 +118,9 @@
     ("Conv2d 1 LoRA with DoRA", "Conv2d", LoraConfig, {"target_modules": ["conv2d"], "use_dora": True}),
     ("Conv2d 2 LoRA with DoRA", "Conv2d", LoraConfig, {"target_modules": ["conv2d", "lin0"], "use_dora": True}),
     ("Conv2d Groups LoRA", "Conv2dGroups", LoraConfig, {"target_modules": ["conv2d"]}),
+    ("Conv2d Groups2 LoRA", "Conv2dGroups2", LoraConfig, {"target_modules": ["conv2d"]}),
     ("Conv2d Groups LoRA with DoRA", "Conv2dGroups", LoraConfig, {"target_modules": ["conv2d"], "use_dora": True}),
+    ("Conv2d Groups2 LoRA with DoRA", "Conv2dGroups2", LoraConfig, {"target_modules": ["conv2d"], "use_dora": True}),
     ("Conv3d 1 LoRA", "Conv3d", LoraConfig, {"target_modules": ["conv3d"]}),
     ("Conv3d 2 LoRA", "Conv3d", LoraConfig, {"target_modules": ["conv3d", "lin0"]}),
     ("Conv3d 1 LoRA with DoRA", "Conv3d", LoraConfig, {"target_modules": ["conv3d"], "use_dora": True}),
@@ -1082,16 +1084,43 @@ def forward(self, X):
 class ModelConv2DGroups(nn.Module):
     def __init__(self):
         super().__init__()
-        self.conv2d = nn.Conv2d(5, 5, 3, groups=5)
+        self.lin0 = nn.Linear(90, 288)
+        # groups is set as 8 since default r=8
+        # hence to make r divisible by groups
+        self.conv2d = nn.Conv2d(16, 16, 3, groups=8)
         self.relu = nn.ReLU()
         self.flat = nn.Flatten()
-        self.lin0 = nn.Linear(5, 2)
+        self.lin1 = nn.Linear(16, 2)
         self.sm = nn.LogSoftmax(dim=-1)
         self.dtype = torch.float
 
     def forward(self, X):
         X = X.to(self.dtype)
-        X = X.reshape(-1, 5, 3, 3)
+        X = X.flatten()
+        X = self.lin0(X)
+        X = X.reshape(2, 16, 3, 3)
+        X = self.conv2d(X)
+        X = self.relu(X)
+        X = self.flat(X)
+        X = self.lin1(X)
+        X = self.sm(X)
+        return X
+
+
+class ModelConv2DGroups2(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv2d = nn.Conv2d(16, 32, 3, padding=1, groups=2)
+        self.relu = nn.ReLU()
+        self.flat = nn.Flatten()
+        self.lin0 = nn.Linear(12800, 2)
+        self.sm = nn.LogSoftmax(dim=-1)
+        self.dtype = torch.float
+
+    def forward(self, X):
+        # Note: needs a different input shape, thus ignore original input
+        X = torch.arange(9 * 16 * 20 * 20).view([9, 16, 20, 20]).to(self.conv2d.weight.device)
+        X = X.to(self.dtype)
         X = self.conv2d(X)
         X = self.relu(X)
         X = self.flat(X)
@@ -1170,6 +1199,9 @@ def from_pretrained(cls, model_id, torch_dtype=None):
         if model_id == "Conv2dGroups":
             return ModelConv2DGroups().to(torch_dtype)
 
+        if model_id == "Conv2dGroups2":
+            return ModelConv2DGroups2().to(torch_dtype)
+
         if model_id == "Conv3d":
             return ModelConv3D().to(torch_dtype)
 
@@ -1242,7 +1274,7 @@ def test_load_multiple_adapters(self, test_name, model_id, config_cls, config_kwargs):
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_merge_layers(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/pull/2403
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             pytest.skip(
                 f"Skipping test for {model_id} as merging is not supported. (See https://github.com/huggingface/peft/pull/2403 for details)"
             )
@@ -1265,7 +1297,7 @@ def test_merge_layers(self, test_name, model_id, config_cls, config_kwargs):
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_merge_layers_fp16(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/pull/2403
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             pytest.skip(
                 f"Skipping test for {model_id} as merging is not supported. (See https://github.com/huggingface/peft/pull/2403 for details)"
             )
@@ -1280,7 +1312,7 @@ def test_merge_layers_fp16(self, test_name, model_id, config_cls, config_kwargs):
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_merge_layers_is_idempotent(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/pull/2403
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             pytest.skip(
                 f"Skipping test for {model_id} as merging is not supported. (See https://github.com/huggingface/peft/pull/2403 for details)"
             )
@@ -1296,7 +1328,7 @@ def test_merge_layers_is_idempotent(self, test_name, model_id, config_cls, config_kwargs):
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_safe_merge(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/pull/2403
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             pytest.skip(
                 f"Skipping test for {model_id} as merging is not supported. (See https://github.com/huggingface/peft/pull/2403 for details)"
             )
@@ -1390,7 +1422,7 @@ def test_forward_float16(self, test_name, model_id, config_cls, config_kwargs):
         # check that none of this raises an error
         model(**X)
 
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             # this model does not support merging
             return
 
@@ -1432,7 +1464,7 @@ def test_forward_bfloat16(self, test_name, model_id, config_cls, config_kwargs):
         # check that none of this raises an error
         model(**X)
 
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             # this model does not support merging
             return
 
@@ -1473,7 +1505,7 @@ def test_forward_float16_no_autocast(self, test_name, model_id, config_cls, config_kwargs):
         # check that none of this raises an error
         model(**X)
 
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             # this model does not support merging
             return
 
@@ -1514,7 +1546,7 @@ def test_forward_bfloat16_no_autocast(self, test_name, model_id, config_cls, config_kwargs):
         # check that none of this raises an error
         model(**X)
 
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             # this model does not support merging
             return
 
@@ -1685,7 +1717,7 @@ def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
     @pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
     def test_disable_adapters_with_merging(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/pull/2403
-        if model_id in ["Conv2dGroups"]:
+        if model_id in ["Conv2dGroups", "Conv2dGroups2"]:
             pytest.skip(
                 f"Skipping test for {model_id} as merging is not supported. (See https://github.com/huggingface/peft/pull/2403 for details)"
            )
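Outside the test suite, the newly supported path can be exercised directly. A sketch (assuming a PEFT version that includes this fix; the single-layer model and its sizes are made up): applying LoRA with DoRA to a grouped conv and running a forward pass works, while merging remains unsupported for groups > 1, hence the skips above.

```python
import torch
import torch.nn as nn
from peft import LoraConfig, get_peft_model

# Hypothetical grouped-conv model; "0" is the Sequential child's module name.
model = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1, groups=2))
config = LoraConfig(r=8, target_modules=["0"], use_dora=True)  # r divisible by groups
peft_model = get_peft_model(model, config)

out = peft_model(torch.randn(2, 16, 20, 20))
print(out.shape)  # torch.Size([2, 32, 20, 20]); merging this adapter is still unsupported
```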

tests/test_initialization.py
Lines changed: 73 additions & 0 deletions

@@ -1324,6 +1324,79 @@ def test_lora_incompatible_mamba_modules(self):
         with pytest.raises(ValueError, match=msg):
             get_peft_model(model, config)
 
+    def get_model_conv2d_groups(self):
+        class ModelConv2DGroups(nn.Module):
+            """For testing when groups argument is used in conv layer"""
+
+            def __init__(self):
+                super().__init__()
+                self.conv2d = nn.Conv2d(16, 32, 3, padding=1, groups=2)
+                self.relu = nn.ReLU()
+                self.flat = nn.Flatten()
+                self.lin0 = nn.Linear(12800, 2)
+                self.sm = nn.LogSoftmax(dim=-1)
+                self.dtype = torch.float
+
+            def forward(self, X):
+                # This is ignoring input since main usage is for checking raising of error when peft is applied
+                X = torch.arange(9 * 16 * 20 * 20).view([9, 16, 20, 20]).to(self.conv2d.weight.device)
+                X = X.to(self.dtype)
+                X = self.conv2d(X)
+                X = self.relu(X)
+                X = self.flat(X)
+                X = self.lin0(X)
+                X = self.sm(X)
+                return X
+
+        return ModelConv2DGroups().eval().to(self.torch_device)
+
+    @pytest.mark.parametrize(
+        "config_cls, config_kwargs",
+        [
+            pytest.param(LoraConfig, {"r": 8, "target_modules": ["conv2d"]}, id="lora with rank divisible by groups"),
+            pytest.param(LoraConfig, {"r": 2, "target_modules": ["conv2d"]}, id="lora with rank equal to groups"),
+            pytest.param(
+                LoraConfig, {"r": 1, "target_modules": ["conv2d"]}, id="lora with rank not divisible by groups"
+            ),
+            pytest.param(
+                LoraConfig,
+                {"r": 8, "target_modules": ["conv2d"], "use_dora": True},
+                id="dora with rank divisible by groups",
+            ),
+            pytest.param(
+                LoraConfig,
+                {"r": 2, "target_modules": ["conv2d"], "use_dora": True},
+                id="dora with rank equal to groups",
+            ),
+            pytest.param(
+                LoraConfig,
+                {"r": 1, "target_modules": ["conv2d"], "use_dora": True},
+                id="dora with rank not divisible by groups",
+            ),
+        ],
+    )
+    def test_error_raised_if_rank_not_divisible_by_groups(self, config_cls, config_kwargs):
+        # This test checks if error is raised when rank is not divisible by groups for conv layer since
+        # currently, support is limited to conv layers where the rank is divisible by groups in lora and dora
+        base_model = self.get_model_conv2d_groups()
+        peft_config = config_cls(**config_kwargs)
+        r = config_kwargs["r"]
+        base_layer = base_model.conv2d
+        groups = base_layer.groups
+        if r % groups != 0:
+            with pytest.raises(
+                ValueError,
+                match=(
+                    f"Targeting a {base_layer.__class__.__name__} with groups={base_layer.groups} and rank {r}. "
+                    "Currently, support is limited to conv layers where the rank is divisible by groups. "
+                    "Either choose a different rank or do not target this specific layer."
+                ),
+            ):
+                peft_model = get_peft_model(base_model, peft_config)
+        else:
+            # No error should be raised
+            peft_model = get_peft_model(base_model, peft_config)
 
 
 class TestLokrInitialization:
     torch_device = infer_device()
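For reference, the failure mode this test guards against looks like the following in user code (a hypothetical minimal model, with the rank deliberately not divisible by groups):

```python
import torch.nn as nn
from peft import LoraConfig, get_peft_model

model = nn.Sequential(nn.Conv2d(16, 32, 3, padding=1, groups=2))
# r=1 is not divisible by groups=2, so get_peft_model raises:
# ValueError: Targeting a Conv2d with groups=2 and rank 1. Currently, support
# is limited to conv layers where the rank is divisible by groups. Either
# choose a different rank or do not target this specific layer.
get_peft_model(model, LoraConfig(r=1, target_modules=["0"]))
```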
