Add reshape_activations_m to the convert_weight_compressed_conv1x1_to_matmul pattern to handle activations reshaped from [1,1,num_head,head_dim] to [1,hidden_in,1,1]

bopeng1234 · bopeng1234 · commit eb718d679736 · 2026-02-04T12:53:54.000+08:00
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp b/src/common/transformations/src/transformations/op_conversions/convert_weight_compressed_conv1x1_to_matmul.cpp
@@ -40,7 +40,9 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
     auto first_input_m = ov::pass::pattern::any_input();
     auto a_order_m = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
     auto transpose_activations_m = ov::pass::pattern::wrap_type<ov::op::v1::Transpose>({first_input_m, a_order_m});
-    auto reshape_activations_m = ov::pass::pattern::wrap_type<ov::op::v1::Reshape>({first_input_m, a_order_m});
+    auto reshape_activations_m =
+        ov::pass::pattern::wrap_type<ov::op::v1::Reshape>({first_input_m, a_order_m},
+                                                          pattern::shape_matches("[?, hidden_in, 1, 1]"));
     auto a_m =
         std::make_shared<ov::pass::pattern::op::Or>(OutputVector{transpose_activations_m, reshape_activations_m});
 
@@ -211,6 +213,26 @@ ov::pass::ConvertWeightCompressedConv1x1ToMatmul::ConvertWeightCompressedConv1x1
             scaled_weight = final_weight_reshape;
         }
 
+        // When the activation is reshaped to [?, hidden_in, 1, 1], there are two possible cases:
+        // 1. It is reshaped from [..., hidden_in]:
+        //    use it directly in the matmul.
+        // 2. It is reshaped from [..., num_head, head_dim]:
+        //    it cannot be used directly; reshape it to [..., hidden_in] first, then use it in the matmul.
+        if (pattern_map.count(reshape_activations_m)) {
+            auto reshape_activations = pattern_map.at(reshape_activations_m).get_node_shared_ptr();
+            auto shape_in = reshape_activations->get_input_partial_shape(0);
+            auto shape_out = reshape_activations->get_output_partial_shape(0);
+            if (shape_in[-1].is_dynamic() || shape_in[-1].get_length() != shape_out[1].get_length()) {
+                auto reshape_const =
+                    std::make_shared<ov::op::v0::Constant>(ov::element::i64,
+                                                           ov::Shape{4},
+                                                           std::vector<int64_t>{1, 1, -1, shape_out[1].get_length()});
+                auto reshape_activations_new = std::make_shared<ov::op::v1::Reshape>(activation, reshape_const, false);
+                ov::copy_runtime_info(reshape_activations, reshape_activations_new);
+                activation = reshape_activations_new;
+            }
+        }
+
         auto matmul = std::make_shared<ov::op::v0::MatMul>(activation, scaled_weight, false, true);
         ov::copy_runtime_info(conv1x1, matmul);
         std::shared_ptr<Node> matmul_out;
diff --git a/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp b/src/common/transformations/tests/op_conversions/convert_weight_compressed_conv1x1_to_matmul_test.cpp
@@ -40,17 +40,21 @@ struct Conv1x1ToMatmulTestParams {
     bool with_bias;
     bool with_convert;
     bool with_param_weight;
+    bool with_act_new_reshape;
     std::string activation_op_type;
 };
 
 std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) {
-    auto input = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 1, 2, 10});
+    auto input = std::make_shared<ov::opset1::Parameter>(
+        ov::element::f16,
+        (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10});
+
     std::shared_ptr<ov::Node> act_node;
     if (p.activation_op_type == "Transpose") {
         auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 3, 1, 2});
         act_node = std::make_shared<ov::opset1::Transpose>(input, transpose_const);
     } else {
-        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 10, 1, 2});
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 10, 1, 1});
         act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false);
     }
 
@@ -114,15 +118,17 @@ std::shared_ptr<ov::Model> gen_model(const Conv1x1ToMatmulTestParams& p) {
         auto transpose_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {0, 2, 3, 1});
         out_node = std::make_shared<ov::opset1::Transpose>(current_node, transpose_const);
     } else {
-        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 2, 15});
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15});
         out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false);
     }
 
     return std::make_shared<ov::Model>(ov::OutputVector{out_node}, params);
 }
 
 std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
-    auto input = std::make_shared<ov::opset1::Parameter>(ov::element::f16, ov::Shape{1, 1, 2, 10});
+    auto input = std::make_shared<ov::opset1::Parameter>(
+        ov::element::f16,
+        (p.activation_op_type == "Reshape" && p.with_act_new_reshape) ? ov::Shape{1, 1, 2, 5} : ov::Shape{1, 1, 1, 10});
 
     std::shared_ptr<ov::Node> weights_node;
     ov::ParameterVector params = {input};
@@ -162,7 +168,12 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
         mul = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
     }
 
-    auto matmul = std::make_shared<ov::op::v0::MatMul>(input, mul, false, true);
+    std::shared_ptr<ov::Node> act_node = input;
+    if (p.activation_op_type == "Reshape" && p.with_act_new_reshape) {
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 1, 10});
+        act_node = std::make_shared<ov::opset1::Reshape>(input, reshape_const, false);
+    }
+    auto matmul = std::make_shared<ov::op::v0::MatMul>(act_node, mul, false, true);
     current_node = matmul;
 
     if (p.with_bias) {
@@ -175,7 +186,7 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
 
     std::shared_ptr<ov::Node> out_node;
     if (p.activation_op_type == "Reshape") {
-        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 2, 15});
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{4}, {1, 1, 1, 15});
         out_node = std::make_shared<ov::opset1::Reshape>(current_node, reshape_const, false);
     } else {
         out_node = current_node;
@@ -187,33 +198,45 @@ std::shared_ptr<ov::Model> gen_model_ref(const Conv1x1ToMatmulTestParams& p) {
 
 class ConvertWeightCompressedConv1x1ToMatmulTest
     : public TransformationTestsF,
-      public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, std::string>> {
+      public WithParamInterface<std::tuple<bool, bool, bool, bool, bool, bool, std::string>> {
 public:
     static std::string get_test_case_name(
-        const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, std::string>>& obj) {
-        const auto& [with_group_quant, with_zp, with_bias, with_convert, with_param_weight, activation_op_type] =
-            obj.param;
+        const testing::TestParamInfo<std::tuple<bool, bool, bool, bool, bool, bool, std::string>>& obj) {
+        const auto& [with_group_quant,
+                     with_zp,
+                     with_bias,
+                     with_convert,
+                     with_param_weight,
+                     with_act_new_reshape,
+                     activation_op_type] = obj.param;
 
         std::ostringstream result;
         result << "with_group_quant=" << with_group_quant << "_";
         result << "with_zp=" << with_zp << "_";
         result << "with_bias=" << with_bias << "_";
         result << "with_convert=" << with_convert << "_";
         result << "with_param_weight=" << with_param_weight << "_";
+        result << "with_act_new_reshape=" << with_act_new_reshape << "_";
         result << "activation_op_type=" << activation_op_type;
         return result.str();
     }
 
 protected:
     void SetUp() override {
         TransformationTestsF::SetUp();
-        const auto& [with_group_quant, with_zp, with_bias, with_convert, with_param_weight, activation_op_type] =
-            GetParam();
+        const auto& [with_group_quant,
+                     with_zp,
+                     with_bias,
+                     with_convert,
+                     with_param_weight,
+                     with_act_new_reshape,
+                     activation_op_type] = GetParam();
         Conv1x1ToMatmulTestParams params{with_group_quant,
                                          with_zp,
                                          with_bias,
                                          with_convert,
                                          with_param_weight,
+                                         with_act_new_reshape,
                                          activation_op_type};
         model = gen_model(params);
         model_ref = gen_model_ref(params);
@@ -230,6 +253,7 @@ INSTANTIATE_TEST_SUITE_P(TransformationTests,
                                             ::testing::Bool(),
                                             ::testing::Bool(),
                                             ::testing::Bool(),
+                                            ::testing::Bool(),
                                             ::testing::Values("Transpose", "Reshape")),
                          ConvertWeightCompressedConv1x1ToMatmulTest::get_test_case_name);