[GPU] Add allowed order for 3d transpose fusion in transformation #30873

Open · wants to merge 2 commits into master
@@ -30,6 +30,32 @@ namespace ov::intel_gpu {

namespace {

bool is_valid_order(const std::vector<size_t>& target_order, size_t dims) {
static const std::vector<std::vector<size_t>> allowed_orders_4d = {
{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{0, 3, 1, 2},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2}
};

static const std::vector<std::vector<size_t>> allowed_orders_3d = {
Review comment (Contributor):
Why does this list use 4-dimensional orders instead of 3-dimensional ones, even though it is the whitelist for the 3D check?

Reply (Contributor, author):
@ahnyoung-paul The 3D order is converted to 4D on the oneDNN side, so 4D orders are used here.
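For illustration only (not part of the PR), a minimal usage sketch of the new helper, under the assumption, consistent with has_optimized_version below, that the caller has already padded the permute order to 4 dimensions (expected_dims_num = 4) while dims still carries the rank of the original transpose order:

// Hypothetical example, not in the diff: a 3D transpose whose padded permute order
// is {0, 2, 1, 3}. Passing dims == 3 selects allowed_orders_3d, which contains this
// entry, so the check succeeds.
std::vector<size_t> padded_order = {0, 2, 1, 3};
bool fusable = is_valid_order(padded_order, /*dims=*/3);  // true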

Review comment (Contributor, michal-miotk, Jun 9, 2025):
nit: maybe there is some way to create a common list for the 3D and 4D orders, so a future reader of this code does not have to compare them, or at least note at the end of allowed_orders_4d that the lists differ only by {0, 3, 1, 2}.
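As a sketch of that suggestion (illustrative only, with hypothetical names; not part of the PR), the 4D whitelist could be built from a shared base list plus the single extra entry, so the two lists cannot drift apart:

// Shared base: the seven orders allowed in both the 3D and the 4D case.
static const std::vector<std::vector<size_t>> common_allowed_orders = {
{0, 1, 2, 3}, {0, 1, 3, 2}, {1, 2, 3, 0}, {0, 2, 1, 3},
{1, 2, 0, 3}, {2, 0, 1, 3}, {3, 0, 1, 2}
};

// The 4D whitelist adds exactly one order on top of the shared base.
std::vector<std::vector<size_t>> make_allowed_orders_4d() {
auto orders = common_allowed_orders;
orders.push_back({0, 3, 1, 2});  // allowed for 4D but not for 3D
return orders;
}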

Reply (Contributor, author):
@ahnyoung-paul @michal-miotk I changed this order check to be per MatMul input/output transpose rather than per 3D/4D rank, aligned with the oneDNN GEMM input/output transpose order whitelists.

{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2}
};

const auto& allowed_orders = (dims < 4) ? allowed_orders_3d : allowed_orders_4d;
return cldnn::one_of(target_order, allowed_orders);
}

bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad) {
if (!output.get_element_type().is_real())
return false;
@@ -42,17 +68,6 @@ bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad)
return false;

auto transpose_order = ov::as_type_ptr<ov::op::v0::Constant>(order_node)->cast_vector<int64_t>();
static const std::vector<std::vector<size_t>> allowed_orders = {
{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{0, 3, 1, 2},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2},
};

const auto expected_dims_num = 4;

std::vector<size_t> order(std::begin(transpose_order), std::end(transpose_order));
@@ -67,10 +82,8 @@ bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad)
for (size_t i = 0; i < order.size(); ++i) {
target_permute_order[order[i]] = i;
}
if (!cldnn::one_of(target_permute_order, allowed_orders))
return false;

return true;
return is_valid_order(target_permute_order, transpose_order.size());
}
} // namespace
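For context (not part of the diff): the loop above builds the inverse permutation of the transpose order, and it is this inverse that is checked against the whitelist. A minimal standalone sketch:

// Illustrative only: computes the inverse permutation exactly as the loop in
// has_optimized_version does (inverse[order[i]] = i).
// For order = {2, 0, 1, 3} the inverse is {1, 2, 0, 3}.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
std::vector<size_t> order = {2, 0, 1, 3};
std::vector<size_t> inverse(order.size());
for (size_t i = 0; i < order.size(); ++i)
inverse[order[i]] = i;
for (size_t axis : inverse)
std::cout << axis << ' ';  // prints: 1 2 0 3
std::cout << '\n';
return 0;
}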

@@ -90,4 +90,93 @@ INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion_basic,
TransposeMatmulFuseTest,
::testing::Combine(::testing::ValuesIn(allowed_order), ::testing::ValuesIn(input_precisions)),
TransposeMatmulFuseTest::getTestCaseName);

using TransposesOrderParams = std::tuple<std::vector<int64_t>, // transpose_a orders
std::vector<int64_t>, // transpose_c orders
ov::element::Type>; // input precision
class TransposeMatmulTransposeFuse3DTest : public ::testing::Test, public testing::WithParamInterface<TransposesOrderParams> {
public:
static std::string getTestCaseName(testing::TestParamInfo<TransposesOrderParams> obj) {
std::vector<int64_t> target_order_a;
std::vector<int64_t> target_order_c;
ov::element::Type input_precision;

std::tie(target_order_a, target_order_c, input_precision) = obj.param;

std::ostringstream result;
result << "transpose_a_order=[";
for (const auto& order : target_order_a) {
result << order << "_";
}
result << "]_transpose_c_order=[";
for (const auto& order : target_order_c) {
result << order << "_";
}
result << "]_input_precision=" << input_precision;
return result.str();
}

protected:
std::shared_ptr<ov::Model> init_subgraph(ov::element::Type& input_precision,
const std::vector<int64_t>& target_transpose_order_a,
const std::vector<int64_t>& target_transpose_order_c) {
ov::PartialShape input_a_shape = ov::PartialShape{-1, -1, -1};
ov::PartialShape input_b_shape = ov::PartialShape{-1, -1, -1};

auto input_a = std::make_shared<ov::op::v0::Parameter>(input_precision, input_a_shape);
auto input_b = std::make_shared<ov::op::v0::Parameter>(input_precision, input_b_shape);

auto transpose_order_a = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{target_transpose_order_a.size()}, target_transpose_order_a);
auto transpose_a = std::make_shared<ov::op::v1::Transpose>(input_a, transpose_order_a);

auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose_a, input_b, false, false);

auto transpose_order_c = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{target_transpose_order_c.size()}, target_transpose_order_c);
auto transpose_c = std::make_shared<ov::op::v1::Transpose>(matmul, transpose_order_c);

auto model = std::make_shared<ov::Model>(ov::NodeVector{transpose_c}, ov::ParameterVector{input_a, input_b});
return model;
}

private:
ov::element::Type input_precision = ov::element::f16;
std::vector<int64_t> order = {0, 1, 2};
};

TEST_P(TransposeMatmulTransposeFuse3DTest, smoke_allowed_transposes_order) {
std::vector<int64_t> target_order_a;
std::vector<int64_t> target_order_c;
ov::element::Type input_precision;
std::tie(target_order_a, target_order_c, input_precision) = GetParam();
auto function = init_subgraph(input_precision, target_order_a, target_order_c);

std::string targetDevice = ov::test::utils::DEVICE_GPU;
ov::Shape input_a_shape = {10, 2, 32};
ov::Shape input_b_shape = {2, 32, 32};

auto input_tensor_a = ov::test::utils::create_and_fill_tensor(input_precision, input_a_shape, 0.0f, 1.0f);
auto input_tensor_b = ov::test::utils::create_and_fill_tensor(input_precision, input_b_shape, 0.0f, 1.0f);

auto core = ov::test::utils::PluginCache::get().core();
ov::CompiledModel cM = core->compile_model(function, targetDevice, {ov::hint::inference_precision(input_precision)});
auto request = cM.create_infer_request();
request.set_input_tensor(0, ov::Tensor(input_precision, input_a_shape, input_tensor_a.data()));
request.set_input_tensor(1, ov::Tensor(input_precision, input_b_shape, input_tensor_b.data()));
request.infer();
}

const std::vector<std::vector<int64_t>> allowed_order_a_3d = {
{1, 0, 2},
};

const std::vector<std::vector<int64_t>> allowed_order_c_3d = {
{1, 2, 0}
};
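// For reference (not part of the diff): with transpose_a order {1, 0, 2} the {10, 2, 32}
// input becomes {2, 10, 32}; MatMul with the {2, 32, 32} input yields {2, 10, 32}; the
// trailing transpose_c order {1, 2, 0} then produces a {10, 32, 2} output.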

INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion_basic,
TransposeMatmulTransposeFuse3DTest,
::testing::Combine(::testing::ValuesIn(allowed_order_a_3d),
::testing::ValuesIn(allowed_order_c_3d),
::testing::ValuesIn(input_precisions)),
TransposeMatmulTransposeFuse3DTest::getTestCaseName);
} // namespace