[GPU] Add allowed order for 3d transpose fusion in transformation #30873

Open · wants to merge 2 commits into master
@@ -30,6 +30,32 @@ namespace ov::intel_gpu {

namespace {

bool is_valid_order(const std::vector<size_t>& target_order, size_t dims) {
static const std::vector<std::vector<size_t>> allowed_orders_4d = {
{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{0, 3, 1, 2},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2}
};

static const std::vector<std::vector<size_t>> allowed_orders_3d = {
Review comment (Contributor):
Why does this list use 4-dimensional orders instead of 3-dimensional ones, even though it is the whitelist for the 3D check?

Reply (Contributor, author):
@ahnyoung-paul The 3D order is converted to 4D on the oneDNN side, so 4D orders are used here.
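For illustration only (not part of the PR), a minimal usage sketch of the new helper, under the assumption, consistent with has_optimized_version below, that the caller has already padded the permute order to 4 dimensions (expected_dims_num = 4) while dims still carries the rank of the original transpose order:

// Hypothetical example, not in the diff: a 3D transpose whose padded permute order
// is {0, 2, 1, 3}. Passing dims == 3 selects allowed_orders_3d, which contains this
// entry, so the check succeeds.
std::vector<size_t> padded_order = {0, 2, 1, 3};
bool fusable = is_valid_order(padded_order, /*dims=*/3);  // true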

Review comment (Contributor, michal-miotk, Jun 9, 2025):
nit: maybe there is some way to create a common list for the 3D and 4D orders, so a future reader of this code does not have to compare them, or at least note at the end of allowed_orders_4d that the lists differ only by {0, 3, 1, 2}.
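As a sketch of that suggestion (illustrative only, with hypothetical names; not part of the PR), the 4D whitelist could be built from a shared base list plus the single extra entry, so the two lists cannot drift apart:

// Shared base: the seven orders allowed in both the 3D and the 4D case.
static const std::vector<std::vector<size_t>> common_allowed_orders = {
{0, 1, 2, 3}, {0, 1, 3, 2}, {1, 2, 3, 0}, {0, 2, 1, 3},
{1, 2, 0, 3}, {2, 0, 1, 3}, {3, 0, 1, 2}
};

// The 4D whitelist adds exactly one order on top of the shared base.
std::vector<std::vector<size_t>> make_allowed_orders_4d() {
auto orders = common_allowed_orders;
orders.push_back({0, 3, 1, 2});  // allowed for 4D but not for 3D
return orders;
}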

Reply (Contributor, author):
@ahnyoung-paul @michal-miotk I changed this order check to be per MatMul input/output transpose rather than per 3D/4D rank, aligned with the oneDNN GEMM input/output transpose order whitelists.

{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2}
};

const auto& allowed_orders = (dims < 4) ? allowed_orders_3d : allowed_orders_4d;
return cldnn::one_of(target_order, allowed_orders);
}

bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad) {
if (!output.get_element_type().is_real())
return false;
@@ -42,17 +68,6 @@ bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad)
return false;

auto transpose_order = ov::as_type_ptr<ov::op::v0::Constant>(order_node)->cast_vector<int64_t>();
static const std::vector<std::vector<size_t>> allowed_orders = {
{0, 1, 2, 3},
{0, 1, 3, 2},
{1, 2, 3, 0},
{0, 2, 1, 3},
{0, 3, 1, 2},
{1, 2, 0, 3},
{2, 0, 1, 3},
{3, 0, 1, 2},
};

const auto expected_dims_num = 4;

std::vector<size_t> order(std::begin(transpose_order), std::end(transpose_order));
@@ -67,10 +82,8 @@ bool has_optimized_version(const ov::Output<ov::Node>& output, bool supports_immad)
for (size_t i = 0; i < order.size(); ++i) {
target_permute_order[order[i]] = i;
}
if (!cldnn::one_of(target_permute_order, allowed_orders))
return false;

return true;
return is_valid_order(target_permute_order, transpose_order.size());
}
} // namespace
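For context (not part of the diff): the loop above builds the inverse permutation of the transpose order, and it is this inverse that is checked against the whitelist. A minimal standalone sketch:

// Illustrative only: computes the inverse permutation exactly as the loop in
// has_optimized_version does (inverse[order[i]] = i).
// For order = {2, 0, 1, 3} the inverse is {1, 2, 0, 3}.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
std::vector<size_t> order = {2, 0, 1, 3};
std::vector<size_t> inverse(order.size());
for (size_t i = 0; i < order.size(); ++i)
inverse[order[i]] = i;
for (size_t axis : inverse)
std::cout << axis << ' ';  // prints: 1 2 0 3
std::cout << '\n';
return 0;
}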

@@ -90,4 +90,93 @@ INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion_basic,
TransposeMatmulFuseTest,
::testing::Combine(::testing::ValuesIn(allowed_order), ::testing::ValuesIn(input_precisions)),
TransposeMatmulFuseTest::getTestCaseName);

using TransposesOrderParams = std::tuple<std::vector<int64_t>, // transpose_a orders
std::vector<int64_t>, // transpose_c orders
ov::element::Type>; // input precision
class TransposeMatmulTransposeFuse3DTest : public ::testing::Test, public testing::WithParamInterface<TransposesOrderParams> {
public:
static std::string getTestCaseName(testing::TestParamInfo<TransposesOrderParams> obj) {
std::vector<int64_t> target_order_a;
std::vector<int64_t> target_order_c;
ov::element::Type input_precision;

std::tie(target_order_a, target_order_c, input_precision) = obj.param;

std::ostringstream result;
result << "transpose_a_order=[";
for (const auto& order : target_order_a) {
result << order << "_";
}
result << "]_transpose_c_order=[";
for (const auto& order : target_order_c) {
result << order << "_";
}
result << "]_input_precision=" << input_precision;
return result.str();
}

protected:
std::shared_ptr<ov::Model> init_subgraph(ov::element::Type& input_precision,
const std::vector<int64_t>& target_transpose_order_a,
const std::vector<int64_t>& target_transpose_order_c) {
ov::PartialShape input_a_shape = ov::PartialShape{-1, -1, -1};
ov::PartialShape input_b_shape = ov::PartialShape{-1, -1, -1};

auto input_a = std::make_shared<ov::op::v0::Parameter>(input_precision, input_a_shape);
auto input_b = std::make_shared<ov::op::v0::Parameter>(input_precision, input_b_shape);

auto transpose_order_a = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{target_transpose_order_a.size()}, target_transpose_order_a);
auto transpose_a = std::make_shared<ov::op::v1::Transpose>(input_a, transpose_order_a);

auto matmul = std::make_shared<ov::op::v0::MatMul>(transpose_a, input_b, false, false);

auto transpose_order_c = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{target_transpose_order_c.size()}, target_transpose_order_c);
auto transpose_c = std::make_shared<ov::op::v1::Transpose>(matmul, transpose_order_c);

auto model = std::make_shared<ov::Model>(ov::NodeVector{transpose_c}, ov::ParameterVector{input_a, input_b});
return model;
}

private:
ov::element::Type input_precision = ov::element::f16;
std::vector<int64_t> order = {0, 1, 2};
};

TEST_P(TransposeMatmulTransposeFuse3DTest, smoke_allowed_transposes_order) {
std::vector<int64_t> target_order_a;
std::vector<int64_t> target_order_c;
ov::element::Type input_precision;
std::tie(target_order_a, target_order_c, input_precision) = GetParam();
auto function = init_subgraph(input_precision, target_order_a, target_order_c);

std::string targetDevice = ov::test::utils::DEVICE_GPU;
ov::Shape input_a_shape = {10, 2, 32};
ov::Shape input_b_shape = {2, 32, 32};

auto input_tensor_a = ov::test::utils::create_and_fill_tensor(input_precision, input_a_shape, 0.0f, 1.0f);
auto input_tensor_b = ov::test::utils::create_and_fill_tensor(input_precision, input_b_shape, 0.0f, 1.0f);

auto core = ov::test::utils::PluginCache::get().core();
ov::CompiledModel cM = core->compile_model(function, targetDevice, {ov::hint::inference_precision(input_precision)});
auto request = cM.create_infer_request();
request.set_input_tensor(0, ov::Tensor(input_precision, input_a_shape, input_tensor_a.data()));
request.set_input_tensor(1, ov::Tensor(input_precision, input_b_shape, input_tensor_b.data()));
request.infer();
}

const std::vector<std::vector<int64_t>> allowed_order_a_3d = {
{1, 0, 2},
};

const std::vector<std::vector<int64_t>> allowed_order_c_3d = {
{1, 2, 0}
};
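// For reference (not part of the diff): with transpose_a order {1, 0, 2} the {10, 2, 32}
// input becomes {2, 10, 32}; MatMul with the {2, 32, 32} input yields {2, 10, 32}; the
// trailing transpose_c order {1, 2, 0} then produces a {10, 32, 2} output.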

INSTANTIATE_TEST_SUITE_P(smoke_TransposeMatMulFusion_basic,
TransposeMatmulTransposeFuse3DTest,
::testing::Combine(::testing::ValuesIn(allowed_order_a_3d),
::testing::ValuesIn(allowed_order_c_3d),
::testing::ValuesIn(input_precisions)),
TransposeMatmulTransposeFuse3DTest::getTestCaseName);
} // namespace