Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][Matrix] Added tests for new IGC flag #1653

Draft
wants to merge 1 commit into
base: intel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/element_wise_all_ops_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
4 changes: 3 additions & 1 deletion SYCL/Matrix/Legacy/XMX8/element_wise_all_ops_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: matrix-xmx8
// REQUIRES: matrix-xmx8,gpu

// Only runs on DPAS because AMX implementation does not support half data type
// yet
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/element_wise_all_ops_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: gpu

Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/element_wise_irreg_sum_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This code calculates the sum of each row into a global array that has one
// element per row. First, a partial reduction is computed inside each SG, then atomic
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/element_wise_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_bfloat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_bfloat16_32x64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: *

Expand Down
4 changes: 3 additions & 1 deletion SYCL/Matrix/Legacy/XMX8/joint_matrix_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: matrix-xmx8
// REQUIRES: matrix-xmx8,gpu

// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// Only run on the GPU because half is not supported on AMX hardware
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_int8_vnni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: *

Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_ss_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_su_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_us_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/XMX8/joint_matrix_uu_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/element_wise_all_ops_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
4 changes: 3 additions & 1 deletion SYCL/Matrix/Legacy/element_wise_all_ops_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: matrix
// REQUIRES: matrix,gpu

// Only runs on DPAS because AMX implementation does not support half data type
// yet
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/element_wise_all_ops_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/element_wise_all_ops_int8_packed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: gpu

Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/element_wise_irreg_sum_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This code calculates the sum of each row into a global array that has one
// element per row. First, a partial reduction is computed inside each SG, then atomic
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/element_wise_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_bfloat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_bfloat16_32x64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: *

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This tests support of col major layout for matrix B which does transpose and
// then VNNI transform. This is currently only available on AMX
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This tests support of row major layout for matrix B which does automatic VNNI
// transform. This is currently only available on AMX
Expand Down
4 changes: 3 additions & 1 deletion SYCL/Matrix/Legacy/joint_matrix_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: matrix
// REQUIRES: matrix,gpu

// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// Only run on the GPU because half is not supported on AMX hardware
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This tests support of col major layout for matrix B which does transpose and
// then VNNI transform. This is currently only available on AMX
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_int8_vnni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// XFAIL: gpu

Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_ss_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_su_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_us_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/Legacy/joint_matrix_uu_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=1
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/element_wise_all_ops_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
4 changes: 3 additions & 1 deletion SYCL/Matrix/XMX8/element_wise_all_ops_half.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: matrix-xmx8
// REQUIRES: matrix-xmx8,gpu

// Only runs on DPAS because AMX implementation does not support half data type
// yet
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/element_wise_all_ops_int8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <random>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/element_wise_all_ops_int8_packed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This test stores the matrix B that is VNNIed (packed) in a row major fashion.
// This is expected to fail on the GPU because the implementation does not
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/element_wise_irreg_sum_rows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

// This code calculates the sum of each row into a global array that has one
// element per row. First, a partial reduction is computed inside each SG, then atomic
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/element_wise_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
2 changes: 2 additions & 0 deletions SYCL/Matrix/XMX8/joint_matrix_bfloat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// RUN: %clangxx -fsycl %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=0 %GPU_RUN_PLACEHOLDER %t.out
// RUN: env IGC_JointMatrixLoadStoreOpt=1 %GPU_RUN_PLACEHOLDER %t.out

#include <iostream>
#include <sycl/sycl.hpp>
Expand Down
Loading