diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLUtils.h b/llvm/include/llvm/SYCLLowerIR/SYCLUtils.h index 241859a47426d..c9ebcdae53f4b 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLUtils.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLUtils.h @@ -22,6 +22,7 @@ namespace llvm { namespace sycl { namespace utils { constexpr char ATTR_SYCL_MODULE_ID[] = "sycl-module-id"; +constexpr char ATTR_SYCL_OPTLEVEL[] = "sycl-optlevel"; using CallGraphNodeAction = ::std::function; using CallGraphFunctionFilter = diff --git a/llvm/test/tools/sycl-post-link/assert/property-1.ll b/llvm/test/tools/sycl-post-link/assert/property-1.ll index 57f76e414b3a8..81e7c674187be 100644 --- a/llvm/test/tools/sycl-post-link/assert/property-1.ll +++ b/llvm/test/tools/sycl-post-link/assert/property-1.ll @@ -12,9 +12,9 @@ ; RUN: FileCheck %s -input-file=%t_0.prop --implicit-check-not TheKernel2 ; ; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table -; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes=CHECK-K1 -; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes=CHECK-K2 -; RUN: FileCheck %s -input-file=%t_2.prop --check-prefixes=CHECK-K3 +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes=CHECK-K3 +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes=CHECK-K1 +; RUN: FileCheck %s -input-file=%t_2.prop --check-prefixes=CHECK-K2 ; SYCL source: ; void foo() { diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll index 687e50e0f6ffd..faec71a602ffd 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -10,29 +10,29 @@ ; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll index 83d32c1688608..773424fa91fcb 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -21,12 +21,12 @@ ; CHECK-TABLE-NEXT: _2.sym ; CHECK-TABLE-EMPTY: -; CHECK-M0-SYMS: kernel0 +; CHECK-M0-SYMS: kernel3 -; CHECK-M1-SYMS: kernel3 +; CHECK-M1-SYMS: kernel1 +; CHECK-M1-SYMS: kernel2 -; CHECK-M2-SYMS: kernel1 -; CHECK-M2-SYMS: kernel2 +; CHECK-M2-SYMS: kernel0 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll index c9fe37276faae..fa5ffe782a7db 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll @@ -10,29 +10,29 @@ ; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 ; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 ; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll index 533de091641ca..cb38a596a7ba9 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll @@ -4,16 +4,16 @@ ; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M0-SYMS \ -; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 \ +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2 +; +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel1 --implicit-check-not kernel3 \ ; RUN: --implicit-check-not kernel2 ; ; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \ -; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3 -; -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 \ -; RUN: --implicit-check-not kernel3 +; RUN: --implicit-check-not kernel0 ; CHECK-TABLE: Code ; CHECK-TABLE-NEXT: _0.sym @@ -21,12 +21,12 @@ ; CHECK-TABLE-NEXT: _2.sym ; CHECK-TABLE-EMPTY: -; CHECK-M0-SYMS: kernel3 +; CHECK-M0-SYMS: kernel1 +; CHECK-M0-SYMS: kernel2 ; CHECK-M1-SYMS: kernel0 -; CHECK-M2-SYMS: kernel1 -; CHECK-M2-SYMS: kernel2 +; CHECK-M2-SYMS: kernel3 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" diff --git a/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll b/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll index 9b56190a71d06..595427a786e7b 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll @@ -8,9 +8,9 @@ ; ; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t1.table ; RUN: FileCheck %s -input-file=%t1.table --check-prefix CHECK-PER-KERNEL-TABLE -; RUN: FileCheck %s -input-file=%t1_0.sym --check-prefix CHECK-PER-KERNEL-SYM0 -; RUN: FileCheck %s -input-file=%t1_1.sym --check-prefix CHECK-PER-KERNEL-SYM1 -; RUN: FileCheck %s -input-file=%t1_2.sym --check-prefix CHECK-PER-KERNEL-SYM2 +; RUN: FileCheck %s -input-file=%t1_0.sym --check-prefix CHECK-PER-KERNEL-SYM1 +; RUN: FileCheck %s -input-file=%t1_1.sym --check-prefix CHECK-PER-KERNEL-SYM2 +; RUN: FileCheck %s -input-file=%t1_2.sym --check-prefix CHECK-PER-KERNEL-SYM0 ; With per-source split, there should be two device images ; CHECK-PER-SOURCE-TABLE: [Code|Properties|Symbols] diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll index 02bc8a42def25..03ccc2f5f696b 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll @@ -1,6 +1,6 @@ ; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD0 -; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD1 +; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD1 +; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 ; This test is intended to check that sycl-post-link generates no errors ; when a device global variable with the 'device_image_scope' property diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll index fd141fbaead7a..3b94a48cb3e90 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll @@ -1,7 +1,7 @@ ; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD0 -; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD1 -; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD2 +; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD2 +; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 +; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 ; This test is intended to check that sycl-post-link generates no error if the ; 'device_image_scope' property is attached to not a device global variable. diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll index 5162e0f0456d3..847ecbc2b102d 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll @@ -1,7 +1,7 @@ ; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD0 -; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD1 -; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD2 +; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD2 +; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 +; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 ; ModuleID = 'llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll' source_filename = "llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll" diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll index b06ad62371ae4..9afd208726d79 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll @@ -1,6 +1,6 @@ ; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD0 -; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD1 +; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 +; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 ; This test is intended to check that sycl-post-link generates no errors ; when each device global variable with the 'device_image_scope' property diff --git a/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll b/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll index 730b97e139748..98e92650e516f 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll @@ -17,8 +17,8 @@ ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT ; RUN: sycl-post-link -split=kernel < %s -o %t.files.table -; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-0 -; RUN: FileCheck %s -input-file=%t.files_1.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-1 +; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-1 +; RUN: FileCheck %s -input-file=%t.files_1.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-0 ; CHECK-PROP-AUTO-SPLIT: [SYCL/device requirements] ; CHECK-PROP-AUTO-SPLIT-NEXT: aspects=2|gCAAAAAAAAAAAAAABAAAAYAAAAQCAAAAMAAAAA diff --git a/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll b/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll index 167b3a8dc6558..49a94eefa2575 100644 --- a/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll +++ b/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll @@ -6,17 +6,17 @@ ; ; Per-module split ; RUN: sycl-post-link -symbols -split=source -emit-exported-symbols -S < %s -o %t.per_module.files.table -; RUN: FileCheck %s -input-file=%t.per_module.files_0.prop -implicit-check-not="NotExported" --check-prefix=CHECK-PERMODULE-0-PROP -; RUN: FileCheck %s -input-file=%t.per_module.files_1.prop -implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP +; RUN: FileCheck %s -input-file=%t.per_module.files_0.prop -implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP +; RUN: FileCheck %s -input-file=%t.per_module.files_1.prop -implicit-check-not="NotExported" --check-prefix=CHECK-PERMODULE-0-PROP ; RUN: FileCheck %s -input-file=%t.per_module.files_2.prop -implicit-check-not="NotExported" --check-prefix=CHECK-PERMODULE-2-PROP ; ; Per-kernel split ; RUN: sycl-post-link -symbols -split=kernel -emit-exported-symbols -S < %s -o %t.per_kernel.files.table -; RUN: FileCheck %s -input-file=%t.per_kernel.files_0.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-0-PROP -; RUN: FileCheck %s -input-file=%t.per_kernel.files_1.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-1-PROP -; RUN: FileCheck %s -input-file=%t.per_kernel.files_2.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-2-PROP -; RUN: FileCheck %s -input-file=%t.per_kernel.files_3.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP -; RUN: FileCheck %s -input-file=%t.per_kernel.files_4.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP +; RUN: FileCheck %s -input-file=%t.per_kernel.files_0.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP +; RUN: FileCheck %s -input-file=%t.per_kernel.files_1.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP +; RUN: FileCheck %s -input-file=%t.per_kernel.files_2.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-0-PROP +; RUN: FileCheck %s -input-file=%t.per_kernel.files_3.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-1-PROP +; RUN: FileCheck %s -input-file=%t.per_kernel.files_4.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-2-PROP target triple = "spir64-unknown-unknown" diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll index 8a1acc02cf756..6bae075384405 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll @@ -9,16 +9,16 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' +; RUN: FileCheck %s -input-file=%t_esimd_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM +; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM ; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}esimd-large-grf.ll.tmp_0.ll|{{.*}}esimd-large-grf.ll.tmp_0.prop|{{.*}}esimd-large-grf.ll.tmp_0.sym -; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_0.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_0.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_0.sym +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym ; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym +; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop|{{.*}}_esimd_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 ; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1 diff --git a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll index cb76430e8c4a2..105926200b0e9 100644 --- a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll @@ -9,13 +9,13 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_large_grf_1.ll --check-prefixes CHECK-LARGE-GRF-IR -; RUN: FileCheck %s -input-file=%t_large_grf_1.prop --check-prefixes CHECK-LARGE-GRF-PROP -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_large_grf_1.sym --check-prefixes CHECK-LARGE-GRF-SYM +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-LARGE-GRF-IR +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-LARGE-GRF-PROP +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-LARGE-GRF-SYM ; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}-large-grf.ll.tmp_0.ll|{{.*}}-large-grf.ll.tmp_0.prop|{{.*}}-large-grf.ll.tmp_0.sym +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym ; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym ; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1 diff --git a/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll b/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll new file mode 100644 index 0000000000000..053760fa3b86f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll @@ -0,0 +1,68 @@ +; This test checks, that 'optLevel' property is only emitted based on the module +; entry points +; +; In this test we have functions 'foo' and 'boo' defined in different +; translation units. They are both entry points and 'foo' calls 'boo'. +; As a result, we expect two modules: +; - module with 'foo' (as entry point) and 'bar' (included as dependency) with +; 'optLevel' set to 1 (taken from 'foo') +; - module with 'bar' (as entry point) with 'optLevel' set to 2 (taken from +; 'bar') + +; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0 +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM-0 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYM-1 +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR-0 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK-IR-1 + +; CHECK: [Code|Properties|Symbols] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym +; CHECK-EMPTY: + +; CHECK-OPT-LEVEL-PROP-0: optLevel=1|1 +; CHECK-OPT-LEVEL-PROP-1: optLevel=1|2 +; CHECK-SYM-0: _Z3fooii +; CHECK-SYM-0-EMPTY: +; CHECK-SYM-1: _Z3barii +; +; CHECK-IR-0-DAG: define {{.*}} @_Z3fooii{{.*}} #[[#ATTR0:]] +; CHECK-IR-0-DAG: define {{.*}} @_Z3barii{{.*}} #[[#ATTR1:]] +; CHECK-IR-0-DAG: attributes #[[#ATTR0]] = { {{.*}} "sycl-optlevel"="1" } +; CHECK-IR-0-DAG: attributes #[[#ATTR1]] = { {{.*}} "sycl-optlevel"="2" } +; +; CHECK-IR-1: define {{.*}} @_Z3barii{{.*}} #[[#ATTR0:]] +; CHECK-IR-1: attributes #[[#ATTR0]] = { {{.*}} "sycl-optlevel"="2" } + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define dso_local spir_func noundef i32 @_Z3fooii(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 { +entry: + %call = call i32 @_Z3barii(i32 %a, i32 %b) + %sub = sub nsw i32 %a, %call + ret i32 %sub +} + +define dso_local spir_func noundef i32 @_Z3barii(i32 noundef %a, i32 noundef %b) #1 { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %retval.ascast = addrspacecast i32* %retval to i32 addrspace(4)* + %a.addr.ascast = addrspacecast i32* %a.addr to i32 addrspace(4)* + %b.addr.ascast = addrspacecast i32* %b.addr to i32 addrspace(4)* + store i32 %a, i32 addrspace(4)* %a.addr.ascast, align 4 + store i32 %b, i32 addrspace(4)* %b.addr.ascast, align 4 + %0 = load i32, i32 addrspace(4)* %a.addr.ascast, align 4 + %1 = load i32, i32 addrspace(4)* %b.addr.ascast, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "sycl-module-id"="test3.cpp" "sycl-optlevel"="1" } +attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "sycl-module-id"="test2.cpp" "sycl-optlevel"="2" } + diff --git a/llvm/test/tools/sycl-post-link/sycl-opt-level.ll b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll index 349e6e445027d..8967d3bc02621 100644 --- a/llvm/test/tools/sycl-post-link/sycl-opt-level.ll +++ b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll @@ -8,13 +8,18 @@ ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM-0 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYM-1 ; CHECK: [Code|Properties|Symbols] ; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym ; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym +; CHECK-EMPTY: -; CHECK-OPT-LEVEL-PROP-0: optLevel=1|0 -; CHECK-OPT-LEVEL-PROP-1: optLevel=1|2 +; CHECK-OPT-LEVEL-PROP-0: optLevel=1|2 +; CHECK-OPT-LEVEL-PROP-1: optLevel=1|0 +; CHECK-SYM-0: _Z3fooii +; CHECK-SYM-1: _Z3booii target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 75546fe42b166..b23bb7e4c0e1d 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -31,6 +31,7 @@ #include #include #include +#include using namespace llvm; using namespace llvm::module_split; @@ -42,7 +43,6 @@ constexpr char GLOBAL_SCOPE_NAME[] = ""; constexpr char SYCL_SCOPE_NAME[] = ""; constexpr char ESIMD_SCOPE_NAME[] = ""; constexpr char ESIMD_MARKER_MD[] = "sycl_explicit_simd"; -constexpr char ATTR_OPT_LEVEL[] = "sycl-optlevel"; bool hasIndirectFunctionsOrCalls(const Module &M) { for (const auto &F : M.functions()) { @@ -200,68 +200,6 @@ groupEntryPointsByKernelType(ModuleDesc &MD, return EntryPointGroups; } -// This function decides how entry points of the input module M will be -// distributed ("split") into multiple modules based on the command options and -// IR attributes. The decision is recorded in the output vector EntryPointGroups -// which contains pairs of group id and entry points for that group. Each such -// group along with IR it depends on (globals, functions from its call graph, -// ...) will constitute a separate module. -EntryPointGroupVec groupEntryPointsByScope(ModuleDesc &MD, - EntryPointsGroupScope EntryScope, - bool EmitOnlyKernelsAsEntryPoints) { - EntryPointGroupVec EntryPointGroups{}; - // Use MapVector for deterministic order of traversal (helps tests). - MapVector EntryPointMap; - Module &M = MD.getModule(); - - // Only process module entry points: - for (Function &F : M.functions()) { - if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints) || - !MD.isEntryPointCandidate(F)) - continue; - - switch (EntryScope) { - case Scope_PerKernel: - EntryPointMap[F.getName()].insert(&F); - break; - - case Scope_PerModule: { - if (!llvm::sycl::utils::isSYCLExternalFunction(&F)) - // TODO It may make sense to group all entry points w/o the attribute - // into a separate module rather than issuing an error. Should probably - // be controlled by an option. - error("no '" + Twine(llvm::sycl::utils::ATTR_SYCL_MODULE_ID) + - "' attribute for entry point '" + F.getName() + - "', per-module split is not possible"); - - Attribute Id = F.getFnAttribute(llvm::sycl::utils::ATTR_SYCL_MODULE_ID); - StringRef Val = Id.getValueAsString(); - EntryPointMap[Val].insert(&F); - break; - } - - case Scope_Global: - // the map key is not significant here - EntryPointMap[GLOBAL_SCOPE_NAME].insert(&F); - break; - } - } - - if (!EntryPointMap.empty()) { - EntryPointGroups.reserve(EntryPointMap.size()); - for (auto &EPG : EntryPointMap) { - EntryPointGroups.emplace_back(EPG.first, std::move(EPG.second), - MD.getEntryPointGroup().Props); - EntryPointGroup &G = EntryPointGroups.back(); - G.Props.Scope = EntryScope; - } - } else { - // No entry points met, record this. - EntryPointGroups.emplace_back(GLOBAL_SCOPE_NAME, EntryPointSet{}); - } - return EntryPointGroups; -} - // Represents a call graph between functions in a module. Nodes are functions, // edges are "calls" relation. class CallGraph { @@ -424,24 +362,6 @@ getSplitterByKernelType(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { return std::make_unique(std::move(MD), std::move(Groups)); } -std::unique_ptr -getSplitterByMode(ModuleDesc &&MD, IRSplitMode Mode, - bool AutoSplitIsGlobalScope, - bool EmitOnlyKernelsAsEntryPoints) { - EntryPointsGroupScope Scope = - selectDeviceCodeGroupScope(MD.getModule(), Mode, AutoSplitIsGlobalScope); - EntryPointGroupVec Groups = - groupEntryPointsByScope(MD, Scope, EmitOnlyKernelsAsEntryPoints); - assert(!Groups.empty() && "At least one group is expected"); - bool DoSplit = (Mode != SPLIT_NONE && - (Groups.size() > 1 || !Groups.cbegin()->Functions.empty())); - - if (DoSplit) - return std::make_unique(std::move(MD), std::move(Groups)); - else - return std::make_unique(std::move(MD), std::move(Groups)); -} - void ModuleSplitterBase::verifyNoCrossModuleDeviceGlobalUsage() { const Module &M = getInputModule(); // Early exit if there is only one group @@ -674,9 +594,7 @@ void ModuleDesc::dump() const { llvm::errs() << "split_module::ModuleDesc[" << Name << "] {\n"; llvm::errs() << " ESIMD:" << toString(EntryPoints.Props.HasESIMD) << ", SpecConstMet:" << (Props.SpecConstsMet ? "YES" : "NO") - << ", LargeGRF:" - << (EntryPoints.Props.UsesLargeGRF ? "YES" : "NO") - << ", OptLevel:" << EntryPoints.getOptLevel() << "\n"; + << "\n"; dumpEntryPoints(entries(), EntryPoints.GroupId.c_str(), 1); llvm::errs() << "}\n"; } @@ -708,193 +626,271 @@ void EntryPointGroup::rebuildFromNames(const std::vector &Names, } namespace { -// Data structure, which represent a combination of all possible optional -// features used in a function. +// This is a helper class, which allows to group/categorize function based on +// provided rules. It is intended to be used in device code split +// implementation. // -// It has extra methods to be useable as a key in llvm::DenseMap. -struct UsedOptionalFeatures { - SmallVector Aspects; - bool UsesLargeGRF = false; - int OptLevel = -1; - SmallVector ReqdWorkGroupSize; - // TODO: extend this further with reqd-sub-group-size and other properties - - UsedOptionalFeatures() = default; - - UsedOptionalFeatures(const Function *F) { - if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { - auto ExtractIntegerFromMDNodeOperand = [=](const MDOperand &N) { - Constant *C = cast(N.get())->getValue(); - return C->getUniqueInteger().getSExtValue(); - }; - - // !sycl_used_aspects is supposed to contain unique values, no duplicates - // are expected here - llvm::transform(MDN->operands(), std::back_inserter(Aspects), - ExtractIntegerFromMDNodeOperand); - llvm::sort(Aspects); - } - - if (F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF)) - UsesLargeGRF = true; - - if (F->hasFnAttribute(ATTR_OPT_LEVEL)) - if (F->getFnAttribute(ATTR_OPT_LEVEL) - .getValueAsString() - .getAsInteger(10, OptLevel)) - OptLevel = -1; - - if (const MDNode *MDN = F->getMetadata("reqd_work_group_size")) { - size_t NumOperands = MDN->getNumOperands(); - assert(NumOperands >= 1 && NumOperands <= 3 && - "reqd_work_group_size does not have between 1 and 3 operands."); - ReqdWorkGroupSize.reserve(NumOperands); - for (const MDOperand &MDOp : MDN->operands()) - ReqdWorkGroupSize.push_back( - mdconst::extract(MDOp)->getZExtValue()); - } - - llvm::hash_code AspectsHash = - llvm::hash_combine_range(Aspects.begin(), Aspects.end()); - llvm::hash_code LargeGRFHash = llvm::hash_value(UsesLargeGRF); - llvm::hash_code ReqdWorkGroupSizeHash = llvm::hash_combine_range( - ReqdWorkGroupSize.begin(), ReqdWorkGroupSize.end()); - llvm::hash_code OptLevelHash = llvm::hash_value(OptLevel); - Hash = static_cast(llvm::hash_combine( - AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash, OptLevelHash)); - } - - std::string generateModuleName(StringRef BaseName) const { - std::string Ret = BaseName.str(); - if (!ReqdWorkGroupSize.empty()) { - Ret += "-reqd-wg-size"; - for (int V : ReqdWorkGroupSize) - Ret += "-" + std::to_string(V); - } +// "Rule" is a simple routine, which returns a string for an llvm::Function +// passed to it. There could be more than one rule and they are applied in order +// of their registration. Results obtained from those rules are concatenated +// together to produce the final result. +// +// There are some predefined rules for the most popular use-cases, like grouping +// functions together based on an attribute value or presence of a metadata. +// However, there is also a possibility to register a custom callback function +// as a rule, to implement custom/more complex logic. +class FunctionsCategorizer { +public: + FunctionsCategorizer() = default; - if (Aspects.empty()) - return Ret + "-no-aspects"; + std::string computeCategoryFor(Function *) const; - Ret += "-aspects"; - for (int A : Aspects) { - Ret += "-" + std::to_string(A); - } + // Accepts a callback, which should return a string based on provided + // function, which will be used as an entry points group identifier. + void registerRule(const std::function &Callback) { + Rules.emplace_back(Rule::RKind::K_Callback, Callback); + } - if (UsesLargeGRF) - Ret += "-large-grf"; + // Creates a simple rule, which adds a value of a string attribute into a + // resulting identifier. + void registerSimpleStringAttributeRule(StringRef AttrName) { + Rules.emplace_back(Rule::RKind::K_SimpleStringAttribute, AttrName); + } - if (OptLevel != -1) - Ret += "-O" + std::to_string(OptLevel); + // Creates a simple rule, which adds one or another value to a resulting + // identifier based on the presence of a metadata on a function. + void registerSimpleFlagAttributeRule(StringRef AttrName, + StringRef IfPresentStr, + StringRef IfAbsentStr = "") { + Rules.emplace_back(Rule::RKind::K_FlagAttribute, + Rule::FlagRuleData{AttrName, IfPresentStr, IfAbsentStr}); + } - return Ret; + // Creates a simple rule, which adds one or another value to a resulting + // identifier based on the presence of a metadata on a function. + void registerSimpleFlagMetadataRule(StringRef MetadataName, + StringRef IfPresentStr, + StringRef IfAbsentStr = "") { + Rules.emplace_back( + Rule::RKind::K_FlagMetadata, + Rule::FlagRuleData{MetadataName, IfPresentStr, IfAbsentStr}); } - static UsedOptionalFeatures getTombstone() { - UsedOptionalFeatures Ret; - Ret.IsTombstoneKey = true; - return Ret; + // Creates a rule, which adds a list of dash-separated integers converted + // into strings listed in a metadata to a resulting identifier. + void registerListOfIntegersInMetadataRule(StringRef MetadataName) { + Rules.emplace_back(Rule::RKind::K_IntegersListMetadata, MetadataName); } - static UsedOptionalFeatures getEmpty() { - UsedOptionalFeatures Ret; - Ret.IsEmpty = true; - return Ret; + // Creates a rule, which adds a list of sorted dash-separated integers + // converted into strings listed in a metadata to a resulting identifier. + void registerListOfIntegersInMetadataSortedRule(StringRef MetadataName) { + Rules.emplace_back(Rule::RKind::K_SortedIntegersListMetadata, MetadataName); } private: - // For DenseMap: - llvm::hash_code Hash = {}; - bool IsTombstoneKey = false; - bool IsEmpty = false; + struct Rule { + struct FlagRuleData { + StringRef Name, IfPresentStr, IfAbsentStr; + }; -public: - bool operator==(const UsedOptionalFeatures &Other) const { - // Tombstone does not compare equal to any other item - if (IsTombstoneKey || Other.IsTombstoneKey) - return false; + private: + std::variant> + Storage; + + public: + enum class RKind { + // Custom callback function + K_Callback, + // Copy value of the specified attribute, if present + K_SimpleStringAttribute, + // Use one or another string based on the specified metadata presence + K_FlagMetadata, + // Use one or another string based on the specified attribute presence + K_FlagAttribute, + // Concatenate and use list of integers from the specified metadata + K_IntegersListMetadata, + // Sort, concatenate and use list of integers from the specified metadata + K_SortedIntegersListMetadata + }; + RKind Kind; + + // Returns an index into std::variant<...> Storage defined above, which + // corresponds to the specified rule Kind. + constexpr static std::size_t storage_index(RKind K) { + switch (K) { + case RKind::K_SimpleStringAttribute: + case RKind::K_IntegersListMetadata: + case RKind::K_SortedIntegersListMetadata: + return 0; + case RKind::K_Callback: + return 2; + case RKind::K_FlagMetadata: + case RKind::K_FlagAttribute: + return 1; + } + // can't use llvm_unreachable in constexpr context + return std::variant_npos; + } - if (Aspects.size() != Other.Aspects.size()) - return false; + template auto getStorage() const { + return std::get(Storage); + } - for (size_t I = 0, E = Aspects.size(); I != E; ++I) { - if (Aspects[I] != Other.Aspects[I]) - return false; + template + Rule(RKind K, Args... args) : Storage(args...), Kind(K) { + assert(storage_index(K) == Storage.index()); } - return IsEmpty == Other.IsEmpty && UsesLargeGRF == Other.UsesLargeGRF && - OptLevel == Other.OptLevel; - } + Rule(Rule &&Other) = default; + }; - unsigned hash() const { return static_cast(Hash); } + std::vector Rules; }; -struct UsedOptionalFeaturesAsKeyInfo { - static inline UsedOptionalFeatures getEmptyKey() { - return UsedOptionalFeatures::getEmpty(); - } +std::string FunctionsCategorizer::computeCategoryFor(Function *F) const { + SmallString<256> Result; + for (const auto &R : Rules) { + switch (R.Kind) { + case Rule::RKind::K_Callback: + Result += R.getStorage()(F); + break; - static inline UsedOptionalFeatures getTombstoneKey() { - return UsedOptionalFeatures::getTombstone(); - } + case Rule::RKind::K_SimpleStringAttribute: { + StringRef AttrName = R.getStorage(); + if (F->hasFnAttribute(AttrName)) { + Attribute Attr = F->getFnAttribute(AttrName); + Result += Attr.getValueAsString(); + } + } break; + + case Rule::RKind::K_FlagMetadata: { + Rule::FlagRuleData Data = R.getStorage(); + if (F->hasMetadata(Data.Name)) + Result += Data.IfPresentStr; + else + Result += Data.IfAbsentStr; + } break; + + case Rule::RKind::K_IntegersListMetadata: { + StringRef MetadataName = + R.getStorage(); + if (F->hasMetadata(MetadataName)) { + auto *MDN = F->getMetadata(MetadataName); + for (const MDOperand &MDOp : MDN->operands()) + Result += + "-" + std::to_string( + mdconst::extract(MDOp)->getZExtValue()); + } + } break; - static unsigned getHashValue(const UsedOptionalFeatures &Value) { - return Value.hash(); - } + case Rule::RKind::K_SortedIntegersListMetadata: { + StringRef MetadataName = + R.getStorage(); + if (F->hasMetadata(MetadataName)) { + MDNode *MDN = F->getMetadata(MetadataName); + + SmallVector Values; + for (const MDOperand &MDOp : MDN->operands()) + Values.push_back(mdconst::extract(MDOp)->getZExtValue()); + + llvm::sort(Values); - static bool isEqual(const UsedOptionalFeatures &LHS, - const UsedOptionalFeatures &RHS) { - return LHS == RHS; + for (std::uint64_t V : Values) + Result += "-" + std::to_string(V); + } + } break; + + case Rule::RKind::K_FlagAttribute: { + Rule::FlagRuleData Data = R.getStorage(); + if (F->hasFnAttribute(Data.Name)) + Result += Data.IfPresentStr; + else + Result += Data.IfAbsentStr; + } break; + } + + Result += "-"; } -}; + + return (std::string)Result; +} } // namespace std::unique_ptr -getSplitterByOptionalFeatures(ModuleDesc &&MD, - bool EmitOnlyKernelsAsEntryPoints) { - EntryPointGroupVec Groups; - - DenseMap - PropertiesToFunctionsMap; +getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, + bool EmitOnlyKernelsAsEntryPoints) { + FunctionsCategorizer Categorizer; - Module &M = MD.getModule(); + EntryPointsGroupScope Scope = + selectDeviceCodeGroupScope(MD.getModule(), Mode, IROutputOnly); + + switch (Scope) { + case Scope_Global: + // We simply perform entry points filtering, but group all of them together. + Categorizer.registerRule( + [](Function *) -> std::string { return GLOBAL_SCOPE_NAME; }); + break; + case Scope_PerKernel: + // Per-kernel split is quite simple: every kernel goes into a separate + // module and that's it, no other rules required. + Categorizer.registerRule( + [](Function *F) -> std::string { return F->getName().str(); }); + break; + case Scope_PerModule: + // The most complex case, because we should account for many other features + // like aspects used in a kernel, large-grf mode, reqd-work-group-size, etc. + + // This is core of per-source device code split + Categorizer.registerSimpleStringAttributeRule( + sycl::utils::ATTR_SYCL_MODULE_ID); + + // Optional features + // Note: Add more rules at the end of the list to avoid chaning orders of + // output files in existing tests. + Categorizer.registerSimpleFlagAttributeRule( + ::sycl::kernel_props::ATTR_LARGE_GRF, "large-grf"); + Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects"); + Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size"); + Categorizer.registerSimpleStringAttributeRule( + sycl::utils::ATTR_SYCL_OPTLEVEL); + break; + } + + // std::map is used here to ensure stable ordering of entry point groups, + // which is based on their contents, this greatly helps LIT tests + std::map EntryPointsMap; // Only process module entry points: - for (auto &F : M.functions()) { - if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints) || - !MD.isEntryPointCandidate(F)) { + for (auto &F : MD.getModule().functions()) { + if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints)) continue; - } - auto Key = UsedOptionalFeatures(&F); - PropertiesToFunctionsMap[std::move(Key)].insert(&F); + std::string Key = Categorizer.computeCategoryFor(&F); + EntryPointsMap[std::move(Key)].insert(&F); } - if (PropertiesToFunctionsMap.empty()) { + EntryPointGroupVec Groups; + + if (EntryPointsMap.empty()) { // No entry points met, record this. Groups.emplace_back(GLOBAL_SCOPE_NAME, EntryPointSet{}); } else { - Groups.reserve(PropertiesToFunctionsMap.size()); - for (auto &It : PropertiesToFunctionsMap) { - const UsedOptionalFeatures &Features = It.first; - EntryPointSet &EntryPoints = It.second; - - // Start with properties of a source module - EntryPointGroup::Properties MDProps = MD.getEntryPointGroup().Props; - // Propagate LargeGRF flag to entry points group - if (Features.UsesLargeGRF) - MDProps.UsesLargeGRF = true; - if (Features.OptLevel != -1) - MDProps.OptLevel = Features.OptLevel; - Groups.emplace_back( - Features.generateModuleName(MD.getEntryPointGroup().GroupId), - std::move(EntryPoints), MDProps); - } + Groups.reserve(EntryPointsMap.size()); + // Start with properties of a source module + EntryPointGroup::Properties MDProps = MD.getEntryPointGroup().Props; + for (auto &[Key, EntryPoints] : EntryPointsMap) + Groups.emplace_back(Key, std::move(EntryPoints), MDProps); } - if (Groups.size() > 1) + bool DoSplit = (Mode != SPLIT_NONE && + (Groups.size() > 1 || !Groups.cbegin()->Functions.empty())); + + if (DoSplit) return std::make_unique(std::move(MD), std::move(Groups)); - else - return std::make_unique(std::move(MD), std::move(Groups)); + + return std::make_unique(std::move(MD), std::move(Groups)); } } // namespace module_split diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index ab5d636fcb398..3652f756ce1e1 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -55,10 +55,6 @@ struct EntryPointGroup { struct Properties { // Whether all EPs are ESIMD, SYCL or there are both kinds. SyclEsimdSplitStatus HasESIMD = SyclEsimdSplitStatus::SYCL_AND_ESIMD; - // Whether any of the EPs use large GRF mode. - bool UsesLargeGRF = false; - // front-end opt level for kernel compilation - int OptLevel = -1; // Scope represented by EPs in a group EntryPointsGroupScope Scope = Scope_Global; @@ -67,10 +63,7 @@ struct EntryPointGroup { Res.HasESIMD = HasESIMD == Other.HasESIMD ? HasESIMD : SyclEsimdSplitStatus::SYCL_AND_ESIMD; - Res.UsesLargeGRF = UsesLargeGRF || Other.UsesLargeGRF; // Scope remains global - // OptLevel is expected to be the same for both merging EPGs - assert(OptLevel == Other.OptLevel && "OptLevels are not same"); return Res; } }; @@ -94,11 +87,6 @@ struct EntryPointGroup { bool isSycl() const { return Props.HasESIMD == SyclEsimdSplitStatus::SYCL_ONLY; } - // Tells if some entry points use large GRF mode. - bool isLargeGRF() const { return Props.UsesLargeGRF; } - - // Returns opt level - int getOptLevel() const { return Props.OptLevel; } void saveNames(std::vector &Dest) const; void rebuildFromNames(const std::vector &Names, const Module &M); @@ -153,8 +141,6 @@ class ModuleDesc { bool isESIMD() const { return EntryPoints.isEsimd(); } bool isSYCL() const { return EntryPoints.isSycl(); } - bool isLargeGRF() const { return EntryPoints.isLargeGRF(); } - int getOptLevel() const { return EntryPoints.getOptLevel(); } const EntryPointSet &entries() const { return EntryPoints.Functions; } const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; } @@ -253,16 +239,11 @@ class ModuleSplitterBase { }; std::unique_ptr -getSplitterByKernelType(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); +getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, + bool EmitOnlyKernelsAsEntryPoints); std::unique_ptr -getSplitterByMode(ModuleDesc &&MD, IRSplitMode Mode, - bool AutoSplitIsGlobalScope, - bool EmitOnlyKernelsAsEntryPoints); - -std::unique_ptr -getSplitterByOptionalFeatures(ModuleDesc &&MD, - bool EmitOnlyKernelsAsEntryPoints); +getSplitterByKernelType(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index b9fcff8a99bdc..a676cab38fc59 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -40,6 +40,7 @@ #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" #include "llvm/SYCLLowerIR/LowerKernelProps.h" +#include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" @@ -452,11 +453,49 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, if (MD.isESIMD()) { PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isEsimdImage", true}); } - if (MD.isLargeGRF()) - PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isLargeGRF", true}); - if (MD.getOptLevel() != -1) - PropSet[PropSetRegTy::SYCL_MISC_PROP].insert( - {"optLevel", MD.getOptLevel()}); + + { + // check for large GRF property + bool HasLargeGRF = llvm::any_of(MD.entries(), [](const Function *F) { + return F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF); + }); + + if (HasLargeGRF) + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isLargeGRF", true}); + } + // FIXME: Remove 'if' below when possible + // GPU backend has a problem with accepting optimization level options in form + // described by Level Zero specification (-ze-opt-level=1) when 'invoke_simd' + // functionality is involved. JIT compilation results in the following error: + // error: VLD: Failed to compile SPIR-V with following error: + // invalid api option: -ze-opt-level=O1 + // -11 (PI_ERROR_BUILD_PROGRAM_FAILURE) + // 'if' below essentially preserves the behavior (presumably mistakenly) + // implemented in intel/llvm#8763: ignore 'optLevel' property for images which + // were produced my merge after ESIMD split + if (MD.getEntryPointGroup().Props.HasESIMD != + module_split::SyclEsimdSplitStatus::SYCL_AND_ESIMD) { + // Handle sycl-optlevel property + int OptLevel = -1; + for (const Function *F : MD.entries()) { + if (!F->hasFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL)) + continue; + + // getAsInteger returns true on error + if (!F->getFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL) + .getValueAsString() + .getAsInteger(10, OptLevel)) { + // It is expected that device-code split has separated kernels with + // different values of sycl-optlevel attribute. Therefore, it is enough + // to only look at the first function with such attribute to compute + // the property for the whole device image. + break; + } + } + + if (OptLevel != -1) + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"optLevel", OptLevel}); + } { std::vector FuncNames = getKernelNamesUsingAssert(M); for (const StringRef &FName : FuncNames) @@ -578,9 +617,6 @@ bool lowerEsimdConstructs(module_split::ModuleDesc &MD) { // Compute the filename suffix for the module StringRef getModuleSuffix(const module_split::ModuleDesc &MD) { - if (MD.isLargeGRF()) { - return MD.isESIMD() ? "_esimd_large_grf" : "_large_grf"; - } return MD.isESIMD() ? "_esimd" : ""; } @@ -768,60 +804,22 @@ processInputModule(std::unique_ptr M) { (SplitMode == module_split::SPLIT_AUTO)) && "invalid split mode for IR-only output"); - // Top-level per-kernel/per-source splitter. SYCL/ESIMD splitting is applied - // to modules resulting from all other kinds of splitting. - std::unique_ptr ScopedSplitter = - module_split::getSplitterByMode(module_split::ModuleDesc{std::move(M)}, - SplitMode, IROutputOnly, - EmitOnlyKernelsAsEntryPoints); - - SmallVector TopLevelModules; + std::unique_ptr Splitter = + module_split::getDeviceCodeSplitter( + module_split::ModuleDesc{std::move(M)}, SplitMode, IROutputOnly, + EmitOnlyKernelsAsEntryPoints); + bool SplitOccurred = Splitter->remainingSplits() > 1; + Modified |= SplitOccurred; - // FIXME: this check should be performed on all split levels + // FIXME: this check is not performed for ESIMD splits if (DeviceGlobals) - ScopedSplitter->verifyNoCrossModuleDeviceGlobalUsage(); - - const bool SplitByScope = ScopedSplitter->remainingSplits() > 1; - bool SplitByOptionalFeatures = false; + Splitter->verifyNoCrossModuleDeviceGlobalUsage(); - while (ScopedSplitter->hasMoreSplits()) { - module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); - - if (IROutputOnly || SplitMode == module_split::SPLIT_NONE) { - // We can't perform any kind of split. - TopLevelModules.emplace_back(std::move(MD)); - continue; - } - - std::unique_ptr OptionalFeaturesSplitter = - module_split::getSplitterByOptionalFeatures( - std::move(MD), EmitOnlyKernelsAsEntryPoints); - - // Here we perform second-level splitting based on device-specific - // features used/declared in entry points. - // This step is mandatory, because it is required for functional - // correctness, i.e. to prevent speculative compilation of kernels that use - // optional features on a HW which doesn't support them. - SplitByOptionalFeatures |= OptionalFeaturesSplitter->remainingSplits() > 1; - - while (OptionalFeaturesSplitter->hasMoreSplits()) { - TopLevelModules.emplace_back(OptionalFeaturesSplitter->nextSplit()); - } - } - - Modified |= SplitByScope; - Modified |= SplitByOptionalFeatures; - - // TODO this nested splitting scheme will not scale well when other split - // "dimensions" will be added. Some infra/"split manager" needs to be - // implemented in this case - e.g. all needed splitters are registered, then - // split manager applies them in the order added and runs needed tforms on the - // "leaf" ModuleDesc's resulted from splitting. Some bookkeeping is needed for - // ESIMD splitter to link back needed modules. - - // Based on results from the top-level splitting, we perform some lower-level - // splitting for various unique features. - for (module_split::ModuleDesc &MDesc : TopLevelModules) { + // It is important that we *DO NOT* preserve all the splits in memory at the + // same time, because it leads to a huge RAM consumption by the tool on bigger + // inputs. + while (Splitter->hasMoreSplits()) { + module_split::ModuleDesc MDesc = Splitter->nextSplit(); DUMP_ENTRY_POINTS(MDesc.entries(), MDesc.Name.c_str(), 1); MDesc.fixupLinkageOfDirectInvokeSimdTargets(); @@ -835,10 +833,9 @@ processInputModule(std::unique_ptr M) { std::unique_ptr ESIMDSplitter = module_split::getSplitterByKernelType(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); - const bool SplitByESIMD = ESIMDSplitter->remainingSplits() > 1; - Modified |= SplitByESIMD; + bool ESIMDSplitOccurred = ESIMDSplitter->remainingSplits() > 1; - if (SplitByESIMD && SplitByScope && + if (ESIMDSplitOccurred && SplitOccurred && (SplitMode == module_split::SPLIT_PER_KERNEL) && !SplitEsimd) { // Controversial state reached - SYCL and ESIMD entry points resulting // from SYCL/ESIMD split (which is done always) are linked back, since @@ -848,6 +845,8 @@ processInputModule(std::unique_ptr M) { SplitEsimd.ValueStr + " must also be specified"); } SmallVector MMs; + SplitOccurred |= ESIMDSplitOccurred; + Modified |= SplitOccurred; while (ESIMDSplitter->hasMoreSplits()) { module_split::ModuleDesc MDesc2 = ESIMDSplitter->nextSplit(); @@ -883,9 +882,6 @@ processInputModule(std::unique_ptr M) { Modified = true; } - bool SplitOccurred = - SplitByScope || SplitByESIMD || SplitByOptionalFeatures; - if (IROutputOnly) { if (SplitOccurred) { error("some modules had to be split, '-" + IROutputOnly.ArgStr +