diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 8aa4046c87358..d8cef424a9c5b 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -48,6 +48,7 @@ #include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" +#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h" #include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" @@ -60,8 +61,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -1045,6 +1046,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{}, /*ValidateAspects=*/false)); + // Add attribute corresponding to optimization level. + MPM.addPass(SYCLAddOptLevelAttributePass(CodeGenOpts.OptimizationLevel)); + // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be // used only with spir triple. diff --git a/clang/test/CodeGenSYCL/sycl-add-opt-level-attrib.cpp b/clang/test/CodeGenSYCL/sycl-add-opt-level-attrib.cpp new file mode 100644 index 0000000000000..f5f7b8a452c04 --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-add-opt-level-attrib.cpp @@ -0,0 +1,18 @@ +// RUN: %clangxx %s -O0 -S -o %t.ll -fsycl-device-only +// RUN: FileCheck %s --input-file %t.ll -check-prefixes=CHECK-IR +// CHECK-IR: define {{.*}} spir_kernel void @{{.*}}main{{.*}}sycl{{.*}}handler{{.*}}() #[[ATTR:[0-9]+]] +// CHECK-IR: attributes #[[ATTR]] = { {{.*}} "sycl-optlevel"="0" {{.*}}} + +// This test checks adding of the attribute 'sycl-optlevel' +// by the clang front-end + +#include + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { + h.single_task([=]() {}); + }); + return 0; +} + diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h b/llvm/include/llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h new file mode 100644 index 0000000000000..4f77568611ec3 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h @@ -0,0 +1,33 @@ +//===----- SYCLAddOptLevelAttribute.h - SYCLAddOptLevelAttribute Pass -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Pass adds 'sycl-optlevel' function attribute based on optimization level +// passed in. +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H +#define LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class SYCLAddOptLevelAttributePass + : public PassInfoMixin { +public: + SYCLAddOptLevelAttributePass(int OptLevel = -1) : OptLevel{OptLevel} {}; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + +private: + int OptLevel; +}; + +} // namespace llvm + +#endif // LLVM_SYCL_ADD_OPT_LEVEL_ATTRIBUTE_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3d5e43c7c0df8..37e385c9fdacb 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -90,6 +90,7 @@ #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" +#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h" #include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index a6bf05ab18810..507e5f62ba95a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -140,6 +140,7 @@ MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass()) MODULE_PASS("sycllowerwglocalmemory", SYCLLowerWGLocalMemoryPass()) MODULE_PASS("lower-esimd-kernel-attrs", SYCLFixupESIMDKernelWrapperMDPass()) MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass()) +MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass()) MODULE_PASS("compile-time-properties", CompileTimePropertiesPass()) #undef MODULE_PASS diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 3fe3bb06cc69d..f084bb16a8e68 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -63,6 +63,7 @@ add_llvm_component_library(LLVMSYCLLowerIR LowerWGLocalMemory.cpp LowerWGScope.cpp MutatePrintfAddrspace.cpp + SYCLAddOptLevelAttribute.cpp SYCLPropagateAspectsUsage.cpp SYCLUtils.cpp diff --git a/llvm/lib/SYCLLowerIR/SYCLAddOptLevelAttribute.cpp b/llvm/lib/SYCLLowerIR/SYCLAddOptLevelAttribute.cpp new file mode 100644 index 0000000000000..dc0620ccf87e9 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/SYCLAddOptLevelAttribute.cpp @@ -0,0 +1,30 @@ +//===---- SYCLAddOptLevelAttribute.cpp - SYCLAddOptLevelAttribute Pass ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// Pass adds 'sycl-optlevel' function attribute based on optimization level +// passed in. +//===---------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/SYCLAddOptLevelAttribute.h" + +#include "llvm/IR/Module.h" + +using namespace llvm; + +PreservedAnalyses +SYCLAddOptLevelAttributePass::run(Module &M, ModuleAnalysisManager &MAM) { + // Here, we add a function attribute 'sycl-optlevel' to store the + // optimization level. + assert(OptLevel >= 0 && "Invalid optimization level!"); + for (Function &F : M.functions()) { + if (F.isDeclaration()) + continue; + F.addFnAttr("sycl-optlevel", std::to_string(OptLevel)); + } + return PreservedAnalyses::all(); +} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll index faec71a602ffd..687e50e0f6ffd 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -10,29 +10,29 @@ ; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M1-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M2-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll index c724ca3284909..533de091641ca 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll @@ -4,14 +4,14 @@ ; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M0-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 \ ; RUN: --implicit-check-not kernel2 ; -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M2-SYMS \ +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3 ; -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M1-SYMS \ +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M1-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 \ ; RUN: --implicit-check-not kernel3 diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll index 919eabccfa3c4..8a1acc02cf756 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll @@ -9,16 +9,16 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM -; RUN: FileCheck %s -input-file=%t_esimd_large_grf_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM +; RUN: FileCheck %s -input-file=%t_esimd_large_grf_1.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM ; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}esimd-large-grf.ll.tmp_1.ll|{{.*}}esimd-large-grf.ll.tmp_1.prop|{{.*}}esimd-large-grf.ll.tmp_1.sym -; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_1.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_1.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_1.sym +; CHECK: {{.*}}esimd-large-grf.ll.tmp_0.ll|{{.*}}esimd-large-grf.ll.tmp_0.prop|{{.*}}esimd-large-grf.ll.tmp_0.sym +; CHECK: {{.*}}esimd-large-grf.ll.tmp_esimd_0.ll|{{.*}}esimd-large-grf.ll.tmp_esimd_0.prop|{{.*}}esimd-large-grf.ll.tmp_esimd_0.sym +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 ; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1 diff --git a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll index 26340ddad59e9..cb76430e8c4a2 100644 --- a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll @@ -9,14 +9,14 @@ ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_large_grf_0.ll --check-prefixes CHECK-LARGE-GRF-IR -; RUN: FileCheck %s -input-file=%t_large_grf_0.prop --check-prefixes CHECK-LARGE-GRF-PROP -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_large_grf_0.sym --check-prefixes CHECK-LARGE-GRF-SYM +; RUN: FileCheck %s -input-file=%t_large_grf_1.ll --check-prefixes CHECK-LARGE-GRF-IR +; RUN: FileCheck %s -input-file=%t_large_grf_1.prop --check-prefixes CHECK-LARGE-GRF-PROP +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM +; RUN: FileCheck %s -input-file=%t_large_grf_1.sym --check-prefixes CHECK-LARGE-GRF-SYM ; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}-large-grf.ll.tmp_1.ll|{{.*}}-large-grf.ll.tmp_1.prop|{{.*}}-large-grf.ll.tmp_1.sym +; CHECK: {{.*}}-large-grf.ll.tmp_0.ll|{{.*}}-large-grf.ll.tmp_0.prop|{{.*}}-large-grf.ll.tmp_0.sym +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym ; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1 diff --git a/llvm/test/tools/sycl-post-link/sycl-opt-level.ll b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll new file mode 100644 index 0000000000000..349e6e445027d --- /dev/null +++ b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll @@ -0,0 +1,46 @@ +; This test checks parsing of the attribute 'sycl-optlevel' +; by the sycl-post-link-tool: +; In addition to splitting requested by user, the kernels are also split based +; on their optimization levels. +; sycl-post-link adds 'optLevel' property to the device binary + +; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0 +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1 + +; CHECK: [Code|Properties|Symbols] +; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym +; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym + +; CHECK-OPT-LEVEL-PROP-0: optLevel=1|0 +; CHECK-OPT-LEVEL-PROP-1: optLevel=1|2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define dso_local spir_func noundef i32 @_Z3fooii(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 { +entry: + %sub = sub nsw i32 %a, %b + ret i32 %sub +} + +define dso_local spir_func noundef i32 @_Z3booii(i32 noundef %a, i32 noundef %b) #1 { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %retval.ascast = addrspacecast i32* %retval to i32 addrspace(4)* + %a.addr.ascast = addrspacecast i32* %a.addr to i32 addrspace(4)* + %b.addr.ascast = addrspacecast i32* %b.addr to i32 addrspace(4)* + store i32 %a, i32 addrspace(4)* %a.addr.ascast, align 4 + store i32 %b, i32 addrspace(4)* %b.addr.ascast, align 4 + %0 = load i32, i32 addrspace(4)* %a.addr.ascast, align 4 + %1 = load i32, i32 addrspace(4)* %b.addr.ascast, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "sycl-module-id"="test3.cpp" "sycl-optlevel"="2" } +attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "sycl-module-id"="test2.cpp" "sycl-optlevel"="0" } + diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index c571d4c25f0c4..75546fe42b166 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -42,6 +42,7 @@ constexpr char GLOBAL_SCOPE_NAME[] = ""; constexpr char SYCL_SCOPE_NAME[] = ""; constexpr char ESIMD_SCOPE_NAME[] = ""; constexpr char ESIMD_MARKER_MD[] = "sycl_explicit_simd"; +constexpr char ATTR_OPT_LEVEL[] = "sycl-optlevel"; bool hasIndirectFunctionsOrCalls(const Module &M) { for (const auto &F : M.functions()) { @@ -674,7 +675,8 @@ void ModuleDesc::dump() const { llvm::errs() << " ESIMD:" << toString(EntryPoints.Props.HasESIMD) << ", SpecConstMet:" << (Props.SpecConstsMet ? "YES" : "NO") << ", LargeGRF:" - << (EntryPoints.Props.UsesLargeGRF ? "YES" : "NO") << "\n"; + << (EntryPoints.Props.UsesLargeGRF ? "YES" : "NO") + << ", OptLevel:" << EntryPoints.getOptLevel() << "\n"; dumpEntryPoints(entries(), EntryPoints.GroupId.c_str(), 1); llvm::errs() << "}\n"; } @@ -713,6 +715,7 @@ namespace { struct UsedOptionalFeatures { SmallVector Aspects; bool UsesLargeGRF = false; + int OptLevel = -1; SmallVector ReqdWorkGroupSize; // TODO: extend this further with reqd-sub-group-size and other properties @@ -735,6 +738,12 @@ struct UsedOptionalFeatures { if (F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF)) UsesLargeGRF = true; + if (F->hasFnAttribute(ATTR_OPT_LEVEL)) + if (F->getFnAttribute(ATTR_OPT_LEVEL) + .getValueAsString() + .getAsInteger(10, OptLevel)) + OptLevel = -1; + if (const MDNode *MDN = F->getMetadata("reqd_work_group_size")) { size_t NumOperands = MDN->getNumOperands(); assert(NumOperands >= 1 && NumOperands <= 3 && @@ -750,8 +759,9 @@ struct UsedOptionalFeatures { llvm::hash_code LargeGRFHash = llvm::hash_value(UsesLargeGRF); llvm::hash_code ReqdWorkGroupSizeHash = llvm::hash_combine_range( ReqdWorkGroupSize.begin(), ReqdWorkGroupSize.end()); - Hash = static_cast( - llvm::hash_combine(AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash)); + llvm::hash_code OptLevelHash = llvm::hash_value(OptLevel); + Hash = static_cast(llvm::hash_combine( + AspectsHash, LargeGRFHash, ReqdWorkGroupSizeHash, OptLevelHash)); } std::string generateModuleName(StringRef BaseName) const { @@ -773,6 +783,9 @@ struct UsedOptionalFeatures { if (UsesLargeGRF) Ret += "-large-grf"; + if (OptLevel != -1) + Ret += "-O" + std::to_string(OptLevel); + return Ret; } @@ -808,7 +821,8 @@ struct UsedOptionalFeatures { return false; } - return IsEmpty == Other.IsEmpty && UsesLargeGRF == Other.UsesLargeGRF; + return IsEmpty == Other.IsEmpty && UsesLargeGRF == Other.UsesLargeGRF && + OptLevel == Other.OptLevel; } unsigned hash() const { return static_cast(Hash); } @@ -869,6 +883,8 @@ getSplitterByOptionalFeatures(ModuleDesc &&MD, // Propagate LargeGRF flag to entry points group if (Features.UsesLargeGRF) MDProps.UsesLargeGRF = true; + if (Features.OptLevel != -1) + MDProps.OptLevel = Features.OptLevel; Groups.emplace_back( Features.generateModuleName(MD.getEntryPointGroup().GroupId), std::move(EntryPoints), MDProps); diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index ffc29c163e2b6..ab5d636fcb398 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -57,6 +57,8 @@ struct EntryPointGroup { SyclEsimdSplitStatus HasESIMD = SyclEsimdSplitStatus::SYCL_AND_ESIMD; // Whether any of the EPs use large GRF mode. bool UsesLargeGRF = false; + // front-end opt level for kernel compilation + int OptLevel = -1; // Scope represented by EPs in a group EntryPointsGroupScope Scope = Scope_Global; @@ -67,6 +69,8 @@ struct EntryPointGroup { : SyclEsimdSplitStatus::SYCL_AND_ESIMD; Res.UsesLargeGRF = UsesLargeGRF || Other.UsesLargeGRF; // Scope remains global + // OptLevel is expected to be the same for both merging EPGs + assert(OptLevel == Other.OptLevel && "OptLevels are not same"); return Res; } }; @@ -93,6 +97,9 @@ struct EntryPointGroup { // Tells if some entry points use large GRF mode. bool isLargeGRF() const { return Props.UsesLargeGRF; } + // Returns opt level + int getOptLevel() const { return Props.OptLevel; } + void saveNames(std::vector &Dest) const; void rebuildFromNames(const std::vector &Names, const Module &M); void rebuild(const Module &M); @@ -147,6 +154,7 @@ class ModuleDesc { bool isESIMD() const { return EntryPoints.isEsimd(); } bool isSYCL() const { return EntryPoints.isSycl(); } bool isLargeGRF() const { return EntryPoints.isLargeGRF(); } + int getOptLevel() const { return EntryPoints.getOptLevel(); } const EntryPointSet &entries() const { return EntryPoints.Functions; } const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 13468c241da0a..b9fcff8a99bdc 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -454,6 +454,9 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } if (MD.isLargeGRF()) PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isLargeGRF", true}); + if (MD.getOptLevel() != -1) + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert( + {"optLevel", MD.getOptLevel()}); { std::vector FuncNames = getKernelNamesUsingAssert(M); for (const StringRef &FName : FuncNames) diff --git a/sycl/doc/design/PropagateCompilerFlagsToRuntime.md b/sycl/doc/design/PropagateCompilerFlagsToRuntime.md new file mode 100644 index 0000000000000..3981151782140 --- /dev/null +++ b/sycl/doc/design/PropagateCompilerFlagsToRuntime.md @@ -0,0 +1,134 @@ +# Propagation of optimization levels used by front-end compiler to backend + +In order to ease the process of debugging, there is a user requirement to +compile different modules with different levels of optimization. This document +proposes a compiler flow that will enable propagation of compiler options +specified from front-end to the runtimes and eventually to the backend. +Currently, only `O0`/`O1`/`O2`/`O3` options are handled. +Please note that this document only describes support for JIT path. AOT path +support will be added later. + +## Background + +When building an application with several source and object files, it is +possible to specify the optimization parameters individually for each source +file/object file (for each invocation of the DPCPP compiler). The SYCL runtime +should pass the original optimization options (e.g. `-O0` or `-O2`) used when +building an object file to the device backend compiler. This will improve the +debugging experience by selectively disabling/enabling optimizations for each +source file, and therefore achieving better debuggability and better performance +as needed. + +The current behavior is that the optimization level option is captured at link +time and converted into its backend-specific equivalent. This option is +propagated to the backend. For example, If `-O0` option is specified during +link-time when using the OpenCL backend, the SYCL runtime will pass +`-cl-opt-disable` option to the backend device compiler for all modules +essentially disabling optimizations globally. Otherwise, if the `-O0` +option is not specified for linker, it will not pass `-cl-opt-disable` option at +all, therefore making the kernels mostly undebuggable, regardless of the +original front-end compiler options. Link-time capturing of optimization option +is the essence of the current implementation and this leads to loss of +information about the compile-time options. Proposed design aims to rectify this +behavior. + +Here is an example that demonstrates this pain point: + +``` +clang++ -c test_host.cpp -o test_host.o +clang++ -c -fsycl test_device_1.cpp -o test_device_1.o +clang++ -c -fsycl -g -O0 test_device_2.cpp -o test_device_2.o +clang++ -fsycl -g test_host.o test_device_1.o test_device_2.o -o test +``` + +In this scenario, the fat binary is 'test' and there are no compilation flags +sent across to the backend compiler. Though the user wanted to have full +debuggability with test_device_2.cpp module, some of the debuggability is lost. + +Another scenario is shown below: + +``` +clang++ -c -g -O0 -fsycl test.cpp -o test.o +clang++ -g -fsycl test.o -o test +``` + +In this scenario, the fat binary is 'test' and there are no compilation flags +sent across to the backend compiler. Though the user wanted to have full +debuggability with test.cpp module, some of the debuggability is lost. The user +was not able to set a breakpoint inside device code. + +## Requirements + +In order to support module-level debuggability, the user will compile different +module files with different levels of optimization. These optimization levels +must be preserved and made use of during the backend compilation. The following +is a key requirement for this feature. +- If the user specifies `-Ox` as a front-end compile option for a particular +module, this option must be converted to appropriate backend option and then +propagated fo use during backend JIT compilation. + +The following table specifies the appropriate backend options for level-zero and +OpenCL backends. + +| Front-end option | L0 backend option | OpenCL backend option | +| ---------------- | ----------------- | --------------------- | +| -O0 | -ze-opt-disable | -cl-opt-disable | +| -O1 | -ze-opt-level=1 | /* no option */ | +| -O2 | -ze-opt-level=1 | /* no option */ | +| -O3 | -ze-opt-level=2 | /* no option */ | + + +## Proposed design + +This chapter discusses changes required in various stages of the compilation +pipeline. + + +### Changes to the clang front-end + +For each function in SYCL device code, we add a new function attribute that is +named `sycl-optlevel`. Value of this attribute is set to the optimization level +which was used to compile the overlying module. + +### Changes to the sycl-post-link tool + +During device code split performed in the `sycl-post-link` tool, optimization +level attribute `sycl-optlevel` is treated as an optional feature, +i.e. device code split algorithm ensures that no kernels with different values +of sycl-optlevel are bundled into the same device image. See also optional +kernel features [design document](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OptionalDeviceFeatures.md#changes-to-the-post-link-tool). +The `sycl-post-link` tool also adds a new property into the +`SYCL/misc properties` property set for each device code module. This entry will +be used to store the optimization level. Name of this property is `optLevel` and +the value is stored as a 32-bit integer. If there is a module where the user did +not specify an optimization module, there is no new entry in the property set. + +### Changes to the SYCL runtime + +In the SYCL runtime, the device image properties can be accessed to extract the +associated optimization level. Once the optimization level is available, it is +converted to its equivalent frontend option string +(`-O0`, `-O1`, `-O2`, or `-O3`). This frontend option string is passed into a +query that is made to the plugin to identify the correct backend option. This +backend option is added to the existing list of compiler options and is sent to +the backend. + +### Changes to the plugin + +A new plugin API has been added. It takes the frontend option string as input in +string format and returns `pi_result`. A string format is used for sending the +frontend option so that this API can be used for querying other frontend +options as well. The signature of this API is as follows: + +```C++ +pi_result piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, + const char **backend_option); +``` + +In the level-zero and OpenCL plugins, the table provided in the 'Requirements' +section is used as a guide to identify the appropriate backend option. +The option is returned in `backend_option`. For other plugins (HIP, cuda, and +ESIMD emulator), empty string is returned. This API returns `PI_SUCCESS` for +valid inputs (frontend_option != ""). For invalid inputs, it returns +`PI_ERROR_INVALID_VALUE`. diff --git a/sycl/doc/index.rst b/sycl/doc/index.rst index 3ff96f2139716..57188fa144e1b 100644 --- a/sycl/doc/index.rst +++ b/sycl/doc/index.rst @@ -49,6 +49,7 @@ Design Documents for the oneAPI DPC++ Compiler design/KernelFusionJIT design/NonRelocatableDeviceCode design/DeviceAspectTraitDesign + design/PropagateCompilerFlagsToRuntime New OpenCL Extensions New SPIR-V Extensions diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 02c2f38cf3b38..237e93da0c771 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -155,6 +155,8 @@ _PI_API(piGetDeviceAndHostTimer) _PI_API(piextEnqueueDeviceGlobalVariableWrite) _PI_API(piextEnqueueDeviceGlobalVariableRead) +_PI_API(piPluginGetBackendOption) + // Queue create and get APIs for immediate commandlists _PI_API(piextQueueCreate2) _PI_API(piextQueueGetNativeHandle2) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 1a3a42cd7a6b1..bcc8ec8fe9b73 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -2063,6 +2063,17 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); +/// API to get backend specific option. +/// \param frontend_option is a string that contains frontend option. +/// \param backend_option is used to return the backend option corresponding to +/// frontend option. +/// +/// \return PI_SUCCESS is returned for valid frontend_option. If a valid backend +/// option is not available, an empty string is returned. +__SYCL_EXPORT pi_result piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, + const char **backend_option); + /// Queries device for it's global timestamp in nanoseconds, and updates /// HostTime with the value of the host timer at the closest possible point in /// time to that at which DeviceTime was returned. diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 4c40d0f78c4ab..c55bfcf373e52 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -25,6 +25,7 @@ #include #include #include +#include // Forward declarations void enableCUDATracing(); @@ -80,6 +81,25 @@ pi_result cuda_piPluginGetLastError(char **message) { return ErrorMessageCode; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return empty string for cuda. +// TODO: Determine correct string to be passed. +pi_result cuda_piPluginGetBackendOption(pi_platform, + const char *frontend_option, + const char **backend_option) { + using namespace std::literals; + if (frontend_option == nullptr) + return PI_ERROR_INVALID_VALUE; + if (frontend_option == "-O0"sv || frontend_option == "-O1"sv || + frontend_option == "-O2"sv || frontend_option == "-O3"sv || + frontend_option == ""sv) { + *backend_option = ""; + return PI_SUCCESS; + } + return PI_ERROR_INVALID_VALUE; +} + // Iterates over the event wait list, returns correct pi_result error codes. // Invokes the callback for the latest event of each queue in the wait list. // The callback must take a single pi_event argument and return a pi_result. @@ -5824,6 +5844,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) + _PI_CL(piPluginGetBackendOption, cuda_piPluginGetBackendOption) #undef _PI_CL diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index b6796fe7e689f..b9b0b2bcd3da0 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -167,6 +168,24 @@ pi_result piPluginGetLastError(char **message) { return ErrorMessageCode; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return empty string for esimd emulator. +// TODO: Determine correct string to be passed. +pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, + const char **backend_option) { + using namespace std::literals; + if (frontend_option == nullptr) + return PI_ERROR_INVALID_VALUE; + if (frontend_option == "-O0"sv || frontend_option == "-O1"sv || + frontend_option == "-O2"sv || frontend_option == "-O3"sv || + frontend_option == ""sv) { + *backend_option = ""; + return PI_SUCCESS; + } + return PI_ERROR_INVALID_VALUE; +} + using IDBuilder = sycl::detail::Builder; template diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 625898f04ed02..0abe0424db884 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace { // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be @@ -132,6 +133,24 @@ pi_result hip_piPluginGetLastError(char **message) { return ErrorMessageCode; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return empty string for hip. +// TODO: Determine correct string to be passed. +pi_result hip_piPluginGetBackendOption(pi_platform, const char *frontend_option, + const char **backend_option) { + using namespace std::literals; + if (frontend_option == nullptr) + return PI_ERROR_INVALID_VALUE; + if (frontend_option == "-O0"sv || frontend_option == "-O1"sv || + frontend_option == "-O2"sv || frontend_option == "-O3"sv || + frontend_option == ""sv) { + *backend_option = ""; + return PI_SUCCESS; + } + return PI_ERROR_INVALID_VALUE; +} + // Iterates over the event wait list, returns correct pi_result error codes. // Invokes the callback for the latest event of each queue in the wait list. // The callback must take a single pi_event argument and return a pi_result. @@ -5643,6 +5662,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) + _PI_CL(piPluginGetBackendOption, hip_piPluginGetBackendOption) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 4967a46489f26..2ecfac3a23b2c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2175,6 +2176,36 @@ pi_result piPluginGetLastError(char **message) { return pi2ur::piPluginGetLastError(message); } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-ze-opt-disable' for frontend_option = -O0. +// Return '-ze-opt-level=1' for frontend_option = -O1 or -O2. +// Return '-ze-opt-level=2' for frontend_option = -O3. +pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, + const char **backend_option) { + using namespace std::literals; + if (frontend_option == nullptr) { + return PI_ERROR_INVALID_VALUE; + } + if (frontend_option == ""sv) { + *backend_option = ""; + return PI_SUCCESS; + } + if (frontend_option == "-O0"sv) { + *backend_option = "-ze-opt-disable"; + return PI_SUCCESS; + } + if (frontend_option == "-O1"sv || frontend_option == "-O2"sv) { + *backend_option = "-ze-opt-level=1"; + return PI_SUCCESS; + } + if (frontend_option == "-O3"sv) { + *backend_option = "-ze-opt-level=2"; + return PI_SUCCESS; + } + return PI_ERROR_INVALID_VALUE; +} + pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 414c4b3fc281b..681c716dfe01b 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #define CHECK_ERR_SET_NULL_RET(err, ptr, reterr) \ @@ -96,6 +97,30 @@ pi_result piPluginGetLastError(char **message) { return ErrorMessageCode; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. +pi_result piPluginGetBackendOption(pi_platform, const char *frontend_option, + const char **backend_option) { + using namespace std::literals; + if (frontend_option == nullptr) + return PI_ERROR_INVALID_VALUE; + if (frontend_option == ""sv) { + *backend_option = ""; + return PI_SUCCESS; + } + if (!strcmp(frontend_option, "-O0")) { + *backend_option = "-cl-opt-disable"; + return PI_SUCCESS; + } + if (frontend_option == "-O1"sv || frontend_option == "-O2"sv || + frontend_option == "-O3"sv) { + *backend_option = ""; + return PI_SUCCESS; + } + return PI_ERROR_INVALID_VALUE; +} + static cl_int getPlatformVersion(cl_platform_id plat, OCLV::OpenCLVersion &version) { cl_int ret_err = CL_INVALID_VALUE; @@ -2329,6 +2354,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPluginGetLastError, piPluginGetLastError) _PI_CL(piTearDown, piTearDown) _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) + _PI_CL(piPluginGetBackendOption, piPluginGetBackendOption) #undef _PI_CL diff --git a/sycl/source/detail/plugin.hpp b/sycl/source/detail/plugin.hpp index 6e3fa916caa5f..72879bc0f0482 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/source/detail/plugin.hpp @@ -235,6 +235,14 @@ class plugin { void *getLibraryHandle() { return MLibraryHandle; } int unload() { return RT::unloadPlugin(MLibraryHandle); } + // Get backend option. + void getBackendOption(pi_platform platform, const char *frontend_option, + const char **backend_option) const { + RT::PiResult Err = call_nocheck( + platform, frontend_option, backend_option); + checkPiResult(Err); + } + // return the index of PiPlatforms. // If not found, add it and return its index. // The function is expected to be called in a thread safe manner. diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7fdf942cd2d16..a84bc290430ea 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -359,6 +360,20 @@ static bool getUint32PropAsBool(const RTDeviceBinaryImage &Img, return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0); } +static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, + const char *PropName) { + pi_device_binary_property Prop = Img.getProperty(PropName); + std::stringstream ss; + if (!Prop) + return ""; + int optLevel = DeviceBinaryProperty(Prop).asUint32(); + if (optLevel < 0 || optLevel > 3) + return ""; + ss << "-O" << optLevel; + std::string temp = ss.str(); + return temp; +} + static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector &Devs, @@ -400,6 +415,32 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, // metadata. CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; } + // Add optimization flags. + auto str = getUint32PropAsOptStr(Img, "optLevel"); + const char *optLevelStr = str.c_str(); + // TODO: Passing these options to vector compiler causes build failure in + // backend. Will pass the flags once backend compilation issue is resolved. + // Update only if compile options are not overwritten by environment + // variable. + if (!isEsimdImage && !CompileOptsEnv && optLevelStr != nullptr && + optLevelStr[0] != '\0') { + // Making sure all devices have the same platform. + assert(!Devs.empty() && + std::all_of(Devs.begin(), Devs.end(), [&](const device &Dev) { + return Dev.get_platform() == Devs[0].get_platform(); + })); + const char *backend_option = nullptr; + // Empty string is returned in backend_option when no appropriate backend + // option is available for a given frontend option. + Plugin.getBackendOption( + detail::getSyclObjImpl(Devs[0].get_platform())->getHandleRef(), + optLevelStr, &backend_option); + if (backend_option && backend_option[0] != '\0') { + if (!CompileOpts.empty()) + CompileOpts += " "; + CompileOpts += std::string(backend_option); + } + } if ((Plugin.getBackend() == backend::ext_oneapi_level_zero || Plugin.getBackend() == backend::opencl) && std::all_of(Devs.begin(), Devs.end(), diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp new file mode 100644 index 0000000000000..c1b1013039b09 --- /dev/null +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp @@ -0,0 +1,36 @@ +// REQUIRES: level_zero + +// RUN: %clangxx -O0 -fsycl %s -o %t0.out +// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu SYCL_PI_TRACE=-1 %t0.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK0 +// RUN: %clangxx -O1 -fsycl %s -o %t1.out +// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu SYCL_PI_TRACE=-1 %t1.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK1 +// RUN: %clangxx -O2 -fsycl %s -o %t2.out +// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu SYCL_PI_TRACE=-1 %t2.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK2 +// RUN: %clangxx -O3 -fsycl %s -o %t3.out +// RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu SYCL_PI_TRACE=-1 %t3.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK3 + +// This test verifies the propagation of front-end compiler optimization +// option to the backend. +// API call in device code: +// Following is expected addition of options for level_zero backend: +// Front-end option | L0 backend option +// -O0 | -ze-opt-disable +// -O1 | -ze-opt-level=1 +// -O2 | -ze-opt-level=1 +// -O3 | -ze-opt-level=2 + +#include + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { h.single_task([=]() {}); }); + std::cout << "sycl-optlevel test passed\n"; + return 0; +} + +// CHECK-LABEL: ---> piProgramBuild( +// CHECK0: -ze-opt-disable +// CHECK1: -ze-opt-level=1 +// CHECK2: -ze-opt-level=1 +// CHECK3: -ze-opt-level=2 +// CHECK: ) ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp new file mode 100644 index 0000000000000..52a17f7a2cb13 --- /dev/null +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp @@ -0,0 +1,45 @@ +// REQUIRES: opencl + +// RUN: %clangxx -O0 -fsycl %s -o %t0.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t0.out 2>&1 %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL0 +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t0.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL0 +// RUN: %clangxx -O1 -fsycl %s -o %t1.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t1.out 2>&1 %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL1 +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t1.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL1 +// RUN: %clangxx -O2 -fsycl %s -o %t2.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t2.out 2>&1 %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL2 +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t2.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL2 +// RUN: %clangxx -O3 -fsycl %s -o %t3.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t3.out 2>&1 %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL3 +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t3.out 2>&1 %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECKOCL3 + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -O0 %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the propagation of front-end compiler optimization +// option to the backend. +// API call in device code: +// Following is expected addition of options for opencl backend: +// Front-end option | OpenCL backend option +// -O0 | -cl-opt-disable +// -O1 | /* no option */ +// -O2 | /* no option */ +// -O3 | /* no option */ + +#include + +int main() { + sycl::queue q; + q.submit([&](sycl::handler &h) { h.single_task([=]() {}); }); + std::cout << "sycl-optlevel test passed\n"; + return 0; +} + +// CHECK-LABEL: ---> piProgramBuild( +// CHECKOCL0: -cl-opt-disable +// CHECKOCL1-NOT: -cl-opt-disable +// CHECKOCL2-NOT: -cl-opt-disable +// CHECKOCL3-NOT: -cl-opt-disable +// CHECK: ) ---> pi_result : PI_SUCCESS diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index c47d8d9f428ae..720b747726ddf 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -59,6 +59,7 @@ piMemRelease piMemRetain piPlatformGetInfo piPlatformsGet +piPluginGetBackendOption piPluginGetLastError piPluginInit piProgramBuild diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index f62f5e6ca59ac..e5dcbab7a4173 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -22,6 +22,7 @@ piMemBufferCreate piMemBufferPartition piMemImageCreate piPlatformsGet +piPluginGetBackendOption piPluginGetLastError piPluginInit piProgramCreate diff --git a/sycl/test/check_device_code/group_barrier.cpp b/sycl/test/check_device_code/group_barrier.cpp index 1b94e6300619b..bf9069eea8558 100644 --- a/sycl/test/check_device_code/group_barrier.cpp +++ b/sycl/test/check_device_code/group_barrier.cpp @@ -38,29 +38,29 @@ int main() { }); return 0; } -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 3, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 4, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 3, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 1, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 0, i32 912) #2 +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 3, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 4, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 3, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 1, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 3, i32 0, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) #2 +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) #2 -// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) #2 +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 4, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 3, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 1, i32 912) +// CHECK: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 0, i32 912) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 7ebab2627b506..7928411ccaa79 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1143,6 +1143,13 @@ inline pi_result mock_piPluginGetLastError(char **message) { return PI_SUCCESS; } +inline pi_result mock_piPluginGetBackendOption(pi_platform platform, + const char *frontend_option, + const char **backend_option) { + *backend_option = ""; + return PI_SUCCESS; +} + // Returns the wall-clock timestamp of host for deviceTime and hostTime inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime,