-
Notifications
You must be signed in to change notification settings - Fork 740
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. #15836
base: sycl
Are you sure you want to change the base?
Changes from 7 commits
f8caf83
00ffb5a
795dd38
78a9005
50e71c0
54f2409
bdf78d7
8cd6d8b
755d630
27011c8
ff2b3d9
07752e2
aa909d2
fcc4786
24711fd
56314b7
e643027
b25e5ac
ce00296
bc01759
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, " | |||||||||
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic") | ||||||||||
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic") | ||||||||||
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions") | ||||||||||
LANGOPT(OffloadFp32PrecDiv, 1, 1, "Return correctly rounded results of fdiv") | ||||||||||
LANGOPT(OffloadFp32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt") | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") | ||||||||||
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") | ||||||||||
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") | ||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules", | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"for complex arithmetic operations">>; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div", | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"code are required to return correctly rounded results.">, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"code are not required to return correctly rounded results.">>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Group<f_Group>; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt", | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"code are required to return correctly rounded results.">, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device " | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
"code are not required to return correctly rounded results.">>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Group<f_Group>; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// OpenCL-only Options | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Visibility<[ClangOption, CC1Option]>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -24099,6 +24099,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD( | |||||||||
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos) | ||||||||||
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10) | ||||||||||
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt) | ||||||||||
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt) | ||||||||||
.Default(0); | ||||||||||
} else { | ||||||||||
// The function has a clang builtin. Create an attribute for it | ||||||||||
|
@@ -24200,7 +24201,8 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD( | |||||||||
// a TU fp-accuracy requested. | ||||||||||
const LangOptions &LangOpts = getLangOpts(); | ||||||||||
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) || | ||||||||||
!LangOpts.FPAccuracyVal.empty()) { | ||||||||||
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFp32PrecDiv || | ||||||||||
!LangOpts.OffloadFp32PrecSqrt) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
llvm::Function *Func = | ||||||||||
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType()); | ||||||||||
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs), | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1889,15 +1889,23 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( | |
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second))); | ||
} | ||
} | ||
if (FuncAttrs.attrs().size() == 0) | ||
if (FuncAttrs.attrs().size() == 0) { | ||
StringRef FPAccuracyVal; | ||
if (!getLangOpts().FPAccuracyVal.empty()) { | ||
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( | ||
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( | ||
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal)); | ||
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); | ||
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal); | ||
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( | ||
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal))); | ||
} | ||
if (Name == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt) | ||
FPAccuracyVal = "3.0"; | ||
if (Name == "fdiv" && !getLangOpts().OffloadFp32PrecDiv) | ||
FPAccuracyVal = "2.5"; | ||
if (!FPAccuracyVal.empty()) | ||
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is the combination supposed to work? If the condition in 1894 was true, would two fpbuiltin-max-error attributes get added? Once in 1898 and again in 1907? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the condition in 1894 is satisfied, then the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, maybe I wasn't clear before. Let me type out what I am asking:
Couldn't you get into the Is this combination supposed to work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh! good catch. I think this will fix it. |
||
} | ||
} | ||
|
||
/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the | ||
|
@@ -5790,10 +5798,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, | |
// Emit the actual call/invoke instruction. | ||
llvm::CallBase *CI; | ||
if (!InvokeDest) { | ||
if (!getLangOpts().FPAccuracyFuncMap.empty() || | ||
!getLangOpts().FPAccuracyVal.empty()) { | ||
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl); | ||
if (FD && FD->getNameInfo().getName().isIdentifier()) { | ||
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl); | ||
if (FD && FD->getNameInfo().getName().isIdentifier()) { | ||
StringRef FuncName = FD->getName(); | ||
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type(); | ||
if (!getLangOpts().FPAccuracyFuncMap.empty() || | ||
!getLangOpts().FPAccuracyVal.empty() || | ||
(FuncName == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt && | ||
IsFloat32Type) || | ||
(FuncName == "fdiv" && !getLangOpts().OffloadFp32PrecDiv && | ||
IsFloat32Type)) { | ||
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr, | ||
FD->getName(), FD->getBuiltinID()); | ||
if (CI) | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1721,6 +1721,39 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args, | |||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
static void EmitAccuracyDiag(const Driver &D, const JobAction &JA, | ||||||||||||||||||||||||||||
StringRef AccuracValStr, StringRef TargetPrecStr) { | ||||||||||||||||||||||||||||
if (JA.isDeviceOffloading(Action::OFK_SYCL)) { | ||||||||||||||||||||||||||||
D.Diag(clang::diag::warn_acuracy_conflicts_with_explicit_target_prec_option) | ||||||||||||||||||||||||||||
<< AccuracValStr << TargetPrecStr; | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
void Clang::AddSPIRTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, | ||||||||||||||||||||||||||||
const JobAction &JA, const Driver &D) const { | ||||||||||||||||||||||||||||
if (JA.isDeviceOffloading(Action::OFK_SYCL)) { | ||||||||||||||||||||||||||||
if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ)) { | ||||||||||||||||||||||||||||
if (!strcmp(A->getValue(), "fast")) { | ||||||||||||||||||||||||||||
CmdArgs.push_back("-fno-offload-fp32-prec-div"); | ||||||||||||||||||||||||||||
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt"); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we allow users to override with
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure. I would think that users could choose to compile with: I think that's just following the last command wins rule. In which case we need a compilated process here to find the order in which the options interact with one another. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm... If that's the case we may want to integrate the logic into where all of the other FP model options are being manipulated in the larger for loop here: llvm/clang/lib/Driver/ToolChains/Clang.cpp Line 2994 in bdf78d7
-cc1 option under the IsDeviceOffloading condition.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay and that would work for OpenMP too! |
||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||
if (Arg *A = Args.getLastArg(options::OPT_ffp_accuracy_EQ)) { | ||||||||||||||||||||||||||||
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_div)) | ||||||||||||||||||||||||||||
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-div"); | ||||||||||||||||||||||||||||
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_sqrt)) | ||||||||||||||||||||||||||||
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-sqrt"); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_div, | ||||||||||||||||||||||||||||
options::OPT_fno_offload_fp32_prec_div, true)) | ||||||||||||||||||||||||||||
CmdArgs.push_back("-fno-offload-fp32-prec-div"); | ||||||||||||||||||||||||||||
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_sqrt, | ||||||||||||||||||||||||||||
options::OPT_fno_offload_fp32_prec_sqrt, true)) | ||||||||||||||||||||||||||||
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt"); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, | ||||||||||||||||||||||||||||
ArgStringList &CmdArgs, bool KernelOrKext) const { | ||||||||||||||||||||||||||||
RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs); | ||||||||||||||||||||||||||||
|
@@ -1771,8 +1804,10 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, | |||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, | ||||||||||||||||||||||||||||
const ArgList &Args, bool KernelOrKext, | ||||||||||||||||||||||||||||
ArgStringList &CmdArgs) const { | ||||||||||||||||||||||||||||
ArgStringList &CmdArgs, | ||||||||||||||||||||||||||||
const JobAction &JA) const { | ||||||||||||||||||||||||||||
const ToolChain &TC = getToolChain(); | ||||||||||||||||||||||||||||
const Driver &D = TC.getDriver(); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
// Add the target features | ||||||||||||||||||||||||||||
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false); | ||||||||||||||||||||||||||||
|
@@ -1782,6 +1817,12 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, | |||||||||||||||||||||||||||
default: | ||||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inadvertent line removal? |
||||||||||||||||||||||||||||
case llvm::Triple::spir: | ||||||||||||||||||||||||||||
case llvm::Triple::spir64: | ||||||||||||||||||||||||||||
mdtoguchi marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||
case llvm::Triple::spirv32: | ||||||||||||||||||||||||||||
case llvm::Triple::spirv64: | ||||||||||||||||||||||||||||
AddSPIRTargetArgs(Args, CmdArgs, JA, D); | ||||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||||
case llvm::Triple::arm: | ||||||||||||||||||||||||||||
case llvm::Triple::armeb: | ||||||||||||||||||||||||||||
case llvm::Triple::thumb: | ||||||||||||||||||||||||||||
|
@@ -6806,7 +6847,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, | |||||||||||||||||||||||||||
CmdArgs.push_back(Args.MakeArgString(CPU)); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs); | ||||||||||||||||||||||||||||
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs, JA); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
// Add clang-cl arguments. | ||||||||||||||||||||||||||||
types::ID InputType = Input.getType(); | ||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,14 +47,18 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool { | |
|
||
void RenderTargetOptions(const llvm::Triple &EffectiveTriple, | ||
const llvm::opt::ArgList &Args, bool KernelOrKext, | ||
llvm::opt::ArgStringList &CmdArgs) const; | ||
llvm::opt::ArgStringList &CmdArgs, | ||
const JobAction &JA) const; | ||
|
||
void AddAArch64TargetArgs(const llvm::opt::ArgList &Args, | ||
llvm::opt::ArgStringList &CmdArgs) const; | ||
void AddARMTargetArgs(const llvm::Triple &Triple, | ||
const llvm::opt::ArgList &Args, | ||
llvm::opt::ArgStringList &CmdArgs, | ||
bool KernelOrKext) const; | ||
void AddSPIRTargetArgs(const llvm::opt::ArgList &Args, | ||
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA, | ||
const Driver &D) const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changes not needed anymore? |
||
void AddARM64TargetArgs(const llvm::opt::ArgList &Args, | ||
llvm::opt::ArgStringList &CmdArgs) const; | ||
void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,156 @@ | ||||||
// DEFINE: %{common_opts_spirv32} = -internal-isystem %S/Inputs \ | ||||||
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown | ||||||
|
||||||
// DEFINE: %{common_opts_spirv64} = -internal-isystem %S/Inputs \ | ||||||
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
// DEFINE: %{common_opts_spir} = -internal-isystem %S/Inputs \ | ||||||
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
// DEFINE: %{common_opts_spir64} = -internal-isystem %S/Inputs \ | ||||||
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ | ||||||
// RUN:-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \ | ||||||
// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s | ||||||
|
||||||
// | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s | ||||||
|
||||||
// | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s | ||||||
|
||||||
// | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix PREC-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s | ||||||
|
||||||
// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \ | ||||||
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \ | ||||||
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s | ||||||
|
||||||
#include "sycl.hpp" | ||||||
|
||||||
extern "C" SYCL_EXTERNAL float sqrt(float); | ||||||
extern "C" SYCL_EXTERNAL float fdiv(float, float); | ||||||
|
||||||
using namespace sycl; | ||||||
|
||||||
int main() { | ||||||
const unsigned array_size = 4; | ||||||
range<1> numOfItems{array_size}; | ||||||
float Value1 = .5f; | ||||||
float Value2 = .9f; | ||||||
queue deviceQueue; | ||||||
|
||||||
deviceQueue.submit([&](handler& cgh) { | ||||||
cgh.parallel_for<class KernelSqrt>(numOfItems, | ||||||
[=](id<1> wiID) { | ||||||
// PREC-SQRT: call spir_func float @sqrt(float noundef {{.*}}) | ||||||
// ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] | ||||||
// ROUNDED-DIV: call spir_func float @sqrt(float noundef {{.*}}) | ||||||
(void)sqrt(Value1); | ||||||
}); | ||||||
}); | ||||||
|
||||||
deviceQueue.submit([&](handler& cgh) { | ||||||
cgh.parallel_for<class KernelFdiv>(numOfItems, | ||||||
[=](id<1> wiID) { | ||||||
// PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}}) | ||||||
// ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}}) | ||||||
|
||||||
// ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]] | ||||||
|
||||||
// PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}}) | ||||||
// ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] | ||||||
// ROUNDED-DIV-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]] | ||||||
(void)fdiv(Value1, Value1); | ||||||
}); | ||||||
}); | ||||||
|
||||||
return 0; | ||||||
} | ||||||
|
||||||
// ROUNDED-SQRT: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0" | ||||||
// ROUNDED-SQRT-FAST: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0" | ||||||
// ROUNDED-DIV: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" | ||||||
// ROUNDED-DIV-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.