Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. #15836

Open
wants to merge 20 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
def err_aix_musttail_unsupported: Error<
"'musttail' attribute is not supported on AIX">;

def warn_acuracy_conflicts_with_explicit_target_prec_option : Warning<
"floating point accuracy control '%0' conflicts with explicit target "
"precision option '%1'">,
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-target-prec-option">>;

// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
OPTION(MathErrno, bool, 1, FPAccuracy)
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
OPTION(OffloadFp32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFp32PrecSqrt, bool, 1, OffloadFp32PrecDiv)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
OPTION(OffloadFp32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFp32PrecSqrt, bool, 1, OffloadFp32PrecDiv)
OPTION(OffloadFP32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDiv)

#undef OPTION
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
LANGOPT(OffloadFp32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFp32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
LANGOPT(OffloadFp32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFp32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")

LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
"for complex arithmetic operations">>;

defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFp32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFp32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;


// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24099,6 +24099,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
.Default(0);
} else {
// The function has a clang builtin. Create an attribute for it
Expand Down Expand Up @@ -24200,7 +24201,8 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
// a TU fp-accuracy requested.
const LangOptions &LangOpts = getLangOpts();
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
!LangOpts.FPAccuracyVal.empty()) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFp32PrecDiv ||
!LangOpts.OffloadFp32PrecSqrt) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFp32PrecDiv ||
!LangOpts.OffloadFp32PrecSqrt) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
!LangOpts.OffloadFP32PrecSqrt) {

llvm::Function *Func =
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
Expand Down
26 changes: 20 additions & 6 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1889,15 +1889,23 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
}
}
if (FuncAttrs.attrs().size() == 0)
if (FuncAttrs.attrs().size() == 0) {
StringRef FPAccuracyVal;
if (!getLangOpts().FPAccuracyVal.empty()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
}
if (Name == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt)
FPAccuracyVal = "3.0";
if (Name == "fdiv" && !getLangOpts().OffloadFp32PrecDiv)
FPAccuracyVal = "2.5";
if (!FPAccuracyVal.empty())
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is the combination supposed to work? If the condition in 1894 was true, would two fpbuiltin-max-error attributes get added? Once in 1898 and again in 1907?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the condition in 1894 is satisfied, then the FuncAttrs.size() != 0); we will not get into this code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, maybe I wasn't clear before. Let me type out what I am asking:

  if (FuncAttrs.attrs().size() == 0) {
    StringRef FPAccuracyVal;
    if (!getLangOpts().FPAccuracyVal.empty()) {
      ...
      FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(...);
      FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);  // #Attr here 1
      ... 
    }
    if (Name == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt)
      FPAccuracyVal = "3.0";
    if (Name == "fdiv" && !getLangOpts().OffloadFp32PrecDiv)
      FPAccuracyVal = "2.5";
    if (!FPAccuracyVal.empty())
      FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);    // #Attr here 2

Couldn't you get into the size == 0 block, set FPAccuracyVal, add the attribute (#1), and if name is one of sqrt or fdiv, set FPAccuracyVal again, and then add the attribute again (#2)?

Is this combination supposed to work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! good catch. I think this will fix it.

}
}

/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
Expand Down Expand Up @@ -5790,10 +5798,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
StringRef FuncName = FD->getName();
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty() ||
(FuncName == "sqrt" && !getLangOpts().OffloadFp32PrecSqrt &&
IsFloat32Type) ||
(FuncName == "fdiv" && !getLangOpts().OffloadFp32PrecDiv &&
IsFloat32Type)) {
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
FD->getName(), FD->getBuiltinID());
if (CI)
Expand Down
45 changes: 43 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1721,6 +1721,39 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
}
}

static void EmitAccuracyDiag(const Driver &D, const JobAction &JA,
StringRef AccuracValStr, StringRef TargetPrecStr) {
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
D.Diag(clang::diag::warn_acuracy_conflicts_with_explicit_target_prec_option)
<< AccuracValStr << TargetPrecStr;
}
}

void Clang::AddSPIRTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
const JobAction &JA, const Driver &D) const {
if (JA.isDeviceOffloading(Action::OFK_SYCL)) {
if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ)) {
if (!strcmp(A->getValue(), "fast")) {
CmdArgs.push_back("-fno-offload-fp32-prec-div");
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we allow users to override with -foffload-fp32-prec-div|sqrt?

Suggested change
if (!strcmp(A->getValue(), "fast")) {
CmdArgs.push_back("-fno-offload-fp32-prec-div");
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
if (!strcmp(A->getValue(), "fast")) {
if (!Args.hasFlag(option::OPT_foffload_fp32_prec_div,
option::OPT_fno_offload_fp32_prec_div, false))
CmdArgs.push_back("-fno-offload-fp32-prec-div");
if (!Args.hasFlag(option::OPT_foffload_fp32_prec_sqrt,
option::OPT_fno_offload_fp32_prec_sqrt, false))
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure. I would think that users could choose to compile with:
clang -fsycl -ffp-model=fast -foffload-fp32-prec-sqrt hello.cpp
or:
clang -fsycl -foffload-fp32-prec-sqrt -ffp-model=fast hello.cpp
These shouldn't give the same result. In the first one, the sqrt results are precise. In the second one, they are rounded.

I think that's just following the last command wins rule. In which case we need a compilated process here to find the order in which the options interact with one another.

Copy link
Contributor

@mdtoguchi mdtoguchi Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... If that's the case we may want to integrate the logic into where all of the other FP model options are being manipulated in the larger for loop here:

static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
and only add the -cc1 option under the IsDeviceOffloading condition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay and that would work for OpenMP too!

} else {
if (Arg *A = Args.getLastArg(options::OPT_ffp_accuracy_EQ)) {
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_div))
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-div");
if (Args.getLastArg(options::OPT_fno_offload_fp32_prec_sqrt))
EmitAccuracyDiag(D, JA, A->getValue(), "-fno-offload-fp32-prec-sqrt");
}
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_div,
options::OPT_fno_offload_fp32_prec_div, true))
CmdArgs.push_back("-fno-offload-fp32-prec-div");
if (!Args.hasFlag(options::OPT_foffload_fp32_prec_sqrt,
options::OPT_fno_offload_fp32_prec_sqrt, true))
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
}
}

void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs);
Expand Down Expand Up @@ -1771,8 +1804,10 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,

void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const ArgList &Args, bool KernelOrKext,
ArgStringList &CmdArgs) const {
ArgStringList &CmdArgs,
const JobAction &JA) const {
const ToolChain &TC = getToolChain();
const Driver &D = TC.getDriver();

// Add the target features
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false);
Expand All @@ -1782,6 +1817,12 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
default:
break;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inadvertent line removal?

case llvm::Triple::spir:
case llvm::Triple::spir64:
mdtoguchi marked this conversation as resolved.
Show resolved Hide resolved
case llvm::Triple::spirv32:
case llvm::Triple::spirv64:
AddSPIRTargetArgs(Args, CmdArgs, JA, D);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
Expand Down Expand Up @@ -6806,7 +6847,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(CPU));
}

RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs, JA);

// Add clang-cl arguments.
types::ID InputType = Input.getType();
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/Driver/ToolChains/Clang.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,18 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool {

void RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const llvm::opt::ArgList &Args, bool KernelOrKext,
llvm::opt::ArgStringList &CmdArgs) const;
llvm::opt::ArgStringList &CmdArgs,
const JobAction &JA) const;

void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
void AddARMTargetArgs(const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
bool KernelOrKext) const;
void AddSPIRTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA,
const Driver &D) const;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes not needed anymore?

void AddARM64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args,
Expand Down
156 changes: 156 additions & 0 deletions clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// DEFINE: %{common_opts_spirv32} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown

// DEFINE: %{common_opts_spirv64} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv64-unknown-unknown


// DEFINE: %{common_opts_spir} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spir32-unknown-unknown


// DEFINE: %{common_opts_spir64} = -internal-isystem %S/Inputs \
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// DEFINE: -fsycl-is-device -emit-llvm -triple spirv32-unknown-unknown
// DEFINE: -fsycl-is-device -emit-llvm -triple spir64-unknown-unknown


// RUN: %clang_cc1 %{common_opts_spirv32} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv32} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv32} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \
// RUN:-fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spirv32} -ffast-math \
// RUN: -fno-offload-fp32-prec-div -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spirv64} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spirv64} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv64} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spirv64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spir} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spir} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spir} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

//

// RUN: %clang_cc1 %{common_opts_spir64} %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix PREC-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT %s

// RUN: %clang_cc1 %{common_opts_spir64} -foffload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix PREC-DIV %s

// RUN: %clang_cc1 %{common_opts_spir64} -fno-offload-fp32-prec-div %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV %s

// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-SQRT-FAST %s

// RUN: %clang_cc1 %{common_opts_spir64} -ffast-math -fno-offload-fp32-prec-div \
// RUN: -fno-offload-fp32-prec-sqrt %s -o - \
// RUN: | FileCheck --check-prefix ROUNDED-DIV-FAST %s

#include "sycl.hpp"

extern "C" SYCL_EXTERNAL float sqrt(float);
extern "C" SYCL_EXTERNAL float fdiv(float, float);

using namespace sycl;

int main() {
const unsigned array_size = 4;
range<1> numOfItems{array_size};
float Value1 = .5f;
float Value2 = .9f;
queue deviceQueue;

deviceQueue.submit([&](handler& cgh) {
cgh.parallel_for<class KernelSqrt>(numOfItems,
[=](id<1> wiID) {
// PREC-SQRT: call spir_func float @sqrt(float noundef {{.*}})
// ROUNDED-SQRT: call float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]]
// ROUNDED-DIV: call spir_func float @sqrt(float noundef {{.*}})
(void)sqrt(Value1);
});
});

deviceQueue.submit([&](handler& cgh) {
cgh.parallel_for<class KernelFdiv>(numOfItems,
[=](id<1> wiID) {
// PREC-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
// ROUNDED-SQRT: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})

// ROUNDED-SQRT-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.sqrt.f32(float {{.*}}) #[[ATTR_SQRT:[0-9]+]]

// PREC-DIV: call spir_func float @fdiv(float noundef {{.*}}, float noundef {{.*}})
// ROUNDED-DIV: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
// ROUNDED-DIV-FAST: call reassoc nnan ninf nsz arcp afn float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_DIV:[0-9]+]]
(void)fdiv(Value1, Value1);
});
});

return 0;
}

// ROUNDED-SQRT: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0"
// ROUNDED-SQRT-FAST: attributes #[[ATTR_SQRT]] = {{.*}}"fpbuiltin-max-error"="3.0"
// ROUNDED-DIV: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5"
// ROUNDED-DIV-FAST: attributes #[[ATTR_DIV]] = {{.*}}"fpbuiltin-max-error"="2.5"
Loading
Loading