Skip to content

Commit dfca8ec

Browse files
committed
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn
Clang generates library calls for __builtin_* functions, which can be a problem for GPUs that cannot handle them. This patch provides device implementations for __builtin_logb and __builtin_scalbn by emitting LLVM IR directly. The IR is only emitted when FP exceptions are ignored and math-errno is unset.
1 parent 0d19efa commit dfca8ec

File tree

5 files changed

+1154
-3
lines changed

5 files changed

+1154
-3
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

+32-3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,33 @@ using namespace clang;
4343
using namespace CodeGen;
4444
using namespace llvm;
4545

46+
/// Some builtins do not have library implementation on some targets and
47+
/// are instead emitted as LLVM IRs by some target builtin emitters.
48+
/// FIXME: Remove this when library support is added
49+
static bool shouldEmitBuiltinAsIR(unsigned BuiltinID,
50+
const Builtin::Context &BI,
51+
const CodeGenFunction &CGF) {
52+
if (!CGF.CGM.getLangOpts().MathErrno &&
53+
CGF.CurFPFeatures.getExceptionMode() ==
54+
LangOptions::FPExceptionModeKind::FPE_Ignore &&
55+
!CGF.CGM.getTargetCodeGenInfo().supportsLibCall()) {
56+
switch (BuiltinID) {
57+
default:
58+
return false;
59+
case Builtin::BIlogbf:
60+
case Builtin::BI__builtin_logbf:
61+
case Builtin::BIlogb:
62+
case Builtin::BI__builtin_logb:
63+
case Builtin::BIscalbnf:
64+
case Builtin::BI__builtin_scalbnf:
65+
case Builtin::BIscalbn:
66+
case Builtin::BI__builtin_scalbn:
67+
return true;
68+
}
69+
}
70+
return false;
71+
}
72+
4673
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
4774
unsigned BuiltinID, const CallExpr *E,
4875
ReturnValueSlot ReturnValue,
@@ -2414,7 +2441,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
24142441
// disabled.
24152442
// Math intrinsics are generated only when math-errno is disabled. Any pragmas
24162443
// or attributes that affect math-errno should prevent or allow math
2417-
// intrincs to be generated. Intrinsics are generated:
2444+
// intrinsics to be generated. Intrinsics are generated:
24182445
// 1- In fast math mode, unless math-errno is overridden
24192446
// via '#pragma float_control(precise, on)', or via an
24202447
// 'attribute__((optnone))'.
@@ -5999,13 +6026,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59996026
// If this is an alias for a lib function (e.g. __builtin_sin), emit
60006027
// the call using the normal call path, but using the unmangled
60016028
// version of the function name.
6002-
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6029+
const auto &BI = getContext().BuiltinInfo;
6030+
if (!shouldEmitBuiltinAsIR(BuiltinID, BI, *this) &&
6031+
BI.isLibFunction(BuiltinID))
60036032
return emitLibraryCall(*this, FD, E,
60046033
CGM.getBuiltinLibFunction(FD, BuiltinID));
60056034

60066035
// If this is a predefined lib function (e.g. malloc), emit the call
60076036
// using exactly the normal call path.
6008-
if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6037+
if (BI.isPredefinedLibFunction(BuiltinID))
60096038
return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
60106039

60116040
// Check that a call to a target specific builtin has the correct target

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

+72
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@ using namespace CodeGen;
2323
using namespace llvm;
2424

2525
namespace {
26+
27+
// Has second type mangled argument.
28+
static Value *
29+
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
30+
Intrinsic::ID IntrinsicID,
31+
Intrinsic::ID ConstrainedIntrinsicID) {
32+
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
33+
llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
34+
35+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
36+
if (CGF.Builder.getIsFPConstrained()) {
37+
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
38+
{Src0->getType(), Src1->getType()});
39+
return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
40+
}
41+
42+
Function *F =
43+
CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
44+
return CGF.Builder.CreateCall(F, {Src0, Src1});
45+
}
46+
2647
// If \p E is not null pointer, insert address space cast to match return
2748
// type of \p E if necessary.
2849
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
@@ -1142,6 +1163,57 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
11421163
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
11431164
return emitBuiltinWithOneOverloadedType<2>(
11441165
*this, E, Intrinsic::amdgcn_s_prefetch_data);
1166+
case Builtin::BIlogbf:
case Builtin::BI__builtin_logbf:
case Builtin::BIlogb:
case Builtin::BI__builtin_logb: {
  // Inline expansion of logb/logbf:
  //
  //   exp    = frexp(x).exponent - 1
  //   result = (x == 0)           ? -inf
  //          : (fabs(x) != inf)   ? (fp)exp
  //          :                      fabs(x)
  //
  // The inner compare is an ordered "not equal", so it is false for both
  // infinities and NaN; in those cases fabs(x) itself is the result.
  //
  // The argument is emitted exactly once and every later use refers to that
  // value. Emitting E->getArg(0) a second time for the fabs call would
  // code-generate the argument expression twice.
  Value *Src0 = EmitScalarExpr(E->getArg(0));

  // The float and double variants differ only in the FP type, which is taken
  // from the emitted argument.
  llvm::Type *FPTy = Src0->getType();

  Function *FrExpFunc =
      CGM.getIntrinsic(Intrinsic::frexp, {FPTy, Builder.getInt32Ty()});
  CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
  Value *Exp = Builder.CreateExtractValue(FrExp, 1);

  // exponent - 1, flagged as no-signed-wrap.
  Value *Add =
      Builder.CreateNSWAdd(Exp, ConstantInt::getSigned(Exp->getType(), -1));
  Value *SIToFP = Builder.CreateSIToFP(Add, FPTy);

  Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
  Value *FCmpONE = Builder.CreateFCmpONE(Fabs, ConstantFP::getInfinity(FPTy));
  Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);

  Value *FCmpOEQ = Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(FPTy));
  Value *Sel2 = Builder.CreateSelect(
      FCmpOEQ, ConstantFP::getInfinity(FPTy, /*Negative=*/true), Sel1);
  return Sel2;
}
1211+
case Builtin::BIscalbnf:
1212+
case Builtin::BI__builtin_scalbnf:
1213+
case Builtin::BIscalbn:
1214+
case Builtin::BI__builtin_scalbn:
1215+
return emitBinaryExpMaybeConstrainedFPBuiltin(
1216+
*this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
11451217
default:
11461218
return nullptr;
11471219
}

clang/lib/CodeGen/TargetInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ class TargetCodeGenInfo {
7171
return *SwiftInfo;
7272
}
7373

74+
/// supportsLibCall - Query to whether or not target supports all
75+
/// lib calls.
76+
virtual bool supportsLibCall() const { return true; }
77+
7478
/// setTargetAttributes - Provides a convenient hook to handle extra
7579
/// target-specific attributes for the given global.
7680
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,

clang/lib/CodeGen/Targets/AMDGPU.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
302302
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
303303
: TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
304304

305+
/// AMDGPU reports that it does not support all lib calls.
bool supportsLibCall() const override {
  return false;
}
305306
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
306307
CodeGenModule &CGM) const;
307308

0 commit comments

Comments
 (0)