Skip to content

Commit 77de8a0

Browse files
authored
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn (#129347)
Clang generates library calls for __builtin_* functions, which can be a problem for GPUs that cannot handle them. This patch generates a call to the device implementation for __builtin_logb and the ldexp intrinsic for __builtin_scalbn.
1 parent 7a24238 commit 77de8a0

File tree

5 files changed

+1154
-3
lines changed

5 files changed

+1154
-3
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,33 @@ using namespace clang;
4343
using namespace CodeGen;
4444
using namespace llvm;
4545

46+
/// Some builtins do not have a library implementation on some targets and
47+
/// are instead emitted as LLVM IR by the target builtin emitter.
/// Returns true only when math-errno is disabled, the FP exception mode is
/// FPE_Ignore, the target's TargetCodeGenInfo reports supportsLibCall() as
/// false, and \p BuiltinID is one of logb, logbf, scalbn or scalbnf (library
/// or __builtin_ spelling). Returns false in every other case.
/// \p BI is accepted but not referenced by the body.
48+
/// FIXME: Remove this when library support is added
49+
static bool shouldEmitBuiltinAsIR(unsigned BuiltinID,
50+
const Builtin::Context &BI,
51+
const CodeGenFunction &CGF) {
52+
// All three conditions must hold before any builtin is diverted away from
// the library-call path.
if (!CGF.CGM.getLangOpts().MathErrno &&
53+
CGF.CurFPFeatures.getExceptionMode() ==
54+
LangOptions::FPExceptionModeKind::FPE_Ignore &&
55+
!CGF.CGM.getTargetCodeGenInfo().supportsLibCall()) {
56+
switch (BuiltinID) {
57+
default:
58+
return false;
59+
case Builtin::BIlogbf:
60+
case Builtin::BI__builtin_logbf:
61+
case Builtin::BIlogb:
62+
case Builtin::BI__builtin_logb:
63+
case Builtin::BIscalbnf:
64+
case Builtin::BI__builtin_scalbnf:
65+
case Builtin::BIscalbn:
66+
case Builtin::BI__builtin_scalbn:
67+
return true;
68+
}
69+
}
70+
return false;
71+
}
72+
4673
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
4774
unsigned BuiltinID, const CallExpr *E,
4875
ReturnValueSlot ReturnValue,
@@ -2622,7 +2649,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
26222649
// disabled.
26232650
// Math intrinsics are generated only when math-errno is disabled. Any pragmas
26242651
// or attributes that affect math-errno should prevent or allow math
2625-
// intrincs to be generated. Intrinsics are generated:
2652+
// intrinsics to be generated. Intrinsics are generated:
26262653
// 1- In fast math mode, unless math-errno is overridden
26272654
// via '#pragma float_control(precise, on)', or via an
26282655
// 'attribute__((optnone))'.
@@ -6225,13 +6252,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
62256252
// If this is an alias for a lib function (e.g. __builtin_sin), emit
62266253
// the call using the normal call path, but using the unmangled
62276254
// version of the function name.
6228-
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6255+
const auto &BI = getContext().BuiltinInfo;
6256+
if (!shouldEmitBuiltinAsIR(BuiltinID, BI, *this) &&
6257+
BI.isLibFunction(BuiltinID))
62296258
return emitLibraryCall(*this, FD, E,
62306259
CGM.getBuiltinLibFunction(FD, BuiltinID));
62316260

62326261
// If this is a predefined lib function (e.g. malloc), emit the call
62336262
// using exactly the normal call path.
6234-
if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6263+
if (BI.isPredefinedLibFunction(BuiltinID))
62356264
return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
62366265

62376266
// Check that a call to a target specific builtin has the correct target

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@ using namespace CodeGen;
2323
using namespace llvm;
2424

2525
namespace {
26+
27+
// Emits a call to a two-operand intrinsic that is overloaded on the types of
// both operands (the second operand's type is mangled as well as the first).
// Calls \p ConstrainedIntrinsicID when the builder is in constrained-FP mode
// and \p IntrinsicID otherwise; returns the resulting call.
28+
static Value *
29+
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
30+
Intrinsic::ID IntrinsicID,
31+
Intrinsic::ID ConstrainedIntrinsicID) {
32+
// Both call arguments are emitted as scalars before the FP-options scope
// below is entered.
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
33+
llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
34+
35+
// NOTE(review): presumably applies the FP options attached to E for the
// remainder of this function — confirm against CGFPOptionsRAII.
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
36+
if (CGF.Builder.getIsFPConstrained()) {
37+
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
38+
{Src0->getType(), Src1->getType()});
39+
return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
40+
}
41+
42+
// Non-constrained path: same overload types, plain call.
Function *F =
43+
CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
44+
return CGF.Builder.CreateCall(F, {Src0, Src1});
45+
}
46+
2647
// If \p E is not null pointer, insert address space cast to match return
2748
// type of \p E if necessary.
2849
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
@@ -1184,6 +1205,57 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
11841205
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
11851206
return emitBuiltinWithOneOverloadedType<2>(
11861207
*this, E, Intrinsic::amdgcn_s_prefetch_data);
1208+
// logbf is expanded inline as (frexp exponent of x) - 1, converted to float,
// with two selects patching the non-finite/NaN and zero inputs.
case Builtin::BIlogbf:
1209+
case Builtin::BI__builtin_logbf: {
1210+
Value *Src0 = EmitScalarExpr(E->getArg(0));
1211+
// frexp overloaded on the source type with an i32 exponent.
Function *FrExpFunc = CGM.getIntrinsic(
1212+
Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
1213+
CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
1214+
// Element 1 of the frexp result is the exponent.
Value *Exp = Builder.CreateExtractValue(FrExp, 1);
1215+
// Exp - 1; the trailing flags are (HasNUW = false, HasNSW = true).
Value *Add = Builder.CreateAdd(
1216+
Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
1217+
Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
1218+
// NOTE(review): the helper is handed the call expression E rather than
// Src0. If it emits E's first argument itself, that argument is emitted a
// second time here — confirm side effects cannot be duplicated.
Value *Fabs =
1219+
emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
1220+
// Ordered-and-not-equal is false when |x| is +inf or NaN, so those inputs
// pass |x| through instead of the converted exponent.
Value *FCmpONE = Builder.CreateFCmpONE(
1221+
Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
1222+
Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1223+
// An input that compares ordered-equal to zero yields -inf.
Value *FCmpOEQ =
1224+
Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
1225+
Value *Sel2 = Builder.CreateSelect(
1226+
FCmpOEQ,
1227+
ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true), Sel1);
1228+
return Sel2;
1229+
}
1230+
// logb is expanded inline as (frexp exponent of x) - 1, converted to double,
// with two selects patching the non-finite/NaN and zero inputs.
case Builtin::BIlogb:
1231+
case Builtin::BI__builtin_logb: {
1232+
Value *Src0 = EmitScalarExpr(E->getArg(0));
1233+
// frexp overloaded on the source type with an i32 exponent.
Function *FrExpFunc = CGM.getIntrinsic(
1234+
Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
1235+
CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
1236+
// Element 1 of the frexp result is the exponent.
Value *Exp = Builder.CreateExtractValue(FrExp, 1);
1237+
// Exp - 1; the trailing flags are (HasNUW = false, HasNSW = true).
Value *Add = Builder.CreateAdd(
1238+
Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
1239+
Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
1240+
// NOTE(review): the helper is handed the call expression E rather than
// Src0. If it emits E's first argument itself, that argument is emitted a
// second time here — confirm side effects cannot be duplicated.
Value *Fabs =
1241+
emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
1242+
// Ordered-and-not-equal is false when |x| is +inf or NaN, so those inputs
// pass |x| through instead of the converted exponent.
Value *FCmpONE = Builder.CreateFCmpONE(
1243+
Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
1244+
Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
1245+
// An input that compares ordered-equal to zero yields -inf.
Value *FCmpOEQ =
1246+
Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
1247+
Value *Sel2 = Builder.CreateSelect(
1248+
FCmpOEQ,
1249+
ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
1250+
Sel1);
1251+
return Sel2;
1252+
}
1253+
// scalbn and scalbnf (library and __builtin_ spellings) are emitted as the
// ldexp intrinsic, with experimental_constrained_ldexp passed as the
// constrained-FP counterpart.
case Builtin::BIscalbnf:
1254+
case Builtin::BI__builtin_scalbnf:
1255+
case Builtin::BIscalbn:
1256+
case Builtin::BI__builtin_scalbn:
1257+
return emitBinaryExpMaybeConstrainedFPBuiltin(
1258+
*this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
11871259
default:
11881260
return nullptr;
11891261
}

clang/lib/CodeGen/TargetInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ class TargetCodeGenInfo {
7171
return *SwiftInfo;
7272
}
7373

74+
/// supportsLibCall - Query whether the target supports all
75+
/// lib calls. The default implementation returns true.
76+
virtual bool supportsLibCall() const { return true; }
77+
7478
/// setTargetAttributes - Provides a convenient hook to handle extra
7579
/// target-specific attributes for the given global.
7680
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
298298
/// Constructs the target codegen info, handing the base class a newly
/// allocated AMDGPUABIInfo built from \p CGT.
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
299299
: TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
300300

301+
/// Always returns false: this target reports that lib calls are not
/// supported.
bool supportsLibCall() const override { return false; }
301302
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
302303
CodeGenModule &CGM) const;
303304

0 commit comments

Comments
 (0)