Skip to content

Commit daecc30

Browse files
authored
AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (#74197)
The library implementation is just a wrapper around a call to the intrinsic, but it loses metadata. Swap the call site over to the intrinsic so that the lowering can see the !fpmath metadata and fast math flags. Since d56e0d0, clang has been placing !fpmath on OpenCL library sqrt calls. Also, don't bother emitting native_sqrt anymore; it's just another wrapper around llvm.sqrt.
1 parent 81df51f commit daecc30

File tree

3 files changed

+43
-70
lines changed

3 files changed

+43
-70
lines changed

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,6 @@ class AMDGPULibCalls {
8787
Constant *copr0, Constant *copr1);
8888
bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
8989

90-
// sqrt
91-
bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
92-
9390
/// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
9491
/// of cos, sincos call).
9592
std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
@@ -672,8 +669,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
672669

673670
// Specialized optimizations for each function call.
674671
//
675-
// TODO: Handle other simple intrinsic wrappers. Sqrt.
676-
//
677672
// TODO: Handle native functions
678673
switch (FInfo.getId()) {
679674
case AMDGPULibFunc::EI_EXP:
@@ -794,7 +789,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
794789
case AMDGPULibFunc::EI_ROOTN:
795790
return fold_rootn(FPOp, B, FInfo);
796791
case AMDGPULibFunc::EI_SQRT:
797-
return fold_sqrt(FPOp, B, FInfo);
792+
// TODO: Allow with strictfp + constrained intrinsic
793+
return tryReplaceLibcallWithSimpleIntrinsic(
794+
B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
798795
case AMDGPULibFunc::EI_COS:
799796
case AMDGPULibFunc::EI_SIN:
800797
return fold_sincos(FPOp, B, FInfo);
@@ -1273,29 +1270,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
12731270
return true;
12741271
}
12751272

1276-
// fold sqrt -> native_sqrt (x)
1277-
bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
1278-
const FuncInfo &FInfo) {
1279-
if (!isUnsafeMath(FPOp))
1280-
return false;
1281-
1282-
if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1283-
(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1284-
Module *M = B.GetInsertBlock()->getModule();
1285-
1286-
if (FunctionCallee FPExpr = getNativeFunction(
1287-
M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1288-
Value *opr0 = FPOp->getOperand(0);
1289-
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1290-
<< "sqrt(" << *opr0 << ")\n");
1291-
Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1292-
replaceCall(FPOp, nval);
1293-
return true;
1294-
}
1295-
}
1296-
return false;
1297-
}
1298-
12991273
std::tuple<Value *, Value *, Value *>
13001274
AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
13011275
FunctionCallee Fsincos) {

0 commit comments

Comments
 (0)