Skip to content

Commit 38e2103

Browse files
authored
[NFCI][SYCL] Refactor selection of FP builtin calls (#16966)
This is an upstream from a closed source repository. Co-author: Stasenko, Alexander P <[email protected]> --------- Signed-off-by: Sidorov, Dmitry <[email protected]>
1 parent 1413481 commit 38e2103

File tree

5 files changed

+144
-47
lines changed

5 files changed

+144
-47
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
1111

1212
#include "llvm/ADT/DenseMap.h"
13+
#include "llvm/Analysis/TargetTransformInfo.h"
1314
#include "llvm/IR/Constants.h"
1415
#include "llvm/IR/InstrTypes.h"
1516
#include "llvm/IR/IntrinsicInst.h"
@@ -83,6 +84,47 @@ class VecDesc {
8384
NotLibFunc
8485
};
8586

87+
/// Contains all possible FPBuiltin replacement choices by
88+
/// selectFnForFPBuiltinCalls function.
89+
struct FPBuiltinReplacement {
90+
enum Kind {
91+
Unexpected0dot5,
92+
UnrecognizedFPAttrs,
93+
NoSuitableReplacement,
94+
ReplaceWithLLVMIR,
95+
ReplaceWithAltMathFunction,
96+
ReplaceWithApproxNVPTXCallsOrFallback
97+
};
98+
99+
FPBuiltinReplacement(Kind K, const StringRef &ImplName = StringRef())
100+
: RepKind(K), AltMathFunctionImplName(ImplName) {
101+
// Check that ImplName is non-empty only if K is
102+
// ReplaceWithAltMathFunction.
103+
assert((K != Kind::ReplaceWithAltMathFunction || !ImplName.empty()) &&
104+
"Expected non-empty function name");
105+
}
106+
FPBuiltinReplacement(const FPBuiltinReplacement &O)
107+
: RepKind(O()), AltMathFunctionImplName(O.altMathFunctionImplName()) {}
108+
FPBuiltinReplacement &operator=(const FPBuiltinReplacement &O) {
109+
this->RepKind = O();
110+
this->AltMathFunctionImplName = O.altMathFunctionImplName();
111+
return *this;
112+
}
113+
~FPBuiltinReplacement() {}
114+
Kind operator()() const { return RepKind; }
115+
bool isReplaceble() const { return RepKind > Kind::NoSuitableReplacement; }
116+
const StringRef &altMathFunctionImplName() const {
117+
return AltMathFunctionImplName;
118+
}
119+
120+
private:
121+
/// In case of RepKind = Kind::ReplaceWithAltMathFunction
122+
/// AltMathFunctionImplName also contains the name of the alternate math
123+
/// function implementation.
124+
Kind RepKind;
125+
StringRef AltMathFunctionImplName;
126+
};
127+
86128
/// Implementation of the target library information.
87129
///
88130
/// This class constructs tables that hold the target library information and
@@ -224,6 +266,16 @@ class TargetLibraryInfoImpl {
224266
/// given alternate math library.
225267
void addAltMathFunctionsFromLib(enum AltMathLibrary AltLib);
226268

269+
// Select an alternate math library implementation that meets the criteria
270+
// described by an FPBuiltinIntrinsic call.
271+
StringRef
272+
selectFPBuiltinImplementation(const FPBuiltinIntrinsic *Builtin) const;
273+
274+
/// Returns the replacement choice for the given FPBuiltinIntrinsic call.
275+
FPBuiltinReplacement
276+
selectFnForFPBuiltinCalls(const FPBuiltinIntrinsic &BuiltinCall,
277+
const TargetTransformInfo &TTI) const;
278+
227279
/// Select an alternate math library implementation that meets the criteria
228280
/// described by an FPBuiltinIntrinsic call.
229281
StringRef selectFPBuiltinImplementation(FPBuiltinIntrinsic *Builtin) const;
@@ -649,6 +701,13 @@ class TargetLibraryInfo {
649701
// Thin forwarder: the answer is whatever isFunctionVectorizable reports for
// the same name.
bool isKnownVectorFunctionInLibrary(StringRef F) const {
  const bool Vectorizable = isFunctionVectorizable(F);
  return Vectorizable;
}
704+
705+
/// Returns the replacement choice for the given FPBuiltinIntrinsic call.
706+
FPBuiltinReplacement
707+
selectFnForFPBuiltinCalls(const FPBuiltinIntrinsic &BuiltinCall,
708+
const TargetTransformInfo &TTI) const {
709+
return Impl->selectFnForFPBuiltinCalls(BuiltinCall, TTI);
710+
}
652711
};
653712

654713
/// Analysis pass providing the \c TargetLibraryInfo.

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ class FPBuiltinIntrinsic : public IntrinsicInst {
718718
/// Check the callsite attributes for this FPBuiltinIntrinsic against a list
719719
/// of FP attributes that the caller knows how to process to see if the
720720
/// current intrinsic has unrecognized attributes
721-
bool hasUnrecognizedFPAttrs(const StringSet<> HandledAttrs);
721+
bool hasUnrecognizedFPAttrs(const StringSet<> HandledAttrs) const;
722722

723723
/// Methods for support type inquiry through isa, cast, and dyn_cast:
724724
/// @{

llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1334,7 +1334,7 @@ void TargetLibraryInfoImpl::addAltMathFunctionsFromLib(
13341334
/// Select an alternate math library implementation that meets the criteria
13351335
/// described by an FPBuiltinIntrinsic call.
13361336
StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation(
1337-
FPBuiltinIntrinsic *Builtin) const {
1337+
const FPBuiltinIntrinsic *Builtin) const {
13381338
// TODO: Handle the case of no specified accuracy.
13391339
if (Builtin->getRequiredAccuracy() == std::nullopt)
13401340
return StringRef();
@@ -1353,6 +1353,66 @@ StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation(
13531353
return I->FnImplName;
13541354
}
13551355

1356+
/// Decides how an fpbuiltin call should be lowered. The checks run in a fixed
/// order and the first one that matches determines the result:
///   1. unrecognized FP attributes            -> UnrecognizedFPAttrs
///   2. sqrt on x86 with a fast native sqrt    -> ReplaceWithLLVMIR
///   3. 0.5 accuracy on x86 or NVPTX           -> ReplaceWithLLVMIR or
///                                                Unexpected0dot5
///   4. accuracy above 0.5 on NVPTX            -> ReplaceWithApproxNVPTXCallsOrFallback
///   5. alternate math library lookup          -> ReplaceWithAltMathFunction,
///      otherwise ReplaceWithLLVMIR or NoSuitableReplacement
FPBuiltinReplacement TargetLibraryInfoImpl::selectFnForFPBuiltinCalls(
    const FPBuiltinIntrinsic &BuiltinCall,
    const TargetTransformInfo &TTI) const {
  using Repl = FPBuiltinReplacement;

  // Only the max-error attribute is understood here; anything else is
  // reported back to the caller.
  StringSet<> KnownAttrs = {FPBuiltinIntrinsic::FPBUILTIN_MAX_ERROR};
  if (BuiltinCall.hasUnrecognizedFPAttrs(std::move(KnownAttrs)))
    return Repl(Repl::UnrecognizedFPAttrs);

  // Operations whose default LLVM IR form is already correctly rounded.
  const auto IID = BuiltinCall.getIntrinsicID();
  const bool IRIsCorrectlyRounded =
      IID == Intrinsic::fpbuiltin_fadd || IID == Intrinsic::fpbuiltin_fsub ||
      IID == Intrinsic::fpbuiltin_fmul || IID == Intrinsic::fpbuiltin_fdiv ||
      IID == Intrinsic::fpbuiltin_frem || IID == Intrinsic::fpbuiltin_sqrt ||
      IID == Intrinsic::fpbuiltin_ldexp;

  const Triple TargetTriple(BuiltinCall.getModule()->getTargetTriple());
  const bool IsX86 = TargetTriple.isX86();
  const bool IsNVPTX = TargetTriple.isNVPTX();
  const auto Accuracy = BuiltinCall.getRequiredAccuracy();

  // fpbuiltin.sqrt always uses the native operation on x86-based targets
  // that have a fast sqrt, because the native instruction is faster (even
  // faster than the low-accuracy SVML implementation).
  if (IsX86 && IID == Intrinsic::fpbuiltin_sqrt &&
      TTI.haveFastSqrt(BuiltinCall.getOperand(0)->getType()))
    return Repl(Repl::ReplaceWithLLVMIR);

  // Several SYCL and CUDA functions require the "0.5" accuracy level, which
  // means correctly rounded results. For now neither the x86 host nor the
  // NVPTX AltMathLibrary can provide that, so such fpbuiltins must be
  // replaced by the equivalent IR operation or LLVM builtin; any other
  // operation asking for 0.5 is unexpected.
  if ((IsX86 || IsNVPTX) && Accuracy == 0.5)
    return Repl(IRIsCorrectlyRounded ? Repl::ReplaceWithLLVMIR
                                     : Repl::Unexpected0dot5);

  // The AltMathLibrary has no implementation for CUDA approximate-precision
  // builtins, so they are mapped onto NVPTX intrinsics. If no appropriate
  // intrinsic is known, the caller falls back to emitting an error.
  if (IsNVPTX && Accuracy > 0.5)
    return Repl(Repl::ReplaceWithApproxNVPTXCallsOrFallback);

  // Ask the alternate math library tables for a matching implementation.
  const StringRef ImplName = selectFPBuiltinImplementation(&BuiltinCall);
  if (!ImplName.empty())
    return Repl(Repl::ReplaceWithAltMathFunction, ImplName);

  // Operations that are correctly rounded by default can always be replaced
  // with their LLVM IR equivalent; everything else has no replacement.
  return Repl(IRIsCorrectlyRounded ? Repl::ReplaceWithLLVMIR
                                   : Repl::NoSuitableReplacement);
}
1415+
13561416
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
13571417
return LHS.getScalarFnName() < RHS.getScalarFnName();
13581418
}

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ std::optional<float> FPBuiltinIntrinsic::getRequiredAccuracy() const {
305305
}
306306

307307
bool FPBuiltinIntrinsic::hasUnrecognizedFPAttrs(
308-
const StringSet<> recognizedAttrs) {
308+
const StringSet<> recognizedAttrs) const {
309309
AttributeSet FnAttrs = getAttributes().getFnAttrs();
310310
for (const Attribute &Attr : FnAttrs) {
311311
if (!Attr.isStringAttribute())

llvm/lib/Transforms/Scalar/FPBuiltinFnSelection.cpp

Lines changed: 22 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -164,54 +164,28 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI,
164164
dbgs() << BuiltinCall.getRequiredAccuracy().value() << "\n";
165165
});
166166

167-
StringSet<> RecognizedAttrs = {FPBuiltinIntrinsic::FPBUILTIN_MAX_ERROR};
168-
if (BuiltinCall.hasUnrecognizedFPAttrs(RecognizedAttrs)) {
167+
const FPBuiltinReplacement Replacement =
168+
TLI.selectFnForFPBuiltinCalls(BuiltinCall, TTI);
169+
170+
switch (Replacement()) {
171+
default:
172+
llvm_unreachable("Unexpected replacement");
173+
case FPBuiltinReplacement::Unexpected0dot5:
174+
report_fatal_error("Unexpected fpbuiltin requiring 0.5 max error.");
175+
return false;
176+
case FPBuiltinReplacement::UnrecognizedFPAttrs:
169177
report_fatal_error(
170178
Twine(BuiltinCall.getCalledFunction()->getName()) +
171179
Twine(" was called with unrecognized floating-point attributes.\n"),
172180
false);
173181
return false;
174-
}
175-
176-
Triple T(BuiltinCall.getModule()->getTargetTriple());
177-
// for fpbuiltin.sqrt, it should always use the native operation for
178-
// x86-based targets because the native instruction is faster (even faster
179-
// than the low-accuracy SVML implementation).
180-
if (T.isX86() && BuiltinCall.getIntrinsicID() == Intrinsic::fpbuiltin_sqrt &&
181-
TTI.haveFastSqrt(BuiltinCall.getOperand(0)->getType()))
182-
return replaceWithLLVMIR(BuiltinCall);
183-
184-
// Several functions for "sycl" and "cuda" requires "0.5" accuracy levels,
185-
// which means correctly rounded results. For now x86 host and NVPTX
186-
// AltMathLibrary doesn't have such ability. For such accuracy level, the
187-
// fpbuiltins should be replaced by equivalent IR operation or llvmbuiltins.
188-
if ((T.isX86() || T.isNVPTX()) &&
189-
BuiltinCall.getRequiredAccuracy().value() == 0.5) {
190-
switch (BuiltinCall.getIntrinsicID()) {
191-
case Intrinsic::fpbuiltin_fadd:
192-
case Intrinsic::fpbuiltin_fsub:
193-
case Intrinsic::fpbuiltin_fmul:
194-
case Intrinsic::fpbuiltin_fdiv:
195-
case Intrinsic::fpbuiltin_frem:
196-
case Intrinsic::fpbuiltin_sqrt:
197-
case Intrinsic::fpbuiltin_ldexp:
198-
return replaceWithLLVMIR(BuiltinCall);
199-
default:
200-
report_fatal_error("Unexpected fpbuiltin requiring 0.5 max error.");
201-
}
202-
}
203-
204-
// AltMathLibrary don't have implementation for CUDA approximate precision
205-
// builtins. Lets map them on NVPTX intrinsics. If no appropriate intrinsics
206-
// are known - skip to emit an error.
207-
if (T.isNVPTX() && BuiltinCall.getRequiredAccuracy().value() > 0.5)
182+
case FPBuiltinReplacement::ReplaceWithApproxNVPTXCallsOrFallback: {
208183
if (replaceWithApproxNVPTXCallsOrFallback(
209184
BuiltinCall, BuiltinCall.getRequiredAccuracy()))
210185
return true;
211-
212-
/// Call TLI to select a function implementation to call
213-
StringRef ImplName = TLI.selectFPBuiltinImplementation(&BuiltinCall);
214-
if (ImplName.empty()) {
186+
[[fallthrough]];
187+
}
188+
case FPBuiltinReplacement::NoSuitableReplacement: {
215189
LLVM_DEBUG(dbgs() << "No matching implementation found!\n");
216190
std::string RequiredAccuracy;
217191
if (BuiltinCall.getRequiredAccuracy() == std::nullopt)
@@ -228,10 +202,14 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI,
228202
false);
229203
return false;
230204
}
231-
232-
LLVM_DEBUG(dbgs() << "Selected " << ImplName << "\n");
233-
234-
return replaceWithAltMathFunction(BuiltinCall, ImplName);
205+
case FPBuiltinReplacement::ReplaceWithLLVMIR:
206+
return replaceWithLLVMIR(BuiltinCall);
207+
case FPBuiltinReplacement::ReplaceWithAltMathFunction:
208+
LLVM_DEBUG(dbgs() << "Selected " << Replacement.altMathFunctionImplName()
209+
<< "\n");
210+
return replaceWithAltMathFunction(BuiltinCall,
211+
Replacement.altMathFunctionImplName());
212+
}
235213
}
236214

237215
static bool runImpl(const TargetLibraryInfo &TLI,

0 commit comments

Comments
 (0)