-
Notifications
You must be signed in to change notification settings - Fork 787
[NFCI][SYCL] Refactor selection of FP builtin calls #16966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1334,7 +1334,7 @@ void TargetLibraryInfoImpl::addAltMathFunctionsFromLib( | |
/// Select an alternate math library implementation that meets the criteria | ||
/// described by an FPBuiltinIntrinsic call. | ||
StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation( | ||
FPBuiltinIntrinsic *Builtin) const { | ||
const FPBuiltinIntrinsic *Builtin) const { | ||
// TODO: Handle the case of no specified accuracy. | ||
if (Builtin->getRequiredAccuracy() == std::nullopt) | ||
return StringRef(); | ||
|
@@ -1353,6 +1353,66 @@ StringRef TargetLibraryInfoImpl::selectFPBuiltinImplementation( | |
return I->FnImplName; | ||
} | ||
|
||
FPBuiltinReplacement TargetLibraryInfoImpl::selectFnForFPBuiltinCalls( | ||
const FPBuiltinIntrinsic &BuiltinCall, | ||
const TargetTransformInfo &TTI) const { | ||
auto DefaultOpIsCorrectlyRounded = [](const FPBuiltinIntrinsic &BuiltinCall) { | ||
switch (BuiltinCall.getIntrinsicID()) { | ||
case Intrinsic::fpbuiltin_fadd: | ||
case Intrinsic::fpbuiltin_fsub: | ||
case Intrinsic::fpbuiltin_fmul: | ||
case Intrinsic::fpbuiltin_fdiv: | ||
case Intrinsic::fpbuiltin_frem: | ||
case Intrinsic::fpbuiltin_sqrt: | ||
case Intrinsic::fpbuiltin_ldexp: | ||
return true; | ||
default: | ||
return false; | ||
} | ||
}; | ||
StringSet<> RecognizedAttrs = {FPBuiltinIntrinsic::FPBUILTIN_MAX_ERROR}; | ||
if (BuiltinCall.hasUnrecognizedFPAttrs(std::move(RecognizedAttrs))) | ||
return FPBuiltinReplacement(FPBuiltinReplacement::UnrecognizedFPAttrs); | ||
Triple T(BuiltinCall.getModule()->getTargetTriple()); | ||
const auto Accuracy = BuiltinCall.getRequiredAccuracy(); | ||
// For fpbuiltin.sqrt, it should always use the native operation for | ||
// x86-based targets because the native instruction is faster (even faster | ||
// than the low-accuracy SVML implementation). | ||
if (T.isX86() && BuiltinCall.getIntrinsicID() == Intrinsic::fpbuiltin_sqrt && | ||
TTI.haveFastSqrt(BuiltinCall.getOperand(0)->getType())) | ||
return FPBuiltinReplacement(FPBuiltinReplacement::ReplaceWithLLVMIR); | ||
// Several functions for SYCL and CUDA requires "0.5" accuracy levels, | ||
// which means correctly rounded results. For now x86 host and NVPTX | ||
// AltMathLibrary doesn't have such ability. For such accuracy level, | ||
// the fpbuiltins should be replaced by equivalent IR operation or | ||
// llvmbuiltins. | ||
if ((T.isX86() || T.isNVPTX()) && Accuracy == 0.5) { | ||
asudarsa marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (DefaultOpIsCorrectlyRounded(BuiltinCall)) | ||
return FPBuiltinReplacement(FPBuiltinReplacement::ReplaceWithLLVMIR); | ||
return FPBuiltinReplacement(FPBuiltinReplacement::Unexpected0dot5); | ||
} | ||
// AltMathLibrary don't have implementation for CUDA approximate precision | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious: Does 'Accuracy > 0.5' map to 'approximate precision'? Thanks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 'Accuracy > 0.5' maps on 'not-precise'. Regarding if it maps on nvvm approx intrinsics/nvptx approx instruction - it depends on the operation and what ptx spec mandates. More details is in the discussion with Joshua in #16714 . |
||
// builtins. Lets map them on NVPTX intrinsics. If no appropriate intrinsics | ||
// are known - skip to emit an error. | ||
if (T.isNVPTX() && Accuracy > 0.5) { | ||
return FPBuiltinReplacement( | ||
FPBuiltinReplacement::ReplaceWithApproxNVPTXCallsOrFallback); | ||
} | ||
|
||
/// Call TLI to select a function implementation to call | ||
const StringRef OutAltMathFunctionImplName = | ||
selectFPBuiltinImplementation(&BuiltinCall); | ||
if (OutAltMathFunctionImplName.empty()) { | ||
// Operations that require correct rounding by default can always be | ||
// replaced with the LLVM IR equivalent representation. | ||
if (DefaultOpIsCorrectlyRounded(BuiltinCall)) | ||
return FPBuiltinReplacement(FPBuiltinReplacement::ReplaceWithLLVMIR); | ||
return FPBuiltinReplacement(FPBuiltinReplacement::NoSuitableReplacement); | ||
} | ||
return FPBuiltinReplacement(FPBuiltinReplacement::ReplaceWithAltMathFunction, | ||
OutAltMathFunctionImplName); | ||
} | ||
|
||
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) { | ||
return LHS.getScalarFnName() < RHS.getScalarFnName(); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want to assign to AltMathFunctionImplName even if K is not set to ReplaceWithAltMathFunction?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think not. In this case we should to either error out or attempt to lower to standard to LLVM instructions/intrinsics or target specific intrinsics if applicable (like nvvm approx math intrinsics).