|
64 | 64 |
|
65 | 65 | using namespace llvm;
|
66 | 66 |
|
| 67 | +static cl::opt<bool> DisableFPCallFolding( |
| 68 | + "disable-fp-call-folding", |
| 69 | + cl::desc("Disable constant-folding of FP intrinsics and libcalls."), |
| 70 | + cl::init(false), cl::Hidden); |
| 71 | + |
67 | 72 | namespace {
|
68 | 73 |
|
69 | 74 | //===----------------------------------------------------------------------===//
|
@@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
|
1576 | 1581 | return false;
|
1577 | 1582 | if (Call->getFunctionType() != F->getFunctionType())
|
1578 | 1583 | return false;
|
| 1584 | + |
| 1585 | + // Allow FP calls (both libcalls and intrinsics) to avoid being folded. |
| 1586 | + // This can be useful for GPU targets or in cross-compilation scenarios |
| 1587 | + // when the exact target FP behaviour is required, and the host compiler's |
| 1588 | + // behaviour may be slightly different from the device's run-time behaviour. |
| 1589 | + if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() || |
| 1590 | + any_of(F->args(), [](const Argument &Arg) { |
| 1591 | + return Arg.getType()->isFloatingPointTy(); |
| 1592 | + }))) |
| 1593 | + return false; |
| 1594 | + |
1579 | 1595 | switch (F->getIntrinsicID()) {
|
1580 | 1596 | // Operations that do not operate on floating-point numbers and do not depend on
|
1581 | 1597 | // FP environment can be folded even in strictfp functions.
|
@@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
|
1700 | 1716 | case Intrinsic::x86_avx512_vcvtsd2usi64:
|
1701 | 1717 | case Intrinsic::x86_avx512_cvttsd2usi:
|
1702 | 1718 | case Intrinsic::x86_avx512_cvttsd2usi64:
|
1703 |
| - return !Call->isStrictFP(); |
1704 | 1719 |
|
1705 | 1720 | // NVVM FMax intrinsics
|
1706 | 1721 | case Intrinsic::nvvm_fmax_d:
|
@@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
|
1775 | 1790 | case Intrinsic::nvvm_d2ull_rn:
|
1776 | 1791 | case Intrinsic::nvvm_d2ull_rp:
|
1777 | 1792 | case Intrinsic::nvvm_d2ull_rz:
|
| 1793 | + return !Call->isStrictFP(); |
1778 | 1794 |
|
1779 | 1795 | // Sign operations are actually bitwise operations, they do not raise
|
1780 | 1796 | // exceptions even for SNANs.
|
@@ -3909,8 +3925,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
|
3909 | 3925 | Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
|
3910 | 3926 | Constant *RHS, Type *Ty,
|
3911 | 3927 | Instruction *FMFSource) {
|
3912 |
| - return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, |
3913 |
| - dyn_cast_if_present<CallBase>(FMFSource)); |
| 3928 | + auto *Call = dyn_cast_if_present<CallBase>(FMFSource); |
| 3929 | + // Ensure we check flags like StrictFP that might prevent this from getting |
| 3930 | + // folded before generating a result. |
| 3931 | + if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction())) |
| 3932 | + return nullptr; |
| 3933 | + return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call); |
3914 | 3934 | }
|
3915 | 3935 |
|
3916 | 3936 | Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
|
|
0 commit comments