Skip to content

Commit 1f7885c

Browse files
[ConstantFolding] Add flag to disable call folding (#140270)
Add an optional flag to disable constant-folding for function calls. This applies to both intrinsics and libcalls. This is not necessary in most cases, so it is disabled by default, but in cases that require bit-exact precision between the result from constant-folding and run-time execution, having this flag can be useful, and may help with debugging. Cases where mismatches can occur include GPU execution vs host-side folding, cross-compilation scenarios, or compilation vs execution environments with different math library versions. This applies only to calls, rather than all FP arithmetic. Methods such as fast-math-flags can be used to limit reassociation, fma-fusion, etc., and basic arithmetic operations are precisely defined in IEEE 754. However, other math operations such as sqrt, sin, pow etc. represented by either libcalls or intrinsics are less well defined, and may vary more between different architectures/library implementations. As this option is not intended for most common use-cases, this patch takes the more conservative approach of disabling constant-folding even for operations like fmax, copysign, fabs etc. in order to keep the implementation simple, rather than sprinkling checks for this flag throughout. The use-cases for this option are similar to StrictFP, but it is limited only to FP call folding, rather than all FP operations, as it is about precise arithmetic results, rather than FP environment behaviours. It can also be used when linking .bc files compiled with different StrictFP settings with llvm-link.
1 parent 7c99601 commit 1f7885c

File tree

2 files changed

+77
-3
lines changed

2 files changed

+77
-3
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@
6464

6565
using namespace llvm;
6666

67+
static cl::opt<bool> DisableFPCallFolding(
68+
"disable-fp-call-folding",
69+
cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
70+
cl::init(false), cl::Hidden);
71+
6772
namespace {
6873

6974
//===----------------------------------------------------------------------===//
@@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15761581
return false;
15771582
if (Call->getFunctionType() != F->getFunctionType())
15781583
return false;
1584+
1585+
// Allow FP calls (both libcalls and intrinsics) to avoid being folded.
1586+
// This can be useful for GPU targets or in cross-compilation scenarios
1587+
// when the exact target FP behaviour is required, and the host compiler's
1588+
// behaviour may be slightly different from the device's run-time behaviour.
1589+
if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
1590+
any_of(F->args(), [](const Argument &Arg) {
1591+
return Arg.getType()->isFloatingPointTy();
1592+
})))
1593+
return false;
1594+
15791595
switch (F->getIntrinsicID()) {
15801596
// Operations that do not operate on floating-point numbers and do not depend on
15811597
// FP environment can be folded even in strictfp functions.
@@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
17001716
case Intrinsic::x86_avx512_vcvtsd2usi64:
17011717
case Intrinsic::x86_avx512_cvttsd2usi:
17021718
case Intrinsic::x86_avx512_cvttsd2usi64:
1703-
return !Call->isStrictFP();
17041719

17051720
// NVVM FMax intrinsics
17061721
case Intrinsic::nvvm_fmax_d:
@@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
17751790
case Intrinsic::nvvm_d2ull_rn:
17761791
case Intrinsic::nvvm_d2ull_rp:
17771792
case Intrinsic::nvvm_d2ull_rz:
1793+
return !Call->isStrictFP();
17781794

17791795
// Sign operations are actually bitwise operations, they do not raise
17801796
// exceptions even for SNANs.
@@ -3909,8 +3925,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
39093925
Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
39103926
Constant *RHS, Type *Ty,
39113927
Instruction *FMFSource) {
3912-
return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},
3913-
dyn_cast_if_present<CallBase>(FMFSource));
3928+
auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
3929+
// Ensure we check flags like StrictFP that might prevent this from getting
3930+
// folded before generating a result.
3931+
if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
3932+
return nullptr;
3933+
return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
39143934
}
39153935

39163936
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_ENABLED
3+
; RUN: opt < %s -disable-fp-call-folding -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s --check-prefixes CHECK,FOLDING_DISABLED
4+
5+
; Check that we can disable folding of intrinsic calls via both the -disable-fp-call-folding flag and the strictfp attribute.
6+
7+
; Should be folded by default unless -disable-fp-call-folding is set
8+
define float @test_fmax_ftz_nan_xorsign_abs_f() {
9+
; FOLDING_ENABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
10+
; FOLDING_ENABLED-NEXT: ret float -2.000000e+00
11+
;
12+
; FOLDING_DISABLED-LABEL: define float @test_fmax_ftz_nan_xorsign_abs_f() {
13+
; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.250000e+00, float -2.000000e+00)
14+
; FOLDING_DISABLED-NEXT: ret float [[RES]]
15+
;
16+
%res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0)
17+
ret float %res
18+
}
19+
20+
; Check that -disable-fp-call-folding triggers for LLVM intrinsics, not just NVPTX target-specific ones.
21+
define float @test_llvm_sin() {
22+
; FOLDING_ENABLED-LABEL: define float @test_llvm_sin() {
23+
; FOLDING_ENABLED-NEXT: ret float 0x3FDEAEE880000000
24+
;
25+
; FOLDING_DISABLED-LABEL: define float @test_llvm_sin() {
26+
; FOLDING_DISABLED-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01)
27+
; FOLDING_DISABLED-NEXT: ret float [[RES]]
28+
;
29+
%res = call float @llvm.sin.f32(float 0.5)
30+
ret float %res
31+
}
32+
33+
; Should not be folded, even when -disable-fp-call-folding is not set, as it is marked as strictfp.
34+
define float @test_fmax_ftz_nan_f_strictfp() {
35+
; CHECK-LABEL: define float @test_fmax_ftz_nan_f_strictfp() {
36+
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.250000e+00, float -2.000000e+00) #[[ATTR1:[0-9]+]]
37+
; CHECK-NEXT: ret float [[RES]]
38+
;
39+
%res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0) #1
40+
ret float %res
41+
}
42+
43+
; Check that strictfp disables folding for LLVM math intrinsics like sin.f32
44+
; even when -disable-fp-call-folding is not set.
45+
define float @test_llvm_sin_strictfp() {
46+
; CHECK-LABEL: define float @test_llvm_sin_strictfp() {
47+
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.sin.f32(float 5.000000e-01) #[[ATTR1]]
48+
; CHECK-NEXT: ret float [[RES]]
49+
;
50+
%res = call float @llvm.sin.f32(float 0.5) #1
51+
ret float %res
52+
}
53+
54+
attributes #1 = { strictfp }

0 commit comments

Comments
 (0)