Skip to content

Commit cd88bfc

Browse files
authored
ConstantFolding: Do not fold fcmp of denormal without known mode (#115407)
Fixes #114947
1 parent 1e5bfac commit cd88bfc

File tree

2 files changed: 223 additions and 40 deletions.

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 105 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1266,14 +1266,16 @@ Constant *llvm::ConstantFoldCompareInstOperands(
12661266
return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
12671267
}
12681268

1269-
// Flush any denormal constant float input according to denormal handling
1270-
// mode.
1271-
Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
1272-
if (!Ops0)
1273-
return nullptr;
1274-
Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
1275-
if (!Ops1)
1276-
return nullptr;
1269+
if (CmpInst::isFPPredicate(Predicate)) {
1270+
// Flush any denormal constant float input according to denormal handling
1271+
// mode.
1272+
Ops0 = FlushFPConstant(Ops0, I, /*IsOutput=*/false);
1273+
if (!Ops0)
1274+
return nullptr;
1275+
Ops1 = FlushFPConstant(Ops1, I, /*IsOutput=*/false);
1276+
if (!Ops1)
1277+
return nullptr;
1278+
}
12771279

12781280
return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);
12791281
}
@@ -1298,47 +1300,110 @@ Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
12981300
return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
12991301
}
13001302

1301-
Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
1302-
bool IsOutput) {
1303-
if (!I || !I->getParent() || !I->getFunction())
1304-
return Operand;
1303+
static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
1304+
DenormalMode::DenormalModeKind Mode) {
1305+
switch (Mode) {
1306+
case DenormalMode::Dynamic:
1307+
return nullptr;
1308+
case DenormalMode::IEEE:
1309+
return ConstantFP::get(Ty->getContext(), APF);
1310+
case DenormalMode::PreserveSign:
1311+
return ConstantFP::get(
1312+
Ty->getContext(),
1313+
APFloat::getZero(APF.getSemantics(), APF.isNegative()));
1314+
case DenormalMode::PositiveZero:
1315+
return ConstantFP::get(Ty->getContext(),
1316+
APFloat::getZero(APF.getSemantics(), false));
1317+
default:
1318+
break;
1319+
}
13051320

1306-
ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
1307-
if (!CFP)
1308-
return Operand;
1321+
llvm_unreachable("unknown denormal mode");
1322+
}
1323+
1324+
/// Return the denormal mode that can be assumed when executing a floating point
1325+
/// operation at \p CtxI.
1326+
static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
1327+
if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
1328+
return DenormalMode::getDynamic();
1329+
return CtxI->getFunction()->getDenormalMode(Ty->getFltSemantics());
1330+
}
13091331

1332+
static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
1333+
const Instruction *Inst,
1334+
bool IsOutput) {
13101335
const APFloat &APF = CFP->getValueAPF();
1311-
// TODO: Should this canonicalize nans?
13121336
if (!APF.isDenormal())
1313-
return Operand;
1337+
return CFP;
13141338

1315-
Type *Ty = CFP->getType();
1316-
DenormalMode DenormMode =
1317-
I->getFunction()->getDenormalMode(Ty->getFltSemantics());
1318-
DenormalMode::DenormalModeKind Mode =
1319-
IsOutput ? DenormMode.Output : DenormMode.Input;
1320-
switch (Mode) {
1321-
default:
1322-
llvm_unreachable("unknown denormal mode");
1323-
case DenormalMode::Dynamic:
1324-
return nullptr;
1325-
case DenormalMode::IEEE:
1339+
DenormalMode Mode = getInstrDenormalMode(Inst, CFP->getType());
1340+
return flushDenormalConstant(CFP->getType(), APF,
1341+
IsOutput ? Mode.Output : Mode.Input);
1342+
}
1343+
1344+
Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
1345+
bool IsOutput) {
1346+
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
1347+
return flushDenormalConstantFP(CFP, Inst, IsOutput);
1348+
1349+
if (isa<ConstantAggregateZero, UndefValue, ConstantExpr>(Operand))
13261350
return Operand;
1327-
case DenormalMode::PreserveSign:
1328-
if (APF.isDenormal()) {
1329-
return ConstantFP::get(
1330-
Ty->getContext(),
1331-
APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
1351+
1352+
Type *Ty = Operand->getType();
1353+
VectorType *VecTy = dyn_cast<VectorType>(Ty);
1354+
if (VecTy) {
1355+
if (auto *Splat = dyn_cast_or_null<ConstantFP>(Operand->getSplatValue())) {
1356+
ConstantFP *Folded = flushDenormalConstantFP(Splat, Inst, IsOutput);
1357+
if (!Folded)
1358+
return nullptr;
1359+
return ConstantVector::getSplat(VecTy->getElementCount(), Folded);
13321360
}
1333-
return Operand;
1334-
case DenormalMode::PositiveZero:
1335-
if (APF.isDenormal()) {
1336-
return ConstantFP::get(Ty->getContext(),
1337-
APFloat::getZero(Ty->getFltSemantics(), false));
1361+
1362+
Ty = VecTy->getElementType();
1363+
}
1364+
1365+
if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
1366+
SmallVector<Constant *, 16> NewElts;
1367+
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1368+
Constant *Element = CV->getAggregateElement(i);
1369+
if (isa<UndefValue>(Element)) {
1370+
NewElts.push_back(Element);
1371+
continue;
1372+
}
1373+
1374+
ConstantFP *CFP = dyn_cast<ConstantFP>(Element);
1375+
if (!CFP)
1376+
return nullptr;
1377+
1378+
ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
1379+
if (!Folded)
1380+
return nullptr;
1381+
NewElts.push_back(Folded);
13381382
}
1339-
return Operand;
1383+
1384+
return ConstantVector::get(NewElts);
1385+
}
1386+
1387+
if (const auto *CDV = dyn_cast<ConstantDataVector>(Operand)) {
1388+
SmallVector<Constant *, 16> NewElts;
1389+
for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
1390+
const APFloat &Elt = CDV->getElementAsAPFloat(I);
1391+
if (!Elt.isDenormal()) {
1392+
NewElts.push_back(ConstantFP::get(Ty, Elt));
1393+
} else {
1394+
DenormalMode Mode = getInstrDenormalMode(Inst, Ty);
1395+
ConstantFP *Folded =
1396+
flushDenormalConstant(Ty, Elt, IsOutput ? Mode.Output : Mode.Input);
1397+
if (!Folded)
1398+
return nullptr;
1399+
NewElts.push_back(Folded);
1400+
}
1401+
}
1402+
1403+
return ConstantVector::get(NewElts);
13401404
}
1341-
return Operand;
1405+
1406+
return nullptr;
13421407
}
13431408

13441409
Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
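(Second changed file: a newly added LLVM IR regression test; its path was not captured in this extract.)
Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change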
@@ -0,0 +1,118 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=ipsccp < %s | FileCheck %s
3+
4+
5+
define i1 @no_fold_fcmp_denormal_double_ieee_dynamic_denormal_undef() #0 {
6+
; CHECK-LABEL: define i1 @no_fold_fcmp_denormal_double_ieee_dynamic_denormal_undef(
7+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
8+
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0x8000000000000, undef
9+
; CHECK-NEXT: ret i1 [[CMP]]
10+
;
11+
%cmp = fcmp une double 0x8000000000000, undef
12+
ret i1 %cmp
13+
}
14+
15+
define i1 @no_fold_fcmp_denormal_double_ieee_dynamic_denormal_poison() #0 {
16+
; CHECK-LABEL: define i1 @no_fold_fcmp_denormal_double_ieee_dynamic_denormal_poison(
17+
; CHECK-SAME: ) #[[ATTR0]] {
18+
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0x8000000000000, poison
19+
; CHECK-NEXT: ret i1 [[CMP]]
20+
;
21+
%cmp = fcmp une double 0x8000000000000, poison
22+
ret i1 %cmp
23+
}
24+
25+
define i1 @no_fold_fcmp_denormal_double_ieee_dynamic() #0 {
26+
; CHECK-LABEL: define i1 @no_fold_fcmp_denormal_double_ieee_dynamic(
27+
; CHECK-SAME: ) #[[ATTR0]] {
28+
; CHECK-NEXT: [[CMP:%.*]] = fcmp une double 0x8000000000000, 0.000000e+00
29+
; CHECK-NEXT: ret i1 [[CMP]]
30+
;
31+
%cmp = fcmp une double 0x8000000000000, 0.0
32+
ret i1 %cmp
33+
}
34+
35+
define i1 @fold_fcmp_nondenormal_double_ieee_dynamic() #0 {
36+
; CHECK-LABEL: define i1 @fold_fcmp_nondenormal_double_ieee_dynamic(
37+
; CHECK-SAME: ) #[[ATTR0]] {
38+
; CHECK-NEXT: ret i1 true
39+
;
40+
%cmp = fcmp une double 2.0, 0.0
41+
ret i1 %cmp
42+
}
43+
44+
define <2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_vector_splat() #0 {
45+
; CHECK-LABEL: define <2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_vector_splat(
46+
; CHECK-SAME: ) #[[ATTR0]] {
47+
; CHECK-NEXT: [[CMP:%.*]] = fcmp une <2 x double> splat (double 0x8000000000000), zeroinitializer
48+
; CHECK-NEXT: ret <2 x i1> [[CMP]]
49+
;
50+
%cmp = fcmp une <2 x double> splat (double 0x8000000000000), zeroinitializer
51+
ret <2 x i1> %cmp
52+
}
53+
54+
define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_splat() #0 {
55+
; CHECK-LABEL: define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_splat(
56+
; CHECK-SAME: ) #[[ATTR0]] {
57+
; CHECK-NEXT: ret <2 x i1> splat (i1 true)
58+
;
59+
%cmp = fcmp une <2 x double> splat (double 2.0), zeroinitializer
60+
ret <2 x i1> %cmp
61+
}
62+
63+
define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_nonsplat() #0 {
64+
; CHECK-LABEL: define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_nonsplat(
65+
; CHECK-SAME: ) #[[ATTR0]] {
66+
; CHECK-NEXT: ret <2 x i1> <i1 false, i1 true>
67+
;
68+
%cmp = fcmp une <2 x double> <double 2.0, double 4.0>, <double 2.0, double 8.0>
69+
ret <2 x i1> %cmp
70+
}
71+
72+
define <3 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_nonsplat_undef() #0 {
73+
; CHECK-LABEL: define <3 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_vector_nonsplat_undef(
74+
; CHECK-SAME: ) #[[ATTR0]] {
75+
; CHECK-NEXT: ret <3 x i1> <i1 true, i1 true, i1 false>
76+
;
77+
%cmp = fcmp une <3 x double> <double 2.0, double undef, double 4.0>, <double 1.0, double undef, double 4.0>
78+
ret <3 x i1> %cmp
79+
}
80+
81+
define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_zero() #0 {
82+
; CHECK-LABEL: define <2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_zero(
83+
; CHECK-SAME: ) #[[ATTR0]] {
84+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
85+
;
86+
%cmp = fcmp une <2 x double> zeroinitializer, zeroinitializer
87+
ret <2 x i1> %cmp
88+
}
89+
90+
define <2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_vector_nonsplat() #0 {
91+
; CHECK-LABEL: define <2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_vector_nonsplat(
92+
; CHECK-SAME: ) #[[ATTR0]] {
93+
; CHECK-NEXT: [[CMP:%.*]] = fcmp une <2 x double> <double 0x8000000000000, double 1.000000e+00>, zeroinitializer
94+
; CHECK-NEXT: ret <2 x i1> [[CMP]]
95+
;
96+
%cmp = fcmp une <2 x double> <double 0x8000000000000, double 1.0>, zeroinitializer
97+
ret <2 x i1> %cmp
98+
}
99+
100+
define <vscale x 2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_scalable_vector_splat() #0 {
101+
; CHECK-LABEL: define <vscale x 2 x i1> @fold_fcmp_nondenormal_double_ieee_dynamic_scalable_vector_splat(
102+
; CHECK-SAME: ) #[[ATTR0]] {
103+
; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
104+
;
105+
%cmp = fcmp une <vscale x 2 x double> splat (double 2.0), zeroinitializer
106+
ret <vscale x 2 x i1> %cmp
107+
}
108+
109+
define <vscale x 2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_scalaable_vector_splat() #0 {
110+
; CHECK-LABEL: define <vscale x 2 x i1> @no_fold_fcmp_denormal_double_ieee_dynamic_scalaable_vector_splat(
111+
; CHECK-SAME: ) #[[ATTR0]] {
112+
; CHECK-NEXT: ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
113+
;
114+
%cmp = fcmp une <vscale x 2 x double> splat (double 0x8000000000000), zeroinitializer
115+
ret <vscale x 2 x i1> %cmp
116+
}
117+
118+
attributes #0 = { "denormal-fp-math"="ieee,dynamic" }

0 commit comments

Comments
 (0)