Skip to content

Commit 86f0399

Browse files
c8ef and dtcxzyw authored
[InstCombine] Fold expression using basic properties of floor and ceiling function (#107107)
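alive2: ~~https://alive2.llvm.org/ce/z/Ag3Ki7~~ https://alive2.llvm.org/ce/z/ywP5t2 related: #76438 This patch adds the following folds: `floor(x) <= x --> true` and `x <= ceil(x) --> true`. The folds rely on the basic properties of the floor and ceiling functions, which hold only when the floating-point input is not `nan`; the `nan` case is therefore handled explicitly (ordered predicates fold to an `fcmp ord` check or `false`, unordered predicates to `true` or an `fcmp uno` check). --------- Co-authored-by: Yingwei Zheng <[email protected]>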
1 parent 012dbec commit 86f0399

File tree

2 files changed

+332
-0
lines changed

2 files changed

+332
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8178,6 +8178,75 @@ static Instruction *foldFCmpFSubIntoFCmp(FCmpInst &I, Instruction *LHSI,
81788178
return nullptr;
81798179
}
81808180

8181+
/// Simplify an fcmp that compares a value `x` against `floor(x)` or `ceil(x)`.
///
/// Either operand order is accepted. The compare is first viewed in the
/// canonical form `round(x) Pred x`; when the intrinsic call is operand 1 the
/// swapped predicate is used instead. Because floor(x) <= x <= ceil(x) for
/// every non-NaN x, the handled predicates reduce to a constant or to a NaN
/// test on x:
///
///   floor(x) ole x  /  ceil(x) oge x  -->  fcmp ord x, 0
///   floor(x) ogt x  /  ceil(x) olt x  -->  false
///   floor(x) ule x  /  ceil(x) uge x  -->  true
///   floor(x) ugt x  /  ceil(x) ult x  -->  fcmp uno x, 0
///
/// \param I  The floating-point compare being visited.
/// \param IC The combiner used to replace the uses of \p I with a constant.
/// \returns A new FCmpInst, the result of IC.replaceInstUsesWith, or nullptr
///          when none of the folds above applies.
static Instruction *foldFCmpWithFloorAndCeil(FCmpInst &I,
                                             InstCombinerImpl &IC) {
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Type *OpType = Op0->getType();

  // Start by assuming the rounding call is operand 0 and x is operand 1.
  Value *X = Op1;
  CmpInst::Predicate Pred = I.getPredicate();
  bool IsFloor = match(Op0, m_Intrinsic<Intrinsic::floor>(m_Specific(Op1)));
  bool IsCeil =
      !IsFloor && match(Op0, m_Intrinsic<Intrinsic::ceil>(m_Specific(Op1)));

  if (!IsFloor && !IsCeil) {
    // Try the commuted form `x Pred round(x)`.
    IsFloor = match(Op1, m_Intrinsic<Intrinsic::floor>(m_Specific(Op0)));
    if (!IsFloor)
      IsCeil = match(Op1, m_Intrinsic<Intrinsic::ceil>(m_Specific(Op0)));

    // Neither operand is floor/ceil of the other: nothing to fold.
    if (!IsFloor && !IsCeil)
      return nullptr;

    X = Op0;
    Pred = I.getSwappedPredicate();
  }

  // The compare only depends on whether x is NaN. &I is handed to the
  // FCmpInst constructor just as the original compare was; the regression
  // tests in this patch expect flags such as `ninf` to show up on the result.
  auto FoldToNaNTest = [&](FCmpInst::Predicate NaNPred) -> Instruction * {
    return new FCmpInst(NaNPred, X, ConstantFP::getZero(OpType), "", &I);
  };

  // The compare has the same result for every input, NaN included.
  auto FoldToBool = [&](bool Result) -> Instruction * {
    return IC.replaceInstUsesWith(I, Result
                                         ? ConstantInt::getTrue(I.getType())
                                         : ConstantInt::getFalse(I.getType()));
  };

  if (IsFloor) {
    switch (Pred) {
    case FCmpInst::FCMP_OLE: // fcmp ole floor(x), x => fcmp ord x, 0
      return FoldToNaNTest(FCmpInst::FCMP_ORD);
    case FCmpInst::FCMP_OGT: // fcmp ogt floor(x), x => false
      return FoldToBool(false);
    case FCmpInst::FCMP_ULE: // fcmp ule floor(x), x => true
      return FoldToBool(true);
    case FCmpInst::FCMP_UGT: // fcmp ugt floor(x), x => fcmp uno x, 0
      return FoldToNaNTest(FCmpInst::FCMP_UNO);
    default:
      return nullptr;
    }
  }

  // Otherwise the rounded operand is ceil(x).
  switch (Pred) {
  case FCmpInst::FCMP_OGE: // fcmp oge ceil(x), x => fcmp ord x, 0
    return FoldToNaNTest(FCmpInst::FCMP_ORD);
  case FCmpInst::FCMP_OLT: // fcmp olt ceil(x), x => false
    return FoldToBool(false);
  case FCmpInst::FCMP_UGE: // fcmp uge ceil(x), x => true
    return FoldToBool(true);
  case FCmpInst::FCMP_ULT: // fcmp ult ceil(x), x => fcmp uno x, 0
    return FoldToNaNTest(FCmpInst::FCMP_UNO);
  default:
    return nullptr;
  }
}
8249+
81818250
Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
81828251
bool Changed = false;
81838252

@@ -8382,6 +8451,9 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
83828451
if (Instruction *R = foldSqrtWithFcmpZero(I, *this))
83838452
return R;
83848453

8454+
if (Instruction *R = foldFCmpWithFloorAndCeil(I, *this))
8455+
return R;
8456+
83858457
if (match(Op0, m_FNeg(m_Value(X)))) {
83868458
// fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
83878459
Constant *C;
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define i1 @floor_x_ole(float %x) {
5+
; CHECK-LABEL: @floor_x_ole(
6+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf ord float [[X:%.*]], 0.000000e+00
7+
; CHECK-NEXT: ret i1 [[RET]]
8+
;
9+
%floor = call float @llvm.floor.f32(float %x)
10+
%ret = fcmp ninf ole float %floor, %x
11+
ret i1 %ret
12+
}
13+
14+
define i1 @floor_x_ule(float %x) {
15+
; CHECK-LABEL: @floor_x_ule(
16+
; CHECK-NEXT: ret i1 true
17+
;
18+
%floor = call float @llvm.floor.f32(float %x)
19+
%ret = fcmp ule float %floor, %x
20+
ret i1 %ret
21+
}
22+
23+
define i1 @floor_x_ogt(float %x) {
24+
; CHECK-LABEL: @floor_x_ogt(
25+
; CHECK-NEXT: ret i1 false
26+
;
27+
%floor = call float @llvm.floor.f32(float %x)
28+
%ret = fcmp ogt float %floor, %x
29+
ret i1 %ret
30+
}
31+
32+
define i1 @floor_x_ugt(float %x) {
33+
; CHECK-LABEL: @floor_x_ugt(
34+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf uno float [[X:%.*]], 0.000000e+00
35+
; CHECK-NEXT: ret i1 [[RET]]
36+
;
37+
%floor = call float @llvm.floor.f32(float %x)
38+
%ret = fcmp ninf ugt float %floor, %x
39+
ret i1 %ret
40+
}
41+
42+
define i1 @x_floor_oge(float %x) {
43+
; CHECK-LABEL: @x_floor_oge(
44+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf ord float [[X:%.*]], 0.000000e+00
45+
; CHECK-NEXT: ret i1 [[RET]]
46+
;
47+
%floor = call float @llvm.floor.f32(float %x)
48+
%ret = fcmp ninf oge float %x, %floor
49+
ret i1 %ret
50+
}
51+
52+
define i1 @x_floor_uge(float %x) {
53+
; CHECK-LABEL: @x_floor_uge(
54+
; CHECK-NEXT: ret i1 true
55+
;
56+
%floor = call float @llvm.floor.f32(float %x)
57+
%ret = fcmp uge float %x, %floor
58+
ret i1 %ret
59+
}
60+
61+
define i1 @x_floor_olt(float %x) {
62+
; CHECK-LABEL: @x_floor_olt(
63+
; CHECK-NEXT: ret i1 false
64+
;
65+
%floor = call float @llvm.floor.f32(float %x)
66+
%ret = fcmp olt float %x, %floor
67+
ret i1 %ret
68+
}
69+
70+
define i1 @x_floor_ult(float %x) {
71+
; CHECK-LABEL: @x_floor_ult(
72+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf uno float [[X:%.*]], 0.000000e+00
73+
; CHECK-NEXT: ret i1 [[RET]]
74+
;
75+
%floor = call float @llvm.floor.f32(float %x)
76+
%ret = fcmp ninf ult float %x, %floor
77+
ret i1 %ret
78+
}
79+
80+
define <2 x i1> @x_floor_olt_vec(<2 x float> %x) {
81+
; CHECK-LABEL: @x_floor_olt_vec(
82+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
83+
;
84+
%floor = call <2 x float> @llvm.floor.f32(<2 x float> %x)
85+
%ret = fcmp olt <2 x float> %x, %floor
86+
ret <2 x i1> %ret
87+
}
88+
89+
define i1 @x_floor_ole_neg(float %x) {
90+
; CHECK-LABEL: @x_floor_ole_neg(
91+
; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X:%.*]])
92+
; CHECK-NEXT: [[RET:%.*]] = fcmp ole float [[X]], [[FLOOR]]
93+
; CHECK-NEXT: ret i1 [[RET]]
94+
;
95+
%floor = call float @llvm.floor.f32(float %x)
96+
%ret = fcmp ole float %x, %floor
97+
ret i1 %ret
98+
}
99+
100+
define i1 @x_floor_ogt_neg(float %x) {
101+
; CHECK-LABEL: @x_floor_ogt_neg(
102+
; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X:%.*]])
103+
; CHECK-NEXT: [[RET:%.*]] = fcmp ogt float [[X]], [[FLOOR]]
104+
; CHECK-NEXT: ret i1 [[RET]]
105+
;
106+
%floor = call float @llvm.floor.f32(float %x)
107+
%ret = fcmp ogt float %x, %floor
108+
ret i1 %ret
109+
}
110+
111+
define i1 @x_floor_ueq_neg(float %x) {
112+
; CHECK-LABEL: @x_floor_ueq_neg(
113+
; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X:%.*]])
114+
; CHECK-NEXT: [[RET:%.*]] = fcmp ueq float [[X]], [[FLOOR]]
115+
; CHECK-NEXT: ret i1 [[RET]]
116+
;
117+
%floor = call float @llvm.floor.f32(float %x)
118+
%ret = fcmp ueq float %x, %floor
119+
ret i1 %ret
120+
}
121+
122+
define i1 @x_floor_une_neg(float %x) {
123+
; CHECK-LABEL: @x_floor_une_neg(
124+
; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float [[X:%.*]])
125+
; CHECK-NEXT: [[RET:%.*]] = fcmp une float [[X]], [[FLOOR]]
126+
; CHECK-NEXT: ret i1 [[RET]]
127+
;
128+
%floor = call float @llvm.floor.f32(float %x)
129+
%ret = fcmp une float %x, %floor
130+
ret i1 %ret
131+
}
132+
133+
define i1 @ceil_x_oge(float %x) {
134+
; CHECK-LABEL: @ceil_x_oge(
135+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf ord float [[X:%.*]], 0.000000e+00
136+
; CHECK-NEXT: ret i1 [[RET]]
137+
;
138+
%ceil = call float @llvm.ceil.f32(float %x)
139+
%ret = fcmp ninf oge float %ceil, %x
140+
ret i1 %ret
141+
}
142+
143+
define i1 @ceil_x_uge(float %x) {
144+
; CHECK-LABEL: @ceil_x_uge(
145+
; CHECK-NEXT: ret i1 true
146+
;
147+
%ceil = call float @llvm.ceil.f32(float %x)
148+
%ret = fcmp uge float %ceil, %x
149+
ret i1 %ret
150+
}
151+
152+
define i1 @ceil_x_olt(float %x) {
153+
; CHECK-LABEL: @ceil_x_olt(
154+
; CHECK-NEXT: ret i1 false
155+
;
156+
%ceil = call float @llvm.ceil.f32(float %x)
157+
%ret = fcmp olt float %ceil, %x
158+
ret i1 %ret
159+
}
160+
161+
define i1 @ceil_x_ult(float %x) {
162+
; CHECK-LABEL: @ceil_x_ult(
163+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf uno float [[X:%.*]], 0.000000e+00
164+
; CHECK-NEXT: ret i1 [[RET]]
165+
;
166+
%ceil = call float @llvm.ceil.f32(float %x)
167+
%ret = fcmp ninf ult float %ceil, %x
168+
ret i1 %ret
169+
}
170+
171+
define i1 @x_ceil_ole(float %x) {
172+
; CHECK-LABEL: @x_ceil_ole(
173+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf ord float [[X:%.*]], 0.000000e+00
174+
; CHECK-NEXT: ret i1 [[RET]]
175+
;
176+
%ceil = call float @llvm.ceil.f32(float %x)
177+
%ret = fcmp ninf ole float %x, %ceil
178+
ret i1 %ret
179+
}
180+
181+
define i1 @x_ceil_ule(float %x) {
182+
; CHECK-LABEL: @x_ceil_ule(
183+
; CHECK-NEXT: ret i1 true
184+
;
185+
%ceil = call float @llvm.ceil.f32(float %x)
186+
%ret = fcmp ule float %x, %ceil
187+
ret i1 %ret
188+
}
189+
190+
define i1 @x_ceil_ogt(float %x) {
191+
; CHECK-LABEL: @x_ceil_ogt(
192+
; CHECK-NEXT: ret i1 false
193+
;
194+
%ceil = call float @llvm.ceil.f32(float %x)
195+
%ret = fcmp ogt float %x, %ceil
196+
ret i1 %ret
197+
}
198+
199+
define i1 @x_ceil_ugt(float %x) {
200+
; CHECK-LABEL: @x_ceil_ugt(
201+
; CHECK-NEXT: [[RET:%.*]] = fcmp ninf uno float [[X:%.*]], 0.000000e+00
202+
; CHECK-NEXT: ret i1 [[RET]]
203+
;
204+
%ceil = call float @llvm.ceil.f32(float %x)
205+
%ret = fcmp ninf ugt float %x, %ceil
206+
ret i1 %ret
207+
}
208+
209+
define <2 x i1> @x_ceil_ogt_vec(<2 x float> %x) {
210+
; CHECK-LABEL: @x_ceil_ogt_vec(
211+
; CHECK-NEXT: ret <2 x i1> zeroinitializer
212+
;
213+
%ceil = call <2 x float> @llvm.ceil.f32(<2 x float> %x)
214+
%ret = fcmp ogt <2 x float> %x, %ceil
215+
ret <2 x i1> %ret
216+
}
217+
218+
define i1 @x_ceil_oge_neg(float %x) {
219+
; CHECK-LABEL: @x_ceil_oge_neg(
220+
; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float [[X:%.*]])
221+
; CHECK-NEXT: [[RET:%.*]] = fcmp oge float [[X]], [[CEIL]]
222+
; CHECK-NEXT: ret i1 [[RET]]
223+
;
224+
%ceil = call float @llvm.ceil.f32(float %x)
225+
%ret = fcmp oge float %x, %ceil
226+
ret i1 %ret
227+
}
228+
229+
define i1 @x_ceil_olt_neg(float %x) {
230+
; CHECK-LABEL: @x_ceil_olt_neg(
231+
; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float [[X:%.*]])
232+
; CHECK-NEXT: [[RET:%.*]] = fcmp olt float [[X]], [[CEIL]]
233+
; CHECK-NEXT: ret i1 [[RET]]
234+
;
235+
%ceil = call float @llvm.ceil.f32(float %x)
236+
%ret = fcmp olt float %x, %ceil
237+
ret i1 %ret
238+
}
239+
240+
define i1 @x_ceil_oeq_neg(float %x) {
241+
; CHECK-LABEL: @x_ceil_oeq_neg(
242+
; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float [[X:%.*]])
243+
; CHECK-NEXT: [[RET:%.*]] = fcmp oeq float [[X]], [[CEIL]]
244+
; CHECK-NEXT: ret i1 [[RET]]
245+
;
246+
%ceil = call float @llvm.ceil.f32(float %x)
247+
%ret = fcmp oeq float %x, %ceil
248+
ret i1 %ret
249+
}
250+
251+
define i1 @x_ceil_one_neg(float %x) {
252+
; CHECK-LABEL: @x_ceil_one_neg(
253+
; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float [[X:%.*]])
254+
; CHECK-NEXT: [[RET:%.*]] = fcmp one float [[X]], [[CEIL]]
255+
; CHECK-NEXT: ret i1 [[RET]]
256+
;
257+
%ceil = call float @llvm.ceil.f32(float %x)
258+
%ret = fcmp one float %x, %ceil
259+
ret i1 %ret
260+
}

0 commit comments

Comments
 (0)