Skip to content

Commit ff36411

Browse files
authored
[InstCombine] Use zext's nneg flag for icmp folding (#70845)
This PR fixes #55013: the max intrinsic is not generated for this simple loop case: https://godbolt.org/z/hxz1xhMPh. The cause is an icmp that is not folded into a select, which in turn prevents the max intrinsic from being generated. Background: since LLVM 14, the SCCP pass has become smarter and folds sext into zext for non-negative ranges: https://reviews.llvm.org/D81756. After that change, InstCombine was sometimes unable to fold the icmp correctly, because its two operands were now mismatched casts (one zext, one sext). To address this, @rotateright implemented https://reviews.llvm.org/D124419, which tries to resolve the mismatch by using ValueTracking to determine whether the operand of the zext is non-negative (in which case the zext behaves like a sext). However, ValueTracking is not smart enough to infer that the value is non-negative in some cases. Recently, @nikic implemented #67982, which preserves the information that a zext operand is non-negative. This PR simply uses that information to do the folding accordingly. TL;DR: this PR uses the recently added nneg flag on zext to fold the icmp accordingly in InstCombine. It also adds test cases for sext/zext folding with InstCombine, as well as x86 regression tests for the max/min case.
1 parent d05bada commit ff36411

File tree

3 files changed

+242
-5
lines changed

3 files changed

+242
-5
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5554,11 +5554,16 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
55545554
return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
55555555
Constant::getNullValue(X->getType()));
55565556

5557-
// If we have mismatched casts, treat the zext of a non-negative source as
5558-
// a sext to simulate matching casts. Otherwise, we are done.
5559-
// TODO: Can we handle some predicates (equality) without non-negative?
5560-
if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
5561-
(IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
5557+
// If we have mismatched casts and zext has the nneg flag, we can
5558+
// treat the "zext nneg" as "sext". Otherwise, we cannot fold, so bail out.
5559+
5560+
auto *NonNegInst0 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(0));
5561+
auto *NonNegInst1 = dyn_cast<PossiblyNonNegInst>(ICmp.getOperand(1));
5562+
5563+
bool IsNonNeg0 = NonNegInst0 && NonNegInst0->hasNonNeg();
5564+
bool IsNonNeg1 = NonNegInst1 && NonNegInst1->hasNonNeg();
5565+
5566+
if ((IsZext0 && IsNonNeg0) || (IsZext1 && IsNonNeg1))
55625567
IsSignedExt = true;
55635568
else
55645569
return nullptr;

llvm/test/Transforms/InstCombine/icmp-ext-ext.ll

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,17 @@ define i1 @zext_sext_sgt(i8 %x, i8 %y) {
130130
ret i1 %c
131131
}
132132

133+
define i1 @zext_nneg_sext_sgt(i8 %x, i8 %y) {
134+
; CHECK-LABEL: @zext_nneg_sext_sgt(
135+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
136+
; CHECK-NEXT: ret i1 [[C]]
137+
;
138+
%a = zext nneg i8 %x to i32
139+
%b = sext i8 %y to i32
140+
%c = icmp sgt i32 %a, %b
141+
ret i1 %c
142+
}
143+
133144
define i1 @zext_sext_ugt(i8 %x, i8 %y) {
134145
; CHECK-LABEL: @zext_sext_ugt(
135146
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -143,6 +154,18 @@ define i1 @zext_sext_ugt(i8 %x, i8 %y) {
143154
ret i1 %c
144155
}
145156

157+
158+
define i1 @zext_nneg_sext_ugt(i8 %x, i8 %y) {
159+
; CHECK-LABEL: @zext_nneg_sext_ugt(
160+
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
161+
; CHECK-NEXT: ret i1 [[C]]
162+
;
163+
%a = zext nneg i8 %x to i32
164+
%b = sext i8 %y to i32
165+
%c = icmp ugt i32 %a, %b
166+
ret i1 %c
167+
}
168+
146169
define i1 @zext_sext_eq(i8 %x, i8 %y) {
147170
; CHECK-LABEL: @zext_sext_eq(
148171
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -156,6 +179,18 @@ define i1 @zext_sext_eq(i8 %x, i8 %y) {
156179
ret i1 %c
157180
}
158181

182+
define i1 @zext_nneg_sext_eq(i8 %x, i8 %y) {
183+
; CHECK-LABEL: @zext_nneg_sext_eq(
184+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
185+
; CHECK-NEXT: ret i1 [[C]]
186+
;
187+
%a = zext nneg i8 %x to i32
188+
%b = sext i8 %y to i32
189+
%c = icmp eq i32 %a, %b
190+
ret i1 %c
191+
}
192+
193+
159194
define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
160195
; CHECK-LABEL: @zext_sext_sle_op0_narrow(
161196
; CHECK-NEXT: [[A:%.*]] = zext i8 [[X:%.*]] to i32
@@ -169,6 +204,19 @@ define i1 @zext_sext_sle_op0_narrow(i8 %x, i16 %y) {
169204
ret i1 %c
170205
}
171206

207+
208+
define i1 @zext_nneg_sext_sle_op0_narrow(i8 %x, i16 %y) {
209+
; CHECK-LABEL: @zext_nneg_sext_sle_op0_narrow(
210+
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i16
211+
; CHECK-NEXT: [[C:%.*]] = icmp sle i16 [[TMP1]], [[Y:%.*]]
212+
; CHECK-NEXT: ret i1 [[C]]
213+
;
214+
%a = zext nneg i8 %x to i32
215+
%b = sext i16 %y to i32
216+
%c = icmp sle i32 %a, %b
217+
ret i1 %c
218+
}
219+
172220
define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
173221
; CHECK-LABEL: @zext_sext_ule_op0_wide(
174222
; CHECK-NEXT: [[A:%.*]] = zext i9 [[X:%.*]] to i32
@@ -182,6 +230,18 @@ define i1 @zext_sext_ule_op0_wide(i9 %x, i8 %y) {
182230
ret i1 %c
183231
}
184232

233+
define i1 @zext_nneg_sext_ule_op0_wide(i9 %x, i8 %y) {
234+
; CHECK-LABEL: @zext_nneg_sext_ule_op0_wide(
235+
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i9
236+
; CHECK-NEXT: [[C:%.*]] = icmp uge i9 [[TMP1]], [[X:%.*]]
237+
; CHECK-NEXT: ret i1 [[C]]
238+
;
239+
%a = zext nneg i9 %x to i32
240+
%b = sext i8 %y to i32
241+
%c = icmp ule i32 %a, %b
242+
ret i1 %c
243+
}
244+
185245
define i1 @sext_zext_slt(i8 %x, i8 %y) {
186246
; CHECK-LABEL: @sext_zext_slt(
187247
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -195,6 +255,18 @@ define i1 @sext_zext_slt(i8 %x, i8 %y) {
195255
ret i1 %c
196256
}
197257

258+
259+
define i1 @sext_zext_nneg_slt(i8 %x, i8 %y) {
260+
; CHECK-LABEL: @sext_zext_nneg_slt(
261+
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X:%.*]], [[Y:%.*]]
262+
; CHECK-NEXT: ret i1 [[C]]
263+
;
264+
%a = sext i8 %x to i32
265+
%b = zext nneg i8 %y to i32
266+
%c = icmp slt i32 %a, %b
267+
ret i1 %c
268+
}
269+
198270
define i1 @sext_zext_ult(i8 %x, i8 %y) {
199271
; CHECK-LABEL: @sext_zext_ult(
200272
; CHECK-NEXT: [[A:%.*]] = sext i8 [[X:%.*]] to i32
@@ -208,6 +280,17 @@ define i1 @sext_zext_ult(i8 %x, i8 %y) {
208280
ret i1 %c
209281
}
210282

283+
define i1 @sext_zext_nneg_ult(i8 %x, i8 %y) {
284+
; CHECK-LABEL: @sext_zext_nneg_ult(
285+
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
286+
; CHECK-NEXT: ret i1 [[C]]
287+
;
288+
%a = sext i8 %x to i32
289+
%b = zext nneg i8 %y to i32
290+
%c = icmp ult i32 %a, %b
291+
ret i1 %c
292+
}
293+
211294
define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
212295
; CHECK-LABEL: @sext_zext_ne(
213296
; CHECK-NEXT: [[A:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -221,6 +304,18 @@ define <2 x i1> @sext_zext_ne(<2 x i8> %x, <2 x i8> %y) {
221304
ret <2 x i1> %c
222305
}
223306

307+
308+
define <2 x i1> @sext_zext_nneg_ne(<2 x i8> %x, <2 x i8> %y) {
309+
; CHECK-LABEL: @sext_zext_nneg_ne(
310+
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[X:%.*]], [[Y:%.*]]
311+
; CHECK-NEXT: ret <2 x i1> [[C]]
312+
;
313+
%a = sext <2 x i8> %x to <2 x i32>
314+
%b = zext nneg <2 x i8> %y to <2 x i32>
315+
%c = icmp ne <2 x i32> %a, %b
316+
ret <2 x i1> %c
317+
}
318+
224319
define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
225320
; CHECK-LABEL: @sext_zext_sge_op0_narrow(
226321
; CHECK-NEXT: [[A:%.*]] = sext i5 [[X:%.*]] to i32
@@ -234,6 +329,19 @@ define i1 @sext_zext_sge_op0_narrow(i5 %x, i8 %y) {
234329
ret i1 %c
235330
}
236331

332+
333+
define i1 @sext_zext_nneg_sge_op0_narrow(i5 %x, i8 %y) {
334+
; CHECK-LABEL: @sext_zext_nneg_sge_op0_narrow(
335+
; CHECK-NEXT: [[TMP1:%.*]] = sext i5 [[X:%.*]] to i8
336+
; CHECK-NEXT: [[C:%.*]] = icmp sge i8 [[TMP1]], [[Y:%.*]]
337+
; CHECK-NEXT: ret i1 [[C]]
338+
;
339+
%a = sext i5 %x to i32
340+
%b = zext nneg i8 %y to i32
341+
%c = icmp sge i32 %a, %b
342+
ret i1 %c
343+
}
344+
237345
define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
238346
; CHECK-LABEL: @sext_zext_uge_op0_wide(
239347
; CHECK-NEXT: [[A:%.*]] = sext i16 [[X:%.*]] to i32
@@ -247,6 +355,19 @@ define i1 @sext_zext_uge_op0_wide(i16 %x, i8 %y) {
247355
ret i1 %c
248356
}
249357

358+
359+
define i1 @sext_zext_nneg_uge_op0_wide(i16 %x, i8 %y) {
360+
; CHECK-LABEL: @sext_zext_nneg_uge_op0_wide(
361+
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[Y:%.*]] to i16
362+
; CHECK-NEXT: [[C:%.*]] = icmp ule i16 [[TMP1]], [[X:%.*]]
363+
; CHECK-NEXT: ret i1 [[C]]
364+
;
365+
%a = sext i16 %x to i32
366+
%b = zext nneg i8 %y to i32
367+
%c = icmp uge i32 %a, %b
368+
ret i1 %c
369+
}
370+
250371
define i1 @zext_sext_sgt_known_nonneg(i8 %x, i8 %y) {
251372
; CHECK-LABEL: @zext_sext_sgt_known_nonneg(
252373
; CHECK-NEXT: [[N:%.*]] = udiv i8 127, [[X:%.*]]
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
; RUN: opt < %s -O3 -S | FileCheck %s
2+
; See issue #55013 and PR #70845 for more details.
3+
; This test comes from the following C program, compiled with clang
4+
;
5+
;; short vecreduce_smin_v2i16(int n, short* v)
6+
;; {
7+
;; short p = 0;
8+
;; for (int i = 0; i < n; ++i)
9+
;; p = p > v[i] ? v[i] : p;
10+
;; return p;
11+
;; }
12+
;
13+
;; short vecreduce_smax_v2i16(int n, short* v)
14+
;; {
15+
;; short p = 0;
16+
;; for (int i = 0; i < n; ++i)
17+
;; p = p < v[i] ? v[i] : p;
18+
;; return p;
19+
;; }
20+
21+
define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) {
22+
; CHECK-LABEL: define i16 @vecreduce_smin_v2i16(
23+
; CHECK: @llvm.smin.v2i16
24+
25+
entry:
26+
br label %for.cond
27+
28+
for.cond: ; preds = %for.inc, %entry
29+
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
30+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
31+
%cmp = icmp slt i32 %i.0, %n
32+
br i1 %cmp, label %for.body, label %for.end
33+
34+
for.body: ; preds = %for.cond
35+
%conv = sext i16 %p.0 to i32
36+
%idxprom = sext i32 %i.0 to i64
37+
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
38+
%0 = load i16, ptr %arrayidx, align 2
39+
%conv1 = sext i16 %0 to i32
40+
%cmp2 = icmp sgt i32 %conv, %conv1
41+
br i1 %cmp2, label %cond.true, label %cond.false
42+
43+
cond.true: ; preds = %for.body
44+
%idxprom4 = sext i32 %i.0 to i64
45+
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
46+
%1 = load i16, ptr %arrayidx5, align 2
47+
%conv6 = sext i16 %1 to i32
48+
br label %cond.end
49+
50+
cond.false: ; preds = %for.body
51+
%conv7 = sext i16 %p.0 to i32
52+
br label %cond.end
53+
54+
cond.end: ; preds = %cond.false, %cond.true
55+
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
56+
%conv8 = trunc i32 %cond to i16
57+
br label %for.inc
58+
59+
for.inc: ; preds = %cond.end
60+
%inc = add nsw i32 %i.0, 1
61+
br label %for.cond
62+
63+
for.end: ; preds = %for.cond
64+
ret i16 %p.0
65+
}
66+
67+
define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) {
68+
; CHECK-LABEL: define i16 @vecreduce_smax_v2i16(
69+
; CHECK: @llvm.smax.v2i16
70+
71+
entry:
72+
br label %for.cond
73+
74+
for.cond: ; preds = %for.inc, %entry
75+
%p.0 = phi i16 [ 0, %entry ], [ %conv8, %for.inc ]
76+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
77+
%cmp = icmp slt i32 %i.0, %n
78+
br i1 %cmp, label %for.body, label %for.end
79+
80+
for.body: ; preds = %for.cond
81+
%conv = sext i16 %p.0 to i32
82+
%idxprom = sext i32 %i.0 to i64
83+
%arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
84+
%0 = load i16, ptr %arrayidx, align 2
85+
%conv1 = sext i16 %0 to i32
86+
%cmp2 = icmp slt i32 %conv, %conv1
87+
br i1 %cmp2, label %cond.true, label %cond.false
88+
89+
cond.true: ; preds = %for.body
90+
%idxprom4 = sext i32 %i.0 to i64
91+
%arrayidx5 = getelementptr inbounds i16, ptr %v, i64 %idxprom4
92+
%1 = load i16, ptr %arrayidx5, align 2
93+
%conv6 = sext i16 %1 to i32
94+
br label %cond.end
95+
96+
cond.false: ; preds = %for.body
97+
%conv7 = sext i16 %p.0 to i32
98+
br label %cond.end
99+
100+
cond.end: ; preds = %cond.false, %cond.true
101+
%cond = phi i32 [ %conv6, %cond.true ], [ %conv7, %cond.false ]
102+
%conv8 = trunc i32 %cond to i16
103+
br label %for.inc
104+
105+
for.inc: ; preds = %cond.end
106+
%inc = add nsw i32 %i.0, 1
107+
br label %for.cond
108+
109+
for.end: ; preds = %for.cond
110+
ret i16 %p.0
111+
}

0 commit comments

Comments
 (0)