Skip to content

Commit b7b3d17

Browse files
authored
[InstSimplify] Implement simple folds for ucmp/scmp intrinsics (#95601)
This patch adds folds for the cases where both operands are the same or where it can be established that the first operand is less than, equal to, or greater than the second operand.
1 parent e4e350e commit b7b3d17

File tree

2 files changed

+225
-0
lines changed

2 files changed

+225
-0
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6504,6 +6504,25 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
65046504

65056505
break;
65066506
}
6507+
case Intrinsic::scmp:
6508+
case Intrinsic::ucmp: {
6509+
// Fold to a constant if the relationship between operands can be
6510+
// established with certainty
6511+
if (isICmpTrue(CmpInst::ICMP_EQ, Op0, Op1, Q, RecursionLimit))
6512+
return Constant::getNullValue(ReturnType);
6513+
6514+
ICmpInst::Predicate PredGT =
6515+
IID == Intrinsic::scmp ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
6516+
if (isICmpTrue(PredGT, Op0, Op1, Q, RecursionLimit))
6517+
return ConstantInt::get(ReturnType, 1);
6518+
6519+
ICmpInst::Predicate PredLT =
6520+
IID == Intrinsic::scmp ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
6521+
if (isICmpTrue(PredLT, Op0, Op1, Q, RecursionLimit))
6522+
return ConstantInt::getSigned(ReturnType, -1);
6523+
6524+
break;
6525+
}
65076526
case Intrinsic::usub_with_overflow:
65086527
case Intrinsic::ssub_with_overflow:
65096528
// X - X -> { 0, false }

llvm/test/Transforms/InstSimplify/uscmp.ll

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,209 @@ define <4 x i8> @scmp_nonsplat() {
9696
%1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
9797
ret <4 x i8> %1
9898
}
99+
100+
define i8 @scmp_with_itself(i32 %x) {
101+
; CHECK-LABEL: define i8 @scmp_with_itself(
102+
; CHECK-SAME: i32 [[X:%.*]]) {
103+
; CHECK-NEXT: ret i8 0
104+
;
105+
%1 = call i8 @llvm.scmp(i32 %x, i32 %x)
106+
ret i8 %1
107+
}
108+
109+
; Vector ucmp of a value with itself: both operands are identical, so the
; result is expected to fold to an all-zero vector.
define <4 x i8> @ucmp_vec_with_itself(<4 x i32> %x) {
110+
; CHECK-LABEL: define <4 x i8> @ucmp_vec_with_itself(
111+
; CHECK-SAME: <4 x i32> [[X:%.*]]) {
112+
; CHECK-NEXT: ret <4 x i8> zeroinitializer
113+
;
114+
%1 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %x)
115+
ret <4 x i8> %1
116+
}
117+
118+
define i8 @scmp_known_gt(i32 %x, i32 %y) {
119+
; CHECK-LABEL: define i8 @scmp_known_gt(
120+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
121+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]]
122+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
123+
; CHECK-NEXT: ret i8 1
124+
;
125+
%1 = icmp sgt i32 %x, %y
126+
call void @llvm.assume(i1 %1)
127+
128+
%2 = call i8 @llvm.scmp(i32 %x, i32 %y)
129+
ret i8 %2
130+
}
131+
132+
define i8 @scmp_known_eq(i32 %x, i32 %y) {
133+
; CHECK-LABEL: define i8 @scmp_known_eq(
134+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
135+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
136+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
137+
; CHECK-NEXT: ret i8 0
138+
;
139+
%1 = icmp eq i32 %x, %y
140+
call void @llvm.assume(i1 %1)
141+
142+
%2 = call i8 @llvm.scmp(i32 %x, i32 %y)
143+
ret i8 %2
144+
}
145+
146+
define i8 @scmp_known_lt(i32 %x, i32 %y) {
147+
; CHECK-LABEL: define i8 @scmp_known_lt(
148+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
149+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
150+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
151+
; CHECK-NEXT: ret i8 -1
152+
;
153+
%1 = icmp slt i32 %x, %y
154+
call void @llvm.assume(i1 %1)
155+
156+
%2 = call i8 @llvm.scmp(i32 %x, i32 %y)
157+
ret i8 %2
158+
}
159+
160+
define i8 @ucmp_known_gt(i32 %x, i32 %y) {
161+
; CHECK-LABEL: define i8 @ucmp_known_gt(
162+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
163+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], [[Y]]
164+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
165+
; CHECK-NEXT: ret i8 1
166+
;
167+
%1 = icmp ugt i32 %x, %y
168+
call void @llvm.assume(i1 %1)
169+
170+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
171+
ret i8 %2
172+
}
173+
174+
define i8 @ucmp_known_eq(i32 %x, i32 %y) {
175+
; CHECK-LABEL: define i8 @ucmp_known_eq(
176+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
177+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
178+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
179+
; CHECK-NEXT: ret i8 0
180+
;
181+
%1 = icmp eq i32 %x, %y
182+
call void @llvm.assume(i1 %1)
183+
184+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
185+
ret i8 %2
186+
}
187+
188+
define i8 @ucmp_known_lt(i32 %x, i32 %y) {
189+
; CHECK-LABEL: define i8 @ucmp_known_lt(
190+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
191+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
192+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
193+
; CHECK-NEXT: ret i8 -1
194+
;
195+
%1 = icmp ult i32 %x, %y
196+
call void @llvm.assume(i1 %1)
197+
198+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
199+
ret i8 %2
200+
}
201+
202+
define i8 @ucmp_with_addition(i32 %x) {
203+
; CHECK-LABEL: define i8 @ucmp_with_addition(
204+
; CHECK-SAME: i32 [[X:%.*]]) {
205+
; CHECK-NEXT: ret i8 -1
206+
;
207+
%1 = add nuw i32 %x, 1
208+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %1)
209+
ret i8 %2
210+
}
211+
212+
define i8 @ucmp_with_addition2(i32 %x) {
213+
; CHECK-LABEL: define i8 @ucmp_with_addition2(
214+
; CHECK-SAME: i32 [[X:%.*]]) {
215+
; CHECK-NEXT: ret i8 1
216+
;
217+
%1 = add nuw i32 %x, 1
218+
%2 = call i8 @llvm.ucmp(i32 %1, i32 %x)
219+
ret i8 %2
220+
}
221+
222+
define <4 x i8> @ucmp_with_addition_vec(<4 x i32> %x) {
223+
; CHECK-LABEL: define <4 x i8> @ucmp_with_addition_vec(
224+
; CHECK-SAME: <4 x i32> [[X:%.*]]) {
225+
; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
226+
;
227+
%1 = add nuw <4 x i32> %x, splat(i32 1)
228+
%2 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %1)
229+
ret <4 x i8> %2
230+
}
231+
232+
; Negative case: mismatched signedness of predicates
233+
define i8 @scmp_known_ugt(i32 %x, i32 %y) {
234+
; CHECK-LABEL: define i8 @scmp_known_ugt(
235+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
236+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], [[Y]]
237+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
238+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
239+
; CHECK-NEXT: ret i8 [[TMP2]]
240+
;
241+
%1 = icmp ugt i32 %x, %y
242+
call void @llvm.assume(i1 %1)
243+
244+
%2 = call i8 @llvm.scmp(i32 %x, i32 %y)
245+
ret i8 %2
246+
}
247+
248+
define i8 @scmp_known_ult(i32 %x, i32 %y) {
249+
; CHECK-LABEL: define i8 @scmp_known_ult(
250+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
251+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
252+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
253+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
254+
; CHECK-NEXT: ret i8 [[TMP2]]
255+
;
256+
%1 = icmp ult i32 %x, %y
257+
call void @llvm.assume(i1 %1)
258+
259+
%2 = call i8 @llvm.scmp(i32 %x, i32 %y)
260+
ret i8 %2
261+
}
262+
263+
define i8 @ucmp_known_sgt(i32 %x, i32 %y) {
264+
; CHECK-LABEL: define i8 @ucmp_known_sgt(
265+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
266+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]]
267+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
268+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
269+
; CHECK-NEXT: ret i8 [[TMP2]]
270+
;
271+
%1 = icmp sgt i32 %x, %y
272+
call void @llvm.assume(i1 %1)
273+
274+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
275+
ret i8 %2
276+
}
277+
278+
define i8 @ucmp_known_slt(i32 %x, i32 %y) {
279+
; CHECK-LABEL: define i8 @ucmp_known_slt(
280+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
281+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
282+
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
283+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
284+
; CHECK-NEXT: ret i8 [[TMP2]]
285+
;
286+
%1 = icmp slt i32 %x, %y
287+
call void @llvm.assume(i1 %1)
288+
289+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
290+
ret i8 %2
291+
}
292+
293+
; Negative case: no nuw flag
294+
define i8 @ucmp_with_addition_no_nuw(i32 %x) {
295+
; CHECK-LABEL: define i8 @ucmp_with_addition_no_nuw(
296+
; CHECK-SAME: i32 [[X:%.*]]) {
297+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 1
298+
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[TMP1]])
299+
; CHECK-NEXT: ret i8 [[TMP2]]
300+
;
301+
%1 = add i32 %x, 1
302+
%2 = call i8 @llvm.ucmp(i32 %x, i32 %1)
303+
ret i8 %2
304+
}

0 commit comments

Comments
 (0)