Skip to content

Commit d68d217

Browse files
Poseydon42 and nikic authored
[InstCombine] Fold ucmp/scmp(x, y) >> N to zext/sext(x < y) when N is one less than the width of the result of ucmp/scmp (#104009)
Proof: https://alive2.llvm.org/ce/z/4diUqN --------- Co-authored-by: Nikita Popov <[email protected]>
1 parent dd3f131 commit d68d217

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,21 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
511511
if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1))))
512512
return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1));
513513

514+
Instruction *CmpIntr;
515+
if ((I.getOpcode() == Instruction::LShr ||
516+
I.getOpcode() == Instruction::AShr) &&
517+
match(Op0, m_OneUse(m_Instruction(CmpIntr))) &&
518+
isa<CmpIntrinsic>(CmpIntr) &&
519+
match(Op1, m_SpecificInt(Ty->getScalarSizeInBits() - 1))) {
520+
Value *Cmp =
521+
Builder.CreateICmp(cast<CmpIntrinsic>(CmpIntr)->getLTPredicate(),
522+
CmpIntr->getOperand(0), CmpIntr->getOperand(1));
523+
return CastInst::Create(I.getOpcode() == Instruction::LShr
524+
? Instruction::ZExt
525+
: Instruction::SExt,
526+
Cmp, Ty);
527+
}
528+
514529
return nullptr;
515530
}
516531

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
declare void @use(i8 %val)
5+
6+
; ucmp/scmp(x, y) >> N folds to either zext(x < y) or sext(x < y)
7+
; if N is one less than the width of the result of ucmp/scmp
8+
define i8 @ucmp_to_zext(i32 %x, i32 %y) {
9+
; CHECK-LABEL: define i8 @ucmp_to_zext(
10+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
11+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i8
13+
; CHECK-NEXT: ret i8 [[TMP2]]
14+
;
15+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
16+
%2 = lshr i8 %1, 7
17+
ret i8 %2
18+
}
19+
20+
define i8 @ucmp_to_sext(i32 %x, i32 %y) {
21+
; CHECK-LABEL: define i8 @ucmp_to_sext(
22+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
23+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
24+
; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i8
25+
; CHECK-NEXT: ret i8 [[TMP2]]
26+
;
27+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
28+
%2 = ashr i8 %1, 7
29+
ret i8 %2
30+
}
31+
32+
define i8 @scmp_to_zext(i32 %x, i32 %y) {
33+
; CHECK-LABEL: define i8 @scmp_to_zext(
34+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
35+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
36+
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i8
37+
; CHECK-NEXT: ret i8 [[TMP2]]
38+
;
39+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
40+
%2 = lshr i8 %1, 7
41+
ret i8 %2
42+
}
43+
44+
define i8 @scmp_to_sext(i32 %x, i32 %y) {
45+
; CHECK-LABEL: define i8 @scmp_to_sext(
46+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
47+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
48+
; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i8
49+
; CHECK-NEXT: ret i8 [[TMP2]]
50+
;
51+
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
52+
%2 = ashr i8 %1, 7
53+
ret i8 %2
54+
}
55+
56+
define <4 x i8> @scmp_to_sext_vec(<4 x i32> %x, <4 x i32> %y) {
57+
; CHECK-LABEL: define <4 x i8> @scmp_to_sext_vec(
58+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
59+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X]], [[Y]]
60+
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i8>
61+
; CHECK-NEXT: ret <4 x i8> [[TMP2]]
62+
;
63+
%1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
64+
%2 = ashr <4 x i8> %1, <i8 7, i8 7, i8 7, i8 7>
65+
ret <4 x i8> %2
66+
}
67+
68+
; Negative test: incorrect shift amount
69+
define i8 @ucmp_to_zext_neg1(i32 %x, i32 %y) {
70+
; CHECK-LABEL: define i8 @ucmp_to_zext_neg1(
71+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
72+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
73+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 5
74+
; CHECK-NEXT: ret i8 [[TMP2]]
75+
;
76+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
77+
%2 = lshr i8 %1, 5
78+
ret i8 %2
79+
}
80+
81+
; Negative test: shift amount is not a constant
82+
define i8 @ucmp_to_zext_neg2(i32 %x, i32 %y, i8 %s) {
83+
; CHECK-LABEL: define i8 @ucmp_to_zext_neg2(
84+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i8 [[S:%.*]]) {
85+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
86+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], [[S]]
87+
; CHECK-NEXT: ret i8 [[TMP2]]
88+
;
89+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
90+
%2 = lshr i8 %1, %s
91+
ret i8 %2
92+
}
93+
94+
; Negative test: the result of ucmp/scmp is used more than once
95+
define i8 @ucmp_to_zext_neg3(i32 %x, i32 %y) {
96+
; CHECK-LABEL: define i8 @ucmp_to_zext_neg3(
97+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
98+
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
99+
; CHECK-NEXT: call void @use(i8 [[TMP1]])
100+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 7
101+
; CHECK-NEXT: ret i8 [[TMP2]]
102+
;
103+
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
104+
call void @use(i8 %1)
105+
%2 = lshr i8 %1, 7
106+
ret i8 %2
107+
}

0 commit comments

Comments
 (0)