Skip to content

Commit 4028dd2

Browse files
authored
[InstSimplify] Fold converted urem to 0 if there's no overlapping bits (#71528)
When folding urem instructions we can fail to recognize that the result will always be 0 because the operands are different Value*s, even though they produce the same data (in this case, two different calls to vscale). This patch recognizes the (x << N) & (add (x << M), -1) pattern that instcombine replaces the urem with once the two vscale calls have been reduced to one via CSE, and folds it to 0 when x is known to be a power of 2 (or zero) and N >= M.
1 parent 32c3dec commit 4028dd2

File tree

2 files changed

+78
-14
lines changed

2 files changed

+78
-14
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,6 +2028,16 @@ static Value *simplifyAndCommutative(Value *Op0, Value *Op1,
20282028
isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
20292029
return Constant::getNullValue(Op1->getType());
20302030

2031+
// (x << N) & ((x << M) - 1) --> 0, where x is known to be a power of 2 and
2032+
// M <= N.
2033+
const APInt *Shift1, *Shift2;
2034+
if (match(Op0, m_Shl(m_Value(X), m_APInt(Shift1))) &&
2035+
match(Op1, m_Add(m_Shl(m_Specific(X), m_APInt(Shift2)), m_AllOnes())) &&
2036+
isKnownToBeAPowerOfTwo(X, Q.DL, /*OrZero*/ true, /*Depth*/ 0, Q.AC,
2037+
Q.CxtI) &&
2038+
Shift1->uge(*Shift2))
2039+
return Constant::getNullValue(Op0->getType());
2040+
20312041
if (Value *V =
20322042
simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q, MaxRecurse))
20332043
return V;

llvm/test/Transforms/InstSimplify/po2-shift-add-and-to-zero.ll

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,7 @@ define i64 @f1() #0 {
3535
; CHECK-LABEL: define i64 @f1
3636
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
3737
; CHECK-NEXT: entry:
38-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
39-
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
40-
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 3
41-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
42-
; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]]
43-
; CHECK-NEXT: ret i64 [[REM]]
38+
; CHECK-NEXT: ret i64 0
4439
;
4540
entry:
4641
%0 = call i64 @llvm.vscale.i64()
@@ -55,24 +50,19 @@ entry:
5550
define i64 @test_pow2_or_zero(i64 %arg) {
5651
; CHECK-LABEL: define i64 @test_pow2_or_zero
5752
; CHECK-SAME: (i64 [[ARG:%.*]]) {
58-
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[ARG]]
59-
; CHECK-NEXT: [[X:%.*]] = and i64 [[NEG]], [[ARG]]
60-
; CHECK-NEXT: [[SHL1:%.*]] = shl i64 [[X]], 4
61-
; CHECK-NEXT: [[SHL2:%.*]] = shl i64 [[X]], 3
62-
; CHECK-NEXT: [[MASK:%.*]] = add i64 [[SHL2]], -1
63-
; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
64-
; CHECK-NEXT: ret i64 [[REM]]
53+
; CHECK-NEXT: ret i64 0
6554
;
6655
%neg = sub i64 0, %arg
6756
%x = and i64 %neg, %arg
6857
%shl1 = shl i64 %x, 4
6958
%shl2 = shl i64 %x, 3
7059
%mask = add i64 %shl2, -1
71-
%rem = and i64 %shl1, %mask
60+
%rem = and i64 %mask, %shl1
7261
ret i64 %rem
7362
}
7463

7564
;; Make sure it doesn't work if the value isn't known to be a power of 2.
65+
;; In this case a vscale without a `vscale_range` attribute on the function.
7666
define i64 @no_pow2() {
7767
; CHECK-LABEL: define i64 @no_pow2() {
7868
; CHECK-NEXT: entry:
@@ -92,6 +82,70 @@ entry:
9282
ret i64 %rem
9383
}
9484

85+
;; Make sure it doesn't work if the shift on the -1 side is greater than the
;; shift on the other operand.
86+
define i64 @minus_shift_greater(i64 %arg) {
87+
; CHECK-LABEL: define i64 @minus_shift_greater
88+
; CHECK-SAME: (i64 [[ARG:%.*]]) {
89+
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[ARG]]
90+
; CHECK-NEXT: [[X:%.*]] = and i64 [[NEG]], [[ARG]]
91+
; CHECK-NEXT: [[SHL1:%.*]] = shl i64 [[X]], 3
92+
; CHECK-NEXT: [[SHL2:%.*]] = shl i64 [[X]], 4
93+
; CHECK-NEXT: [[MASK:%.*]] = add i64 [[SHL2]], -1
94+
; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
95+
; CHECK-NEXT: ret i64 [[REM]]
96+
;
97+
%neg = sub i64 0, %arg
98+
%x = and i64 %neg, %arg
99+
%shl1 = shl i64 %x, 3
100+
%shl2 = shl i64 %x, 4
101+
%mask = add i64 %shl2, -1
102+
%rem = and i64 %shl1, %mask
103+
ret i64 %rem
104+
}
105+
106+
;; Make sure it doesn't work if the subtract isn't one.
107+
define i64 @sub2(i64 %arg) {
108+
; CHECK-LABEL: define i64 @sub2
109+
; CHECK-SAME: (i64 [[ARG:%.*]]) {
110+
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[ARG]]
111+
; CHECK-NEXT: [[X:%.*]] = and i64 [[NEG]], [[ARG]]
112+
; CHECK-NEXT: [[SHL1:%.*]] = shl i64 [[X]], 4
113+
; CHECK-NEXT: [[SHL2:%.*]] = shl i64 [[X]], 3
114+
; CHECK-NEXT: [[MASK:%.*]] = add i64 [[SHL2]], -2
115+
; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
116+
; CHECK-NEXT: ret i64 [[REM]]
117+
;
118+
%neg = sub i64 0, %arg
119+
%x = and i64 %neg, %arg
120+
%shl1 = shl i64 %x, 4
121+
%shl2 = shl i64 %x, 3
122+
%mask = add i64 %shl2, -2
123+
%rem = and i64 %shl1, %mask
124+
ret i64 %rem
125+
}
126+
127+
;; Make sure it doesn't work with a right shift
128+
;; In this case the mask operand is built from an lshr instead of a shl.
129+
define i64 @rightshift(i64 %arg) {
130+
; CHECK-LABEL: define i64 @rightshift
131+
; CHECK-SAME: (i64 [[ARG:%.*]]) {
132+
; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[ARG]]
133+
; CHECK-NEXT: [[X:%.*]] = and i64 [[NEG]], [[ARG]]
134+
; CHECK-NEXT: [[SHL1:%.*]] = shl i64 [[X]], 4
135+
; CHECK-NEXT: [[SHL2:%.*]] = lshr i64 [[X]], 3
136+
; CHECK-NEXT: [[MASK:%.*]] = add i64 [[SHL2]], -1
137+
; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
138+
; CHECK-NEXT: ret i64 [[REM]]
139+
;
140+
%neg = sub i64 0, %arg
141+
%x = and i64 %neg, %arg
142+
%shl1 = shl i64 %x, 4
143+
%shl2 = lshr i64 %x, 3
144+
%mask = add i64 %shl2, -1
145+
%rem = and i64 %shl1, %mask
146+
ret i64 %rem
147+
}
148+
95149
declare i64 @llvm.vscale.i64()
96150

97151
attributes #0 = { vscale_range(1,16) }

0 commit comments

Comments
 (0)