Commit 0f95710

[InstCombine] Shift amount reassociation: shl-trunc-shl pattern
Summary:
Currently `reassociateShiftAmtsOfTwoSameDirectionShifts()` only handles two shifts one after another. If the shifts are `shl`, we can still easily perform the fold, with no extra legality checks:
https://rise4fun.com/Alive/OQbM

If we have a right-shift, however, we won't be able to make it any simpler than it already is.

After this, the only thing missing here is constant-folding (`NewShAmt >= bitwidth(X)`):
* If it's a logical shift, then constant-fold to `0` (not `undef`)
* If it's an `ashr`, then a splat of the original signbit
https://rise4fun.com/Alive/E1K
https://rise4fun.com/Alive/i0V

Reviewers: spatel, nikic, xbolva00

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65380

llvm-svn: 368059
1 parent 8fbb6ce commit 0f95710
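To make the new fold concrete: it reassociates a shl-trunc-shl chain into a single wide shl followed by one trunc. A minimal before/after sketch in IR, assuming constant shift amounts for readability (the in-tree tests below use non-constant amounts whose sum simplifies to a constant; function names here are hypothetical):

define i16 @before(i32 %x) {
  %i = shl i32 %x, 6
  %t = trunc i32 %i to i16
  %r = shl i16 %t, 2           ; shift amounts sum to 8
  ret i16 %r
}

define i16 @after(i32 %x) {
  %i = shl i32 %x, 8           ; one shift by the summed amount, in the wide type
  %r = trunc i32 %i to i16     ; followed by a single truncation
  ret i16 %r
}

The fold only fires when the summed shift amount folds to a constant strictly smaller than the wide type's bit width (here 8 < 32); otherwise the code bails out, per the FIXMEs about constant-folding.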

3 files changed (+89, -68 lines)


llvm/include/llvm/IR/PatternMatch.h

Lines changed: 6 additions & 0 deletions
@@ -1270,6 +1270,12 @@ inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
+m_ZExtOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_ZExt(Op), Op);
+}
+
 template <typename OpTy>
 inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
                         CastClass_match<OpTy, Instruction::SExt>>
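The new m_ZExtOrSelf matcher accepts a value either directly or through a zext, which is what lets the shift matchers in this patch bind a shift amount that was widened before use. A small IR sketch of the two shapes it covers (function and value names are hypothetical):

define i32 @zext_or_self_shapes(i32 %x, i32 %a, i16 %b) {
  ; m_Shift(m_Value(), m_ZExtOrSelf(m_Value(ShAmt))) binds ShAmt to %a here...
  %s0 = shl i32 %x, %a
  ; ...and looks through the zext to bind ShAmt to %b here.
  %wide = zext i16 %b to i32
  %s1 = shl i32 %x, %wide
  %r = or i32 %s0, %s1
  ret i32 %r
}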

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 66 additions & 24 deletions
@@ -27,42 +27,84 @@ using namespace PatternMatch;
 // This is valid for any shift, but they must be identical.
 static Instruction *
 reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
-                                             const SimplifyQuery &SQ) {
-  // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1
-  Value *X, *ShAmt1, *ShAmt0;
+                                             const SimplifyQuery &SQ,
+                                             InstCombiner::BuilderTy &Builder) {
+  // Look for a shift of some instruction, ignore zext of shift amount if any.
+  Instruction *Sh0Op0;
+  Value *ShAmt0;
+  if (!match(Sh0,
+             m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0)))))
+    return nullptr;
+
+  // If there is a truncation between the two shifts, we must make note of it
+  // and look through it. The truncation imposes additional constraints on the
+  // transform.
   Instruction *Sh1;
-  if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
-                                       m_Instruction(Sh1)),
-                          m_Value(ShAmt0))))
+  Value *Trunc = nullptr;
+  match(Sh0Op0,
+        m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)),
+                    m_Instruction(Sh1)));
+
+  // Inner shift: (x shiftopcode ShAmt1)
+  Value *X, *ShAmt1;
+  if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
     return nullptr;
 
   // The shift opcodes must be identical.
   Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
   if (ShiftOpcode != Sh1->getOpcode())
     return nullptr;
+
+  // Did we match a pattern with truncation ?
+  if (Trunc) {
+    // For right-shifts we can't do any such simplifications. Leave as-is.
+    if (ShiftOpcode != Instruction::BinaryOps::Shl)
+      return nullptr; // FIXME: still could perform constant-folding.
+    // If we saw truncation, we'll need to produce extra instruction,
+    // and for that one of the operands of the shift must be one-use.
+    if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+      return nullptr;
+  }
+
   // Can we fold (ShAmt0+ShAmt1) ?
-  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
-                                  SQ.getWithInstruction(Sh0));
+  auto *NewShAmt = dyn_cast_or_null<Constant>(
+      SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
+                      SQ.getWithInstruction(Sh0)));
   if (!NewShAmt)
     return nullptr; // Did not simplify.
-  // Is the new shift amount smaller than the bit width?
-  // FIXME: could also rely on ConstantRange.
-  unsigned BitWidth = X->getType()->getScalarSizeInBits();
-  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
-                                          APInt(BitWidth, BitWidth))))
-    return nullptr;
+  // Is the new shift amount smaller than the bit width of inner shift?
+  if (!match(NewShAmt, m_SpecificInt_ICMP(
+                           ICmpInst::Predicate::ICMP_ULT,
+                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
+                                 X->getType()->getScalarSizeInBits()))))
+    return nullptr; // FIXME: could perform constant-folding.
+
   // All good, we can do this fold.
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
+
   BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
-  // If both of the original shifts had the same flag set, preserve the flag.
-  if (ShiftOpcode == Instruction::BinaryOps::Shl) {
-    NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
-                                   Sh1->hasNoUnsignedWrap());
-    NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
-                                 Sh1->hasNoSignedWrap());
-  } else {
-    NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+
+  // The flags can only be propagated if there wasn't a trunc.
+  if (!Trunc) {
+    // If the pattern did not involve trunc, and both of the original shifts
+    // had the same flag set, preserve the flag.
+    if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+      NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+                                     Sh1->hasNoUnsignedWrap());
+      NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+                                   Sh1->hasNoSignedWrap());
+    } else {
+      NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+    }
   }
-  return NewShift;
+
+  Instruction *Ret = NewShift;
+  if (Trunc) {
+    Builder.Insert(NewShift);
+    Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType());
+  }
+
+  return Ret;
 }
 
 // If we have some pattern that leaves only some low bits set, and then performs
@@ -158,7 +200,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     return Res;
 
   if (Instruction *NewShift =
-          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ, Builder))
     return NewShift;
 
   // (C1 shift (A add C2)) -> (C1 shift C2) shift A)
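As a sanity check on the Shl-only restriction above: when a trunc sits between the shifts, reassociation is only sound for shl, because truncation discards exactly the bits a left shift would push out anyway, whereas for right shifts it discards bits that the reassociated form would pull back in. A worked, hypothetical example with %x = 0xAABBCCDD, i32 truncated to i16, both shifts by 8:

; lshr-trunc-lshr, original order:
;   lshr i32 %x, 8          ; 0x00AABBCC
;   trunc i32 ... to i16    ; 0xBBCC
;   lshr i16 ..., 8         ; 0x00BB
; reassociated trunc(lshr i32 %x, 16) yields 0xAABB -- mismatch, so bail out.
;
; shl-trunc-shl, original order:
;   shl i32 %x, 8           ; 0xBBCCDD00
;   trunc i32 ... to i16    ; 0xDD00
;   shl i16 ..., 8          ; 0x0000
; reassociated trunc(shl i32 %x, 16) also yields 0x0000 -- safe.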

llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll

Lines changed: 17 additions & 44 deletions
@@ -12,12 +12,8 @@
 
 define i16 @t0(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -31,12 +27,8 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 32>, %y
@@ -50,12 +42,8 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 
 define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 30>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -24, i16 0>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 30>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 30>, %y
@@ -71,12 +59,8 @@ define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 
 define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 -24, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -90,12 +74,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
@@ -109,12 +89,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -24, i16 undef, i16 -24>
-; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
+; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -137,9 +113,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T3]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
@@ -154,13 +130,10 @@ define i16 @t6_extrause0(i32 %x, i16 %y) {
 
 define i16 @t7_extrause1(i32 %x, i16 %y) {
 ; CHECK-LABEL: @t7_extrause1(
-; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y:%.*]], -24
 ; CHECK-NEXT:    call void @use16(i16 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 ; CHECK-NEXT:    ret i16 [[T5]]
 ;
   %t0 = sub i16 32, %y
