Commit 2f6b073

[InstCombine] fold cast of right-shift if high bits are not demanded
(masked) trunc (lshr X, C) --> (masked) lshr (trunc X), C

Narrowing the shift should be better for analysis and can lead to follow-on transforms as shown.

Attempt at a general proof in Alive2:
https://alive2.llvm.org/ce/z/tRnnSF

Here are a couple of the specific tests:
https://alive2.llvm.org/ce/z/bCnTp-
https://alive2.llvm.org/ce/z/TfaHnb

Differential Revision: https://reviews.llvm.org/D110170
1 parent 43d6991 commit 2f6b073
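
For illustration, this is the shape of the fold on the scalar test below (a sketch only; the value names here are made up, and the actual InstCombine output uses numbered temporaries):

  ; before
  %s = lshr i8 %x, 2
  %t = trunc i8 %s to i6
  %r = and i6 %t, 14

  ; after: the shift is performed in the narrow type
  %narrow = trunc i8 %x to i6
  %shift  = lshr i6 %narrow, 2
  %r      = and i6 %shift, 14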

2 files changed: +55 −21 lines
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 19 additions & 1 deletion
@@ -385,8 +385,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     Known = KnownBits::commonBits(LHSKnown, RHSKnown);
     break;
   }
-  case Instruction::ZExt:
   case Instruction::Trunc: {
+    // If we do not demand the high bits of a right-shifted and truncated value,
+    // then we may be able to truncate it before the shift.
+    Value *X;
+    const APInt *C;
+    if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
+      // The shift amount must be valid (not poison) in the narrow type, and
+      // it must not be greater than the high bits demanded of the result.
+      if (C->ult(I->getType()->getScalarSizeInBits()) &&
+          C->ule(DemandedMask.countLeadingZeros())) {
+        // trunc (lshr X, C) --> lshr (trunc X), C
+        IRBuilderBase::InsertPointGuard Guard(Builder);
+        Builder.SetInsertPoint(I);
+        Value *Trunc = Builder.CreateTrunc(X, I->getType());
+        return Builder.CreateLShr(Trunc, C->getZExtValue());
+      }
+    }
+  }
+    LLVM_FALLTHROUGH;
+  case Instruction::ZExt: {
     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
 
     APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
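
As a rough worked example of the two guards above, consider the @trunc_lshr test in the file below (bit patterns written out here only for illustration):

  ; %r = and i6 %t, 14 demands only 0b001110 of the truncated value, so
  ; DemandedMask.countLeadingZeros() == 2 in the i6 destination type.
  ; The shift amount C == 2 then passes both checks: C->ult(6), so the amount
  ; is still valid in i6, and C->ule(2), so the only result bits that could
  ; differ between the wide and narrow shifts (the top C bits) are never
  ; demanded. The shift is therefore rebuilt as 'lshr i6 (trunc i8 %x to i6), 2'.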

llvm/test/Transforms/InstCombine/trunc-demand.ll

Lines changed: 36 additions & 20 deletions
@@ -6,9 +6,9 @@ declare void @use8(i8)
 
 define i6 @trunc_lshr(i8 %x) {
 ; CHECK-LABEL: @trunc_lshr(
-; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 2
-; CHECK-NEXT:    [[T:%.*]] = trunc i8 [[S]] to i6
-; CHECK-NEXT:    [[R:%.*]] = and i6 [[T]], 14
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i8 [[X:%.*]] to i6
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i6 [[TMP1]], 2
+; CHECK-NEXT:    [[R:%.*]] = and i6 [[TMP2]], 14
 ; CHECK-NEXT:    ret i6 [[R]]
 ;
   %s = lshr i8 %x, 2
@@ -17,19 +17,22 @@ define i6 @trunc_lshr(i8 %x) {
   ret i6 %r
 }
 
+; The 'and' is eliminated.
+
 define i6 @trunc_lshr_exact_mask(i8 %x) {
 ; CHECK-LABEL: @trunc_lshr_exact_mask(
-; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 2
-; CHECK-NEXT:    [[T:%.*]] = trunc i8 [[S]] to i6
-; CHECK-NEXT:    [[R:%.*]] = and i6 [[T]], 15
-; CHECK-NEXT:    ret i6 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i8 [[X:%.*]] to i6
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i6 [[TMP1]], 2
+; CHECK-NEXT:    ret i6 [[TMP2]]
 ;
   %s = lshr i8 %x, 2
   %t = trunc i8 %s to i6
   %r = and i6 %t, 15
   ret i6 %r
 }
 
+; negative test - a high bit of x is in the result
+
 define i6 @trunc_lshr_big_mask(i8 %x) {
 ; CHECK-LABEL: @trunc_lshr_big_mask(
 ; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 2
@@ -43,6 +46,8 @@ define i6 @trunc_lshr_big_mask(i8 %x) {
   ret i6 %r
 }
 
+; negative test - too many uses
+
 define i6 @trunc_lshr_use1(i8 %x) {
 ; CHECK-LABEL: @trunc_lshr_use1(
 ; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 2
@@ -58,6 +63,8 @@ define i6 @trunc_lshr_use1(i8 %x) {
   ret i6 %r
 }
 
+; negative test - too many uses
+
 define i6 @trunc_lshr_use2(i8 %x) {
 ; CHECK-LABEL: @trunc_lshr_use2(
 ; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 2
@@ -73,11 +80,13 @@ define i6 @trunc_lshr_use2(i8 %x) {
   ret i6 %r
 }
 
+; Splat vectors are ok.
+
 define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @trunc_lshr_vec_splat(
-; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i16> [[X:%.*]], <i16 5, i16 5>
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[S]] to <2 x i7>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i7> [[T]], <i7 1, i7 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], <i7 5, i7 5>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i7> [[TMP2]], <i7 1, i7 1>
 ; CHECK-NEXT:    ret <2 x i7> [[R]]
 ;
   %s = lshr <2 x i16> %x, <i16 5, i16 5>
@@ -86,19 +95,22 @@ define <2 x i7> @trunc_lshr_vec_splat(<2 x i16> %x) {
   ret <2 x i7> %r
 }
 
+; The 'and' is eliminated.
+
 define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) {
 ; CHECK-LABEL: @trunc_lshr_vec_splat_exact_mask(
-; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i16> [[X:%.*]], <i16 6, i16 6>
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[S]] to <2 x i7>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i7> [[T]], <i7 1, i7 1>
-; CHECK-NEXT:    ret <2 x i7> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i7>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i7> [[TMP1]], <i7 6, i7 6>
+; CHECK-NEXT:    ret <2 x i7> [[TMP2]]
 ;
   %s = lshr <2 x i16> %x, <i16 6, i16 6>
   %t = trunc <2 x i16> %s to <2 x i7>
   %r = and <2 x i7> %t, <i7 1, i7 1>
   ret <2 x i7> %r
 }
 
+; negative test - the shift is too big for the narrow type
+
 define <2 x i7> @trunc_lshr_big_shift(<2 x i16> %x) {
 ; CHECK-LABEL: @trunc_lshr_big_shift(
 ; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i16> [[X:%.*]], <i16 7, i16 7>
@@ -112,11 +124,13 @@ define <2 x i7> @trunc_lshr_big_shift(<2 x i16> %x) {
   ret <2 x i7> %r
 }
 
+; High bits could also be set rather than cleared.
+
 define i6 @or_trunc_lshr(i8 %x) {
 ; CHECK-LABEL: @or_trunc_lshr(
-; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[T:%.*]] = trunc i8 [[S]] to i6
-; CHECK-NEXT:    [[R:%.*]] = or i6 [[T]], -32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i8 [[X:%.*]] to i6
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i6 [[TMP1]], 1
+; CHECK-NEXT:    [[R:%.*]] = or i6 [[TMP2]], -32
 ; CHECK-NEXT:    ret i6 [[R]]
 ;
   %s = lshr i8 %x, 1
@@ -127,9 +141,9 @@ define i6 @or_trunc_lshr(i8 %x) {
 
 define i6 @or_trunc_lshr_more(i8 %x) {
 ; CHECK-LABEL: @or_trunc_lshr_more(
-; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[T:%.*]] = trunc i8 [[S]] to i6
-; CHECK-NEXT:    [[R:%.*]] = or i6 [[T]], -4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i8 [[X:%.*]] to i6
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i6 [[TMP1]], 4
+; CHECK-NEXT:    [[R:%.*]] = or i6 [[TMP2]], -4
 ; CHECK-NEXT:    ret i6 [[R]]
 ;
   %s = lshr i8 %x, 4
@@ -138,6 +152,8 @@ define i6 @or_trunc_lshr_more(i8 %x) {
   ret i6 %r
 }
 
+; negative test - need all high bits to be undemanded
+
 define i6 @or_trunc_lshr_small_mask(i8 %x) {
 ; CHECK-LABEL: @or_trunc_lshr_small_mask(
 ; CHECK-NEXT:    [[S:%.*]] = lshr i8 [[X:%.*]], 4
