Skip to content

Commit 9ff9c1d

Browse files
committed
[InstCombine] matchRotate - support (uniform) constant rotation amounts (PR46895)
This patch adds handling of rotation patterns with constant shift amounts. The next step will be supporting non-uniform constant vectors. Differential Revision: https://reviews.llvm.org/D87452
1 parent 994ef4e commit 9ff9c1d

File tree

5 files changed

+38
-52
lines changed

5 files changed

+38
-52
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2087,8 +2087,6 @@ static Instruction *matchRotate(Instruction &Or) {
20872087
// TODO: Can we reduce the code duplication between this and the related
20882088
// rotate matching code under visitSelect and visitTrunc?
20892089
unsigned Width = Or.getType()->getScalarSizeInBits();
2090-
if (!isPowerOf2_32(Width))
2091-
return nullptr;
20922090

20932091
// First, find an or'd pair of opposite shifts with the same shifted operand:
20942092
// or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
@@ -2110,6 +2108,18 @@ static Instruction *matchRotate(Instruction &Or) {
21102108
// Match the shift amount operands for a rotate pattern. This always matches
21112109
// a subtraction on the R operand.
21122110
auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
2111+
// Check for constant shift amounts that sum to the bitwidth.
2112+
// TODO: Support non-uniform shift amounts.
2113+
const APInt *LC, *RC;
2114+
if (match(L, m_APInt(LC)) && match(R, m_APInt(RC)))
2115+
if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
2116+
return L;
2117+
2118+
// For non-constant cases we don't support non-pow2 shift masks.
2119+
// TODO: Is it worth matching urem as well?
2120+
if (!isPowerOf2_32(Width))
2121+
return nullptr;
2122+
21132123
// The shift amount may be masked with negation:
21142124
// (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
21152125
Value *X;

llvm/test/Transforms/InstCombine/bswap.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,7 @@ define i32 @bswap32_and_first(i32 %x) {
123123

124124
define i32 @bswap32_and_first_extra_use(i32 %x) {
125125
; CHECK-LABEL: @bswap32_and_first_extra_use(
126-
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
127-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 16
128-
; CHECK-NEXT: [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
126+
; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16)
129127
; CHECK-NEXT: [[T:%.*]] = and i32 [[SWAPHALF]], 16711935
130128
; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]])
131129
; CHECK-NEXT: call void @extra_use(i32 [[T]])
@@ -169,10 +167,8 @@ define i32 @bswap32_shl_first(i32 %x) {
169167

170168
define i32 @bswap32_shl_first_extra_use(i32 %x) {
171169
; CHECK-LABEL: @bswap32_shl_first_extra_use(
172-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16
173-
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 24
174-
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[SHR]], 8
175-
; CHECK-NEXT: [[T:%.*]] = or i32 [[TMP1]], [[TMP2]]
170+
; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16)
171+
; CHECK-NEXT: [[T:%.*]] = shl i32 [[SWAPHALF]], 8
176172
; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]])
177173
; CHECK-NEXT: call void @extra_use(i32 [[T]])
178174
; CHECK-NEXT: ret i32 [[BSWAP]]

llvm/test/Transforms/InstCombine/fsh.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,9 +521,9 @@ define i33 @fshr_multi_use(i33 %a) {
521521

522522
define i33 @expanded_fshr_multi_use(i33 %a) {
523523
; CHECK-LABEL: @expanded_fshr_multi_use(
524-
; CHECK-NEXT: [[TMP:%.*]] = lshr i33 [[A:%.*]], 1
525-
; CHECK-NEXT: [[C:%.*]] = lshr i33 [[A]], 24
526-
; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[TMP]]
524+
; CHECK-NEXT: [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
525+
; CHECK-NEXT: [[C:%.*]] = lshr i33 [[B]], 23
526+
; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[B]]
527527
; CHECK-NEXT: [[E:%.*]] = and i33 [[D]], 31
528528
; CHECK-NEXT: ret i33 [[E]]
529529
;

llvm/test/Transforms/InstCombine/or-concat.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,9 @@ define <2 x i64> @concat_bswap32_unary_split_vector(<2 x i64> %a0) {
4747

4848
define i64 @concat_bswap32_unary_flip(i64 %a0) {
4949
; CHECK-LABEL: @concat_bswap32_unary_flip(
50-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
51-
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[A0]], 32
52-
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
53-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
54-
; CHECK-NEXT: ret i64 [[TMP4]]
50+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
51+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
52+
; CHECK-NEXT: ret i64 [[TMP2]]
5553
;
5654
%1 = lshr i64 %a0, 32
5755
%2 = trunc i64 %1 to i32
@@ -67,11 +65,9 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) {
6765

6866
define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) {
6967
; CHECK-LABEL: @concat_bswap32_unary_flip_vector(
70-
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
71-
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
72-
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
73-
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP3]])
74-
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
68+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
69+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
70+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
7571
;
7672
%1 = lshr <2 x i64> %a0, <i64 32, i64 32>
7773
%2 = trunc <2 x i64> %1 to <2 x i32>
@@ -162,11 +158,9 @@ define <2 x i64> @concat_bitreverse32_unary_split_vector(<2 x i64> %a0) {
162158

163159
define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
164160
; CHECK-LABEL: @concat_bitreverse32_unary_flip(
165-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A0:%.*]], 32
166-
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[A0]], 32
167-
; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
168-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP3]])
169-
; CHECK-NEXT: ret i64 [[TMP4]]
161+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[A0:%.*]], i64 [[A0]], i64 32)
162+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[TMP1]])
163+
; CHECK-NEXT: ret i64 [[TMP2]]
170164
;
171165
%1 = lshr i64 %a0, 32
172166
%2 = trunc i64 %1 to i32
@@ -182,11 +176,9 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
182176

183177
define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) {
184178
; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector(
185-
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[A0:%.*]], <i64 32, i64 32>
186-
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[A0]], <i64 32, i64 32>
187-
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
188-
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP3]])
189-
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
179+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
180+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
181+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
190182
;
191183
%1 = lshr <2 x i64> %a0, <i64 32, i64 32>
192184
%2 = trunc <2 x i64> %1 to <2 x i32>

llvm/test/Transforms/InstCombine/rotate.ll

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,14 @@
33

44
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
55

6-
; TODO: Canonicalize rotate by constant to funnel shift intrinsics.
6+
; Canonicalize rotate by constant to funnel shift intrinsics.
77
; This should help cost modeling for vectorization, inlining, etc.
88
; If a target does not have a rotate instruction, the expansion will
99
; be exactly these same 3 basic ops (shl/lshr/or).
1010

1111
define i32 @rotl_i32_constant(i32 %x) {
1212
; CHECK-LABEL: @rotl_i32_constant(
13-
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
14-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 21
15-
; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
13+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 11)
1614
; CHECK-NEXT: ret i32 [[R]]
1715
;
1816
%shl = shl i32 %x, 11
@@ -23,9 +21,7 @@ define i32 @rotl_i32_constant(i32 %x) {
2321

2422
define i42 @rotr_i42_constant(i42 %x) {
2523
; CHECK-LABEL: @rotr_i42_constant(
26-
; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31
27-
; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X]], 11
28-
; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
24+
; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[X:%.*]], i42 [[X]], i42 31)
2925
; CHECK-NEXT: ret i42 [[R]]
3026
;
3127
%shl = shl i42 %x, 31
@@ -36,9 +32,7 @@ define i42 @rotr_i42_constant(i42 %x) {
3632

3733
define i8 @rotr_i8_constant_commute(i8 %x) {
3834
; CHECK-LABEL: @rotr_i8_constant_commute(
39-
; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 5
40-
; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], 3
41-
; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
35+
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 5)
4236
; CHECK-NEXT: ret i8 [[R]]
4337
;
4438
%shl = shl i8 %x, 5
@@ -49,9 +43,7 @@ define i8 @rotr_i8_constant_commute(i8 %x) {
4943

5044
define i88 @rotl_i88_constant_commute(i88 %x) {
5145
; CHECK-LABEL: @rotl_i88_constant_commute(
52-
; CHECK-NEXT: [[SHL:%.*]] = shl i88 [[X:%.*]], 44
53-
; CHECK-NEXT: [[SHR:%.*]] = lshr i88 [[X]], 44
54-
; CHECK-NEXT: [[R:%.*]] = or i88 [[SHL]], [[SHR]]
46+
; CHECK-NEXT: [[R:%.*]] = call i88 @llvm.fshl.i88(i88 [[X:%.*]], i88 [[X]], i88 44)
5547
; CHECK-NEXT: ret i88 [[R]]
5648
;
5749
%shl = shl i88 %x, 44
@@ -64,9 +56,7 @@ define i88 @rotl_i88_constant_commute(i88 %x) {
6456

6557
define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
6658
; CHECK-LABEL: @rotl_v2i16_constant_splat(
67-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
68-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[X]], <i16 15, i16 15>
69-
; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
59+
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
7060
; CHECK-NEXT: ret <2 x i16> [[R]]
7161
;
7262
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -79,9 +69,7 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
7969

8070
define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
8171
; CHECK-LABEL: @rotr_v2i17_constant_splat(
82-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], <i17 12, i17 12>
83-
; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X]], <i17 5, i17 5>
84-
; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
72+
; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>)
8573
; CHECK-NEXT: ret <2 x i17> [[R]]
8674
;
8775
%shl = shl <2 x i17> %x, <i17 12, i17 12>
@@ -90,7 +78,7 @@ define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
9078
ret <2 x i17> %r
9179
}
9280

93-
; Allow arbitrary shift constants.
81+
; TODO: Allow arbitrary shift constants.
9482

9583
define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
9684
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(

0 commit comments

Comments (0)