Skip to content

Commit 9716d75

Browse files
[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a
1 parent 5cd6e21 commit 9716d75

File tree

10 files changed

+231
-100
lines changed

10 files changed

+231
-100
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,10 @@ class CombinerHelper {
874874
/// Remove references to rhs if it is undef
875875
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);
876876

877+
/// Turn shuffle a, b, mask -> shuffle b, undef, commuted_mask iff mask does
878+
/// not reference a; likewise shuffle a, undef, mask iff it does not reference b.
879+
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
880+
877881
/// Use a function which takes in a MachineIRBuilder to perform a combine.
878882
/// By default, it erases the instruction def'd on \p MO from the function.
879883
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1585,6 +1585,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
15851585
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
15861586
>;
15871587

1588+
// Rewrite a G_SHUFFLE_VECTOR through the build function produced by
// CombinerHelper::matchShuffleDisjointMask (mask uses only one of the inputs).
def combine_shuffle_disjoint_mask : GICombineRule<
1589+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1590+
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
1591+
[{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
1592+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
1593+
>;
1594+
15881595
// match_extract_of_element and insert_vector_elt_oob must be the first!
15891596
def vector_ops_combines: GICombineGroup<[
15901597
match_extract_of_element_undef_vector,
@@ -1936,7 +1943,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
19361943
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
19371944

19381945
def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
1939-
combine_shuffle_undef_rhs]>;
1946+
combine_shuffle_undef_rhs,
1947+
combine_shuffle_disjoint_mask]>;
19401948

19411949
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
19421950
vector_ops_combines, freeze_combines, cast_combines,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7724,3 +7724,69 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
77247724

77257725
return true;
77267726
}
7727+
7728+
/// Remap \p Mask in place so it stays valid after the two shuffle sources are
/// swapped: entries below \p NumElems are moved up by \p NumElems, all other
/// non-negative entries are moved down by \p NumElems. Negative (undef)
/// entries are left untouched.
static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
  const int Boundary = static_cast<int>(NumElems);
  for (int &Elt : Mask) {
    // Undef lanes do not refer to either source, so there is nothing to remap.
    if (Elt < 0)
      continue;

    // Flip the lane to the equivalent position in the other source.
    Elt = (Elt < Boundary) ? Elt + NumElems : Elt - NumElems;
  }
}
7741+
7742+
/// Match a G_SHUFFLE_VECTOR whose mask selects lanes from exactly one of its
/// two sources. On success \p MatchInfo is set to a builder callback that
/// emits an equivalent shuffle with the referenced source as first operand and
/// a G_IMPLICIT_DEF as second operand.
///
/// \returns true if \p MatchInfo was populated, false if the combine does not
/// apply.
bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
                                              BuildFnTy &MatchInfo) {
  auto &Shuffle = cast<GShuffleVector>(MI);
  const Register Src1 = Shuffle.getSrc1Reg();
  const Register Src2 = Shuffle.getSrc2Reg();

  // A shuffle that already has an undef input must not be matched again;
  // otherwise the instruction built below would be rewritten endlessly.
  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Src1, MRI) ||
      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Src2, MRI))
    return false;

  const LLT DstTy = MRI.getType(Shuffle.getReg(0));
  const LLT Src1Ty = MRI.getType(Src1);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
    return false;

  ArrayRef<int> Mask = Shuffle.getMask();
  // A scalar source contributes a single lane.
  const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
  const int FirstSrc2Lane = static_cast<int>(NumSrcElems);

  // Record which of the two sources the defined mask lanes refer to.
  bool UsesSrc1 = false;
  bool UsesSrc2 = false;
  for (const int Lane : Mask) {
    if (Lane < 0)
      continue;

    if (Lane < FirstSrc2Lane)
      UsesSrc1 = true;
    else
      UsesSrc2 = true;
  }

  // Only proceed when exactly one source is referenced; a mask that uses both
  // sources, or none at all, is left alone.
  if (UsesSrc1 == UsesSrc2)
    return false;

  // When only the second source is used it becomes the first operand, so the
  // mask has to be remapped to index into the first operand's lanes.
  SmallVector<int, 16> NewMask(Mask);
  if (UsesSrc2)
    commuteMask(NewMask, NumSrcElems);
  const Register KeptSrc = UsesSrc2 ? Src2 : Src1;

  MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
    auto Undef = B.buildUndef(Src1Ty);
    B.buildShuffleVector(Shuffle.getReg(0), KeptSrc, Undef, NewMask);
  };

  return true;
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -598,9 +598,9 @@ body: |
598598
; CHECK: liveins: $x0, $x1
599599
; CHECK-NEXT: {{ $}}
600600
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
601-
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
602601
; CHECK-NEXT: %idx:_(s64) = COPY $x1
603-
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
602+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
603+
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
604604
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
605605
; CHECK-NEXT: $w0 = COPY %extract(s32)
606606
; CHECK-NEXT: RET_ReallyLR implicit $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,9 +1395,9 @@ body: |
13951395
; CHECK: liveins: $x0, $x1
13961396
; CHECK-NEXT: {{ $}}
13971397
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
1398-
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
13991398
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
1400-
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
1399+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
1400+
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
14011401
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
14021402
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64)
14031403
; CHECK-NEXT: $w0 = COPY %extract(s32)
@@ -1422,9 +1422,9 @@ body: |
14221422
; CHECK-NEXT: {{ $}}
14231423
; CHECK-NEXT: %elt:_(s32) = COPY $w0
14241424
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
1425-
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
14261425
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
1427-
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
1426+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
1427+
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
14281428
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
14291429
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt
14301430
; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64)
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: shuffle_vector_unused_lhs
6+
tracksRegLiveness: true
7+
body: |
8+
bb.1:
9+
liveins: $d0, $d1
10+
11+
; CHECK-LABEL: name: shuffle_vector_unused_lhs
12+
; CHECK: liveins: $d0, $d1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1
15+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
16+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1, 0, 1, 0)
17+
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
18+
%0:_(<2 x s32>) = COPY $d0
19+
%1:_(<2 x s32>) = COPY $d1
20+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2)
21+
RET_ReallyLR implicit %2
22+
...
23+
24+
---
25+
name: shuffle_vector_unused_rhs
26+
tracksRegLiveness: true
27+
body: |
28+
bb.1:
29+
liveins: $d0, $d1
30+
31+
; CHECK-LABEL: name: shuffle_vector_unused_rhs
32+
; CHECK: liveins: $d0, $d1
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
35+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
36+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1)
37+
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
38+
%0:_(<2 x s32>) = COPY $d0
39+
%1:_(<2 x s32>) = COPY $d1
40+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1)
41+
RET_ReallyLR implicit %2
42+
...
43+
44+
---
45+
name: shuffle_vector_both_used
46+
tracksRegLiveness: true
47+
body: |
48+
bb.1:
49+
liveins: $d0, $d1
50+
51+
; CHECK-LABEL: name: shuffle_vector_both_used
52+
; CHECK: liveins: $d0, $d1
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
55+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
56+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
57+
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
58+
%0:_(<2 x s32>) = COPY $d0
59+
%1:_(<2 x s32>) = COPY $d1
60+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3)
61+
RET_ReallyLR implicit %2
62+
...
63+
64+
---
65+
name: shuffle_vector_undef_elems
66+
tracksRegLiveness: true
67+
body: |
68+
bb.1:
69+
liveins: $d0, $d1
70+
71+
; CHECK-LABEL: name: shuffle_vector_undef_elems
72+
; CHECK: liveins: $d0, $d1
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
75+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
76+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef)
77+
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
78+
%0:_(<2 x s32>) = COPY $d0
79+
%1:_(<2 x s32>) = COPY $d1
80+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
81+
RET_ReallyLR implicit %2
82+
...
83+
84+
---
85+
name: shuffle_vector_scalar
86+
tracksRegLiveness: true
87+
body: |
88+
bb.1:
89+
liveins: $x0, $x1
90+
91+
; CHECK-LABEL: name: shuffle_vector_scalar
92+
; CHECK: liveins: $x0, $x1
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
95+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
96+
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
97+
%0:_(s64) = COPY $x0
98+
%1:_(s64) = COPY $x1
99+
%2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
100+
RET_ReallyLR implicit %2
101+
...

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
201201
; CHECK-GI: // %bb.0: // %vector.header
202202
; CHECK-GI-NEXT: and w9, w3, #0xffff
203203
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
204-
; CHECK-GI-NEXT: dup v1.4s, w9
204+
; CHECK-GI-NEXT: dup v0.4s, w9
205205
; CHECK-GI-NEXT: mov w9, w0
206-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
206+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
207207
; CHECK-GI-NEXT: and x8, x9, #0xfffffff8
208208
; CHECK-GI-NEXT: .LBB2_1: // %vector.body
209209
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
210210
; CHECK-GI-NEXT: ldrh w9, [x2], #16
211211
; CHECK-GI-NEXT: subs x8, x8, #8
212-
; CHECK-GI-NEXT: mov v0.s[0], w9
212+
; CHECK-GI-NEXT: mov v2.s[0], w9
213213
; CHECK-GI-NEXT: mov w9, w0
214214
; CHECK-GI-NEXT: add w0, w0, #8
215215
; CHECK-GI-NEXT: lsl x9, x9, #2
216-
; CHECK-GI-NEXT: tbl v3.16b, { v0.16b, v1.16b }, v2.16b
217-
; CHECK-GI-NEXT: mul v3.4s, v1.4s, v3.4s
218-
; CHECK-GI-NEXT: str q3, [x1, x9]
216+
; CHECK-GI-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v1.16b
217+
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
218+
; CHECK-GI-NEXT: str q2, [x1, x9]
219219
; CHECK-GI-NEXT: b.ne .LBB2_1
220220
; CHECK-GI-NEXT: // %bb.2: // %for.end12
221221
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/aarch64-smull.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
23852385
;
23862386
; CHECK-GI-LABEL: do_stuff:
23872387
; CHECK-GI: // %bb.0:
2388-
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
23892388
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
2390-
; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8
2389+
; CHECK-GI-NEXT: mov d2, v1.d[1]
23912390
; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s
23922391
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
23932392
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s

llvm/test/CodeGen/AArch64/arm64-ext.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,10 @@ define <8 x i8> @test_vextd_undef(<8 x i8> %tmp1, <8 x i8> %tmp2) {
6868
}
6969

7070
define <8 x i8> @test_vextd_undef2(<8 x i8> %tmp1, <8 x i8> %tmp2) {
71-
; CHECK-SD-LABEL: test_vextd_undef2:
72-
; CHECK-SD: // %bb.0:
73-
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v0.8b, #6
74-
; CHECK-SD-NEXT: ret
75-
;
76-
; CHECK-GI-LABEL: test_vextd_undef2:
77-
; CHECK-GI: // %bb.0:
78-
; CHECK-GI-NEXT: ext v0.8b, v1.8b, v0.8b, #6
79-
; CHECK-GI-NEXT: ret
71+
; CHECK-LABEL: test_vextd_undef2:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #6
74+
; CHECK-NEXT: ret
8075
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
8176
ret <8 x i8> %tmp3
8277
}

0 commit comments

Comments
 (0)