Skip to content

Commit 6cbd91e

Browse files
authored
[AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16 (#142731)
We will generate G_UNMERGE(G_DUPLANE16) due to the legalization of shuffle vector splats with mismatching vector sizes. The G_DUPLANE instructions can handle different vector sizes (a 128-bit source with a 64-bit result, for example), so we can combine away the unmerge.
1 parent 274f5a8 commit 6cbd91e

File tree

6 files changed

+382
-669
lines changed

6 files changed

+382
-669
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,21 @@ def form_duplane : GICombineRule <
172172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
173173
>;
174174

175+
// Clean up G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16
176+
class unmerge_duplane<Instruction Op> : GICombineRule <
177+
(defs root:$root),
178+
(match (Op $a, $src, $c),
179+
(G_UNMERGE_VALUES $d1, $d2, $a):$root,
180+
[{ return MRI.getType(${d1}.getReg()).getSizeInBits() == 64; }]),
181+
(apply (GIReplaceReg $d2, $d1), (Op $d1, $src, $c))
182+
>;
183+
def unmerge_duplane8 : unmerge_duplane<G_DUPLANE8>;
184+
def unmerge_duplane16 : unmerge_duplane<G_DUPLANE16>;
185+
def unmerge_duplane32 : unmerge_duplane<G_DUPLANE32>;
186+
// G_DUPLANE64 is not included because its unmerged result is scalar.
187+
def unmerge_duplanes : GICombineGroup<[unmerge_duplane8, unmerge_duplane16,
188+
unmerge_duplane32]>;
189+
175190
def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
176191
uzp, trn, fullrev, shuf_to_ins]>;
177192

@@ -325,7 +340,8 @@ def AArch64PostLegalizerLowering
325340
lower_vector_fcmp, form_truncstore,
326341
vector_sext_inreg_to_shift,
327342
unmerge_ext_to_unmerge, lower_mulv2s64,
328-
vector_unmerge_lowering, insertelt_nonconst]> {
343+
vector_unmerge_lowering, insertelt_nonconst,
344+
unmerge_duplanes]> {
329345
}
330346

331347
// Post-legalization combines which are primarily optimizations.
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: unmerge_dup8
6+
legalized: true
7+
body: |
8+
bb.1.entry:
9+
; CHECK-LABEL: name: unmerge_dup8
10+
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
11+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
12+
; CHECK-NEXT: [[DUPLANE8_:%[0-9]+]]:_(<8 x s8>) = G_DUPLANE8 [[COPY]], [[C]](s64)
13+
; CHECK-NEXT: $d0 = COPY [[DUPLANE8_]](<8 x s8>)
14+
; CHECK-NEXT: $d1 = COPY [[DUPLANE8_]](<8 x s8>)
15+
; CHECK-NEXT: RET_ReallyLR implicit $x0
16+
%0:_(<16 x s8>) = COPY $q0
17+
%1:_(s64) = G_CONSTANT i64 1
18+
%2:_(<16 x s8>) = G_DUPLANE8 %0, %1
19+
%3:_(<8 x s8>), %4:_(<8 x s8>) = G_UNMERGE_VALUES %2
20+
$d0 = COPY %3
21+
$d1 = COPY %4
22+
RET_ReallyLR implicit $x0
23+
24+
...
25+
---
26+
name: unmerge_dup16
27+
legalized: true
28+
body: |
29+
bb.1.entry:
30+
; CHECK-LABEL: name: unmerge_dup16
31+
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
32+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
33+
; CHECK-NEXT: [[DUPLANE16_:%[0-9]+]]:_(<4 x s16>) = G_DUPLANE16 [[COPY]], [[C]](s64)
34+
; CHECK-NEXT: $d0 = COPY [[DUPLANE16_]](<4 x s16>)
35+
; CHECK-NEXT: $d1 = COPY [[DUPLANE16_]](<4 x s16>)
36+
; CHECK-NEXT: RET_ReallyLR implicit $x0
37+
%0:_(<8 x s16>) = COPY $q0
38+
%1:_(s64) = G_CONSTANT i64 1
39+
%2:_(<8 x s16>) = G_DUPLANE16 %0, %1
40+
%3:_(<4 x s16>), %4:_(<4 x s16>) = G_UNMERGE_VALUES %2
41+
$d0 = COPY %3
42+
$d1 = COPY %4
43+
RET_ReallyLR implicit $x0
44+
45+
...
46+
---
47+
name: unmerge_dup32
48+
legalized: true
49+
body: |
50+
bb.1.entry:
51+
; CHECK-LABEL: name: unmerge_dup32
52+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
53+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
54+
; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[COPY]], [[C]](s64)
55+
; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
56+
; CHECK-NEXT: $d1 = COPY [[DUPLANE32_]](<2 x s32>)
57+
; CHECK-NEXT: RET_ReallyLR implicit $x0
58+
%0:_(<4 x s32>) = COPY $q0
59+
%1:_(s64) = G_CONSTANT i64 1
60+
%2:_(<4 x s32>) = G_DUPLANE32 %0, %1
61+
%3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %2
62+
$d0 = COPY %3
63+
$d1 = COPY %4
64+
RET_ReallyLR implicit $x0
65+
66+
...
67+
---
68+
name: unmerge_dup64
69+
legalized: true
70+
body: |
71+
bb.1.entry:
72+
; CHECK-LABEL: name: unmerge_dup64
73+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
74+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
75+
; CHECK-NEXT: [[DUPLANE64_:%[0-9]+]]:_(<2 x s64>) = G_DUPLANE64 [[COPY]], [[C]](s64)
76+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
77+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DUPLANE64_]](<2 x s64>), [[C1]](s64)
78+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
79+
; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DUPLANE64_]](<2 x s64>), [[C2]](s64)
80+
; CHECK-NEXT: $d0 = COPY [[EVEC]](s64)
81+
; CHECK-NEXT: $d1 = COPY [[EVEC1]](s64)
82+
; CHECK-NEXT: RET_ReallyLR implicit $x0
83+
%0:_(<2 x s64>) = COPY $q0
84+
%1:_(s64) = G_CONSTANT i64 1
85+
%2:_(<2 x s64>) = G_DUPLANE64 %0, %1
86+
%3:_(s64), %4:_(s64) = G_UNMERGE_VALUES %2
87+
$d0 = COPY %3
88+
$d1 = COPY %4
89+
RET_ReallyLR implicit $x0
90+
91+
...

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -401,16 +401,10 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
401401
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
402402
; the formation of an indexed-by-7 MLS.
403403
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
404-
; CHECK-SD-LABEL: test_high_splat:
405-
; CHECK-SD: // %bb.0: // %entry
406-
; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
407-
; CHECK-SD-NEXT: ret
408-
;
409-
; CHECK-GI-LABEL: test_high_splat:
410-
; CHECK-GI: // %bb.0: // %entry
411-
; CHECK-GI-NEXT: dup.8h v2, v2[7]
412-
; CHECK-GI-NEXT: mls.4h v0, v2, v1
413-
; CHECK-GI-NEXT: ret
404+
; CHECK-LABEL: test_high_splat:
405+
; CHECK: // %bb.0: // %entry
406+
; CHECK-NEXT: mls.4h v0, v1, v2[7]
407+
; CHECK-NEXT: ret
414408
entry:
415409
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
416410
%mul = mul <4 x i16> %shuffle, %b

0 commit comments

Comments
 (0)