Skip to content

Commit 383a02f

Browse files
committed
[AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16
Legalizing shuffle vector splats with mismatching vector sizes generates G_UNMERGE(G_DUPLANE16). The G_DUPLANE operations can handle different vector sizes (128-bit and 64-bit output, for example), so we can combine away the unmerge.
1 parent 3894bdc commit 383a02f

File tree

5 files changed

+276
-637
lines changed

5 files changed

+276
-637
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,20 @@ def form_duplane : GICombineRule <
172172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
173173
>;
174174

175+
// Clean up G_UNMERGE(G_DUPLANE<N>) -> G_DUPLANE<N> (instantiated below for N = 8, 16, 32, 64) when the unmerged halves are 64 bits wide.
176+
class unmerge_duplane<Instruction Op> : GICombineRule <
177+
(defs root:$root),
178+
(match (Op $a, $src, $c),
179+
(G_UNMERGE_VALUES $d1, $d2, $a):$root,
180+
[{ return MRI.getType(${d1}.getReg()).getSizeInBits() == 64; }]),
181+
(apply (GIReplaceReg $d2, $d1), (Op $d1, $src, $c))
182+
>;
183+
def unmerge_duplane8 : unmerge_duplane<G_DUPLANE8>;
184+
def unmerge_duplane16 : unmerge_duplane<G_DUPLANE16>;
185+
def unmerge_duplane32 : unmerge_duplane<G_DUPLANE32>;
186+
def unmerge_duplane64 : unmerge_duplane<G_DUPLANE64>;
187+
def unmerge_duplanes : GICombineGroup<[unmerge_duplane8, unmerge_duplane16, unmerge_duplane32, unmerge_duplane64]>;
188+
175189
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
176190
form_duplane, shuf_to_ins]>;
177191

@@ -325,7 +339,8 @@ def AArch64PostLegalizerLowering
325339
lower_vector_fcmp, form_truncstore,
326340
vector_sext_inreg_to_shift,
327341
unmerge_ext_to_unmerge, lower_mulv2s64,
328-
vector_unmerge_lowering, insertelt_nonconst]> {
342+
vector_unmerge_lowering, insertelt_nonconst,
343+
unmerge_duplanes]> {
329344
}
330345

331346
// Post-legalization combines which are primarily optimizations.

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -422,16 +422,10 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
422422
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
423423
; the formation of an indexed-by-7 MLS.
424424
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
425-
; CHECK-SD-LABEL: test_high_splat:
426-
; CHECK-SD: // %bb.0: // %entry
427-
; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
428-
; CHECK-SD-NEXT: ret
429-
;
430-
; CHECK-GI-LABEL: test_high_splat:
431-
; CHECK-GI: // %bb.0: // %entry
432-
; CHECK-GI-NEXT: dup.8h v2, v2[7]
433-
; CHECK-GI-NEXT: mls.4h v0, v2, v1
434-
; CHECK-GI-NEXT: ret
425+
; CHECK-LABEL: test_high_splat:
426+
; CHECK: // %bb.0: // %entry
427+
; CHECK-NEXT: mls.4h v0, v1, v2[7]
428+
; CHECK-NEXT: ret
435429
entry:
436430
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
437431
%mul = mul <4 x i16> %shuffle, %b

0 commit comments

Comments
 (0)