Skip to content

Commit 8930135

Browse files
committed
[AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16
Legalizing shuffle vector splats with mismatching vector sizes generates G_UNMERGE(G_DUPLANE16). The G_DUPLANE instructions can handle different vector sizes (128-bit and 64-bit output, for example), so we can combine away the unmerge. The combine is added for G_DUPLANE8, G_DUPLANE16, G_DUPLANE32 and G_DUPLANE64.
1 parent acd264d commit 8930135

File tree

5 files changed

+291
-669
lines changed

5 files changed

+291
-669
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,21 @@ def form_duplane : GICombineRule <
172172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
173173
>;
174174

175+
// Clean up G_UNMERGE(G_DUPLANE<N>) -> G_DUPLANE<N> when the unmerged result is
// 64 bits wide. Instantiated below for G_DUPLANE8/16/32/64.
176+
class unmerge_duplane<Instruction Op> : GICombineRule <
177+
(defs root:$root),
178+
(match (Op $a, $src, $c),
179+
(G_UNMERGE_VALUES $d1, $d2, $a):$root,
180+
[{ return MRI.getType(${d1}.getReg()).getSizeInBits() == 64; }]),
181+
(apply (GIReplaceReg $d2, $d1), (Op $d1, $src, $c))
182+
>;
183+
def unmerge_duplane8 : unmerge_duplane<G_DUPLANE8>;
184+
def unmerge_duplane16 : unmerge_duplane<G_DUPLANE16>;
185+
def unmerge_duplane32 : unmerge_duplane<G_DUPLANE32>;
186+
def unmerge_duplane64 : unmerge_duplane<G_DUPLANE64>;
187+
def unmerge_duplanes : GICombineGroup<[unmerge_duplane8, unmerge_duplane16,
188+
unmerge_duplane32, unmerge_duplane64]>;
189+
175190
def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
176191
uzp, trn, fullrev, shuf_to_ins]>;
177192

@@ -325,7 +340,8 @@ def AArch64PostLegalizerLowering
325340
lower_vector_fcmp, form_truncstore,
326341
vector_sext_inreg_to_shift,
327342
unmerge_ext_to_unmerge, lower_mulv2s64,
328-
vector_unmerge_lowering, insertelt_nonconst]> {
343+
vector_unmerge_lowering, insertelt_nonconst,
344+
unmerge_duplanes]> {
329345
}
330346

331347
// Post-legalization combines which are primarily optimizations.

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -401,16 +401,10 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
401401
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
402402
; the formation of an indexed-by-7 MLS.
403403
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
404-
; CHECK-SD-LABEL: test_high_splat:
405-
; CHECK-SD: // %bb.0: // %entry
406-
; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
407-
; CHECK-SD-NEXT: ret
408-
;
409-
; CHECK-GI-LABEL: test_high_splat:
410-
; CHECK-GI: // %bb.0: // %entry
411-
; CHECK-GI-NEXT: dup.8h v2, v2[7]
412-
; CHECK-GI-NEXT: mls.4h v0, v2, v1
413-
; CHECK-GI-NEXT: ret
404+
; CHECK-LABEL: test_high_splat:
405+
; CHECK: // %bb.0: // %entry
406+
; CHECK-NEXT: mls.4h v0, v1, v2[7]
407+
; CHECK-NEXT: ret
414408
entry:
415409
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
416410
%mul = mul <4 x i16> %shuffle, %b

0 commit comments

Comments
 (0)