Skip to content

Commit d06a574

Browse files
committed
[AArch64][GlobalISel] Prefer DUPLANE to REV
Some shuffles containing undefs can match multiple instructions; for example, <3,u,u,u> can be lowered as either a duplane or a rev. This changes the order in which the different shuffle lowerings are considered so that duplane is preferred, since it is simpler and more likely to lead to further combines.
1 parent 3894bdc commit d06a574

File tree

8 files changed

+42
-57
lines changed

8 files changed

+42
-57
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
172172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
173173
>;
174174

175-
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
176-
form_duplane, shuf_to_ins]>;
175+
def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
176+
uzp, trn, fullrev, shuf_to_ins]>;
177177

178178
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
179179
def vector_unmerge_lowering : GICombineRule <

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ body: |
255255
; CHECK: liveins: $q0, $q1
256256
; CHECK-NEXT: {{ $}}
257257
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
258-
; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
258+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
259+
; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
259260
; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
260261
; CHECK-NEXT: RET_ReallyLR implicit $q0
261262
%v1:_(<8 x s16>) = COPY $q0
@@ -298,8 +299,8 @@ body: |
298299
; CHECK: liveins: $q0
299300
; CHECK-NEXT: {{ $}}
300301
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
301-
; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
302-
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
302+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
303+
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
303304
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
304305
; CHECK-NEXT: RET_ReallyLR implicit $q0
305306
%v1:_(<2 x s64>) = COPY $q0

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,11 @@ body: |
3838
; CHECK: liveins: $d0, $d1
3939
; CHECK-NEXT: {{ $}}
4040
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
41-
; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
42-
; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
41+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
42+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
43+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
44+
; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
45+
; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
4346
; CHECK-NEXT: RET_ReallyLR implicit $d0
4447
%0:_(<2 x s32>) = COPY $d0
4548
%1:_(<2 x s32>) = COPY $d1

llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
139139
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
140140
; CHECK-GI-NEXT: mov d1, v0.d[1]
141141
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
142-
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
142+
; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
143143
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
144144
; CHECK-GI-NEXT: fmov w0, s0
145145
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -677,23 +677,14 @@ entry:
677677
}
678678

679679
define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
680-
; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
681-
; CHECK-SD: // %bb.0: // %entry
682-
; CHECK-SD-NEXT: fmov s1, w0
683-
; CHECK-SD-NEXT: fmov s2, w1
684-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
685-
; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
686-
; CHECK-SD-NEXT: umov w0, v1.h[0]
687-
; CHECK-SD-NEXT: ret
688-
;
689-
; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
690-
; CHECK-GI: // %bb.0: // %entry
691-
; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
692-
; CHECK-GI-NEXT: fmov s1, w0
693-
; CHECK-GI-NEXT: fmov s2, w1
694-
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
695-
; CHECK-GI-NEXT: umov w0, v1.h[0]
696-
; CHECK-GI-NEXT: ret
680+
; CHECK-LABEL: test_vqrdmlahh_lane_s16:
681+
; CHECK: // %bb.0: // %entry
682+
; CHECK-NEXT: fmov s1, w0
683+
; CHECK-NEXT: fmov s2, w1
684+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
685+
; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
686+
; CHECK-NEXT: umov w0, v1.h[0]
687+
; CHECK-NEXT: ret
697688
entry:
698689
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
699690
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -739,7 +730,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
739730
;
740731
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
741732
; CHECK-GI: // %bb.0: // %entry
742-
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
733+
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
743734
; CHECK-GI-NEXT: fmov s1, w0
744735
; CHECK-GI-NEXT: fmov s2, w1
745736
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
@@ -857,23 +848,14 @@ entry:
857848
}
858849

859850
define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
860-
; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
861-
; CHECK-SD: // %bb.0: // %entry
862-
; CHECK-SD-NEXT: fmov s1, w0
863-
; CHECK-SD-NEXT: fmov s2, w1
864-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
865-
; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
866-
; CHECK-SD-NEXT: umov w0, v1.h[0]
867-
; CHECK-SD-NEXT: ret
868-
;
869-
; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
870-
; CHECK-GI: // %bb.0: // %entry
871-
; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
872-
; CHECK-GI-NEXT: fmov s1, w0
873-
; CHECK-GI-NEXT: fmov s2, w1
874-
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
875-
; CHECK-GI-NEXT: umov w0, v1.h[0]
876-
; CHECK-GI-NEXT: ret
851+
; CHECK-LABEL: test_vqrdmlshh_lane_s16:
852+
; CHECK: // %bb.0: // %entry
853+
; CHECK-NEXT: fmov s1, w0
854+
; CHECK-NEXT: fmov s2, w1
855+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
856+
; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
857+
; CHECK-NEXT: umov w0, v1.h[0]
858+
; CHECK-NEXT: ret
877859
entry:
878860
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
879861
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -919,7 +901,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
919901
;
920902
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
921903
; CHECK-GI: // %bb.0: // %entry
922-
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
904+
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
923905
; CHECK-GI-NEXT: fmov s1, w0
924906
; CHECK-GI-NEXT: fmov s2, w1
925907
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
1515
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
1616
; CHECK-GI-NEXT: mov v1.s[0], w8
1717
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
18-
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
18+
; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
1919
; CHECK-GI-NEXT: ret
2020
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
2121
; Can't optimize the following bitcast to scalar_to_vector.
@@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
3535
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
3636
; CHECK-GI-NEXT: mov v1.s[0], w8
3737
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
38-
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
38+
; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
3939
; CHECK-GI-NEXT: ret
4040
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
4141
; Can't optimize the following bitcast to scalar_to_vector.

llvm/test/CodeGen/AArch64/dup.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -675,13 +675,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
675675
;
676676
; CHECK-GI-LABEL: duplane0_v3i64:
677677
; CHECK-GI: // %bb.0: // %entry
678-
; CHECK-GI-NEXT: fmov d2, d0
678+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
679679
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
680-
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
681-
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
682-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
680+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
681+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
683682
; CHECK-GI-NEXT: mov d1, v0.d[1]
684-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
683+
; CHECK-GI-NEXT: fmov d2, d0
685684
; CHECK-GI-NEXT: ret
686685
entry:
687686
%c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -1517,13 +1516,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
15171516
;
15181517
; CHECK-GI-LABEL: duplane0_v3double:
15191518
; CHECK-GI: // %bb.0: // %entry
1520-
; CHECK-GI-NEXT: fmov d2, d0
1519+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
15211520
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1522-
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
1523-
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
1524-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
1521+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1522+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
15251523
; CHECK-GI-NEXT: mov d1, v0.d[1]
1526-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1524+
; CHECK-GI-NEXT: fmov d2, d0
15271525
; CHECK-GI-NEXT: ret
15281526
entry:
15291527
%c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,9 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
406406
; CHECK-GI-NEXT: fmov x8, d5
407407
; CHECK-GI-NEXT: mov v1.d[0], x8
408408
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8
409-
; CHECK-GI-NEXT: fmov x10, d1
409+
; CHECK-GI-NEXT: dup v1.2d, v1.d[0]
410410
; CHECK-GI-NEXT: mov d2, v0.d[1]
411+
; CHECK-GI-NEXT: fmov x10, d1
411412
; CHECK-GI-NEXT: fmov d1, d2
412413
; CHECK-GI-NEXT: fmov d2, x10
413414
; CHECK-GI-NEXT: ret

0 commit comments

Comments
 (0)