Skip to content

Commit 01c8cd6

Browse files
authored
[AArch64][GlobalISel] Full reverse shuffles. (#119083)
A full reverse shuffle of a 128-bit vector needs to be lowered to REV64 followed by EXT. This patch handles more types than SDAG does, as long as the mask satisfies isReverseMask, which keeps the patterns simpler.
1 parent 6cbc64e commit 01c8cd6

File tree

4 files changed

+74
-78
lines changed

4 files changed

+74
-78
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,15 @@ def ext: GICombineRule <
131131
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
132132
>;
133133

134+
// Combine rule for a shuffle that reverses its first source vector.
// Matches a G_SHUFFLE_VECTOR (the root) whose last register operand ($src2) is
// defined by G_IMPLICIT_DEF and whose mask is accepted by
// ShuffleVectorInst::isReverseMask, with the mask's own length passed as the
// second argument. On a match the root is handed to applyFullRev.
// NOTE(review): `$src` is the first operand of the shuffle, which applyFullRev
// reads as the destination; `$matchinfo` is declared but not used by the apply.
def fullrev: GICombineRule <
135+
(defs root:$root, shuffle_matchdata:$matchinfo),
136+
(match (G_IMPLICIT_DEF $src2),
137+
(G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
138+
[{ return ShuffleVectorInst::isReverseMask(${mask}.getShuffleMask(),
139+
${mask}.getShuffleMask().size()); }]),
140+
(apply [{ applyFullRev(*${root}, MRI); }])
141+
>;
142+
134143
def insertelt_nonconst: GICombineRule <
135144
(defs root:$root, shuffle_matchdata:$matchinfo),
136145
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +172,7 @@ def form_duplane : GICombineRule <
163172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
164173
>;
165174

166-
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
175+
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
167176
form_duplane, shuf_to_ins]>;
168177

169178
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,19 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
405405
MI.eraseFromParent();
406406
}
407407

408+
// Replaces MI with a G_REV64 of MI's operand 1 followed by a G_EXT that takes
// the G_REV64 result as both vector inputs and writes MI's operand 0; MI is
// erased afterwards. Asserts that the destination type is 128 bits wide.
// Only operand 1 is read as a source; the mask and the other source are not
// inspected here.
// NOTE(review): presumably reached only through the `fullrev` combine rule,
// which checks that the mask is a reverse mask and that the second source is
// G_IMPLICIT_DEF -- confirm there are no other callers.
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
409+
Register Dst = MI.getOperand(0).getReg();
410+
Register Src = MI.getOperand(1).getReg();
411+
LLT DstTy = MRI.getType(Dst);
412+
assert(DstTy.getSizeInBits() == 128 &&
413+
"Expected 128bit vector in applyFullRev");
414+
MachineIRBuilder MIRBuilder(MI);
415+
auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8); // third G_EXT operand; the tests show it as "#8" on the .16b ext
416+
auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
417+
MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst}); // same register for both vector inputs
418+
MI.eraseFromParent();
419+
}
420+
408421
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
409422
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
410423

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -440,11 +440,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
440440
;
441441
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
442442
; CHECK-GI: // %bb.0: // %entry
443-
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
444-
; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
443+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
445444
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
446-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
447-
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
445+
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
446+
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
448447
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
449448
; CHECK-GI-NEXT: ret
450449
entry:
@@ -493,11 +492,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
493492
;
494493
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
495494
; CHECK-GI: // %bb.0: // %entry
496-
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
497-
; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
495+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
498496
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
499-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
500-
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
497+
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
498+
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
501499
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
502500
; CHECK-GI-NEXT: ret
503501
entry:

llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll

Lines changed: 45 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,11 @@ entry:
2323
}
2424

2525
define <4 x i32> @v4i32(<4 x i32> %a) {
26-
; CHECK-SD-LABEL: v4i32:
27-
; CHECK-SD: // %bb.0: // %entry
28-
; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
29-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
30-
; CHECK-SD-NEXT: ret
31-
;
32-
; CHECK-GI-LABEL: v4i32:
33-
; CHECK-GI: // %bb.0: // %entry
34-
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
35-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
36-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
37-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
38-
; CHECK-GI-NEXT: ret
26+
; CHECK-LABEL: v4i32:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: rev64 v0.4s, v0.4s
29+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
30+
; CHECK-NEXT: ret
3931
entry:
4032
%V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4133
ret <4 x i32> %V128
@@ -52,19 +44,11 @@ entry:
5244
}
5345

5446
define <8 x i16> @v8i16(<8 x i16> %a) {
55-
; CHECK-SD-LABEL: v8i16:
56-
; CHECK-SD: // %bb.0: // %entry
57-
; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
58-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
59-
; CHECK-SD-NEXT: ret
60-
;
61-
; CHECK-GI-LABEL: v8i16:
62-
; CHECK-GI: // %bb.0: // %entry
63-
; CHECK-GI-NEXT: adrp x8, .LCPI4_0
64-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
65-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
66-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
67-
; CHECK-GI-NEXT: ret
47+
; CHECK-LABEL: v8i16:
48+
; CHECK: // %bb.0: // %entry
49+
; CHECK-NEXT: rev64 v0.8h, v0.8h
50+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
51+
; CHECK-NEXT: ret
6852
entry:
6953
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
7054
ret <8 x i16> %V128
@@ -93,6 +77,22 @@ entry:
9377
ret <8 x i16> %V128
9478
}
9579

80+
; Reverses elements 0-3 of an <8 x i16> input and returns them as <4 x i16>:
; the mask <3, 2, 1, 0> is shorter than the source vector and the second
; shuffle operand is poison. The SelectionDAG and GlobalISel expectations
; differ (rev64 on .4h versus rev64 on .8h plus a sub-register kill), so the
; CHECK-SD and CHECK-GI prefixes are kept separate.
define <4 x i16> @v8i16_3(<8 x i16> %a) {
81+
; CHECK-SD-LABEL: v8i16_3:
82+
; CHECK-SD: // %bb.0: // %entry
83+
; CHECK-SD-NEXT: rev64 v0.4h, v0.4h
84+
; CHECK-SD-NEXT: ret
85+
;
86+
; CHECK-GI-LABEL: v8i16_3:
87+
; CHECK-GI: // %bb.0: // %entry
88+
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
89+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
90+
; CHECK-GI-NEXT: ret
91+
entry:
92+
%V128 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
93+
ret <4 x i16> %V128
94+
}
95+
9696
define <4 x i16> @v4i16(<4 x i16> %a) {
9797
; CHECK-LABEL: v4i16:
9898
; CHECK: // %bb.0: // %entry
@@ -104,19 +104,11 @@ entry:
104104
}
105105

106106
define <16 x i8> @v16i8(<16 x i8> %a) {
107-
; CHECK-SD-LABEL: v16i8:
108-
; CHECK-SD: // %bb.0: // %entry
109-
; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
110-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
111-
; CHECK-SD-NEXT: ret
112-
;
113-
; CHECK-GI-LABEL: v16i8:
114-
; CHECK-GI: // %bb.0: // %entry
115-
; CHECK-GI-NEXT: adrp x8, .LCPI7_0
116-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
117-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
118-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
119-
; CHECK-GI-NEXT: ret
107+
; CHECK-LABEL: v16i8:
108+
; CHECK: // %bb.0: // %entry
109+
; CHECK-NEXT: rev64 v0.16b, v0.16b
110+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
111+
; CHECK-NEXT: ret
120112
entry:
121113
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
122114
ret <16 x i8> %V128
@@ -125,18 +117,18 @@ entry:
125117
define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
126118
; CHECK-SD-LABEL: v16i8_2:
127119
; CHECK-SD: // %bb.0: // %entry
128-
; CHECK-SD-NEXT: adrp x8, .LCPI8_0
120+
; CHECK-SD-NEXT: adrp x8, .LCPI9_0
129121
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
130-
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
122+
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
131123
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
132124
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
133125
; CHECK-SD-NEXT: ret
134126
;
135127
; CHECK-GI-LABEL: v16i8_2:
136128
; CHECK-GI: // %bb.0: // %entry
137-
; CHECK-GI-NEXT: adrp x8, .LCPI8_0
129+
; CHECK-GI-NEXT: adrp x8, .LCPI9_0
138130
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
139-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
131+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
140132
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
141133
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
142134
; CHECK-GI-NEXT: ret
@@ -166,19 +158,11 @@ entry:
166158
}
167159

168160
define <4 x float> @v4f32(<4 x float> %a) {
169-
; CHECK-SD-LABEL: v4f32:
170-
; CHECK-SD: // %bb.0: // %entry
171-
; CHECK-SD-NEXT: rev64 v0.4s, v0.4s
172-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
173-
; CHECK-SD-NEXT: ret
174-
;
175-
; CHECK-GI-LABEL: v4f32:
176-
; CHECK-GI: // %bb.0: // %entry
177-
; CHECK-GI-NEXT: adrp x8, .LCPI11_0
178-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
179-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
180-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
181-
; CHECK-GI-NEXT: ret
161+
; CHECK-LABEL: v4f32:
162+
; CHECK: // %bb.0: // %entry
163+
; CHECK-NEXT: rev64 v0.4s, v0.4s
164+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
165+
; CHECK-NEXT: ret
182166
entry:
183167
%V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
184168
ret <4 x float> %V128
@@ -195,19 +179,11 @@ entry:
195179
}
196180

197181
define <8 x half> @v8f16(<8 x half> %a) {
198-
; CHECK-SD-LABEL: v8f16:
199-
; CHECK-SD: // %bb.0: // %entry
200-
; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
201-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
202-
; CHECK-SD-NEXT: ret
203-
;
204-
; CHECK-GI-LABEL: v8f16:
205-
; CHECK-GI: // %bb.0: // %entry
206-
; CHECK-GI-NEXT: adrp x8, .LCPI13_0
207-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
208-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
209-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
210-
; CHECK-GI-NEXT: ret
182+
; CHECK-LABEL: v8f16:
183+
; CHECK: // %bb.0: // %entry
184+
; CHECK-NEXT: rev64 v0.8h, v0.8h
185+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
186+
; CHECK-NEXT: ret
211187
entry:
212188
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
213189
ret <8 x half> %V128

0 commit comments

Comments
 (0)