Skip to content

Commit a7cfff8

Browse files
authored
[AArch64][GlobalISel] Lower Shuffle Vector to REV (#79591)
Add lowering of Shuffle Vector instructions with a REV mask to G_REV32 and G_REV16 (32-bit and 16-bit lane sizes), alongside the existing G_REV64 lowering
1 parent 2c552d3 commit a7cfff8

File tree

2 files changed

+49
-104
lines changed

2 files changed

+49
-104
lines changed

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -242,15 +242,22 @@ bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
242242

243243
unsigned NumElts = Ty.getNumElements();
244244

245-
// Try to produce G_REV64
246-
if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
247-
MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
248-
return true;
245+
// Try to produce a G_REV instruction
246+
for (unsigned LaneSize : {64U, 32U, 16U}) {
247+
if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
248+
unsigned Opcode;
249+
if (LaneSize == 64U)
250+
Opcode = AArch64::G_REV64;
251+
else if (LaneSize == 32U)
252+
Opcode = AArch64::G_REV32;
253+
else
254+
Opcode = AArch64::G_REV16;
255+
256+
MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
257+
return true;
258+
}
249259
}
250260

251-
// TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
252-
// This should be identical to above, but with a constant 32 and constant
253-
// 16.
254261
return false;
255262
}
256263

llvm/test/CodeGen/AArch64/arm64-rev.ll

Lines changed: 35 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -296,120 +296,66 @@ define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
296296
}
297297

298298
define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
299-
; CHECK-SD-LABEL: test_vrev32D8:
300-
; CHECK-SD: // %bb.0:
301-
; CHECK-SD-NEXT: ldr d0, [x0]
302-
; CHECK-SD-NEXT: rev32.8b v0, v0
303-
; CHECK-SD-NEXT: ret
304-
;
305-
; CHECK-GI-LABEL: test_vrev32D8:
306-
; CHECK-GI: // %bb.0:
307-
; CHECK-GI-NEXT: ldr d0, [x0]
308-
; CHECK-GI-NEXT: adrp x8, .LCPI19_0
309-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
310-
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
311-
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
312-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
313-
; CHECK-GI-NEXT: ret
299+
; CHECK-LABEL: test_vrev32D8:
300+
; CHECK: // %bb.0:
301+
; CHECK-NEXT: ldr d0, [x0]
302+
; CHECK-NEXT: rev32.8b v0, v0
303+
; CHECK-NEXT: ret
314304
%tmp1 = load <8 x i8>, ptr %A
315305
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
316306
ret <8 x i8> %tmp2
317307
}
318308

319309
define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
320-
; CHECK-SD-LABEL: test_vrev32D16:
321-
; CHECK-SD: // %bb.0:
322-
; CHECK-SD-NEXT: ldr d0, [x0]
323-
; CHECK-SD-NEXT: rev32.4h v0, v0
324-
; CHECK-SD-NEXT: ret
325-
;
326-
; CHECK-GI-LABEL: test_vrev32D16:
327-
; CHECK-GI: // %bb.0:
328-
; CHECK-GI-NEXT: ldr d0, [x0]
329-
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
330-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI20_0]
331-
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
332-
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
333-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
334-
; CHECK-GI-NEXT: ret
310+
; CHECK-LABEL: test_vrev32D16:
311+
; CHECK: // %bb.0:
312+
; CHECK-NEXT: ldr d0, [x0]
313+
; CHECK-NEXT: rev32.4h v0, v0
314+
; CHECK-NEXT: ret
335315
%tmp1 = load <4 x i16>, ptr %A
336316
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
337317
ret <4 x i16> %tmp2
338318
}
339319

340320
define <16 x i8> @test_vrev32Q8(ptr %A) nounwind {
341-
; CHECK-SD-LABEL: test_vrev32Q8:
342-
; CHECK-SD: // %bb.0:
343-
; CHECK-SD-NEXT: ldr q0, [x0]
344-
; CHECK-SD-NEXT: rev32.16b v0, v0
345-
; CHECK-SD-NEXT: ret
346-
;
347-
; CHECK-GI-LABEL: test_vrev32Q8:
348-
; CHECK-GI: // %bb.0:
349-
; CHECK-GI-NEXT: adrp x8, .LCPI21_0
350-
; CHECK-GI-NEXT: ldr q0, [x0]
351-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI21_0]
352-
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
353-
; CHECK-GI-NEXT: ret
321+
; CHECK-LABEL: test_vrev32Q8:
322+
; CHECK: // %bb.0:
323+
; CHECK-NEXT: ldr q0, [x0]
324+
; CHECK-NEXT: rev32.16b v0, v0
325+
; CHECK-NEXT: ret
354326
%tmp1 = load <16 x i8>, ptr %A
355327
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
356328
ret <16 x i8> %tmp2
357329
}
358330

359331
define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
360-
; CHECK-SD-LABEL: test_vrev32Q16:
361-
; CHECK-SD: // %bb.0:
362-
; CHECK-SD-NEXT: ldr q0, [x0]
363-
; CHECK-SD-NEXT: rev32.8h v0, v0
364-
; CHECK-SD-NEXT: ret
365-
;
366-
; CHECK-GI-LABEL: test_vrev32Q16:
367-
; CHECK-GI: // %bb.0:
368-
; CHECK-GI-NEXT: adrp x8, .LCPI22_0
369-
; CHECK-GI-NEXT: ldr q0, [x0]
370-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_0]
371-
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
372-
; CHECK-GI-NEXT: ret
332+
; CHECK-LABEL: test_vrev32Q16:
333+
; CHECK: // %bb.0:
334+
; CHECK-NEXT: ldr q0, [x0]
335+
; CHECK-NEXT: rev32.8h v0, v0
336+
; CHECK-NEXT: ret
373337
%tmp1 = load <8 x i16>, ptr %A
374338
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
375339
ret <8 x i16> %tmp2
376340
}
377341

378342
define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
379-
; CHECK-SD-LABEL: test_vrev16D8:
380-
; CHECK-SD: // %bb.0:
381-
; CHECK-SD-NEXT: ldr d0, [x0]
382-
; CHECK-SD-NEXT: rev16.8b v0, v0
383-
; CHECK-SD-NEXT: ret
384-
;
385-
; CHECK-GI-LABEL: test_vrev16D8:
386-
; CHECK-GI: // %bb.0:
387-
; CHECK-GI-NEXT: ldr d0, [x0]
388-
; CHECK-GI-NEXT: adrp x8, .LCPI23_0
389-
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI23_0]
390-
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
391-
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
392-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
393-
; CHECK-GI-NEXT: ret
343+
; CHECK-LABEL: test_vrev16D8:
344+
; CHECK: // %bb.0:
345+
; CHECK-NEXT: ldr d0, [x0]
346+
; CHECK-NEXT: rev16.8b v0, v0
347+
; CHECK-NEXT: ret
394348
%tmp1 = load <8 x i8>, ptr %A
395349
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
396350
ret <8 x i8> %tmp2
397351
}
398352

399353
define <16 x i8> @test_vrev16Q8(ptr %A) nounwind {
400-
; CHECK-SD-LABEL: test_vrev16Q8:
401-
; CHECK-SD: // %bb.0:
402-
; CHECK-SD-NEXT: ldr q0, [x0]
403-
; CHECK-SD-NEXT: rev16.16b v0, v0
404-
; CHECK-SD-NEXT: ret
405-
;
406-
; CHECK-GI-LABEL: test_vrev16Q8:
407-
; CHECK-GI: // %bb.0:
408-
; CHECK-GI-NEXT: adrp x8, .LCPI24_0
409-
; CHECK-GI-NEXT: ldr q0, [x0]
410-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI24_0]
411-
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
412-
; CHECK-GI-NEXT: ret
354+
; CHECK-LABEL: test_vrev16Q8:
355+
; CHECK: // %bb.0:
356+
; CHECK-NEXT: ldr q0, [x0]
357+
; CHECK-NEXT: rev16.16b v0, v0
358+
; CHECK-NEXT: ret
413359
%tmp1 = load <16 x i8>, ptr %A
414360
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
415361
ret <16 x i8> %tmp2
@@ -429,19 +375,11 @@ define <8 x i8> @test_vrev64D8_undef(ptr %A) nounwind {
429375
}
430376

431377
define <8 x i16> @test_vrev32Q16_undef(ptr %A) nounwind {
432-
; CHECK-SD-LABEL: test_vrev32Q16_undef:
433-
; CHECK-SD: // %bb.0:
434-
; CHECK-SD-NEXT: ldr q0, [x0]
435-
; CHECK-SD-NEXT: rev32.8h v0, v0
436-
; CHECK-SD-NEXT: ret
437-
;
438-
; CHECK-GI-LABEL: test_vrev32Q16_undef:
439-
; CHECK-GI: // %bb.0:
440-
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
441-
; CHECK-GI-NEXT: ldr q0, [x0]
442-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
443-
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
444-
; CHECK-GI-NEXT: ret
378+
; CHECK-LABEL: test_vrev32Q16_undef:
379+
; CHECK: // %bb.0:
380+
; CHECK-NEXT: ldr q0, [x0]
381+
; CHECK-NEXT: rev32.8h v0, v0
382+
; CHECK-NEXT: ret
445383
%tmp1 = load <8 x i16>, ptr %A
446384
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
447385
ret <8 x i16> %tmp2

0 commit comments

Comments
 (0)