Skip to content

Commit f3b9b94

Browse files
committed
[AArch64][GISel] Expand arm64-dup and arm64-rev tests for global isel. NFC
1 parent 76cfdbe commit f3b9b94

File tree

2 files changed

+508
-538
lines changed

2 files changed

+508
-538
lines changed

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 155 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
2+
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for v_shuffledup8
6+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v_shuffledup16
7+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane8
8+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane16
9+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4i16
10+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4f16
311

412
define <8 x i8> @v_dup8(i8 %A) nounwind {
513
; CHECK-LABEL: v_dup8:
@@ -365,10 +373,19 @@ define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
365373
;
366374
; *However*, it is a dup vD.4h, vN.h[2*idx].
367375
define <4 x i16> @test_build_illegal(<4 x i32> %in) {
368-
; CHECK-LABEL: test_build_illegal:
369-
; CHECK: // %bb.0:
370-
; CHECK-NEXT: dup.4h v0, v0[6]
371-
; CHECK-NEXT: ret
376+
; CHECK-SD-LABEL: test_build_illegal:
377+
; CHECK-SD: // %bb.0:
378+
; CHECK-SD-NEXT: dup.4h v0, v0[6]
379+
; CHECK-SD-NEXT: ret
380+
;
381+
; CHECK-GI-LABEL: test_build_illegal:
382+
; CHECK-GI: // %bb.0:
383+
; CHECK-GI-NEXT: mov.h v1[1], v0[0]
384+
; CHECK-GI-NEXT: mov s0, v0[3]
385+
; CHECK-GI-NEXT: mov.h v1[2], v0[0]
386+
; CHECK-GI-NEXT: mov.h v1[3], v0[0]
387+
; CHECK-GI-NEXT: fmov d0, d1
388+
; CHECK-GI-NEXT: ret
372389
%val = extractelement <4 x i32> %in, i32 3
373390
%smallval = trunc i32 %val to i16
374391
%vec = insertelement <4x i16> undef, i16 %smallval, i32 3
@@ -380,10 +397,16 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
380397
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
381398
; the formation of an indexed-by-7 MLS.
382399
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
383-
; CHECK-LABEL: test_high_splat:
384-
; CHECK: // %bb.0: // %entry
385-
; CHECK-NEXT: mls.4h v0, v1, v2[7]
386-
; CHECK-NEXT: ret
400+
; CHECK-SD-LABEL: test_high_splat:
401+
; CHECK-SD: // %bb.0: // %entry
402+
; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
403+
; CHECK-SD-NEXT: ret
404+
;
405+
; CHECK-GI-LABEL: test_high_splat:
406+
; CHECK-GI: // %bb.0: // %entry
407+
; CHECK-GI-NEXT: dup.8h v2, v2[7]
408+
; CHECK-GI-NEXT: mls.4h v0, v2, v1
409+
; CHECK-GI-NEXT: ret
387410
entry:
388411
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
389412
%mul = mul <4 x i16> %shuffle, %b
@@ -418,34 +441,65 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
418441
}
419442

420443
define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
421-
; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
422-
; CHECK: // %bb.0:
423-
; CHECK-NEXT: trn1.4s v0, v0, v0
424-
; CHECK-NEXT: mov.d v0[1], v1[0]
425-
; CHECK-NEXT: ret
444+
; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i32:
445+
; CHECK-SD: // %bb.0:
446+
; CHECK-SD-NEXT: trn1.4s v0, v0, v0
447+
; CHECK-SD-NEXT: mov.d v0[1], v1[0]
448+
; CHECK-SD-NEXT: ret
449+
;
450+
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
451+
; CHECK-GI: // %bb.0:
452+
; CHECK-GI-NEXT: adrp x8, .LCPI35_0
453+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
454+
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
455+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
456+
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
457+
; CHECK-GI-NEXT: ret
426458
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
427459
ret <4 x i32> %r
428460
}
429461

430462
define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
431-
; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
432-
; CHECK: // %bb.0:
433-
; CHECK-NEXT: trn1.4s v0, v0, v0
434-
; CHECK-NEXT: mov.d v0[1], v1[0]
435-
; CHECK-NEXT: ret
463+
; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f32:
464+
; CHECK-SD: // %bb.0:
465+
; CHECK-SD-NEXT: trn1.4s v0, v0, v0
466+
; CHECK-SD-NEXT: mov.d v0[1], v1[0]
467+
; CHECK-SD-NEXT: ret
468+
;
469+
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
470+
; CHECK-GI: // %bb.0:
471+
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
472+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
473+
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
474+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
475+
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
476+
; CHECK-GI-NEXT: ret
436477
%r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
437478
ret <4 x float> %r
438479
}
439480

440481
define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
441-
; CHECK-LABEL: disguised_dup:
442-
; CHECK: // %bb.0:
443-
; CHECK-NEXT: ext.16b v1, v0, v0, #4
444-
; CHECK-NEXT: mov.s v1[2], v0[0]
445-
; CHECK-NEXT: dup.4s v0, v0[0]
446-
; CHECK-NEXT: str q1, [x0]
447-
; CHECK-NEXT: str q0, [x1]
448-
; CHECK-NEXT: ret
482+
; CHECK-SD-LABEL: disguised_dup:
483+
; CHECK-SD: // %bb.0:
484+
; CHECK-SD-NEXT: ext.16b v1, v0, v0, #4
485+
; CHECK-SD-NEXT: mov.s v1[2], v0[0]
486+
; CHECK-SD-NEXT: dup.4s v0, v0[0]
487+
; CHECK-SD-NEXT: str q1, [x0]
488+
; CHECK-SD-NEXT: str q0, [x1]
489+
; CHECK-SD-NEXT: ret
490+
;
491+
; CHECK-GI-LABEL: disguised_dup:
492+
; CHECK-GI: // %bb.0:
493+
; CHECK-GI-NEXT: adrp x8, .LCPI37_1
494+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
495+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
496+
; CHECK-GI-NEXT: adrp x8, .LCPI37_0
497+
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
498+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
499+
; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
500+
; CHECK-GI-NEXT: str q0, [x0]
501+
; CHECK-GI-NEXT: str q2, [x1]
502+
; CHECK-GI-NEXT: ret
449503
%shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 0>
450504
%dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
451505
store <4 x float> %shuf, ptr %p1, align 8
@@ -454,42 +508,71 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
454508
}
455509

456510
define <2 x i32> @dup_const2(<2 x i32> %A) nounwind {
457-
; CHECK-LABEL: dup_const2:
458-
; CHECK: // %bb.0:
459-
; CHECK-NEXT: mov w8, #32770
460-
; CHECK-NEXT: movk w8, #128, lsl #16
461-
; CHECK-NEXT: dup.2s v1, w8
462-
; CHECK-NEXT: add.2s v0, v0, v1
463-
; CHECK-NEXT: ret
511+
; CHECK-SD-LABEL: dup_const2:
512+
; CHECK-SD: // %bb.0:
513+
; CHECK-SD-NEXT: mov w8, #32770 // =0x8002
514+
; CHECK-SD-NEXT: movk w8, #128, lsl #16
515+
; CHECK-SD-NEXT: dup.2s v1, w8
516+
; CHECK-SD-NEXT: add.2s v0, v0, v1
517+
; CHECK-SD-NEXT: ret
518+
;
519+
; CHECK-GI-LABEL: dup_const2:
520+
; CHECK-GI: // %bb.0:
521+
; CHECK-GI-NEXT: adrp x8, .LCPI38_0
522+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
523+
; CHECK-GI-NEXT: add.2s v0, v0, v1
524+
; CHECK-GI-NEXT: ret
464525
%tmp2 = add <2 x i32> %A, <i32 8421378, i32 8421378>
465526
ret <2 x i32> %tmp2
466527
}
467528

468529
define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
469-
; CHECK-LABEL: dup_const4_ext:
470-
; CHECK: // %bb.0:
471-
; CHECK-NEXT: mov w8, #32769
472-
; CHECK-NEXT: movk w8, #128, lsl #16
473-
; CHECK-NEXT: dup.2s v1, w8
474-
; CHECK-NEXT: add.2s v0, v0, v1
475-
; CHECK-NEXT: ret
530+
; CHECK-SD-LABEL: dup_const4_ext:
531+
; CHECK-SD: // %bb.0:
532+
; CHECK-SD-NEXT: mov w8, #32769 // =0x8001
533+
; CHECK-SD-NEXT: movk w8, #128, lsl #16
534+
; CHECK-SD-NEXT: dup.2s v1, w8
535+
; CHECK-SD-NEXT: add.2s v0, v0, v1
536+
; CHECK-SD-NEXT: ret
537+
;
538+
; CHECK-GI-LABEL: dup_const4_ext:
539+
; CHECK-GI: // %bb.0:
540+
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
541+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
542+
; CHECK-GI-NEXT: add.4s v0, v0, v1
543+
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
544+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
545+
; CHECK-GI-NEXT: ret
476546
%tmp1 = add <4 x i32> %A, <i32 8421377, i32 8421377, i32 8421377, i32 8421377>
477547
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
478548
ret <2 x i32> %tmp2
479549
}
480550

481551
define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind {
482-
; CHECK-LABEL: dup_const24:
483-
; CHECK: // %bb.0:
484-
; CHECK-NEXT: mov w8, #32768
485-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
486-
; CHECK-NEXT: movk w8, #128, lsl #16
487-
; CHECK-NEXT: dup.4s v3, w8
488-
; CHECK-NEXT: add.2s v0, v0, v3
489-
; CHECK-NEXT: mov.d v0[1], v1[0]
490-
; CHECK-NEXT: add.4s v1, v2, v3
491-
; CHECK-NEXT: eor.16b v0, v1, v0
492-
; CHECK-NEXT: ret
552+
; CHECK-SD-LABEL: dup_const24:
553+
; CHECK-SD: // %bb.0:
554+
; CHECK-SD-NEXT: mov w8, #32768 // =0x8000
555+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
556+
; CHECK-SD-NEXT: movk w8, #128, lsl #16
557+
; CHECK-SD-NEXT: dup.4s v3, w8
558+
; CHECK-SD-NEXT: add.2s v0, v0, v3
559+
; CHECK-SD-NEXT: mov.d v0[1], v1[0]
560+
; CHECK-SD-NEXT: add.4s v1, v2, v3
561+
; CHECK-SD-NEXT: eor.16b v0, v1, v0
562+
; CHECK-SD-NEXT: ret
563+
;
564+
; CHECK-GI-LABEL: dup_const24:
565+
; CHECK-GI: // %bb.0:
566+
; CHECK-GI-NEXT: adrp x8, .LCPI40_1
567+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
568+
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
569+
; CHECK-GI-NEXT: adrp x8, .LCPI40_0
570+
; CHECK-GI-NEXT: add.2s v0, v0, v3
571+
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
572+
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
573+
; CHECK-GI-NEXT: add.4s v1, v2, v3
574+
; CHECK-GI-NEXT: eor.16b v0, v1, v0
575+
; CHECK-GI-NEXT: ret
493576
%tmp1 = add <2 x i32> %A, <i32 8421376, i32 8421376>
494577
%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
495578
%tmp3 = add <4 x i32> %C, <i32 8421376, i32 8421376, i32 8421376, i32 8421376>
@@ -498,10 +581,16 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
498581
}
499582

500583
define <8 x i16> @bitcast_i64_v8i16(i64 %a) {
501-
; CHECK-LABEL: bitcast_i64_v8i16:
502-
; CHECK: // %bb.0:
503-
; CHECK-NEXT: dup.8h v0, w0
504-
; CHECK-NEXT: ret
584+
; CHECK-SD-LABEL: bitcast_i64_v8i16:
585+
; CHECK-SD: // %bb.0:
586+
; CHECK-SD-NEXT: dup.8h v0, w0
587+
; CHECK-SD-NEXT: ret
588+
;
589+
; CHECK-GI-LABEL: bitcast_i64_v8i16:
590+
; CHECK-GI: // %bb.0:
591+
; CHECK-GI-NEXT: fmov d0, x0
592+
; CHECK-GI-NEXT: dup.8h v0, v0[0]
593+
; CHECK-GI-NEXT: ret
505594
%b = bitcast i64 %a to <4 x i16>
506595
%r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
507596
ret <8 x i16> %r
@@ -541,11 +630,16 @@ define <8 x half> @bitcast_i64_v8f16(i64 %a) {
541630
}
542631

543632
define <2 x i64> @bitcast_i64_v2f64(i64 %a) {
544-
; CHECK-LABEL: bitcast_i64_v2f64:
545-
; CHECK: // %bb.0:
546-
; CHECK-NEXT: fmov d0, x0
547-
; CHECK-NEXT: dup.2d v0, v0[0]
548-
; CHECK-NEXT: ret
633+
; CHECK-SD-LABEL: bitcast_i64_v2f64:
634+
; CHECK-SD: // %bb.0:
635+
; CHECK-SD-NEXT: fmov d0, x0
636+
; CHECK-SD-NEXT: dup.2d v0, v0[0]
637+
; CHECK-SD-NEXT: ret
638+
;
639+
; CHECK-GI-LABEL: bitcast_i64_v2f64:
640+
; CHECK-GI: // %bb.0:
641+
; CHECK-GI-NEXT: dup.2d v0, x0
642+
; CHECK-GI-NEXT: ret
549643
%b = bitcast i64 %a to <1 x i64>
550644
%r = shufflevector <1 x i64> %b, <1 x i64> poison, <2 x i32> zeroinitializer
551645
ret <2 x i64> %r

0 commit comments

Comments
 (0)