1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
2
+ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3
+ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
+
5
+ ; CHECK-GI: warning: Instruction selection used fallback path for v_shuffledup8
6
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v_shuffledup16
7
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane8
8
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane16
9
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4i16
10
+ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4f16
3
11
4
12
define <8 x i8 > @v_dup8 (i8 %A ) nounwind {
5
13
; CHECK-LABEL: v_dup8:
@@ -365,10 +373,19 @@ define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
365
373
;
366
374
; *However*, it is a dup vD.4h, vN.h[2*idx].
367
375
define <4 x i16 > @test_build_illegal (<4 x i32 > %in ) {
368
- ; CHECK-LABEL: test_build_illegal:
369
- ; CHECK: // %bb.0:
370
- ; CHECK-NEXT: dup.4h v0, v0[6]
371
- ; CHECK-NEXT: ret
376
+ ; CHECK-SD-LABEL: test_build_illegal:
377
+ ; CHECK-SD: // %bb.0:
378
+ ; CHECK-SD-NEXT: dup.4h v0, v0[6]
379
+ ; CHECK-SD-NEXT: ret
380
+ ;
381
+ ; CHECK-GI-LABEL: test_build_illegal:
382
+ ; CHECK-GI: // %bb.0:
383
+ ; CHECK-GI-NEXT: mov.h v1[1], v0[0]
384
+ ; CHECK-GI-NEXT: mov s0, v0[3]
385
+ ; CHECK-GI-NEXT: mov.h v1[2], v0[0]
386
+ ; CHECK-GI-NEXT: mov.h v1[3], v0[0]
387
+ ; CHECK-GI-NEXT: fmov d0, d1
388
+ ; CHECK-GI-NEXT: ret
372
389
%val = extractelement <4 x i32 > %in , i32 3
373
390
%smallval = trunc i32 %val to i16
374
391
%vec = insertelement <4x i16 > undef , i16 %smallval , i32 3
@@ -380,10 +397,16 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
380
397
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
381
398
; the formation of an indexed-by-7 MLS.
382
399
define <4 x i16 > @test_high_splat (<4 x i16 > %a , <4 x i16 > %b , <8 x i16 > %v ) #0 {
383
- ; CHECK-LABEL: test_high_splat:
384
- ; CHECK: // %bb.0: // %entry
385
- ; CHECK-NEXT: mls.4h v0, v1, v2[7]
386
- ; CHECK-NEXT: ret
400
+ ; CHECK-SD-LABEL: test_high_splat:
401
+ ; CHECK-SD: // %bb.0: // %entry
402
+ ; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
403
+ ; CHECK-SD-NEXT: ret
404
+ ;
405
+ ; CHECK-GI-LABEL: test_high_splat:
406
+ ; CHECK-GI: // %bb.0: // %entry
407
+ ; CHECK-GI-NEXT: dup.8h v2, v2[7]
408
+ ; CHECK-GI-NEXT: mls.4h v0, v2, v1
409
+ ; CHECK-GI-NEXT: ret
387
410
entry:
388
411
%shuffle = shufflevector <8 x i16 > %v , <8 x i16 > undef , <4 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 >
389
412
%mul = mul <4 x i16 > %shuffle , %b
@@ -418,34 +441,65 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
418
441
}
419
442
420
443
; Two-input shuffle with mask <0, 0, 4, 5>: result lanes 0-1 both take element 0
; of %a, result lanes 2-3 take elements 0 and 1 of %b. The default llc run is
; expected to produce trn1.4s + mov.d; the -global-isel run is expected to load
; a mask from the constant pool and use tbl.16b on the {v0, v1} pair.
define <4 x i32 > @test_perfectshuffle_dupext_v4i32 (<4 x i32 > %a , <4 x i32 > %b ) nounwind {
421
- ; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
422
- ; CHECK: // %bb.0:
423
- ; CHECK-NEXT: trn1.4s v0, v0, v0
424
- ; CHECK-NEXT: mov.d v0[1], v1[0]
425
- ; CHECK-NEXT: ret
444
+ ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i32:
445
+ ; CHECK-SD: // %bb.0:
446
+ ; CHECK-SD-NEXT: trn1.4s v0, v0, v0
447
+ ; CHECK-SD-NEXT: mov.d v0[1], v1[0]
448
+ ; CHECK-SD-NEXT: ret
449
+ ;
450
+ ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
451
+ ; CHECK-GI: // %bb.0:
452
+ ; CHECK-GI-NEXT: adrp x8, .LCPI35_0
453
+ ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
454
+ ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
455
+ ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
456
+ ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
457
+ ; CHECK-GI-NEXT: ret
426
458
%r = shufflevector <4 x i32 > %a , <4 x i32 > %b , <4 x i32 > <i32 0 , i32 0 , i32 4 , i32 5 >
427
459
ret <4 x i32 > %r
428
460
}
429
461
430
462
; Floating-point counterpart of the <0, 0, 4, 5> shuffle: result lanes 0-1 both
; take element 0 of %a, result lanes 2-3 take elements 0 and 1 of %b. The
; default llc run is expected to produce trn1.4s + mov.d; the -global-isel run
; is expected to load a mask from the constant pool and use tbl.16b.
define <4 x float > @test_perfectshuffle_dupext_v4f32 (<4 x float > %a , <4 x float > %b ) nounwind {
431
- ; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
432
- ; CHECK: // %bb.0:
433
- ; CHECK-NEXT: trn1.4s v0, v0, v0
434
- ; CHECK-NEXT: mov.d v0[1], v1[0]
435
- ; CHECK-NEXT: ret
463
+ ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f32:
464
+ ; CHECK-SD: // %bb.0:
465
+ ; CHECK-SD-NEXT: trn1.4s v0, v0, v0
466
+ ; CHECK-SD-NEXT: mov.d v0[1], v1[0]
467
+ ; CHECK-SD-NEXT: ret
468
+ ;
469
+ ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
470
+ ; CHECK-GI: // %bb.0:
471
+ ; CHECK-GI-NEXT: adrp x8, .LCPI36_0
472
+ ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
473
+ ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
474
+ ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
475
+ ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
476
+ ; CHECK-GI-NEXT: ret
436
477
%r = shufflevector <4 x float > %a , <4 x float > %b , <4 x i32 > <i32 0 , i32 0 , i32 4 , i32 5 >
437
478
ret <4 x float > %r
438
479
}
439
480
440
481
define void @disguised_dup (<4 x float > %x , ptr %p1 , ptr %p2 ) {
441
- ; CHECK-LABEL: disguised_dup:
442
- ; CHECK: // %bb.0:
443
- ; CHECK-NEXT: ext.16b v1, v0, v0, #4
444
- ; CHECK-NEXT: mov.s v1[2], v0[0]
445
- ; CHECK-NEXT: dup.4s v0, v0[0]
446
- ; CHECK-NEXT: str q1, [x0]
447
- ; CHECK-NEXT: str q0, [x1]
448
- ; CHECK-NEXT: ret
482
+ ; CHECK-SD-LABEL: disguised_dup:
483
+ ; CHECK-SD: // %bb.0:
484
+ ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #4
485
+ ; CHECK-SD-NEXT: mov.s v1[2], v0[0]
486
+ ; CHECK-SD-NEXT: dup.4s v0, v0[0]
487
+ ; CHECK-SD-NEXT: str q1, [x0]
488
+ ; CHECK-SD-NEXT: str q0, [x1]
489
+ ; CHECK-SD-NEXT: ret
490
+ ;
491
+ ; CHECK-GI-LABEL: disguised_dup:
492
+ ; CHECK-GI: // %bb.0:
493
+ ; CHECK-GI-NEXT: adrp x8, .LCPI37_1
494
+ ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
495
+ ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
496
+ ; CHECK-GI-NEXT: adrp x8, .LCPI37_0
497
+ ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
498
+ ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
499
+ ; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
500
+ ; CHECK-GI-NEXT: str q0, [x0]
501
+ ; CHECK-GI-NEXT: str q2, [x1]
502
+ ; CHECK-GI-NEXT: ret
449
503
%shuf = shufflevector <4 x float > %x , <4 x float > undef , <4 x i32 > <i32 1 , i32 2 , i32 0 , i32 0 >
450
504
%dup = shufflevector <4 x float > %shuf , <4 x float > undef , <4 x i32 > <i32 3 , i32 2 , i32 2 , i32 3 >
451
505
store <4 x float > %shuf , ptr %p1 , align 8
@@ -454,42 +508,71 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
454
508
}
455
509
456
510
; Adds the splat constant 8421378 (0x808002) to every lane of a <2 x i32>.
; The default llc run is expected to build the scalar with mov #32770 +
; movk #128, lsl #16 and broadcast it with dup.2s; the -global-isel run is
; expected to load the vector constant from the constant pool instead.
define <2 x i32 > @dup_const2 (<2 x i32 > %A ) nounwind {
457
- ; CHECK-LABEL: dup_const2:
458
- ; CHECK: // %bb.0:
459
- ; CHECK-NEXT: mov w8, #32770
460
- ; CHECK-NEXT: movk w8, #128, lsl #16
461
- ; CHECK-NEXT: dup.2s v1, w8
462
- ; CHECK-NEXT: add.2s v0, v0, v1
463
- ; CHECK-NEXT: ret
511
+ ; CHECK-SD-LABEL: dup_const2:
512
+ ; CHECK-SD: // %bb.0:
513
+ ; CHECK-SD-NEXT: mov w8, #32770 // =0x8002
514
+ ; CHECK-SD-NEXT: movk w8, #128, lsl #16
515
+ ; CHECK-SD-NEXT: dup.2s v1, w8
516
+ ; CHECK-SD-NEXT: add.2s v0, v0, v1
517
+ ; CHECK-SD-NEXT: ret
518
+ ;
519
+ ; CHECK-GI-LABEL: dup_const2:
520
+ ; CHECK-GI: // %bb.0:
521
+ ; CHECK-GI-NEXT: adrp x8, .LCPI38_0
522
+ ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
523
+ ; CHECK-GI-NEXT: add.2s v0, v0, v1
524
+ ; CHECK-GI-NEXT: ret
464
525
%tmp2 = add <2 x i32 > %A , <i32 8421378 , i32 8421378 >
465
526
ret <2 x i32 > %tmp2
466
527
}
467
528
468
529
; Adds the splat constant 8421377 (0x808001) to a <4 x i32> and then keeps only
; lanes 0 and 1 via a shuffle. The default llc run is expected to do the add at
; 2s width with a mov/movk + dup.2s constant; the -global-isel run is expected
; to do the full add.4s against a constant-pool load, followed by
; ext.16b ..., #0, and return the low half (d0).
define <2 x i32 > @dup_const4_ext (<4 x i32 > %A ) nounwind {
469
- ; CHECK-LABEL: dup_const4_ext:
470
- ; CHECK: // %bb.0:
471
- ; CHECK-NEXT: mov w8, #32769
472
- ; CHECK-NEXT: movk w8, #128, lsl #16
473
- ; CHECK-NEXT: dup.2s v1, w8
474
- ; CHECK-NEXT: add.2s v0, v0, v1
475
- ; CHECK-NEXT: ret
530
+ ; CHECK-SD-LABEL: dup_const4_ext:
531
+ ; CHECK-SD: // %bb.0:
532
+ ; CHECK-SD-NEXT: mov w8, #32769 // =0x8001
533
+ ; CHECK-SD-NEXT: movk w8, #128, lsl #16
534
+ ; CHECK-SD-NEXT: dup.2s v1, w8
535
+ ; CHECK-SD-NEXT: add.2s v0, v0, v1
536
+ ; CHECK-SD-NEXT: ret
537
+ ;
538
+ ; CHECK-GI-LABEL: dup_const4_ext:
539
+ ; CHECK-GI: // %bb.0:
540
+ ; CHECK-GI-NEXT: adrp x8, .LCPI39_0
541
+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
542
+ ; CHECK-GI-NEXT: add.4s v0, v0, v1
543
+ ; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
544
+ ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
545
+ ; CHECK-GI-NEXT: ret
476
546
%tmp1 = add <4 x i32 > %A , <i32 8421377 , i32 8421377 , i32 8421377 , i32 8421377 >
477
547
%tmp2 = shufflevector <4 x i32 > %tmp1 , <4 x i32 > undef , <2 x i32 > <i32 0 , i32 1 >
478
548
ret <2 x i32 > %tmp2
479
549
}
480
550
481
551
define <4 x i32 > @dup_const24 (<2 x i32 > %A , <2 x i32 > %B , <4 x i32 > %C ) nounwind {
482
- ; CHECK-LABEL: dup_const24:
483
- ; CHECK: // %bb.0:
484
- ; CHECK-NEXT: mov w8, #32768
485
- ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
486
- ; CHECK-NEXT: movk w8, #128, lsl #16
487
- ; CHECK-NEXT: dup.4s v3, w8
488
- ; CHECK-NEXT: add.2s v0, v0, v3
489
- ; CHECK-NEXT: mov.d v0[1], v1[0]
490
- ; CHECK-NEXT: add.4s v1, v2, v3
491
- ; CHECK-NEXT: eor.16b v0, v1, v0
492
- ; CHECK-NEXT: ret
552
+ ; CHECK-SD-LABEL: dup_const24:
553
+ ; CHECK-SD: // %bb.0:
554
+ ; CHECK-SD-NEXT: mov w8, #32768 // =0x8000
555
+ ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
556
+ ; CHECK-SD-NEXT: movk w8, #128, lsl #16
557
+ ; CHECK-SD-NEXT: dup.4s v3, w8
558
+ ; CHECK-SD-NEXT: add.2s v0, v0, v3
559
+ ; CHECK-SD-NEXT: mov.d v0[1], v1[0]
560
+ ; CHECK-SD-NEXT: add.4s v1, v2, v3
561
+ ; CHECK-SD-NEXT: eor.16b v0, v1, v0
562
+ ; CHECK-SD-NEXT: ret
563
+ ;
564
+ ; CHECK-GI-LABEL: dup_const24:
565
+ ; CHECK-GI: // %bb.0:
566
+ ; CHECK-GI-NEXT: adrp x8, .LCPI40_1
567
+ ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
568
+ ; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
569
+ ; CHECK-GI-NEXT: adrp x8, .LCPI40_0
570
+ ; CHECK-GI-NEXT: add.2s v0, v0, v3
571
+ ; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
572
+ ; CHECK-GI-NEXT: mov.d v0[1], v1[0]
573
+ ; CHECK-GI-NEXT: add.4s v1, v2, v3
574
+ ; CHECK-GI-NEXT: eor.16b v0, v1, v0
575
+ ; CHECK-GI-NEXT: ret
493
576
%tmp1 = add <2 x i32 > %A , <i32 8421376 , i32 8421376 >
494
577
%tmp4 = shufflevector <2 x i32 > %tmp1 , <2 x i32 > %B , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
495
578
%tmp3 = add <4 x i32 > %C , <i32 8421376 , i32 8421376 , i32 8421376 , i32 8421376 >
@@ -498,10 +581,16 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
498
581
}
499
582
500
583
define <8 x i16 > @bitcast_i64_v8i16 (i64 %a ) {
501
- ; CHECK-LABEL: bitcast_i64_v8i16:
502
- ; CHECK: // %bb.0:
503
- ; CHECK-NEXT: dup.8h v0, w0
504
- ; CHECK-NEXT: ret
584
+ ; CHECK-SD-LABEL: bitcast_i64_v8i16:
585
+ ; CHECK-SD: // %bb.0:
586
+ ; CHECK-SD-NEXT: dup.8h v0, w0
587
+ ; CHECK-SD-NEXT: ret
588
+ ;
589
+ ; CHECK-GI-LABEL: bitcast_i64_v8i16:
590
+ ; CHECK-GI: // %bb.0:
591
+ ; CHECK-GI-NEXT: fmov d0, x0
592
+ ; CHECK-GI-NEXT: dup.8h v0, v0[0]
593
+ ; CHECK-GI-NEXT: ret
505
594
%b = bitcast i64 %a to <4 x i16 >
506
595
%r = shufflevector <4 x i16 > %b , <4 x i16 > poison, <8 x i32 > zeroinitializer
507
596
ret <8 x i16 > %r
@@ -541,11 +630,16 @@ define <8 x half> @bitcast_i64_v8f16(i64 %a) {
541
630
}
542
631
543
632
define <2 x i64 > @bitcast_i64_v2f64 (i64 %a ) {
544
- ; CHECK-LABEL: bitcast_i64_v2f64:
545
- ; CHECK: // %bb.0:
546
- ; CHECK-NEXT: fmov d0, x0
547
- ; CHECK-NEXT: dup.2d v0, v0[0]
548
- ; CHECK-NEXT: ret
633
+ ; CHECK-SD-LABEL: bitcast_i64_v2f64:
634
+ ; CHECK-SD: // %bb.0:
635
+ ; CHECK-SD-NEXT: fmov d0, x0
636
+ ; CHECK-SD-NEXT: dup.2d v0, v0[0]
637
+ ; CHECK-SD-NEXT: ret
638
+ ;
639
+ ; CHECK-GI-LABEL: bitcast_i64_v2f64:
640
+ ; CHECK-GI: // %bb.0:
641
+ ; CHECK-GI-NEXT: dup.2d v0, x0
642
+ ; CHECK-GI-NEXT: ret
549
643
%b = bitcast i64 %a to <1 x i64 >
550
644
%r = shufflevector <1 x i64 > %b , <1 x i64 > poison, <2 x i32 > zeroinitializer
551
645
ret <2 x i64 > %r
0 commit comments