Skip to content

Commit 7de4189

Browse files
author
git apple-llvm automerger
committed
Merge commit '914051f0a8bf' from apple/master into swift/master-next
2 parents febcf0e + 914051f commit 7de4189

File tree

2 files changed

+35
-44
lines changed

2 files changed

+35
-44
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4382,6 +4382,10 @@ let Predicates = [HasMVEInt] in {
43824382
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
43834383
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
43844384

4385+
// Pattern fragment that matches a plain `load` of $ptr, but only when the
// load node reports an alignment of at least 4.
def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
4386+
return cast<LoadSDNode>(N)->getAlignment() >= 4;
4387+
}]>;
4388+
43854389
let Predicates = [HasMVEInt] in {
43864390
foreach VT = [ v4i1, v8i1, v16i1 ] in {
43874391
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
@@ -4394,6 +4398,13 @@ let Predicates = [HasMVEInt] in {
43944398
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
43954399
}
43964400

4401+
// If we happen to be casting from a load we can convert that straight
4402+
// into a predicate load, so long as the load is an i32 load that satisfies
// load_align4 and whose address matches taddrmode_imm7<2>.
4403+
foreach VT = [ v4i1, v8i1, v16i1 ] in {
4404+
def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))),
4405+
(VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>;
4406+
}
4407+
43974408
// Here we match the specific SDNode type 'ARMVectorRegCastImpl'
43984409
// rather than the more general 'ARMVectorRegCast' which would also
43994410
// match some bitconverts. If we use the latter in cases where the

llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -386,18 +386,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_predcastzext(i16* %i, <4 x i32> %a) {
386386
define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) {
387387
; CHECK-LE-LABEL: load_bc4:
388388
; CHECK-LE: @ %bb.0:
389-
; CHECK-LE-NEXT: ldr r0, [r0]
389+
; CHECK-LE-NEXT: vldr p0, [r0]
390390
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
391-
; CHECK-LE-NEXT: vmsr p0, r0
392391
; CHECK-LE-NEXT: vpsel q0, q0, q1
393392
; CHECK-LE-NEXT: bx lr
394393
;
395394
; CHECK-BE-LABEL: load_bc4:
396395
; CHECK-BE: @ %bb.0:
397-
; CHECK-BE-NEXT: ldr r0, [r0]
396+
; CHECK-BE-NEXT: vldr p0, [r0]
398397
; CHECK-BE-NEXT: vrev64.32 q1, q0
399398
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
400-
; CHECK-BE-NEXT: vmsr p0, r0
401399
; CHECK-BE-NEXT: vpsel q1, q1, q0
402400
; CHECK-BE-NEXT: vrev64.32 q0, q1
403401
; CHECK-BE-NEXT: bx lr
@@ -410,19 +408,17 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4(i32* %i, <4 x i32> %a) {
410408
define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) {
411409
; CHECK-LE-LABEL: load_predcast8:
412410
; CHECK-LE: @ %bb.0:
413-
; CHECK-LE-NEXT: ldr r0, [r0]
411+
; CHECK-LE-NEXT: vldr p0, [r0]
414412
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
415-
; CHECK-LE-NEXT: vmsr p0, r0
416413
; CHECK-LE-NEXT: vpsel q0, q0, q1
417414
; CHECK-LE-NEXT: bx lr
418415
;
419416
; CHECK-BE-LABEL: load_predcast8:
420417
; CHECK-BE: @ %bb.0:
421-
; CHECK-BE-NEXT: ldr r0, [r0]
422418
; CHECK-BE-NEXT: vrev64.16 q1, q0
423419
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
420+
; CHECK-BE-NEXT: vldr p0, [r0]
424421
; CHECK-BE-NEXT: vrev32.16 q0, q0
425-
; CHECK-BE-NEXT: vmsr p0, r0
426422
; CHECK-BE-NEXT: vpsel q1, q1, q0
427423
; CHECK-BE-NEXT: vrev64.16 q0, q1
428424
; CHECK-BE-NEXT: bx lr
@@ -435,19 +431,17 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(i32* %i, <8 x i16> %a) {
435431
define arm_aapcs_vfpcc <16 x i8> @load_predcast16(i32* %i, <16 x i8> %a) {
436432
; CHECK-LE-LABEL: load_predcast16:
437433
; CHECK-LE: @ %bb.0:
438-
; CHECK-LE-NEXT: ldr r0, [r0]
434+
; CHECK-LE-NEXT: vldr p0, [r0]
439435
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
440-
; CHECK-LE-NEXT: vmsr p0, r0
441436
; CHECK-LE-NEXT: vpsel q0, q0, q1
442437
; CHECK-LE-NEXT: bx lr
443438
;
444439
; CHECK-BE-LABEL: load_predcast16:
445440
; CHECK-BE: @ %bb.0:
446-
; CHECK-BE-NEXT: ldr r0, [r0]
447441
; CHECK-BE-NEXT: vrev64.8 q1, q0
448442
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
443+
; CHECK-BE-NEXT: vldr p0, [r0]
449444
; CHECK-BE-NEXT: vrev32.8 q0, q0
450-
; CHECK-BE-NEXT: vmsr p0, r0
451445
; CHECK-BE-NEXT: vpsel q1, q1, q0
452446
; CHECK-BE-NEXT: vrev64.8 q0, q1
453447
; CHECK-BE-NEXT: bx lr
@@ -484,18 +478,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_align2(i32* %i, <4 x i32> %a) {
484478
define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) {
485479
; CHECK-LE-LABEL: load_bc4_offset:
486480
; CHECK-LE: @ %bb.0:
487-
; CHECK-LE-NEXT: ldr.w r0, [r0, #6]
481+
; CHECK-LE-NEXT: adds r0, #6
488482
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
489-
; CHECK-LE-NEXT: vmsr p0, r0
483+
; CHECK-LE-NEXT: vldr p0, [r0]
490484
; CHECK-LE-NEXT: vpsel q0, q0, q1
491485
; CHECK-LE-NEXT: bx lr
492486
;
493487
; CHECK-BE-LABEL: load_bc4_offset:
494488
; CHECK-BE: @ %bb.0:
495-
; CHECK-BE-NEXT: ldr.w r0, [r0, #6]
489+
; CHECK-BE-NEXT: adds r0, #6
496490
; CHECK-BE-NEXT: vrev64.32 q1, q0
491+
; CHECK-BE-NEXT: vldr p0, [r0]
497492
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
498-
; CHECK-BE-NEXT: vmsr p0, r0
499493
; CHECK-BE-NEXT: vpsel q1, q1, q0
500494
; CHECK-BE-NEXT: vrev64.32 q0, q1
501495
; CHECK-BE-NEXT: bx lr
@@ -510,18 +504,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(i16* %i, <4 x i32> %a) {
510504
define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) {
511505
; CHECK-LE-LABEL: load_bc4_range4:
512506
; CHECK-LE: @ %bb.0:
513-
; CHECK-LE-NEXT: ldr r0, [r0, #4]
507+
; CHECK-LE-NEXT: vldr p0, [r0, #4]
514508
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
515-
; CHECK-LE-NEXT: vmsr p0, r0
516509
; CHECK-LE-NEXT: vpsel q0, q0, q1
517510
; CHECK-LE-NEXT: bx lr
518511
;
519512
; CHECK-BE-LABEL: load_bc4_range4:
520513
; CHECK-BE: @ %bb.0:
521-
; CHECK-BE-NEXT: ldr r0, [r0, #4]
514+
; CHECK-BE-NEXT: vldr p0, [r0, #4]
522515
; CHECK-BE-NEXT: vrev64.32 q1, q0
523516
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
524-
; CHECK-BE-NEXT: vmsr p0, r0
525517
; CHECK-BE-NEXT: vpsel q1, q1, q0
526518
; CHECK-BE-NEXT: vrev64.32 q0, q1
527519
; CHECK-BE-NEXT: bx lr
@@ -535,18 +527,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(i32* %i, <4 x i32> %a) {
535527
define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) {
536528
; CHECK-LE-LABEL: load_bc4_range:
537529
; CHECK-LE: @ %bb.0:
538-
; CHECK-LE-NEXT: ldr.w r0, [r0, #508]
530+
; CHECK-LE-NEXT: vldr p0, [r0, #508]
539531
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
540-
; CHECK-LE-NEXT: vmsr p0, r0
541532
; CHECK-LE-NEXT: vpsel q0, q0, q1
542533
; CHECK-LE-NEXT: bx lr
543534
;
544535
; CHECK-BE-LABEL: load_bc4_range:
545536
; CHECK-BE: @ %bb.0:
546-
; CHECK-BE-NEXT: ldr.w r0, [r0, #508]
537+
; CHECK-BE-NEXT: vldr p0, [r0, #508]
547538
; CHECK-BE-NEXT: vrev64.32 q1, q0
548539
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
549-
; CHECK-BE-NEXT: vmsr p0, r0
550540
; CHECK-BE-NEXT: vpsel q1, q1, q0
551541
; CHECK-BE-NEXT: vrev64.32 q0, q1
552542
; CHECK-BE-NEXT: bx lr
@@ -560,22 +550,16 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(i32* %i, <4 x i32> %a) {
560550
define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) {
561551
; CHECK-LE-LABEL: load_bc4_range2:
562552
; CHECK-LE: @ %bb.0:
563-
; CHECK-LE-NEXT: movw r1, #65028
553+
; CHECK-LE-NEXT: vldr p0, [r0, #-508]
564554
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
565-
; CHECK-LE-NEXT: movt r1, #65535
566-
; CHECK-LE-NEXT: ldr r0, [r0, r1]
567-
; CHECK-LE-NEXT: vmsr p0, r0
568555
; CHECK-LE-NEXT: vpsel q0, q0, q1
569556
; CHECK-LE-NEXT: bx lr
570557
;
571558
; CHECK-BE-LABEL: load_bc4_range2:
572559
; CHECK-BE: @ %bb.0:
573-
; CHECK-BE-NEXT: movw r1, #65028
560+
; CHECK-BE-NEXT: vldr p0, [r0, #-508]
574561
; CHECK-BE-NEXT: vrev64.32 q1, q0
575-
; CHECK-BE-NEXT: movt r1, #65535
576562
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
577-
; CHECK-BE-NEXT: ldr r0, [r0, r1]
578-
; CHECK-BE-NEXT: vmsr p0, r0
579563
; CHECK-BE-NEXT: vpsel q1, q1, q0
580564
; CHECK-BE-NEXT: vrev64.32 q0, q1
581565
; CHECK-BE-NEXT: bx lr
@@ -589,18 +573,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(i32* %i, <4 x i32> %a) {
589573
define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) {
590574
; CHECK-LE-LABEL: load_bc4_range3:
591575
; CHECK-LE: @ %bb.0:
592-
; CHECK-LE-NEXT: ldr.w r0, [r0, #512]
576+
; CHECK-LE-NEXT: add.w r0, r0, #512
593577
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
594-
; CHECK-LE-NEXT: vmsr p0, r0
578+
; CHECK-LE-NEXT: vldr p0, [r0]
595579
; CHECK-LE-NEXT: vpsel q0, q0, q1
596580
; CHECK-LE-NEXT: bx lr
597581
;
598582
; CHECK-BE-LABEL: load_bc4_range3:
599583
; CHECK-BE: @ %bb.0:
600-
; CHECK-BE-NEXT: ldr.w r0, [r0, #512]
584+
; CHECK-BE-NEXT: add.w r0, r0, #512
601585
; CHECK-BE-NEXT: vrev64.32 q1, q0
586+
; CHECK-BE-NEXT: vldr p0, [r0]
602587
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
603-
; CHECK-BE-NEXT: vmsr p0, r0
604588
; CHECK-BE-NEXT: vpsel q1, q1, q0
605589
; CHECK-BE-NEXT: vrev64.32 q0, q1
606590
; CHECK-BE-NEXT: bx lr
@@ -614,22 +598,18 @@ define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(i32* %i, <4 x i32> %a) {
614598
define arm_aapcs_vfpcc <4 x i32> @load_bc4_range5(i32* %i, <4 x i32> %a) {
615599
; CHECK-LE-LABEL: load_bc4_range5:
616600
; CHECK-LE: @ %bb.0:
617-
; CHECK-LE-NEXT: movw r1, #65024
601+
; CHECK-LE-NEXT: sub.w r0, r0, #512
618602
; CHECK-LE-NEXT: vmov.i32 q1, #0x0
619-
; CHECK-LE-NEXT: movt r1, #65535
620-
; CHECK-LE-NEXT: ldr r0, [r0, r1]
621-
; CHECK-LE-NEXT: vmsr p0, r0
603+
; CHECK-LE-NEXT: vldr p0, [r0]
622604
; CHECK-LE-NEXT: vpsel q0, q0, q1
623605
; CHECK-LE-NEXT: bx lr
624606
;
625607
; CHECK-BE-LABEL: load_bc4_range5:
626608
; CHECK-BE: @ %bb.0:
627-
; CHECK-BE-NEXT: movw r1, #65024
609+
; CHECK-BE-NEXT: sub.w r0, r0, #512
628610
; CHECK-BE-NEXT: vrev64.32 q1, q0
629-
; CHECK-BE-NEXT: movt r1, #65535
611+
; CHECK-BE-NEXT: vldr p0, [r0]
630612
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
631-
; CHECK-BE-NEXT: ldr r0, [r0, r1]
632-
; CHECK-BE-NEXT: vmsr p0, r0
633613
; CHECK-BE-NEXT: vpsel q1, q1, q0
634614
; CHECK-BE-NEXT: vrev64.32 q0, q1
635615
; CHECK-BE-NEXT: bx lr

0 commit comments

Comments
 (0)