@@ -39,24 +39,28 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
39
39
; CHECK-NEXT: adr r3, .LCPI1_0
40
40
; CHECK-NEXT: vdup.32 q1, r1
41
41
; CHECK-NEXT: vldrw.u32 q0, [r3]
42
- ; CHECK-NEXT: ldr r3, [sp, #40]
43
42
; CHECK-NEXT: vadd.i32 q2, q0, r1
44
43
; CHECK-NEXT: vdup.32 q0, r2
45
44
; CHECK-NEXT: vcmp.u32 hi, q1, q2
46
45
; CHECK-NEXT: ldr r2, [sp, #32]
47
46
; CHECK-NEXT: vpnot
48
47
; CHECK-NEXT: vpst
49
48
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
50
- ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
49
+ ; CHECK-NEXT: vmov.32 q2[0], r2
51
50
; CHECK-NEXT: ldr r2, [sp, #36]
52
- ; CHECK-NEXT: ldr r3, [sp, #44]
53
- ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
51
+ ; CHECK-NEXT: vmov.32 q2[1], r2
52
+ ; CHECK-NEXT: ldr r2, [sp, #40]
53
+ ; CHECK-NEXT: vmov.32 q2[2], r2
54
+ ; CHECK-NEXT: ldr r2, [sp, #44]
55
+ ; CHECK-NEXT: vmov.32 q2[3], r2
54
56
; CHECK-NEXT: ldr r2, [sp]
55
- ; CHECK-NEXT: ldr r3, [sp, #8]
56
- ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
57
+ ; CHECK-NEXT: vmov.32 q3[0], r2
57
58
; CHECK-NEXT: ldr r2, [sp, #4]
58
- ; CHECK-NEXT: ldr r3, [sp, #12]
59
- ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
59
+ ; CHECK-NEXT: vmov.32 q3[1], r2
60
+ ; CHECK-NEXT: ldr r2, [sp, #8]
61
+ ; CHECK-NEXT: vmov.32 q3[2], r2
62
+ ; CHECK-NEXT: ldr r2, [sp, #12]
63
+ ; CHECK-NEXT: vmov.32 q3[3], r2
60
64
; CHECK-NEXT: adr r2, .LCPI1_1
61
65
; CHECK-NEXT: vpsel q2, q3, q2
62
66
; CHECK-NEXT: vstrw.32 q2, [r0]
@@ -66,19 +70,21 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
66
70
; CHECK-NEXT: vcmp.u32 hi, q1, q2
67
71
; CHECK-NEXT: vmrs r1, p0
68
72
; CHECK-NEXT: eors r1, r2
69
- ; CHECK-NEXT: ldr r2, [sp, #56]
70
73
; CHECK-NEXT: vmsr p0, r1
71
- ; CHECK-NEXT: ldr r1, [sp, #52]
74
+ ; CHECK-NEXT: ldr r1, [sp, #48]
72
75
; CHECK-NEXT: vpst
73
76
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
77
+ ; CHECK-NEXT: vmov.32 q0[0], r1
78
+ ; CHECK-NEXT: ldr r1, [sp, #52]
74
79
; CHECK-NEXT: vmov.32 q0[1], r1
75
- ; CHECK-NEXT: ldr r1, [sp, #48]
76
- ; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
80
+ ; CHECK-NEXT: ldr r1, [sp, #56]
81
+ ; CHECK-NEXT: vmov.32 q0[2], r1
82
+ ; CHECK-NEXT: ldr r1, [sp, #16]
83
+ ; CHECK-NEXT: vmov.32 q1[0], r1
77
84
; CHECK-NEXT: ldr r1, [sp, #20]
78
- ; CHECK-NEXT: ldr r2, [sp, #24]
79
85
; CHECK-NEXT: vmov.32 q1[1], r1
80
- ; CHECK-NEXT: ldr r1, [sp, #16]
81
- ; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
86
+ ; CHECK-NEXT: ldr r1, [sp, #24]
87
+ ; CHECK-NEXT: vmov.32 q1[2], r1
82
88
; CHECK-NEXT: vpsel q0, q1, q0
83
89
; CHECK-NEXT: vmov r1, s2
84
90
; CHECK-NEXT: vmov.f32 s2, s1
@@ -407,75 +413,81 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
407
413
define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroext %m) {
408
414
; CHECK-LABEL: test_width2:
409
415
; CHECK: @ %bb.0: @ %entry
410
- ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
411
- ; CHECK-NEXT: sub sp, #4
412
- ; CHECK-NEXT: vpush {d8, d9}
416
+ ; CHECK-NEXT: push {r4, r5, r6, lr}
417
+ ; CHECK-NEXT: vpush {d8, d9, d10, d11}
413
418
; CHECK-NEXT: sub sp, #8
414
419
; CHECK-NEXT: cmp r2, #0
415
420
; CHECK-NEXT: beq.w .LBB4_3
416
421
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
417
422
; CHECK-NEXT: adds r0, r2, #1
418
- ; CHECK-NEXT: vmov q1[2], q1[0], r2, r2
423
+ ; CHECK-NEXT: movs r3, #1
419
424
; CHECK-NEXT: bic r0, r0, #1
420
- ; CHECK-NEXT: adr r2, .LCPI4_0
425
+ ; CHECK-NEXT: vmov.32 q2[0], r2
421
426
; CHECK-NEXT: subs r0, #2
422
- ; CHECK-NEXT: movs r3, #1
423
427
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
424
- ; CHECK-NEXT: vldrw.u32 q2, [r2]
428
+ ; CHECK-NEXT: vmov.32 q2[2], r2
429
+ ; CHECK-NEXT: movs r6, #0
425
430
; CHECK-NEXT: add.w lr, r3, r0, lsr #1
426
- ; CHECK-NEXT: mov.w r8, #0
431
+ ; CHECK-NEXT: adr r3, .LCPI4_0
427
432
; CHECK-NEXT: dls lr, lr
428
- ; CHECK-NEXT: vand q1, q1, q0
433
+ ; CHECK-NEXT: vldrw.u32 q1, [r3]
434
+ ; CHECK-NEXT: vand q2, q2, q0
429
435
; CHECK-NEXT: .LBB4_2: @ %vector.body
430
436
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
431
- ; CHECK-NEXT: vmov q3[2], q3[0], r8, r8
432
- ; CHECK-NEXT: vmov r7, s4
437
+ ; CHECK-NEXT: vmov.32 q3[0], r6
438
+ ; CHECK-NEXT: vmov r5, s8
439
+ ; CHECK-NEXT: vmov.32 q3[2], r6
440
+ ; CHECK-NEXT: vmov r0, s9
433
441
; CHECK-NEXT: vand q3, q3, q0
434
- ; CHECK-NEXT: vmov r6, s5
442
+ ; CHECK-NEXT: adds r6, #2
435
443
; CHECK-NEXT: vmov r3, s14
436
- ; CHECK-NEXT: add.w r8, r8, #2
437
- ; CHECK-NEXT: vmov r9, s12
438
444
; CHECK-NEXT: vmov r2, s15
439
- ; CHECK-NEXT: vmov r0, s7
440
445
; CHECK-NEXT: adds r3, #1
441
- ; CHECK-NEXT: vmov q3[2], q3[0], r3, r9
442
- ; CHECK-NEXT: vand q3, q3, q0
443
446
; CHECK-NEXT: adc r12, r2, #0
444
- ; CHECK-NEXT: vmov r5, s12
445
- ; CHECK-NEXT: vmov r4, s13
446
- ; CHECK-NEXT: vmov r2, s6
447
- ; CHECK-NEXT: subs r7, r5, r7
448
- ; CHECK-NEXT: vmov r7, s14
449
- ; CHECK-NEXT: sbcs r4, r6
450
- ; CHECK-NEXT: vmov r6, s15
451
- ; CHECK-NEXT: mov.w r4, #0
452
- ; CHECK-NEXT: it lo
453
- ; CHECK-NEXT: movlo r4, #1
454
- ; CHECK-NEXT: cmp r4, #0
455
- ; CHECK-NEXT: csetm r4, ne
456
- ; CHECK-NEXT: subs r2, r7, r2
457
- ; CHECK-NEXT: sbcs.w r0, r6, r0
447
+ ; CHECK-NEXT: vmov r2, s12
448
+ ; CHECK-NEXT: vmov.32 q3[0], r2
449
+ ; CHECK-NEXT: vmov.32 q3[2], r3
450
+ ; CHECK-NEXT: vand q3, q3, q0
451
+ ; CHECK-NEXT: vmov r4, s12
452
+ ; CHECK-NEXT: teq.w r4, r2
453
+ ; CHECK-NEXT: cset r2, ne
454
+ ; CHECK-NEXT: tst.w r2, #1
455
+ ; CHECK-NEXT: csetm r2, ne
456
+ ; CHECK-NEXT: vmov.32 q4[0], r2
457
+ ; CHECK-NEXT: vmov.32 q4[1], r2
458
+ ; CHECK-NEXT: vmov r2, s14
459
+ ; CHECK-NEXT: eors r3, r2
460
+ ; CHECK-NEXT: orrs.w r3, r3, r12
461
+ ; CHECK-NEXT: cset r3, ne
462
+ ; CHECK-NEXT: tst.w r3, #1
463
+ ; CHECK-NEXT: csetm r3, ne
464
+ ; CHECK-NEXT: subs r5, r4, r5
465
+ ; CHECK-NEXT: vmov.32 q4[2], r3
466
+ ; CHECK-NEXT: vmov r5, s10
467
+ ; CHECK-NEXT: vmov.32 q4[3], r3
468
+ ; CHECK-NEXT: vmov r3, s13
469
+ ; CHECK-NEXT: veor q4, q4, q1
470
+ ; CHECK-NEXT: sbcs.w r0, r3, r0
471
+ ; CHECK-NEXT: vmov r3, s11
458
472
; CHECK-NEXT: mov.w r0, #0
459
473
; CHECK-NEXT: it lo
460
474
; CHECK-NEXT: movlo r0, #1
461
475
; CHECK-NEXT: cmp r0, #0
462
476
; CHECK-NEXT: csetm r0, ne
463
- ; CHECK-NEXT: vmov q3[2], q3[0], r0, r4
464
- ; CHECK-NEXT: vmov q3[3], q3[1], r0, r4
465
- ; CHECK-NEXT: eor.w r0, r7, r3
466
- ; CHECK-NEXT: orrs.w r0, r0, r12
467
- ; CHECK-NEXT: cset r0, ne
468
- ; CHECK-NEXT: tst.w r0, #1
469
- ; CHECK-NEXT: csetm r0, ne
470
- ; CHECK-NEXT: teq.w r5, r9
471
- ; CHECK-NEXT: cset r2, ne
472
- ; CHECK-NEXT: tst.w r2, #1
473
- ; CHECK-NEXT: csetm r2, ne
474
- ; CHECK-NEXT: vmov q4[2], q4[0], r0, r2
475
- ; CHECK-NEXT: vmov q4[3], q4[1], r0, r2
476
- ; CHECK-NEXT: veor q4, q4, q2
477
- ; CHECK-NEXT: vand q4, q4, q3
477
+ ; CHECK-NEXT: subs r2, r2, r5
478
+ ; CHECK-NEXT: vmov.32 q5[0], r0
479
+ ; CHECK-NEXT: vmov.32 q5[1], r0
480
+ ; CHECK-NEXT: vmov r0, s15
478
481
; CHECK-NEXT: @ implicit-def: $q3
482
+ ; CHECK-NEXT: sbcs r0, r3
483
+ ; CHECK-NEXT: mov.w r0, #0
484
+ ; CHECK-NEXT: it lo
485
+ ; CHECK-NEXT: movlo r0, #1
486
+ ; CHECK-NEXT: cmp r0, #0
487
+ ; CHECK-NEXT: csetm r0, ne
488
+ ; CHECK-NEXT: vmov.32 q5[2], r0
489
+ ; CHECK-NEXT: vmov.32 q5[3], r0
490
+ ; CHECK-NEXT: vand q4, q4, q5
479
491
; CHECK-NEXT: vmov r2, s16
480
492
; CHECK-NEXT: vmov r0, s18
481
493
; CHECK-NEXT: and r2, r2, #1
@@ -507,9 +519,8 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
507
519
; CHECK-NEXT: le lr, .LBB4_2
508
520
; CHECK-NEXT: .LBB4_3: @ %for.cond.cleanup
509
521
; CHECK-NEXT: add sp, #8
510
- ; CHECK-NEXT: vpop {d8, d9}
511
- ; CHECK-NEXT: add sp, #4
512
- ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
522
+ ; CHECK-NEXT: vpop {d8, d9, d10, d11}
523
+ ; CHECK-NEXT: pop {r4, r5, r6, pc}
513
524
; CHECK-NEXT: .p2align 4
514
525
; CHECK-NEXT: @ %bb.4:
515
526
; CHECK-NEXT: .LCPI4_0:
0 commit comments