Skip to content

Commit 6e913e4

Browse files
committed
Revert "[ARM] Match dual lane vmovs from insert_vector_elt"
This one needed more testing.
1 parent fc712eb commit 6e913e4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+6897
-5716
lines changed

llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4790,14 +4790,6 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
47904790
}
47914791
}
47924792
}
4793-
if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {
4794-
assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());
4795-
if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
4796-
MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
4797-
ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4798-
return false;
4799-
}
4800-
}
48014793
return true;
48024794
}
48034795

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 0 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -5845,41 +5845,6 @@ def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
58455845
let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
58465846
}
58475847

5848-
let Predicates = [HasMVEInt] in {
5849-
// Double lane moves. There are a number of patterns here. We know that the
5850-
// insertelt's will be in descending order by index, and need to match the 5
5851-
// patterns that might contain 2-0 or 3-1 pairs. These are:
5852-
// 3 2 1 0 -> vmovqrr 31; vmovqrr 20
5853-
// 3 2 1 -> vmovqrr 31; vmov 2
5854-
// 3 1 -> vmovqrr 31
5855-
// 2 1 0 -> vmovqrr 20; vmov 1
5856-
// 2 0 -> vmovqrr 20
5857-
// The other potential patterns will be handled by single lane inserts.
5858-
def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5859-
rGPR:$srcA, (i32 0)),
5860-
rGPR:$srcB, (i32 1)),
5861-
rGPR:$srcC, (i32 2)),
5862-
rGPR:$srcD, (i32 3)),
5863-
(MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcC, rGPR:$srcA, (i32 2), (i32 0)),
5864-
rGPR:$srcD, rGPR:$srcB, (i32 3), (i32 1))>;
5865-
def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5866-
rGPR:$srcB, (i32 1)),
5867-
rGPR:$srcC, (i32 2)),
5868-
rGPR:$srcD, (i32 3)),
5869-
(MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)),
5870-
rGPR:$srcD, rGPR:$srcB, (i32 3), (i32 1))>;
5871-
def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)),
5872-
(MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcB, rGPR:$srcA, (i32 3), (i32 1))>;
5873-
def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1),
5874-
rGPR:$srcB, (i32 0)),
5875-
rGPR:$srcC, (i32 1)),
5876-
rGPR:$srcD, (i32 2)),
5877-
(MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)),
5878-
rGPR:$srcD, rGPR:$srcB, (i32 2), (i32 0))>;
5879-
def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)),
5880-
(MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcB, rGPR:$srcA, (i32 2), (i32 0))>;
5881-
}
5882-
58835848
// end of coproc mov
58845849

58855850
// start of MVE interleaving load/store

llvm/test/CodeGen/Thumb2/active_lane_mask.ll

Lines changed: 75 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -39,24 +39,28 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
3939
; CHECK-NEXT: adr r3, .LCPI1_0
4040
; CHECK-NEXT: vdup.32 q1, r1
4141
; CHECK-NEXT: vldrw.u32 q0, [r3]
42-
; CHECK-NEXT: ldr r3, [sp, #40]
4342
; CHECK-NEXT: vadd.i32 q2, q0, r1
4443
; CHECK-NEXT: vdup.32 q0, r2
4544
; CHECK-NEXT: vcmp.u32 hi, q1, q2
4645
; CHECK-NEXT: ldr r2, [sp, #32]
4746
; CHECK-NEXT: vpnot
4847
; CHECK-NEXT: vpst
4948
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
50-
; CHECK-NEXT: vmov q2[2], q2[0], r3, r2
49+
; CHECK-NEXT: vmov.32 q2[0], r2
5150
; CHECK-NEXT: ldr r2, [sp, #36]
52-
; CHECK-NEXT: ldr r3, [sp, #44]
53-
; CHECK-NEXT: vmov q2[3], q2[1], r3, r2
51+
; CHECK-NEXT: vmov.32 q2[1], r2
52+
; CHECK-NEXT: ldr r2, [sp, #40]
53+
; CHECK-NEXT: vmov.32 q2[2], r2
54+
; CHECK-NEXT: ldr r2, [sp, #44]
55+
; CHECK-NEXT: vmov.32 q2[3], r2
5456
; CHECK-NEXT: ldr r2, [sp]
55-
; CHECK-NEXT: ldr r3, [sp, #8]
56-
; CHECK-NEXT: vmov q3[2], q3[0], r3, r2
57+
; CHECK-NEXT: vmov.32 q3[0], r2
5758
; CHECK-NEXT: ldr r2, [sp, #4]
58-
; CHECK-NEXT: ldr r3, [sp, #12]
59-
; CHECK-NEXT: vmov q3[3], q3[1], r3, r2
59+
; CHECK-NEXT: vmov.32 q3[1], r2
60+
; CHECK-NEXT: ldr r2, [sp, #8]
61+
; CHECK-NEXT: vmov.32 q3[2], r2
62+
; CHECK-NEXT: ldr r2, [sp, #12]
63+
; CHECK-NEXT: vmov.32 q3[3], r2
6064
; CHECK-NEXT: adr r2, .LCPI1_1
6165
; CHECK-NEXT: vpsel q2, q3, q2
6266
; CHECK-NEXT: vstrw.32 q2, [r0]
@@ -66,19 +70,21 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
6670
; CHECK-NEXT: vcmp.u32 hi, q1, q2
6771
; CHECK-NEXT: vmrs r1, p0
6872
; CHECK-NEXT: eors r1, r2
69-
; CHECK-NEXT: ldr r2, [sp, #56]
7073
; CHECK-NEXT: vmsr p0, r1
71-
; CHECK-NEXT: ldr r1, [sp, #52]
74+
; CHECK-NEXT: ldr r1, [sp, #48]
7275
; CHECK-NEXT: vpst
7376
; CHECK-NEXT: vcmpt.u32 hi, q0, q2
77+
; CHECK-NEXT: vmov.32 q0[0], r1
78+
; CHECK-NEXT: ldr r1, [sp, #52]
7479
; CHECK-NEXT: vmov.32 q0[1], r1
75-
; CHECK-NEXT: ldr r1, [sp, #48]
76-
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
80+
; CHECK-NEXT: ldr r1, [sp, #56]
81+
; CHECK-NEXT: vmov.32 q0[2], r1
82+
; CHECK-NEXT: ldr r1, [sp, #16]
83+
; CHECK-NEXT: vmov.32 q1[0], r1
7784
; CHECK-NEXT: ldr r1, [sp, #20]
78-
; CHECK-NEXT: ldr r2, [sp, #24]
7985
; CHECK-NEXT: vmov.32 q1[1], r1
80-
; CHECK-NEXT: ldr r1, [sp, #16]
81-
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
86+
; CHECK-NEXT: ldr r1, [sp, #24]
87+
; CHECK-NEXT: vmov.32 q1[2], r1
8288
; CHECK-NEXT: vpsel q0, q1, q0
8389
; CHECK-NEXT: vmov r1, s2
8490
; CHECK-NEXT: vmov.f32 s2, s1
@@ -407,75 +413,81 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
407413
define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroext %m) {
408414
; CHECK-LABEL: test_width2:
409415
; CHECK: @ %bb.0: @ %entry
410-
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
411-
; CHECK-NEXT: sub sp, #4
412-
; CHECK-NEXT: vpush {d8, d9}
416+
; CHECK-NEXT: push {r4, r5, r6, lr}
417+
; CHECK-NEXT: vpush {d8, d9, d10, d11}
413418
; CHECK-NEXT: sub sp, #8
414419
; CHECK-NEXT: cmp r2, #0
415420
; CHECK-NEXT: beq.w .LBB4_3
416421
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
417422
; CHECK-NEXT: adds r0, r2, #1
418-
; CHECK-NEXT: vmov q1[2], q1[0], r2, r2
423+
; CHECK-NEXT: movs r3, #1
419424
; CHECK-NEXT: bic r0, r0, #1
420-
; CHECK-NEXT: adr r2, .LCPI4_0
425+
; CHECK-NEXT: vmov.32 q2[0], r2
421426
; CHECK-NEXT: subs r0, #2
422-
; CHECK-NEXT: movs r3, #1
423427
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
424-
; CHECK-NEXT: vldrw.u32 q2, [r2]
428+
; CHECK-NEXT: vmov.32 q2[2], r2
429+
; CHECK-NEXT: movs r6, #0
425430
; CHECK-NEXT: add.w lr, r3, r0, lsr #1
426-
; CHECK-NEXT: mov.w r8, #0
431+
; CHECK-NEXT: adr r3, .LCPI4_0
427432
; CHECK-NEXT: dls lr, lr
428-
; CHECK-NEXT: vand q1, q1, q0
433+
; CHECK-NEXT: vldrw.u32 q1, [r3]
434+
; CHECK-NEXT: vand q2, q2, q0
429435
; CHECK-NEXT: .LBB4_2: @ %vector.body
430436
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
431-
; CHECK-NEXT: vmov q3[2], q3[0], r8, r8
432-
; CHECK-NEXT: vmov r7, s4
437+
; CHECK-NEXT: vmov.32 q3[0], r6
438+
; CHECK-NEXT: vmov r5, s8
439+
; CHECK-NEXT: vmov.32 q3[2], r6
440+
; CHECK-NEXT: vmov r0, s9
433441
; CHECK-NEXT: vand q3, q3, q0
434-
; CHECK-NEXT: vmov r6, s5
442+
; CHECK-NEXT: adds r6, #2
435443
; CHECK-NEXT: vmov r3, s14
436-
; CHECK-NEXT: add.w r8, r8, #2
437-
; CHECK-NEXT: vmov r9, s12
438444
; CHECK-NEXT: vmov r2, s15
439-
; CHECK-NEXT: vmov r0, s7
440445
; CHECK-NEXT: adds r3, #1
441-
; CHECK-NEXT: vmov q3[2], q3[0], r3, r9
442-
; CHECK-NEXT: vand q3, q3, q0
443446
; CHECK-NEXT: adc r12, r2, #0
444-
; CHECK-NEXT: vmov r5, s12
445-
; CHECK-NEXT: vmov r4, s13
446-
; CHECK-NEXT: vmov r2, s6
447-
; CHECK-NEXT: subs r7, r5, r7
448-
; CHECK-NEXT: vmov r7, s14
449-
; CHECK-NEXT: sbcs r4, r6
450-
; CHECK-NEXT: vmov r6, s15
451-
; CHECK-NEXT: mov.w r4, #0
452-
; CHECK-NEXT: it lo
453-
; CHECK-NEXT: movlo r4, #1
454-
; CHECK-NEXT: cmp r4, #0
455-
; CHECK-NEXT: csetm r4, ne
456-
; CHECK-NEXT: subs r2, r7, r2
457-
; CHECK-NEXT: sbcs.w r0, r6, r0
447+
; CHECK-NEXT: vmov r2, s12
448+
; CHECK-NEXT: vmov.32 q3[0], r2
449+
; CHECK-NEXT: vmov.32 q3[2], r3
450+
; CHECK-NEXT: vand q3, q3, q0
451+
; CHECK-NEXT: vmov r4, s12
452+
; CHECK-NEXT: teq.w r4, r2
453+
; CHECK-NEXT: cset r2, ne
454+
; CHECK-NEXT: tst.w r2, #1
455+
; CHECK-NEXT: csetm r2, ne
456+
; CHECK-NEXT: vmov.32 q4[0], r2
457+
; CHECK-NEXT: vmov.32 q4[1], r2
458+
; CHECK-NEXT: vmov r2, s14
459+
; CHECK-NEXT: eors r3, r2
460+
; CHECK-NEXT: orrs.w r3, r3, r12
461+
; CHECK-NEXT: cset r3, ne
462+
; CHECK-NEXT: tst.w r3, #1
463+
; CHECK-NEXT: csetm r3, ne
464+
; CHECK-NEXT: subs r5, r4, r5
465+
; CHECK-NEXT: vmov.32 q4[2], r3
466+
; CHECK-NEXT: vmov r5, s10
467+
; CHECK-NEXT: vmov.32 q4[3], r3
468+
; CHECK-NEXT: vmov r3, s13
469+
; CHECK-NEXT: veor q4, q4, q1
470+
; CHECK-NEXT: sbcs.w r0, r3, r0
471+
; CHECK-NEXT: vmov r3, s11
458472
; CHECK-NEXT: mov.w r0, #0
459473
; CHECK-NEXT: it lo
460474
; CHECK-NEXT: movlo r0, #1
461475
; CHECK-NEXT: cmp r0, #0
462476
; CHECK-NEXT: csetm r0, ne
463-
; CHECK-NEXT: vmov q3[2], q3[0], r0, r4
464-
; CHECK-NEXT: vmov q3[3], q3[1], r0, r4
465-
; CHECK-NEXT: eor.w r0, r7, r3
466-
; CHECK-NEXT: orrs.w r0, r0, r12
467-
; CHECK-NEXT: cset r0, ne
468-
; CHECK-NEXT: tst.w r0, #1
469-
; CHECK-NEXT: csetm r0, ne
470-
; CHECK-NEXT: teq.w r5, r9
471-
; CHECK-NEXT: cset r2, ne
472-
; CHECK-NEXT: tst.w r2, #1
473-
; CHECK-NEXT: csetm r2, ne
474-
; CHECK-NEXT: vmov q4[2], q4[0], r0, r2
475-
; CHECK-NEXT: vmov q4[3], q4[1], r0, r2
476-
; CHECK-NEXT: veor q4, q4, q2
477-
; CHECK-NEXT: vand q4, q4, q3
477+
; CHECK-NEXT: subs r2, r2, r5
478+
; CHECK-NEXT: vmov.32 q5[0], r0
479+
; CHECK-NEXT: vmov.32 q5[1], r0
480+
; CHECK-NEXT: vmov r0, s15
478481
; CHECK-NEXT: @ implicit-def: $q3
482+
; CHECK-NEXT: sbcs r0, r3
483+
; CHECK-NEXT: mov.w r0, #0
484+
; CHECK-NEXT: it lo
485+
; CHECK-NEXT: movlo r0, #1
486+
; CHECK-NEXT: cmp r0, #0
487+
; CHECK-NEXT: csetm r0, ne
488+
; CHECK-NEXT: vmov.32 q5[2], r0
489+
; CHECK-NEXT: vmov.32 q5[3], r0
490+
; CHECK-NEXT: vand q4, q4, q5
479491
; CHECK-NEXT: vmov r2, s16
480492
; CHECK-NEXT: vmov r0, s18
481493
; CHECK-NEXT: and r2, r2, #1
@@ -507,9 +519,8 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
507519
; CHECK-NEXT: le lr, .LBB4_2
508520
; CHECK-NEXT: .LBB4_3: @ %for.cond.cleanup
509521
; CHECK-NEXT: add sp, #8
510-
; CHECK-NEXT: vpop {d8, d9}
511-
; CHECK-NEXT: add sp, #4
512-
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
522+
; CHECK-NEXT: vpop {d8, d9, d10, d11}
523+
; CHECK-NEXT: pop {r4, r5, r6, pc}
513524
; CHECK-NEXT: .p2align 4
514525
; CHECK-NEXT: @ %bb.4:
515526
; CHECK-NEXT: .LCPI4_0:

llvm/test/CodeGen/Thumb2/mve-abs.ll

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -42,18 +42,21 @@ define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
4242
; CHECK: @ %bb.0: @ %entry
4343
; CHECK-NEXT: vmov r0, s1
4444
; CHECK-NEXT: vmov r1, s0
45-
; CHECK-NEXT: vmov r3, s3
46-
; CHECK-NEXT: vmov r2, s2
4745
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
48-
; CHECK-NEXT: adc.w r12, r0, r0, asr #31
46+
; CHECK-NEXT: adc.w r2, r0, r0, asr #31
47+
; CHECK-NEXT: eor.w r2, r2, r0, asr #31
48+
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
49+
; CHECK-NEXT: vmov.32 q1[0], r0
50+
; CHECK-NEXT: vmov r0, s3
51+
; CHECK-NEXT: vmov r1, s2
52+
; CHECK-NEXT: vmov.32 q1[1], r2
53+
; CHECK-NEXT: adds.w r1, r1, r0, asr #31
4954
; CHECK-NEXT: eor.w r1, r1, r0, asr #31
50-
; CHECK-NEXT: adds.w r2, r2, r3, asr #31
51-
; CHECK-NEXT: eor.w r0, r12, r0, asr #31
52-
; CHECK-NEXT: eor.w r2, r2, r3, asr #31
53-
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
54-
; CHECK-NEXT: adc.w r1, r3, r3, asr #31
55-
; CHECK-NEXT: eor.w r1, r1, r3, asr #31
56-
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
55+
; CHECK-NEXT: vmov.32 q1[2], r1
56+
; CHECK-NEXT: adc.w r1, r0, r0, asr #31
57+
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
58+
; CHECK-NEXT: vmov.32 q1[3], r0
59+
; CHECK-NEXT: vmov q0, q1
5760
; CHECK-NEXT: bx lr
5861
entry:
5962
%0 = icmp slt <2 x i64> %s1, zeroinitializer

0 commit comments

Comments
 (0)