Skip to content

Commit 78ff736

Browse files
authored
[ARM] Fix VMOVRRD combine with non-canonical inserts. (#109639)
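In some situations (in the test case here, with multiple calls being legalized late) we can see inserts of the form:

```
b = insert a, x, 0
c = insert b, y, 1
d = insert c, z, 0
bc = bitcast d
e = extract bc, 0
r = vmovrrd e
```

The redundant insert (x into lane 0, later overwritten by z) will usually be removed, but in some cases it is not removed prior to PerformVMOVRRDCombine. The code was finding the first insert into each lane (x and y), as opposed to the last (z and y). Because the combine walks the chain from the outermost insert inwards, it now keeps only the first match it encounters for each lane, which is the last insert in program order.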
1 parent f6a8eb9 commit 78ff736

File tree

2 files changed

+68
-2
lines changed

2 files changed

+68
-2
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15131,9 +15131,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
1513115131
SDValue Op0, Op1;
1513215132
while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
1513315133
if (isa<ConstantSDNode>(BV.getOperand(2))) {
15134-
if (BV.getConstantOperandVal(2) == Offset)
15134+
if (BV.getConstantOperandVal(2) == Offset && !Op0)
1513515135
Op0 = BV.getOperand(1);
15136-
if (BV.getConstantOperandVal(2) == Offset + 1)
15136+
if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
1513715137
Op1 = BV.getOperand(1);
1513815138
}
1513915139
BV = BV.getOperand(0);

llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,3 +543,69 @@ define <4 x i32> @insertextract(i32 %x, i32 %y) {
543543
%4 = insertelement <4 x i32> %3, i32 %y, i32 3
544544
ret <4 x i32> %4
545545
}
546+
547+
declare void @print_uint32x4_t(<4 x i32> %val)
548+
define i32 @main(i64 %x, i64 %y) {
549+
; CHECK-LE-LABEL: main:
550+
; CHECK-LE: @ %bb.0: @ %entry
551+
; CHECK-LE-NEXT: .save {r4, lr}
552+
; CHECK-LE-NEXT: push {r4, lr}
553+
; CHECK-LE-NEXT: .vsave {d8, d9}
554+
; CHECK-LE-NEXT: vpush {d8, d9}
555+
; CHECK-LE-NEXT: .pad #8
556+
; CHECK-LE-NEXT: sub sp, #8
557+
; CHECK-LE-NEXT: vmov.32 q4[2], r2
558+
; CHECK-LE-NEXT: mov r4, r1
559+
; CHECK-LE-NEXT: mov r1, r0
560+
; CHECK-LE-NEXT: vmov.32 q4[3], r3
561+
; CHECK-LE-NEXT: movs r0, #0
562+
; CHECK-LE-NEXT: mov r2, r1
563+
; CHECK-LE-NEXT: mov r3, r4
564+
; CHECK-LE-NEXT: vstr d9, [sp]
565+
; CHECK-LE-NEXT: bl print_uint32x4_t
566+
; CHECK-LE-NEXT: movs r0, #0
567+
; CHECK-LE-NEXT: movs r2, #1
568+
; CHECK-LE-NEXT: mov r3, r4
569+
; CHECK-LE-NEXT: vstr d9, [sp]
570+
; CHECK-LE-NEXT: bl print_uint32x4_t
571+
; CHECK-LE-NEXT: movs r0, #0
572+
; CHECK-LE-NEXT: add sp, #8
573+
; CHECK-LE-NEXT: vpop {d8, d9}
574+
; CHECK-LE-NEXT: pop {r4, pc}
575+
;
576+
; CHECK-BE-LABEL: main:
577+
; CHECK-BE: @ %bb.0: @ %entry
578+
; CHECK-BE-NEXT: .save {r4, lr}
579+
; CHECK-BE-NEXT: push {r4, lr}
580+
; CHECK-BE-NEXT: .vsave {d8, d9}
581+
; CHECK-BE-NEXT: vpush {d8, d9}
582+
; CHECK-BE-NEXT: .pad #8
583+
; CHECK-BE-NEXT: sub sp, #8
584+
; CHECK-BE-NEXT: vmov.32 q0[2], r2
585+
; CHECK-BE-NEXT: mov r4, r1
586+
; CHECK-BE-NEXT: mov r1, r0
587+
; CHECK-BE-NEXT: vmov.32 q0[3], r3
588+
; CHECK-BE-NEXT: vrev64.32 q4, q0
589+
; CHECK-BE-NEXT: movs r0, #0
590+
; CHECK-BE-NEXT: mov r2, r1
591+
; CHECK-BE-NEXT: mov r3, r4
592+
; CHECK-BE-NEXT: vstr d9, [sp]
593+
; CHECK-BE-NEXT: bl print_uint32x4_t
594+
; CHECK-BE-NEXT: movs r0, #0
595+
; CHECK-BE-NEXT: movs r2, #1
596+
; CHECK-BE-NEXT: mov r3, r4
597+
; CHECK-BE-NEXT: vstr d9, [sp]
598+
; CHECK-BE-NEXT: bl print_uint32x4_t
599+
; CHECK-BE-NEXT: movs r0, #0
600+
; CHECK-BE-NEXT: add sp, #8
601+
; CHECK-BE-NEXT: vpop {d8, d9}
602+
; CHECK-BE-NEXT: pop {r4, pc}
603+
entry:
604+
%a = insertelement <2 x i64> poison, i64 %x, i64 0
605+
%b = insertelement <2 x i64> %a, i64 %y, i64 1
606+
%c = bitcast <2 x i64> %b to <4 x i32>
607+
%i = insertelement <4 x i32> %c, i32 1, i64 0
608+
tail call void @print_uint32x4_t(i32 0, <4 x i32> %c)
609+
tail call void @print_uint32x4_t(i32 0, <4 x i32> %i)
610+
ret i32 0
611+
}

0 commit comments

Comments
 (0)