[ARM] Fix VMOVRRD combine with non-canonical inserts. #109639

davemgreen · 2024-09-23T09:21:17Z

In some situations, in the test case here with the multiple calls being late legalized, we can see inserts of the form:

     b = insert a, x, 0
    c = insert b, y, 1
   d = insert c, z, 0
  bc = bitcast d
 e = extract bc, 0
r = vmovrrd e

The redundant insert will usually be removed, but in some cases are not prior to PerformVMOVRRDCombine. The code was finding the first insert from each lane (x and y), as opposed to the last (z and y).

In rare situations, in the test case here with the multiple calls being late legalized, we can see inserts of the form: ``` b = insert a, x, 0 c = insert b, y, 1 d = insert c, z, 0 bc = bitcast d e = extract bc, 0 r = vmovrrd e ``` The redundent insert will usually be removed, but in some cases are not prior to PerformVMOVRRDCombine. The code was finding the last insert from each lane (b and c), as opposed to the last (d and c).

llvmbot · 2024-09-23T09:21:50Z

@llvm/pr-subscribers-backend-arm

Author: David Green (davemgreen)

Changes

In some situations, in the test case here with the multiple calls being late legalized, we can see inserts of the form:

     b = insert a, x, 0
    c = insert b, y, 1
   d = insert c, z, 0
  bc = bitcast d
 e = extract bc, 0
r = vmovrrd e

The redundent insert will usually be removed, but in some cases are not prior to PerformVMOVRRDCombine. The code was finding the last insert from each lane (x and y), as opposed to the last (z and y).

Full diff: https://github.com/llvm/llvm-project/pull/109639.diff

2 Files Affected:

(modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+2-2)
(modified) llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll (+66)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index db564d7e841aa9..a03928b618df03 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15131,9 +15131,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
     SDValue Op0, Op1;
     while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
       if (isa<ConstantSDNode>(BV.getOperand(2))) {
-        if (BV.getConstantOperandVal(2) == Offset)
+        if (BV.getConstantOperandVal(2) == Offset && !Op0)
           Op0 = BV.getOperand(1);
-        if (BV.getConstantOperandVal(2) == Offset + 1)
+        if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
           Op1 = BV.getOperand(1);
       }
       BV = BV.getOperand(0);
diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
index 4b76906034057f..41d2c02a73cd0a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
@@ -543,3 +543,69 @@ define <4 x i32> @insertextract(i32 %x, i32 %y) {
   %4 = insertelement <4 x i32> %3, i32 %y, i32 3
   ret <4 x i32> %4
 }
+
+declare void @print_uint32x4_t(<4 x i32> %val)
+define i32 @main(i64 %x, i64 %y) {
+; CHECK-LE-LABEL: main:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r4, lr}
+; CHECK-LE-NEXT:    push {r4, lr}
+; CHECK-LE-NEXT:    .vsave {d8, d9}
+; CHECK-LE-NEXT:    vpush {d8, d9}
+; CHECK-LE-NEXT:    .pad #8
+; CHECK-LE-NEXT:    sub sp, #8
+; CHECK-LE-NEXT:    vmov.32 q4[2], r2
+; CHECK-LE-NEXT:    mov r4, r1
+; CHECK-LE-NEXT:    mov r1, r0
+; CHECK-LE-NEXT:    vmov.32 q4[3], r3
+; CHECK-LE-NEXT:    movs r0, #0
+; CHECK-LE-NEXT:    mov r2, r1
+; CHECK-LE-NEXT:    mov r3, r4
+; CHECK-LE-NEXT:    vstr d9, [sp]
+; CHECK-LE-NEXT:    bl print_uint32x4_t
+; CHECK-LE-NEXT:    movs r0, #0
+; CHECK-LE-NEXT:    movs r2, #1
+; CHECK-LE-NEXT:    mov r3, r4
+; CHECK-LE-NEXT:    vstr d9, [sp]
+; CHECK-LE-NEXT:    bl print_uint32x4_t
+; CHECK-LE-NEXT:    movs r0, #0
+; CHECK-LE-NEXT:    add sp, #8
+; CHECK-LE-NEXT:    vpop {d8, d9}
+; CHECK-LE-NEXT:    pop {r4, pc}
+;
+; CHECK-BE-LABEL: main:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r4, lr}
+; CHECK-BE-NEXT:    push {r4, lr}
+; CHECK-BE-NEXT:    .vsave {d8, d9}
+; CHECK-BE-NEXT:    vpush {d8, d9}
+; CHECK-BE-NEXT:    .pad #8
+; CHECK-BE-NEXT:    sub sp, #8
+; CHECK-BE-NEXT:    vmov.32 q0[2], r2
+; CHECK-BE-NEXT:    mov r4, r1
+; CHECK-BE-NEXT:    mov r1, r0
+; CHECK-BE-NEXT:    vmov.32 q0[3], r3
+; CHECK-BE-NEXT:    vrev64.32 q4, q0
+; CHECK-BE-NEXT:    movs r0, #0
+; CHECK-BE-NEXT:    mov r2, r1
+; CHECK-BE-NEXT:    mov r3, r4
+; CHECK-BE-NEXT:    vstr d9, [sp]
+; CHECK-BE-NEXT:    bl print_uint32x4_t
+; CHECK-BE-NEXT:    movs r0, #0
+; CHECK-BE-NEXT:    movs r2, #1
+; CHECK-BE-NEXT:    mov r3, r4
+; CHECK-BE-NEXT:    vstr d9, [sp]
+; CHECK-BE-NEXT:    bl print_uint32x4_t
+; CHECK-BE-NEXT:    movs r0, #0
+; CHECK-BE-NEXT:    add sp, #8
+; CHECK-BE-NEXT:    vpop {d8, d9}
+; CHECK-BE-NEXT:    pop {r4, pc}
+entry:
+  %a = insertelement <2 x i64> poison, i64 %x, i64 0
+  %b = insertelement <2 x i64> %a, i64 %y, i64 1
+  %c = bitcast <2 x i64> %b to <4 x i32>
+  %i = insertelement <4 x i32> %c, i32 1, i64 0
+  tail call void @print_uint32x4_t(i32 0, <4 x i32> %c)
+  tail call void @print_uint32x4_t(i32 0, <4 x i32> %i)
+  ret i32 0
+}

ostannard

Nit: One of the "last"s in the commit message should be "first".

LGTM

davemgreen · 2024-09-24T07:10:39Z

Thanks

davemgreen requested review from ostannard, statham-arm and john-brawn-arm September 23, 2024 09:21

llvmbot added the backend:ARM label Sep 23, 2024

ostannard approved these changes Sep 23, 2024

View reviewed changes

davemgreen merged commit 78ff736 into llvm:main Sep 24, 2024
10 checks passed

davemgreen deleted the gh-mve-vmovrrdinsert branch September 24, 2024 07:10

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[ARM] Fix VMOVRRD combine with non-canonical inserts. #109639

[ARM] Fix VMOVRRD combine with non-canonical inserts. #109639

Uh oh!

davemgreen commented Sep 23, 2024 •

edited

Loading

Uh oh!

llvmbot commented Sep 23, 2024

Uh oh!

ostannard left a comment

Uh oh!

davemgreen commented Sep 24, 2024

Uh oh!

Uh oh!

Uh oh!

[ARM] Fix VMOVRRD combine with non-canonical inserts. #109639

[ARM] Fix VMOVRRD combine with non-canonical inserts. #109639

Uh oh!

Conversation

davemgreen commented Sep 23, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 23, 2024

Uh oh!

ostannard left a comment

Choose a reason for hiding this comment

Uh oh!

davemgreen commented Sep 24, 2024

Uh oh!

Uh oh!

Uh oh!

davemgreen commented Sep 23, 2024 •

edited

Loading