Skip to content

Commit 6ab792b

Browse files
committed
[ARM] Simplify extract of VMOVDRR
Under SoftFP calling conventions, vector arguments arrive in core registers, which can leave extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) patterns in the DAG. Each such extract simplifies directly to a or b, depending on the extract lane and the target's endianness.

Differential Revision: https://reviews.llvm.org/D94990
1 parent 4648098 commit 6ab792b

File tree

2 files changed

+54
-58
lines changed

2 files changed

+54
-58
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13949,7 +13949,8 @@ static SDValue PerformInsertEltCombine(SDNode *N,
1394913949
}
1395013950

1395113951
static SDValue PerformExtractEltCombine(SDNode *N,
13952-
TargetLowering::DAGCombinerInfo &DCI) {
13952+
TargetLowering::DAGCombinerInfo &DCI,
13953+
const ARMSubtarget *ST) {
1395313954
SDValue Op0 = N->getOperand(0);
1395413955
EVT VT = N->getValueType(0);
1395513956
SDLoc dl(N);
@@ -13968,6 +13969,19 @@ static SDValue PerformExtractEltCombine(SDNode *N,
1396813969
return X;
1396913970
}
1397013971

13972+
// extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
13973+
if (Op0.getValueType() == MVT::v4i32 &&
13974+
isa<ConstantSDNode>(N->getOperand(1)) &&
13975+
Op0.getOpcode() == ISD::BITCAST &&
13976+
Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
13977+
Op0.getOperand(0).getValueType() == MVT::v2f64) {
13978+
SDValue BV = Op0.getOperand(0);
13979+
unsigned Offset = N->getConstantOperandVal(1);
13980+
SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
13981+
if (MOV.getOpcode() == ARMISD::VMOVDRR)
13982+
return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
13983+
}
13984+
1397113985
return SDValue();
1397213986
}
1397313987

@@ -16340,7 +16354,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1634016354
case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
1634116355
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
1634216356
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
16343-
case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
16357+
case ISD::EXTRACT_VECTOR_ELT:
16358+
return PerformExtractEltCombine(N, DCI, Subtarget);
1634416359
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
1634516360
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
1634616361
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);

llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll

Lines changed: 37 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -95,51 +95,38 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
9595
; CHECK-MVE: @ %bb.0: @ %entry
9696
; CHECK-MVE-NEXT: .save {r7, lr}
9797
; CHECK-MVE-NEXT: push {r7, lr}
98-
; CHECK-MVE-NEXT: vmov d1, r2, r3
99-
; CHECK-MVE-NEXT: add r2, sp, #8
100-
; CHECK-MVE-NEXT: vldrw.u32 q1, [r2]
101-
; CHECK-MVE-NEXT: vmov d0, r0, r1
102-
; CHECK-MVE-NEXT: vmov r1, s2
103-
; CHECK-MVE-NEXT: vmov r3, s6
104-
; CHECK-MVE-NEXT: vmov r0, s3
105-
; CHECK-MVE-NEXT: vmov r2, s7
106-
; CHECK-MVE-NEXT: adds.w lr, r1, r3
98+
; CHECK-MVE-NEXT: add.w r12, sp, #8
99+
; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
100+
; CHECK-MVE-NEXT: vmov lr, s2
101+
; CHECK-MVE-NEXT: vmov r12, s3
102+
; CHECK-MVE-NEXT: adds.w r2, r2, lr
103+
; CHECK-MVE-NEXT: adc.w r12, r12, r3
107104
; CHECK-MVE-NEXT: vmov r3, s0
108-
; CHECK-MVE-NEXT: vmov r1, s4
109-
; CHECK-MVE-NEXT: adc.w r12, r0, r2
110-
; CHECK-MVE-NEXT: vmov r2, s1
111-
; CHECK-MVE-NEXT: vmov r0, s5
112-
; CHECK-MVE-NEXT: adds r1, r1, r3
113-
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, lr
114-
; CHECK-MVE-NEXT: adcs r0, r2
115-
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r0, r12
116-
; CHECK-MVE-NEXT: vmov r0, r1, d0
117-
; CHECK-MVE-NEXT: vmov r2, r3, d1
105+
; CHECK-MVE-NEXT: adds r0, r0, r3
106+
; CHECK-MVE-NEXT: vmov q1[2], q1[0], r0, r2
107+
; CHECK-MVE-NEXT: vmov r0, s1
108+
; CHECK-MVE-NEXT: adcs r0, r1
109+
; CHECK-MVE-NEXT: vmov q1[3], q1[1], r0, r12
110+
; CHECK-MVE-NEXT: vmov r0, r1, d2
111+
; CHECK-MVE-NEXT: vmov r2, r3, d3
118112
; CHECK-MVE-NEXT: pop {r7, pc}
119113
;
120114
; CHECK-BE-LABEL: vector_add_i64:
121115
; CHECK-BE: @ %bb.0: @ %entry
122116
; CHECK-BE-NEXT: .save {r7, lr}
123117
; CHECK-BE-NEXT: push {r7, lr}
124-
; CHECK-BE-NEXT: vmov d1, r3, r2
125-
; CHECK-BE-NEXT: add r2, sp, #8
126-
; CHECK-BE-NEXT: vmov d0, r1, r0
127-
; CHECK-BE-NEXT: vrev64.32 q1, q0
128-
; CHECK-BE-NEXT: vldrw.u32 q0, [r2]
129-
; CHECK-BE-NEXT: vmov r1, s7
130-
; CHECK-BE-NEXT: vmov r3, s3
131-
; CHECK-BE-NEXT: vmov r0, s6
132-
; CHECK-BE-NEXT: vmov r2, s2
133-
; CHECK-BE-NEXT: adds.w r12, r1, r3
134-
; CHECK-BE-NEXT: vmov r3, s5
135-
; CHECK-BE-NEXT: vmov r1, s0
136-
; CHECK-BE-NEXT: adc.w lr, r0, r2
137-
; CHECK-BE-NEXT: vmov r0, s1
138-
; CHECK-BE-NEXT: vmov r2, s4
139-
; CHECK-BE-NEXT: adds r0, r0, r3
140-
; CHECK-BE-NEXT: adcs r1, r2
141-
; CHECK-BE-NEXT: vmov q0[2], q0[0], r1, lr
142-
; CHECK-BE-NEXT: vmov q0[3], q0[1], r0, r12
118+
; CHECK-BE-NEXT: add.w r12, sp, #8
119+
; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
120+
; CHECK-BE-NEXT: vmov lr, s3
121+
; CHECK-BE-NEXT: vmov r12, s2
122+
; CHECK-BE-NEXT: adds.w lr, lr, r3
123+
; CHECK-BE-NEXT: vmov r3, s0
124+
; CHECK-BE-NEXT: adc.w r12, r12, r2
125+
; CHECK-BE-NEXT: vmov r2, s1
126+
; CHECK-BE-NEXT: adds r1, r1, r2
127+
; CHECK-BE-NEXT: adcs r0, r3
128+
; CHECK-BE-NEXT: vmov q0[2], q0[0], r0, r12
129+
; CHECK-BE-NEXT: vmov q0[3], q0[1], r1, lr
143130
; CHECK-BE-NEXT: vrev64.32 q1, q0
144131
; CHECK-BE-NEXT: vmov r1, r0, d2
145132
; CHECK-BE-NEXT: vmov r3, r2, d3
@@ -149,24 +136,18 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
149136
; CHECK-FP: @ %bb.0: @ %entry
150137
; CHECK-FP-NEXT: .save {r7, lr}
151138
; CHECK-FP-NEXT: push {r7, lr}
152-
; CHECK-FP-NEXT: vmov d1, r2, r3
153-
; CHECK-FP-NEXT: vmov d0, r0, r1
154-
; CHECK-FP-NEXT: add r0, sp, #8
155-
; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
156-
; CHECK-FP-NEXT: vmov r1, s2
157-
; CHECK-FP-NEXT: vmov r0, s3
158-
; CHECK-FP-NEXT: vmov r3, s6
159-
; CHECK-FP-NEXT: vmov r2, s7
160-
; CHECK-FP-NEXT: adds.w lr, r1, r3
161-
; CHECK-FP-NEXT: vmov r3, s0
162-
; CHECK-FP-NEXT: vmov r1, s4
163-
; CHECK-FP-NEXT: adc.w r12, r0, r2
164-
; CHECK-FP-NEXT: vmov r2, s1
165-
; CHECK-FP-NEXT: vmov r0, s5
166-
; CHECK-FP-NEXT: adds r1, r1, r3
167-
; CHECK-FP-NEXT: vmov q0[2], q0[0], r1, lr
168-
; CHECK-FP-NEXT: adcs r0, r2
169-
; CHECK-FP-NEXT: vmov q0[3], q0[1], r0, r12
139+
; CHECK-FP-NEXT: add.w r12, sp, #8
140+
; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
141+
; CHECK-FP-NEXT: vmov lr, s2
142+
; CHECK-FP-NEXT: vmov r12, s3
143+
; CHECK-FP-NEXT: adds.w lr, lr, r2
144+
; CHECK-FP-NEXT: vmov r2, s0
145+
; CHECK-FP-NEXT: adc.w r12, r12, r3
146+
; CHECK-FP-NEXT: vmov r3, s1
147+
; CHECK-FP-NEXT: adds r0, r0, r2
148+
; CHECK-FP-NEXT: adcs r1, r3
149+
; CHECK-FP-NEXT: vmov q0[2], q0[0], r0, lr
150+
; CHECK-FP-NEXT: vmov q0[3], q0[1], r1, r12
170151
; CHECK-FP-NEXT: vmov r0, r1, d0
171152
; CHECK-FP-NEXT: vmov r2, r3, d1
172153
; CHECK-FP-NEXT: pop {r7, pc}

0 commit comments

Comments
 (0)