Skip to content

Commit fe6057a

Browse files
committed
[AArch64] Custom lower concat(v4i8 load, ...)
We already have custom lowering for v4i8 loads, which loads the value as an f32, converts it to a vector, and then bitcasts and extends the result to a v4i16. This adds custom lowering of concat(v4i8 load, ...) that keeps each loaded value as an f32 and creates a buildvector of the resulting f32 loads. This avoids creating all the extends and bitcasts, which are often difficult to fully clean up. Differential Revision: https://reviews.llvm.org/D121400
1 parent f83d833 commit fe6057a

File tree

3 files changed

+146
-186
lines changed

3 files changed

+146
-186
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 40 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1108,6 +1108,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11081108
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
11091109
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
11101110
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1111+
1112+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8, Custom);
11111113
}
11121114

11131115
if (Subtarget->hasSVE()) {
@@ -11014,6 +11016,40 @@ SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
1101411016
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
1101511017
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
1101611018

11019+
if (Op.getOperand(0).getValueType() == MVT::v4i8) {
11020+
// If we have a concat of v4i8 loads, convert them to a buildvector of f32
11021+
// loads to prevent having to go through the v4i8 load legalization that
11022+
// needs to extend each element into a larger type.
11023+
if (Op.getNumOperands() % 2 == 0 && all_of(Op->op_values(), [](SDValue V) {
11024+
return V.getValueType() == MVT::v4i8 &&
11025+
(V.getOpcode() == ISD::LOAD || V.isUndef());
11026+
})) {
11027+
EVT NVT =
11028+
EVT::getVectorVT(*DAG.getContext(), MVT::f32, Op.getNumOperands());
11029+
SmallVector<SDValue> Ops;
11030+
SDLoc DL(Op);
11031+
11032+
for (unsigned i = 0; i < Op.getNumOperands(); i++) {
11033+
SDValue V = Op.getOperand(i);
11034+
if (V.isUndef())
11035+
Ops.push_back(DAG.getUNDEF(MVT::f32));
11036+
else {
11037+
LoadSDNode *LD = cast<LoadSDNode>(V);
11038+
if (!LD->isSimple() || LD->isIndexed() ||
11039+
LD->getExtensionType() != ISD::NON_EXTLOAD)
11040+
return SDValue();
11041+
Ops.push_back(DAG.getLoad(MVT::f32, DL, LD->getChain(),
11042+
LD->getBasePtr(), LD->getMemOperand()));
11043+
}
11044+
}
11045+
return DAG.getBitcast(Op.getValueType(),
11046+
DAG.getBuildVector(NVT, DL, Ops));
11047+
}
11048+
11049+
// Let the default expansion happen
11050+
return SDValue();
11051+
}
11052+
1101711053
assert(Op.getValueType().isScalableVector() &&
1101811054
isTypeLegal(Op.getValueType()) &&
1101911055
"Expected legal scalable vector type!");
@@ -19116,8 +19152,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
1911619152
ReplaceExtractSubVectorResults(N, Results, DAG);
1911719153
return;
1911819154
case ISD::INSERT_SUBVECTOR:
19119-
// Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
19120-
// to common code for result type legalisation
19155+
case ISD::CONCAT_VECTORS:
19156+
// Custom lowering has been requested for INSERT_SUBVECTOR and
19157+
// CONCAT_VECTORS -- but delegate to common code for result type
19158+
// legalisation
1912119159
return;
1912219160
case ISD::INTRINSIC_WO_CHAIN: {
1912319161
EVT VT = N->getValueType(0);

llvm/test/CodeGen/AArch64/insert-extend.ll

Lines changed: 88 additions & 118 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,8 @@ define <8 x i8> @load4_v4i8_add(float %tmp, <4 x i8> *%a, <4 x i8> *%b) {
55
; CHECK-LABEL: load4_v4i8_add:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: ldp s0, s1, [x0]
8-
; CHECK-NEXT: ldp s2, s3, [x1]
9-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
10-
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
11-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
12-
; CHECK-NEXT: ushll v3.8h, v3.8b, #0
13-
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v2.8b
14-
; CHECK-NEXT: uzp1 v1.8b, v1.8b, v3.8b
8+
; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4
9+
; CHECK-NEXT: ld1 { v1.s }[1], [x1]
1510
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
1611
; CHECK-NEXT: ret
1712
%la = load <4 x i8>, <4 x i8> *%a
@@ -30,13 +25,8 @@ define <8 x i16> @load4_v4i8_zext_add(float %tmp, <4 x i8> *%a, <4 x i8> *%b) {
3025
; CHECK-LABEL: load4_v4i8_zext_add:
3126
; CHECK: // %bb.0:
3227
; CHECK-NEXT: ldp s0, s1, [x0]
33-
; CHECK-NEXT: ldp s2, s3, [x1]
34-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
35-
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
36-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
37-
; CHECK-NEXT: ushll v3.8h, v3.8b, #0
38-
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v2.8b
39-
; CHECK-NEXT: uzp1 v1.8b, v1.8b, v3.8b
28+
; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4
29+
; CHECK-NEXT: ld1 { v1.s }[1], [x1]
4030
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
4131
; CHECK-NEXT: ret
4232
%la = load <4 x i8>, <4 x i8> *%a
@@ -59,103 +49,49 @@ define i32 @large(i8* nocapture noundef readonly %p1, i32 noundef %st1, i8* noca
5949
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
6050
; CHECK-NEXT: sxtw x8, w1
6151
; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3
62-
; CHECK-NEXT: sxtw x9, w3
63-
; CHECK-NEXT: ldp s0, s1, [x0]
64-
; CHECK-NEXT: ldp s2, s3, [x2]
65-
; CHECK-NEXT: add x10, x0, x8
66-
; CHECK-NEXT: add x11, x2, x9
67-
; CHECK-NEXT: ushll v4.8h, v0.8b, #0
68-
; CHECK-NEXT: ushll v0.8h, v3.8b, #0
69-
; CHECK-NEXT: ldp s5, s3, [x10]
70-
; CHECK-NEXT: add x10, x10, x8
52+
; CHECK-NEXT: sxtw x11, w3
53+
; CHECK-NEXT: add x9, x0, x8
54+
; CHECK-NEXT: add x12, x2, x11
55+
; CHECK-NEXT: add x10, x9, x8
56+
; CHECK-NEXT: add x13, x12, x11
7157
; CHECK-NEXT: add x8, x10, x8
72-
; CHECK-NEXT: ldp s6, s7, [x11]
73-
; CHECK-NEXT: ldp s16, s17, [x10]
74-
; CHECK-NEXT: ldp s18, s21, [x8]
75-
; CHECK-NEXT: add x11, x11, x9
76-
; CHECK-NEXT: add x9, x11, x9
77-
; CHECK-NEXT: ushll v5.8h, v5.8b, #0
78-
; CHECK-NEXT: ushll v16.8h, v16.8b, #0
79-
; CHECK-NEXT: ushll v18.8h, v18.8b, #0
80-
; CHECK-NEXT: ldp s19, s20, [x11]
81-
; CHECK-NEXT: uzp1 v16.8b, v18.8b, v16.8b
82-
; CHECK-NEXT: uzp1 v4.8b, v5.8b, v4.8b
83-
; CHECK-NEXT: ldp s18, s5, [x9]
84-
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
85-
; CHECK-NEXT: ushll v6.8h, v6.8b, #0
86-
; CHECK-NEXT: ushll v19.8h, v19.8b, #0
87-
; CHECK-NEXT: ushll v18.8h, v18.8b, #0
88-
; CHECK-NEXT: uzp1 v2.8b, v6.8b, v2.8b
89-
; CHECK-NEXT: uzp1 v18.8b, v18.8b, v19.8b
58+
; CHECK-NEXT: add x11, x13, x11
59+
; CHECK-NEXT: ldp s1, s5, [x9]
60+
; CHECK-NEXT: ldp s0, s4, [x8]
61+
; CHECK-NEXT: ld1 { v0.s }[1], [x10], #4
62+
; CHECK-NEXT: ld1 { v1.s }[1], [x0], #4
63+
; CHECK-NEXT: ldp s2, s6, [x11]
64+
; CHECK-NEXT: ldp s3, s7, [x12]
65+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
9066
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
91-
; CHECK-NEXT: ushll v3.8h, v3.8b, #0
92-
; CHECK-NEXT: ushll v17.8h, v17.8b, #0
93-
; CHECK-NEXT: ushll v20.8h, v20.8b, #0
94-
; CHECK-NEXT: ushll v6.8h, v16.8b, #0
95-
; CHECK-NEXT: ushll v4.8h, v4.8b, #0
96-
; CHECK-NEXT: ushll v16.8h, v18.8b, #0
67+
; CHECK-NEXT: ld1 { v2.s }[1], [x13], #4
68+
; CHECK-NEXT: ld1 { v3.s }[1], [x2], #4
69+
; CHECK-NEXT: ld1 { v4.s }[1], [x10]
70+
; CHECK-NEXT: ld1 { v5.s }[1], [x0]
71+
; CHECK-NEXT: ld1 { v6.s }[1], [x13]
72+
; CHECK-NEXT: ld1 { v7.s }[1], [x2]
9773
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
98-
; CHECK-NEXT: ushll v19.8h, v21.8b, #0
99-
; CHECK-NEXT: ushll v5.8h, v5.8b, #0
100-
; CHECK-NEXT: ushll v7.8h, v7.8b, #0
101-
; CHECK-NEXT: usubl v18.4s, v6.4h, v16.4h
102-
; CHECK-NEXT: usubl2 v6.4s, v6.8h, v16.8h
103-
; CHECK-NEXT: usubl v16.4s, v4.4h, v2.4h
104-
; CHECK-NEXT: usubl2 v2.4s, v4.8h, v2.8h
105-
; CHECK-NEXT: uzp1 v4.8b, v19.8b, v17.8b
106-
; CHECK-NEXT: uzp1 v1.8b, v3.8b, v1.8b
107-
; CHECK-NEXT: uzp1 v3.8b, v5.8b, v20.8b
108-
; CHECK-NEXT: uzp1 v0.8b, v7.8b, v0.8b
109-
; CHECK-NEXT: ushll v4.8h, v4.8b, #0
11074
; CHECK-NEXT: ushll v3.8h, v3.8b, #0
111-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
112-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
113-
; CHECK-NEXT: usubl2 v5.4s, v4.8h, v3.8h
114-
; CHECK-NEXT: usubl v3.4s, v4.4h, v3.4h
115-
; CHECK-NEXT: usubl2 v4.4s, v1.8h, v0.8h
116-
; CHECK-NEXT: usubl v0.4s, v1.4h, v0.4h
117-
; CHECK-NEXT: shl v1.4s, v3.4s, #16
118-
; CHECK-NEXT: shl v3.4s, v5.4s, #16
75+
; CHECK-NEXT: usubl v16.4s, v0.4h, v2.4h
76+
; CHECK-NEXT: usubl2 v0.4s, v0.8h, v2.8h
77+
; CHECK-NEXT: usubl v2.4s, v1.4h, v3.4h
78+
; CHECK-NEXT: usubl2 v1.4s, v1.8h, v3.8h
79+
; CHECK-NEXT: ushll v3.8h, v4.8b, #0
80+
; CHECK-NEXT: ushll v4.8h, v5.8b, #0
81+
; CHECK-NEXT: ushll v5.8h, v6.8b, #0
82+
; CHECK-NEXT: ushll v6.8h, v7.8b, #0
83+
; CHECK-NEXT: usubl2 v7.4s, v3.8h, v5.8h
84+
; CHECK-NEXT: usubl v3.4s, v3.4h, v5.4h
85+
; CHECK-NEXT: usubl2 v5.4s, v4.8h, v6.8h
86+
; CHECK-NEXT: usubl v4.4s, v4.4h, v6.4h
87+
; CHECK-NEXT: shl v3.4s, v3.4s, #16
88+
; CHECK-NEXT: shl v6.4s, v7.4s, #16
89+
; CHECK-NEXT: shl v5.4s, v5.4s, #16
11990
; CHECK-NEXT: shl v4.4s, v4.4s, #16
120-
; CHECK-NEXT: add v1.4s, v1.4s, v18.4s
121-
; CHECK-NEXT: shl v0.4s, v0.4s, #16
122-
; CHECK-NEXT: add v3.4s, v3.4s, v6.4s
91+
; CHECK-NEXT: add v0.4s, v6.4s, v0.4s
92+
; CHECK-NEXT: add v3.4s, v3.4s, v16.4s
93+
; CHECK-NEXT: add v1.4s, v5.4s, v1.4s
12394
; CHECK-NEXT: add v2.4s, v4.4s, v2.4s
124-
; CHECK-NEXT: rev64 v4.4s, v3.4s
125-
; CHECK-NEXT: rev64 v5.4s, v1.4s
126-
; CHECK-NEXT: add v0.4s, v0.4s, v16.4s
127-
; CHECK-NEXT: rev64 v6.4s, v2.4s
128-
; CHECK-NEXT: rev64 v7.4s, v0.4s
129-
; CHECK-NEXT: add v16.4s, v3.4s, v4.4s
130-
; CHECK-NEXT: add v17.4s, v1.4s, v5.4s
131-
; CHECK-NEXT: sub v1.4s, v1.4s, v5.4s
132-
; CHECK-NEXT: trn2 v5.4s, v16.4s, v17.4s
133-
; CHECK-NEXT: add v18.4s, v2.4s, v6.4s
134-
; CHECK-NEXT: add v19.4s, v0.4s, v7.4s
135-
; CHECK-NEXT: sub v2.4s, v2.4s, v6.4s
136-
; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
137-
; CHECK-NEXT: sub v3.4s, v3.4s, v4.4s
138-
; CHECK-NEXT: trn2 v4.4s, v19.4s, v18.4s
139-
; CHECK-NEXT: ext v6.16b, v5.16b, v16.16b, #8
140-
; CHECK-NEXT: zip1 v7.4s, v0.4s, v2.4s
141-
; CHECK-NEXT: trn2 v16.4s, v17.4s, v16.4s
142-
; CHECK-NEXT: ext v4.16b, v19.16b, v4.16b, #8
143-
; CHECK-NEXT: zip1 v20.4s, v3.4s, v1.4s
144-
; CHECK-NEXT: ext v7.16b, v0.16b, v7.16b, #8
145-
; CHECK-NEXT: ext v17.16b, v16.16b, v17.16b, #8
146-
; CHECK-NEXT: zip2 v18.4s, v19.4s, v18.4s
147-
; CHECK-NEXT: zip2 v1.4s, v3.4s, v1.4s
148-
; CHECK-NEXT: mov v0.s[3], v2.s[2]
149-
; CHECK-NEXT: mov v5.d[1], v4.d[1]
150-
; CHECK-NEXT: mov v20.d[1], v7.d[1]
151-
; CHECK-NEXT: mov v17.d[1], v18.d[1]
152-
; CHECK-NEXT: mov v16.d[1], v4.d[1]
153-
; CHECK-NEXT: mov v1.d[1], v0.d[1]
154-
; CHECK-NEXT: mov v6.d[1], v18.d[1]
155-
; CHECK-NEXT: add v0.4s, v17.4s, v16.4s
156-
; CHECK-NEXT: add v2.4s, v1.4s, v20.4s
157-
; CHECK-NEXT: sub v3.4s, v5.4s, v6.4s
158-
; CHECK-NEXT: sub v1.4s, v20.4s, v1.4s
15995
; CHECK-NEXT: rev64 v4.4s, v0.4s
16096
; CHECK-NEXT: rev64 v5.4s, v3.4s
16197
; CHECK-NEXT: rev64 v6.4s, v1.4s
@@ -164,43 +100,77 @@ define i32 @large(i8* nocapture noundef readonly %p1, i32 noundef %st1, i8* noca
164100
; CHECK-NEXT: add v17.4s, v3.4s, v5.4s
165101
; CHECK-NEXT: add v18.4s, v1.4s, v6.4s
166102
; CHECK-NEXT: add v19.4s, v2.4s, v7.4s
167-
; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s
168103
; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s
169-
; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
104+
; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s
170105
; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
106+
; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
107+
; CHECK-NEXT: trn2 v4.4s, v16.4s, v17.4s
108+
; CHECK-NEXT: trn2 v5.4s, v19.4s, v18.4s
109+
; CHECK-NEXT: zip1 v7.4s, v2.4s, v1.4s
110+
; CHECK-NEXT: trn2 v20.4s, v17.4s, v16.4s
111+
; CHECK-NEXT: zip1 v6.4s, v0.4s, v3.4s
112+
; CHECK-NEXT: zip2 v18.4s, v19.4s, v18.4s
113+
; CHECK-NEXT: ext v5.16b, v19.16b, v5.16b, #8
114+
; CHECK-NEXT: ext v16.16b, v4.16b, v16.16b, #8
115+
; CHECK-NEXT: ext v7.16b, v2.16b, v7.16b, #8
116+
; CHECK-NEXT: ext v17.16b, v20.16b, v17.16b, #8
117+
; CHECK-NEXT: zip2 v0.4s, v0.4s, v3.4s
118+
; CHECK-NEXT: mov v2.s[3], v1.s[2]
119+
; CHECK-NEXT: mov v4.d[1], v5.d[1]
120+
; CHECK-NEXT: mov v6.d[1], v7.d[1]
121+
; CHECK-NEXT: mov v17.d[1], v18.d[1]
122+
; CHECK-NEXT: mov v20.d[1], v5.d[1]
123+
; CHECK-NEXT: mov v0.d[1], v2.d[1]
124+
; CHECK-NEXT: mov v16.d[1], v18.d[1]
125+
; CHECK-NEXT: add v1.4s, v17.4s, v20.4s
126+
; CHECK-NEXT: add v2.4s, v0.4s, v6.4s
127+
; CHECK-NEXT: sub v3.4s, v4.4s, v16.4s
128+
; CHECK-NEXT: sub v0.4s, v6.4s, v0.4s
129+
; CHECK-NEXT: rev64 v4.4s, v1.4s
130+
; CHECK-NEXT: rev64 v5.4s, v3.4s
131+
; CHECK-NEXT: rev64 v6.4s, v0.4s
132+
; CHECK-NEXT: rev64 v7.4s, v2.4s
133+
; CHECK-NEXT: add v16.4s, v1.4s, v4.4s
134+
; CHECK-NEXT: add v17.4s, v3.4s, v5.4s
135+
; CHECK-NEXT: add v18.4s, v0.4s, v6.4s
136+
; CHECK-NEXT: add v19.4s, v2.4s, v7.4s
137+
; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s
138+
; CHECK-NEXT: sub v0.4s, v0.4s, v6.4s
139+
; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
140+
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
171141
; CHECK-NEXT: ext v4.16b, v2.16b, v19.16b, #12
172-
; CHECK-NEXT: ext v5.16b, v1.16b, v18.16b, #12
142+
; CHECK-NEXT: ext v5.16b, v0.16b, v18.16b, #12
173143
; CHECK-NEXT: ext v7.16b, v3.16b, v17.16b, #12
174144
; CHECK-NEXT: rev64 v16.4s, v16.4s
175145
; CHECK-NEXT: ext v6.16b, v4.16b, v2.16b, #4
176146
; CHECK-NEXT: ext v17.16b, v4.16b, v4.16b, #8
177-
; CHECK-NEXT: ext v18.16b, v5.16b, v1.16b, #4
147+
; CHECK-NEXT: ext v18.16b, v5.16b, v0.16b, #4
178148
; CHECK-NEXT: ext v19.16b, v5.16b, v5.16b, #8
179149
; CHECK-NEXT: ext v20.16b, v7.16b, v3.16b, #4
180150
; CHECK-NEXT: ext v21.16b, v7.16b, v7.16b, #8
181151
; CHECK-NEXT: rev64 v7.4s, v7.4s
182-
; CHECK-NEXT: trn2 v0.4s, v16.4s, v0.4s
152+
; CHECK-NEXT: trn2 v1.4s, v16.4s, v1.4s
183153
; CHECK-NEXT: rev64 v5.4s, v5.4s
184154
; CHECK-NEXT: rev64 v4.4s, v4.4s
185155
; CHECK-NEXT: ext v6.16b, v6.16b, v17.16b, #12
186156
; CHECK-NEXT: ext v17.16b, v18.16b, v19.16b, #12
187157
; CHECK-NEXT: ext v18.16b, v20.16b, v21.16b, #12
188158
; CHECK-NEXT: ext v3.16b, v7.16b, v3.16b, #4
189-
; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
190-
; CHECK-NEXT: ext v1.16b, v5.16b, v1.16b, #4
159+
; CHECK-NEXT: ext v7.16b, v1.16b, v1.16b, #8
160+
; CHECK-NEXT: ext v0.16b, v5.16b, v0.16b, #4
191161
; CHECK-NEXT: ext v2.16b, v4.16b, v2.16b, #4
192162
; CHECK-NEXT: add v4.4s, v18.4s, v3.4s
193-
; CHECK-NEXT: add v5.4s, v0.4s, v7.4s
194-
; CHECK-NEXT: add v16.4s, v17.4s, v1.4s
163+
; CHECK-NEXT: add v5.4s, v1.4s, v7.4s
164+
; CHECK-NEXT: add v16.4s, v17.4s, v0.4s
195165
; CHECK-NEXT: add v19.4s, v6.4s, v2.4s
196166
; CHECK-NEXT: sub v3.4s, v18.4s, v3.4s
197-
; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
167+
; CHECK-NEXT: sub v1.4s, v1.4s, v7.4s
198168
; CHECK-NEXT: sub v2.4s, v6.4s, v2.4s
199-
; CHECK-NEXT: sub v1.4s, v17.4s, v1.4s
169+
; CHECK-NEXT: sub v0.4s, v17.4s, v0.4s
200170
; CHECK-NEXT: mov v19.d[1], v2.d[1]
201-
; CHECK-NEXT: mov v16.d[1], v1.d[1]
171+
; CHECK-NEXT: mov v16.d[1], v0.d[1]
202172
; CHECK-NEXT: mov v4.d[1], v3.d[1]
203-
; CHECK-NEXT: mov v5.d[1], v0.d[1]
173+
; CHECK-NEXT: mov v5.d[1], v1.d[1]
204174
; CHECK-NEXT: movi v0.8h, #1
205175
; CHECK-NEXT: movi v7.2d, #0x00ffff0000ffff
206176
; CHECK-NEXT: ushr v1.4s, v4.4s, #15

0 commit comments

Comments
 (0)