Skip to content

Commit 3242e77

Browse files
authored
[ARM][Codegen] Fix vector data miscompilation in arm32be (#105519)
Fixes #102418: resolves the generation of incorrect vrev instructions during vectorization on big-endian targets.
1 parent fe6c025 commit 3242e77

File tree

10 files changed

+147
-124
lines changed

10 files changed

+147
-124
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7123,19 +7123,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
71237123
ImmMask <<= 1;
71247124
}
71257125

7126-
if (DAG.getDataLayout().isBigEndian()) {
7127-
// Reverse the order of elements within the vector.
7128-
unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7129-
unsigned Mask = (1 << BytesPerElem) - 1;
7130-
unsigned NumElems = 8 / BytesPerElem;
7131-
unsigned NewImm = 0;
7132-
for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7133-
unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7134-
NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7135-
}
7136-
Imm = NewImm;
7137-
}
7138-
71397126
// Op=1, Cmode=1110.
71407127
OpCmode = 0x1e;
71417128
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -7968,7 +7955,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
79687955

79697956
if (Val.getNode()) {
79707957
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7971-
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7958+
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
79727959
}
79737960

79747961
// Try an immediate VMVN.
@@ -7978,7 +7965,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
79787965
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
79797966
if (Val.getNode()) {
79807967
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7981-
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7968+
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
79827969
}
79837970

79847971
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
@@ -18606,7 +18593,9 @@ static SDValue PerformBITCASTCombine(SDNode *N,
1860618593

1860718594
// We may have a bitcast of something that has already had this bitcast
1860818595
// combine performed on it, so skip past any VECTOR_REG_CASTs.
18609-
while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18596+
if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18597+
Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18598+
Src.getValueType().getScalarSizeInBits())
1861018599
Src = Src.getOperand(0);
1861118600

1861218601
// Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that

llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
101101
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
102102
; CHECK-NEXT: vldr d17, [r0]
103103
; CHECK-NEXT: vrev64.16 d18, d0
104-
; CHECK-NEXT: vrev64.16 d17, d17
105-
; CHECK-NEXT: vrev64.16 d16, d16
106104
; CHECK-NEXT: vadd.i16 d16, d18, d16
105+
; CHECK-NEXT: vrev64.16 d17, d17
107106
; CHECK-NEXT: vadd.f16 d16, d16, d17
108107
; CHECK-NEXT: vrev64.16 d16, d16
109108
; CHECK-NEXT: vstr d16, [r0]

llvm/test/CodeGen/ARM/big-endian-vmov.ll

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
1010
;
1111
; CHECK-BE-LABEL: vmov_i8:
1212
; CHECK-BE: @ %bb.0:
13-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
13+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
14+
; CHECK-BE-NEXT: vrev64.8 d0, d16
1415
; CHECK-BE-NEXT: bx lr
1516
ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
1617
}
@@ -23,7 +24,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
2324
;
2425
; CHECK-BE-LABEL: vmov_i16_a:
2526
; CHECK-BE: @ %bb.0:
26-
; CHECK-BE-NEXT: vmov.i64 d0, #0xffff
27+
; CHECK-BE-NEXT: vmov.i64 d16, #0xffff000000000000
28+
; CHECK-BE-NEXT: vrev64.16 d0, d16
2729
; CHECK-BE-NEXT: bx lr
2830
ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
2931
}
@@ -36,7 +38,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
3638
;
3739
; CHECK-BE-LABEL: vmov_i16_b:
3840
; CHECK-BE: @ %bb.0:
39-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
41+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
42+
; CHECK-BE-NEXT: vrev64.16 d0, d16
4043
; CHECK-BE-NEXT: bx lr
4144
ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
4245
}
@@ -49,7 +52,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
4952
;
5053
; CHECK-BE-LABEL: vmov_i16_c:
5154
; CHECK-BE: @ %bb.0:
52-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
55+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
56+
; CHECK-BE-NEXT: vrev64.16 d0, d16
5357
; CHECK-BE-NEXT: bx lr
5458
ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
5559
}
@@ -62,7 +66,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
6266
;
6367
; CHECK-BE-LABEL: vmov_i32_a:
6468
; CHECK-BE: @ %bb.0:
65-
; CHECK-BE-NEXT: vmov.i64 d0, #0xffffffff
69+
; CHECK-BE-NEXT: vmov.i64 d16, #0xffffffff00000000
70+
; CHECK-BE-NEXT: vrev64.32 d0, d16
6671
; CHECK-BE-NEXT: bx lr
6772
ret <2 x i32> <i32 0, i32 -1>
6873
}
@@ -75,7 +80,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
7580
;
7681
; CHECK-BE-LABEL: vmov_i32_b:
7782
; CHECK-BE: @ %bb.0:
78-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
83+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
84+
; CHECK-BE-NEXT: vrev64.32 d0, d16
7985
; CHECK-BE-NEXT: bx lr
8086
ret <2 x i32> <i32 0, i32 255>
8187
}
@@ -88,7 +94,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
8894
;
8995
; CHECK-BE-LABEL: vmov_i32_c:
9096
; CHECK-BE: @ %bb.0:
91-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
97+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000000000
98+
; CHECK-BE-NEXT: vrev64.32 d0, d16
9299
; CHECK-BE-NEXT: bx lr
93100
ret <2 x i32> <i32 0, i32 65280>
94101
}
@@ -101,7 +108,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
101108
;
102109
; CHECK-BE-LABEL: vmov_i32_d:
103110
; CHECK-BE: @ %bb.0:
104-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff0000
111+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
112+
; CHECK-BE-NEXT: vrev64.32 d0, d16
105113
; CHECK-BE-NEXT: bx lr
106114
ret <2 x i32> <i32 0, i32 16711680>
107115
}
@@ -114,7 +122,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
114122
;
115123
; CHECK-BE-LABEL: vmov_i32_e:
116124
; CHECK-BE: @ %bb.0:
117-
; CHECK-BE-NEXT: vmov.i64 d0, #0xff000000
125+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
126+
; CHECK-BE-NEXT: vrev64.32 d0, d16
118127
; CHECK-BE-NEXT: bx lr
119128
ret <2 x i32> <i32 0, i32 4278190080>
120129
}
@@ -128,10 +137,16 @@ define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
128137
}
129138

130139
define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
131-
; CHECK-LABEL: vmov_i64_b:
132-
; CHECK: @ %bb.0:
133-
; CHECK-NEXT: vmov.i64 d0, #0xffff00ff0000ff
134-
; CHECK-NEXT: bx lr
140+
; CHECK-LE-LABEL: vmov_i64_b:
141+
; CHECK-LE: @ %bb.0:
142+
; CHECK-LE-NEXT: vmov.i64 d0, #0xffff00ff0000ff
143+
; CHECK-LE-NEXT: bx lr
144+
;
145+
; CHECK-BE-LABEL: vmov_i64_b:
146+
; CHECK-BE: @ %bb.0:
147+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000ff00ffff00
148+
; CHECK-BE-NEXT: vrev64.32 d0, d16
149+
; CHECK-BE-NEXT: bx lr
135150
ret <1 x i64> <i64 72056498804490495>
136151
}
137152

@@ -157,11 +172,18 @@ define arm_aapcs_vfpcc <4 x i32> @vmov_v4i32_b() {
157172
}
158173

159174
define arm_aapcs_vfpcc <2 x i64> @and_v2i64_b(<2 x i64> %a) {
160-
; CHECK-LABEL: and_v2i64_b:
161-
; CHECK: @ %bb.0:
162-
; CHECK-NEXT: vmov.i64 q8, #0xffff00ff0000ff
163-
; CHECK-NEXT: vand q0, q0, q8
164-
; CHECK-NEXT: bx lr
175+
; CHECK-LE-LABEL: and_v2i64_b:
176+
; CHECK-LE: @ %bb.0:
177+
; CHECK-LE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
178+
; CHECK-LE-NEXT: vand q0, q0, q8
179+
; CHECK-LE-NEXT: bx lr
180+
;
181+
; CHECK-BE-LABEL: and_v2i64_b:
182+
; CHECK-BE: @ %bb.0:
183+
; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
184+
; CHECK-BE-NEXT: vrev64.32 q8, q8
185+
; CHECK-BE-NEXT: vand q0, q0, q8
186+
; CHECK-BE-NEXT: bx lr
165187
%b = and <2 x i64> %a, <i64 72056498804490495, i64 72056498804490495>
166188
ret <2 x i64> %b
167189
}
@@ -175,9 +197,8 @@ define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) {
175197
;
176198
; CHECK-BE-LABEL: and_v4i32_b:
177199
; CHECK-BE: @ %bb.0:
178-
; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
200+
; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
179201
; CHECK-BE-NEXT: vrev64.32 q9, q0
180-
; CHECK-BE-NEXT: vrev64.32 q8, q8
181202
; CHECK-BE-NEXT: vand q8, q9, q8
182203
; CHECK-BE-NEXT: vrev64.32 q0, q8
183204
; CHECK-BE-NEXT: bx lr
@@ -198,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() {
198219
ret <8 x i16> <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
199220
}
200221

201-
; FIXME: This is incorrect for BE
202222
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
203223
; CHECK-LE-LABEL: and_v8i16_m1:
204224
; CHECK-LE: @ %bb.0:
@@ -227,7 +247,6 @@ define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_m1(<8 x i16> %a) {
227247
; CHECK-BE: @ %bb.0:
228248
; CHECK-BE-NEXT: vmvn.i32 q8, #0x10000
229249
; CHECK-BE-NEXT: vrev64.16 q9, q0
230-
; CHECK-BE-NEXT: vrev32.16 q8, q8
231250
; CHECK-BE-NEXT: veor q8, q9, q8
232251
; CHECK-BE-NEXT: vrev64.16 q0, q8
233252
; CHECK-BE-NEXT: bx lr

llvm/test/CodeGen/ARM/vmov.ll

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,16 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind {
139139
}
140140

141141
define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind {
142-
; CHECK-LABEL: v_movi64:
143-
; CHECK: @ %bb.0:
144-
; CHECK-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
145-
; CHECK-NEXT: mov pc, lr
142+
; CHECK-LE-LABEL: v_movi64:
143+
; CHECK-LE: @ %bb.0:
144+
; CHECK-LE-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
145+
; CHECK-LE-NEXT: mov pc, lr
146+
;
147+
; CHECK-BE-LABEL: v_movi64:
148+
; CHECK-BE: @ %bb.0:
149+
; CHECK-BE-NEXT: vmov.i64 d16, #0xffffff0000ff
150+
; CHECK-BE-NEXT: vrev64.32 d0, d16
151+
; CHECK-BE-NEXT: mov pc, lr
146152
ret <1 x i64> < i64 18374687574888349695 >
147153
}
148154

@@ -889,11 +895,18 @@ define arm_aapcs_vfpcc void @v_movf32_sti64(ptr %p) {
889895
}
890896

891897
define arm_aapcs_vfpcc void @v_movi64_sti64(ptr %p) {
892-
; CHECK-LABEL: v_movi64_sti64:
893-
; CHECK: @ %bb.0:
894-
; CHECK-NEXT: vmov.i64 d16, #0xff
895-
; CHECK-NEXT: vst1.64 {d16}, [r0:64]
896-
; CHECK-NEXT: mov pc, lr
898+
; CHECK-LE-LABEL: v_movi64_sti64:
899+
; CHECK-LE: @ %bb.0:
900+
; CHECK-LE-NEXT: vmov.i64 d16, #0xff
901+
; CHECK-LE-NEXT: vst1.64 {d16}, [r0:64]
902+
; CHECK-LE-NEXT: mov pc, lr
903+
;
904+
; CHECK-BE-LABEL: v_movi64_sti64:
905+
; CHECK-BE: @ %bb.0:
906+
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
907+
; CHECK-BE-NEXT: vrev64.32 d16, d16
908+
; CHECK-BE-NEXT: vst1.64 {d16}, [r0:64]
909+
; CHECK-BE-NEXT: mov pc, lr
897910
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> <i64 255>, i32 8)
898911
ret void
899912
}
@@ -1094,11 +1107,18 @@ define arm_aapcs_vfpcc void @v_movQf32_sti64(ptr %p) {
10941107
}
10951108

10961109
define arm_aapcs_vfpcc void @v_movQi64_sti64(ptr %p) {
1097-
; CHECK-LABEL: v_movQi64_sti64:
1098-
; CHECK: @ %bb.0:
1099-
; CHECK-NEXT: vmov.i64 q8, #0xff
1100-
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64]
1101-
; CHECK-NEXT: mov pc, lr
1110+
; CHECK-LE-LABEL: v_movQi64_sti64:
1111+
; CHECK-LE: @ %bb.0:
1112+
; CHECK-LE-NEXT: vmov.i64 q8, #0xff
1113+
; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r0:64]
1114+
; CHECK-LE-NEXT: mov pc, lr
1115+
;
1116+
; CHECK-BE-LABEL: v_movQi64_sti64:
1117+
; CHECK-BE: @ %bb.0:
1118+
; CHECK-BE-NEXT: vmov.i64 q8, #0xff00000000
1119+
; CHECK-BE-NEXT: vrev64.32 q8, q8
1120+
; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r0:64]
1121+
; CHECK-BE-NEXT: mov pc, lr
11021122
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> <i64 255, i64 255>, i32 8)
11031123
ret void
11041124
}

llvm/test/CodeGen/Thumb2/mve-be.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_le(<4 x i32> %src) {
232232
; CHECK-BE: @ %bb.0: @ %entry
233233
; CHECK-BE-NEXT: vrev64.8 q1, q0
234234
; CHECK-BE-NEXT: vmov.i32 q0, #0x1
235-
; CHECK-BE-NEXT: vrev32.8 q0, q0
236235
; CHECK-BE-NEXT: vand q1, q1, q0
237236
; CHECK-BE-NEXT: vrev64.8 q0, q1
238237
; CHECK-BE-NEXT: bx lr
@@ -254,7 +253,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_be(<4 x i32> %src) {
254253
; CHECK-BE: @ %bb.0: @ %entry
255254
; CHECK-BE-NEXT: vrev64.8 q1, q0
256255
; CHECK-BE-NEXT: vmov.i32 q0, #0x1000000
257-
; CHECK-BE-NEXT: vrev32.8 q0, q0
258256
; CHECK-BE-NEXT: vand q1, q1, q0
259257
; CHECK-BE-NEXT: vrev64.8 q0, q1
260258
; CHECK-BE-NEXT: bx lr

llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -421,13 +421,14 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
421421
; CHECK-BE-NEXT: vmov.32 q1[3], r1
422422
; CHECK-BE-NEXT: vrev64.32 q0, q1
423423
; CHECK-BE-NEXT: .LBB7_4: @ %else2
424-
; CHECK-BE-NEXT: vrev64.32 q3, q2
424+
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
425425
; CHECK-BE-NEXT: movs r1, #0
426-
; CHECK-BE-NEXT: vmov r2, s15
427-
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
428-
; CHECK-BE-NEXT: vand q0, q0, q1
426+
; CHECK-BE-NEXT: vrev64.32 q3, q1
427+
; CHECK-BE-NEXT: vrev64.32 q1, q2
428+
; CHECK-BE-NEXT: vmov r2, s7
429+
; CHECK-BE-NEXT: vand q0, q0, q3
429430
; CHECK-BE-NEXT: rsbs r3, r2, #0
430-
; CHECK-BE-NEXT: vmov r3, s13
431+
; CHECK-BE-NEXT: vmov r3, s5
431432
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
432433
; CHECK-BE-NEXT: csetm r12, lt
433434
; CHECK-BE-NEXT: rsbs r2, r3, #0
@@ -537,13 +538,14 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
537538
; CHECK-BE-NEXT: vmov.32 q1[3], r1
538539
; CHECK-BE-NEXT: vrev64.32 q0, q1
539540
; CHECK-BE-NEXT: .LBB8_4: @ %else2
540-
; CHECK-BE-NEXT: vrev64.32 q3, q2
541+
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
541542
; CHECK-BE-NEXT: movs r1, #0
542-
; CHECK-BE-NEXT: vmov r2, s15
543-
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
544-
; CHECK-BE-NEXT: vand q0, q0, q1
543+
; CHECK-BE-NEXT: vrev64.32 q3, q1
544+
; CHECK-BE-NEXT: vrev64.32 q1, q2
545+
; CHECK-BE-NEXT: vmov r2, s7
546+
; CHECK-BE-NEXT: vand q0, q0, q3
545547
; CHECK-BE-NEXT: rsbs r3, r2, #0
546-
; CHECK-BE-NEXT: vmov r3, s13
548+
; CHECK-BE-NEXT: vmov r3, s5
547549
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
548550
; CHECK-BE-NEXT: csetm r12, lt
549551
; CHECK-BE-NEXT: rsbs r2, r3, #0

llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
115115
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
116116
; CHECK-BE-NEXT: vrev64.16 q1, q0
117117
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
118-
; CHECK-BE-NEXT: vrev32.16 q0, q0
119118
; CHECK-BE-NEXT: vpsel q1, q1, q0
120119
; CHECK-BE-NEXT: vrev64.16 q0, q1
121120
; CHECK-BE-NEXT: add sp, #4
@@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
145144
; CHECK-BE-NEXT: vrev64.8 q1, q0
146145
; CHECK-BE-NEXT: rbit r0, r0
147146
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
148-
; CHECK-BE-NEXT: vrev32.8 q0, q0
149147
; CHECK-BE-NEXT: lsrs r0, r0, #16
150148
; CHECK-BE-NEXT: vmsr p0, r0
151149
; CHECK-BE-NEXT: vpsel q1, q1, q0

0 commit comments

Comments
 (0)