[ARM][Codegen] Fix vector data miscompilation in arm32be #105519

Merged (4 commits) on Sep 7, 2024
21 changes: 5 additions & 16 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7123,19 +7123,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}

if (DAG.getDataLayout().isBigEndian()) {
// Reverse the order of elements within the vector.
unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
unsigned Mask = (1 << BytesPerElem) - 1;
unsigned NumElems = 8 / BytesPerElem;
unsigned NewImm = 0;
for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
}
Imm = NewImm;
}

// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -7968,7 +7955,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}

// Try an immediate VMVN.
@@ -7978,7 +7965,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}

// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
@@ -18606,7 +18593,9 @@ static SDValue PerformBITCASTCombine(SDNode *N,

// We may have a bitcast of something that has already had this bitcast
// combine performed on it, so skip past any VECTOR_REG_CASTs.
while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
Src.getOperand(0).getValueType().getScalarSizeInBits() <=
Src.getValueType().getScalarSizeInBits())
Src = Src.getOperand(0);

// Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
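
In short, isVMOVModifiedImm no longer pre-reverses the byte order of the 64-bit splat immediate for big-endian targets; LowerBUILD_VECTOR wraps the VMOVIMM/VMVNIMM result in a VECTOR_REG_CAST instead of a plain BITCAST, and PerformBITCASTCombine now looks through at most one VECTOR_REG_CAST, and only when the cast's source does not have wider elements than its result. The lane reordering is therefore introduced as an explicit VREV at the bitcast boundary rather than baked into the immediate. A minimal illustration, copied from the vmov_i8 test in llvm/test/CodeGen/ARM/big-endian-vmov.ll below (the RUN lines are outside this diff, so the exact llc invocation behind the CHECK-BE prefix is assumed):

; Constant splat with only the last <8 x i8> lane set.
define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
}
; Old big-endian checks (pre-reversed immediate, no VREV):
;   vmov.i64 d0, #0xff
; New big-endian checks (little-endian form of the immediate, plus a VREV):
;   vmov.i64 d16, #0xff00000000000000
;   vrev64.8 d0, d16
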
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.i16 d16, d18, d16
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
63 changes: 41 additions & 22 deletions llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -10,7 +10,8 @@ define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
;
; CHECK-BE-LABEL: vmov_i8:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: bx lr
ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
}
@@ -23,7 +24,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
;
; CHECK-BE-LABEL: vmov_i16_a:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xffff
; CHECK-BE-NEXT: vmov.i64 d16, #0xffff000000000000
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
}
@@ -36,7 +38,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
;
; CHECK-BE-LABEL: vmov_i16_b:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
}
@@ -49,7 +52,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
;
; CHECK-BE-LABEL: vmov_i16_c:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
}
@@ -62,7 +66,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
;
; CHECK-BE-LABEL: vmov_i32_a:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xffffffff
; CHECK-BE-NEXT: vmov.i64 d16, #0xffffffff00000000
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 -1>
}
@@ -75,7 +80,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
;
; CHECK-BE-LABEL: vmov_i32_b:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 255>
}
@@ -88,7 +94,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
;
; CHECK-BE-LABEL: vmov_i32_c:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000000000
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 65280>
}
@@ -101,7 +108,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
;
; CHECK-BE-LABEL: vmov_i32_d:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff0000
; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 16711680>
}
@@ -114,7 +122,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
;
; CHECK-BE-LABEL: vmov_i32_e:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d0, #0xff000000
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 4278190080>
}
@@ -128,10 +137,16 @@ define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
}

define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
; CHECK-LABEL: vmov_i64_b:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 d0, #0xffff00ff0000ff
; CHECK-NEXT: bx lr
; CHECK-LE-LABEL: vmov_i64_b:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 d0, #0xffff00ff0000ff
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vmov_i64_b:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000ff00ffff00
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <1 x i64> <i64 72056498804490495>
}

@@ -157,11 +172,18 @@ define arm_aapcs_vfpcc <4 x i32> @vmov_v4i32_b() {
}

define arm_aapcs_vfpcc <2 x i64> @and_v2i64_b(<2 x i64> %a) {
; CHECK-LABEL: and_v2i64_b:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 q8, #0xffff00ff0000ff
; CHECK-NEXT: vand q0, q0, q8
; CHECK-NEXT: bx lr
; CHECK-LE-LABEL: and_v2i64_b:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
; CHECK-LE-NEXT: vand q0, q0, q8
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: and_v2i64_b:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
; CHECK-BE-NEXT: vrev64.32 q8, q8
; CHECK-BE-NEXT: vand q0, q0, q8
; CHECK-BE-NEXT: bx lr
%b = and <2 x i64> %a, <i64 72056498804490495, i64 72056498804490495>
ret <2 x i64> %b
}
@@ -175,9 +197,8 @@ define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) {
;
; CHECK-BE-LABEL: and_v4i32_b:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
; CHECK-BE-NEXT: vrev64.32 q9, q0
; CHECK-BE-NEXT: vrev64.32 q8, q8
; CHECK-BE-NEXT: vand q8, q9, q8
; CHECK-BE-NEXT: vrev64.32 q0, q8
; CHECK-BE-NEXT: bx lr
@@ -198,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() {
ret <8 x i16> <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
}

; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
; CHECK-LE-LABEL: and_v8i16_m1:
; CHECK-LE: @ %bb.0:
@@ -227,7 +247,6 @@ define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_m1(<8 x i16> %a) {
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmvn.i32 q8, #0x10000
; CHECK-BE-NEXT: vrev64.16 q9, q0
; CHECK-BE-NEXT: vrev32.16 q8, q8
; CHECK-BE-NEXT: veor q8, q9, q8
; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: bx lr
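
As a quick consistency check on the rewritten CHECK-BE lines, take vmov_i64_b above; the annotated copy below only restates what the checks already say (the llc RUN lines themselves are not part of this excerpt):

define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
  ret <1 x i64> <i64 72056498804490495> ; i.e. 0x00ffff00ff0000ff
}
; The new CHECK-BE lines materialize #0xff0000ff00ffff00, which is the
; constant with its two 32-bit halves (0x00ffff00 and 0xff0000ff) swapped,
; and the following vrev64.32 swaps the halves back, so d0 ends up holding
; 0x00ffff00ff0000ff, the same register contents the CHECK-LE sequence
; produces with a single vmov.i64.
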
48 changes: 34 additions & 14 deletions llvm/test/CodeGen/ARM/vmov.ll
@@ -139,10 +139,16 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind {
}

define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind {
; CHECK-LABEL: v_movi64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
; CHECK-NEXT: mov pc, lr
; CHECK-LE-LABEL: v_movi64:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
; CHECK-LE-NEXT: mov pc, lr
;
; CHECK-BE-LABEL: v_movi64:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d16, #0xffffff0000ff
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: mov pc, lr
ret <1 x i64> < i64 18374687574888349695 >
}

@@ -889,11 +895,18 @@ define arm_aapcs_vfpcc void @v_movf32_sti64(ptr %p) {
}

define arm_aapcs_vfpcc void @v_movi64_sti64(ptr %p) {
; CHECK-LABEL: v_movi64_sti64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 d16, #0xff
; CHECK-NEXT: vst1.64 {d16}, [r0:64]
; CHECK-NEXT: mov pc, lr
; CHECK-LE-LABEL: v_movi64_sti64:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 d16, #0xff
; CHECK-LE-NEXT: vst1.64 {d16}, [r0:64]
; CHECK-LE-NEXT: mov pc, lr
;
; CHECK-BE-LABEL: v_movi64_sti64:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
; CHECK-BE-NEXT: vrev64.32 d16, d16
; CHECK-BE-NEXT: vst1.64 {d16}, [r0:64]
; CHECK-BE-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> <i64 255>, i32 8)
ret void
}
@@ -1094,11 +1107,18 @@ define arm_aapcs_vfpcc void @v_movQf32_sti64(ptr %p) {
}

define arm_aapcs_vfpcc void @v_movQi64_sti64(ptr %p) {
; CHECK-LABEL: v_movQi64_sti64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 q8, #0xff
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT: mov pc, lr
; CHECK-LE-LABEL: v_movQi64_sti64:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.i64 q8, #0xff
; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r0:64]
; CHECK-LE-NEXT: mov pc, lr
;
; CHECK-BE-LABEL: v_movQi64_sti64:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 q8, #0xff00000000
; CHECK-BE-NEXT: vrev64.32 q8, q8
; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r0:64]
; CHECK-BE-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> <i64 255, i64 255>, i32 8)
ret void
}
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-be.ll
@@ -232,7 +232,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_le(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -254,7 +253,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_be(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1000000
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -421,13 +421,14 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
; CHECK-BE-NEXT: vmov.32 q1[3], r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: .LBB7_4: @ %else2
; CHECK-BE-NEXT: vrev64.32 q3, q2
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vmov r2, s15
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-BE-NEXT: vand q0, q0, q1
; CHECK-BE-NEXT: vrev64.32 q3, q1
; CHECK-BE-NEXT: vrev64.32 q1, q2
; CHECK-BE-NEXT: vmov r2, s7
; CHECK-BE-NEXT: vand q0, q0, q3
; CHECK-BE-NEXT: rsbs r3, r2, #0
; CHECK-BE-NEXT: vmov r3, s13
; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
; CHECK-BE-NEXT: csetm r12, lt
; CHECK-BE-NEXT: rsbs r2, r3, #0
@@ -537,13 +538,14 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
; CHECK-BE-NEXT: vmov.32 q1[3], r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: .LBB8_4: @ %else2
; CHECK-BE-NEXT: vrev64.32 q3, q2
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: vmov r2, s15
; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-BE-NEXT: vand q0, q0, q1
; CHECK-BE-NEXT: vrev64.32 q3, q1
; CHECK-BE-NEXT: vrev64.32 q1, q2
; CHECK-BE-NEXT: vmov r2, s7
; CHECK-BE-NEXT: vand q0, q0, q3
; CHECK-BE-NEXT: rsbs r3, r2, #0
; CHECK-BE-NEXT: vmov r3, s13
; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
; CHECK-BE-NEXT: csetm r12, lt
; CHECK-BE-NEXT: rsbs r2, r3, #0
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #4
@@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: rbit r0, r0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0