Skip to content

feat: fix big endian shuffle vector miscompile #68673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10961,7 +10961,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
if (DAG.getDataLayout().isBigEndian()) {
Src.ShuffleVec =
DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec);
} else {
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
}
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
Expand Down Expand Up @@ -11013,7 +11018,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,

SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
SDValue V;
if (DAG.getDataLayout().isBigEndian()) {
V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
} else {
V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}

LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ define <3 x i32> @fsext_v3i32(ptr %a) {
; CHECK-BE-NEXT: ldr s0, [x0]
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
Expand Down Expand Up @@ -284,7 +283,6 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
; CHECK-BE-NEXT: ldr s0, [x0]
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
Expand Down Expand Up @@ -447,7 +445,7 @@ define <4 x i8> @bitcast(i32 %0) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%2 = bitcast i32 %0 to <4 x i8>
ret <4 x i8> %2
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,6 @@ define i8 @trunc_v4i64_v4i8(<4 x i64> %input) {
; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-BE-NEXT: xtn v1.2s, v1.2d
; CHECK-BE-NEXT: xtn v0.2s, v0.2d
; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-BE-NEXT: addv h0, v0.4h
; CHECK-BE-NEXT: fmov w0, s0
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE

; Builds two 4-element byte vectors with shufflevector: elements 3,2,1,0 of %a
; (i.e. the low four bytes in reversed order) and elements 12,13,14,15 of %b.
; The two <4 x i8> values are added and the sum is zero-extended to <4 x i16>.
; The RUN lines of this file compile the function for both the aarch64 and the
; aarch64_be triples, so the expected assembly is pinned for each of them.
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKLE-LABEL: test_reconstructshuffle:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: umov w8, v0.b[3]
; CHECKLE-NEXT: umov w9, v0.b[2]
; CHECKLE-NEXT: fmov s2, w8
; CHECKLE-NEXT: umov w8, v0.b[1]
; CHECKLE-NEXT: mov v2.h[1], w9
; CHECKLE-NEXT: mov v2.h[2], w8
; CHECKLE-NEXT: umov w8, v0.b[0]
; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
; CHECKLE-NEXT: mov v2.h[3], w8
; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b
; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKLE-NEXT: bic v0.4h, #255, lsl #8
; CHECKLE-NEXT: ret
;
; CHECKBE-LABEL: test_reconstructshuffle:
; CHECKBE: // %bb.0:
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT: umov w8, v0.b[3]
; CHECKBE-NEXT: umov w9, v0.b[2]
; CHECKBE-NEXT: fmov s2, w8
; CHECKBE-NEXT: umov w8, v0.b[1]
; CHECKBE-NEXT: mov v2.h[1], w9
; CHECKBE-NEXT: mov v2.h[2], w8
; CHECKBE-NEXT: umov w8, v0.b[0]
; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT: mov v2.h[3], w8
; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b
; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKBE-NEXT: bic v0.4h, #255, lsl #8
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
; CHECKBE-NEXT: ret
%tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
%tmp3 = add <4 x i8> %tmp1, %tmp2
%tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/neon-bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%ret = bitcast i32 %word to <4 x i8>
ret <4 x i8> %ret
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1345,10 +1345,6 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b
; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b
; CHECK-BE-NEXT: rev16 v3.8b, v3.8b
; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
; CHECK-BE-NEXT: rev16 v4.8b, v4.8b
; CHECK-BE-NEXT: rev16 v2.8b, v2.8b
; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b
Expand Down