Skip to content

Commit abcbca2

Browse files
authored
[AArch64] Fix big endian shuffle vector miscompile (#68673)
Fixes #65884
1 parent 43af73f commit abcbca2

File tree

6 files changed: +61 -12 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11050,7 +11050,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
1105011050
if (SrcEltTy == SmallestEltTy)
1105111051
continue;
1105211052
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
11053-
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
11053+
if (DAG.getDataLayout().isBigEndian()) {
11054+
Src.ShuffleVec =
11055+
DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec);
11056+
} else {
11057+
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
11058+
}
1105411059
Src.WindowScale =
1105511060
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
1105611061
Src.WindowBase *= Src.WindowScale;
@@ -11102,7 +11107,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
1110211107

1110311108
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
1110411109
ShuffleOps[1], Mask);
11105-
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
11110+
SDValue V;
11111+
if (DAG.getDataLayout().isBigEndian()) {
11112+
V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
11113+
} else {
11114+
V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
11115+
}
1110611116

1110711117
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
1110811118
dbgs() << "Reshuffle, creating node: "; V.dump(););

llvm/test/CodeGen/AArch64/aarch64-load-ext.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ define <3 x i32> @fsext_v3i32(ptr %a) {
140140
; CHECK-BE-NEXT: ldr s0, [x0]
141141
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
142142
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
143-
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
144143
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
145144
; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
146145
; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
@@ -284,7 +283,6 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
284283
; CHECK-BE-NEXT: ldr s0, [x0]
285284
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
286285
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
287-
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
288286
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
289287
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
290288
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
@@ -447,7 +445,7 @@ define <4 x i8> @bitcast(i32 %0) {
447445
; CHECK-BE-NEXT: fmov s0, w0
448446
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
449447
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
450-
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
448+
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
451449
; CHECK-BE-NEXT: ret
452450
%2 = bitcast i32 %0 to <4 x i8>
453451
ret <4 x i8> %2

llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,6 @@ define i8 @trunc_v4i64_v4i8(<4 x i64> %input) {
270270
; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
271271
; CHECK-BE-NEXT: xtn v1.2s, v1.2d
272272
; CHECK-BE-NEXT: xtn v0.2s, v0.2d
273-
; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
274-
; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
275273
; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
276274
; CHECK-BE-NEXT: addv h0, v0.4h
277275
; CHECK-BE-NEXT: fmov w0, s0
(New test file; its path was not captured in this extract.)

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
3+
; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
4+
5+
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
6+
; CHECKLE-LABEL: test_reconstructshuffle:
7+
; CHECKLE: // %bb.0:
8+
; CHECKLE-NEXT: umov w8, v0.b[3]
9+
; CHECKLE-NEXT: umov w9, v0.b[2]
10+
; CHECKLE-NEXT: fmov s2, w8
11+
; CHECKLE-NEXT: umov w8, v0.b[1]
12+
; CHECKLE-NEXT: mov v2.h[1], w9
13+
; CHECKLE-NEXT: mov v2.h[2], w8
14+
; CHECKLE-NEXT: umov w8, v0.b[0]
15+
; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
16+
; CHECKLE-NEXT: mov v2.h[3], w8
17+
; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b
18+
; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
19+
; CHECKLE-NEXT: bic v0.4h, #255, lsl #8
20+
; CHECKLE-NEXT: ret
21+
;
22+
; CHECKBE-LABEL: test_reconstructshuffle:
23+
; CHECKBE: // %bb.0:
24+
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
25+
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
26+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
27+
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
28+
; CHECKBE-NEXT: umov w8, v0.b[3]
29+
; CHECKBE-NEXT: umov w9, v0.b[2]
30+
; CHECKBE-NEXT: fmov s2, w8
31+
; CHECKBE-NEXT: umov w8, v0.b[1]
32+
; CHECKBE-NEXT: mov v2.h[1], w9
33+
; CHECKBE-NEXT: mov v2.h[2], w8
34+
; CHECKBE-NEXT: umov w8, v0.b[0]
35+
; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
36+
; CHECKBE-NEXT: mov v2.h[3], w8
37+
; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b
38+
; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
39+
; CHECKBE-NEXT: bic v0.4h, #255, lsl #8
40+
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
41+
; CHECKBE-NEXT: ret
42+
%tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
43+
%tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
44+
%tmp3 = add <4 x i8> %tmp1, %tmp2
45+
%tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
46+
ret <4 x i16> %tmp4
47+
}

llvm/test/CodeGen/AArch64/neon-bitcast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
545545
; CHECK-BE-NEXT: fmov s0, w0
546546
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
547547
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
548-
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
548+
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
549549
; CHECK-BE-NEXT: ret
550550
%ret = bitcast i32 %word to <4 x i8>
551551
ret <4 x i8> %ret

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,10 +1345,6 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
13451345
; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b
13461346
; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b
13471347
; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b
1348-
; CHECK-BE-NEXT: rev16 v3.8b, v3.8b
1349-
; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
1350-
; CHECK-BE-NEXT: rev16 v4.8b, v4.8b
1351-
; CHECK-BE-NEXT: rev16 v2.8b, v2.8b
13521348
; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
13531349
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
13541350
; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b

0 commit comments

Comments
 (0)