Skip to content

Commit 99b862e

Browse files
authored
[DAGISel][ARM] Fix vector truncate combine for big-endian (#118101)
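This DAG combine was incorrect for big-endian targets because it assumed that, when a bitcast changes the lane width, the least-significant bits of each wider lane end up in the lowest-numbered lane of the narrower type, which is true only for little-endian targets. The fix starts the element selection at the last narrow lane of each group (index TruncEltOffset - 1) on big-endian targets, instead of always starting at index 0.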
1 parent 8271195 commit 99b862e

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15498,12 +15498,14 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1549815498
unsigned BuildVecNumElts = BuildVect.getNumOperands();
1549915499
unsigned TruncVecNumElts = VT.getVectorNumElements();
1550015500
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15501+
unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
1550115502

1550215503
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
1550315504
"Invalid number of elements");
1550415505

1550515506
SmallVector<SDValue, 8> Opnds;
15506-
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15507+
for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
15508+
i += TruncEltOffset)
1550715509
Opnds.push_back(BuildVect.getOperand(i));
1550815510

1550915511
return DAG.getBuildVector(VT, DL, Opnds);

llvm/test/CodeGen/ARM/vector-trunc.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=LE
3+
; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=BE
4+
5+
define i32 @test(i64 %arg1) {
6+
; LE-LABEL: test:
7+
; LE: @ %bb.0: @ %entry
8+
; LE-NEXT: subs r0, r0, #1
9+
; LE-NEXT: mov r2, #0
10+
; LE-NEXT: sbcs r0, r1, #0
11+
; LE-NEXT: vldr s0, .LCPI0_0
12+
; LE-NEXT: movwhs r2, #1
13+
; LE-NEXT: cmp r2, #0
14+
; LE-NEXT: mvnne r2, #0
15+
; LE-NEXT: vmov s1, r2
16+
; LE-NEXT: vmovn.i32 d16, q0
17+
; LE-NEXT: vmovn.i16 d16, q8
18+
; LE-NEXT: vmov.u8 r0, d16[0]
19+
; LE-NEXT: and r0, r0, #1
20+
; LE-NEXT: bx lr
21+
; LE-NEXT: .p2align 2
22+
; LE-NEXT: @ %bb.1:
23+
; LE-NEXT: .LCPI0_0:
24+
; LE-NEXT: .long 0xffffffff @ float NaN
25+
;
26+
; BE-LABEL: test:
27+
; BE: @ %bb.0: @ %entry
28+
; BE-NEXT: subs r1, r1, #1
29+
; BE-NEXT: mov r2, #0
30+
; BE-NEXT: sbcs r0, r0, #0
31+
; BE-NEXT: vldr s0, .LCPI0_0
32+
; BE-NEXT: movwhs r2, #1
33+
; BE-NEXT: cmp r2, #0
34+
; BE-NEXT: mvnne r2, #0
35+
; BE-NEXT: vmov s1, r2
36+
; BE-NEXT: vmovn.i32 d16, q0
37+
; BE-NEXT: vmovn.i16 d16, q8
38+
; BE-NEXT: vmov.u8 r0, d16[0]
39+
; BE-NEXT: and r0, r0, #1
40+
; BE-NEXT: bx lr
41+
; BE-NEXT: .p2align 2
42+
; BE-NEXT: @ %bb.1:
43+
; BE-NEXT: .LCPI0_0:
44+
; BE-NEXT: .long 0xffffffff @ float NaN
45+
entry:
46+
%insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
47+
%splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
48+
%cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
49+
%first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
50+
%ext = zext i1 %first_cmp to i32
51+
ret i32 %ext
52+
}

0 commit comments

Comments
 (0)