Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 8ca089d

Browse files
committed
AArch64: fix LowerCONCAT_VECTORS for new CodeGen.
The function was making too many assumptions about its input:

1. The NEON_VDUP optimisation was far too aggressive, assuming (I think) that the input would always be BUILD_VECTOR.
2. We were treating most unknown concats as legal (by returning Op rather than SDValue()). I think only concats of pairs of vectors are actually legal.

http://llvm.org/PR19094

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203450 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 31da39e commit 8ca089d

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
22812281
// We custom lower concat_vectors with 4, 8, or 16 operands that are all the
22822282
// same operand and of type v1* using the DUP instruction.
22832283
unsigned NumOps = Op->getNumOperands();
2284-
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
2284+
if (NumOps == 2) {
2285+
assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
22852286
return Op;
2287+
}
2288+
2289+
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
2290+
return SDValue();
22862291

22872292
// Must be a single value for VDUP.
2288-
bool isConstant = true;
22892293
SDValue Op0 = Op.getOperand(0);
22902294
for (unsigned i = 1; i < NumOps; ++i) {
22912295
SDValue OpN = Op.getOperand(i);
22922296
if (Op0 != OpN)
2293-
return Op;
2294-
2295-
if (!isa<ConstantSDNode>(OpN->getOperand(0)))
2296-
isConstant = false;
2297+
return SDValue();
22972298
}
22982299

22992300
// Verify the value type.
@@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
23022303
default: llvm_unreachable("Unexpected number of operands");
23032304
case 4:
23042305
if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
2305-
return Op;
2306+
return SDValue();
23062307
break;
23072308
case 8:
23082309
if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
2309-
return Op;
2310+
return SDValue();
23102311
break;
23112312
case 16:
23122313
if (EltVT != MVT::v1i8)
2313-
return Op;
2314+
return SDValue();
23142315
break;
23152316
}
23162317

23172318
SDLoc DL(Op);
23182319
EVT VT = Op.getValueType();
23192320
// VDUP produces better code for constants.
2320-
if (isConstant)
2321+
if (Op0->getOpcode() == ISD::BUILD_VECTOR)
23212322
return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
23222323
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
23232324
DAG.getConstant(0, MVT::i64));

test/CodeGen/AArch64/concatvector-v8i8-bug.ll renamed to test/CodeGen/AArch64/concatvector-bugs.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,24 @@ for.body130.us.us: ; preds = %for.body130.us.us,
4545
br label %for.body130.us.us
4646
}
4747

48+
declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
49+
50+
define <8 x i16> @test_splat(i32 %l) nounwind {
51+
; CHECK-LABEL: test_splat:
52+
; CHECK: ret
53+
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
54+
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
55+
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
56+
ret <8 x i16> %vec
57+
}
58+
59+
60+
define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
61+
; CHECK-LABEL: test_notsplat:
62+
; CHECK: ret
63+
entry:
64+
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
65+
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
66+
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
67+
ret <8 x i16> %vec
68+
}

0 commit comments

Comments
 (0)