Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 0d69959

Browse files
committed
AArch64/ARM64: spot a greater variety of concat_vector operations.
Code mostly copied from AArch64, just tidied up a trifle and plumbed into the ARM64 way of doing things. This also enables the AArch64 tests which inspired the previous untested commits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206574 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 70b6337 commit 0d69959

File tree

3 files changed

+1518
-15
lines changed

3 files changed

+1518
-15
lines changed

lib/Target/ARM64/ARM64ISelLowering.cpp

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3829,9 +3829,11 @@ SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op,
38293829
VEXTOffsets[i] = 0;
38303830
continue;
38313831
} else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
3832-
// It probably isn't worth padding out a smaller vector just to
3833-
// break it down again in a shuffle.
3834-
return SDValue();
3832+
// We can pad out the smaller vector for free, so if it's part of a
3833+
// shuffle...
3834+
ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
3835+
DAG.getUNDEF(SourceVecs[i].getValueType()));
3836+
continue;
38353837
}
38363838

38373839
// Don't attempt to extract subvectors from BUILD_VECTOR sources
@@ -4094,7 +4096,7 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
40944096
}
40954097

40964098
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
4097-
bool &BulkIsLeft, int &Anomaly) {
4099+
bool &DstIsLeft, int &Anomaly) {
40984100
if (M.size() != static_cast<size_t>(NumInputElements))
40994101
return false;
41004102

@@ -4120,18 +4122,67 @@ static bool isINSMask(ArrayRef<int> M, int NumInputElements,
41204122
}
41214123

41224124
if (NumLHSMatch == NumInputElements - 1) {
4123-
BulkIsLeft = true;
4125+
DstIsLeft = true;
41244126
Anomaly = LastLHSMismatch;
41254127
return true;
41264128
} else if (NumRHSMatch == NumInputElements - 1) {
4127-
BulkIsLeft = false;
4129+
DstIsLeft = false;
41284130
Anomaly = LastRHSMismatch;
41294131
return true;
41304132
}
41314133

41324134
return false;
41334135
}
41344136

4137+
/// isConcatMask - Check whether a shuffle mask describes a concatenation of
/// two half-width pieces into a 128-bit result.
///
/// With N = VT.getVectorNumElements(), the mask is accepted only when:
///   - VT is exactly 128 bits wide;
///   - entries [0, N/2) are 0, 1, ..., N/2-1; and
///   - entries [N/2, N) are N/2, ..., N-1 when \p SplitLHS is false, or
///     N, ..., N+N/2-1 when \p SplitLHS is true.
///
/// Every entry must match its expected index exactly, so any other value
/// (including a negative entry) rejects the mask.
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
  if (VT.getSizeInBits() != 128)
    return false;

  const int NumLanes = VT.getVectorNumElements();
  const int HalfLanes = NumLanes / 2;
  // Extra distance added to every upper-half index when the left-hand
  // operand is itself being split.
  const int UpperBias = SplitLHS ? HalfLanes : 0;

  for (int Lane = 0; Lane < NumLanes; ++Lane) {
    const int Expected = Lane < HalfLanes ? Lane : Lane + UpperBias;
    if (Mask[Lane] != Expected)
      return false;
  }

  return true;
}
4156+
4157+
/// tryFormConcatFromShuffle - Try to rewrite the vector shuffle \p Op as a
/// CONCAT_VECTORS node.
///
/// Returns an empty SDValue when either operand's element type differs from
/// the result's element type, or when isConcatMask rejects the shuffle mask.
/// Otherwise every operand that is 128 bits wide is first narrowed with
/// EXTRACT_SUBVECTOR at index 0 to a vector holding half as many elements as
/// the result, and the two pieces are concatenated into the result type.
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResultVT = Op.getValueType();
  EVT EltVT = ResultVT.getVectorElementType();
  SDValue Parts[2] = { Op.getOperand(0), Op.getOperand(1) };
  ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

  // Both inputs have to carry the same element type as the result.
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    if (EltVT != Parts[Idx].getValueType().getVectorElementType())
      return SDValue();
  }

  // The mask layout to look for depends on whether the first operand is a
  // full 128-bit vector.
  bool FirstIsFullWidth = Parts[0].getValueType().getSizeInBits() == 128;
  if (!isConcatMask(ShuffleMask, ResultVT, FirstIsFullWidth))
    return SDValue();

  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                ResultVT.getVectorNumElements() / 2);

  // Narrow each 128-bit operand, first operand first.
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    if (Parts[Idx].getValueType().getSizeInBits() != 128)
      continue;
    Parts[Idx] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Parts[Idx],
                             DAG.getConstant(0, MVT::i64));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResultVT, Parts[0], Parts[1]);
}
4185+
41354186
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
41364187
/// the specified operations to build the shuffle.
41374188
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
@@ -4401,6 +4452,10 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
44014452
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
44024453
}
44034454

4455+
SDValue Concat = tryFormConcatFromShuffle(Op, DAG);
4456+
if (Concat.getNode())
4457+
return Concat;
4458+
44044459
bool DstIsLeft;
44054460
int Anomaly;
44064461
int NumInputElements = V1.getValueType().getVectorNumElements();
@@ -5264,18 +5319,21 @@ bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
52645319
return true;
52655320
}
52665321

5267-
bool ReverseVEXT;
5268-
unsigned Imm, WhichResult;
5322+
bool DummyBool;
5323+
int DummyInt;
5324+
unsigned DummyUnsigned;
52695325

52705326
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
52715327
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
5272-
isEXTMask(M, VT, ReverseVEXT, Imm) ||
5328+
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
52735329
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
5274-
isTRNMask(M, VT, WhichResult) || isUZPMask(M, VT, WhichResult) ||
5275-
isZIPMask(M, VT, WhichResult) ||
5276-
isTRN_v_undef_Mask(M, VT, WhichResult) ||
5277-
isUZP_v_undef_Mask(M, VT, WhichResult) ||
5278-
isZIP_v_undef_Mask(M, VT, WhichResult));
5330+
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
5331+
isZIPMask(M, VT, DummyUnsigned) ||
5332+
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
5333+
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
5334+
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
5335+
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
5336+
isConcatMask(M, VT, VT.getSizeInBits() == 128));
52795337
}
52805338

52815339
/// getVShiftImm - Check if this is a valid build_vector for the immediate

test/CodeGen/AArch64/neon-copy.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
2-
2+
; arm64 has copied equivalent test due to intrinsics.
33

44
define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
55
;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}

0 commit comments

Comments
 (0)