@@ -3829,9 +3829,11 @@ SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op,
3829
3829
VEXTOffsets[i] = 0 ;
3830
3830
continue ;
3831
3831
} else if (SourceVecs[i].getValueType ().getVectorNumElements () < NumElts) {
3832
- // It probably isn't worth padding out a smaller vector just to
3833
- // break it down again in a shuffle.
3834
- return SDValue ();
3832
+ // We can pad out the smaller vector for free by concatenating it with
3833
+ // undef, so keep it as a shuffle source instead of giving up.
3834
+ ShuffleSrcs[i] = DAG.getNode (ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
3835
+ DAG.getUNDEF (SourceVecs[i].getValueType ()));
3836
+ continue ;
3835
3837
}
3836
3838
3837
3839
// Don't attempt to extract subvectors from BUILD_VECTOR sources
@@ -4094,7 +4096,7 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4094
4096
}
4095
4097
4096
4098
static bool isINSMask (ArrayRef<int > M, int NumInputElements,
4097
- bool &BulkIsLeft , int &Anomaly) {
4099
+ bool &DstIsLeft , int &Anomaly) {
4098
4100
if (M.size () != static_cast <size_t >(NumInputElements))
4099
4101
return false ;
4100
4102
@@ -4120,18 +4122,67 @@ static bool isINSMask(ArrayRef<int> M, int NumInputElements,
4120
4122
}
4121
4123
4122
4124
if (NumLHSMatch == NumInputElements - 1 ) {
4123
- BulkIsLeft = true ;
4125
+ DstIsLeft = true ;
4124
4126
Anomaly = LastLHSMismatch;
4125
4127
return true ;
4126
4128
} else if (NumRHSMatch == NumInputElements - 1 ) {
4127
- BulkIsLeft = false ;
4129
+ DstIsLeft = false ;
4128
4130
Anomaly = LastRHSMismatch;
4129
4131
return true ;
4130
4132
}
4131
4133
4132
4134
return false ;
4133
4135
}
4134
4136
4137
/// isConcatMask - Return true if \p Mask has the exact shape this file treats
/// as a concatenation for the 128-bit type \p VT: the low half of the mask is
/// the identity (0, 1, ...), and the high half is either the identity
/// continued (\p SplitLHS false) or the identity shifted up by NumElts / 2
/// (\p SplitLHS true). Every entry must match exactly; a negative entry never
/// compares equal to an expected index, so it makes this return false.
/// NOTE(review): Mask is indexed up to NumElts - 1 without a size check here;
/// callers presumably pass a mask with one entry per element of VT -- confirm.
+ static bool isConcatMask (ArrayRef<int > Mask, EVT VT, bool SplitLHS) {
4138
+ if (VT.getSizeInBits () != 128 ) // Only 128-bit result types are accepted.
4139
+ return false ;
4140
+
4141
+ unsigned NumElts = VT.getVectorNumElements ();
4142
+ // Low half of the mask: entry I must select index I.
4143
+ for (int I = 0 , E = NumElts / 2 ; I != E; I++) {
4144
+ if (Mask[I] != I)
4145
+ return false ;
4146
+ }
4147
+ // High half: entry I must select I, or I + NumElts / 2 when SplitLHS is set.
4148
+ int Offset = NumElts / 2 ;
4149
+ for (int I = NumElts / 2 , E = NumElts; I != E; I++) {
4150
+ if (Mask[I] != I + SplitLHS * Offset) // SplitLHS acts as 0 or 1 here.
4151
+ return false ;
4152
+ }
4153
+
4154
+ return true ;
4155
+ }
4156
+
4157
/// tryFormConcatFromShuffle - Try to rewrite the shuffle node \p Op as an
/// ISD::CONCAT_VECTORS of its two operands. Any operand that is 128 bits wide
/// is first narrowed with ISD::EXTRACT_SUBVECTOR at index 0 to a vector with
/// half as many elements as the result type.
///
/// \returns the CONCAT_VECTORS node on success, or an empty SDValue when the
/// operand element types differ from the result's or the mask is rejected by
/// isConcatMask.
+ static SDValue tryFormConcatFromShuffle (SDValue Op, SelectionDAG &DAG) {
4158
+ SDLoc DL (Op);
4159
+ EVT VT = Op.getValueType ();
4160
+ SDValue V0 = Op.getOperand (0 );
4161
+ SDValue V1 = Op.getOperand (1 );
4162
+ ArrayRef<int > Mask = cast<ShuffleVectorSDNode>(Op)->getMask ();
4163
+ // Bail out unless both operands share the result's element type.
4164
+ if (VT.getVectorElementType () != V0.getValueType ().getVectorElementType () ||
4165
+ VT.getVectorElementType () != V1.getValueType ().getVectorElementType ())
4166
+ return SDValue ();
4167
+
4168
+ bool SplitV0 = V0.getValueType ().getSizeInBits () == 128 ; // V0 must be halved below.
4169
+
4170
+ if (!isConcatMask (Mask, VT, SplitV0))
4171
+ return SDValue ();
4172
+ // CastVT: same element type as VT, half the element count.
4173
+ EVT CastVT = EVT::getVectorVT (*DAG.getContext (), VT.getVectorElementType (),
4174
+ VT.getVectorNumElements () / 2 );
4175
+ if (SplitV0) { // Take the subvector of V0 starting at element 0.
4176
+ V0 = DAG.getNode (ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
4177
+ DAG.getConstant (0 , MVT::i64 ));
4178
+ }
4179
+ if (V1.getValueType ().getSizeInBits () == 128 ) { // Same narrowing for V1.
4180
+ V1 = DAG.getNode (ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
4181
+ DAG.getConstant (0 , MVT::i64 ));
4182
+ }
4183
+ return DAG.getNode (ISD::CONCAT_VECTORS, DL, VT, V0, V1);
4184
+ }
4185
+
4135
4186
// / GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4136
4187
// / the specified operations to build the shuffle.
4137
4188
static SDValue GeneratePerfectShuffle (unsigned PFEntry, SDValue LHS,
@@ -4401,6 +4452,10 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
4401
4452
return DAG.getNode (Opc, dl, V1.getValueType (), V1, V1);
4402
4453
}
4403
4454
4455
+ SDValue Concat = tryFormConcatFromShuffle (Op, DAG);
4456
+ if (Concat.getNode ())
4457
+ return Concat;
4458
+
4404
4459
bool DstIsLeft;
4405
4460
int Anomaly;
4406
4461
int NumInputElements = V1.getValueType ().getVectorNumElements ();
@@ -5264,18 +5319,21 @@ bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
5264
5319
return true ;
5265
5320
}
5266
5321
5267
- bool ReverseVEXT;
5268
- unsigned Imm, WhichResult;
5322
+ bool DummyBool;
5323
+ int DummyInt;
5324
+ unsigned DummyUnsigned;
5269
5325
5270
5326
return (ShuffleVectorSDNode::isSplatMask (&M[0 ], VT) || isREVMask (M, VT, 64 ) ||
5271
5327
isREVMask (M, VT, 32 ) || isREVMask (M, VT, 16 ) ||
5272
- isEXTMask (M, VT, ReverseVEXT, Imm ) ||
5328
+ isEXTMask (M, VT, DummyBool, DummyUnsigned ) ||
5273
5329
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
5274
- isTRNMask (M, VT, WhichResult) || isUZPMask (M, VT, WhichResult) ||
5275
- isZIPMask (M, VT, WhichResult) ||
5276
- isTRN_v_undef_Mask (M, VT, WhichResult) ||
5277
- isUZP_v_undef_Mask (M, VT, WhichResult) ||
5278
- isZIP_v_undef_Mask (M, VT, WhichResult));
5330
+ isTRNMask (M, VT, DummyUnsigned) || isUZPMask (M, VT, DummyUnsigned) ||
5331
+ isZIPMask (M, VT, DummyUnsigned) ||
5332
+ isTRN_v_undef_Mask (M, VT, DummyUnsigned) ||
5333
+ isUZP_v_undef_Mask (M, VT, DummyUnsigned) ||
5334
+ isZIP_v_undef_Mask (M, VT, DummyUnsigned) ||
5335
+ isINSMask (M, VT.getVectorNumElements (), DummyBool, DummyInt) ||
5336
+ isConcatMask (M, VT, VT.getSizeInBits () == 128 ));
5279
5337
}
5280
5338
5281
5339
// / getVShiftImm - Check if this is a valid build_vector for the immediate
0 commit comments