@@ -4093,6 +4093,45 @@ static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4093
4093
return true ;
4094
4094
}
4095
4095
4096
+ static bool isINSMask (ArrayRef<int > M, int NumInputElements,
4097
+ bool &BulkIsLeft, int &Anomaly) {
4098
+ if (M.size () != static_cast <size_t >(NumInputElements))
4099
+ return false ;
4100
+
4101
+ int NumLHSMatch = 0 , NumRHSMatch = 0 ;
4102
+ int LastLHSMismatch = -1 , LastRHSMismatch = -1 ;
4103
+
4104
+ for (int i = 0 ; i < NumInputElements; ++i) {
4105
+ if (M[i] == -1 ) {
4106
+ ++NumLHSMatch;
4107
+ ++NumRHSMatch;
4108
+ continue ;
4109
+ }
4110
+
4111
+ if (M[i] == i)
4112
+ ++NumLHSMatch;
4113
+ else
4114
+ LastLHSMismatch = i;
4115
+
4116
+ if (M[i] == i + NumInputElements)
4117
+ ++NumRHSMatch;
4118
+ else
4119
+ LastRHSMismatch = i;
4120
+ }
4121
+
4122
+ if (NumLHSMatch == NumInputElements - 1 ) {
4123
+ BulkIsLeft = true ;
4124
+ Anomaly = LastLHSMismatch;
4125
+ return true ;
4126
+ } else if (NumRHSMatch == NumInputElements - 1 ) {
4127
+ BulkIsLeft = false ;
4128
+ Anomaly = LastRHSMismatch;
4129
+ return true ;
4130
+ }
4131
+
4132
+ return false ;
4133
+ }
4134
+
4096
4135
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4097
4136
/// the specified operations to build the shuffle.
4098
4137
static SDValue GeneratePerfectShuffle (unsigned PFEntry, SDValue LHS,
@@ -4362,6 +4401,31 @@ SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
4362
4401
return DAG.getNode (Opc, dl, V1.getValueType (), V1, V1);
4363
4402
}
4364
4403
4404
+ bool DstIsLeft;
4405
+ int Anomaly;
4406
+ int NumInputElements = V1.getValueType ().getVectorNumElements ();
4407
+ if (isINSMask (ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
4408
+ SDValue DstVec = DstIsLeft ? V1 : V2;
4409
+ SDValue DstLaneV = DAG.getConstant (Anomaly, MVT::i64 );
4410
+
4411
+ SDValue SrcVec = V1;
4412
+ int SrcLane = ShuffleMask[Anomaly];
4413
+ if (SrcLane >= NumInputElements) {
4414
+ SrcVec = V2;
4415
+ SrcLane -= VT.getVectorNumElements ();
4416
+ }
4417
+ SDValue SrcLaneV = DAG.getConstant (SrcLane, MVT::i64 );
4418
+
4419
+ EVT ScalarVT = VT.getVectorElementType ();
4420
+ if (ScalarVT.getSizeInBits () < 32 )
4421
+ ScalarVT = MVT::i32 ;
4422
+
4423
+ return DAG.getNode (
4424
+ ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
4425
+ DAG.getNode (ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
4426
+ DstLaneV);
4427
+ }
4428
+
4365
4429
// If the shuffle is not directly supported and it has 4 elements, use
4366
4430
// the PerfectShuffle-generated table to synthesize it from other shuffles.
4367
4431
unsigned NumElts = VT.getVectorNumElements ();
0 commit comments