@@ -8194,8 +8194,8 @@ static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8194
8194
Input = Op->getOperand(1);
8195
8195
Elt -= 4;
8196
8196
}
8197
- SDValue BitCast = DAG.getBitcast(MVT::v4i32 , Input);
8198
- Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32 , BitCast,
8197
+ SDValue BitCast = DAG.getBitcast(MVT::v4f32 , Input);
8198
+ Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32 , BitCast,
8199
8199
DAG.getConstant(Elt, dl, MVT::i32));
8200
8200
}
8201
8201
}
@@ -8214,19 +8214,70 @@ static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8214
8214
Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8215
8215
SDValue NewShuffle = DAG.getVectorShuffle(
8216
8216
VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8217
- SDValue BitCast = DAG.getBitcast(MVT::v4i32 , NewShuffle);
8217
+ SDValue BitCast = DAG.getBitcast(MVT::v4f32 , NewShuffle);
8218
8218
8219
8219
for (int Part = 0; Part < 4; ++Part)
8220
8220
if (!Parts[Part])
8221
- Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32 ,
8221
+ Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32 ,
8222
8222
BitCast, DAG.getConstant(Part, dl, MVT::i32));
8223
8223
}
8224
8224
// Build a vector out of the various parts and bitcast it back to the original
8225
8225
// type.
8226
- SDValue NewVec = DAG.getBuildVector(MVT::v4i32 , dl, Parts);
8226
+ SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR , dl, MVT::v4f32 , Parts);
8227
8227
return DAG.getBitcast(VT, NewVec);
8228
8228
}
8229
8229
8230
+ static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8231
+ ArrayRef<int> ShuffleMask,
8232
+ SelectionDAG &DAG) {
8233
+ SDValue V1 = Op.getOperand(0);
8234
+ SDValue V2 = Op.getOperand(1);
8235
+ EVT VT = Op.getValueType();
8236
+ unsigned NumElts = VT.getVectorNumElements();
8237
+
8238
+ // A One-Off Identity mask is one that is mostly an identity mask from a
8239
+ // single source but contains a single element out-of-place, either from a
8240
+ // different vector or from another position in the same vector. As opposed to
8241
+ // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8242
+ // pair directly.
8243
+ auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8244
+ int &OffElement) {
8245
+ OffElement = -1;
8246
+ int NonUndef = 0;
8247
+ for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8248
+ if (Mask[i] == -1)
8249
+ continue;
8250
+ NonUndef++;
8251
+ if (Mask[i] != i + BaseOffset) {
8252
+ if (OffElement == -1)
8253
+ OffElement = i;
8254
+ else
8255
+ return false;
8256
+ }
8257
+ }
8258
+ return NonUndef > 2 && OffElement != -1;
8259
+ };
8260
+ int OffElement;
8261
+ SDValue VInput;
8262
+ if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8263
+ VInput = V1;
8264
+ else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8265
+ VInput = V2;
8266
+ else
8267
+ return SDValue();
8268
+
8269
+ SDLoc dl(Op);
8270
+ EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8271
+ ? MVT::i32
8272
+ : VT.getScalarType();
8273
+ SDValue Elt = DAG.getNode(
8274
+ ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8275
+ ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8276
+ DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8277
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8278
+ DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8279
+ }
8280
+
8230
8281
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8231
8282
const ARMSubtarget *ST) {
8232
8283
SDValue V1 = Op.getOperand(0);
@@ -8360,6 +8411,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8360
8411
}
8361
8412
}
8362
8413
8414
+ if (ST->hasMVEIntegerOps() && EltSize <= 32)
8415
+ if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8416
+ return V;
8417
+
8363
8418
// If the shuffle is not directly supported and it has 4 elements, use
8364
8419
// the PerfectShuffle-generated table to synthesize it from other shuffles.
8365
8420
unsigned NumElts = VT.getVectorNumElements();
0 commit comments