@@ -12214,19 +12214,23 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
12214
12214
return SDValue();
12215
12215
}
12216
12216
12217
- /// Lower a vector shuffle as a zero or any extension.
12217
+ /// Lower a vector shuffle as an any/signed/zero extension.
12218
12218
///
12219
12219
/// Given a specific number of elements, element bit width, and extension
12220
- /// stride, produce either a zero or any extension based on the available
12220
+ /// stride, produce an extension based on the available
12221
12221
/// features of the subtarget. The extended elements are consecutive and
12222
12222
/// can start from an offset element index in the input; to
12223
12223
/// avoid excess shuffling the offset must either be in the bottom lane
12224
12224
/// or at the start of a higher lane. All extended elements must be from
12225
12225
/// the same lane.
12226
- static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
12227
- const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
12228
- ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
12226
+ static SDValue lowerShuffleAsSpecificExtension(const SDLoc &DL, MVT VT,
12227
+ int Scale, int Offset,
12228
+ unsigned ExtOpc, SDValue InputV,
12229
+ ArrayRef<int> Mask,
12230
+ const X86Subtarget &Subtarget,
12231
+ SelectionDAG &DAG) {
12229
12232
assert(Scale > 1 && "Need a scale to extend.");
12233
+ assert(ISD::isExtOpcode(ExtOpc) && "Unsupported extension");
12230
12234
int EltBits = VT.getScalarSizeInBits();
12231
12235
int NumElements = VT.getVectorNumElements();
12232
12236
int NumEltsPerLane = 128 / EltBits;
@@ -12267,13 +12271,17 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
12267
12271
NumElements / Scale);
12268
12272
InputV = DAG.getBitcast(VT, InputV);
12269
12273
InputV = ShuffleOffset(InputV);
12270
- InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
12271
- DL, ExtVT, InputV, DAG);
12274
+ InputV = getEXTEND_VECTOR_INREG(ExtOpc, DL, ExtVT, InputV, DAG);
12272
12275
return DAG.getBitcast(VT, InputV);
12273
12276
}
12274
12277
12275
12278
assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
12276
12279
InputV = DAG.getBitcast(VT, InputV);
12280
+ bool AnyExt = ExtOpc == ISD::ANY_EXTEND;
12281
+
12282
+ // TODO: Add pre-SSE41 SIGN_EXTEND_VECTOR_INREG handling.
12283
+ if (ExtOpc == ISD::SIGN_EXTEND)
12284
+ return SDValue();
12277
12285
12278
12286
// For any extends we can cheat for larger element sizes and use shuffle
12279
12287
// instructions that can fold with a load and/or copy.
@@ -12458,8 +12466,9 @@ static SDValue lowerShuffleAsZeroOrAnyExtend(
12458
12466
if (Offset != 0 && Matches < 2)
12459
12467
return SDValue();
12460
12468
12461
- return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
12462
- InputV, Mask, Subtarget, DAG);
12469
+ unsigned ExtOpc = AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND;
12470
+ return lowerShuffleAsSpecificExtension(DL, VT, Scale, Offset, ExtOpc,
12471
+ InputV, Mask, Subtarget, DAG);
12463
12472
};
12464
12473
12465
12474
// The widest scale possible for extending is to a 64-bit integer.
0 commit comments