Skip to content

Commit e74d834

Browse files
authored
[X86] combineConcatVectorOps - concat mixed v2f64/v4f64 faux shuffles into v4f64/v8f64 vshufpd (llvm#143521)
Replace the getTargetShuffleMask call (which is only permitted for target shuffles) with getTargetShuffleInputs, so that we can also match various faux shuffles (insert+extract sequences in particular). This means we now have to explicitly bail out on undef/zero mask elements, which getTargetShuffleMask previously handled for us.
1 parent 007d29e commit e74d834

File tree

2 files changed

+7
-14
lines changed

2 files changed

+7
-14
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59304,23 +59304,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5930459304
// We can always convert per-lane vXf64 shuffles into VSHUFPD.
5930559305
if (!IsSplat &&
5930659306
(VT == MVT::v4f64 || (VT == MVT::v8f64 && Subtarget.useAVX512Regs())) &&
59307-
all_of(Ops, [](SDValue Op) {
59308-
return Op.hasOneUse() && (Op.getOpcode() == X86ISD::MOVDDUP ||
59309-
Op.getOpcode() == X86ISD::SHUFP ||
59310-
Op.getOpcode() == X86ISD::VPERMILPI ||
59311-
Op.getOpcode() == X86ISD::BLENDI ||
59312-
Op.getOpcode() == X86ISD::UNPCKL ||
59313-
Op.getOpcode() == X86ISD::UNPCKH);
59314-
})) {
59307+
all_of(Ops, [](SDValue Op) { return Op.hasOneUse(); })) {
5931559308
// Collect the individual per-lane v2f64/v4f64 shuffles.
5931659309
MVT OpVT = Ops[0].getSimpleValueType();
5931759310
unsigned NumOpElts = OpVT.getVectorNumElements();
5931859311
SmallVector<SmallVector<SDValue, 2>, 4> SrcOps(NumOps);
5931959312
SmallVector<SmallVector<int, 8>, 4> SrcMasks(NumOps);
5932059313
if (all_of(seq<int>(NumOps), [&](int I) {
59321-
return getTargetShuffleMask(Ops[I], /*AllowSentinelZero=*/false,
59322-
SrcOps[I], SrcMasks[I]) &&
59314+
return getTargetShuffleInputs(Ops[I], SrcOps[I], SrcMasks[I], DAG,
59315+
Depth + 1) &&
5932359316
!is128BitLaneCrossingShuffleMask(OpVT, SrcMasks[I]) &&
59317+
none_of(SrcMasks[I], isUndefOrZero) &&
5932459318
SrcMasks[I].size() == NumOpElts &&
5932559319
all_of(SrcOps[I], [&OpVT](SDValue V) {
5932659320
return V.getValueType() == OpVT;

llvm/test/CodeGen/X86/avx-insertelt.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,9 @@ define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
221221
define <4 x double> @insert_f64_firstelts(<4 x double> %x, double %s) {
222222
; AVX-LABEL: insert_f64_firstelts:
223223
; AVX: # %bb.0:
224-
; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0,1],xmm0[2,3]
225-
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
226-
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
227-
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
224+
; AVX-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
225+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
226+
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
228227
; AVX-NEXT: retq
229228
;
230229
; AVX2-LABEL: insert_f64_firstelts:

0 commit comments

Comments
 (0)