@@ -4578,36 +4578,48 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4578
4578
VL);
4579
4579
}
4580
4580
4581
- // Can this shuffle be performed on exactly one (possibly larger) input?
4582
- static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4583
-
4584
- if (V2.isUndef())
4585
- return V1;
4586
-
4581
+ /// If concat_vector(V1,V2) could be folded away to some existing
4582
+ /// vector source, return it; otherwise return an empty SDValue. Note
4583
+ /// that the source may be larger than the requested concat_vector
4584
+ /// (i.e. an extract_subvector might be required.)
4585
+ static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4586
+ EVT VT = V1.getValueType();
4587
+ assert(VT == V2.getValueType() && "argument types must match");
4587
4588
// Both inputs must be EXTRACT_SUBVECTOR nodes.
4588
4589
if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4589
4590
V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4590
4591
return SDValue();
4591
4592
4592
4593
// Both extracts must read from the same source vector.
4593
4594
SDValue Src = V1.getOperand(0);
4594
- if (Src != V2.getOperand(0))
4595
- return SDValue();
4596
-
4597
- // Src needs to have twice the number of elements.
4598
- unsigned NumElts = VT.getVectorNumElements();
4599
- if (!Src.getValueType().isFixedLengthVector() ||
4600
- Src.getValueType().getVectorNumElements() != (NumElts * 2))
4595
+ if (Src != V2.getOperand(0) ||
4596
+ VT.isScalableVector() != Src.getValueType().isScalableVector())
4601
4597
return SDValue();
4602
4598
4603
4599
// The extracts must be the two adjacent VT-sized pieces at the start of the source: V1 at index 0, V2 right after it.
4604
4600
if (V1.getConstantOperandVal(1) != 0 ||
4605
- V2.getConstantOperandVal(1) != NumElts )
4601
+ V2.getConstantOperandVal(1) != VT.getVectorMinNumElements() )
4606
4602
return SDValue();
4607
4603
4608
4604
return Src;
4609
4605
}
4610
4606
4607
+ // Can this shuffle be performed on exactly one (possibly larger) input? Returns V1 when V2 is undef, the source found by foldConcatVector when it has exactly twice VT's element count, and an empty SDValue otherwise.
4608
+ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4609
+
4610
+ if (V2.isUndef())
4611
+ return V1;
4612
+
4613
+ unsigned NumElts = VT.getVectorNumElements();
4614
+ // Src needs to have exactly twice the number of elements of VT; a larger Src is rejected here.
4615
+ // TODO: Update shuffle lowering to add the extract subvector
4616
+ if (SDValue Src = foldConcatVector(V1, V2);
4617
+ Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4618
+ return Src;
4619
+
4620
+ return SDValue();
4621
+ }
4622
+
4611
4623
/// Is this shuffle interleaving contiguous elements from one vector into the
4612
4624
/// even elements and contiguous elements from another vector into the odd
4613
4625
/// elements. \p EvenSrc will contain the element that should be in the first
@@ -11519,12 +11531,27 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
11519
11531
return DAG.getMergeValues(Res, DL);
11520
11532
}
11521
11533
11522
- // TODO: Remove the e64 restriction once the fractional LMUL lowering
11523
- // is improved to always beat the vnsrl lowering below.
11524
- if (Subtarget.hasVendorXRivosVizip() && Factor == 2 &&
11525
- VecVT.getVectorElementType().getSizeInBits() == 64) {
11534
+ if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
11535
+ MVT VT = Op->getSimpleValueType(0);
11526
11536
SDValue V1 = Op->getOperand(0);
11527
11537
SDValue V2 = Op->getOperand(1);
11538
+
11539
+ // For fractional LMUL, check if we can use a higher LMUL
11540
+ // instruction to avoid a vslidedown.
11541
+ if (SDValue Src = foldConcatVector(V1, V2);
11542
+ Src && getLMUL1VT(VT).bitsGT(VT)) {
11543
+ EVT NewVT = VT.getDoubleNumVectorElementsVT();
11544
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
11545
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewVT, Src, ZeroIdx);
11546
+ SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
11547
+ DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11548
+ SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
11549
+ DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11550
+ Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Even, ZeroIdx);
11551
+ Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Odd, ZeroIdx);
11552
+ return DAG.getMergeValues({Even, Odd}, DL);
11553
+ }
11554
+
11528
11555
SDValue Even =
11529
11556
lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
11530
11557
SDValue Odd =
0 commit comments