@@ -597,6 +597,31 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
597
597
598
598
SplatActions.clampScalar (1 , sXLen , sXLen );
599
599
600
+ LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
601
+ LLT DstTy = Query.Types [0 ];
602
+ LLT SrcTy = Query.Types [1 ];
603
+ return DstTy.getElementType () == LLT::scalar (1 ) &&
604
+ DstTy.getElementCount ().getKnownMinValue () >= 8 &&
605
+ SrcTy.getElementCount ().getKnownMinValue () >= 8 ;
606
+ };
607
+ getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
608
+ // We don't have the ability to slide mask vectors down indexed by their
609
+ // i1 elements; the smallest we can do is i8. Often we are able to bitcast
610
+ // to equivalent i8 vectors.
611
+ .bitcastIf (
612
+ all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
613
+ typeIsLegalBoolVec (1 , BoolVecTys, ST), ExtractSubvecBitcastPred),
614
+ [=](const LegalityQuery &Query) {
615
+ LLT CastTy = LLT::vector (
616
+ Query.Types [0 ].getElementCount ().divideCoefficientBy (8 ), 8 );
617
+ return std::pair (0 , CastTy);
618
+ })
619
+ .customIf (LegalityPredicates::any (
620
+ all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
621
+ typeIsLegalBoolVec (1 , BoolVecTys, ST)),
622
+ all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
623
+ typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST))));
624
+
600
625
getLegacyLegalizerInfo ().computeTables ();
601
626
}
602
627
@@ -931,6 +956,105 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
931
956
return true ;
932
957
}
933
958
959
+ static LLT getLMUL1Ty (LLT VecTy) {
960
+ assert (VecTy.getElementType ().getSizeInBits () <= 64 &&
961
+ " Unexpected vector LLT" );
962
+ return LLT::scalable_vector (RISCV::RVVBitsPerBlock /
963
+ VecTy.getElementType ().getSizeInBits (),
964
+ VecTy.getElementType ());
965
+ }
966
+
967
/// Custom-legalize G_EXTRACT_SUBVECTOR for RISC-V vectors.
///
/// Register-aligned extracts (index 0, or an index that decomposes fully into
/// a subregister index) are left for later subregister-based lowering by
/// returning true without touching MI. i1 vectors are widened to i8, extracted
/// there, and narrowed back with a compare. Everything else is lowered to an
/// explicit G_VSLIDEDOWN_VL followed by a register-aligned extract.
///
/// Returns true on success; \p MI is erased only when replacement code was
/// actually emitted.
bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
                                                  LegalizerHelper &Helper,
                                                  MachineIRBuilder &MIB) const {
  GExtractSubvector &ES = cast<GExtractSubvector>(MI);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = ES.getReg(0);
  Register Src = ES.getSrcVec();
  uint64_t Idx = ES.getIndexImm();

  // With an index of 0 this is a cast-like subvector, which can be performed
  // with subregister operations.
  if (Idx == 0)
    return true;

  LLT LitTy = MRI.getType(Dst);
  LLT BigTy = MRI.getType(Src);

  if (LitTy.getElementType() == LLT::scalar(1)) {
    // We can't slide this mask vector down indexed by its i1 elements.
    // This poses a problem when we wish to extract a scalable vector which
    // can't be re-expressed as a larger type. Just choose the slow path and
    // extend to a larger type, then truncate back down.
    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
    LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
    auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
    auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
    // Narrow back to i1 by comparing the widened extract against zero.
    auto SplatZero = MIB.buildSplatVector(
        ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
    MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
    MI.eraseFromParent();
    return true;
  }

  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
  MVT LitTyMVT = getMVTForLLT(LitTy);
  auto Decompose =
      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
          getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
  unsigned RemIdx = Decompose.second;

  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx == 0)
    return true;

  // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // divide exactly.
  assert(
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
      RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVII::VLMUL::LMUL_1);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  LLT InterLitTy = BigTy;
  Register Vec = Src;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(Decompose.first != RISCV::NoSubRegister);
    InterLitTy = getLMUL1Ty(BigTy);
    // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
    // specified on the source Register (the equivalent) since a generic
    // virtual register does not allow a subregister index.
    Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  const LLT XLenTy(STI.getXLenVT());
  // RemIdx counts scalable elements, so the runtime slide amount is
  // vscale * RemIdx.
  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
  auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
  uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
  auto Slidedown = MIB.buildInstr(
      RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
      {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  MIB.buildExtractSubvector(Dst, Slidedown, 0);

  MI.eraseFromParent();
  return true;
}
1057
+
934
1058
bool RISCVLegalizerInfo::legalizeCustom (
935
1059
LegalizerHelper &Helper, MachineInstr &MI,
936
1060
LostDebugLocObserver &LocObserver) const {
@@ -1001,6 +1125,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
1001
1125
return legalizeExt (MI, MIRBuilder);
1002
1126
case TargetOpcode::G_SPLAT_VECTOR:
1003
1127
return legalizeSplatVector (MI, MIRBuilder);
1128
+ case TargetOpcode::G_EXTRACT_SUBVECTOR:
1129
+ return legalizeExtractSubvector (MI, Helper, MIRBuilder);
1004
1130
case TargetOpcode::G_LOAD:
1005
1131
case TargetOpcode::G_STORE:
1006
1132
return legalizeLoadStore (MI, Helper, MIRBuilder);
0 commit comments