@@ -615,6 +615,13 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
615
615
all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
616
616
typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST))));
617
617
618
+
619
+ getActionDefinitionsBuilder (G_INSERT_SUBVECTOR)
620
+ .customIf (all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
621
+ typeIsLegalBoolVec (1 , BoolVecTys, ST)))
622
+ .customIf (all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
623
+ typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST)));
624
+
618
625
getLegacyLegalizerInfo ().computeTables ();
619
626
}
620
627
@@ -833,10 +840,8 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
833
840
834
841
// / Gets the two common "VL" operands: an all-ones mask and the vector length.
835
842
// / VecTy is a scalable vector type.
836
- static std::pair<MachineInstrBuilder, MachineInstrBuilder>
837
- buildDefaultVLOps (const DstOp &Dst, MachineIRBuilder &MIB,
838
- MachineRegisterInfo &MRI) {
839
- LLT VecTy = Dst.getLLTTy (MRI);
843
+ static std::pair<MachineInstrBuilder, Register>
844
+ buildDefaultVLOps (LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
840
845
assert (VecTy.isScalableVector () && " Expecting scalable container type" );
841
846
const RISCVSubtarget &STI = MIB.getMF ().getSubtarget <RISCVSubtarget>();
842
847
LLT XLenTy (STI.getXLenVT ());
@@ -890,7 +895,7 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
890
895
// Handle case of s64 element vectors on rv32
891
896
if (XLenTy.getSizeInBits () == 32 &&
892
897
VecTy.getElementType ().getSizeInBits () == 64 ) {
893
- auto [_, VL] = buildDefaultVLOps (Dst, MIB, MRI);
898
+ auto [_, VL] = buildDefaultVLOps (MRI. getType ( Dst) , MIB, MRI);
894
899
buildSplatSplitS64WithVL (Dst, MIB.buildUndef (VecTy), SplatVal, VL, MIB,
895
900
MRI);
896
901
MI.eraseFromParent ();
@@ -1025,6 +1030,142 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1025
1030
return true ;
1026
1031
}
1027
1032
1033
+ bool RISCVLegalizerInfo::legalizeInsertSubvector (MachineInstr &MI,
1034
+ LegalizerHelper &Helper,
1035
+ MachineIRBuilder &MIB) const {
1036
+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1037
+
1038
+ MachineRegisterInfo &MRI = *MIB.getMRI ();
1039
+
1040
+ Register Dst = IS.getReg (0 );
1041
+ Register BigVec = IS.getBigVec ();
1042
+ Register LitVec = IS.getSubVec ();
1043
+ uint64_t Idx = IS.getIndexImm ();
1044
+
1045
+ LLT BigTy = MRI.getType (BigVec);
1046
+ LLT LitTy = MRI.getType (LitVec);
1047
+
1048
+ if (Idx == 0 ||
1049
+ MRI.getVRegDef (BigVec)->getOpcode () == TargetOpcode::G_IMPLICIT_DEF)
1050
+ return true ;
1051
+
1052
+ // We don't have the ability to slide mask vectors up indexed by their i1
1053
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
1054
+ // equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8
1055
+ // vectors and truncate down after the insert.
1056
+ if (LitTy.getElementType () == LLT::scalar (1 )) {
1057
+ auto BigTyMinElts = BigTy.getElementCount ().getKnownMinValue ();
1058
+ auto LitTyMinElts = LitTy.getElementCount ().getKnownMinValue ();
1059
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8 )
1060
+ return Helper.bitcast (
1061
+ IS, 0 ,
1062
+ LLT::vector (BigTy.getElementCount ().divideCoefficientBy (8 ), 8 ));
1063
+
1064
+ // We can't slide this mask vector up indexed by its i1 elements.
1065
+ // This poses a problem when we wish to insert a scalable vector which
1066
+ // can't be re-expressed as a larger type. Just choose the slow path and
1067
+ // extend to a larger type, then truncate back down.
1068
+ BigTy = BigTy.changeElementType (LLT::scalar (8 ));
1069
+ LitTy = LitTy.changeElementType (LLT::scalar (8 ));
1070
+ auto BigZExt = MIB.buildZExt (BigTy, BigVec);
1071
+ auto LitZExt = MIB.buildZExt (LitTy, LitVec);
1072
+ auto Insert = MIB.buildInsertSubvector (BigTy, BigZExt, LitZExt, Idx);
1073
+ auto SplatZero = MIB.buildSplatVector (
1074
+ BigTy, MIB.buildConstant (BigTy.getElementType (), 0 ));
1075
+ MIB.buildICmp (CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
1076
+ MI.eraseFromParent ();
1077
+ return true ;
1078
+ }
1079
+
1080
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo ();
1081
+ unsigned SubRegIdx, RemIdx;
1082
+ std::tie (SubRegIdx, RemIdx) =
1083
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs (
1084
+ getMVTForLLT (BigTy), getMVTForLLT (LitTy), Idx, TRI);
1085
+
1086
+ TypeSize VecRegSize = TypeSize::getScalable (RISCV::RVVBitsPerBlock);
1087
+ assert (isPowerOf2_64 (
1088
+ STI.expandVScale (LitTy.getSizeInBits ()).getKnownMinValue ()));
1089
+ bool ExactlyVecRegSized =
1090
+ STI.expandVScale (LitTy.getSizeInBits ())
1091
+ .isKnownMultipleOf (STI.expandVScale (VecRegSize));
1092
+
1093
+ // If the Idx has been completely eliminated and this subvector's size is a
1094
+ // vector register or a multiple thereof, or the surrounding elements are
1095
+ // undef, then this is a subvector insert which naturally aligns to a vector
1096
+ // register. These can easily be handled using subregister manipulation.
1097
+ if (RemIdx == 0 &&
1098
+ (ExactlyVecRegSized ||
1099
+ MRI.getVRegDef (BigVec)->getOpcode () == TargetOpcode::G_IMPLICIT_DEF))
1100
+ return true ;
1101
+
1102
+ // If the subvector is smaller than a vector register, then the insertion
1103
+ // must preserve the undisturbed elements of the register. We do this by
1104
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1105
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1106
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
1107
+ // LMUL=1 type back into the larger vector (resolving to another subregister
1108
+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1109
+ // to avoid allocating a large register group to hold our subvector.
1110
+
1111
+ // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1112
+ // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1113
+ // (in our case undisturbed). This means we can set up a subvector insertion
1114
+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1115
+ // size of the subvector.
1116
+ const LLT XLenTy (STI.getXLenVT ());
1117
+ LLT InterLitTy = BigTy;
1118
+ Register AlignedExtract = BigVec;
1119
+ unsigned AlignedIdx = Idx - RemIdx;
1120
+ if (TypeSize::isKnownGT (BigTy.getSizeInBits (),
1121
+ getLMUL1Ty (BigTy).getSizeInBits ())) {
1122
+ InterLitTy = getLMUL1Ty (BigTy);
1123
+ // Extract a subvector equal to the nearest full vector register type. This
1124
+ // should resolve to a G_EXTRACT on a subreg.
1125
+ AlignedExtract =
1126
+ MIB.buildExtractSubvector (InterLitTy, BigVec, AlignedIdx).getReg (0 );
1127
+ }
1128
+
1129
+ auto Insert = MIB.buildInsertSubvector (InterLitTy, MIB.buildUndef (InterLitTy),
1130
+ LitVec, 0 );
1131
+
1132
+ auto [Mask, _] = buildDefaultVLOps (BigTy, MIB, MRI);
1133
+ auto VL = MIB.buildVScale (XLenTy, LitTy.getElementCount ().getKnownMinValue ());
1134
+
1135
+ // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1136
+ ElementCount EndIndex =
1137
+ ElementCount::getScalable (RemIdx) + LitTy.getElementCount ();
1138
+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1139
+ if (STI.expandVScale (EndIndex) ==
1140
+ STI.expandVScale (InterLitTy.getElementCount ()))
1141
+ Policy = RISCVII::TAIL_AGNOSTIC;
1142
+
1143
+ // If we're inserting into the lowest elements, use a tail undisturbed
1144
+ // vmv.v.v.
1145
+ MachineInstrBuilder Inserted;
1146
+ if (RemIdx == 0 ) {
1147
+ Inserted = MIB.buildInstr (RISCV::G_VMV_V_V_VL, {InterLitTy},
1148
+ {AlignedExtract, Insert, VL});
1149
+ } else {
1150
+ auto SlideupAmt = MIB.buildVScale (XLenTy, RemIdx);
1151
+ // Construct the vector length corresponding to RemIdx + length(LitTy).
1152
+ VL = MIB.buildAdd (XLenTy, SlideupAmt, VL);
1153
+ Inserted =
1154
+ MIB.buildInstr (RISCV::G_VSLIDEUP_VL, {InterLitTy},
1155
+ {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1156
+ }
1157
+
1158
+ // If required, insert this subvector back into the correct vector register.
1159
+ // This should resolve to an INSERT_SUBREG instruction.
1160
+ if (TypeSize::isKnownGT (BigTy.getSizeInBits (), InterLitTy.getSizeInBits ()))
1161
+ MIB.buildInsertSubvector (Dst, BigVec, LitVec, AlignedIdx);
1162
+ else
1163
+ Inserted->getOperand (0 ).setReg (Dst);
1164
+
1165
+ MI.eraseFromParent ();
1166
+ return true ;
1167
+ }
1168
+
1028
1169
bool RISCVLegalizerInfo::legalizeCustom (
1029
1170
LegalizerHelper &Helper, MachineInstr &MI,
1030
1171
LostDebugLocObserver &LocObserver) const {
@@ -1092,6 +1233,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
1092
1233
return legalizeSplatVector (MI, MIRBuilder);
1093
1234
case TargetOpcode::G_EXTRACT_SUBVECTOR:
1094
1235
return legalizeExtractSubvector (MI, MIRBuilder);
1236
+ case TargetOpcode::G_INSERT_SUBVECTOR:
1237
+ return legalizeInsertSubvector (MI, Helper, MIRBuilder);
1095
1238
case TargetOpcode::G_LOAD:
1096
1239
case TargetOpcode::G_STORE:
1097
1240
return legalizeLoadStore (MI, Helper, MIRBuilder);
0 commit comments