@@ -26806,7 +26806,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26806
26806
unsigned IndexLen = MinSVESize / BitsPerElt;
26807
26807
unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
26808
26808
uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
26809
- EVT MaskEltType = EVT::getIntegerVT(*DAG.getContext(), BitsPerElt );
26809
+ EVT MaskEltType = VTOp1.getVectorElementType().changeTypeToInteger( );
26810
26810
EVT MaskType = EVT::getVectorVT(*DAG.getContext(), MaskEltType, IndexLen);
26811
26811
bool MinMaxEqual = (MinSVESize == MaxSVESize);
26812
26812
assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
@@ -26815,10 +26815,10 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26815
26815
SmallVector<SDValue, 8> TBLMask;
26816
26816
// If MinSVESize is not equal to MaxSVESize then we need to know which
26817
26817
// TBL mask element needs adjustment.
26818
- SmallVector<SDValue, 8> MaskNormalized ;
26818
+ SmallVector<SDValue, 8> MulByVLMask ;
26819
26819
26820
- // Avoid if 8-bits element types, since with 2048-bit SVE register
26821
- // size we could not repersent index correctly .
26820
+ // Bail out for 8-bit element types, because with 2048-bit SVE register
26821
+ // size 8 bits is only sufficient to index into the first source vector .
26822
26822
if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
26823
26823
return SDValue();
26824
26824
@@ -26829,18 +26829,18 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26829
26829
// If we refer to the second operand then we have to add elements
26830
26830
// number in hardware register minus number of elements in a type in
26831
26831
// case if MinSVESize equals to MaxSVESize, otherwise just add normalized
26832
- // value and record this element in MaskNormalized to be adjusted in the
26832
+ // value and record this element in MulByVLMask to be adjusted in the
26833
26833
// runtime.
26834
26834
if ((unsigned)Index >= ElementsPerVectorReg) {
26835
26835
if (!MinMaxEqual) {
26836
26836
Index = Index - ElementsPerVectorReg;
26837
- MaskNormalized .push_back(DAG.getConstant(1, DL, MVT::i64));
26837
+ MulByVLMask .push_back(DAG.getConstant(1, DL, MVT::i64));
26838
26838
} else {
26839
26839
Index += IndexLen - ElementsPerVectorReg;
26840
26840
}
26841
26841
} else {
26842
26842
if (!MinMaxEqual)
26843
- MaskNormalized .push_back(DAG.getConstant(0, DL, MVT::i64));
26843
+ MulByVLMask .push_back(DAG.getConstant(0, DL, MVT::i64));
26844
26844
}
26845
26845
// For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
26846
26846
// to 255, this might point to the last element in the second operand
@@ -26857,7 +26857,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26857
26857
for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
26858
26858
TBLMask.push_back(DAG.getConstant((int)MaxOffset, DL, MVT::i64));
26859
26859
if (!MinMaxEqual)
26860
- MaskNormalized .push_back(DAG.getConstant(0, DL, MVT::i64));
26860
+ MulByVLMask .push_back(DAG.getConstant(0, DL, MVT::i64));
26861
26861
}
26862
26862
26863
26863
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
@@ -26873,36 +26873,26 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26873
26873
Op1, SVEMask);
26874
26874
else if (Subtarget.hasSVE2()) {
26875
26875
if (!MinMaxEqual) {
26876
- SDValue VScale = (BitsPerElt == 64)
26877
- ? DAG.getVScale(DL, MVT::i64, APInt(64, 1))
26878
- : DAG.getVScale(DL, MVT::i32, APInt(32, 1));
26879
- SDValue Mul =
26880
- DAG.getNode(ISD::MUL, DL, (BitsPerElt == 64) ? MVT::i64 : MVT::i32,
26881
- DAG.getConstant(128 / BitsPerElt, DL,
26882
- (BitsPerElt == 64) ? MVT::i64 : MVT::i32),
26883
- VScale);
26876
+ SDValue VScale =
26877
+ (BitsPerElt == 64)
26878
+ ? DAG.getVScale(DL, MVT::i64, APInt(64, 128 / BitsPerElt))
26879
+ : DAG.getVScale(DL, MVT::i32, APInt(32, 128 / BitsPerElt));
26884
26880
SDValue VecMask =
26885
26881
DAG.getBuildVector(MaskType, DL, ArrayRef(TBLMask.data(), IndexLen));
26886
- SDValue MulMask = DAG.getBuildVector (
26887
- MaskType , DL, ArrayRef(MaskNormalized.data(), IndexLen));
26888
- SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, MaskType, Mul);
26889
- SDValue MulMaskNormalized =
26890
- DAG.getNode(ISD::MUL, DL, MaskType, SplatPred, MulMask );
26882
+ SDValue MulByMask = DAG.getNode (
26883
+ ISD::MUL , DL, MaskType,
26884
+ DAG.getNode(ISD::SPLAT_VECTOR, DL, MaskType, VScale),
26885
+ DAG.getBuildVector(MaskType, DL,
26886
+ ArrayRef(MulByVLMask.data(), IndexLen)) );
26891
26887
SDValue UpdatedVecMask =
26892
- DAG.getNode(ISD::ADD, DL, MaskType, VecMask, MulMaskNormalized);
26893
- EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
26894
- SDValue SVEMask =
26895
- convertToScalableVector(DAG, MaskContainerVT, UpdatedVecMask);
26896
- Shuffle = DAG.getNode(
26897
- ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26898
- DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32), Op1, Op2,
26899
- SVEMask);
26900
- } else {
26901
- Shuffle = DAG.getNode(
26902
- ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26903
- DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32), Op1, Op2,
26904
- SVEMask);
26888
+ DAG.getNode(ISD::ADD, DL, MaskType, VecMask, MulByMask);
26889
+ SVEMask = convertToScalableVector(
26890
+ DAG, getContainerForFixedLengthVector(DAG, MaskType), UpdatedVecMask);
26905
26891
}
26892
+ Shuffle =
26893
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26894
+ DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32),
26895
+ Op1, Op2, SVEMask);
26906
26896
}
26907
26897
Shuffle = convertFromScalableVector(DAG, VT, Shuffle);
26908
26898
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
0 commit comments