@@ -26815,7 +26815,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26815
26815
SmallVector<SDValue, 8> TBLMask;
26816
26816
// If MinSVESize is not equal to MaxSVESize then we need to know which
26817
26817
// TBL mask element needs adjustment.
26818
- SmallVector<SDValue, 8> MulByVLMask ;
26818
+ SmallVector<SDValue, 8> AddRuntimeVLMask ;
26819
26819
26820
26820
// Bail out for 8-bits element types, because with 2048-bit SVE register
26821
26821
// size 8 bits is only sufficient to index into the first source vector.
@@ -26826,22 +26826,19 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26826
26826
// Handling poison index value.
26827
26827
if (Index < 0)
26828
26828
Index = 0;
26829
- // If we refer to the second operand then we have to add elements
26830
- // number in hardware register minus number of elements in a type in
26831
- // case if MinSVESize equals to MaxSVESize, otherwise just add normalized
26832
- // value and record this element in MulByVLMask to be adjusted in the
26833
- // runtime.
26829
+ // If the mask refers to elements in the second operand, then we have to
26830
+ // offset the index by the number of elements in a vector. If this number
26831
+ // is not known at compile-time, we need to maintain a mask with 'VL' values
26832
+ // to add at runtime.
26834
26833
if ((unsigned)Index >= ElementsPerVectorReg) {
26835
- if (!MinMaxEqual) {
26836
- Index = Index - ElementsPerVectorReg;
26837
- MulByVLMask.push_back(DAG.getConstant(1, DL, MVT::i64));
26838
- } else {
26834
+ if (MinMaxEqual) {
26839
26835
Index += IndexLen - ElementsPerVectorReg;
26836
+ } else {
26837
+ Index = Index - ElementsPerVectorReg;
26838
+ AddRuntimeVLMask.push_back(DAG.getConstant(1, DL, MVT::i64));
26840
26839
}
26841
- } else {
26842
- if (!MinMaxEqual)
26843
- MulByVLMask.push_back(DAG.getConstant(0, DL, MVT::i64));
26844
- }
26840
+ } else if (!MinMaxEqual)
26841
+ AddRuntimeVLMask.push_back(DAG.getConstant(0, DL, MVT::i64));
26845
26842
// For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
26846
26843
// to 255, this might point to the last element in the second operand
26847
26844
// of the shufflevector, thus we are rejecting this transform.
@@ -26857,7 +26854,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26857
26854
for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
26858
26855
TBLMask.push_back(DAG.getConstant((int)MaxOffset, DL, MVT::i64));
26859
26856
if (!MinMaxEqual)
26860
- MulByVLMask .push_back(DAG.getConstant(0, DL, MVT::i64));
26857
+ AddRuntimeVLMask .push_back(DAG.getConstant(0, DL, MVT::i64));
26861
26858
}
26862
26859
26863
26860
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
@@ -26873,17 +26870,17 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
26873
26870
Op1, SVEMask);
26874
26871
else if (Subtarget.hasSVE2()) {
26875
26872
if (!MinMaxEqual) {
26876
- SDValue VScale =
26877
- (BitsPerElt == 64)
26878
- ? DAG.getVScale(DL, MVT::i64, APInt(64, 128 / BitsPerElt ))
26879
- : DAG.getVScale(DL, MVT::i32, APInt(32, 128 / BitsPerElt ));
26873
+ unsigned MinNumElts = AArch64::SVEBitsPerBlock / BitsPerElt;
26874
+ SDValue VScale = (BitsPerElt == 64)
26875
+ ? DAG.getVScale(DL, MVT::i64, APInt(64, MinNumElts ))
26876
+ : DAG.getVScale(DL, MVT::i32, APInt(32, MinNumElts ));
26880
26877
SDValue VecMask =
26881
26878
DAG.getBuildVector(MaskType, DL, ArrayRef(TBLMask.data(), IndexLen));
26882
26879
SDValue MulByMask = DAG.getNode(
26883
26880
ISD::MUL, DL, MaskType,
26884
26881
DAG.getNode(ISD::SPLAT_VECTOR, DL, MaskType, VScale),
26885
26882
DAG.getBuildVector(MaskType, DL,
26886
- ArrayRef(MulByVLMask .data(), IndexLen)));
26883
+ ArrayRef(AddRuntimeVLMask .data(), IndexLen)));
26887
26884
SDValue UpdatedVecMask =
26888
26885
DAG.getNode(ISD::ADD, DL, MaskType, VecMask, MulByMask);
26889
26886
SVEMask = convertToScalableVector(
0 commit comments