Skip to content

Commit 3cd68b4

Browse files
Dinar Temirbulatov
authored and committed
Resolved remarks.
1 parent 9907304 commit 3cd68b4

File tree

2 files changed

+40
-56
lines changed

2 files changed

+40
-56
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26806,7 +26806,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2680626806
unsigned IndexLen = MinSVESize / BitsPerElt;
2680726807
unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
2680826808
uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
26809-
EVT MaskEltType = EVT::getIntegerVT(*DAG.getContext(), BitsPerElt);
26809+
EVT MaskEltType = VTOp1.getVectorElementType().changeTypeToInteger();
2681026810
EVT MaskType = EVT::getVectorVT(*DAG.getContext(), MaskEltType, IndexLen);
2681126811
bool MinMaxEqual = (MinSVESize == MaxSVESize);
2681226812
assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
@@ -26815,10 +26815,10 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2681526815
SmallVector<SDValue, 8> TBLMask;
2681626816
// If MinSVESize is not equal to MaxSVESize then we need to know which
2681726817
// TBL mask element needs adjustment.
26818-
SmallVector<SDValue, 8> MaskNormalized;
26818+
SmallVector<SDValue, 8> MulByVLMask;
2681926819

26820-
// Avoid if 8-bits element types, since with 2048-bit SVE register
26821-
// size we could not repersent index correctly.
26820+
// Bail out for 8-bit element types, because with 2048-bit SVE register
26821+
// size 8 bits is only sufficient to index into the first source vector.
2682226822
if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
2682326823
return SDValue();
2682426824

@@ -26829,18 +26829,18 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2682926829
// If we refer to the second operand then we have to add elements
2683026830
// number in hardware register minus number of elements in a type in
2683126831
// case if MinSVESize equals to MaxSVESize, otherwise just add normalized
26832-
// value and record this element in MaskNormalized to be adjusted in the
26832+
// value and record this element in MulByVLMask to be adjusted in the
2683326833
// runtime.
2683426834
if ((unsigned)Index >= ElementsPerVectorReg) {
2683526835
if (!MinMaxEqual) {
2683626836
Index = Index - ElementsPerVectorReg;
26837-
MaskNormalized.push_back(DAG.getConstant(1, DL, MVT::i64));
26837+
MulByVLMask.push_back(DAG.getConstant(1, DL, MVT::i64));
2683826838
} else {
2683926839
Index += IndexLen - ElementsPerVectorReg;
2684026840
}
2684126841
} else {
2684226842
if (!MinMaxEqual)
26843-
MaskNormalized.push_back(DAG.getConstant(0, DL, MVT::i64));
26843+
MulByVLMask.push_back(DAG.getConstant(0, DL, MVT::i64));
2684426844
}
2684526845
// For 8-bit elements and 1024-bit SVE registers and MaxOffset equals
2684626846
// to 255, this might point to the last element of the second operand
@@ -26857,7 +26857,7 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2685726857
for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
2685826858
TBLMask.push_back(DAG.getConstant((int)MaxOffset, DL, MVT::i64));
2685926859
if (!MinMaxEqual)
26860-
MaskNormalized.push_back(DAG.getConstant(0, DL, MVT::i64));
26860+
MulByVLMask.push_back(DAG.getConstant(0, DL, MVT::i64));
2686126861
}
2686226862

2686326863
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
@@ -26873,36 +26873,26 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
2687326873
Op1, SVEMask);
2687426874
else if (Subtarget.hasSVE2()) {
2687526875
if (!MinMaxEqual) {
26876-
SDValue VScale = (BitsPerElt == 64)
26877-
? DAG.getVScale(DL, MVT::i64, APInt(64, 1))
26878-
: DAG.getVScale(DL, MVT::i32, APInt(32, 1));
26879-
SDValue Mul =
26880-
DAG.getNode(ISD::MUL, DL, (BitsPerElt == 64) ? MVT::i64 : MVT::i32,
26881-
DAG.getConstant(128 / BitsPerElt, DL,
26882-
(BitsPerElt == 64) ? MVT::i64 : MVT::i32),
26883-
VScale);
26876+
SDValue VScale =
26877+
(BitsPerElt == 64)
26878+
? DAG.getVScale(DL, MVT::i64, APInt(64, 128 / BitsPerElt))
26879+
: DAG.getVScale(DL, MVT::i32, APInt(32, 128 / BitsPerElt));
2688426880
SDValue VecMask =
2688526881
DAG.getBuildVector(MaskType, DL, ArrayRef(TBLMask.data(), IndexLen));
26886-
SDValue MulMask = DAG.getBuildVector(
26887-
MaskType, DL, ArrayRef(MaskNormalized.data(), IndexLen));
26888-
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, MaskType, Mul);
26889-
SDValue MulMaskNormalized =
26890-
DAG.getNode(ISD::MUL, DL, MaskType, SplatPred, MulMask);
26882+
SDValue MulByMask = DAG.getNode(
26883+
ISD::MUL, DL, MaskType,
26884+
DAG.getNode(ISD::SPLAT_VECTOR, DL, MaskType, VScale),
26885+
DAG.getBuildVector(MaskType, DL,
26886+
ArrayRef(MulByVLMask.data(), IndexLen)));
2689126887
SDValue UpdatedVecMask =
26892-
DAG.getNode(ISD::ADD, DL, MaskType, VecMask, MulMaskNormalized);
26893-
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskType);
26894-
SDValue SVEMask =
26895-
convertToScalableVector(DAG, MaskContainerVT, UpdatedVecMask);
26896-
Shuffle = DAG.getNode(
26897-
ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26898-
DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32), Op1, Op2,
26899-
SVEMask);
26900-
} else {
26901-
Shuffle = DAG.getNode(
26902-
ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26903-
DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32), Op1, Op2,
26904-
SVEMask);
26888+
DAG.getNode(ISD::ADD, DL, MaskType, VecMask, MulByMask);
26889+
SVEMask = convertToScalableVector(
26890+
DAG, getContainerForFixedLengthVector(DAG, MaskType), UpdatedVecMask);
2690526891
}
26892+
Shuffle =
26893+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
26894+
DAG.getConstant(Intrinsic::aarch64_sve_tbl2, DL, MVT::i32),
26895+
Op1, Op2, SVEMask);
2690626896
}
2690726897
Shuffle = convertFromScalableVector(DAG, VT, Shuffle);
2690826898
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);

llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -559,15 +559,13 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
559559
;
560560
; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
561561
; SVE2_128_NOMAX: // %bb.0:
562-
; SVE2_128_NOMAX-NEXT: rdvl x8, #1
563562
; SVE2_128_NOMAX-NEXT: ptrue p0.h, vl8
563+
; SVE2_128_NOMAX-NEXT: cnth x8
564564
; SVE2_128_NOMAX-NEXT: adrp x9, .LCPI7_0
565-
; SVE2_128_NOMAX-NEXT: lsr x8, x8, #4
566-
; SVE2_128_NOMAX-NEXT: ldr q1, [x9, :lo12:.LCPI7_0]
567-
; SVE2_128_NOMAX-NEXT: lsl w8, w8, #3
565+
; SVE2_128_NOMAX-NEXT: adrp x10, .LCPI7_1
568566
; SVE2_128_NOMAX-NEXT: mov z0.h, w8
569-
; SVE2_128_NOMAX-NEXT: adrp x8, .LCPI7_1
570-
; SVE2_128_NOMAX-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
567+
; SVE2_128_NOMAX-NEXT: ldr q1, [x9, :lo12:.LCPI7_0]
568+
; SVE2_128_NOMAX-NEXT: ldr q2, [x10, :lo12:.LCPI7_1]
571569
; SVE2_128_NOMAX-NEXT: mad z0.h, p0/m, z1.h, z2.h
572570
; SVE2_128_NOMAX-NEXT: ldr q1, [x0]
573571
; SVE2_128_NOMAX-NEXT: ldr q2, [x1]
@@ -577,15 +575,13 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
577575
;
578576
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
579577
; SVE2_NOMIN_NOMAX: // %bb.0:
580-
; SVE2_NOMIN_NOMAX-NEXT: rdvl x8, #1
581578
; SVE2_NOMIN_NOMAX-NEXT: ptrue p0.h, vl8
579+
; SVE2_NOMIN_NOMAX-NEXT: cnth x8
582580
; SVE2_NOMIN_NOMAX-NEXT: adrp x9, .LCPI7_0
583-
; SVE2_NOMIN_NOMAX-NEXT: lsr x8, x8, #4
584-
; SVE2_NOMIN_NOMAX-NEXT: ldr q1, [x9, :lo12:.LCPI7_0]
585-
; SVE2_NOMIN_NOMAX-NEXT: lsl w8, w8, #3
581+
; SVE2_NOMIN_NOMAX-NEXT: adrp x10, .LCPI7_1
586582
; SVE2_NOMIN_NOMAX-NEXT: mov z0.h, w8
587-
; SVE2_NOMIN_NOMAX-NEXT: adrp x8, .LCPI7_1
588-
; SVE2_NOMIN_NOMAX-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
583+
; SVE2_NOMIN_NOMAX-NEXT: ldr q1, [x9, :lo12:.LCPI7_0]
584+
; SVE2_NOMIN_NOMAX-NEXT: ldr q2, [x10, :lo12:.LCPI7_1]
589585
; SVE2_NOMIN_NOMAX-NEXT: mad z0.h, p0/m, z1.h, z2.h
590586
; SVE2_NOMIN_NOMAX-NEXT: ldr q1, [x0]
591587
; SVE2_NOMIN_NOMAX-NEXT: ldr q2, [x1]
@@ -596,18 +592,16 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
596592
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
597593
; SVE2_MIN_256_NOMAX: // %bb.0:
598594
; SVE2_MIN_256_NOMAX-NEXT: ptrue p0.h, vl16
599-
; SVE2_MIN_256_NOMAX-NEXT: rdvl x8, #1
600595
; SVE2_MIN_256_NOMAX-NEXT: ldr q0, [x0]
601-
; SVE2_MIN_256_NOMAX-NEXT: lsr x8, x8, #4
602-
; SVE2_MIN_256_NOMAX-NEXT: adrp x9, .LCPI7_0
603-
; SVE2_MIN_256_NOMAX-NEXT: add x9, x9, :lo12:.LCPI7_0
604-
; SVE2_MIN_256_NOMAX-NEXT: adrp x10, .LCPI7_1
605-
; SVE2_MIN_256_NOMAX-NEXT: add x10, x10, :lo12:.LCPI7_1
596+
; SVE2_MIN_256_NOMAX-NEXT: adrp x8, .LCPI7_0
597+
; SVE2_MIN_256_NOMAX-NEXT: add x8, x8, :lo12:.LCPI7_0
598+
; SVE2_MIN_256_NOMAX-NEXT: adrp x9, .LCPI7_1
599+
; SVE2_MIN_256_NOMAX-NEXT: add x9, x9, :lo12:.LCPI7_1
600+
; SVE2_MIN_256_NOMAX-NEXT: cnth x10
606601
; SVE2_MIN_256_NOMAX-NEXT: ldr q1, [x1]
607-
; SVE2_MIN_256_NOMAX-NEXT: lsl w8, w8, #3
608-
; SVE2_MIN_256_NOMAX-NEXT: mov z4.h, w8
609-
; SVE2_MIN_256_NOMAX-NEXT: ld1h { z2.h }, p0/z, [x9]
610-
; SVE2_MIN_256_NOMAX-NEXT: ld1h { z3.h }, p0/z, [x10]
602+
; SVE2_MIN_256_NOMAX-NEXT: mov z4.h, w10
603+
; SVE2_MIN_256_NOMAX-NEXT: ld1h { z2.h }, p0/z, [x8]
604+
; SVE2_MIN_256_NOMAX-NEXT: ld1h { z3.h }, p0/z, [x9]
611605
; SVE2_MIN_256_NOMAX-NEXT: mad z2.h, p0/m, z4.h, z3.h
612606
; SVE2_MIN_256_NOMAX-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
613607
; SVE2_MIN_256_NOMAX-NEXT: // kill: def $q0 killed $q0 killed $z0

0 commit comments

Comments
 (0)