Skip to content

Commit cffe3a0

Browse files
committed
[X86][Costmodel] Now that getReplicationShuffleCost() is good, update getInterleavedMemoryOpCostAVX512()
... to actually ask about i1-elt-wide mask, since that is what will probably be used on AVX512.

This unblocks D111460.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D114316
1 parent 2e7202b commit cffe3a0

File tree

2 files changed

+9
-10
lines changed

2 files changed

+9
-10
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5285,7 +5285,6 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
52855285
unsigned VF = VecTy->getNumElements() / Factor;
52865286
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
52875287

5288-
// FIXME: this is the most conservative estimate for the mask cost.
52895288
InstructionCost MaskCost;
52905289
if (UseMaskForCond || UseMaskForGaps) {
52915290
APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
@@ -5295,10 +5294,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
52955294
DemandedLoadStoreElts.setBit(Index + Elm * Factor);
52965295
}
52975296

5298-
Type *I8Type = Type::getInt8Ty(VecTy->getContext());
5297+
Type *I1Type = Type::getInt1Ty(VecTy->getContext());
52995298

53005299
MaskCost = getReplicationShuffleCost(
5301-
I8Type, Factor, VF,
5300+
I1Type, Factor, VF,
53025301
UseMaskForGaps ? DemandedLoadStoreElts
53035302
: APInt::getAllOnes(VecTy->getNumElements()),
53045303
CostKind);
@@ -5309,7 +5308,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
53095308
// memory access, we need to account for the cost of And-ing the two masks
53105309
// inside the loop.
53115310
if (UseMaskForGaps) {
5312-
auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
5311+
auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements());
53135312
MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
53145313
}
53155314
}

llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,13 @@ target triple = "x86_64-unknown-linux-gnu"
4343
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
4444
;
4545
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
46-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
46+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
4747
;
4848
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
49-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
49+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
5050
;
5151
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
52-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
52+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
5353

5454
define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
5555
entry:
@@ -110,13 +110,13 @@ for.end:
110110
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
111111
;
112112
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
113-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
113+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
114114
;
115115
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
116-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
116+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
117117
;
118118
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
119-
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
119+
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
120120

121121
define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
122122
entry:

0 commit comments

Comments
 (0)