Skip to content

Commit 8201cf3

Browse files
authored
[TTI][CostModel] Add cost modeling for expandload and compressstore intrinsics (#122882)
This patch adds methods for estimating the cost of the llvm.masked.expandload/llvm.masked.compressstore intrinsics in TTI. If a backend doesn't support custom lowering of these intrinsics, they are processed by ScalarizeMaskedMemIntrin, so we estimate their cost via getCommonMaskedMemoryOpCost as a gather/scatter operation; for the RISC-V backend, this patch implements a custom hook that calculates the cost based on the current lowering scheme.
1 parent 227b32f commit 8201cf3

13 files changed

+1726
-1444
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,6 +1517,19 @@ class TargetTransformInfo {
15171517
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
15181518
const Instruction *I = nullptr) const;
15191519

1520+
/// \return The cost of an Expand Load or Compress Store operation.
1521+
/// \p Opcode - is a type of memory access Load or Store
1522+
/// \p DataTy - a vector type of the data to be loaded or stored
1523+
/// \p VariableMask - true when the memory access is predicated with a mask
1524+
/// that is not a compile-time constant
1525+
/// \p Alignment - alignment of a single element
1526+
/// \p I - the optional original context instruction, if one exists, e.g. the
1527+
/// load/store to transform or the call to the expandload/compressstore intrinsic
1528+
InstructionCost getExpandCompressMemoryOpCost(
1529+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1530+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1531+
const Instruction *I = nullptr) const;
1532+
15201533
/// \return The cost of strided memory operations.
15211534
/// \p Opcode - is a type of memory access Load or Store
15221535
/// \p DataTy - a vector type of the data to be loaded or stored
@@ -2228,6 +2241,9 @@ class TargetTransformInfo::Concept {
22282241
bool VariableMask, Align Alignment,
22292242
TTI::TargetCostKind CostKind,
22302243
const Instruction *I = nullptr) = 0;
2244+
virtual InstructionCost getExpandCompressMemoryOpCost(
2245+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
2246+
TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0;
22312247
virtual InstructionCost
22322248
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
22332249
bool VariableMask, Align Alignment,
@@ -2963,6 +2979,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
29632979
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
29642980
Alignment, CostKind, I);
29652981
}
2982+
InstructionCost getExpandCompressMemoryOpCost(
2983+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
2984+
TTI::TargetCostKind CostKind, const Instruction *I = nullptr) override {
2985+
return Impl.getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask,
2986+
Alignment, CostKind, I);
2987+
}
29662988
InstructionCost
29672989
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
29682990
bool VariableMask, Align Alignment,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,12 @@ class TargetTransformInfoImplBase {
774774
return 1;
775775
}
776776

777+
InstructionCost getExpandCompressMemoryOpCost(
778+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
779+
TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
780+
return 1;
781+
}
782+
777783
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
778784
const Value *Ptr, bool VariableMask,
779785
Align Alignment,

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,6 +1468,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
14681468
true, CostKind);
14691469
}
14701470

1471+
InstructionCost getExpandCompressMemoryOpCost(
1472+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1473+
TTI::TargetCostKind CostKind, const Instruction *I = nullptr) {
1474+
// Treat expand load/compress store as gather/scatter operation.
1475+
// TODO: implement more precise cost estimation for these intrinsics.
1476+
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
1477+
/*IsGatherScatter*/ true, CostKind);
1478+
}
1479+
14711480
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
14721481
const Value *Ptr, bool VariableMask,
14731482
Align Alignment,
@@ -1776,6 +1785,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17761785
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
17771786
VarMask, Alignment, CostKind, I);
17781787
}
1788+
case Intrinsic::masked_compressstore: {
1789+
const Value *Data = Args[0];
1790+
const Value *Mask = Args[2];
1791+
Align Alignment = I->getParamAlign(1).valueOrOne();
1792+
return thisT()->getExpandCompressMemoryOpCost(
1793+
Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
1794+
CostKind, I);
1795+
}
1796+
case Intrinsic::masked_expandload: {
1797+
const Value *Mask = Args[1];
1798+
Align Alignment = I->getParamAlign(0).valueOrOne();
1799+
return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
1800+
!isa<Constant>(Mask),
1801+
Alignment, CostKind, I);
1802+
}
17791803
case Intrinsic::experimental_vp_strided_store: {
17801804
const Value *Data = Args[0];
17811805
const Value *Ptr = Args[1];

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,15 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
11531153
return Cost;
11541154
}
11551155

1156+
InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
1157+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1158+
TTI::TargetCostKind CostKind, const Instruction *I) const {
1159+
InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(
1160+
Opcode, DataTy, VariableMask, Alignment, CostKind, I);
1161+
assert(Cost >= 0 && "TTI should not produce negative costs!");
1162+
return Cost;
1163+
}
1164+
11561165
InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
11571166
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
11581167
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,44 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
940940
return NumLoads * MemOpCost;
941941
}
942942

943+
InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
944+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
945+
TTI::TargetCostKind CostKind, const Instruction *I) {
946+
bool IsLegal = (Opcode == Instruction::Store &&
947+
isLegalMaskedCompressStore(DataTy, Alignment)) ||
948+
(Opcode == Instruction::Load &&
949+
isLegalMaskedExpandLoad(DataTy, Alignment));
950+
if (!IsLegal || CostKind != TTI::TCK_RecipThroughput)
951+
return BaseT::getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask,
952+
Alignment, CostKind, I);
953+
// Example compressstore sequence:
954+
// vsetivli zero, 8, e32, m2, ta, ma (ignored)
955+
// vcompress.vm v10, v8, v0
956+
// vcpop.m a1, v0
957+
// vsetvli zero, a1, e32, m2, ta, ma
958+
// vse32.v v10, (a0)
959+
// Example expandload sequence:
960+
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
961+
// vcpop.m a1, v0
962+
// vsetvli zero, a1, e32, m2, ta, ma
963+
// vle32.v v10, (a0)
964+
// vsetivli zero, 8, e32, m2, ta, ma
965+
// viota.m v12, v0
966+
// vrgather.vv v8, v10, v12, v0.t
967+
auto MemOpCost =
968+
getMemoryOpCost(Opcode, DataTy, Alignment, /*AddressSpace*/ 0, CostKind);
969+
auto LT = getTypeLegalizationCost(DataTy);
970+
SmallVector<unsigned, 4> Opcodes{RISCV::VSETVLI};
971+
if (VariableMask)
972+
Opcodes.push_back(RISCV::VCPOP_M);
973+
if (Opcode == Instruction::Store)
974+
Opcodes.append({RISCV::VCOMPRESS_VM});
975+
else
976+
Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
977+
return MemOpCost +
978+
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
979+
}
980+
943981
InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
944982
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
945983
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
174174
TTI::TargetCostKind CostKind,
175175
const Instruction *I);
176176

177+
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src,
178+
bool VariableMask,
179+
Align Alignment,
180+
TTI::TargetCostKind CostKind,
181+
const Instruction *I = nullptr);
182+
177183
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
178184
const Value *Ptr, bool VariableMask,
179185
Align Alignment,

llvm/test/Analysis/CostModel/RISCV/gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
268268
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
269269
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
270270
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
271-
; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
271+
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
272272
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
273273
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
274274
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
@@ -280,7 +280,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
280280
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
281281
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
282282
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
283-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
283+
; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
284284
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
285285
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
286286
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
@@ -338,7 +338,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
338338
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
339339
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
340340
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
341-
; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
341+
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
342342
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
343343
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
344344
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
@@ -350,7 +350,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
350350
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
351351
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
352352
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
353-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
353+
; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
354354
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
355355
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
356356
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0

0 commit comments

Comments
 (0)