Skip to content

Commit 0f0b730

Browse files
hliao2 authored and igcbot committed
Refactor LSC queries
Move that logic into CShader so that it could be queried within the CShader.
1 parent 056a711 commit 0f0b730

File tree

3 files changed

+176
-136
lines changed

3 files changed

+176
-136
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,3 +3928,164 @@ bool CShader::needsEntryFence() const
39283928
}
39293929
return false;
39303930
}
3931+
3932+
// Attaches a forced LSC cache-control setting to `inst` when the module
// metadata requests one for the resource the instruction accesses.
//
// Operand 0 of `inst` is traced back with IGC::TracePointerSource. If the
// source is a function argument, every pushInfo.constantReg entry whose mapped
// value equals that argument number is looked up (by its key) in
// forceLscCacheList. On the first hit, the associated value is attached to
// `inst` as i32 metadata named "lsc.cache.ctrl".
//
// Returns true when the metadata was attached; false otherwise, including
// when `inst` is null.
//
// NOTE(review): for an llvm::StoreInst operand 0 is the stored value rather
// than the address -- confirm that tracing operand 0 is intended for stores.
bool CShader::forceCacheCtrl(llvm::Instruction* inst)
{
    // The parameter is declared with a nullptr default, so it must not be
    // dereferenced unconditionally.
    if (inst == nullptr)
    {
        return false;
    }

    Value* src = IGC::TracePointerSource(inst->getOperand(0));
    Argument* calleeArg = src ? dyn_cast<Argument>(src) : nullptr;
    if (calleeArg == nullptr)
    {
        // Only accesses rooted at a function argument can be matched.
        return false;
    }

    // Bind by reference: copying the whole override map on every query is
    // pure overhead, the map is only read here.
    const auto& list = m_ModuleMetadata->forceLscCacheList;
    const PushInfo& pushInfo = m_ModuleMetadata->pushInfo;
    const unsigned calleeArgNo = calleeArg->getArgNo();

    for (const auto& entry : pushInfo.constantReg)
    {
        if (entry.second != calleeArgNo)
        {
            continue;
        }

        const auto pos = list.find(entry.first);
        if (pos == list.end())
        {
            continue;
        }

        auto& llvmCtx = inst->getContext();
        MDNode* node = MDNode::get(
            llvmCtx,
            ConstantAsMetadata::get(
                ConstantInt::get(Type::getInt32Ty(llvmCtx), pos->second)));
        inst->setMetadata("lsc.cache.ctrl", node);
        return true;
    }

    return false;
}
3962+
3963+
// This function may be used in earlier passes to determine whether a given
3964+
// instruction will generate an LSC message. If it returns Unknown or False, you
3965+
// should conservatively assume that you don't know what will be generated. If
3966+
// this returns True, it is guaranteed that an LSC message will result.
3967+
Tristate CShader::shouldGenerateLSCQuery(
3968+
const CodeGenContext& Ctx,
3969+
Instruction* vectorLdStInst,
3970+
SIMDMode Mode)
3971+
{
3972+
auto& Platform = Ctx.platform;
3973+
auto& DriverInfo = Ctx.m_DriverInfo;
3974+
3975+
if (!Platform.LSCEnabled(Mode)) {
3976+
// We enable LSC load/store only when program SIMD size is >= LSC's
3977+
// simd size. This is to avoid increasing register pressure and
3978+
// reduce extra moves.
3979+
// Note, that this only applies to gather/scatter;
3980+
// for blocked messages we can always enable LSC
3981+
return Tristate::False;
3982+
}
3983+
3984+
// Geneate LSC for load/store instructions as Load/store emit can
3985+
// handle full-payload uniform non-transpose LSC on PVC A0.
3986+
if (vectorLdStInst == nullptr
3987+
|| isa<LoadInst>(vectorLdStInst)
3988+
|| isa<StoreInst>(vectorLdStInst))
3989+
return Tristate::True;
3990+
// special checks for typed r/w
3991+
if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst))
3992+
{
3993+
if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedread ||
3994+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedwrite ||
3995+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_intatomictyped ||
3996+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_icmpxchgatomictyped)
3997+
{
3998+
return (Platform.hasLSCTypedMessage() ? Tristate::True : Tristate::False);
3999+
}
4000+
else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldraw_indexed ||
4001+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldrawvector_indexed ||
4002+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storeraw_indexed ||
4003+
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storerawvector_indexed)
4004+
{
4005+
IGC_ASSERT(Platform.isProductChildOf(IGFX_DG2));
4006+
IGC_ASSERT(Platform.hasLSC());
4007+
4008+
bool Result =
4009+
DriverInfo.EnableLSCForLdRawAndStoreRawOnDG2() ||
4010+
Platform.isCoreChildOf(IGFX_XE_HPC_CORE);
4011+
4012+
return (Result ? Tristate::True : Tristate::False);
4013+
}
4014+
}
4015+
4016+
// in PVC A0, SIMD1 reads/writes need full payloads
4017+
// this causes chaos for vISA (would need 4REG alignment)
4018+
// and to make extra moves to enable the payload
4019+
// B0 gets this feature (there is no A1)
4020+
if (!Platform.LSCSimd1NeedFullPayload()) {
4021+
return Tristate::True;
4022+
}
4023+
4024+
return Tristate::Unknown;
4025+
}
4026+
4027+
// Note that if LSCEnabled() returns true, load/store instructions must be
4028+
// generated with LSC; but some intrinsics are still generated with legacy.
4029+
bool CShader::shouldGenerateLSC(llvm::Instruction* vectorLdStInst)
4030+
{
4031+
if (vectorLdStInst && m_ctx->m_DriverInfo.SupportForceRouteAndCache())
4032+
{
4033+
// check if umd specified lsc caching mode and set the metadata if needed.
4034+
if (forceCacheCtrl(vectorLdStInst))
4035+
{
4036+
// if umd force the caching mode, also assume it wants the resource to be in lsc.
4037+
return true;
4038+
}
4039+
}
4040+
4041+
if (auto result = shouldGenerateLSCQuery(*m_ctx, vectorLdStInst, m_SIMDSize);
4042+
result != Tristate::Unknown)
4043+
return (result == Tristate::True);
4044+
4045+
// ensure both source and destination are not uniform
4046+
Value* addrs = nullptr;
4047+
if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst)) {
4048+
addrs = inst->getOperand(0); // operand 0 is always addr for loads and stores
4049+
} // else others?
4050+
4051+
// we can generate LSC only if it's not uniform (SIMD1) or A32
4052+
bool canGenerate = true;
4053+
if (addrs) {
4054+
bool isA32 = false; // TODO: see below
4055+
if (PointerType * ptrType = dyn_cast<PointerType>(addrs->getType())) {
4056+
isA32 = !IGC::isA64Ptr(ptrType, GetContext());
4057+
}
4058+
canGenerate &= isA32 || !GetSymbol(addrs)->IsUniform();
4059+
4060+
if (!isA32 && GetSymbol(addrs)->IsUniform()) {
4061+
// This is A64 and Uniform case. The LSC is not allowed.
4062+
// However, before exit check the total bytes to be stored or loaded.
4063+
if (totalBytesToStoreOrLoad(vectorLdStInst) >= 4) {
4064+
canGenerate = true;
4065+
}
4066+
}
4067+
}
4068+
return canGenerate;
4069+
} // shouldGenerateLSC
4070+
4071+
// Returns the number of bytes moved by an llvm load or store: the register
// size of one element (per GetScalarTypeSizeInRegister) times the element
// count for fixed vector types, or times one for any other type.
// Any instruction that is neither a LoadInst nor a StoreInst yields 0.
uint32_t CShader::totalBytesToStoreOrLoad(llvm::Instruction* vectorLdStInst)
{
    // Loads are sized by their result type, stores by the stored value's type.
    Type* accessTy = nullptr;
    if (LoadInst* load = dyn_cast<LoadInst>(vectorLdStInst))
    {
        accessTy = load->getType();
    }
    else if (StoreInst* store = dyn_cast<StoreInst>(vectorLdStInst))
    {
        accessTy = store->getValueOperand()->getType();
    }

    if (accessTy == nullptr)
    {
        return 0;
    }

    IGCLLVM::FixedVectorType* vecTy = dyn_cast<IGCLLVM::FixedVectorType>(accessTy);
    if (vecTy == nullptr)
    {
        // Non-vector access: a single element.
        const uint32_t scalarBytes = GetScalarTypeSizeInRegister(accessTy);
        return scalarBytes * 1;
    }

    const uint32_t bytesPerElt = GetScalarTypeSizeInRegister(vecTy->getElementType());
    const uint32_t numElts = int_cast<uint32_t>(vecTy->getNumElements());
    return bytesPerElt * numElts;
} // totalBytesToStoreOrLoad

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 4 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -23138,33 +23138,7 @@ void EmitPass::emitHDCuncompressedwrite(llvm::GenIntrinsicInst* inst)
2313823138

2313923139
bool EmitPass::forceCacheCtrl(llvm::Instruction* inst)
2314023140
{
23141-
std::map<uint32_t, uint32_t> list = m_currShader->m_ModuleMetadata->forceLscCacheList;
23142-
unsigned calleeArgNo = 0;
23143-
PushInfo& pushInfo = m_currShader->m_ModuleMetadata->pushInfo;
23144-
Value* src = IGC::TracePointerSource(inst->getOperand(0));
23145-
if (src)
23146-
{
23147-
if (Argument * calleeArg = dyn_cast<Argument>(src))
23148-
{
23149-
calleeArgNo = calleeArg->getArgNo();
23150-
for (auto index_it = pushInfo.constantReg.begin(); index_it != pushInfo.constantReg.end(); ++index_it)
23151-
{
23152-
if (index_it->second == calleeArgNo)
23153-
{
23154-
auto pos = list.find(index_it->first);
23155-
if (pos != list.end()) {
23156-
MDNode* node = MDNode::get(
23157-
inst->getContext(),
23158-
ConstantAsMetadata::get(
23159-
ConstantInt::get(Type::getInt32Ty(inst->getContext()), pos->second)));
23160-
inst->setMetadata("lsc.cache.ctrl", node);
23161-
return true;
23162-
}
23163-
}
23164-
}
23165-
}
23166-
}
23167-
return false;
23141+
return m_currShader->forceCacheCtrl(inst);
2316823142
}
2316923143

2317023144
// This function may be used in earlier passes to determine whether a given
@@ -23176,125 +23150,19 @@ Tristate EmitPass::shouldGenerateLSCQuery(
2317623150
Instruction* vectorLdStInst,
2317723151
SIMDMode Mode)
2317823152
{
23179-
auto& Platform = Ctx.platform;
23180-
auto& DriverInfo = Ctx.m_DriverInfo;
23181-
23182-
if (!Platform.LSCEnabled(Mode)) {
23183-
// We enable LSC load/store only when program SIMD size is >= LSC's
23184-
// simd size. This is to avoid increasing register pressure and
23185-
// reduce extra moves.
23186-
// Note, that this only applies to gather/scatter;
23187-
// for blocked messages we can always enable LSC
23188-
return Tristate::False;
23189-
}
23190-
23191-
// Geneate LSC for load/store instructions as Load/store emit can
23192-
// handle full-payload uniform non-transpose LSC on PVC A0.
23193-
if (vectorLdStInst == nullptr
23194-
|| isa<LoadInst>(vectorLdStInst)
23195-
|| isa<StoreInst>(vectorLdStInst))
23196-
return Tristate::True;
23197-
// special checks for typed r/w
23198-
if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst))
23199-
{
23200-
if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedread ||
23201-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedwrite ||
23202-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_intatomictyped ||
23203-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_icmpxchgatomictyped)
23204-
{
23205-
return (Platform.hasLSCTypedMessage() ? Tristate::True : Tristate::False);
23206-
}
23207-
else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldraw_indexed ||
23208-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldrawvector_indexed ||
23209-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storeraw_indexed ||
23210-
inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storerawvector_indexed)
23211-
{
23212-
IGC_ASSERT(Platform.isProductChildOf(IGFX_DG2));
23213-
IGC_ASSERT(Platform.hasLSC());
23214-
23215-
bool Result =
23216-
DriverInfo.EnableLSCForLdRawAndStoreRawOnDG2() ||
23217-
Platform.isCoreChildOf(IGFX_XE_HPC_CORE);
23218-
23219-
return (Result ? Tristate::True : Tristate::False);
23220-
}
23221-
}
23222-
23223-
// in PVC A0, SIMD1 reads/writes need full payloads
23224-
// this causes chaos for vISA (would need 4REG alignment)
23225-
// and to make extra moves to enable the payload
23226-
// B0 gets this feature (there is no A1)
23227-
if (!Platform.LSCSimd1NeedFullPayload()) {
23228-
return Tristate::True;
23229-
}
23230-
23231-
return Tristate::Unknown;
23153+
return CShader::shouldGenerateLSCQuery(Ctx, vectorLdStInst, Mode);
2323223154
}
2323323155

2323423156
// Note that if LSCEnabled() returns true, load/store instructions must be
2323523157
// generated with LSC; but some intrinsics are still generated with legacy.
2323623158
bool EmitPass::shouldGenerateLSC(llvm::Instruction* vectorLdStInst)
2323723159
{
23238-
if (vectorLdStInst && m_pCtx->m_DriverInfo.SupportForceRouteAndCache())
23239-
{
23240-
// check if umd specified lsc caching mode and set the metadata if needed.
23241-
if (forceCacheCtrl(vectorLdStInst))
23242-
{
23243-
// if umd force the caching mode, also assume it wants the resource to be in lsc.
23244-
return true;
23245-
}
23246-
}
23247-
23248-
if (auto result = shouldGenerateLSCQuery(*m_pCtx, vectorLdStInst, m_currShader->m_SIMDSize);
23249-
result != Tristate::Unknown)
23250-
return (result == Tristate::True);
23251-
23252-
// ensure both source and destination are not uniform
23253-
Value* addrs = nullptr;
23254-
if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst)) {
23255-
addrs = inst->getOperand(0); // operand 0 is always addr for loads and stores
23256-
} // else others?
23257-
23258-
// we can generate LSC only if it's not uniform (SIMD1) or A32
23259-
bool canGenerate = true;
23260-
if (addrs) {
23261-
bool isA32 = false; // TODO: see below
23262-
if (PointerType * ptrType = dyn_cast<PointerType>(addrs->getType())) {
23263-
isA32 = !IGC::isA64Ptr(ptrType, m_currShader->GetContext());
23264-
}
23265-
canGenerate &= isA32 || !GetSymbol(addrs)->IsUniform();
23266-
23267-
if (!isA32 && GetSymbol(addrs)->IsUniform()) {
23268-
// This is A64 and Uniform case. The LSC is not allowed.
23269-
// However, before exit check the total bytes to be stored or loaded.
23270-
if (totalBytesToStoreOrLoad(vectorLdStInst) >= 4) {
23271-
canGenerate = true;
23272-
}
23273-
}
23274-
}
23275-
return canGenerate;
23160+
return m_currShader->shouldGenerateLSC(vectorLdStInst);
2327623161
} // shouldGenerateLSC
2327723162

2327823163
uint32_t EmitPass::totalBytesToStoreOrLoad(llvm::Instruction* vectorLdStInst)
2327923164
{
23280-
if (dyn_cast<LoadInst>(vectorLdStInst) || dyn_cast<StoreInst>(vectorLdStInst)) {
23281-
Type* Ty = nullptr;
23282-
if (LoadInst * inst = dyn_cast<LoadInst>(vectorLdStInst)) {
23283-
Ty = inst->getType();
23284-
}
23285-
else if (StoreInst * inst = dyn_cast<StoreInst>(vectorLdStInst)) {
23286-
Value* storedVal = inst->getValueOperand();
23287-
Ty = storedVal->getType();
23288-
}
23289-
if (Ty) {
23290-
IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
23291-
Type* eltTy = VTy ? VTy->getElementType() : Ty;
23292-
uint32_t eltBytes = GetScalarTypeSizeInRegister(eltTy);
23293-
uint32_t elts = VTy ? int_cast<uint32_t>(VTy->getNumElements()) : 1;
23294-
return (eltBytes * elts);
23295-
}
23296-
}
23297-
return 0;
23165+
return m_currShader->totalBytesToStoreOrLoad(vectorLdStInst);
2329823166
} // totalBytesToStoreOrLoad
2329923167

2330023168

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,17 @@ class CShader
566566
bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; }
567567
void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; }
568568

569+
////////////////////////////////////////////////////////////////////
570+
// NOTE: for vector load/store instructions, passing the
571+
// optional instruction argument checks additional constraints.
572+
static Tristate shouldGenerateLSCQuery(
573+
const CodeGenContext& Ctx,
574+
llvm::Instruction* vectorLdStInst = nullptr,
575+
SIMDMode Mode = SIMDMode::UNKNOWN);
576+
bool shouldGenerateLSC(llvm::Instruction* vectorLdStInst = nullptr);
577+
bool forceCacheCtrl(llvm::Instruction* vectorLdStInst = nullptr);
578+
uint32_t totalBytesToStoreOrLoad(llvm::Instruction* vectorLdStInst);
579+
569580
protected:
570581
bool CompileSIMDSizeInCommon(SIMDMode simdMode);
571582
uint32_t GetShaderThreadUsageRate();

0 commit comments

Comments
 (0)