@@ -23138,33 +23138,7 @@ void EmitPass::emitHDCuncompressedwrite(llvm::GenIntrinsicInst* inst)
23138
23138
23139
23139
bool EmitPass::forceCacheCtrl(llvm::Instruction* inst)
23140
23140
{
23141
- std::map<uint32_t, uint32_t> list = m_currShader->m_ModuleMetadata->forceLscCacheList;
23142
- unsigned calleeArgNo = 0;
23143
- PushInfo& pushInfo = m_currShader->m_ModuleMetadata->pushInfo;
23144
- Value* src = IGC::TracePointerSource(inst->getOperand(0));
23145
- if (src)
23146
- {
23147
- if (Argument * calleeArg = dyn_cast<Argument>(src))
23148
- {
23149
- calleeArgNo = calleeArg->getArgNo();
23150
- for (auto index_it = pushInfo.constantReg.begin(); index_it != pushInfo.constantReg.end(); ++index_it)
23151
- {
23152
- if (index_it->second == calleeArgNo)
23153
- {
23154
- auto pos = list.find(index_it->first);
23155
- if (pos != list.end()) {
23156
- MDNode* node = MDNode::get(
23157
- inst->getContext(),
23158
- ConstantAsMetadata::get(
23159
- ConstantInt::get(Type::getInt32Ty(inst->getContext()), pos->second)));
23160
- inst->setMetadata("lsc.cache.ctrl", node);
23161
- return true;
23162
- }
23163
- }
23164
- }
23165
- }
23166
- }
23167
- return false;
23141
+ return m_currShader->forceCacheCtrl(inst);
23168
23142
}
23169
23143
23170
23144
// This function may be used in earlier passes to determine whether a given
@@ -23176,125 +23150,19 @@ Tristate EmitPass::shouldGenerateLSCQuery(
23176
23150
Instruction* vectorLdStInst,
23177
23151
SIMDMode Mode)
23178
23152
{
23179
- auto& Platform = Ctx.platform;
23180
- auto& DriverInfo = Ctx.m_DriverInfo;
23181
-
23182
- if (!Platform.LSCEnabled(Mode)) {
23183
- // We enable LSC load/store only when program SIMD size is >= LSC's
23184
- // simd size. This is to avoid increasing register pressure and
23185
- // reduce extra moves.
23186
- // Note, that this only applies to gather/scatter;
23187
- // for blocked messages we can always enable LSC
23188
- return Tristate::False;
23189
- }
23190
-
23191
- // Generate LSC for load/store instructions as Load/store emit can
23192
- // handle full-payload uniform non-transpose LSC on PVC A0.
23193
- if (vectorLdStInst == nullptr
23194
- || isa<LoadInst>(vectorLdStInst)
23195
- || isa<StoreInst>(vectorLdStInst))
23196
- return Tristate::True;
23197
- // special checks for typed r/w
23198
- if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst))
23199
- {
23200
- if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedread ||
23201
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_typedwrite ||
23202
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_intatomictyped ||
23203
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_icmpxchgatomictyped)
23204
- {
23205
- return (Platform.hasLSCTypedMessage() ? Tristate::True : Tristate::False);
23206
- }
23207
- else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldraw_indexed ||
23208
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_ldrawvector_indexed ||
23209
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storeraw_indexed ||
23210
- inst->getIntrinsicID() == GenISAIntrinsic::GenISA_storerawvector_indexed)
23211
- {
23212
- IGC_ASSERT(Platform.isProductChildOf(IGFX_DG2));
23213
- IGC_ASSERT(Platform.hasLSC());
23214
-
23215
- bool Result =
23216
- DriverInfo.EnableLSCForLdRawAndStoreRawOnDG2() ||
23217
- Platform.isCoreChildOf(IGFX_XE_HPC_CORE);
23218
-
23219
- return (Result ? Tristate::True : Tristate::False);
23220
- }
23221
- }
23222
-
23223
- // in PVC A0, SIMD1 reads/writes need full payloads
23224
- // this causes chaos for vISA (would need 4REG alignment)
23225
- // and to make extra moves to enable the payload
23226
- // B0 gets this feature (there is no A1)
23227
- if (!Platform.LSCSimd1NeedFullPayload()) {
23228
- return Tristate::True;
23229
- }
23230
-
23231
- return Tristate::Unknown;
23153
+ return CShader::shouldGenerateLSCQuery(Ctx, vectorLdStInst, Mode);
23232
23154
}
23233
23155
23234
23156
// Note that if LSCEnabled() returns true, load/store instructions must be
23235
23157
// generated with LSC; but some intrinsics are still generated with legacy.
23236
23158
bool EmitPass::shouldGenerateLSC(llvm::Instruction* vectorLdStInst)
23237
23159
{
23238
- if (vectorLdStInst && m_pCtx->m_DriverInfo.SupportForceRouteAndCache())
23239
- {
23240
- // check if umd specified lsc caching mode and set the metadata if needed.
23241
- if (forceCacheCtrl(vectorLdStInst))
23242
- {
23243
- // if umd forces the caching mode, also assume it wants the resource to be in lsc.
23244
- return true;
23245
- }
23246
- }
23247
-
23248
- if (auto result = shouldGenerateLSCQuery(*m_pCtx, vectorLdStInst, m_currShader->m_SIMDSize);
23249
- result != Tristate::Unknown)
23250
- return (result == Tristate::True);
23251
-
23252
- // ensure both source and destination are not uniform
23253
- Value* addrs = nullptr;
23254
- if (GenIntrinsicInst * inst = dyn_cast<GenIntrinsicInst>(vectorLdStInst)) {
23255
- addrs = inst->getOperand(0); // operand 0 is always addr for loads and stores
23256
- } // else others?
23257
-
23258
- // we can generate LSC only if it's not uniform (SIMD1) or A32
23259
- bool canGenerate = true;
23260
- if (addrs) {
23261
- bool isA32 = false; // TODO: see below
23262
- if (PointerType * ptrType = dyn_cast<PointerType>(addrs->getType())) {
23263
- isA32 = !IGC::isA64Ptr(ptrType, m_currShader->GetContext());
23264
- }
23265
- canGenerate &= isA32 || !GetSymbol(addrs)->IsUniform();
23266
-
23267
- if (!isA32 && GetSymbol(addrs)->IsUniform()) {
23268
- // This is A64 and Uniform case. The LSC is not allowed.
23269
- // However, before exit check the total bytes to be stored or loaded.
23270
- if (totalBytesToStoreOrLoad(vectorLdStInst) >= 4) {
23271
- canGenerate = true;
23272
- }
23273
- }
23274
- }
23275
- return canGenerate;
23160
+ return m_currShader->shouldGenerateLSC(vectorLdStInst);
23276
23161
} // shouldGenerateLSC
23277
23162
23278
23163
uint32_t EmitPass::totalBytesToStoreOrLoad(llvm::Instruction* vectorLdStInst)
23279
23164
{
23280
- if (dyn_cast<LoadInst>(vectorLdStInst) || dyn_cast<StoreInst>(vectorLdStInst)) {
23281
- Type* Ty = nullptr;
23282
- if (LoadInst * inst = dyn_cast<LoadInst>(vectorLdStInst)) {
23283
- Ty = inst->getType();
23284
- }
23285
- else if (StoreInst * inst = dyn_cast<StoreInst>(vectorLdStInst)) {
23286
- Value* storedVal = inst->getValueOperand();
23287
- Ty = storedVal->getType();
23288
- }
23289
- if (Ty) {
23290
- IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
23291
- Type* eltTy = VTy ? VTy->getElementType() : Ty;
23292
- uint32_t eltBytes = GetScalarTypeSizeInRegister(eltTy);
23293
- uint32_t elts = VTy ? int_cast<uint32_t>(VTy->getNumElements()) : 1;
23294
- return (eltBytes * elts);
23295
- }
23296
- }
23297
- return 0;
23165
+ return m_currShader->totalBytesToStoreOrLoad(vectorLdStInst);
23298
23166
} // totalBytesToStoreOrLoad
23299
23167
23300
23168
0 commit comments