@@ -5340,7 +5340,7 @@ void EmitPass::emitLdInstruction(llvm::Instruction* inst)
5340
5340
Value* ptr = inst->getOperand(textureArgIdx);
5341
5341
ResourceDescriptor resource = GetResourceVariable(ptr);
5342
5342
uint ResourceLoopMarker = m_RLA->GetResourceLoopMarker(inst);
5343
- bool needLoop = ResourceLoopHeader(resource, flag, label, ResourceLoopMarker);
5343
+ bool needLoop = ResourceLoopHeader(dst, resource, flag, label, ResourceLoopMarker);
5344
5344
ResourceLoopSubIteration(resource, flag, label, ResourceLoopMarker);
5345
5345
5346
5346
m_encoder->SetPredicate(flag);
@@ -8364,7 +8364,7 @@ void EmitPass::emitInfoInstruction(InfoIntrinsic* inst)
8364
8364
8365
8365
uint label = 0;
8366
8366
CVariable* flag = nullptr;
8367
- bool needLoop = ResourceLoopHeader(resource, flag, label);
8367
+ bool needLoop = ResourceLoopHeader(tempDest, resource, flag, label);
8368
8368
ResourceLoopSubIteration(resource, flag, label);
8369
8369
8370
8370
if (opCode == llvm_readsurfacetypeandformat)
@@ -8487,7 +8487,7 @@ void EmitPass::emitSurfaceInfo(GenIntrinsicInst* inst)
8487
8487
}
8488
8488
uint label = 0;
8489
8489
CVariable* flag = nullptr;
8490
- bool needLoop = ResourceLoopHeader(resource, flag, label);
8490
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
8491
8491
ResourceLoopSubIteration(resource, flag, label);
8492
8492
CVariable* payload = m_currShader->GetNewVariable(8, ISA_TYPE_UD, EALIGN_GRF, CName::NONE);
8493
8493
@@ -8648,7 +8648,8 @@ void EmitPass::emitGather4Instruction(SamplerGatherIntrinsic* inst)
8648
8648
bool feedbackEnable = (m_destination->GetNumberElement() / numLanes(m_currShader->m_SIMDSize) == 5) ? true : false;
8649
8649
uint label = 0;
8650
8650
CVariable* flag = nullptr;
8651
- bool needLoop = ResourceLoopHeader(resource, sampler, flag, label);
8651
+ CVariable* dest = dst ? dst : m_destination;
8652
+ bool needLoop = ResourceLoopHeader(dest, resource, sampler, flag, label);
8652
8653
ResourceLoopSubIteration(resource, sampler, flag, label);
8653
8654
m_encoder->SetPredicate(flag);
8654
8655
m_encoder->Gather4Inst(
@@ -8740,7 +8741,7 @@ void EmitPass::emitLdmsInstruction(llvm::Instruction* inst)
8740
8741
bool feedbackEnable = writeMask.isSet(4);
8741
8742
uint label = 0;
8742
8743
CVariable* flag = nullptr;
8743
- bool needLoop = ResourceLoopHeader(resource, flag, label);
8744
+ bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
8744
8745
ResourceLoopSubIteration(resource, flag, label);
8745
8746
m_encoder->SetPredicate(flag);
8746
8747
m_encoder->LoadMS(opCode, writeMask.getEM(), offset, resource, numSources, dst, payload, feedbackEnable);
@@ -11239,7 +11240,7 @@ void EmitPass::emitLoad3DInner(LdRawIntrinsic* inst, ResourceDescriptor& resourc
11239
11240
{
11240
11241
uint label = 0;
11241
11242
CVariable* flag = nullptr;
11242
- bool needLoop = ResourceLoopHeader(resource, flag, label);
11243
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
11243
11244
ResourceLoopSubIteration(resource, flag, label);
11244
11245
uint sizeInBits = GetPrimitiveTypeSizeInRegisterInBits(inst->getType());
11245
11246
IGC_ASSERT_MESSAGE((sizeInBits == 8) || (sizeInBits == 16) || (sizeInBits == 32) || (sizeInBits == 64) || (sizeInBits == 96) || (sizeInBits == 128),
@@ -12559,7 +12560,7 @@ void EmitPass::emitStore3DInner(Value* pllValToStore, Value* pllDstPtr, Value* p
12559
12560
12560
12561
uint label = 0;
12561
12562
CVariable* flag = nullptr;
12562
- bool needLoop = ResourceLoopHeader(resource, flag, label);
12563
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
12563
12564
ResourceLoopSubIteration(resource, flag, label);
12564
12565
if (sizeInBits == 32)
12565
12566
{
@@ -16110,7 +16111,8 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *dstAddr,
16110
16111
}
16111
16112
uint label = 0;
16112
16113
CVariable* flag = nullptr;
16113
- bool needLoop = ResourceLoopHeader(resource, flag, label);
16114
+ CVariable* dest = pDst ? pDst : m_destination;
16115
+ bool needLoop = ResourceLoopHeader(dest, resource, flag, label);
16114
16116
ResourceLoopSubIteration(resource, flag, label);
16115
16117
if (shouldGenerateLSC(pInst)) {
16116
16118
auto cacheOpts = LSC_DEFAULT_CACHING;
@@ -16320,7 +16322,7 @@ void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
16320
16322
16321
16323
uint label = 0;
16322
16324
CVariable* flag = nullptr;
16323
- bool needLoop = ResourceLoopHeader(resource, flag, label);
16325
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16324
16326
ResourceLoopSubIteration(resource, flag, label);
16325
16327
for (uint i = 0; i < loopIter; ++i)
16326
16328
{
@@ -16429,7 +16431,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
16429
16431
{
16430
16432
uint label = 0;
16431
16433
CVariable* flag = nullptr;
16432
- bool needLoop = ResourceLoopHeader(resource, flag, label);
16434
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16433
16435
ResourceLoopSubIteration(resource, flag, label);
16434
16436
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
16435
16437
SIMDMode instWidth = std::min(
@@ -16551,7 +16553,7 @@ void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
16551
16553
{
16552
16554
uint label = 0;
16553
16555
CVariable* flag = nullptr;
16554
- bool needLoop = ResourceLoopHeader(resource, flag, label);
16556
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
16555
16557
ResourceLoopSubIteration(resource, flag, label);
16556
16558
uint parameterLength = 4;
16557
16559
@@ -17201,7 +17203,7 @@ void EmitPass::emitAtomicCounter(llvm::GenIntrinsicInst* pInsn)
17201
17203
17202
17204
uint label = 0;
17203
17205
CVariable* flag = nullptr;
17204
- bool needLoop = ResourceLoopHeader(resource, flag, label);
17206
+ bool needLoop = ResourceLoopHeader(dst, resource, flag, label);
17205
17207
ResourceLoopSubIteration(resource, flag, label);
17206
17208
17207
17209
uint messageDescriptor = encodeMessageDescriptorForAtomicUnaryOp(
@@ -20093,7 +20095,7 @@ void EmitPass::emitLSCTypedRead(llvm::Instruction* pInsn)
20093
20095
{
20094
20096
uint label = 0;
20095
20097
CVariable* flag = nullptr;
20096
- bool needLoop = ResourceLoopHeader(resource, flag, label);
20098
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20097
20099
ResourceLoopSubIteration(resource, flag, label);
20098
20100
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
20099
20101
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -20191,7 +20193,7 @@ void EmitPass::emitLSCTypedWrite(llvm::Instruction* pInsn)
20191
20193
20192
20194
uint label = 0;
20193
20195
CVariable* flag = nullptr;
20194
- bool needLoop = ResourceLoopHeader(resource, flag, label);
20196
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20195
20197
ResourceLoopSubIteration(resource, flag, label);
20196
20198
uint parameterLength = 4;
20197
20199
@@ -20448,7 +20450,7 @@ void EmitPass::emitLSCAtomicTyped(llvm::GenIntrinsicInst* inst)
20448
20450
20449
20451
uint label = 0;
20450
20452
CVariable* flag = nullptr;
20451
- bool needLoop = ResourceLoopHeader(resource, flag, label);
20453
+ bool needLoop = ResourceLoopHeader(m_destination, resource, flag, label);
20452
20454
ResourceLoopSubIteration(resource, flag, label);
20453
20455
CVariable* tempdst[4] = { nullptr, nullptr, nullptr, nullptr };
20454
20456
auto instWidth = m_currShader->m_Platform->getMaxLSCTypedMessageSize();
@@ -21724,19 +21726,21 @@ SamplerDescriptor EmitPass::GetSamplerVariable(Value* sampleOp)
21724
21726
}
21725
21727
21726
21728
// Convenience overload of ResourceLoopHeader for callers that have no sampler.
// It forwards every argument, together with a default-constructed
// SamplerDescriptor, to the sampler-aware overload and returns that
// overload's result unchanged.
// `destination` may be null; the sampler-aware overload tests it before use.
bool EmitPass::ResourceLoopHeader(
21729
+ const CVariable* destination,
21727
21730
ResourceDescriptor& resource,
21728
21731
CVariable*& flag,
21729
21732
uint& label,
21730
21733
uint ResourceLoopMarker,
21731
21734
int* subInteration)
21732
21735
{
21733
21736
// Default-constructed descriptor: this overload has no sampler argument to pass on.
SamplerDescriptor sampler;
21734
- return ResourceLoopHeader(resource, sampler, flag, label, ResourceLoopMarker, subInteration);
21737
+ return ResourceLoopHeader(destination, resource, sampler, flag, label, ResourceLoopMarker, subInteration);
21735
21738
}
21736
21739
21737
21740
// Insert loop header to handle non-uniform resource and sampler
21738
21741
// This generates sub-optimal code for SIMD32; this can be revisited if we need better code generation.
21739
21742
bool EmitPass::ResourceLoopHeader(
21743
+ const CVariable* destination,
21740
21744
ResourceDescriptor& resource,
21741
21745
SamplerDescriptor& sampler,
21742
21746
CVariable*& flag,
@@ -21774,6 +21778,11 @@ bool EmitPass::ResourceLoopHeader(
21774
21778
}
21775
21779
m_currShader->IncNumSampleBallotLoops();
21776
21780
21781
+ if (destination)
21782
+ {
21783
+ m_encoder->Lifetime(LIFETIME_START, (CVariable*)destination);
21784
+ }
21785
+
21777
21786
label = m_encoder->GetNewLabelID("_opt_resource_loop");
21778
21787
m_encoder->AddDivergentResourceLoopLabel(label);
21779
21788
m_encoder->Push();
0 commit comments