@@ -943,7 +943,7 @@ bool EmitPass::runOnFunction(llvm::Function& F)
     for (uint i = 0; i < m_pattern->m_numBlocks; i++)
     {
         SBasicBlock& block = m_pattern->m_blocks[i];
-        block.clearCaching();   // clear for each SIMD size
+        block.m_activeMask = nullptr;   // clear for each SIMD size
         m_currentBlock = i;
         if (m_blockCoalescing->IsEmptyBlock(block.bb))
         {
@@ -975,8 +975,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
         while (I != E)
         {
             Instruction* llvmInst = I->m_root;
-            resetCurrInstNumInstances();
-
             if (llvmInst->getDebugLoc())
             {
                 unsigned int curLineNumber = llvmInst->getDebugLoc().getLine();
@@ -1006,8 +1004,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
             bool slicing = false;
             uint numInstance = DecideInstanceAndSlice(*block.bb, *I, slicing);
             IGC_ASSERT(numInstance == 1 || numInstance == 2);
-            // caching the number of instance
-            setCurrInstNumInstances(numInstance);
 
             if (slicing && !disableSlicing)
             {
@@ -1037,7 +1033,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
                 if (slicing)
                 {
                     numInstance = DecideInstanceAndSlice(*block.bb, *I, slicing);
-                    setCurrInstNumInstances(numInstance);
                 }
 
                 if (llvmtoVISADump)
@@ -12332,93 +12327,32 @@ void EmitPass::emitScalarAtomics(
         uniformAtomicOp = EATOMIC_IADD;
     }
     bool returnsImmValue = (!pInst->use_empty());
-    CVariable* pFinalAtomicSrcVal;
+    CVariable* pFinalAtomicSrcVal = m_currShader->GetNewVariable(
+        1,
+        type,
+        isA64 ? EALIGN_2GRF : EALIGN_GRF,
+        true,
+        CName::NONE);
     CVariable* pSrcsArr[2] = { nullptr, nullptr };
-
-    if (op == EOPCODE_ADD && bitWidth == 32 && pSrc->IsUniform() &&
-        getCurrInstNumInstances() == 1 && !returnsImmValue)
+    if (returnsImmValue)
     {
-        // Special case for uniform DW src (like atomic_add(1) without return value.
-        // Note: limit this code for a single instance for now as scalar atomic must have
-        // instance = 1 (see DecideInstanceAndSlice()).
-        //
-        // The following sequence will be generated:
-        //   (W) mov (16|M0)  f0.0<1>:uw  0:uw
-        //       cmp.eq.f0.0 (16|M0)  dummy:uw  dummy:uw
-        //   (W) mov (1|M0)   r2.0<1>:uw  f0.0:uw
-        //   (W) cbit (1|M0)  r1.0:uw     r2.0:uw   <-- r1.0 : number of active lanes
-        //   (W) mul (1|M0)   r10:ud      pSrc  r1.0:uw
-        SBasicBlock& currBlk = getCurrentBlock();
-        CVariable* numActiveLanes = currBlk.m_numActiveLanes;
-        if (numActiveLanes == nullptr)
-        {
-            CVariable* emask = GetExecutionMask(); // execution mask for the entire dispatch size
-            // Count the number of '1' bits we have in the execmask to get the number of active lanes.
-            // For example, given emask = 1011011000100010b, numActiveLanes = 7
-            // This will handle cases in which not all lanes are active.
-            numActiveLanes = m_currShader->GetNewVariable(1, ISA_TYPE_W, EALIGN_DWORD, true, CName::NONE);
-            m_encoder->CBit(numActiveLanes, emask);
-            m_encoder->Push();
-
-            // save it for possible re-use later.
-            currBlk.m_numActiveLanes = numActiveLanes;
-        }
+        // sum all the lanes
+        emitPreOrPostFixOp(op, identityValue, type, negateSrc, pSrc, pSrcsArr);
 
-        // pFinalAtomicSrcVal is used in msg's payload and thus needs to be GRF-aligned
-        pFinalAtomicSrcVal = m_currShader->GetNewVariable(1, ISA_TYPE_D, EALIGN_GRF, true, CName::NONE);
-        if (pSrc->IsImmediate() && pSrc->GetImmediateValue() == 1)
+        CVariable* pSrcCopy = pSrcsArr[0];
+        if (m_currShader->m_numberInstance == 2)
         {
-            if (negateSrc)
-            {
-                m_encoder->SetSrcModifier(0, EMOD_NEG);
-            }
-            m_encoder->Cast(pFinalAtomicSrcVal, numActiveLanes);
-            m_encoder->Push();
+            pSrcCopy = pSrcsArr[1];
         }
-        else
-        {
-            m_encoder->Mul(pFinalAtomicSrcVal, pSrc, numActiveLanes);
-            m_encoder->Push();
 
-            // using neg srcmod with mul will end up with more insts, thus using srcmod on mov
-            if (negateSrc)
-            {
-                m_encoder->SetSrcModifier(0, EMOD_NEG);
-            }
-            m_encoder->Copy(pFinalAtomicSrcVal, pFinalAtomicSrcVal);
-            m_encoder->Push();
-        }
+        m_encoder->SetSrcRegion(0, 0, 1, 0);
+        m_encoder->SetSrcSubReg(0, numLanes(m_currShader->m_SIMDSize) - 1);
+        m_encoder->Copy(pFinalAtomicSrcVal, pSrcCopy);
+        m_encoder->Push();
     }
     else
     {
-        // general case
-        pFinalAtomicSrcVal = m_currShader->GetNewVariable(
-            1,
-            type,
-            isA64 ? EALIGN_2GRF : EALIGN_GRF,
-            true,
-            CName::NONE);
-
-        if (returnsImmValue)
-        {
-            // sum all the lanes
-            emitPreOrPostFixOp(op, identityValue, type, negateSrc, pSrc, pSrcsArr);
-
-            CVariable* pSrcCopy = pSrcsArr[0];
-            if (m_currShader->m_numberInstance == 2)
-            {
-                pSrcCopy = pSrcsArr[1];
-            }
-
-            m_encoder->SetSrcRegion(0, 0, 1, 0);
-            m_encoder->SetSrcSubReg(0, numLanes(m_currShader->m_SIMDSize) - 1);
-            m_encoder->Copy(pFinalAtomicSrcVal, pSrcCopy);
-            m_encoder->Push();
-        }
-        else
-        {
-            emitReductionAll(op, identityValue, type, negateSrc, pSrc, pFinalAtomicSrcVal);
-        }
+        emitReductionAll(op, identityValue, type, negateSrc, pSrc, pFinalAtomicSrcVal);
     }
 
     auto moveToReg = [&](CVariable*& pVar)
@@ -12454,6 +12388,11 @@ void EmitPass::emitScalarAtomics(
     m_encoder->SetSimdSize(SIMDMode::SIMD1);
     m_encoder->SetNoMask();
 
+    CVariable* pReturnVal = returnsImmValue ?
+        m_currShader->GetNewVariable(
+            1, ISA_TYPE_UD, EALIGN_GRF, true, CName::NONE) :
+        nullptr;
+
     if (bitWidth == 16)
     {
         CVariable* pCastAtomicSrcVal =
@@ -12463,11 +12402,6 @@ void EmitPass::emitScalarAtomics(
         pFinalAtomicSrcVal = pCastAtomicSrcVal;
     }
 
-    CVariable* pReturnVal = returnsImmValue ?
-        m_currShader->GetNewVariable(
-            1, ISA_TYPE_UD, EALIGN_GRF, true, CName::NONE) :
-        nullptr;
-
     if (shouldGenerateLSC(pInst))
     {
         m_encoder->LSC_AtomicRaw(
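
Side note on the fast path removed in the emitScalarAtomics hunk above: for a uniform 32-bit atomic add with no return value, the old code sent a single scalar value equal to the uniform source multiplied by the number of active lanes (counted with cbit over the execution mask), instead of running a per-lane prefix sum. A minimal standalone C++ sketch of that arithmetic follows; the names and types here are illustrative only and are not IGC APIs.

#include <bit>      // std::popcount (C++20)
#include <cstdint>

// Sketch only: the value a single SIMD1 atomic add would send when every active
// lane contributes the same uniform source -- uniformSrc times the active-lane
// count. 'execMask' stands in for the hardware execution mask.
uint32_t scalarAtomicAddSource(uint32_t uniformSrc, uint32_t execMask)
{
    // analogous to the cbit instruction in the removed comment block
    uint32_t numActiveLanes = static_cast<uint32_t>(std::popcount(execMask));
    return uniformSrc * numActiveLanes;
}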