
Commit 15a4203

PawelJurek authored and igcbot committed
Minor refactoring
Minor refactoring around atomic instruction emit functions.
1 parent 406034d commit 15a4203
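
In rough terms (a condensed sketch with stand-in types, not the actual IGC classes), the change threads an immediate scale through the atomic emit path: LSC_AtomicRaw takes an immScale next to immOff and writes it into the address payload instead of hard-coding 1, and emitAtomicRaw folds optional ConstantInt offset/scale operands into those values, defaulting to 0 and 1 when they are absent.

    // Stand-in types only; the real CVariable/ResourceDescriptor parameters are omitted.
    #include <iostream>

    struct LscAddrPayload {
        int immScale = 1;   // was always 1 before this change
        int immOffset = 0;
    };

    // Sketch of the new shape: immScale travels alongside immOff.
    void LSC_AtomicRaw_sketch(LscAddrPayload &addr, int immOff, int immScale) {
        addr.immScale = immScale;   // previously: addr.immScale = 1;
        addr.immOffset = immOff;
    }

    int main() {
        LscAddrPayload addr;
        // The caller derives these from optional ConstantInt operands,
        // falling back to offset 0 and scale 1 when they are absent.
        const int immOffsetVal = 0;
        const int immScaleVal = 1;
        LSC_AtomicRaw_sketch(addr, immOffsetVal, immScaleVal);
        std::cout << addr.immOffset << " " << addr.immScale << "\n";
        return 0;
    }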

File tree

5 files changed: +107 -103 lines changed


IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 2 additions & 1 deletion
@@ -8647,6 +8647,7 @@ namespace IGC
     ResourceDescriptor* resource,
     LSC_ADDR_SIZE addr_size,
     int immOff,
+    int immScale,
     LSC_CACHE_OPTS cacheOpts)
 {
     // There is no need to change the order of arguments for EATOMIC_CMPXCHG, EATOMIC_FCMPWR anymore.
@@ -8671,7 +8672,7 @@ namespace IGC
         }
     }

-    addr.immScale = 1;
+    addr.immScale = immScale;
     addr.immOffset = immOff;
     addr.size = addr_size;

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 6 additions & 6 deletions
@@ -285,12 +285,12 @@ namespace IGC
         ResourceDescriptor * resource,
         LSC_ADDR_SIZE addrSize, int addrImmOffset,
         LSC_CACHE_OPTS cacheOpts);
-    void LSC_AtomicRaw(
-        AtomicOp atomic_op, CVariable * dst, CVariable * offset,
-        CVariable * src0, CVariable * src1,
-        unsigned short bitwidth, ResourceDescriptor * resource,
-        LSC_ADDR_SIZE addr_size,
-        int immOff, LSC_CACHE_OPTS cacheOpts);
+    void LSC_AtomicRaw(AtomicOp atomic_op, CVariable *dst,
+                       CVariable *offset, CVariable *src0, CVariable *src1,
+                       unsigned short bitwidth,
+                       ResourceDescriptor *resource,
+                       LSC_ADDR_SIZE addr_size, int immOff, int immScale,
+                       LSC_CACHE_OPTS cacheOpts);
     void LSC_Fence(LSC_SFID sfid, LSC_SCOPE scope, LSC_FENCE_OP op);
     void LSC_2DBlockMessage(
         LSC_OP subOp, ResourceDescriptor* resource,
IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 76 additions & 70 deletions
@@ -8225,7 +8225,7 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
     case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
     case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
     case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
-        emitAtomicRaw(inst);
+        emitAtomicRaw(inst, inst->getOperand(1));
         break;
     case GenISAIntrinsic::GenISA_intatomictyped:
     case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
@@ -13686,18 +13686,13 @@ add happens with destination address as <addr> = constant. <src> = constant too.
 say for SIMD8 there are 8 lanes trying to write to the same address. H/W will serialize this to
 8 back to back atomic instructions which are extremely slow to execute.
 */
-void EmitPass::emitScalarAtomics(
-    llvm::Instruction* pInst,
-    ResourceDescriptor& resource,
-    AtomicOp atomic_op,
-    CVariable* pDstAddr,
-    CVariable* pU,
-    CVariable* pV,
-    CVariable* pR,
-    CVariable* pSrc,
-    bool isA64,
-    int bitWidth)
-{
+void EmitPass::emitScalarAtomics(llvm::Instruction *pInst,
+                                 ResourceDescriptor &resource,
+                                 AtomicOp atomic_op,
+                                 CVariable *pDstAddr, CVariable *pU,
+                                 CVariable *pV, CVariable *pR, CVariable *pSrc,
+                                 bool isA64, int bitWidth, int immOffset,
+                                 int immScale, LSC_ADDR_SIZE addrSize) {
     e_opcode op = EOPCODE_ADD;
     // find the value for which opcode(x, identity) == x
     unsigned int identityValue = 0;
@@ -14022,7 +14017,10 @@ void EmitPass::emitScalarAtomicLoad(
     CVariable* pR,
     CVariable* pSrc,
     bool isA64,
-    int bitWidth)
+    int bitWidth,
+    int immOffset,
+    int immScale,
+    LSC_ADDR_SIZE addrSize)
 {
     auto moveToReg = [&](CVariable*& pVar)
     {
@@ -14070,6 +14068,7 @@ void EmitPass::emitScalarAtomicLoad(
         true,
         pDstAddr ? pDstAddr->getName() : CName::NONE) : nullptr;
     {
+
         if (isA64)
         {
             m_encoder->AtomicRawA64(
@@ -14259,20 +14258,22 @@ CVariable* EmitPass::UnpackOrBroadcastIfUniform(CVariable* pVar)
     return pUnpacked;
 }

-void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
+void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *dstAddr,
+                             ConstantInt *immOffset, ConstantInt *immScale
+)
 {
     ForceDMask();
     // Currently, Dword Atomics can be called by matching 2 intrinsics. One is the DwordAtomicRaw
     // and AtomicCmpXchg (which has 2 srcs unlike the other atomics).
-    IGC_ASSERT(IGCLLVM::getNumArgOperands(pInsn) == 4);
+    IGC_ASSERT(IGCLLVM::getNumArgOperands(pInst) == 4);

     /// Immediate Atomics return the value before the atomic operation is performed. So that flag
     /// needs to be set for this.
-    bool returnsImmValue = !pInsn->use_empty();
+    bool returnsImmValue = !pInst->use_empty();

-    llvm::Value* pllbuffer = pInsn->getOperand(0);
-    llvm::Value* pllDstAddr = pInsn->getOperand(1);
-    llvm::Value* pllSrc0 = pInsn->getOperand(2);
+    llvm::Value* pllbuffer = pInst->getOperand(0);
+    if (!dstAddr) dstAddr = pInst->getOperand(1);
+    llvm::Value* pllSrc0 = pInst->getOperand(2);
     ResourceDescriptor resource = GetResourceVariable(pllbuffer);
     CountStatelessIndirectAccess(pllbuffer, resource);
     AtomicOp atomic_op = EATOMIC_UNDEF;
@@ -14284,18 +14285,18 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)

     CVariable* pSrc0 = nullptr;
     CVariable* pSrc1 = nullptr;
-    llvm::GenIntrinsicInst* pIntrinCall = llvm::cast<llvm::GenIntrinsicInst>(pInsn);
+    llvm::GenIntrinsicInst* pIntrinCall = llvm::cast<llvm::GenIntrinsicInst>(pInst);
     GenISAIntrinsic::ID IID = pIntrinCall->getIntrinsicID();
     if (IID == GenISAIntrinsic::GenISA_icmpxchgatomicraw ||
         IID == GenISAIntrinsic::GenISA_fcmpxchgatomicraw ||
         IID == GenISAIntrinsic::GenISA_icmpxchgatomicrawA64 ||
         IID == GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64)
     {
-        llvm::Value* pllSrc1 = pInsn->getOperand(3);
+        llvm::Value* pllSrc1 = pInst->getOperand(3);
         pSrc1 = GetSymbol(pllSrc1);

-        Function* F = pInsn->getParent()->getParent();
-        if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInsn))
+        Function* F = pInst->getParent()->getParent();
+        if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInst))
         {
             m_encoder->SetSimdSize(SIMDMode::SIMD1);
             m_encoder->SetNoMask();
@@ -14314,11 +14315,11 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
     }
     else
     {
-        atomic_op = static_cast<AtomicOp>(llvm::cast<llvm::ConstantInt>(pInsn->getOperand(3))->getZExtValue());
+        atomic_op = static_cast<AtomicOp>(llvm::cast<llvm::ConstantInt>(pInst->getOperand(3))->getZExtValue());
     }


-    unsigned short bitwidth = pInsn->getType()->getScalarSizeInBits();
+    unsigned short bitwidth = pInst->getType()->getScalarSizeInBits();
     const bool is16Bit = (bitwidth == 16);

     if (is16Bit)
@@ -14335,38 +14336,52 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
     }

     // Dst address in bytes.
-    CVariable* pDstAddr = GetSymbol(pllDstAddr);
+    CVariable* pDstAddr = GetSymbol(dstAddr);
+
+
+    PointerType *PtrTy = dyn_cast<PointerType>(dstAddr->getType());
+    bool isA64 = PtrTy && isA64Ptr(PtrTy, m_currShader->GetContext());
+    LSC_ADDR_SIZE addrSize = isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
+
+    const int immOffsetVal =
+        immOffset ? static_cast<int>(immOffset->getSExtValue()) : 0;
+    const int immScaleVal =
+        immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
+
     // If DisableScalarAtomics regkey is enabled or DisableIGCOptimizations regkey is enabled then
     // don't enable scalar atomics
-    if (IsUniformAtomic(pInsn))
+    if (IsUniformAtomic(pInst))
     {
-        PointerType* PtrTy = dyn_cast<PointerType>(pllDstAddr->getType());
-        bool isA64 = PtrTy && isA64Ptr(PtrTy, m_currShader->GetContext());
         e_alignment uniformAlign = isA64 ? EALIGN_2GRF : EALIGN_GRF;
         // Re-align the pointer if it's not GRF aligned.
         pDstAddr = ReAlignUniformVariable(pDstAddr, uniformAlign);
-        if (atomic_op == EATOMIC_OR && OrWith0Atomic(pInsn, 2))
+        if (atomic_op == EATOMIC_OR && OrWith0Atomic(pInst, 2))
        {
            // special case of atomic_load
-            emitScalarAtomicLoad(pInsn, resource, pDstAddr, nullptr /*u*/, nullptr /*v*/, nullptr /*r*/, pSrc0, isA64, bitwidth);
+            emitScalarAtomicLoad(pInst, resource,
+                                 pDstAddr, nullptr /*u*/, nullptr /*v*/,
+                                 nullptr /*r*/, pSrc0, isA64, bitwidth,
+                                 immOffsetVal, immScaleVal, addrSize);
        }
-        else
-        {
-            emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, nullptr /*u*/, nullptr /*v*/, nullptr /*r*/, pSrc0, isA64, bitwidth);
+        else {
+            emitScalarAtomics(pInst, resource, atomic_op,
+                              pDstAddr, nullptr /*u*/, nullptr /*v*/,
+                              nullptr /*r*/, pSrc0, isA64, bitwidth,
+                              immOffsetVal, immScaleVal, addrSize);
            ResetVMask();
        }
        return;
     }

-    Function* F = pInsn->getParent()->getParent();
-    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInsn))
+    Function* F = pInst->getParent()->getParent();
+    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInst))
     {
         m_encoder->SetSimdSize(SIMDMode::SIMD1);
         m_encoder->SetNoMask();
     }
     pDstAddr = BroadcastIfUniform(pDstAddr);

-    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInsn))
+    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInst))
     {
         m_encoder->SetSimdSize(SIMDMode::SIMD1);
         m_encoder->SetNoMask();
@@ -14376,7 +14391,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
         pSrc0 = UnpackOrBroadcastIfUniform(pSrc0);
     }

-    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInsn))
+    if (F->hasFnAttribute("KMPLOCK") && m_currShader->GetIsUniform(pInst))
     {
         m_encoder->SetSimdSize(SIMDMode::SIMD1);
         m_encoder->SetNoMask();
@@ -14390,9 +14405,6 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
         EALIGN_GRF, CName::NONE) :
         nullptr;

-    PointerType* PtrTy = dyn_cast<PointerType>(pllDstAddr->getType());
-    bool isA64 = PtrTy && isA64Ptr(PtrTy, m_currShader->GetContext());
-    LSC_ADDR_SIZE addrSize = isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
     bool extendPointer = (bitwidth == 64 && !isA64);
     // DG2 onward with LSC we do not have to extend an A32 pointer to an
     // A64 pointer for 64bit atomics
@@ -14410,17 +14422,11 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
         }
         else
         {
-            if (shouldGenerateLSC())
-            {
-                m_encoder->LSC_AtomicRaw(
-                    atomic_op,
-                    pDst, pDstAddr,
-                    pSrc0, pSrc1,
-                    bitwidth,
-                    &resource,
-                    addrSize,
-                    0,
-                    LSC_DEFAULT_CACHING);
+            if (shouldGenerateLSC()) {
+                m_encoder->LSC_AtomicRaw(atomic_op, pDst,
+                                         pDstAddr, pSrc0, pSrc1, bitwidth,
+                                         &resource, addrSize, immOffsetVal,
+                                         immScaleVal, LSC_DEFAULT_CACHING);
             }
             else
             {
@@ -14466,19 +14472,15 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
     uint label = 0;
     CVariable* flag = nullptr;
     bool needLoop = ResourceLoopHeader(resource, flag, label);
-    if (shouldGenerateLSC(pInsn))
-    {
-        m_encoder->LSC_AtomicRaw(
-            atomic_op,
-            pDst, pDstAddr,
-            pSrc0, pSrc1,
-            bitwidth,
-            &resource, addrSize,
-            0,
-            LSC_DEFAULT_CACHING);
+    if (shouldGenerateLSC(pInst)) {
+        m_encoder->LSC_AtomicRaw(atomic_op, pDst,
+                                 pDstAddr, pSrc0, pSrc1, bitwidth,
+                                 &resource, addrSize, immOffsetVal,
+                                 immScaleVal, LSC_DEFAULT_CACHING);
     }
     else
     {
+        IGC_ASSERT_MESSAGE(!immScale && !immOffset, "Scale and offset not supported on non-LSC path!");
         m_encoder->DwordAtomicRaw(
             atomic_op,
             resource,
@@ -14561,11 +14563,17 @@ void EmitPass::emitAtomicTyped(GenIntrinsicInst* pInsn)
         if (atomic_op == EATOMIC_OR && OrWith0Atomic(pInsn, 4))
         {
             // special case of atomic_load
-            emitScalarAtomicLoad(pInsn, resource, nullptr /*pDstAddr*/, pU, pV, pR, pSrc0, false /*isA64*/, bitwidth);
+            emitScalarAtomicLoad(pInsn, resource,
+                                 nullptr /*pDstAddr*/, pU, pV, pR, pSrc0,
+                                 false /*isA64*/, bitwidth, 0, 1,
+                                 LSC_ADDR_SIZE_32b);
         }
         else
         {
-            emitScalarAtomics(pInsn, resource, atomic_op, nullptr /*pDstAddr*/, pU, pV, pR, pSrc0, false /*isA64*/, bitwidth);
+            emitScalarAtomics(pInsn, resource, atomic_op,
+                              nullptr /*pDstAddr*/, pU, pV, pR, pSrc0,
+                              false /*isA64*/, bitwidth, 0, 1,
+                              LSC_ADDR_SIZE_32b);
         }
     }
     else
@@ -21572,11 +21580,9 @@ void EmitPass::emitLSCAtomic(llvm::GenIntrinsicInst* inst)

     auto cacheOpts = translateLSCCacheControlsFromValue(inst->getOperand(5), false);

-    m_encoder->LSC_AtomicRaw(
-        atomicOp, pOldValue, pDstAddr, pAtomicVal,
-        pAtomicCmp, bitwidth, &resource,
-        addrSize, immOff,
-        cacheOpts);
+    m_encoder->LSC_AtomicRaw(atomicOp, pOldValue,
+                             pDstAddr, pAtomicVal, pAtomicCmp, bitwidth,
+                             &resource, addrSize, immOff, 1, cacheOpts);
     m_encoder->Push();
 }

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 17 additions & 23 deletions
@@ -262,28 +262,19 @@ class EmitPass : public llvm::FunctionPass

     void emitUAVSerialize();

-    void emitScalarAtomics(
-        llvm::Instruction* pInst,
-        ResourceDescriptor& resource,
-        AtomicOp atomic_op,
-        CVariable* pDstAddr,
-        CVariable* pU,
-        CVariable* pV,
-        CVariable* pR,
-        CVariable* pSrc,
-        bool isA64,
-        int bitSize);
-
-    void emitScalarAtomicLoad(
-        llvm::Instruction* pInst,
-        ResourceDescriptor& resource,
-        CVariable* pDstAddr,
-        CVariable* pU,
-        CVariable* pV,
-        CVariable* pR,
-        CVariable* pSrc,
-        bool isA64,
-        int bitSize);
+    void emitScalarAtomics(llvm::Instruction *pInst,
+                           ResourceDescriptor &resource, AtomicOp atomic_op,
+                           CVariable *pDstAddr, CVariable *pU, CVariable *pV,
+                           CVariable *pR, CVariable *pSrc, bool isA64,
+                           int bitSize, int immOffset, int immScale,
+                           LSC_ADDR_SIZE addrSize);
+
+    void emitScalarAtomicLoad(llvm::Instruction *pInst,
+                              ResourceDescriptor &resource,
+                              CVariable *pDstAddr, CVariable *pU, CVariable *pV,
+                              CVariable *pR, CVariable *pSrc, bool isA64,
+                              int bitWidth, int immOffset, int immScale,
+                              LSC_ADDR_SIZE addrSize);

     /// wave/subgroup support
     /// reduction and prefix/postfix facilities
@@ -337,7 +328,10 @@ class EmitPass : public llvm::FunctionPass
         bool isPrefix);

     bool IsUniformAtomic(llvm::Instruction* pInst);
-    void emitAtomicRaw(llvm::GenIntrinsicInst* pInst);
+    void emitAtomicRaw(llvm::GenIntrinsicInst *pInst, Value *varOffset,
+                       ConstantInt *immOffset = nullptr,
+                       ConstantInt *immScale = nullptr
+    );
     void emitAtomicTyped(llvm::GenIntrinsicInst* pInst);
     void emitAtomicCounter(llvm::GenIntrinsicInst* pInst);
     void emitFastClear(llvm::LoadInst* inst);

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 6 additions & 3 deletions
@@ -1289,9 +1289,12 @@ namespace IGC
         case GenISAIntrinsic::GenISA_ldraw_indexed:
         case GenISAIntrinsic::GenISA_storerawvector_indexed:
         case GenISAIntrinsic::GenISA_storeraw_indexed:
-            match = supportsLSCImmediateGlobalBaseOffset() ?
-                MatchImmOffsetLSC(I) || MatchSingleInstruction(I) :
-                MatchSingleInstruction(I);
+            if (supportsLSCImmediateGlobalBaseOffset()) {
+                match = MatchImmOffsetLSC(I);
+                if (match)
+                    return;
+            }
+            match = MatchSingleInstruction(I);
             break;
         case GenISAIntrinsic::GenISA_GradientX:
         case GenISAIntrinsic::GenISA_GradientY:
