Skip to content

Commit ef20f5f

Browse files
weiyu-chen authored and sys_zuul committed
Emit scalar atomic load (emulated as atomic_or with zero).
Change-Id: Iaafb4f4ec70a6bd82623b34dbced37a79d26c1a5
1 parent eb3de50 commit ef20f5f

File tree

2 files changed

+96
-3
lines changed

2 files changed

+96
-3
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12076,6 +12076,79 @@ void EmitPass::emitScalarAtomics(
1207612076
}
1207712077
}
1207812078

12079+
//
12080+
// We emulate an atomic_load with an atomic_or with zero.
12081+
// when the atomic is uniform we can directly generate a SIMD1 atomic_or
12082+
//
12083+
void EmitPass::emitScalarAtomicLoad(
12084+
llvm::Instruction* pInst,
12085+
ResourceDescriptor& resource,
12086+
CVariable* pDstAddr,
12087+
CVariable* pSrc,
12088+
bool isA64,
12089+
int bitWidth)
12090+
{
12091+
if (pDstAddr->IsImmediate())
12092+
{
12093+
CVariable* pDstAddrCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
12094+
m_encoder->SetSimdSize(SIMDMode::SIMD1);
12095+
m_encoder->SetNoMask();
12096+
m_encoder->Copy(pDstAddrCopy, pDstAddr);
12097+
m_encoder->Push();
12098+
pDstAddr = pDstAddrCopy;
12099+
}
12100+
12101+
{
12102+
// pSrc is imm zero
12103+
CVariable* pSrcCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
12104+
m_encoder->SetSimdSize(SIMDMode::SIMD1);
12105+
m_encoder->SetNoMask();
12106+
m_encoder->Copy(pSrcCopy, pSrc);
12107+
m_encoder->Push();
12108+
pSrc = pSrcCopy;
12109+
}
12110+
12111+
m_encoder->SetSimdSize(SIMDMode::SIMD1);
12112+
m_encoder->SetNoMask();
12113+
12114+
CVariable* atomicDst = !pInst->use_empty() ? m_currShader->GetNewVariable(
12115+
1,
12116+
ISA_TYPE_UD,
12117+
isA64 ? IGC::EALIGN_2GRF : IGC::EALIGN_GRF,
12118+
true) : nullptr;
12119+
12120+
if (isA64)
12121+
{
12122+
m_encoder->AtomicRawA64(
12123+
EATOMIC_OR, resource,
12124+
atomicDst, pDstAddr,
12125+
pSrc, nullptr,
12126+
bitWidth);
12127+
}
12128+
else
12129+
{
12130+
m_encoder->DwordAtomicRaw(
12131+
EATOMIC_OR, resource,
12132+
atomicDst, pDstAddr,
12133+
pSrc,
12134+
nullptr, bitWidth == 16);
12135+
}
12136+
m_encoder->Push();
12137+
12138+
if (!pInst->use_empty())
12139+
{
12140+
// we need to broadcast the return value
12141+
// ToDo: change divergence analysis to mark scalar atomic load as uniform
12142+
unsigned int counter = m_currShader->m_numberInstance;
12143+
for (unsigned int i = 0; i < counter; ++i)
12144+
{
12145+
m_encoder->SetSecondHalf(i == 1);
12146+
m_encoder->Copy(m_destination, atomicDst);
12147+
m_encoder->Push();
12148+
}
12149+
}
12150+
}
12151+
1207912152
bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
1208012153
{
1208112154
if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(pInst))
@@ -12106,7 +12179,11 @@ bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
1210612179
atomic_op == EATOMIC_IMIN ||
1210712180
atomic_op == EATOMIC_IMAX;
1210812181

12109-
if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()))
12182+
// capture the special case of atomic_or with 0 (it's used to simulate atomic_load)
12183+
bool isOrWith0Atomic = atomic_op == EATOMIC_OR &&
12184+
isa<ConstantInt>(pInst->getOperand(2)) && cast<ConstantInt>(pInst->getOperand(2))->isZero();
12185+
12186+
if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()) || isOrWith0Atomic)
1211012187
return true;
1211112188
}
1211212189
}
@@ -12212,8 +12289,16 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
1221212289
e_alignment uniformAlign = isA64 ? EALIGN_2GRF : EALIGN_GRF;
1221312290
// Re-align the pointer if it's not GRF aligned.
1221412291
pDstAddr = ReAlignUniformVariable(pDstAddr, uniformAlign);
12215-
emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
12216-
ResetVMask();
12292+
if (atomic_op == EATOMIC_OR)
12293+
{
12294+
// special case of atomic_load
12295+
emitScalarAtomicLoad(pInsn, resource, pDstAddr, pSrc0, isA64, bitwidth);
12296+
}
12297+
else
12298+
{
12299+
emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
12300+
ResetVMask();
12301+
}
1221712302
return;
1221812303
}
1221912304

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,14 @@ namespace IGC
252252
bool isA64,
253253
int bitSize);
254254

255+
/// Emits an atomic load, emulated as an atomic_or with a zero source, as a single
/// SIMD1 NoMask atomic; when pInst has users the returned value is copied into the
/// destination.
///   pInst    - instruction being emitted; queried for whether its result is used
///   resource - resource descriptor forwarded to the encoder's atomic call
///   pDstAddr - address operand (copied into a variable first if it is an immediate)
///   pSrc     - source operand, expected to be an immediate zero
///   isA64    - selects the A64 atomic message instead of the dword atomic message
///   bitSize  - bit width of the atomic; the non-A64 path only checks whether it is 16
void emitScalarAtomicLoad(
256+
llvm::Instruction* pInst,
257+
ResourceDescriptor& resource,
258+
CVariable* pDstAddr,
259+
CVariable* pSrc,
260+
bool isA64,
261+
int bitSize);
262+
255263
/// reduction and prefix/postfix facilities
256264
CVariable* ScanReducePrepareSrc(VISA_Type type, uint64_t identityValue, bool negate, bool secondHalf,
257265
CVariable* src, CVariable* dst, CVariable* flag = nullptr);

0 commit comments

Comments
 (0)