@@ -12076,6 +12076,79 @@ void EmitPass::emitScalarAtomics(
    }
}

+//
+// We emulate an atomic_load with an atomic_or with zero.
+// When the atomic is uniform, we can directly generate a SIMD1 atomic_or.
+//
+void EmitPass::emitScalarAtomicLoad(
+    llvm::Instruction* pInst,
+    ResourceDescriptor& resource,
+    CVariable* pDstAddr,
+    CVariable* pSrc,
+    bool isA64,
+    int bitWidth)
+{
+    if (pDstAddr->IsImmediate())
+    {
+        CVariable* pDstAddrCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
+        m_encoder->SetSimdSize(SIMDMode::SIMD1);
+        m_encoder->SetNoMask();
+        m_encoder->Copy(pDstAddrCopy, pDstAddr);
+        m_encoder->Push();
+        pDstAddr = pDstAddrCopy;
+    }
+
+    {
+        // pSrc is imm zero
+        CVariable* pSrcCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
+        m_encoder->SetSimdSize(SIMDMode::SIMD1);
+        m_encoder->SetNoMask();
+        m_encoder->Copy(pSrcCopy, pSrc);
+        m_encoder->Push();
+        pSrc = pSrcCopy;
+    }
+
+    m_encoder->SetSimdSize(SIMDMode::SIMD1);
+    m_encoder->SetNoMask();
+
+    CVariable* atomicDst = !pInst->use_empty() ? m_currShader->GetNewVariable(
+        1,
+        ISA_TYPE_UD,
+        isA64 ? IGC::EALIGN_2GRF : IGC::EALIGN_GRF,
+        true) : nullptr;
+
+    if (isA64)
+    {
+        m_encoder->AtomicRawA64(
+            EATOMIC_OR, resource,
+            atomicDst, pDstAddr,
+            pSrc, nullptr,
+            bitWidth);
+    }
+    else
+    {
+        m_encoder->DwordAtomicRaw(
+            EATOMIC_OR, resource,
+            atomicDst, pDstAddr,
+            pSrc,
+            nullptr, bitWidth == 16);
+    }
+    m_encoder->Push();
+
+    if (!pInst->use_empty())
+    {
+        // We need to broadcast the return value.
+        // TODO: change divergence analysis to mark a scalar atomic load as uniform.
+        unsigned int counter = m_currShader->m_numberInstance;
+        for (unsigned int i = 0; i < counter; ++i)
+        {
+            m_encoder->SetSecondHalf(i == 1);
+            m_encoder->Copy(m_destination, atomicDst);
+            m_encoder->Push();
+        }
+    }
+}
+
bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
{
    if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(pInst))
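For reference (not part of the patch): the emulation added above works because x | 0 == x, so an atomic OR with an all-zero source returns the previous memory value without modifying it, which is exactly an atomic load. A minimal host-side C++ sketch of the same idea, using std::atomic instead of the IGC encoder API purely for illustration:

#include <atomic>
#include <cstdio>

// Emulate an atomic load with an atomic OR-with-zero:
// fetch_or(0) writes back value | 0 (i.e. the value itself, so memory is
// unchanged) and returns the value held before the operation.
static unsigned atomicLoadViaOr(std::atomic<unsigned>& slot)
{
    return slot.fetch_or(0u);
}

int main()
{
    std::atomic<unsigned> slot{0xDEADBEEFu};
    std::printf("loaded: 0x%08X\n", atomicLoadViaOr(slot)); // 0xDEADBEEF
    std::printf("memory: 0x%08X\n", slot.load());           // still 0xDEADBEEF
    return 0;
}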
@@ -12106,7 +12179,11 @@ bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
                    atomic_op == EATOMIC_IMIN ||
                    atomic_op == EATOMIC_IMAX;

-                if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()))
+                // Capture the special case of atomic_or with 0 (it is used to emulate atomic_load).
+                bool isOrWith0Atomic = atomic_op == EATOMIC_OR &&
+                    isa<ConstantInt>(pInst->getOperand(2)) && cast<ConstantInt>(pInst->getOperand(2))->isZero();
+
+                if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()) || isOrWith0Atomic)
                    return true;
            }
        }
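As a side note, the new isOrWith0Atomic condition above only holds when the atomic's source operand is a literal zero. The same predicate, pulled out into a standalone helper for clarity (a hypothetical sketch, not part of this patch; it assumes the source value sits in operand 2 of the intrinsic, as the inline check does):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"

// Hypothetical helper: true iff the atomic's source operand is the constant 0,
// in which case the OR cannot modify memory and behaves as a plain load.
static bool isOrWithZeroSource(llvm::Instruction* pInst)
{
    llvm::Value* src = pInst->getOperand(2);
    if (auto* CI = llvm::dyn_cast<llvm::ConstantInt>(src))
        return CI->isZero();
    return false;
}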
@@ -12212,8 +12289,16 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
        e_alignment uniformAlign = isA64 ? EALIGN_2GRF : EALIGN_GRF;
        // Re-align the pointer if it's not GRF aligned.
        pDstAddr = ReAlignUniformVariable(pDstAddr, uniformAlign);
-        emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
-        ResetVMask();
+        if (atomic_op == EATOMIC_OR)
+        {
+            // Special case: this atomic_or with zero implements an atomic_load.
+            emitScalarAtomicLoad(pInsn, resource, pDstAddr, pSrc0, isA64, bitwidth);
+        }
+        else
+        {
+            emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
+            ResetVMask();
+        }
        return;
    }