@@ -12723,6 +12723,10 @@ void EmitPass::emitScalarAtomics(
12723
12723
{
12724
12724
case EATOMIC_IADD:
12725
12725
case EATOMIC_SUB:
12726
+ case EATOMIC_IADD64:
12727
+ case EATOMIC_SUB64:
12728
+ case EATOMIC_FADD64:
12729
+ case EATOMIC_FSUB64:
12726
12730
case EATOMIC_INC:
12727
12731
case EATOMIC_DEC:
12728
12732
case EATOMIC_FADD:
@@ -12768,13 +12772,18 @@ void EmitPass::emitScalarAtomics(
12768
12772
{
12769
12773
type = ISA_TYPE_F;
12770
12774
}
12775
+ else if (atomic_op == EATOMIC_FADD64 || atomic_op == EATOMIC_FSUB64)
12776
+ {
12777
+ type = ISA_TYPE_DF;
12778
+ }
12771
12779
else
12772
12780
{
12773
12781
type =
12774
12782
bitWidth == 16 ? ISA_TYPE_W :
12775
12783
bitWidth == 32 ? ISA_TYPE_D :
12776
12784
ISA_TYPE_Q;
12777
12785
}
12786
+
12778
12787
IGC_ASSERT_MESSAGE((bitWidth == 16) || (bitWidth == 32) || (bitWidth == 64), "invalid bitsize");
12779
12788
if (atomic_op == EATOMIC_INC || atomic_op == EATOMIC_DEC)
12780
12789
{
@@ -12968,7 +12977,7 @@ void EmitPass::emitScalarAtomics(
12968
12977
m_encoder->Add(pSrcsArr[i], pSrcsArr[i], pReturnVal);
12969
12978
m_encoder->Push();
12970
12979
12971
- if (atomic_op == EATOMIC_IADD)
12980
+ if (atomic_op == EATOMIC_IADD || atomic_op == EATOMIC_IADD64 )
12972
12981
{
12973
12982
m_encoder->SetSrcModifier(1, EMOD_NEG);
12974
12983
}
@@ -13098,14 +13107,32 @@ bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
13098
13107
{
13099
13108
Function* F = pInst->getParent()->getParent();
13100
13109
//We cannot optimize float atomics if the flag "unsafe-fp-math" was not passed.
13101
- if (id == GenISAIntrinsic::GenISA_floatatomicrawA64) {
13102
- if (pInst->getType()->getScalarSizeInBits() != 32) {
13110
+ if (id == GenISAIntrinsic::GenISA_floatatomicrawA64)
13111
+ {
13112
+ if (!F->hasFnAttribute("unsafe-fp-math") || !(F->getFnAttribute("unsafe-fp-math").getValueAsString() == "true"))
13113
+ {
13103
13114
return false;
13104
13115
}
13105
- if (!F->hasFnAttribute("unsafe-fp-math") || !(F->getFnAttribute("unsafe-fp-math").getValueAsString() == "true")) {
13116
+ }
13117
+
13118
+ if (pInst->getType()->getScalarSizeInBits() == 64)
13119
+ {
13120
+ AtomicOp atomic_op = static_cast<AtomicOp>(llvm::cast<llvm::ConstantInt>(pInst->getOperand(3))->getZExtValue());
13121
+
13122
+ if ((atomic_op == EATOMIC_IADD64 || atomic_op == EATOMIC_SUB64) && m_currShader->m_Platform->hasInt64Add())
13123
+ {
13124
+ return true;
13125
+ }
13126
+ else if ((atomic_op == EATOMIC_FADD64 || atomic_op == EATOMIC_FSUB64) && m_currShader->m_Platform->hasFP64GlobalAtomicAdd())
13127
+ {
13128
+ return true;
13129
+ }
13130
+ else
13131
+ {
13106
13132
return false;
13107
13133
}
13108
13134
}
13135
+
13109
13136
if (IGC_IS_FLAG_ENABLED(DisableScalarAtomics) ||
13110
13137
F->hasFnAttribute("KMPLOCK") ||
13111
13138
m_currShader->m_DriverInfo->WASLMPointersDwordUnit())
@@ -13122,6 +13149,10 @@ bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
13122
13149
13123
13150
bool isAtomicAdd =
13124
13151
atomic_op == EATOMIC_IADD ||
13152
+ atomic_op == EATOMIC_IADD64 ||
13153
+ atomic_op == EATOMIC_SUB64 ||
13154
+ atomic_op == EATOMIC_FADD64 ||
13155
+ atomic_op == EATOMIC_FSUB64 ||
13125
13156
atomic_op == EATOMIC_INC ||
13126
13157
atomic_op == EATOMIC_SUB ||
13127
13158
atomic_op == EATOMIC_DEC ||
@@ -13290,7 +13321,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
13290
13321
CVariable* pDstAddr = GetSymbol(pllDstAddr);
13291
13322
// If DisableScalarAtomics regkey is enabled or DisableIGCOptimizations regkey is enabled then
13292
13323
// don't enable scalar atomics. 64-bit atomics are no longer excluded here; IsUniformAtomic decides whether they qualify.
13293
- if (IsUniformAtomic(pInsn) && bitwidth != 64 )
13324
+ if (IsUniformAtomic(pInsn))
13294
13325
{
13295
13326
PointerType* PtrTy = dyn_cast<PointerType>(pllDstAddr->getType());
13296
13327
bool isA64 = PtrTy && isA64Ptr(PtrTy, m_currShader->GetContext());
0 commit comments