Skip to content

Commit bf90f40

Browse files
committed
AMDGPU: Refactor atomicrmw fadd expansion logic
This function had some repeated and overlapping conditions, which made it more difficult to handle the new metadata scheme. Restructure the function to handle the easy LDS cases first. For the flat/global cases, write the checks in a positive-enabled style: each supported case is explicitly enabled, and everything unhandled falls through to the default cmpxchg expansion.
1 parent 9318600 commit bf90f40

File tree

1 file changed

+23
-30
lines changed

1 file changed

+23
-30
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16075,56 +16075,49 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1607516075
return AtomicExpansionKind::CmpXChg;
1607616076
}
1607716077

16078-
if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
16078+
if (!AMDGPU::isFlatGlobalAddrSpace(AS) && AS != AMDGPUAS::BUFFER_FAT_POINTER)
1607916079
return AtomicExpansionKind::CmpXChg;
1608016080

16081-
if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
16082-
AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16083-
Subtarget->hasAtomicFaddNoRtnInsts()) {
16084-
if (Subtarget->hasGFX940Insts())
16085-
return AtomicExpansionKind::None;
16081+
// TODO: gfx940 supports v2f16 and v2bf16
16082+
if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
16083+
return AtomicExpansionKind::None;
1608616084

16087-
if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16088-
return AtomicExpansionKind::CmpXChg;
16085+
if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16086+
return AtomicExpansionKind::CmpXChg;
1608916087

16090-
// Always expand system scope fp atomics.
16091-
if (HasSystemScope)
16092-
return AtomicExpansionKind::CmpXChg;
16088+
// Always expand system scope fp atomics.
16089+
if (HasSystemScope)
16090+
return AtomicExpansionKind::CmpXChg;
1609316091

16094-
if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16095-
AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16096-
Ty->isFloatTy()) {
16097-
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16098-
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16099-
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16100-
// global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16101-
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
16102-
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16103-
}
16092+
// global and flat atomic fadd f64: gfx90a, gfx940.
16093+
if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
16094+
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1610416095

16105-
// flat atomic fadd f32: gfx940, gfx11+.
16106-
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16107-
Subtarget->hasFlatAtomicFaddF32Inst())
16096+
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16097+
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16098+
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16099+
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16100+
// global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16101+
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1610816102
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16103+
}
1610916104

16110-
// global and flat atomic fadd f64: gfx90a, gfx940.
16111-
if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
16105+
// flat atomic fadd f32: gfx940, gfx11+.
16106+
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16107+
if (Subtarget->hasFlatAtomicFaddF32Inst())
1611216108
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1611316109

1611416110
// If it is in flat address space, and the type is float, we will try to
1611516111
// expand it, if the target supports global and lds atomic fadd. The
1611616112
// reason we need that is, in the expansion, we emit the check of address
1611716113
// space. If it is in global address space, we emit the global atomic
1611816114
// fadd; if it is in shared address space, we emit the LDS atomic fadd.
16119-
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16120-
Subtarget->hasLDSFPAtomicAddF32()) {
16115+
if (Subtarget->hasLDSFPAtomicAddF32()) {
1612116116
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
1612216117
return AtomicExpansionKind::Expand;
1612316118
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1612416119
return AtomicExpansionKind::Expand;
1612516120
}
16126-
16127-
return AtomicExpansionKind::CmpXChg;
1612816121
}
1612916122

1613016123
return AtomicExpansionKind::CmpXChg;

0 commit comments

Comments
 (0)