Skip to content

Commit 02f1d1c

Browse files
committed
AMDGPU: Refactor atomicrmw fadd expansion logic
The function had some repeated and overlapping conditions, which made it more difficult to handle the new metadata scheme. Reflow the function to handle the easy LDS cases first. For the flat/global cases, write the checks in a positive-enabled style, where everything unhandled falls through to the default cmpxchg expansion.
1 parent 2653277 commit 02f1d1c

File tree

1 file changed

+24
-30
lines changed

1 file changed

+24
-30
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16075,56 +16075,50 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1607516075
return AtomicExpansionKind::CmpXChg;
1607616076
}
1607716077

16078-
if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
16078+
if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
16079+
AS != AMDGPUAS::BUFFER_FAT_POINTER)
1607916080
return AtomicExpansionKind::CmpXChg;
1608016081

16081-
if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
16082-
AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16083-
Subtarget->hasAtomicFaddNoRtnInsts()) {
16084-
if (Subtarget->hasGFX940Insts())
16085-
return AtomicExpansionKind::None;
16082+
// TODO: gfx940 supports v2f16 and v2bf16
16083+
if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
16084+
return AtomicExpansionKind::None;
1608616085

16087-
if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16088-
return AtomicExpansionKind::CmpXChg;
16086+
if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16087+
return AtomicExpansionKind::CmpXChg;
1608916088

16090-
// Always expand system scope fp atomics.
16091-
if (HasSystemScope)
16092-
return AtomicExpansionKind::CmpXChg;
16089+
// Always expand system scope fp atomics.
16090+
if (HasSystemScope)
16091+
return AtomicExpansionKind::CmpXChg;
1609316092

16094-
if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16095-
AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
16096-
Ty->isFloatTy()) {
16097-
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16098-
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16099-
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16100-
// global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16101-
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
16102-
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16103-
}
16093+
// global and flat atomic fadd f64: gfx90a, gfx940.
16094+
if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
16095+
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1610416096

16105-
// flat atomic fadd f32: gfx940, gfx11+.
16106-
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16107-
Subtarget->hasFlatAtomicFaddF32Inst())
16097+
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16098+
// global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
16099+
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
16100+
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16101+
// global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
16102+
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1610816103
return ReportUnsafeHWInst(AtomicExpansionKind::None);
16104+
}
1610916105

16110-
// global and flat atomic fadd f64: gfx90a, gfx940.
16111-
if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
16106+
// flat atomic fadd f32: gfx940, gfx11+.
16107+
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
16108+
if (Subtarget->hasFlatAtomicFaddF32Inst())
1611216109
return ReportUnsafeHWInst(AtomicExpansionKind::None);
1611316110

1611416111
// If it is in flat address space, and the type is float, we will try to
1611516112
// expand it, if the target supports global and lds atomic fadd. The
1611616113
// reason we need that is, in the expansion, we emit the check of address
1611716114
// space. If it is in global address space, we emit the global atomic
1611816115
// fadd; if it is in shared address space, we emit the LDS atomic fadd.
16119-
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
16120-
Subtarget->hasLDSFPAtomicAddF32()) {
16116+
if (Subtarget->hasLDSFPAtomicAddF32()) {
1612116117
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
1612216118
return AtomicExpansionKind::Expand;
1612316119
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
1612416120
return AtomicExpansionKind::Expand;
1612516121
}
16126-
16127-
return AtomicExpansionKind::CmpXChg;
1612816122
}
1612916123

1613016124
return AtomicExpansionKind::CmpXChg;

0 commit comments

Comments
 (0)