Skip to content

Commit 9ba2a99

Browse files
committed
AMDGPU: Expand remaining system atomic operations
System scope atomics need to use cmpxchg loops if we know nothing about the allocation the address comes from. Commit aea5980 started this; this commit expands the set to cover the remaining integer operations. xchg and add are not expanded, since those should theoretically work over PCIe. This is a pre-commit which will introduce performance regressions. Subsequent changes will add handling of new atomicrmw metadata, which will avoid the expansion. Note that this still isn't conservative enough: we also need to expand some device scope atomics if the memory is in fine-grained remote memory.
1 parent e540546 commit 9ba2a99

13 files changed

+21165
-3983
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16601,26 +16601,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1660116601

1660216602
auto Op = RMW->getOperation();
1660316603
switch (Op) {
16604-
case AtomicRMWInst::Xchg: {
16604+
case AtomicRMWInst::Xchg:
1660516605
// PCIe supports add and xchg for system atomics.
1660616606
return isAtomicRMWLegalXChgTy(RMW)
1660716607
? TargetLowering::AtomicExpansionKind::None
1660816608
: TargetLowering::AtomicExpansionKind::CmpXChg;
16609-
}
1661016609
case AtomicRMWInst::Add:
16611-
case AtomicRMWInst::And:
16612-
case AtomicRMWInst::UIncWrap:
16613-
case AtomicRMWInst::UDecWrap:
16610+
// PCIe supports add and xchg for system atomics.
1661416611
return atomicSupportedIfLegalIntType(RMW);
1661516612
case AtomicRMWInst::Sub:
16613+
case AtomicRMWInst::And:
1661616614
case AtomicRMWInst::Or:
16617-
case AtomicRMWInst::Xor: {
16618-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16619-
// does. InstCombine transforms these with 0 to or, so undo that.
16620-
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
16621-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16622-
ConstVal && ConstVal->isNullValue())
16623-
return AtomicExpansionKind::Expand;
16615+
case AtomicRMWInst::Xor:
16616+
case AtomicRMWInst::Max:
16617+
case AtomicRMWInst::Min:
16618+
case AtomicRMWInst::UMax:
16619+
case AtomicRMWInst::UMin:
16620+
case AtomicRMWInst::UIncWrap:
16621+
case AtomicRMWInst::UDecWrap: {
16622+
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16623+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16624+
// Always expand system scope atomics.
16625+
if (HasSystemScope) {
16626+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16627+
Op == AtomicRMWInst::Xor) {
16628+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16629+
// does. InstCombine transforms these with 0 to or, so undo that.
16630+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16631+
ConstVal && ConstVal->isNullValue())
16632+
return AtomicExpansionKind::Expand;
16633+
}
16634+
16635+
return AtomicExpansionKind::CmpXChg;
16636+
}
1662416637
}
1662516638

1662616639
return atomicSupportedIfLegalIntType(RMW);
@@ -16775,19 +16788,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1677516788

1677616789
return AtomicExpansionKind::CmpXChg;
1677716790
}
16778-
case AtomicRMWInst::Min:
16779-
case AtomicRMWInst::Max:
16780-
case AtomicRMWInst::UMin:
16781-
case AtomicRMWInst::UMax: {
16782-
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
16783-
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16784-
// Always expand system scope min/max atomics.
16785-
if (HasSystemScope)
16786-
return AtomicExpansionKind::CmpXChg;
16787-
}
16788-
16789-
return atomicSupportedIfLegalIntType(RMW);
16790-
}
1679116791
case AtomicRMWInst::Nand:
1679216792
case AtomicRMWInst::FSub:
1679316793
default:

0 commit comments

Comments
 (0)