Skip to content

Commit 9bd1085

Browse files
authored
AMDGPU: Undo atomicrmw add/sub/xor 0 -> atomicrmw or canonicalization (#87533)
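InstCombine canonicalizes an atomicrmw add/sub/xor with a 0 operand into an atomicrmw or with 0. For system-scope atomics this is problematic: PCIe supports atomic add, but it does not support the other operations. Undo the canonicalization for system-scope atomics by turning these operations back into an atomicrmw add of 0.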
1 parent 0d2bb7f commit 9bd1085

File tree

3 files changed

+572
-308
lines changed

3 files changed

+572
-308
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16027,6 +16027,19 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1602716027
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as");
1602816028

1602916029
switch (RMW->getOperation()) {
16030+
case AtomicRMWInst::Sub:
16031+
case AtomicRMWInst::Or:
16032+
case AtomicRMWInst::Xor: {
16033+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16034+
// does. InstCombine transforms these with 0 to or, so undo that.
16035+
if (HasSystemScope && AMDGPU::isFlatGlobalAddrSpace(AS)) {
16036+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16037+
ConstVal && ConstVal->isNullValue())
16038+
return AtomicExpansionKind::Expand;
16039+
}
16040+
16041+
break;
16042+
}
1603016043
case AtomicRMWInst::FAdd: {
1603116044
Type *Ty = RMW->getType();
1603216045

@@ -16312,14 +16325,24 @@ bool SITargetLowering::checkForPhysRegDependency(
1631216325
}
1631316326

1631416327
void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
16328+
AtomicRMWInst::BinOp Op = AI->getOperation();
16329+
16330+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16331+
Op == AtomicRMWInst::Xor) {
16332+
// atomicrmw or %ptr, 0 -> atomicrmw add %ptr, 0
16333+
assert(cast<Constant>(AI->getValOperand())->isNullValue() &&
16334+
"this cannot be replaced with add");
16335+
AI->setOperation(AtomicRMWInst::Add);
16336+
return;
16337+
}
16338+
1631516339
assert(Subtarget->hasAtomicFaddInsts() &&
1631616340
"target should have atomic fadd instructions");
1631716341
assert(AI->getType()->isFloatTy() &&
1631816342
AI->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS &&
1631916343
"generic atomicrmw expansion only supports FP32 operand in flat "
1632016344
"address space");
16321-
assert(AI->getOperation() == AtomicRMWInst::FAdd &&
16322-
"only fadd is supported for now");
16345+
assert(Op == AtomicRMWInst::FAdd && "only fadd is supported for now");
1632316346

1632416347
// Given: atomicrmw fadd ptr %addr, float %val ordering
1632516348
//

0 commit comments

Comments
 (0)