|
30 | 30 | #include "llvm/CodeGen/MachineFunction.h"
|
31 | 31 | #include "llvm/CodeGen/MachineLoopInfo.h"
|
32 | 32 | #include "llvm/IR/DiagnosticInfo.h"
|
| 33 | +#include "llvm/IR/IRBuilder.h" |
33 | 34 | #include "llvm/IR/IntrinsicInst.h"
|
34 | 35 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
35 | 36 | #include "llvm/IR/IntrinsicsR600.h"
|
@@ -12866,6 +12867,19 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
|
12866 | 12867 | if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
|
12867 | 12868 | return ReportUnsafeHWInst(AtomicExpansionKind::None);
|
12868 | 12869 |
|
| 12870 | + // For a float atomicrmw in the flat address space, request a custom |
| 12871 | + // expansion when the target supports both global and LDS atomic fadd. |
| 12872 | + // The expansion is needed because it emits a runtime check of the |
| 12873 | + // pointer's address space: a pointer into global memory gets the global |
| 12874 | + // atomic fadd, and a pointer into shared memory gets the LDS atomic fadd. |
| 12875 | + if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() && |
| 12876 | + Subtarget->hasLDSFPAtomicAdd()) { |
| 12877 | + if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts()) |
| 12878 | + return AtomicExpansionKind::Expand; |
| 12879 | + if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts()) |
| 12880 | + return AtomicExpansionKind::Expand; |
| 12881 | + } |
| 12882 | + |
12869 | 12883 | return AtomicExpansionKind::CmpXChg;
|
12870 | 12884 | }
|
12871 | 12885 |
|
@@ -13066,3 +13080,140 @@ bool SITargetLowering::checkForPhysRegDependency(
|
13066 | 13080 | }
|
13067 | 13081 | return false;
|
13068 | 13082 | }
|
| 13083 | + |
| 13084 | +void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
| 13085 | + assert(Subtarget->hasAtomicFaddInsts() && |
| 13086 | + "target should have atomic fadd instructions"); |
| 13087 | + assert(AI->getType()->isFloatTy() && |
| 13088 | + AI->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS && |
| 13089 | + "generic atomicrmw expansion only supports FP32 operand in flat " |
| 13090 | + "address space"); |
| 13091 | + assert(AI->getOperation() == AtomicRMWInst::FAdd && |
| 13092 | + "only fadd is supported for now"); |
| 13093 | + |
| 13094 | + // Given: atomicrmw fadd float* %addr, float %val ordering |
| 13095 | + // |
| 13096 | + // With this expansion we produce the following code: |
| 13097 | + // [...] |
| 13098 | + // %int8ptr = bitcast float* %addr to i8* |
| 13099 | + // br label %atomicrmw.check.shared |
| 13100 | + // |
| 13101 | + // atomicrmw.check.shared: |
| 13102 | + // %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %int8ptr) |
| 13103 | + // br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private |
| 13104 | + // |
| 13105 | + // atomicrmw.shared: |
| 13106 | + // %cast.shared = addrspacecast float* %addr to float addrspace(3)* |
| 13107 | + // %loaded.shared = atomicrmw fadd float addrspace(3)* %cast.shared, |
| 13108 | + // float %val ordering |
| 13109 | + // br label %atomicrmw.phi |
| 13110 | + // |
| 13111 | + // atomicrmw.check.private: |
| 13112 | + // %is.private = call i1 @llvm.amdgcn.is.private(i8* %int8ptr) |
| 13113 | + // br i1 %is.private, label %atomicrmw.private, label %atomicrmw.global |
| 13114 | + // |
| 13115 | + // atomicrmw.private: |
| 13116 | + // %cast.private = addrspacecast float* %addr to float addrspace(5)* |
| 13117 | + // %loaded.private = load float, float addrspace(5)* %cast.private |
| 13118 | + // %val.new = fadd float %loaded.private, %val |
| 13119 | + // store float %val.new, float addrspace(5)* %cast.private |
| 13120 | + // br label %atomicrmw.phi |
| 13121 | + // |
| 13122 | + // atomicrmw.global: |
| 13123 | + // %cast.global = addrspacecast float* %addr to float addrspace(1)* |
| 13124 | + // %loaded.global = atomicrmw fadd float addrspace(1)* %cast.global, |
| 13125 | + // float %val ordering |
| 13126 | + // br label %atomicrmw.phi |
| 13127 | + // |
| 13128 | + // atomicrmw.phi: |
| 13129 | + // %loaded.phi = phi float [ %loaded.shared, %atomicrmw.shared ], |
| 13130 | + // [ %loaded.private, %atomicrmw.private ], |
| 13131 | + // [ %loaded.global, %atomicrmw.global ] |
| 13132 | + // br label %atomicrmw.end |
| 13133 | + // |
| 13134 | + // atomicrmw.end: |
| 13135 | + // [...] |
| 13136 | + |
| 13137 | + IRBuilder<> Builder(AI); |
| 13138 | + LLVMContext &Ctx = Builder.getContext(); |
| 13139 | + |
| 13140 | + BasicBlock *BB = Builder.GetInsertBlock(); |
| 13141 | + Function *F = BB->getParent(); |
| 13142 | + BasicBlock *ExitBB = |
| 13143 | + BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); |
| 13144 | + BasicBlock *CheckSharedBB = |
| 13145 | + BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB); |
| 13146 | + BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB); |
| 13147 | + BasicBlock *CheckPrivateBB = |
| 13148 | + BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB); |
| 13149 | + BasicBlock *PrivateBB = |
| 13150 | + BasicBlock::Create(Ctx, "atomicrmw.private", F, ExitBB); |
| 13151 | + BasicBlock *GlobalBB = BasicBlock::Create(Ctx, "atomicrmw.global", F, ExitBB); |
| 13152 | + BasicBlock *PhiBB = BasicBlock::Create(Ctx, "atomicrmw.phi", F, ExitBB); |
| 13153 | + |
| 13154 | + Value *Val = AI->getValOperand(); |
| 13155 | + Type *ValTy = Val->getType(); |
| 13156 | + Value *Addr = AI->getPointerOperand(); |
| 13157 | + PointerType *PtrTy = cast<PointerType>(Addr->getType()); |
| 13158 | + |
| 13159 | + auto CreateNewAtomicRMW = [AI](IRBuilder<> &Builder, Value *Addr, |
| 13160 | + Value *Val) -> Value * { |
| 13161 | + AtomicRMWInst *OldVal = |
| 13162 | + Builder.CreateAtomicRMW(AI->getOperation(), Addr, Val, AI->getAlign(), |
| 13163 | + AI->getOrdering(), AI->getSyncScopeID()); |
| 13164 | + SmallVector<std::pair<unsigned, MDNode *>> MDs; |
| 13165 | + AI->getAllMetadata(MDs); |
| 13166 | + for (auto &P : MDs) |
| 13167 | + OldVal->setMetadata(P.first, P.second); |
| 13168 | + return OldVal; |
| 13169 | + }; |
| 13170 | + |
| 13171 | + std::prev(BB->end())->eraseFromParent(); |
| 13172 | + Builder.SetInsertPoint(BB); |
| 13173 | + Value *Int8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); |
| 13174 | + Builder.CreateBr(CheckSharedBB); |
| 13175 | + |
| 13176 | + Builder.SetInsertPoint(CheckSharedBB); |
| 13177 | + CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {}, |
| 13178 | + {Int8Ptr}, nullptr, "is.shared"); |
| 13179 | + Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB); |
| 13180 | + |
| 13181 | + Builder.SetInsertPoint(SharedBB); |
| 13182 | + Value *CastToLocal = Builder.CreateAddrSpaceCast( |
| 13183 | + Addr, |
| 13184 | + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::LOCAL_ADDRESS)); |
| 13185 | + Value *LoadedShared = CreateNewAtomicRMW(Builder, CastToLocal, Val); |
| 13186 | + Builder.CreateBr(PhiBB); |
| 13187 | + |
| 13188 | + Builder.SetInsertPoint(CheckPrivateBB); |
| 13189 | + CallInst *IsPrivate = Builder.CreateIntrinsic( |
| 13190 | + Intrinsic::amdgcn_is_private, {}, {Int8Ptr}, nullptr, "is.private"); |
| 13191 | + Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB); |
| 13192 | + |
| 13193 | + Builder.SetInsertPoint(PrivateBB); |
| 13194 | + Value *CastToPrivate = Builder.CreateAddrSpaceCast( |
| 13195 | + Addr, |
| 13196 | + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::PRIVATE_ADDRESS)); |
| 13197 | + Value *LoadedPrivate = |
| 13198 | + Builder.CreateLoad(ValTy, CastToPrivate, "loaded.private"); |
| 13199 | + Value *NewVal = Builder.CreateFAdd(LoadedPrivate, Val, "val.new"); |
| 13200 | + Builder.CreateStore(NewVal, CastToPrivate); |
| 13201 | + Builder.CreateBr(PhiBB); |
| 13202 | + |
| 13203 | + Builder.SetInsertPoint(GlobalBB); |
| 13204 | + Value *CastToGlobal = Builder.CreateAddrSpaceCast( |
| 13205 | + Addr, |
| 13206 | + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::GLOBAL_ADDRESS)); |
| 13207 | + Value *LoadedGlobal = CreateNewAtomicRMW(Builder, CastToGlobal, Val); |
| 13208 | + Builder.CreateBr(PhiBB); |
| 13209 | + |
| 13210 | + Builder.SetInsertPoint(PhiBB); |
| 13211 | + PHINode *Loaded = Builder.CreatePHI(ValTy, 3, "loaded.phi"); |
| 13212 | + Loaded->addIncoming(LoadedShared, SharedBB); |
| 13213 | + Loaded->addIncoming(LoadedPrivate, PrivateBB); |
| 13214 | + Loaded->addIncoming(LoadedGlobal, GlobalBB); |
| 13215 | + Builder.CreateBr(ExitBB); |
| 13216 | + |
| 13217 | + AI->replaceAllUsesWith(Loaded); |
| 13218 | + AI->eraseFromParent(); |
| 13219 | +} |
0 commit comments