Skip to content

Commit 51b4ada

Browse files
authored
clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw (llvm#102462)
1 parent c1047ba commit 51b4ada

File tree

9 files changed

+339
-228
lines changed

9 files changed

+339
-228
lines changed

clang/include/clang/AST/Expr.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6777,6 +6777,17 @@ class AtomicExpr : public Expr {
67776777
getOp() <= AO__opencl_atomic_store;
67786778
}
67796779

6780+
bool isHIP() const {
6781+
return Op >= AO__hip_atomic_compare_exchange_strong &&
6782+
Op <= AO__hip_atomic_store;
6783+
}
6784+
6785+
/// Return true if atomic operations targeting allocations in private memory
6786+
/// are undefined.
6787+
bool threadPrivateMemoryAtomicsAreUndefined() const {
6788+
return isOpenCL() || isHIP();
6789+
}
6790+
67806791
SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
67816792
SourceLocation getRParenLoc() const { return RParenLoc; }
67826793

clang/include/clang/Basic/LangOptions.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,14 @@ class LangOptions : public LangOptionsBase {
698698
return ConvergentFunctions;
699699
}
700700

701+
/// Return true if atomicrmw operations targeting allocations in private
702+
/// memory are undefined.
703+
bool threadPrivateMemoryAtomicsAreUndefined() const {
704+
// Should be false for OpenMP.
705+
// TODO: Should this be true for SYCL?
706+
return OpenCL || CUDA;
707+
}
708+
701709
/// Return the OpenCL C or C++ version as a VersionTuple.
702710
VersionTuple getOpenCLVersionTuple() const;
703711

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
389389
Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
390390
Pair->setVolatile(E->isVolatile());
391391
Pair->setWeak(IsWeak);
392+
CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Pair, E);
392393

393394
// Cmp holds the result of the compare-exchange operation: true on success,
394395
// false on failure.
@@ -727,7 +728,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
727728

728729
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
729730
llvm::AtomicRMWInst *RMWI =
730-
CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
731+
CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope, E);
731732
RMWI->setVolatile(E->isVolatile());
732733

733734
// For __atomic_*_fetch operations, perform the operation again to
@@ -2048,11 +2049,11 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
20482049
llvm::AtomicRMWInst *
20492050
CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
20502051
llvm::Value *Val, llvm::AtomicOrdering Order,
2051-
llvm::SyncScope::ID SSID) {
2052-
2052+
llvm::SyncScope::ID SSID,
2053+
const AtomicExpr *AE) {
20532054
llvm::AtomicRMWInst *RMW =
20542055
Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
2055-
getTargetHooks().setTargetAtomicMetadata(*this, *RMW);
2056+
getTargetHooks().setTargetAtomicMetadata(*this, *RMW, AE);
20562057
return RMW;
20572058
}
20582059

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4166,7 +4166,8 @@ class CodeGenFunction : public CodeGenTypeCache {
41664166
llvm::AtomicRMWInst *emitAtomicRMWInst(
41674167
llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
41684168
llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
4169-
llvm::SyncScope::ID SSID = llvm::SyncScope::System);
4169+
llvm::SyncScope::ID SSID = llvm::SyncScope::System,
4170+
const AtomicExpr *AE = nullptr);
41704171

41714172
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
41724173
const llvm::function_ref<RValue(RValue)> &UpdateOp,

clang/lib/CodeGen/TargetInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,9 @@ class TargetCodeGenInfo {
336336

337337
/// Allow the target to apply other metadata to an atomic instruction
338338
virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
339-
llvm::AtomicRMWInst &RMW) const {}
339+
llvm::Instruction &AtomicInst,
340+
const AtomicExpr *Expr = nullptr) const {
341+
}
340342

341343
/// Interface class for filling custom fields of a block literal for OpenCL.
342344
class TargetOpenCLBlockHelper {

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
1111
#include "clang/Basic/TargetOptions.h"
12+
#include "llvm/Support/AMDGPUAddrSpace.h"
1213

1314
using namespace clang;
1415
using namespace clang::CodeGen;
@@ -312,7 +313,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
312313
llvm::AtomicOrdering Ordering,
313314
llvm::LLVMContext &Ctx) const override;
314315
void setTargetAtomicMetadata(CodeGenFunction &CGF,
315-
llvm::AtomicRMWInst &RMW) const override;
316+
llvm::Instruction &AtomicInst,
317+
const AtomicExpr *Expr = nullptr) const override;
316318
llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
317319
llvm::Function *BlockInvokeFunc,
318320
llvm::Type *BlockTy) const override;
@@ -546,19 +548,39 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
546548
}
547549

548550
void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
549-
CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
550-
if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
551+
CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
552+
const AtomicExpr *AE) const {
553+
auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
554+
auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);
555+
556+
// OpenCL and old-style HIP atomics consider atomics targeting thread-private
557+
// memory to be undefined.
558+
//
559+
// TODO: This is probably undefined for atomic load/store, but there's not
560+
// much direct codegen benefit to knowing this.
561+
if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) ||
562+
(CmpX &&
563+
CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
564+
AE && AE->threadPrivateMemoryAtomicsAreUndefined()) {
565+
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
566+
llvm::MDNode *ASRange = MDHelper.createRange(
567+
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
568+
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
569+
AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
570+
}
571+
572+
if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
551573
return;
552574

553575
// TODO: Introduce new, more controlled options that also work for integers,
554576
// and deprecate allowAMDGPUUnsafeFPAtomics.
555-
llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
577+
llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation();
556578
if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
557579
llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
558-
RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
580+
RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
559581

560-
if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
561-
RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
582+
if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy())
583+
RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
562584
}
563585
}
564586

0 commit comments

Comments
 (0)