Commit 675cefb
[AMDGPU] Introduce Clang builtins to be mapped to AMDGCN atomic inc/dec intrinsics

Summary:
__builtin_amdgcn_atomic_inc32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_inc64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec32(int *Ptr, int Val, unsigned MemoryOrdering, const char *SyncScope)
__builtin_amdgcn_atomic_dec64(int64_t *Ptr, int64_t Val, unsigned MemoryOrdering, const char *SyncScope)

The first and second arguments are passed through transparently to the amdgcn
atomic inc/dec intrinsic; the intrinsic's fifth argument is set to true if the
builtin's first argument is a volatile pointer. The third argument is one of
the memory-ordering specifiers ATOMIC_ACQUIRE, ATOMIC_RELEASE, ATOMIC_ACQ_REL,
or ATOMIC_SEQ_CST, following C++11 memory-model semantics; it is mapped to the
corresponding LLVM atomic memory ordering for the atomic inc/dec instruction
via the Clang atomic C ABI. The fourth argument is an AMDGPU-specific
synchronization scope, given as a string.

Reviewers: arsenm, sameerds, JonChesterfield, jdoerfert

Reviewed By: arsenm, sameerds

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, kerbowa, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D80804
1 parent 1c189d7 commit 675cefb
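
For orientation, here is a minimal sketch of how the new builtins might be
called from device code built for an amdgcn target. The function, the variable
names, and the scope strings "workgroup" and "agent" are illustrative
assumptions, not part of the commit:

    #include <stdint.h>

    // Hypothetical call sites (amdgcn target assumed).
    void counters(int32_t *C32, int64_t *C64, int32_t Max32, int64_t Max64) {
      // Wrapping increment: the underlying intrinsic returns the old value
      // and stores (old >= Max32) ? 0 : old + 1, here with seq_cst ordering
      // at workgroup scope.
      int32_t Old32 =
          __builtin_amdgcn_atomic_inc32(C32, Max32, __ATOMIC_SEQ_CST, "workgroup");

      // Wrapping decrement of a 64-bit counter, release ordering, agent scope.
      int64_t Old64 =
          __builtin_amdgcn_atomic_dec64(C64, Max64, __ATOMIC_RELEASE, "agent");
      (void)Old32;
      (void)Old64;
    }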

File tree: 7 files changed, +455 -60 lines changed
clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 6 additions & 0 deletions
@@ -60,6 +60,12 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
 BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")

+BUILTIN(__builtin_amdgcn_atomic_inc32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc64, "WiWiD*WiUicC*", "n")
+
+BUILTIN(__builtin_amdgcn_atomic_dec32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_dec64, "WiWiD*WiUicC*", "n")
+
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
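
Decoded against the type-string conventions in clang/include/clang/Basic/Builtins.def
(Zi = int32_t, Wi = int64_t, a trailing D = volatile pointee, Ui = unsigned int,
cC* = const char *, and the "n" attribute = nothrow), the four entries correspond
roughly to these C prototypes (a reading aid, not code from the commit):

    int32_t __builtin_amdgcn_atomic_inc32(volatile int32_t *Ptr, int32_t Val,
                                          unsigned int MemoryOrdering,
                                          const char *SyncScope);
    int64_t __builtin_amdgcn_atomic_inc64(volatile int64_t *Ptr, int64_t Val,
                                          unsigned int MemoryOrdering,
                                          const char *SyncScope);
    int32_t __builtin_amdgcn_atomic_dec32(volatile int32_t *Ptr, int32_t Val,
                                          unsigned int MemoryOrdering,
                                          const char *SyncScope);
    int64_t __builtin_amdgcn_atomic_dec64(volatile int64_t *Ptr, int64_t Val,
                                          unsigned int MemoryOrdering,
                                          const char *SyncScope);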

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 80 additions & 28 deletions
@@ -14301,8 +14301,49 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
 }
 } // namespace

+// For processing memory ordering and memory scope arguments of various
+// amdgcn builtins.
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
+// it into LLVM's memory ordering specifier using atomic C ABI, and writes
+// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
+// specific SyncScopeID and writes it to \p SSID.
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+                                              llvm::AtomicOrdering &AO,
+                                              llvm::SyncScope::ID &SSID) {
+  if (isa<llvm::ConstantInt>(Order)) {
+    int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+
+    // Map C11/C++11 memory ordering to LLVM memory ordering
+    switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+    case llvm::AtomicOrderingCABI::acquire:
+      AO = llvm::AtomicOrdering::Acquire;
+      break;
+    case llvm::AtomicOrderingCABI::release:
+      AO = llvm::AtomicOrdering::Release;
+      break;
+    case llvm::AtomicOrderingCABI::acq_rel:
+      AO = llvm::AtomicOrdering::AcquireRelease;
+      break;
+    case llvm::AtomicOrderingCABI::seq_cst:
+      AO = llvm::AtomicOrdering::SequentiallyConsistent;
+      break;
+    case llvm::AtomicOrderingCABI::consume:
+    case llvm::AtomicOrderingCABI::relaxed:
+      break;
+    }
+
+    StringRef scp;
+    llvm::getConstantStringInfo(Scope, scp);
+    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    return true;
+  }
+  return false;
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
+  llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+  llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
   case AMDGPU::BI__builtin_amdgcn_div_scale:
   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -14507,38 +14548,49 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   }

   case AMDGPU::BI__builtin_amdgcn_fence: {
-    llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
-    llvm::SyncScope::ID SSID;
-    Value *Order = EmitScalarExpr(E->getArg(0));
-    Value *Scope = EmitScalarExpr(E->getArg(1));
+    if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+                                EmitScalarExpr(E->getArg(1)), AO, SSID))
+      return Builder.CreateFence(AO, SSID);
+    LLVM_FALLTHROUGH;
+  }
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+    unsigned BuiltinAtomicOp;
+    llvm::Type *ResultType = ConvertType(E->getType());

-    if (isa<llvm::ConstantInt>(Order)) {
-      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+    switch (BuiltinID) {
+    case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+    case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+      break;
+    case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+    case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+      BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+      break;
+    }

-      // Map C11/C++11 memory ordering to LLVM memory ordering
-      switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
-      case llvm::AtomicOrderingCABI::acquire:
-        AO = llvm::AtomicOrdering::Acquire;
-        break;
-      case llvm::AtomicOrderingCABI::release:
-        AO = llvm::AtomicOrdering::Release;
-        break;
-      case llvm::AtomicOrderingCABI::acq_rel:
-        AO = llvm::AtomicOrdering::AcquireRelease;
-        break;
-      case llvm::AtomicOrderingCABI::seq_cst:
-        AO = llvm::AtomicOrdering::SequentiallyConsistent;
-        break;
-      case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
-      case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
-        break;
-      }
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));

-      StringRef scp;
-      llvm::getConstantStringInfo(Scope, scp);
-      SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    llvm::Function *F =
+        CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()});

-      return Builder.CreateFence(AO, SSID);
+    if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+                                EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+
+      // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect ordering and
+      // scope as unsigned values
+      Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+      Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+
+      QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+      bool Volatile =
+          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+      Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+
+      return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
     }
     LLVM_FALLTHROUGH;
   }
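
Taken together: when the ordering and scope arguments fold to constants,
EmitAMDGPUBuiltinExpr emits a direct call to the amdgcn intrinsic with the
ordering, scope ID, and volatility passed as constant operands. A hypothetical
sketch of two call sites and what the code above derives from them (Gate, demo,
and the scope strings are invented for illustration):

    extern volatile int Gate;

    int demo(int *P, int V) {
      // Non-volatile pointer: the intrinsic's fifth operand is i1 false;
      // __ATOMIC_SEQ_CST maps to AtomicOrdering::SequentiallyConsistent,
      // so MemOrder is the i32 constant 7.
      int Old = __builtin_amdgcn_atomic_inc32(P, V, __ATOMIC_SEQ_CST, "workgroup");

      // Volatile pointee: IgnoreImpCasts() exposes the qualifier on the
      // argument type, so the intrinsic's fifth operand becomes i1 true.
      Old += __builtin_amdgcn_atomic_dec32(&Gate, V, __ATOMIC_RELEASE, "agent");
      return Old;
    }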

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 3 additions & 0 deletions
@@ -3988,6 +3988,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E);
   llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+                               llvm::AtomicOrdering &AO,
+                               llvm::SyncScope::ID &SSID);

 private:
   enum class MSVCIntrin;

clang/lib/Sema/SemaChecking.cpp

Lines changed: 47 additions & 32 deletions
@@ -3070,41 +3070,56 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,

 bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                           CallExpr *TheCall) {
+  // position of memory order and scope arguments in the builtin
+  unsigned OrderIndex, ScopeIndex;
   switch (BuiltinID) {
-  case AMDGPU::BI__builtin_amdgcn_fence: {
-    ExprResult Arg = TheCall->getArg(0);
-    auto ArgExpr = Arg.get();
-    Expr::EvalResult ArgResult;
-
-    if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
-      return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
-             << ArgExpr->getType();
-    int ord = ArgResult.Val.getInt().getZExtValue();
-
-    // Check validity of memory ordering as per C11 / C++11's memory model.
-    switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
-    case llvm::AtomicOrderingCABI::acquire:
-    case llvm::AtomicOrderingCABI::release:
-    case llvm::AtomicOrderingCABI::acq_rel:
-    case llvm::AtomicOrderingCABI::seq_cst:
-      break;
-    default: {
-      return Diag(ArgExpr->getBeginLoc(),
-                  diag::warn_atomic_op_has_invalid_memory_order)
-             << ArgExpr->getSourceRange();
-    }
-    }
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+    OrderIndex = 2;
+    ScopeIndex = 3;
+    break;
+  case AMDGPU::BI__builtin_amdgcn_fence:
+    OrderIndex = 0;
+    ScopeIndex = 1;
+    break;
+  default:
+    return false;
+  }

-    Arg = TheCall->getArg(1);
-    ArgExpr = Arg.get();
-    Expr::EvalResult ArgResult1;
-    // Check that sync scope is a constant literal
-    if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
-                                         Context))
-      return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
-             << ArgExpr->getType();
-  } break;
+  ExprResult Arg = TheCall->getArg(OrderIndex);
+  auto ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult;
+
+  if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
+    return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+           << ArgExpr->getType();
+  int ord = ArgResult.Val.getInt().getZExtValue();
+
+  // Check validity of memory ordering as per C11 / C++11's memory model.
+  switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+  case llvm::AtomicOrderingCABI::acquire:
+  case llvm::AtomicOrderingCABI::release:
+  case llvm::AtomicOrderingCABI::acq_rel:
+  case llvm::AtomicOrderingCABI::seq_cst:
+    break;
+  default: {
+    return Diag(ArgExpr->getBeginLoc(),
+                diag::warn_atomic_op_has_invalid_memory_order)
+           << ArgExpr->getSourceRange();
+  }
   }
+
+  Arg = TheCall->getArg(ScopeIndex);
+  ArgExpr = Arg.get();
+  Expr::EvalResult ArgResult1;
+  // Check that sync scope is a constant literal
+  if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
+                                       Context))
+    return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
+           << ArgExpr->getType();
+
   return false;
 }
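
The effect of these checks, sketched as hypothetical call sites (P, N, and S
are invented; N and S are deliberately not compile-time constants):

    void sema_examples(int *P, int N, const char *S) {
      // Accepted: a valid constant ordering and a string-literal scope.
      __builtin_amdgcn_atomic_inc32(P, 1, __ATOMIC_ACQUIRE, "workgroup");

      // Diagnosed: relaxed (and consume) fall into the default case above
      // (warn_atomic_op_has_invalid_memory_order).
      __builtin_amdgcn_atomic_inc32(P, 1, __ATOMIC_RELAXED, "workgroup");

      // Diagnosed: the ordering must evaluate to an integer constant
      // (err_typecheck_expect_int).
      __builtin_amdgcn_atomic_inc32(P, 1, N, "workgroup");

      // Diagnosed: the sync scope must be a constant string
      // (err_expr_not_string_literal).
      __builtin_amdgcn_atomic_inc32(P, 1, __ATOMIC_ACQUIRE, S);
    }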
