Skip to content

[Clang] Introduce scoped variants of GNU atomic functions #72280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3866,6 +3866,30 @@ builtin function, and are named with a ``__opencl_`` prefix. The macros
and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)

__scoped_atomic builtins
------------------------

Clang provides a set of atomics taking a memory scope argument. These atomics
are identical to the standard GNU / GCC atomic builtins but taking an extra
memory scope argument. These are designed to be a generic alternative to the
``__opencl_atomic_*`` builtin functions for targets that support atomic memory
scopes.

Atomic memory scopes are designed to assist optimizations for systems with
several levels of memory hierarchy like GPUs. The following memory scopes are
currently supported:

* ``__MEMORY_SCOPE_SYSTEM``
* ``__MEMORY_SCOPE_DEVICE``
* ``__MEMORY_SCOPE_WRKGRP``
* ``__MEMORY_SCOPE_WVFRNT``
* ``__MEMORY_SCOPE_SINGLE``

This controls whether or not the atomic operation is ordered with respect to the
whole system, the current device, an OpenCL workgroup, wavefront, or just a
single thread. If these are used on a target that does not support atomic
scopes, then they will behave exactly as the standard GNU atomic builtins.

Low-level ARM exclusive memory builtins
---------------------------------------

Expand Down
20 changes: 11 additions & 9 deletions clang/include/clang/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -6498,7 +6498,7 @@ class AtomicExpr : public Expr {
return cast<Expr>(SubExprs[ORDER_FAIL]);
}
Expr *getVal2() const {
if (Op == AO__atomic_exchange)
if (Op == AO__atomic_exchange || Op == AO__scoped_atomic_exchange)
return cast<Expr>(SubExprs[ORDER_FAIL]);
assert(NumSubExprs > VAL2);
return cast<Expr>(SubExprs[VAL2]);
Expand Down Expand Up @@ -6539,7 +6539,9 @@ class AtomicExpr : public Expr {
getOp() == AO__opencl_atomic_compare_exchange_weak ||
getOp() == AO__hip_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
getOp() == AO__atomic_compare_exchange_n;
getOp() == AO__atomic_compare_exchange_n ||
getOp() == AO__scoped_atomic_compare_exchange ||
getOp() == AO__scoped_atomic_compare_exchange_n;
}

bool isOpenCL() const {
Expand Down Expand Up @@ -6569,13 +6571,13 @@ class AtomicExpr : public Expr {
/// \return empty atomic scope model if the atomic op code does not have
/// scope operand.
static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
auto Kind =
(Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
? AtomicScopeModelKind::OpenCL
: (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
? AtomicScopeModelKind::HIP
: AtomicScopeModelKind::None;
return AtomicScopeModel::create(Kind);
if (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
return AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
else if (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
return AtomicScopeModel::create(AtomicScopeModelKind::HIP);
else if (Op >= AO__scoped_atomic_load && Op <= AO__scoped_atomic_fetch_max)
return AtomicScopeModel::create(AtomicScopeModelKind::Generic);
return AtomicScopeModel::create(AtomicScopeModelKind::None);
}

/// Get atomic scope model.
Expand Down
26 changes: 26 additions & 0 deletions clang/include/clang/Basic/Builtins.def
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE")
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE")

// GNU atomic builtins with atomic scopes.
ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it would be better to preserve __atomic as the prefix, and move the _scope part to the name suffix

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naming things is hard, we could do

__atomic_scoped_load
__scoped_atomic_load
__atomic_load_scoped

Unsure which is the best.

ATOMIC_BUILTIN(__scoped_atomic_load_n, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_store, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_store_n, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_exchange, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_exchange_n, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_compare_exchange, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_compare_exchange_n, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_add, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_sub, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_and, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_or, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_xor, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_nand, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_add_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_sub_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_and_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_or_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_xor_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_max_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_min_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_nand_fetch, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_min, "v.", "t")
ATOMIC_BUILTIN(__scoped_atomic_fetch_max, "v.", "t")

// OpenCL 2.0 atomic builtins.
ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
Expand Down
69 changes: 68 additions & 1 deletion clang/include/clang/Basic/SyncScope.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ namespace clang {
/// Update getAsString.
///
enum class SyncScope {
SystemScope,
DeviceScope,
WorkgroupScope,
WavefrontScope,
SingleScope,
HIPSingleThread,
HIPWavefront,
HIPWorkgroup,
Expand All @@ -54,6 +59,16 @@ enum class SyncScope {

inline llvm::StringRef getAsString(SyncScope S) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is a pre-existing problem, but why don't these just match the backend string names?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's because this is for AST printing purposes, while the backend strings vary per target.

switch (S) {
case SyncScope::SystemScope:
return "system_scope";
case SyncScope::DeviceScope:
return "device_scope";
case SyncScope::WorkgroupScope:
return "workgroup_scope";
case SyncScope::WavefrontScope:
return "wavefront_scope";
case SyncScope::SingleScope:
return "single_scope";
case SyncScope::HIPSingleThread:
return "hip_singlethread";
case SyncScope::HIPWavefront:
Expand All @@ -77,7 +92,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
}

/// Defines the kind of atomic scope models.
enum class AtomicScopeModelKind { None, OpenCL, HIP };
enum class AtomicScopeModelKind { None, OpenCL, HIP, Generic };

/// Defines the interface for synch scope model.
class AtomicScopeModel {
Expand Down Expand Up @@ -205,6 +220,56 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
};

/// Defines the generic atomic scope model.
class AtomicScopeGenericModel : public AtomicScopeModel {
public:
/// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
enum ID {
System = 0,
Device = 1,
Workgroup = 2,
Wavefront = 3,
Single = 4,
Last = Single
};

AtomicScopeGenericModel() = default;

SyncScope map(unsigned S) const override {
switch (static_cast<ID>(S)) {
case Device:
return SyncScope::DeviceScope;
case System:
return SyncScope::SystemScope;
case Workgroup:
return SyncScope::WorkgroupScope;
case Wavefront:
return SyncScope::WavefrontScope;
case Single:
return SyncScope::SingleScope;
}
llvm_unreachable("Invalid language sync scope value");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be better to just assume anything else is system?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly just copying the existing code for this, but we have semantic checks to ensure that the value is valid. So there's no chance that a user will actually get to specify anything different from the macros provided.

}

bool isValid(unsigned S) const override {
return S >= static_cast<unsigned>(System) &&
S <= static_cast<unsigned>(Last);
}

ArrayRef<unsigned> getRuntimeValues() const override {
static_assert(Last == Single, "Does not include all sync scopes");
static const unsigned Scopes[] = {
static_cast<unsigned>(Device), static_cast<unsigned>(System),
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
static_cast<unsigned>(Single)};
return llvm::ArrayRef(Scopes);
}

unsigned getFallBackValue() const override {
return static_cast<unsigned>(System);
}
};

inline std::unique_ptr<AtomicScopeModel>
AtomicScopeModel::create(AtomicScopeModelKind K) {
switch (K) {
Expand All @@ -214,6 +279,8 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
return std::make_unique<AtomicScopeOpenCLModel>();
case AtomicScopeModelKind::HIP:
return std::make_unique<AtomicScopeHIPModel>();
case AtomicScopeModelKind::Generic:
return std::make_unique<AtomicScopeGenericModel>();
}
llvm_unreachable("Invalid atomic scope model kind");
}
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/AST/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4887,6 +4887,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_load_n:
return 2;

case AO__scoped_atomic_load_n:
case AO__opencl_atomic_load:
case AO__hip_atomic_load:
case AO__c11_atomic_store:
Expand Down Expand Up @@ -4921,6 +4922,26 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_fetch_max:
return 3;

case AO__scoped_atomic_load:
case AO__scoped_atomic_store:
case AO__scoped_atomic_store_n:
case AO__scoped_atomic_fetch_add:
case AO__scoped_atomic_fetch_sub:
case AO__scoped_atomic_fetch_and:
case AO__scoped_atomic_fetch_or:
case AO__scoped_atomic_fetch_xor:
case AO__scoped_atomic_fetch_nand:
case AO__scoped_atomic_add_fetch:
case AO__scoped_atomic_sub_fetch:
case AO__scoped_atomic_and_fetch:
case AO__scoped_atomic_or_fetch:
case AO__scoped_atomic_xor_fetch:
case AO__scoped_atomic_nand_fetch:
case AO__scoped_atomic_min_fetch:
case AO__scoped_atomic_max_fetch:
case AO__scoped_atomic_fetch_min:
case AO__scoped_atomic_fetch_max:
case AO__scoped_atomic_exchange_n:
case AO__hip_atomic_exchange:
case AO__hip_atomic_fetch_add:
case AO__hip_atomic_fetch_sub:
Expand All @@ -4942,6 +4963,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_exchange:
return 4;

case AO__scoped_atomic_exchange:
case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
return 5;
Expand All @@ -4952,6 +4974,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;

case AO__scoped_atomic_compare_exchange:
case AO__scoped_atomic_compare_exchange_n:
return 7;
}
llvm_unreachable("unknown atomic op");
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/StmtPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,7 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
Node->getOp() != AtomicExpr::AO__scoped_atomic_load_n &&
Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
OS << ", ";
Expand Down
Loading