Skip to content

Commit 219355d

Browse files
authored
[Libomptarget] Use scoped atomics in the device runtime (#75834)
Summary: A recent patch allowed us to easily replace GNU atomics with scoped variants that make use of the backend's handling for more permissive scopes. The default is full "system" scope, which means the atomic operation must be consistent with operations that may happen on the host's memory. This is generally only required for processes that are communicating with something via global fine-grained memory. This patch uses these atomics to make everything device scoped, as nothing in the OpenMP runtime should depend on the host. This is only provided as a very new clang extension, but the DeviceRTL is only compiled with clang, so it is always available.
1 parent 78a195e commit 219355d

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ uint32_t atomicInc(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
3434

3535
template <typename Ty>
3636
Ty atomicAdd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
37-
return __atomic_fetch_add(Address, Val, Ordering);
37+
return __scoped_atomic_fetch_add(Address, Val, Ordering,
38+
__MEMORY_SCOPE_DEVICE);
3839
}
3940

4041
template <typename Ty>
@@ -56,25 +57,28 @@ template <typename Ty> Ty atomicLoad(Ty *Address, atomic::OrderingTy Ordering) {
5657

5758
template <typename Ty>
5859
void atomicStore(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
59-
__atomic_store_n(Address, Val, Ordering);
60+
__scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE);
6061
}
6162

6263
template <typename Ty>
6364
bool atomicCAS(Ty *Address, Ty ExpectedV, Ty DesiredV,
6465
atomic::OrderingTy OrderingSucc,
6566
atomic::OrderingTy OrderingFail) {
66-
return __atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
67-
OrderingSucc, OrderingFail);
67+
return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
68+
OrderingSucc, OrderingFail,
69+
__MEMORY_SCOPE_DEVICE);
6870
}
6971

7072
template <typename Ty>
7173
Ty atomicMin(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
72-
return __atomic_fetch_min(Address, Val, Ordering);
74+
return __scoped_atomic_fetch_min(Address, Val, Ordering,
75+
__MEMORY_SCOPE_DEVICE);
7376
}
7477

7578
template <typename Ty>
7679
Ty atomicMax(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
77-
return __atomic_fetch_max(Address, Val, Ordering);
80+
return __scoped_atomic_fetch_max(Address, Val, Ordering,
81+
__MEMORY_SCOPE_DEVICE);
7882
}
7983

8084
// TODO: Implement this with __atomic_fetch_max and remove the duplication.
@@ -94,23 +98,26 @@ Ty atomicMaxFP(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
9498

9599
template <typename Ty>
96100
Ty atomicOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
97-
return __atomic_fetch_or(Address, Val, Ordering);
101+
return __scoped_atomic_fetch_or(Address, Val, Ordering,
102+
__MEMORY_SCOPE_DEVICE);
98103
}
99104

100105
template <typename Ty>
101106
Ty atomicAnd(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
102-
return __atomic_fetch_and(Address, Val, Ordering);
107+
return __scoped_atomic_fetch_and(Address, Val, Ordering,
108+
__MEMORY_SCOPE_DEVICE);
103109
}
104110

105111
template <typename Ty>
106112
Ty atomicXOr(Ty *Address, Ty Val, atomic::OrderingTy Ordering) {
107-
return __atomic_fetch_xor(Address, Val, Ordering);
113+
return __scoped_atomic_fetch_xor(Address, Val, Ordering,
114+
__MEMORY_SCOPE_DEVICE);
108115
}
109116

110117
uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
111118
atomic::OrderingTy Ordering) {
112119
uint32_t R;
113-
__atomic_exchange(Address, &Val, &R, Ordering);
120+
__scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE);
114121
return R;
115122
}
116123
///}

0 commit comments

Comments (0)