Skip to content

Commit f85c1f3

Browse files
committed
[OpenMP] Replace __ATOMIC_XYZ with atomic::xyz for style
Also fixes one case where the ordering argument was not actually used. Differential Revision: https://reviews.llvm.org/D135035
1 parent a955711 commit f85c1f3

File tree

4 files changed

+108
-94
lines changed

4 files changed

+108
-94
lines changed

openmp/libomptarget/DeviceRTL/include/Synchronization.h

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,38 +44,46 @@ __attribute__((noinline)) void threadsAligned();
4444

4545
} // namespace synchronize
4646

47-
namespace fence {
48-
49-
/// Memory fence with \p Ordering semantics for the team.
50-
void team(int Ordering);
51-
52-
/// Memory fence with \p Ordering semantics for the contention group.
53-
void kernel(int Ordering);
54-
55-
/// Memory fence with \p Ordering semantics for the system.
56-
void system(int Ordering);
57-
58-
} // namespace fence
59-
6047
namespace atomic {
6148

49+
enum OrderingTy {
50+
relaxed = __ATOMIC_RELAXED,
51+
aquire = __ATOMIC_ACQUIRE,
52+
release = __ATOMIC_RELEASE,
53+
acq_rel = __ATOMIC_ACQ_REL,
54+
seq_cst = __ATOMIC_SEQ_CST,
55+
};
56+
6257
/// Atomically load \p Addr with \p Ordering semantics.
63-
uint32_t load(uint32_t *Addr, int Ordering);
58+
uint32_t load(uint32_t *Addr, atomic::OrderingTy Ordering);
6459

6560
/// Atomically store \p V to \p Addr with \p Ordering semantics.
66-
void store(uint32_t *Addr, uint32_t V, int Ordering);
61+
void store(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
6762

6863
/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
69-
uint32_t inc(uint32_t *Addr, uint32_t V, int Ordering);
64+
uint32_t inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
7065

7166
/// Atomically add \p V to \p *Addr with \p Ordering semantics.
72-
uint32_t add(uint32_t *Addr, uint32_t V, int Ordering);
67+
uint32_t add(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering);
7368

7469
/// Atomically add \p V to \p *Addr with \p Ordering semantics.
75-
uint64_t add(uint64_t *Addr, uint64_t V, int Ordering);
70+
uint64_t add(uint64_t *Addr, uint64_t V, atomic::OrderingTy Ordering);
7671

7772
} // namespace atomic
7873

74+
namespace fence {
75+
76+
/// Memory fence with \p Ordering semantics for the team.
77+
void team(atomic::OrderingTy Ordering);
78+
79+
/// Memory fence with \p Ordering semantics for the contention group.
80+
void kernel(atomic::OrderingTy Ordering);
81+
82+
/// Memory fence with \p Ordering semantics for the system.
83+
void system(atomic::OrderingTy Ordering);
84+
85+
} // namespace fence
86+
7987
} // namespace _OMP
8088

8189
#endif

openmp/libomptarget/DeviceRTL/src/Reduction.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
211211
// to the number of slots in the buffer.
212212
bool IsMaster = (ThreadId == 0);
213213
while (IsMaster) {
214-
Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
214+
Bound = atomic::load(&IterCnt, atomic::seq_cst);
215215
if (TeamId < Bound + num_of_records)
216216
break;
217217
}
@@ -223,12 +223,12 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
223223
} else
224224
lgredFct(GlobalBuffer, ModBockId, reduce_data);
225225

226-
fence::system(__ATOMIC_SEQ_CST);
226+
fence::system(atomic::seq_cst);
227227

228228
// Increment team counter.
229229
// This counter is incremented by all teams in the current
230230
// BUFFER_SIZE chunk.
231-
ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
231+
ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, atomic::seq_cst);
232232
}
233233
// Synchronize
234234
if (mapping::isSPMDMode())
@@ -304,7 +304,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
304304
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
305305
// Allow SIZE number of teams to proceed writing their
306306
// intermediate results to the global buffer.
307-
atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
307+
atomic::add(&IterCnt, uint32_t(num_of_records), atomic::seq_cst);
308308
}
309309

310310
return 0;

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Lines changed: 76 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -29,47 +29,52 @@ namespace impl {
2929
///
3030
///{
3131
/// NOTE: This function needs to be implemented by every target.
32-
uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering);
32+
uint32_t atomicInc(uint32_t *Address, uint32_t Val,
33+
atomic::OrderingTy Ordering);
3334

34-
uint32_t atomicLoad(uint32_t *Address, int Ordering) {
35-
return __atomic_fetch_add(Address, 0U, __ATOMIC_SEQ_CST);
35+
uint32_t atomicLoad(uint32_t *Address, atomic::OrderingTy Ordering) {
36+
return __atomic_fetch_add(Address, 0U, Ordering);
3637
}
3738

38-
void atomicStore(uint32_t *Address, uint32_t Val, int Ordering) {
39+
void atomicStore(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering) {
3940
__atomic_store_n(Address, Val, Ordering);
4041
}
4142

42-
uint32_t atomicAdd(uint32_t *Address, uint32_t Val, int Ordering) {
43+
uint32_t atomicAdd(uint32_t *Address, uint32_t Val,
44+
atomic::OrderingTy Ordering) {
4345
return __atomic_fetch_add(Address, Val, Ordering);
4446
}
45-
uint32_t atomicMax(uint32_t *Address, uint32_t Val, int Ordering) {
47+
uint32_t atomicMax(uint32_t *Address, uint32_t Val,
48+
atomic::OrderingTy Ordering) {
4649
return __atomic_fetch_max(Address, Val, Ordering);
4750
}
4851

49-
uint32_t atomicExchange(uint32_t *Address, uint32_t Val, int Ordering) {
52+
uint32_t atomicExchange(uint32_t *Address, uint32_t Val,
53+
atomic::OrderingTy Ordering) {
5054
uint32_t R;
5155
__atomic_exchange(Address, &Val, &R, Ordering);
5256
return R;
5357
}
5458
uint32_t atomicCAS(uint32_t *Address, uint32_t Compare, uint32_t Val,
55-
int Ordering) {
59+
atomic::OrderingTy Ordering) {
5660
(void)__atomic_compare_exchange(Address, &Compare, &Val, false, Ordering,
5761
Ordering);
5862
return Compare;
5963
}
6064

61-
uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
65+
uint64_t atomicAdd(uint64_t *Address, uint64_t Val,
66+
atomic::OrderingTy Ordering) {
6267
return __atomic_fetch_add(Address, Val, Ordering);
6368
}
6469
///}
6570

6671
// Forward declarations defined to be defined for AMDGCN and NVPTX.
67-
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering);
72+
uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering);
6873
void namedBarrierInit();
6974
void namedBarrier();
70-
void fenceTeam(int Ordering);
71-
void fenceKernel(int Ordering);
72-
void fenceSystem(int Ordering);
75+
void fenceTeam(atomic::OrderingTy Ordering);
76+
void fenceKernel(atomic::OrderingTy Ordering);
77+
void fenceSystem(atomic::OrderingTy Ordering);
7378
void syncWarp(__kmpc_impl_lanemask_t);
7479
void syncThreads();
7580
void syncThreadsAligned() { syncThreads(); }
@@ -84,30 +89,30 @@ void setLock(omp_lock_t *);
8489
///{
8590
#pragma omp begin declare variant match(device = {arch(amdgcn)})
8691

87-
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
92+
uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering) {
8893
// builtin_amdgcn_atomic_inc32 should expand to this switch when
8994
// passed a runtime value, but does not do so yet. Workaround here.
9095
switch (Ordering) {
9196
default:
9297
__builtin_unreachable();
93-
case __ATOMIC_RELAXED:
94-
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
95-
case __ATOMIC_ACQUIRE:
96-
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
97-
case __ATOMIC_RELEASE:
98-
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
99-
case __ATOMIC_ACQ_REL:
100-
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
101-
case __ATOMIC_SEQ_CST:
102-
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
98+
case atomic::relaxed:
99+
return __builtin_amdgcn_atomic_inc32(A, V, atomic::relaxed, "");
100+
case atomic::aquire:
101+
return __builtin_amdgcn_atomic_inc32(A, V, atomic::aquire, "");
102+
case atomic::release:
103+
return __builtin_amdgcn_atomic_inc32(A, V, atomic::release, "");
104+
case atomic::acq_rel:
105+
return __builtin_amdgcn_atomic_inc32(A, V, atomic::acq_rel, "");
106+
case atomic::seq_cst:
107+
return __builtin_amdgcn_atomic_inc32(A, V, atomic::seq_cst, "");
103108
}
104109
}
105110

106111
uint32_t SHARED(namedBarrierTracker);
107112

108113
void namedBarrierInit() {
109114
// Don't have global ctors, and shared memory is not zero init
110-
atomic::store(&namedBarrierTracker, 0u, __ATOMIC_RELEASE);
115+
atomic::store(&namedBarrierTracker, 0u, atomic::release);
111116
}
112117

113118
void namedBarrier() {
@@ -117,7 +122,7 @@ void namedBarrier() {
117122
uint32_t WarpSize = mapping::getWarpSize();
118123
uint32_t NumWaves = NumThreads / WarpSize;
119124

120-
fence::team(__ATOMIC_ACQUIRE);
125+
fence::team(atomic::aquire);
121126

122127
// named barrier implementation for amdgcn.
123128
// Uses two 16 bit unsigned counters. One for the number of waves to have
@@ -133,7 +138,7 @@ void namedBarrier() {
133138
// Increment the low 16 bits once, using the lowest active thread.
134139
if (mapping::isLeaderInWarp()) {
135140
uint32_t load = atomic::add(&namedBarrierTracker, 1,
136-
__ATOMIC_RELAXED); // commutative
141+
atomic::relaxed); // commutative
137142

138143
// Record the number of times the barrier has been passed
139144
uint32_t generation = load & 0xffff0000u;
@@ -145,61 +150,61 @@ void namedBarrier() {
145150
load &= 0xffff0000u; // because bits zeroed second
146151

147152
// Reset the wave counter and release the waiting waves
148-
atomic::store(&namedBarrierTracker, load, __ATOMIC_RELAXED);
153+
atomic::store(&namedBarrierTracker, load, atomic::relaxed);
149154
} else {
150155
// more waves still to go, spin until generation counter changes
151156
do {
152157
__builtin_amdgcn_s_sleep(0);
153-
load = atomic::load(&namedBarrierTracker, __ATOMIC_RELAXED);
158+
load = atomic::load(&namedBarrierTracker, atomic::relaxed);
154159
} while ((load & 0xffff0000u) == generation);
155160
}
156161
}
157-
fence::team(__ATOMIC_RELEASE);
162+
fence::team(atomic::release);
158163
}
159164

160165
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
161166
// so that it is usable within a template environment and so that a runtime
162167
// value of the memory order is expanded to this switch within clang/llvm.
163-
void fenceTeam(int Ordering) {
168+
void fenceTeam(atomic::OrderingTy Ordering) {
164169
switch (Ordering) {
165170
default:
166171
__builtin_unreachable();
167-
case __ATOMIC_ACQUIRE:
168-
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
169-
case __ATOMIC_RELEASE:
170-
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
171-
case __ATOMIC_ACQ_REL:
172-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
173-
case __ATOMIC_SEQ_CST:
174-
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
172+
case atomic::aquire:
173+
return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
174+
case atomic::release:
175+
return __builtin_amdgcn_fence(atomic::release, "workgroup");
176+
case atomic::acq_rel:
177+
return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
178+
case atomic::seq_cst:
179+
return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
175180
}
176181
}
177-
void fenceKernel(int Ordering) {
182+
void fenceKernel(atomic::OrderingTy Ordering) {
178183
switch (Ordering) {
179184
default:
180185
__builtin_unreachable();
181-
case __ATOMIC_ACQUIRE:
182-
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
183-
case __ATOMIC_RELEASE:
184-
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
185-
case __ATOMIC_ACQ_REL:
186-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
187-
case __ATOMIC_SEQ_CST:
188-
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
186+
case atomic::aquire:
187+
return __builtin_amdgcn_fence(atomic::aquire, "agent");
188+
case atomic::release:
189+
return __builtin_amdgcn_fence(atomic::release, "agent");
190+
case atomic::acq_rel:
191+
return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
192+
case atomic::seq_cst:
193+
return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
189194
}
190195
}
191-
void fenceSystem(int Ordering) {
196+
void fenceSystem(atomic::OrderingTy Ordering) {
192197
switch (Ordering) {
193198
default:
194199
__builtin_unreachable();
195-
case __ATOMIC_ACQUIRE:
196-
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
197-
case __ATOMIC_RELEASE:
198-
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
199-
case __ATOMIC_ACQ_REL:
200-
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
201-
case __ATOMIC_SEQ_CST:
202-
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
200+
case atomic::aquire:
201+
return __builtin_amdgcn_fence(atomic::aquire, "");
202+
case atomic::release:
203+
return __builtin_amdgcn_fence(atomic::release, "");
204+
case atomic::acq_rel:
205+
return __builtin_amdgcn_fence(atomic::acq_rel, "");
206+
case atomic::seq_cst:
207+
return __builtin_amdgcn_fence(atomic::seq_cst, "");
203208
}
204209
}
205210

@@ -226,7 +231,8 @@ void setLock(omp_lock_t *) { __builtin_trap(); }
226231
#pragma omp begin declare variant match( \
227232
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
228233

229-
uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
234+
uint32_t atomicInc(uint32_t *Address, uint32_t Val,
235+
atomic::OrderingTy Ordering) {
230236
return __nvvm_atom_inc_gen_ui(Address, Val);
231237
}
232238

@@ -268,11 +274,11 @@ constexpr uint32_t SET = 1;
268274
// called before it is defined
269275
// here the overload won't happen. Investigate lalter!
270276
void unsetLock(omp_lock_t *Lock) {
271-
(void)atomicExchange((uint32_t *)Lock, UNSET, __ATOMIC_SEQ_CST);
277+
(void)atomicExchange((uint32_t *)Lock, UNSET, atomic::seq_cst);
272278
}
273279

274280
int testLock(omp_lock_t *Lock) {
275-
return atomicAdd((uint32_t *)Lock, 0u, __ATOMIC_SEQ_CST);
281+
return atomicAdd((uint32_t *)Lock, 0u, atomic::seq_cst);
276282
}
277283

278284
void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
@@ -281,7 +287,7 @@ void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
281287

282288
void setLock(omp_lock_t *Lock) {
283289
// TODO: not sure spinning is a good idea here..
284-
while (atomicCAS((uint32_t *)Lock, UNSET, SET, __ATOMIC_SEQ_CST) != UNSET) {
290+
while (atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::seq_cst) != UNSET) {
285291
int32_t start = __nvvm_read_ptx_sreg_clock();
286292
int32_t now;
287293
for (;;) {
@@ -310,29 +316,29 @@ void synchronize::threads() { impl::syncThreads(); }
310316

311317
void synchronize::threadsAligned() { impl::syncThreadsAligned(); }
312318

313-
void fence::team(int Ordering) { impl::fenceTeam(Ordering); }
319+
void fence::team(atomic::OrderingTy Ordering) { impl::fenceTeam(Ordering); }
314320

315-
void fence::kernel(int Ordering) { impl::fenceKernel(Ordering); }
321+
void fence::kernel(atomic::OrderingTy Ordering) { impl::fenceKernel(Ordering); }
316322

317-
void fence::system(int Ordering) { impl::fenceSystem(Ordering); }
323+
void fence::system(atomic::OrderingTy Ordering) { impl::fenceSystem(Ordering); }
318324

319-
uint32_t atomic::load(uint32_t *Addr, int Ordering) {
325+
uint32_t atomic::load(uint32_t *Addr, atomic::OrderingTy Ordering) {
320326
return impl::atomicLoad(Addr, Ordering);
321327
}
322328

323-
void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) {
329+
void atomic::store(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
324330
impl::atomicStore(Addr, V, Ordering);
325331
}
326332

327-
uint32_t atomic::inc(uint32_t *Addr, uint32_t V, int Ordering) {
333+
uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
328334
return impl::atomicInc(Addr, V, Ordering);
329335
}
330336

331-
uint32_t atomic::add(uint32_t *Addr, uint32_t V, int Ordering) {
337+
uint32_t atomic::add(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
332338
return impl::atomicAdd(Addr, V, Ordering);
333339
}
334340

335-
uint64_t atomic::add(uint64_t *Addr, uint64_t V, int Ordering) {
341+
uint64_t atomic::add(uint64_t *Addr, uint64_t V, atomic::OrderingTy Ordering) {
336342
return impl::atomicAdd(Addr, V, Ordering);
337343
}
338344

@@ -389,7 +395,7 @@ void __kmpc_end_single(IdentTy *Loc, int32_t TId) {
389395

390396
void __kmpc_flush(IdentTy *Loc) {
391397
FunctionTracingRAII();
392-
fence::kernel(__ATOMIC_SEQ_CST);
398+
fence::kernel(atomic::seq_cst);
393399
}
394400

395401
uint64_t __kmpc_warp_active_thread_mask(void) {

0 commit comments

Comments
 (0)