Skip to content

Commit 4a38ae9

Browse files
jhuber6 authored and ronlieb committed
[OpenMP] Replace AMDGPU fences with generic scoped fences (llvm#119619)
Summary: This is simpler and more common. I would have replaced the CUDA uses as well and made both targets the same, but currently the CUDA path doesn't fully codegen these fences and just emits a full system-wide barrier as a fallback.
Change-Id: I00eb7a789779bce7ab5abc6fa3aedddf4d07ae87
1 parent b828378 commit 4a38ae9

File tree

2 files changed

+13
-39
lines changed

2 files changed

+13
-39
lines changed

offload/DeviceRTL/include/Synchronization.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,14 @@ enum OrderingTy {
2626
seq_cst = __ATOMIC_SEQ_CST,
2727
};
2828

29+
enum ScopeTy {
30+
system = __MEMORY_SCOPE_SYSTEM,
31+
device_ = __MEMORY_SCOPE_DEVICE,
32+
workgroup = __MEMORY_SCOPE_WRKGRP,
33+
wavefront = __MEMORY_SCOPE_WVFRNT,
34+
single = __MEMORY_SCOPE_SINGLE,
35+
};
36+
2937
enum MemScopeTy {
3038
all, // All threads on all devices
3139
device, // All threads on the device

offload/DeviceRTL/src/Synchronization.cpp

Lines changed: 5 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -298,50 +298,16 @@ void namedBarrier() {
298298
fence::team(atomic::release);
299299
}
300300

301-
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
302-
// so that it is usable within a template environment and so that a runtime
303-
// value of the memory order is expanded to this switch within clang/llvm.
304301
void fenceTeam(atomic::OrderingTy Ordering) {
305-
switch (Ordering) {
306-
default:
307-
__builtin_unreachable();
308-
case atomic::aquire:
309-
return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
310-
case atomic::release:
311-
return __builtin_amdgcn_fence(atomic::release, "workgroup");
312-
case atomic::acq_rel:
313-
return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
314-
case atomic::seq_cst:
315-
return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
316-
}
302+
return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
317303
}
304+
318305
void fenceKernel(atomic::OrderingTy Ordering) {
319-
switch (Ordering) {
320-
default:
321-
__builtin_unreachable();
322-
case atomic::aquire:
323-
return __builtin_amdgcn_fence(atomic::aquire, "agent");
324-
case atomic::release:
325-
return __builtin_amdgcn_fence(atomic::release, "agent");
326-
case atomic::acq_rel:
327-
return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
328-
case atomic::seq_cst:
329-
return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
330-
}
306+
return __scoped_atomic_thread_fence(Ordering, atomic::device_);
331307
}
308+
332309
void fenceSystem(atomic::OrderingTy Ordering) {
333-
switch (Ordering) {
334-
default:
335-
__builtin_unreachable();
336-
case atomic::aquire:
337-
return __builtin_amdgcn_fence(atomic::aquire, "");
338-
case atomic::release:
339-
return __builtin_amdgcn_fence(atomic::release, "");
340-
case atomic::acq_rel:
341-
return __builtin_amdgcn_fence(atomic::acq_rel, "");
342-
case atomic::seq_cst:
343-
return __builtin_amdgcn_fence(atomic::seq_cst, "");
344-
}
310+
return __scoped_atomic_thread_fence(Ordering, atomic::system);
345311
}
346312

347313
void syncWarp(__kmpc_impl_lanemask_t) {

0 commit comments

Comments
 (0)