Skip to content

Commit f4ee5a6

Browse files
authored
[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)
Summary: This is simpler and more common. I would have replaced the CUDA uses as well and made both paths the same, but code generation for CUDA currently does not fully support these fences and just emits a full system-wide barrier as a fallback.
1 parent 86779da commit f4ee5a6

File tree

2 files changed

+13
-39
lines changed

2 files changed

+13
-39
lines changed

offload/DeviceRTL/include/Synchronization.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ enum OrderingTy {
2626
seq_cst = __ATOMIC_SEQ_CST,
2727
};
2828

29+
enum ScopeTy {
30+
system = __MEMORY_SCOPE_SYSTEM,
31+
device_ = __MEMORY_SCOPE_DEVICE,
32+
workgroup = __MEMORY_SCOPE_WRKGRP,
33+
wavefront = __MEMORY_SCOPE_WVFRNT,
34+
single = __MEMORY_SCOPE_SINGLE,
35+
};
36+
2937
enum MemScopeTy {
3038
all, // All threads on all devices
3139
device, // All threads on the device

offload/DeviceRTL/src/Synchronization.cpp

Lines changed: 5 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -232,50 +232,16 @@ void namedBarrier() {
232232
fence::team(atomic::release);
233233
}
234234

235-
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
236-
// so that it is usable within a template environment and so that a runtime
237-
// value of the memory order is expanded to this switch within clang/llvm.
238235
void fenceTeam(atomic::OrderingTy Ordering) {
239-
switch (Ordering) {
240-
default:
241-
__builtin_unreachable();
242-
case atomic::aquire:
243-
return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
244-
case atomic::release:
245-
return __builtin_amdgcn_fence(atomic::release, "workgroup");
246-
case atomic::acq_rel:
247-
return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
248-
case atomic::seq_cst:
249-
return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
250-
}
236+
return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
251237
}
238+
252239
void fenceKernel(atomic::OrderingTy Ordering) {
253-
switch (Ordering) {
254-
default:
255-
__builtin_unreachable();
256-
case atomic::aquire:
257-
return __builtin_amdgcn_fence(atomic::aquire, "agent");
258-
case atomic::release:
259-
return __builtin_amdgcn_fence(atomic::release, "agent");
260-
case atomic::acq_rel:
261-
return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
262-
case atomic::seq_cst:
263-
return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
264-
}
240+
return __scoped_atomic_thread_fence(Ordering, atomic::device_);
265241
}
242+
266243
void fenceSystem(atomic::OrderingTy Ordering) {
267-
switch (Ordering) {
268-
default:
269-
__builtin_unreachable();
270-
case atomic::aquire:
271-
return __builtin_amdgcn_fence(atomic::aquire, "");
272-
case atomic::release:
273-
return __builtin_amdgcn_fence(atomic::release, "");
274-
case atomic::acq_rel:
275-
return __builtin_amdgcn_fence(atomic::acq_rel, "");
276-
case atomic::seq_cst:
277-
return __builtin_amdgcn_fence(atomic::seq_cst, "");
278-
}
244+
return __scoped_atomic_thread_fence(Ordering, atomic::system);
279245
}
280246

281247
void syncWarp(__kmpc_impl_lanemask_t) {

0 commit comments

Comments
 (0)