[OpenMP] Replace AMDGPU fences with generic scoped fences (#119619)

jhuber6 · web-flow · commit f4ee5a673f6e · 2024-12-12T07:54:51.000-06:00
Summary:
This is simpler and more common. I would've replaced the CUDA uses and
made this the same but currently it doesn't codegen these fences fully
and just emits a full system wide barrier as a fallback.
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
@@ -26,6 +26,14 @@ enum OrderingTy {
   seq_cst = __ATOMIC_SEQ_CST,
 };
 
+enum ScopeTy {
+  system = __MEMORY_SCOPE_SYSTEM,
+  device_ = __MEMORY_SCOPE_DEVICE,
+  workgroup = __MEMORY_SCOPE_WRKGRP,
+  wavefront = __MEMORY_SCOPE_WVFRNT,
+  single = __MEMORY_SCOPE_SINGLE,
+};
+
 enum MemScopeTy {
   all,    // All threads on all devices
   device, // All threads on the device
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
@@ -232,50 +232,16 @@ void namedBarrier() {
   fence::team(atomic::release);
 }
 
-// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
-// so that it is usable within a template environment and so that a runtime
-// value of the memory order is expanded to this switch within clang/llvm.
 void fenceTeam(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "workgroup");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
 }
+
 void fenceKernel(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "agent");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "agent");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::device_);
 }
+
 void fenceSystem(atomic::OrderingTy Ordering) {
-  switch (Ordering) {
-  default:
-    __builtin_unreachable();
-  case atomic::aquire:
-    return __builtin_amdgcn_fence(atomic::aquire, "");
-  case atomic::release:
-    return __builtin_amdgcn_fence(atomic::release, "");
-  case atomic::acq_rel:
-    return __builtin_amdgcn_fence(atomic::acq_rel, "");
-  case atomic::seq_cst:
-    return __builtin_amdgcn_fence(atomic::seq_cst, "");
-  }
+  return __scoped_atomic_thread_fence(Ordering, atomic::system);
 }
 
 void syncWarp(__kmpc_impl_lanemask_t) {