[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics #131907

JonChesterfield · 2025-03-18T20:32:52Z

Picked a few straightforward ones to get the ball moving; left the UNREACHABLE path unchanged.

llvmbot · 2025-03-18T20:33:26Z

@llvm/pr-subscribers-offload

Author: Jon Chesterfield (JonChesterfield)

Changes

Picked a few straightforward ones to get the ball moving; left the UNREACHABLE path unchanged.

Full diff: https://github.com/llvm/llvm-project/pull/131907.diff

1 Files Affected:

(modified) offload/DeviceRTL/src/Mapping.cpp (+18-68)

diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 641be81cca3ed..53031cbeaa696 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -16,6 +16,7 @@
 #include "State.h"
 
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "clang/lib/Headers/gpuintrin.h"
 
 using namespace ompx;
 
@@ -27,22 +28,6 @@ namespace impl {
 ///{
 #ifdef __AMDGPU__
 
-uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }
-
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
-  switch (Dim) {
-  case 0:
-    return __builtin_amdgcn_workgroup_size_x();
-  case 1:
-    return __builtin_amdgcn_workgroup_size_y();
-  case 2:
-    return __builtin_amdgcn_workgroup_size_z();
-  };
-  UNREACHABLE("Dim outside range!");
-}
-
-LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
-
 LaneMaskTy lanemaskLT() {
   uint32_t Lane = mapping::getThreadIdInWarp();
   int64_t Ballot = mapping::activemask();
@@ -59,22 +44,6 @@ LaneMaskTy lanemaskGT() {
   return Mask & Ballot;
 }
 
-uint32_t getThreadIdInWarp() {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-uint32_t getThreadIdInBlock(int32_t Dim) {
-  switch (Dim) {
-  case 0:
-    return __builtin_amdgcn_workitem_id_x();
-  case 1:
-    return __builtin_amdgcn_workitem_id_y();
-  case 2:
-    return __builtin_amdgcn_workitem_id_z();
-  };
-  UNREACHABLE("Dim outside range!");
-}
-
 uint32_t getNumberOfThreadsInKernel() {
   return __builtin_amdgcn_grid_size_x() * __builtin_amdgcn_grid_size_y() *
          __builtin_amdgcn_grid_size_z();
@@ -120,40 +89,10 @@ uint32_t getNumberOfWarpsInBlock() {
 ///{
 #ifdef __NVPTX__
 
-uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
-  switch (Dim) {
-  case 0:
-    return __nvvm_read_ptx_sreg_ntid_x();
-  case 1:
-    return __nvvm_read_ptx_sreg_ntid_y();
-  case 2:
-    return __nvvm_read_ptx_sreg_ntid_z();
-  };
-  UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); }
-
-LaneMaskTy activemask() { return __nvvm_activemask(); }
-
 LaneMaskTy lanemaskLT() { return __nvvm_read_ptx_sreg_lanemask_lt(); }
 
 LaneMaskTy lanemaskGT() { return __nvvm_read_ptx_sreg_lanemask_gt(); }
 
-uint32_t getThreadIdInBlock(int32_t Dim) {
-  switch (Dim) {
-  case 0:
-    return __nvvm_read_ptx_sreg_tid_x();
-  case 1:
-    return __nvvm_read_ptx_sreg_tid_y();
-  case 2:
-    return __nvvm_read_ptx_sreg_tid_z();
-  };
-  UNREACHABLE("Dim outside range!");
-}
-
-uint32_t getThreadIdInWarp() { return __nvvm_read_ptx_sreg_laneid(); }
-
 uint32_t getBlockIdInKernel(int32_t Dim) {
   switch (Dim) {
   case 0:
@@ -236,24 +175,29 @@ bool mapping::isLeaderInWarp() {
   return utils::popc(Active & LaneMaskLT) == 0;
 }
 
-LaneMaskTy mapping::activemask() { return impl::activemask(); }
+LaneMaskTy mapping::activemask() { return __gpu_lane_mask(); }
 
 LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
 
 LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
 
 uint32_t mapping::getThreadIdInWarp() {
-  uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
+  uint32_t ThreadIdInWarp = __gpu_lane_id();
   ASSERT(ThreadIdInWarp < impl::getWarpSize(), nullptr);
   return ThreadIdInWarp;
 }
 
 uint32_t mapping::getThreadIdInBlock(int32_t Dim) {
-  uint32_t ThreadIdInBlock = impl::getThreadIdInBlock(Dim);
-  return ThreadIdInBlock;
+  switch (Dim) {
+  case 0:
+  case 1:
+  case 2:
+    return __gpu_thread_id(Dim);
+  };
+  UNREACHABLE("Dim outside range!");
 }
 
-uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
+uint32_t mapping::getWarpSize() { return __gpu_num_lanes(); }
 
 uint32_t mapping::getMaxTeamThreads(bool IsSPMD) {
   uint32_t BlockSize = mapping::getNumberOfThreadsInBlock();
@@ -265,7 +209,13 @@ uint32_t mapping::getMaxTeamThreads() {
 }
 
 uint32_t mapping::getNumberOfThreadsInBlock(int32_t Dim) {
-  return impl::getNumberOfThreadsInBlock(Dim);
+  switch (Dim) {
+  case 0:
+  case 1:
+  case 2:
+    return __gpu_num_threads(Dim);
+  };
+  UNREACHABLE("Dim outside range!");
 }
 
 uint32_t mapping::getNumberOfThreadsInKernel() {

jhuber6 · 2025-03-18T20:34:00Z

I already made PRs for this: #131631 and #131644.

github-actions · 2025-03-18T20:36:20Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 7d7b58bc5d2bacc3d76463d2ee06a13d2a08b084 4d29d2e8d6f264adc82779071b7d09d552873460 --extensions cpp -- offload/DeviceRTL/src/Mapping.cpp

View the diff from clang-format here.

diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 53031cbeaa..af94d96251 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -15,8 +15,8 @@
 #include "Interface.h"
 #include "State.h"
 
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "clang/lib/Headers/gpuintrin.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace ompx;

JonChesterfield · 2025-03-18T20:38:01Z

Good stuff. It's a superset of this one, so let's ship the bigger one then :)

[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics

4d29d2e

JonChesterfield requested review from jdoerfert, jhuber6 and sarnex March 18, 2025 20:32

llvmbot added the offload label Mar 18, 2025

JonChesterfield mentioned this pull request Mar 18, 2025

[OpenMP] Use 'gpuintrin.h' definitions for simple block identifiers #131631

Merged

JonChesterfield closed this Mar 18, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics #131907

[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics #131907

Uh oh!

JonChesterfield commented Mar 18, 2025

Uh oh!

llvmbot commented Mar 18, 2025

Uh oh!

jhuber6 commented Mar 18, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Mar 18, 2025

Uh oh!

JonChesterfield commented Mar 18, 2025 •

edited

Loading

Uh oh!

Uh oh!

[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics #131907

[openmp][nfc] Use clang gpuintrin for some dispatch to target intrinsics #131907

Uh oh!

Conversation

JonChesterfield commented Mar 18, 2025

Uh oh!

llvmbot commented Mar 18, 2025

Uh oh!

jhuber6 commented Mar 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Mar 18, 2025

Uh oh!

JonChesterfield commented Mar 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

jhuber6 commented Mar 18, 2025 •

edited

Loading

JonChesterfield commented Mar 18, 2025 •

edited

Loading