Skip to content

Commit 3768039

Browse files
authored
[OpenMP] Directly use user's grid and block size in kernel language mode (#70612)
In kernel language mode, use user's grid and blocks size directly. No validity check, which means if user's values are too large, the launch will fail, similar to what CUDA and HIP are doing right now.
1 parent 8e2cc19 commit 3768039

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,9 @@ struct GenericKernelTy {
397397

398398
/// The prototype kernel launch environment.
399399
KernelLaunchEnvironmentTy KernelLaunchEnvironment;
400+
401+
/// If the kernel is a bare kernel.
402+
bool IsBareKernel = false;
400403
};
401404

402405
/// Class representing a map of host pinned allocations. We track these pinned

openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
436436
Name, ErrStr.data());
437437
assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
438438
"Default initialization failed.");
439+
IsBareKernel = true;
439440
}
440441

441442
// Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
@@ -594,6 +595,10 @@ uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
594595
uint32_t ThreadLimitClause[3]) const {
595596
assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
596597
"Multi dimensional launch not supported yet.");
598+
599+
if (IsBareKernel && ThreadLimitClause[0] > 0)
600+
return ThreadLimitClause[0];
601+
597602
if (ThreadLimitClause[0] > 0 && isGenericMode())
598603
ThreadLimitClause[0] += GenericDevice.getWarpSize();
599604

@@ -610,6 +615,9 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
610615
assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
611616
"Multi dimensional launch not supported yet.");
612617

618+
if (IsBareKernel && NumTeamsClause[0] > 0)
619+
return NumTeamsClause[0];
620+
613621
if (NumTeamsClause[0] > 0) {
614622
// TODO: We need to honor any value and consequently allow more than the
615623
// block limit. For this we might need to start multiple kernels or let the
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// RUN: %libomptarget-compile-generic
2+
// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic
3+
//
4+
// UNSUPPORTED: x86_64-pc-linux-gnu
5+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
6+
// UNSUPPORTED: aarch64-unknown-linux-gnu
7+
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
8+
9+
#include <assert.h>
10+
#include <ompx.h>
11+
#include <stdio.h>
12+
#include <stdlib.h>
13+
14+
int main(int argc, char *argv[]) {
15+
const int num_blocks = 64;
16+
const int block_size = 64;
17+
const int N = num_blocks * block_size;
18+
int *data = (int *)malloc(N * sizeof(int));
19+
20+
// CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with 64 blocks and 64 threads in SPMD mode
21+
22+
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) map(from: data[0:N])
23+
{
24+
int bid = ompx_block_id_x();
25+
int bdim = ompx_block_dim_x();
26+
int tid = ompx_thread_id_x();
27+
int idx = bid * bdim + tid;
28+
data[idx] = idx;
29+
}
30+
31+
for (int i = 0; i < N; ++i)
32+
assert(data[i] == i);
33+
34+
// CHECK: PASS
35+
printf("PASS\n");
36+
37+
return 0;
38+
}

0 commit comments

Comments
 (0)