Skip to content

Commit 86bb713

Browse files
committed
[OpenMP][FIX] Enlarge thread state array, improve test and add second test
1 parent 4d80eff commit 86bb713

File tree

3 files changed

+52
-8
lines changed

3 files changed

+52
-8
lines changed

openmp/libomptarget/DeviceRTL/src/State.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ void state::enterDataEnvironment(IdentTy *Ident) {
262262
memory::allocGlobal(sizeof(ThreadStateTy), "ThreadStates alloc"));
263263
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
264264
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
265-
uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getMaxTeamThreads();
265+
uint32_t Bytes =
266+
sizeof(ThreadStates[0]) * mapping::getNumberOfThreadsInBlock();
266267
void *ThreadStatesPtr =
267268
memory::allocGlobal(Bytes, "Thread state array allocation");
268269
memset(ThreadStatesPtr, 0, Bytes);

openmp/libomptarget/test/offloading/thread_state_1.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// Still broken "without optimizations"
2-
// XUN: %libomptarget-compile-run-and-check-generic
1+
// RUN: %libomptarget-compile-run-and-check-generic
32
// RUN: %libomptarget-compileopt-run-and-check-generic
43

54
#include <omp.h>
@@ -10,10 +9,10 @@ int main() {
109
int o_lvl = 111, i_lvl = 222, o_tid = 333, i_tid = 333, o_nt = 444,
1110
i_nt = 555;
1211
#pragma omp target teams map(tofrom : o_lvl, i_lvl, o_tid, i_tid, o_nt, i_nt) \
13-
num_teams(2) thread_limit(2)
12+
num_teams(2) thread_limit(64)
1413
{
1514
if (omp_get_team_num() == 0) {
16-
#pragma omp parallel num_threads(128)
15+
#pragma omp parallel num_threads(64)
1716
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
1817
o_lvl = omp_get_level();
1918
o_tid = omp_get_thread_num();
@@ -27,9 +26,13 @@ int main() {
2726
}
2827
}
2928
}
30-
// CHECK: outer: lvl: 1, tid: 1, nt: 2
31-
// CHECK: inner: lvl: 2, tid: 0, nt: 1
29+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
30+
i_nt == 1) {
31+
// CHECK: Success
32+
printf("Success\n");
33+
return 0;
34+
}
3235
printf("outer: lvl: %i, tid: %i, nt: %i\n", o_lvl, o_tid, o_nt);
3336
printf("inner: lvl: %i, tid: %i, nt: %i\n", i_lvl, i_tid, i_nt);
34-
return 0;
37+
return 1;
3538
}
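openmp/libomptarget/test/offloading/thread_state_2.c (new file; the path header is missing from this capture — name presumed from the sibling test thread_state_1.c)

Lines changed: 40 additions & 0 deletions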
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// This fails when optimized for now.
2+
// RUN: %libomptarget-compile-run-and-check-generic
3+
// XUN: %libomptarget-compileopt-run-and-check-generic
4+
5+
#include <omp.h>
6+
#include <stdio.h>
7+
8+
int main() {
9+
// TODO: Test all ICVs on all levels
10+
int o_lvl = 111, i_lvl = 222, o_tid = 333, i_tid = 333, o_nt = 444,
11+
i_nt = 555;
12+
#pragma omp target teams map(tofrom : o_lvl, i_lvl, o_tid, i_tid, o_nt, i_nt) \
13+
num_teams(2) thread_limit(64)
14+
{
15+
omp_set_max_active_levels(1);
16+
if (omp_get_team_num() == 0) {
17+
#pragma omp parallel num_threads(64)
18+
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
19+
o_lvl = omp_get_level();
20+
o_tid = omp_get_thread_num();
21+
o_nt = omp_get_num_threads();
22+
#pragma omp parallel num_threads(64)
23+
if (omp_get_thread_num() == omp_get_num_threads() - 1) {
24+
i_lvl = omp_get_level();
25+
i_tid = omp_get_thread_num();
26+
i_nt = omp_get_num_threads();
27+
}
28+
}
29+
}
30+
}
31+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
32+
i_nt == 1) {
33+
// CHECK: Success
34+
printf("Success\n");
35+
return 0;
36+
}
37+
printf("outer: lvl: %i, tid: %i, nt: %i\n", o_lvl, o_tid, o_nt);
38+
printf("inner: lvl: %i, tid: %i, nt: %i\n", i_lvl, i_tid, i_nt);
39+
return 1;
40+
}

0 commit comments

Comments
 (0)