Skip to content

Commit 0870a4f

Browse files
committed
[OpenMP] Add flag for disabling thread state in runtime
The runtime uses thread state values to indicate when we use an ICV or are in nested parallelism. This is done for OpenMP correctness, but it is not needed in the majority of cases. The new flag added is `-fopenmp-assume-no-thread-state`. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D120106
1 parent 03ec026 commit 0870a4f

File tree

8 files changed

+41
-4
lines changed

8 files changed

+41
-4
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading de
246246
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
247247
LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
248248
LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
249+
LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
249250
LANGOPT(RenderScript , 1, 0, "RenderScript")
250251

251252
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2473,6 +2473,10 @@ def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-te
24732473
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
24742474
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
24752475
Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
2476+
def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group<f_Group>,
2477+
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>,
2478+
HelpText<"Assert no thread in a parallel region modifies an ICV">,
2479+
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
24762480
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
24772481
LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue,
24782482
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
12101210
"__omp_rtl_assume_teams_oversubscription");
12111211
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
12121212
"__omp_rtl_assume_threads_oversubscription");
1213+
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
1214+
"__omp_rtl_assume_no_thread_state");
12131215
}
12141216
}
12151217

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5995,6 +5995,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
59955995
options::OPT_fno_openmp_assume_threads_oversubscription,
59965996
/*Default=*/false))
59975997
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
5998+
if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
5999+
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
59986000
break;
59996001
default:
60006002
// By default, if Clang doesn't know how to generate useful OpenMP code

clang/test/OpenMP/target_globals_codegen.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
77
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
88
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
9+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-no-thread-state -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-STATE
910
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - | FileCheck %s --check-prefix=CHECK-RUNTIME
1011
// expected-no-diagnostics
1112

@@ -16,26 +17,37 @@
1617
// CHECK: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1
1718
// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
1819
// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
20+
// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
1921
//.
2022
// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111
2123
// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
2224
// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
25+
// CHECK-EQ: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
2326
//.
2427
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
2528
// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
2629
// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
30+
// CHECK-DEFAULT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
2731
//.
2832
// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
2933
// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
3034
// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
35+
// CHECK-THREADS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
3136
//.
3237
// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
3338
// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
3439
// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
40+
// CHECK-TEAMS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
41+
//.
42+
// CHECK-STATE: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
43+
// CHECK-STATE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
44+
// CHECK-STATE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
45+
// CHECK-STATE: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
3546
//.
3647
// CHECK-RUNTIME-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
3748
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
3849
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
50+
// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
3951
//.
4052
void foo() {
4153
#pragma omp target

openmp/libomptarget/DeviceRTL/include/Configuration.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,13 @@ uint32_t getDebugKind();
3838
/// Return the amount of dynamic shared memory that was allocated at launch.
3939
uint64_t getDynamicMemorySize();
4040

41+
/// Return if debugging is enabled for the given debug kind.
4142
bool isDebugMode(DebugKind Level);
4243

44+
/// Indicates if this kernel may require thread-specific states, or if it was
45+
/// explicitly disabled by the user.
46+
bool mayUseThreadStates();
47+
4348
} // namespace config
4449
} // namespace _OMP
4550

openmp/libomptarget/DeviceRTL/src/Configuration.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ using namespace _OMP;
2020

2121
#pragma omp declare target
2222

23-
extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
23+
// defined by CGOpenMPRuntimeGPU
24+
extern uint32_t __omp_rtl_debug_kind;
25+
extern uint32_t __omp_rtl_assume_no_thread_state;
2426

2527
// TODO: We want to change the name as soon as the old runtime is gone.
2628
// This variable should be visible to the plugin so we override the default
@@ -48,4 +50,6 @@ bool config::isDebugMode(config::DebugKind Kind) {
4850
return config::getDebugKind() & Kind;
4951
}
5052

53+
/// Return true unless the `__omp_rtl_assume_no_thread_state` global flag is
/// non-zero, i.e. unless thread-specific states were explicitly disabled.
bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }
54+
5155
#pragma omp end declare target

openmp/libomptarget/DeviceRTL/src/State.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,8 @@ ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
285285
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
286286

287287
uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
288-
if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0))
288+
if (OMP_LIKELY(!config::mayUseThreadStates() ||
289+
TeamState.ICVState.LevelVar == 0))
289290
return TeamState.ICVState.*Var;
290291
uint32_t TId = mapping::getThreadIdInBlock();
291292
if (!ThreadStates[TId]) {
@@ -299,13 +300,13 @@ uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
299300

300301
uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
301302
uint32_t TId = mapping::getThreadIdInBlock();
302-
if (OMP_UNLIKELY(ThreadStates[TId]))
303+
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
303304
return ThreadStates[TId]->ICVState.*Var;
304305
return TeamState.ICVState.*Var;
305306
}
306307
uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
307308
uint64_t TId = mapping::getThreadIdInBlock();
308-
if (OMP_UNLIKELY(ThreadStates[TId]))
309+
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
309310
return ThreadStates[TId]->ICVState.*Var;
310311
return TeamState.ICVState.*Var;
311312
}
@@ -380,6 +381,9 @@ void state::init(bool IsSPMD) {
380381
}
381382

382383
void state::enterDataEnvironment(IdentTy *Ident) {
384+
ASSERT(config::mayUseThreadStates() &&
385+
"Thread state modified while explicitly disabled!");
386+
383387
unsigned TId = mapping::getThreadIdInBlock();
384388
ThreadStateTy *NewThreadState =
385389
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
@@ -388,6 +392,9 @@ void state::enterDataEnvironment(IdentTy *Ident) {
388392
}
389393

390394
void state::exitDataEnvironment() {
395+
ASSERT(config::mayUseThreadStates() &&
396+
"Thread state modified while explicitly disabled!");
397+
391398
unsigned TId = mapping::getThreadIdInBlock();
392399
resetStateForThread(TId);
393400
}

0 commit comments

Comments
 (0)