Skip to content

Commit 29b44ca

Browse files
committed
[OpenMP] Add flag for setting debug in the offloading device
This patch introduces the flags `-fopenmp-target-debug` and `-fopenmp-target-debug=` to set the value of a global in the device. This will be used to enable or disable debugging features statically in the device runtime library. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D109544
1 parent 7eb899c commit 29b44ca

File tree

9 files changed

+92
-0
lines changed

9 files changed

+92
-0
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ def err_drv_optimization_remark_format : Error<
278278
"unknown remark serializer format: '%0'">;
279279
def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
280280
def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
281+
def err_drv_debug_no_new_runtime : Error<"OpenMP target device debugging enabled with incompatible runtime">;
281282
def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">;
282283
def err_drv_omp_host_ir_file_not_found : Error<
283284
"provided host compiler IR file '%0' is required to generate code for OpenMP "

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.")
242242
LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.")
243243
LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
244244
LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
245+
LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL")
245246
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
246247
LANGOPT(RenderScript , 1, 0, "RenderScript")
247248

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2411,6 +2411,10 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
24112411
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
24122412
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
24132413
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
2414+
def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
2415+
HelpText<"Enable debugging in the OpenMP offloading device RTL">;
2416+
def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
2417+
def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
24142418
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
24152419
LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
24162420
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,10 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
11971197
: CGOpenMPRuntime(CGM, "_", "$") {
11981198
if (!CGM.getLangOpts().OpenMPIsDevice)
11991199
llvm_unreachable("OpenMP NVPTX can only handle device code.");
1200+
1201+
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
1202+
if (CGM.getLangOpts().OpenMPTargetNewRuntime)
1203+
OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug);
12001204
}
12011205

12021206
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5756,6 +5756,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57565756
options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
57575757
CmdArgs.push_back("-fopenmp-cuda-mode");
57585758

5759+
// When in OpenMP offloading mode, enable or disable the new device
5760+
// runtime.
5761+
if (Args.hasFlag(options::OPT_fopenmp_target_new_runtime,
5762+
options::OPT_fno_openmp_target_new_runtime,
5763+
/*Default=*/false))
5764+
CmdArgs.push_back("-fopenmp-target-new-runtime");
5765+
5766+
// When in OpenMP offloading mode, enable debugging on the device.
5767+
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ);
5768+
if (Args.hasFlag(options::OPT_fopenmp_target_debug,
5769+
options::OPT_fno_openmp_target_debug, /*Default=*/false))
5770+
CmdArgs.push_back("-fopenmp-target-debug");
5771+
57595772
// When in OpenMP offloading mode with NVPTX target, check if full runtime
57605773
// is required.
57615774
if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime,

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3461,6 +3461,13 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
34613461
GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA);
34623462
}
34633463

3464+
if (Opts.OpenMPTargetNewRuntime)
3465+
GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA);
3466+
3467+
if (Opts.OpenMPTargetDebug != 0)
3468+
GenerateArg(Args, OPT_fopenmp_target_debug_EQ,
3469+
Twine(Opts.OpenMPTargetDebug), SA);
3470+
34643471
if (Opts.OpenMPCUDANumSMs != 0)
34653472
GenerateArg(Args, OPT_fopenmp_cuda_number_of_sm_EQ,
34663473
Twine(Opts.OpenMPCUDANumSMs), SA);
@@ -3839,6 +3846,9 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
38393846
Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder);
38403847
bool IsTargetSpecified =
38413848
Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
3849+
Opts.OpenMPTargetNewRuntime =
3850+
Opts.OpenMPIsDevice &&
3851+
Args.hasArg(options::OPT_fopenmp_target_new_runtime);
38423852

38433853
Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
38443854

@@ -3866,6 +3876,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
38663876
// handling code for those requiring so.
38673877
if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) ||
38683878
Opts.OpenCLCPlusPlus) {
3879+
38693880
Opts.Exceptions = 0;
38703881
Opts.CXXExceptions = 0;
38713882
}
@@ -3881,6 +3892,20 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
38813892
Opts.OpenMPCUDAReductionBufNum, Diags);
38823893
}
38833894

3895+
// Set the value of the debugging flag used in the new offloading device RTL.
3896+
// Set either by a specific value or to a default if not specified.
3897+
if (Opts.OpenMPIsDevice && (Args.hasArg(OPT_fopenmp_target_debug) ||
3898+
Args.hasArg(OPT_fopenmp_target_debug_EQ))) {
3899+
if (Opts.OpenMPTargetNewRuntime) {
3900+
Opts.OpenMPTargetDebug = getLastArgIntValue(
3901+
Args, OPT_fopenmp_target_debug_EQ, Opts.OpenMPTargetDebug, Diags);
3902+
if (!Opts.OpenMPTargetDebug && Args.hasArg(OPT_fopenmp_target_debug))
3903+
Opts.OpenMPTargetDebug = 1;
3904+
} else {
3905+
Diags.Report(diag::err_drv_debug_no_new_runtime);
3906+
}
3907+
}
3908+
38843909
// Get the OpenMP target triples if any.
38853910
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
38863911
enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "(__omp_rtl_debug_kind|llvm\.used)"
2+
// Test target codegen - host bc file has to be created first.
3+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
4+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK
5+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ
6+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
7+
// expected-no-diagnostics
8+
9+
#ifndef HEADER
10+
#define HEADER
11+
12+
//.
13+
// CHECK: @__omp_rtl_debug_kind = private constant i32 1
14+
// CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @__omp_rtl_debug_kind to i8*)], section "llvm.metadata"
15+
//.
16+
// CHECK-EQ: @__omp_rtl_debug_kind = private constant i32 111
17+
// CHECK-EQ: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @__omp_rtl_debug_kind to i8*)], section "llvm.metadata"
18+
//.
19+
// CHECK-DEFAULT: @__omp_rtl_debug_kind = private constant i32 0
20+
// CHECK-DEFAULT: @llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @__omp_rtl_debug_kind to i8*)], section "llvm.metadata"
21+
//.
22+
void foo() {
23+
#pragma omp target
24+
{ }
25+
}
26+
27+
#endif

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,10 @@ class OpenMPIRBuilder {
683683
omp::IdentFlag Flags = omp::IdentFlag(0),
684684
unsigned Reserve2Flags = 0);
685685

686+
/// Create a global value containing the \p DebugLevel to control debuggin in
687+
/// the module.
688+
GlobalValue *createDebugKind(unsigned DebugLevel);
689+
686690
/// Generate control flow and cleanup for cancellation.
687691
///
688692
/// \param CancelFlag Flag indicating if the cancellation is performed.

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3535
#include "llvm/Transforms/Utils/CodeExtractor.h"
3636
#include "llvm/Transforms/Utils/LoopPeel.h"
37+
#include "llvm/Transforms/Utils/ModuleUtils.h"
3738
#include "llvm/Transforms/Utils/UnrollLoop.h"
3839

3940
#include <sstream>
@@ -244,6 +245,18 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
244245
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
245246
}
246247

248+
GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) {
249+
IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
250+
auto *GV = new GlobalVariable(
251+
M, I32Ty,
252+
/* isConstant = */ true, GlobalValue::PrivateLinkage,
253+
ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
254+
255+
llvm::appendToUsed(M, {GV});
256+
257+
return GV;
258+
}
259+
247260
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
248261
IdentFlag LocFlags,
249262
unsigned Reserve2Flags) {

0 commit comments

Comments
 (0)