Skip to content

Commit d297211

Browse files
jhuber6, jhuber-ornl
authored and committed
[OpenMP] Add a driver flag to enable the new device runtime library
This patch adds a driver flag `-fopenmp-target-new-runtime` to optionally enable the new device runtime bitcode library. This allows users to enable the new experimental runtime before it becomes the default in the future.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D106793
1 parent cbad576 commit d297211

File tree

6 files changed

+27
-2
lines changed

6 files changed

+27
-2
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all con
240240
LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.")
241241
LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.")
242242
LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
243+
LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
243244
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
244245
LANGOPT(RenderScript , 1, 0, "RenderScript")
245246

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,6 +2376,10 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
23762376
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
23772377
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
23782378
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
2379+
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
2380+
LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
2381+
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
2382+
NegFlag<SetFalse>>;
23792383
defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
23802384
LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
23812385
PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,14 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
222222

223223
if (DriverArgs.hasArg(options::OPT_nogpulib))
224224
return;
225-
std::string BitcodeSuffix = "amdgcn-" + GPUArch;
225+
226+
std::string BitcodeSuffix;
227+
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
228+
options::OPT_fno_openmp_target_new_runtime, false))
229+
BitcodeSuffix = "new-amdgcn-" + GPUArch;
230+
else
231+
BitcodeSuffix = "amdgcn-" + GPUArch;
232+
226233
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
227234
getTriple());
228235
}

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,13 @@ void CudaToolChain::addClangTargetOptions(
751751
return;
752752
}
753753

754-
std::string BitcodeSuffix = "nvptx-" + GpuArch.str();
754+
std::string BitcodeSuffix;
755+
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
756+
options::OPT_fno_openmp_target_new_runtime, false))
757+
BitcodeSuffix = "new-nvptx-" + GpuArch.str();
758+
else
759+
BitcodeSuffix = "nvptx-" + GpuArch.str();
760+
755761
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
756762
getTriple());
757763
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

clang/test/Driver/openmp-offload-gpu.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,11 @@
154154
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
155155
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
156156
// RUN: | FileCheck -check-prefix=CHK-BCLIB %s
157+
/// Check with the new runtime enabled
158+
// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
159+
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
160+
// RUN: -fopenmp-relocatable-target -fopenmp-target-new-runtime -save-temps -no-canonical-prefixes %s 2>&1 \
161+
// RUN: | FileCheck -check-prefix=CHK-BCLIB-NEW %s
157162
/// The user can override default detection using --libomptarget-nvptx-bc-path=.
158163
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
159164
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
@@ -162,6 +167,7 @@
162167
// RUN: | FileCheck -check-prefix=CHK-BCLIB-USER %s
163168

164169
// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-sm_35.bc
170+
// CHK-BCLIB-NEW: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-new-nvptx-sm_35.bc
165171
// CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
166172
// CHK-BCLIB-NOT: {{error:|warning:}}
167173

0 commit comments

Comments
 (0)