Skip to content

Commit 8f381c7

Browse files
committed
[OpenMP] Introduce -fopenmp-force-usm flag
The new flag force-includes `#pragma omp requires unified_shared_memory` in every translation unit. This provides a straightforward way to turn on unified shared memory (USM) for an application without modifying its sources.
1 parent 3c92011 commit 8f381c7

File tree

4 files changed

+25
-0
lines changed

4 files changed

+25
-0
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3381,6 +3381,8 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
33813381
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
33823382
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
33833383
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
3384+
// -fopenmp-force-usm: hidden from help output (HelpHidden) and visible to both
// the clang driver and cc1. The AMDGPU OpenMP toolchain checks for this flag
// and, when present, force-includes a header containing
// `#pragma omp requires unified_shared_memory`.
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
3385+
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
33843386

33853387
//===----------------------------------------------------------------------===//
33863388
// Shared cc1 + fc1 OpenMP Target Options

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,22 @@ AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
129129
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
130130
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
131131
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
132+
133+
CC1Args.push_back("-internal-isystem");
134+
SmallString<128> P(HostTC.getDriver().ResourceDir);
135+
llvm::sys::path::append(P, "include/cuda_wrappers");
136+
CC1Args.push_back(DriverArgs.MakeArgString(P));
137+
138+
// Force USM mode will forcefully include #pragma omp requires
139+
// unified_shared_memory via the force_usm header
140+
// XXX This may result in a compilation error if the source
141+
// file already includes that pragma.
142+
if (DriverArgs.hasArg(options::OPT_fopenmp_force_usm)) {
143+
CC1Args.push_back("-include");
144+
CC1Args.push_back(
145+
DriverArgs.MakeArgString(HostTC.getDriver().ResourceDir +
146+
"/include/openmp_wrappers/force_usm.h"));
147+
}
132148
}
133149

134150
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ set(openmp_wrapper_files
320320
openmp_wrappers/__clang_openmp_device_functions.h
321321
openmp_wrappers/complex_cmath.h
322322
openmp_wrappers/new
323+
openmp_wrappers/usm/force_usm.h
323324
)
324325

325326
set(llvm_libc_wrapper_files
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Header that the AMDGPU OpenMP toolchain force-includes (via `-include`) when
// -fopenmp-force-usm is passed. Its only effect is to issue the `requires`
// pragma below; the include guard keeps the pragma from being emitted more
// than once if this header is pulled in repeatedly.
#ifndef __CLANG_FORCE_OPENMP_USM
2+
#define __CLANG_FORCE_OPENMP_USM
3+
4+
// Declare that this translation unit requires unified shared memory.
#pragma omp requires unified_shared_memory
5+
6+
#endif

0 commit comments

Comments
 (0)