Skip to content

Commit 3698504

Browse files
committed
[Flang][OpenMP] Add -fopenmp-force-usm option to flang
This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation. This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.
1 parent e325e2e commit 3698504

File tree

10 files changed

+81
-14
lines changed

10 files changed

+81
-14
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3574,7 +3574,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
35743574
HelpText<"Do not create a host fallback if offloading to the device fails.">,
35753575
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
35763576
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
3577-
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
3577+
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
35783578
HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
35793579
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
35803580
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
766766
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
767767

768768
// FIXME: Clang supports a whole bunch more flags here.
769+
if (Args.hasArg(options::OPT_fopenmp_force_usm))
770+
CmdArgs.push_back("-fopenmp-force-usm");
769771
break;
770772
default:
771773
// By default, if Clang doesn't know how to generate useful OpenMP code

flang/include/flang/Frontend/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
4242
LANGOPT(OpenMPIsTargetDevice, 1, false)
4343
/// Generate OpenMP target code only for GPUs
4444
LANGOPT(OpenMPIsGPU, 1, false)
45+
/// Add the unified_shared_memory requirement to the module (-fopenmp-force-usm)
46+
LANGOPT(OpenMPForceUSM, 1, false)
4547
/// Enable debugging in the OpenMP offloading device RTL
4648
LANGOPT(OpenMPTargetDebug, 32, 0)
4749
/// Assume work-shared loops do not have more iterations than participating

flang/include/flang/Tools/CrossToolHelpers.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
130130
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
131131
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
132132
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
133-
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
134-
bool NoGPULib = false)
133+
bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
134+
std::string OMPHostIRFile = {}, bool NoGPULib = false)
135135
: OpenMPTargetDebug(OpenMPTargetDebug),
136136
OpenMPTeamSubscription(OpenMPTeamSubscription),
137137
OpenMPThreadSubscription(OpenMPThreadSubscription),
138138
OpenMPNoThreadState(OpenMPNoThreadState),
139139
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
140140
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
141-
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
142-
NoGPULib(NoGPULib) {}
141+
OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
142+
OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
143143

144144
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
145145
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
148148
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
149149
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
150150
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
151-
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
152-
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
151+
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
152+
OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
153+
NoGPULib(Opts.NoGPULib) {}
153154

154155
uint32_t OpenMPTargetDebug = 0;
155156
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
158159
bool OpenMPNoNestedParallelism = false;
159160
bool OpenMPIsTargetDevice = false;
160161
bool OpenMPIsGPU = false;
162+
bool OpenMPForceUSM = false;
161163
uint32_t OpenMPVersion = 11;
162164
std::string OMPHostIRFile = {};
163165
bool NoGPULib = false;
@@ -172,13 +174,17 @@ struct OffloadModuleOpts {
172174
module.getOperation())) {
173175
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
174176
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
177+
if (Opts.OpenMPForceUSM) {
178+
offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
179+
}
175180
if (Opts.OpenMPIsTargetDevice) {
176181
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
177182
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
178183
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
179184

180-
if (!Opts.OMPHostIRFile.empty())
185+
if (!Opts.OMPHostIRFile.empty()) {
181186
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
187+
}
182188
}
183189
}
184190
}

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
906906
res.getLangOpts().OpenMPVersion, diags)) {
907907
res.getLangOpts().OpenMPVersion = Version;
908908
}
909+
if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
910+
res.getLangOpts().OpenMPForceUSM = 1;
911+
}
909912
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
910913
res.getLangOpts().OpenMPIsTargetDevice = 1;
911914

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
26082608
symbol->details());
26092609
}
26102610

2611-
MlirRequires mlirFlags = MlirRequires::none;
2611+
// Use pre-populated omp.requires module attribute if it was set, so that
2612+
// the "-fopenmp-force-usm" compiler option is honored.
2613+
MlirRequires mlirFlags = offloadMod.getRequires();
26122614
if (semaFlags.test(SemaRequires::ReverseOffload))
26132615
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
26142616
if (semaFlags.test(SemaRequires::UnifiedAddress))

flang/test/Driver/omp-driver-offload.f90

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,23 @@
207207
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
208208
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
209209
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
210+
211+
! Test -fopenmp-force-usm option without offload
212+
! RUN: %flang -S -### %s -o %t 2>&1 \
213+
! RUN: -fopenmp -fopenmp-force-usm \
214+
! RUN: --target=aarch64-unknown-linux-gnu \
215+
! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
216+
217+
! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
218+
! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
219+
220+
! Test -fopenmp-force-usm option with offload
221+
! RUN: %flang -S -### %s -o %t 2>&1 \
222+
! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
223+
! RUN: --target=aarch64-unknown-linux-gnu \
224+
! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
225+
226+
! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
227+
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
228+
! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
229+
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"

flang/test/Lower/OpenMP/force-usm.f90

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
3+
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
4+
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
5+
6+
! This test checks the addition of requires unified_shared_memory when
7+
! -fopenmp-force-usm is set
8+
9+
!CHECK: module attributes {
10+
!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
11+
program requires
12+
end program requires
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
3+
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
4+
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
5+
6+
! This test checks the addition of requires unified_shared_memory when
7+
! -fopenmp-force-usm is set, even when other requires directives are present
8+
9+
!CHECK: module attributes {
10+
!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
11+
program requires
12+
!$omp requires reverse_offload
13+
!$omp target
14+
!$omp end target
15+
end program requires

flang/tools/bbc/bbc.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
144144
llvm::cl::desc("enable openmp GPU target codegen"),
145145
llvm::cl::init(false));
146146

147+
static llvm::cl::opt<bool> enableOpenMPForceUSM(
148+
"fopenmp-force-usm",
149+
llvm::cl::desc("force openmp unified shared memory mode"),
150+
llvm::cl::init(false));
151+
147152
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
148153
// positive options are available, no negative options e.g. fopen_assume* vs
149154
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
374379
"-fopenmp-is-target-device is also set";
375380
return mlir::failure();
376381
}
377-
auto offloadModuleOpts =
378-
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
379-
setOpenMPThreadSubscription, setOpenMPNoThreadState,
380-
setOpenMPNoNestedParallelism, enableOpenMPDevice,
381-
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
382+
auto offloadModuleOpts = OffloadModuleOpts(
383+
setOpenMPTargetDebug, setOpenMPTeamSubscription,
384+
setOpenMPThreadSubscription, setOpenMPNoThreadState,
385+
setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
386+
enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
382387
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
383388
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
384389
}

0 commit comments

Comments
 (0)