Skip to content

Commit b954926

Browse files
authored
[Flang][OpenMP] Add -fopenmp-force-usm option to flang (llvm#94359)
This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation. This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.
1 parent 79e09b1 commit b954926

File tree

10 files changed

+80
-13
lines changed

10 files changed

+80
-13
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3592,7 +3592,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
35923592
HelpText<"Do not create a host fallback if offloading to the device fails.">,
35933593
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
35943594
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
3595-
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
3595+
Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
35963596
HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
35973597
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
35983598
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
765765
CmdArgs.push_back("-fopenmp");
766766
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
767767

768+
if (Args.hasArg(options::OPT_fopenmp_force_usm))
769+
CmdArgs.push_back("-fopenmp-force-usm");
770+
768771
// FIXME: Clang supports a whole bunch more flags here.
769772
break;
770773
default:

flang/include/flang/Frontend/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
4242
LANGOPT(OpenMPIsTargetDevice, 1, false)
4343
/// Generate OpenMP target code only for GPUs
4444
LANGOPT(OpenMPIsGPU, 1, false)
45+
/// Force behavior as if the user specified `requires unified_shared_memory`
46+
LANGOPT(OpenMPForceUSM, 1, false)
4547
/// Enable debugging in the OpenMP offloading device RTL
4648
LANGOPT(OpenMPTargetDebug, 32, 0)
4749
/// Assume work-shared loops do not have more iterations than participating

flang/include/flang/Tools/CrossToolHelpers.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
130130
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
131131
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
132132
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
133-
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
134-
bool NoGPULib = false)
133+
bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
134+
std::string OMPHostIRFile = {}, bool NoGPULib = false)
135135
: OpenMPTargetDebug(OpenMPTargetDebug),
136136
OpenMPTeamSubscription(OpenMPTeamSubscription),
137137
OpenMPThreadSubscription(OpenMPThreadSubscription),
138138
OpenMPNoThreadState(OpenMPNoThreadState),
139139
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
140140
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
141-
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
142-
NoGPULib(NoGPULib) {}
141+
OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
142+
OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
143143

144144
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
145145
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
148148
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
149149
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
150150
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
151-
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
152-
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
151+
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
152+
OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
153+
NoGPULib(Opts.NoGPULib) {}
153154

154155
uint32_t OpenMPTargetDebug = 0;
155156
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
158159
bool OpenMPNoNestedParallelism = false;
159160
bool OpenMPIsTargetDevice = false;
160161
bool OpenMPIsGPU = false;
162+
bool OpenMPForceUSM = false;
161163
uint32_t OpenMPVersion = 11;
162164
std::string OMPHostIRFile = {};
163165
bool NoGPULib = false;
@@ -172,6 +174,9 @@ struct OffloadModuleOpts {
172174
module.getOperation())) {
173175
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
174176
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
177+
if (Opts.OpenMPForceUSM) {
178+
offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
179+
}
175180
if (Opts.OpenMPIsTargetDevice) {
176181
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
177182
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
906906
res.getLangOpts().OpenMPVersion, diags)) {
907907
res.getLangOpts().OpenMPVersion = Version;
908908
}
909+
if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
910+
res.getLangOpts().OpenMPForceUSM = 1;
911+
}
909912
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
910913
res.getLangOpts().OpenMPIsTargetDevice = 1;
911914

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
26082608
symbol->details());
26092609
}
26102610

2611-
MlirRequires mlirFlags = MlirRequires::none;
2611+
// Use pre-populated omp.requires module attribute if it was set, so that
2612+
// the "-fopenmp-force-usm" compiler option is honored.
2613+
MlirRequires mlirFlags = offloadMod.getRequires();
26122614
if (semaFlags.test(SemaRequires::ReverseOffload))
26132615
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
26142616
if (semaFlags.test(SemaRequires::UnifiedAddress))

flang/test/Driver/omp-driver-offload.f90

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,23 @@
207207
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
208208
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
209209
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
210+
211+
! Test -fopenmp-force-usm option without offload
212+
! RUN: %flang -S -### %s -o %t 2>&1 \
213+
! RUN: -fopenmp -fopenmp-force-usm \
214+
! RUN: --target=aarch64-unknown-linux-gnu \
215+
! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
216+
217+
! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
218+
! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
219+
220+
! Test -fopenmp-force-usm option with offload
221+
! RUN: %flang -S -### %s -o %t 2>&1 \
222+
! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
223+
! RUN: --target=aarch64-unknown-linux-gnu \
224+
! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
225+
226+
! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
227+
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
228+
! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
229+
! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"

flang/test/Lower/OpenMP/force-usm.f90

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
3+
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
4+
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
5+
6+
! This test checks the addition of requires unified_shared_memory when
7+
! -fopenmp-force-usm is set
8+
9+
!CHECK: module attributes {
10+
!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
11+
program requires
12+
end program requires
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
3+
! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
4+
! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
5+
6+
! This test checks the addition of requires unified_shared_memory when
7+
! -fopenmp-force-usm is set, even when other requires directives are present
8+
9+
!CHECK: module attributes {
10+
!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
11+
program requires
12+
!$omp requires reverse_offload
13+
!$omp target
14+
!$omp end target
15+
end program requires

flang/tools/bbc/bbc.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
144144
llvm::cl::desc("enable openmp GPU target codegen"),
145145
llvm::cl::init(false));
146146

147+
static llvm::cl::opt<bool> enableOpenMPForceUSM(
148+
"fopenmp-force-usm",
149+
llvm::cl::desc("force openmp unified shared memory mode"),
150+
llvm::cl::init(false));
151+
147152
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
148153
// positive options are available, no negative options e.g. fopen_assume* vs
149154
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
374379
"-fopenmp-is-target-device is also set";
375380
return mlir::failure();
376381
}
377-
auto offloadModuleOpts =
378-
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
379-
setOpenMPThreadSubscription, setOpenMPNoThreadState,
380-
setOpenMPNoNestedParallelism, enableOpenMPDevice,
381-
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
382+
auto offloadModuleOpts = OffloadModuleOpts(
383+
setOpenMPTargetDebug, setOpenMPTeamSubscription,
384+
setOpenMPThreadSubscription, setOpenMPNoThreadState,
385+
setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
386+
enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
382387
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
383388
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
384389
}

0 commit comments

Comments
 (0)