-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Flang][OpenMP] Add -fopenmp-force-usm option to flang #94359
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-clang-driver Author: Sergio Afonso (skatrak) ChangesThis patch enables the This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit. Full diff: https://github.com/llvm/llvm-project/pull/94359.diff 10 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 57f37c5023110..eefec33e6d333 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3574,7 +3574,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
HelpText<"Do not create a host fallback if offloading to the device fails.">,
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
- Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+ Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42ca060186fd8..b7abe7b1c19bc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
// FIXME: Clang supports a whole bunch more flags here.
+ if (Args.hasArg(options::OPT_fopenmp_force_usm))
+ CmdArgs.push_back("-fopenmp-force-usm");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def
index 2bf10826120a8..d3e1e972d1519 100644
--- a/flang/include/flang/Frontend/LangOptions.def
+++ b/flang/include/flang/Frontend/LangOptions.def
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
LANGOPT(OpenMPIsTargetDevice, 1, false)
/// Generate OpenMP target code only for GPUs
LANGOPT(OpenMPIsGPU, 1, false)
+/// Generate OpenMP target code only for GPUs
+LANGOPT(OpenMPForceUSM, 1, false)
/// Enable debugging in the OpenMP offloading device RTL
LANGOPT(OpenMPTargetDebug, 32, 0)
/// Assume work-shared loops do not have more iterations than participating
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 77b68fc6187fa..26fbe51d329c7 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
- bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
- bool NoGPULib = false)
+ bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
+ std::string OMPHostIRFile = {}, bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
- OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
- NoGPULib(NoGPULib) {}
+ OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
+ OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
- OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
- OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
+ OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
+ OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
+ NoGPULib(Opts.NoGPULib) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
bool OpenMPNoNestedParallelism = false;
bool OpenMPIsTargetDevice = false;
bool OpenMPIsGPU = false;
+ bool OpenMPForceUSM = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
@@ -172,13 +174,17 @@ struct OffloadModuleOpts {
module.getOperation())) {
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
+ if (Opts.OpenMPForceUSM) {
+ offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
+ }
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
- if (!Opts.OMPHostIRFile.empty())
+ if (!Opts.OMPHostIRFile.empty()) {
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
+ }
}
}
}
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 50c3e8b0113b5..f64a939b785ef 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
res.getLangOpts().OpenMPVersion, diags)) {
res.getLangOpts().OpenMPVersion = Version;
}
+ if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
+ res.getLangOpts().OpenMPForceUSM = 1;
+ }
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
res.getLangOpts().OpenMPIsTargetDevice = 1;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9598457d123cf..af9e2af24619b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
symbol->details());
}
- MlirRequires mlirFlags = MlirRequires::none;
+ // Use pre-populated omp.requires module attribute if it was set, so that
+ // the "-fopenmp-force-usm" compiler option is honored.
+ MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 8f48ca75114ce..6fb4f4eeeeca1 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -207,3 +207,23 @@
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
+
+! Test -fopenmp-force-usm option without offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
+
+! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! Test -fopenmp-force-usm option with offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
+
+! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
diff --git a/flang/test/Lower/OpenMP/force-usm.f90 b/flang/test/Lower/OpenMP/force-usm.f90
new file mode 100644
index 0000000000000..90bbf3c4d842f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/force-usm.f90
@@ -0,0 +1,12 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
+program requires
+end program requires
diff --git a/flang/test/Lower/OpenMP/requires-force-usm.f90 b/flang/test/Lower/OpenMP/requires-force-usm.f90
new file mode 100644
index 0000000000000..5f5cf9e64cd70
--- /dev/null
+++ b/flang/test/Lower/OpenMP/requires-force-usm.f90
@@ -0,0 +1,15 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set, even when other requires directives are present
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
+program requires
+ !$omp requires reverse_offload
+ !$omp target
+ !$omp end target
+end program requires
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index bab21338cef26..3485c1499d3b6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
llvm::cl::desc("enable openmp GPU target codegen"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> enableOpenMPForceUSM(
+ "fopenmp-force-usm",
+ llvm::cl::desc("force openmp unified shared memory mode"),
+ llvm::cl::init(false));
+
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
// positive options are available, no negative options e.g. fopen_assume* vs
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
"-fopenmp-is-target-device is also set";
return mlir::failure();
}
- auto offloadModuleOpts =
- OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
- setOpenMPThreadSubscription, setOpenMPNoThreadState,
- setOpenMPNoNestedParallelism, enableOpenMPDevice,
- enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
+ auto offloadModuleOpts = OffloadModuleOpts(
+ setOpenMPTargetDebug, setOpenMPTeamSubscription,
+ setOpenMPThreadSubscription, setOpenMPNoThreadState,
+ setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
+ enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
|
@llvm/pr-subscribers-clang Author: Sergio Afonso (skatrak) ChangesThis patch enables the This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit. Full diff: https://github.com/llvm/llvm-project/pull/94359.diff 10 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 57f37c5023110..eefec33e6d333 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3574,7 +3574,7 @@ def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<
HelpText<"Do not create a host fallback if offloading to the device fails.">,
MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
def fopenmp_force_usm : Flag<["-"], "fopenmp-force-usm">, Group<f_Group>,
- Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option]>,
+ Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Force behvaior as if the user specified pragma omp requires unified_shared_memory.">,
MarshallingInfoFlag<LangOpts<"OpenMPForceUSM">>;
def fopenmp_target_jit : Flag<["-"], "fopenmp-target-jit">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 42ca060186fd8..b7abe7b1c19bc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -766,6 +766,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
// FIXME: Clang supports a whole bunch more flags here.
+ if (Args.hasArg(options::OPT_fopenmp_force_usm))
+ CmdArgs.push_back("-fopenmp-force-usm");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def
index 2bf10826120a8..d3e1e972d1519 100644
--- a/flang/include/flang/Frontend/LangOptions.def
+++ b/flang/include/flang/Frontend/LangOptions.def
@@ -42,6 +42,8 @@ LANGOPT(OpenMPVersion, 32, 0)
LANGOPT(OpenMPIsTargetDevice, 1, false)
/// Generate OpenMP target code only for GPUs
LANGOPT(OpenMPIsGPU, 1, false)
+/// Generate OpenMP target code only for GPUs
+LANGOPT(OpenMPForceUSM, 1, false)
/// Enable debugging in the OpenMP offloading device RTL
LANGOPT(OpenMPTargetDebug, 32, 0)
/// Assume work-shared loops do not have more iterations than participating
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 77b68fc6187fa..26fbe51d329c7 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -130,16 +130,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
- bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
- bool NoGPULib = false)
+ bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
+ std::string OMPHostIRFile = {}, bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
- OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
- NoGPULib(NoGPULib) {}
+ OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
+ OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -148,8 +148,9 @@ struct OffloadModuleOpts {
OpenMPNoThreadState(Opts.OpenMPNoThreadState),
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
- OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
- OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
+ OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
+ OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
+ NoGPULib(Opts.NoGPULib) {}
uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
@@ -158,6 +159,7 @@ struct OffloadModuleOpts {
bool OpenMPNoNestedParallelism = false;
bool OpenMPIsTargetDevice = false;
bool OpenMPIsGPU = false;
+ bool OpenMPForceUSM = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
@@ -172,13 +174,17 @@ struct OffloadModuleOpts {
module.getOperation())) {
offloadMod.setIsTargetDevice(Opts.OpenMPIsTargetDevice);
offloadMod.setIsGPU(Opts.OpenMPIsGPU);
+ if (Opts.OpenMPForceUSM) {
+ offloadMod.setRequires(mlir::omp::ClauseRequires::unified_shared_memory);
+ }
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
- if (!Opts.OMPHostIRFile.empty())
+ if (!Opts.OMPHostIRFile.empty()) {
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
+ }
}
}
}
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 50c3e8b0113b5..f64a939b785ef 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -906,6 +906,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
res.getLangOpts().OpenMPVersion, diags)) {
res.getLangOpts().OpenMPVersion = Version;
}
+ if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
+ res.getLangOpts().OpenMPForceUSM = 1;
+ }
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
res.getLangOpts().OpenMPIsTargetDevice = 1;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9598457d123cf..af9e2af24619b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2608,7 +2608,9 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
symbol->details());
}
- MlirRequires mlirFlags = MlirRequires::none;
+ // Use pre-populated omp.requires module attribute if it was set, so that
+ // the "-fopenmp-force-usm" compiler option is honored.
+ MlirRequires mlirFlags = offloadMod.getRequires();
if (semaFlags.test(SemaRequires::ReverseOffload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
if (semaFlags.test(SemaRequires::UnifiedAddress))
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 8f48ca75114ce..6fb4f4eeeeca1 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -207,3 +207,23 @@
! RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
! RUN: | FileCheck --check-prefix=ROCM-PATH %s
! ROCM-PATH: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd
+
+! Test -fopenmp-force-usm option without offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-NO-OFFLOAD
+
+! FORCE-USM-NO-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-NO-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! Test -fopenmp-force-usm option with offload
+! RUN: %flang -S -### %s -o %t 2>&1 \
+! RUN: -fopenmp -fopenmp-force-usm --offload-arch=gfx90a \
+! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: | FileCheck %s --check-prefix=FORCE-USM-OFFLOAD
+
+! FORCE-USM-OFFLOAD: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa"
+! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
diff --git a/flang/test/Lower/OpenMP/force-usm.f90 b/flang/test/Lower/OpenMP/force-usm.f90
new file mode 100644
index 0000000000000..90bbf3c4d842f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/force-usm.f90
@@ -0,0 +1,12 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires unified_shared_memory>
+program requires
+end program requires
diff --git a/flang/test/Lower/OpenMP/requires-force-usm.f90 b/flang/test/Lower/OpenMP/requires-force-usm.f90
new file mode 100644
index 0000000000000..5f5cf9e64cd70
--- /dev/null
+++ b/flang/test/Lower/OpenMP/requires-force-usm.f90
@@ -0,0 +1,15 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device -fopenmp-force-usm %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-force-usm -emit-hlfir %s -o - | FileCheck %s
+
+! This test checks the addition of requires unified_shared_memory when
+! -fopenmp-force-usm is set, even when other requires directives are present
+
+!CHECK: module attributes {
+!CHECK-SAME: omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>
+program requires
+ !$omp requires reverse_offload
+ !$omp target
+ !$omp end target
+end program requires
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index bab21338cef26..3485c1499d3b6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -144,6 +144,11 @@ static llvm::cl::opt<bool>
llvm::cl::desc("enable openmp GPU target codegen"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> enableOpenMPForceUSM(
+ "fopenmp-force-usm",
+ llvm::cl::desc("force openmp unified shared memory mode"),
+ llvm::cl::init(false));
+
// A simplified subset of the OpenMP RTL Flags from Flang, only the primary
// positive options are available, no negative options e.g. fopen_assume* vs
// fno_open_assume*
@@ -374,11 +379,11 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
"-fopenmp-is-target-device is also set";
return mlir::failure();
}
- auto offloadModuleOpts =
- OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
- setOpenMPThreadSubscription, setOpenMPNoThreadState,
- setOpenMPNoNestedParallelism, enableOpenMPDevice,
- enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
+ auto offloadModuleOpts = OffloadModuleOpts(
+ setOpenMPTargetDebug, setOpenMPTeamSubscription,
+ setOpenMPThreadSubscription, setOpenMPNoThreadState,
+ setOpenMPNoNestedParallelism, enableOpenMPDevice, enableOpenMPGPU,
+ enableOpenMPForceUSM, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch enables the `-fopenmp-force-usm` option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of the `unified_shared_memory` bit to the `omp.requires` attribute of the top-level module operation. This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.
Thank your for the reviews. Merging now since buildbot failure is unrelated. |
This patch enables the
-fopenmp-force-usm
option to be passed to the flang driver, which forwards it to the compiler frontend. This flag, when set, results in the introduction of theunified_shared_memory
bit to theomp.requires
attribute of the top-level module operation.This is later combined with any other target device-related REQUIRES clauses that may have been explicitly set in the compilation unit.