-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][OpenMP][OMPIRBuilder] Add lowering support for omp.target_triples #100156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: Sergio Afonso (skatrak) ChangesThis patch modifies MLIR to LLVM IR lowering of the OpenMP dialect to take into consideration the contents of the It adds the The result of this change is that offloading calls are only generated when the Several unit tests impacted by these changes, which are intended to check host code generated for Patch is 21.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100156.diff 14 Files Affected:
diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index 591be0b680a51..055fdecc91464 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -6,7 +6,7 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!
-!RUN: %flang_fc1 -emit-llvm -fopenmp -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -flang-deprecated-no-hlfir %s -o - | FileCheck %s
!===============================================================================
! Check MapTypes for target implicit captures
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a6995888de7d4..dbfe95790f4de 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -115,6 +115,10 @@ class OpenMPIRBuilderConfig {
// Grid Value for the GPU target
std::optional<omp::GV> GridValue;
+ /// When compilation is being done for the OpenMP host (i.e. `IsTargetDevice =
+ /// false`), this contains the list of offloading triples associated, if any.
+ SmallVector<Triple> TargetTriples;
+
OpenMPIRBuilderConfig();
OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
bool OpenMPOffloadMandatory,
@@ -2815,6 +2819,7 @@ class OpenMPIRBuilder {
/// Generator for '#omp target'
///
/// \param Loc where the target data construct was encountered.
+ /// \param IsOffloadEntry whether it is an offload entry.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
@@ -2828,6 +2833,7 @@ class OpenMPIRBuilder {
/// \param Dependencies A vector of DependData objects that carry
// dependency information as passed in the depend clause
InsertPointTy createTarget(const LocationDescription &Loc,
+ bool IsOffloadEntry,
OpenMPIRBuilder::InsertPointTy AllocaIP,
OpenMPIRBuilder::InsertPointTy CodeGenIP,
TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ab2482b91213b..6aa8faaf67f85 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6769,7 +6769,7 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
return ProxyFn;
}
static void emitTargetOutlinedFunction(
- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
@@ -6782,8 +6782,8 @@ static void emitTargetOutlinedFunction(
CBFunc, ArgAccessorFuncCB);
};
- OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
- OutlinedFn, OutlinedFnID);
+ OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
+ IsOffloadEntry, OutlinedFn, OutlinedFnID);
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
Function *OutlinedFn, Value *OutlinedFnID,
@@ -7053,6 +7053,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
<< "\n");
return Builder.saveIP();
}
+
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7060,6 +7061,20 @@ static void emitTargetCall(
SmallVectorImpl<Value *> &Args,
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
+ // emitKernelLaunch
+ auto &&EmitTargetCallFallbackCB =
+ [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
+ Builder.restoreIP(IP);
+ Builder.CreateCall(OutlinedFn, Args);
+ return Builder.saveIP();
+ };
+
+ // If we don't have an ID for the target region, it means an offload entry
+ // wasn't created. In this case we just run the host fallback directly.
+ if (!OutlinedFnID) {
+ Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
+ return;
+ }
OpenMPIRBuilder::TargetDataInfo Info(
/*RequiresDevicePointerInfo=*/false,
@@ -7073,14 +7088,6 @@ static void emitTargetCall(
OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
!MapInfo.Names.empty());
- // emitKernelLaunch
- auto &&EmitTargetCallFallbackCB =
- [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
- Builder.restoreIP(IP);
- Builder.CreateCall(OutlinedFn, Args);
- return Builder.saveIP();
- };
-
unsigned NumTargetItems = MapInfo.BasePointers.size();
// TODO: Use correct device ID
Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
@@ -7116,7 +7123,7 @@ static void emitTargetCall(
}
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
+ const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
int32_t NumThreads, SmallVectorImpl<Value *> &Args,
GenMapInfoCallbackTy GenMapInfoCB,
@@ -7130,12 +7137,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
Builder.restoreIP(CodeGenIP);
Function *OutlinedFn;
- Constant *OutlinedFnID;
+ Constant *OutlinedFnID = nullptr;
// The target region is outlined into its own function. The LLVM IR for
// the target region itself is generated using the callbacks CBFunc
// and ArgAccessorFuncCB
- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn,
- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
+ emitTargetOutlinedFunction(*this, Builder, IsOffloadEntry, EntryInfo,
+ OutlinedFn, OutlinedFnID, Args, CBFunc,
+ ArgAccessorFuncCB);
// If we are not on the target device, then we need to generate code
// to make a remote call (offload) to the previously outlined function
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8653bbd3d38fd..d6213e84bbbb5 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5983,8 +5983,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
Builder.restoreIP(OMPBuilder.createTarget(
- OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs,
- GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
+ OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
+ EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -6087,7 +6087,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
@@ -6235,7 +6236,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8b031deca8931..5ebcec0156d3d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3196,6 +3196,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!targetOpSupported(opInst))
return failure();
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto targetOp = cast<omp::TargetOp>(opInst);
auto &targetRegion = targetOp.getRegion();
@@ -3203,6 +3205,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<Value> mapOperands = targetOp.getMapOperands();
llvm::Function *llvmOutlinedFn = nullptr;
+ // TODO: It can also be false if a compile-time constant `false` IF clause is
+ // specified.
+ bool isOffloadEntry =
+ isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+
LogicalResult bodyGenStatus = success();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
@@ -3270,14 +3277,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
llvm::Value *&retVal, InsertPointTy allocaIP,
InsertPointTy codeGenIP) {
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-
// We just return the unaltered argument for the host function
// for now, some alterations may be required in the future to
// keep host fallback functions working identically to the device
// version (e.g. pass ByCopy values should be treated as such on
// host and device, currently not always the case)
- if (!ompBuilder->Config.isTargetDevice()) {
+ if (!isTargetDevice) {
retVal = cast<llvm::Value>(&arg);
return codeGenIP;
}
@@ -3303,9 +3308,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
moduleTranslation, dds);
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
- ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
- defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB,
- dds));
+ ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
+ defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
+ argAccessorCB, dds));
// Remap access operations to declare target reference pointers for the
// device, essentially generating extra loadop's as necessary
@@ -3679,6 +3684,23 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
}
return failure();
})
+ .Case("omp.target_triples",
+ [&](Attribute attr) {
+ if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+ llvm::OpenMPIRBuilderConfig &config =
+ moduleTranslation.getOpenMPBuilder()->Config;
+ config.TargetTriples.clear();
+ config.TargetTriples.reserve(triplesAttr.size());
+ for (Attribute tripleAttr : triplesAttr) {
+ if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+ config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+ else
+ return failure();
+ }
+ return success();
+ }
+ return failure();
+ })
.Default([](Attribute) {
// Fall through for omp attributes that do not require lowering.
return success();
diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
index 0016a1f05a2b1..a14214cd8c1cb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
@@ -7,7 +7,7 @@
// array bounds to lower to the full size of the array and the sectioned
// array to be the size of 3*3*1*element-byte-size (36 bytes in this case).
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_3d_target_array_section() {
%0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
index 8635ea4956706..7c494e80155bb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-depend.mlir b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
index c386342005e5e..c66fe8f455dfb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-depend.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.constant(39 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
@@ -117,6 +119,7 @@
llvm.call @_FortranAProgramEndStatement() {fastmathFlags = #llvm.fastmath<contract>} : () -> ()
llvm.return %0 : i32
}
+}
// %strucArg holds pointers to shared data.
// CHECK: define void @_QQmain() {
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 9b46f84e5050f..f0e301bd70e3b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -6,7 +6,7 @@
// alongside the omp.map.info, the test utilises mapping of array sections,
// full arrays and individual allocated scalars.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(5 : index) : i64
%1 = llvm.mlir.constant(2 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
index 7273f53d0a3db..396628e1081e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
@@ -5,7 +5,7 @@
// to LLVM-IR from MLIR when a fortran common block is lowered alongside
// the omp.map.info.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @omp_map_common_block_using_common_block_members() {
%0 = llvm.mlir.constant(4 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index e4d82d4a58c89..8cec94abf968b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -7,7 +7,7 @@
// derived type) where members of both the nested and outer record type have
// members mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index c7a87e44d6537..bbfcb4eecb3e8 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -6,7 +6,7 @@
// (C++/C class/structure, Fortran derived type) where only members of the record
// type are mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
new file mode 100644
index 0000000000000..61b6f3b91cd79
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @omp_target_region_() {
+ %0 = llvm.mlir.constant(20 : i32) : i32
+ %1 = llvm.mlir.constant(10 : i32) : i32
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i64) : i64
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(1 : i64) : i64
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
+ llvm.store %1, %3 : i32, !llvm.ptr
+ llvm.store %0, %5 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.load %arg0 : !llvm.ptr -> i32
+ %9 = llvm.load %arg1 : !llvm.ptr -> i32
+ %10 = llvm.add %8, %9 : i32
+ llvm.store %10, %arg2 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_target_no_map() {
+ omp.target {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: define void @omp_target_region_()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE1:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: define void @omp_target_no_map()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_no_map_l[[LINE2:.*]]()
+// CHECK-NEXT: ret void
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LIN...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, @skatrak - Just a minor nit and and I opened a linked issue so that the exact linker error message is captured for posterity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @bhandarkar-pranav for the review, you comment should be addressed now. I'll rebase the whole PR stack once I check that the buildbot issue caused by the first is resolved. Then, this one should be green too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
e2fe2f8
to
f16bc33
Compare
c17698c
to
9094873
Compare
// Generate a function call to the host fallback implementation of the target | ||
// region. This is called by the host when no offload entry was generated for | ||
// the target region and when the offloading call fails at runtime. | ||
auto &&EmitTargetCallFallbackCB = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a note here, that if (!OutlinedFnID)
, then we cannot simply inline a call to OutlinedFn
. We need to the following check
if (!OutlinedFnID) {
if(RequiresOuterTargetTask)
Builder.restoreIP(emitTargetTask(...));
else
Builder.restoreIP(EmitTargetCallFallBackCB(Builder.saveIP()));
}
But dont change this PR because I'll fold that into my upcoming changes and more importantly, emitTargetTask
->emitKernelLaunch
assert on OutlinedFnID
here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for pointing out this issue. I just pushed some changes to hopefully address it, though I think these should get a second review from you since you're more familiar with the handling of target depend
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @skatrak. LGTM
f16bc33
to
5a8c5c3
Compare
89d28a4
to
366b716
Compare
5a8c5c3
to
fa11ec9
Compare
This patch modifies MLIR to LLVM IR lowering of the OpenMP dialect to take into consideration the contents of the `omp.target_triples` module attribute while generating code for `omp.target` operations. It adds the `OpenMPIRBuilderConfig::TargetTriples` field and initializes it using the `amendOperation` flow of the `OpenMPToLLVMIRTranslation` pass. Some changes are introduced into the `OpenMPIRBuilder` to allow passing the information about whether a target region is intended to be offloaded from outside. The result of this change is that offloading calls are only generated when the `--offload-arch` or `-fopenmp-targets` options are given to the compiler. Otherwise, only the host fallback code is generated. This fixes linker errors currently triggered by `flang-new` if a source file containing a `target` construct is compiled without any of the aforementioned options. Several unit tests impacted by these changes, which are intended to check host code generated for `omp.target` operations, are updated to contain the new attribute. Without it, no calls to `__tgt_target_kernel` and associated control flow operations are generated. Fixes #100209.
366b716
to
6f7653b
Compare
This patch modifies MLIR to LLVM IR lowering of the OpenMP dialect to take into consideration the contents of the
omp.target_triples
module attribute while generating code foromp.target
operations.It adds the
OpenMPIRBuilderConfig::TargetTriples
field and initializes it using theamendOperation
flow of theOpenMPToLLVMIRTranslation
pass. Some changes are introduced into theOpenMPIRBuilder
to allow passing the information about whether a target region is intended to be offloaded from outside.The result of this change is that offloading calls are only generated when the
--offload-arch
or-fopenmp-targets
options are given to the compiler. Otherwise, only the host fallback code is generated. This fixes linker errors currently triggered byflang-new
if a source file containing atarget
construct is compiled without any of the aforementioned options.Several unit tests impacted by these changes, which are intended to check host code generated for
omp.target
operations, are updated to contain the new attribute. Without it, no calls to__tgt_target_kernel
and associated control flow operations are generated.