Skip to content

[Flang][MLIR][OpenMP] Use function-attached target attributes for OpenMP lowering #78291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions flang/include/flang/Tools/CrossToolHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,17 +109,6 @@ void setOffloadModuleInterfaceAttributes(
}
}

// Common helper, shared across the Flang tools (bbc/flang), that assigns the
// TargetCPU/TargetFeatures attribute through the OpenMP
// OffloadModuleInterface of the given module.
//
// `module` is the MLIR module to annotate; `targetCPU` and `targetFeatures`
// are forwarded unchanged to the interface's setTarget(). When the module
// operation does not implement OffloadModuleInterface this is a no-op.
void setOffloadModuleInterfaceTargetAttribute(mlir::ModuleOp &module,
    llvm::StringRef targetCPU, llvm::StringRef targetFeatures) {
  // The interface is expected to have been registered by the OpenMPDialect.
  auto offloadIface =
      llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(module.getOperation());
  if (!offloadIface)
    return;

  offloadIface.setTarget(targetCPU, targetFeatures);
}

void setOpenMPVersionAttribute(mlir::ModuleOp &module, int64_t version) {
module.getOperation()->setAttr(
mlir::StringAttr::get(module.getContext(), llvm::Twine{"omp.version"}),
Expand Down
3 changes: 0 additions & 3 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,6 @@ bool CodeGenAction::beginSourceFileAction() {
Fortran::common::LanguageFeature::OpenMP)) {
setOffloadModuleInterfaceAttributes(*mlirModule,
ci.getInvocation().getLangOpts());
setOffloadModuleInterfaceTargetAttribute(
*mlirModule, targetMachine.getTargetCPU(),
targetMachine.getTargetFeatureString());
setOpenMPVersionAttribute(*mlirModule,
ci.getInvocation().getLangOpts().OpenMPVersion);
}
Expand Down
27 changes: 12 additions & 15 deletions flang/test/Lower/OpenMP/FIR/target_cpu_features.f90
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
!REQUIRES: amdgpu-registered-target, nvptx-registered-target
!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s

!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
!RUN: %flang_fc1 -emit-fir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s

!===============================================================================
! Target_Enter Simple
!===============================================================================

!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
!CHECK-SAME: +wavefrontsize64">
!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
!CHECK-LABEL: func.func @_QPomp_target_simple()
subroutine omp_target_simple
! Directive needed to prevent subroutine from being filtered out when
! compiling for the device.
!$omp declare target
end subroutine omp_target_simple
!AMDGCN: module attributes {
!AMDGCN-SAME: fir.target_cpu = "gfx908"
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>

!NVPTX: module attributes {
!NVPTX-SAME: fir.target_cpu = "sm_80"
!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]>
24 changes: 11 additions & 13 deletions flang/test/Lower/OpenMP/target_cpu_features.f90
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
!REQUIRES: amdgpu-registered-target, nvptx-registered-target
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s
!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s

!===============================================================================
! Target_Enter Simple
!===============================================================================

!CHECK: omp.target = #omp.target<target_cpu = "gfx908",
!CHECK-SAME: target_features = "+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,
!CHECK-SAME: +dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,
!CHECK-SAME: +gfx8-insts,+gfx9-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,
!CHECK-SAME: +wavefrontsize64">
!NVPTX: omp.target = #omp.target<target_cpu = "sm_80", target_features = "+ptx61,+sm_80">
!CHECK-LABEL: func.func @_QPomp_target_simple()
subroutine omp_target_simple
! Directive needed to prevent subroutine from being filtered out when
! compiling for the device.
!$omp declare target
end subroutine omp_target_simple
!AMDGCN: module attributes {
!AMDGCN-SAME: fir.target_cpu = "gfx908"
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>

!NVPTX: module attributes {
!NVPTX-SAME: fir.target_cpu = "sm_80"
!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]>
9 changes: 0 additions & 9 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,6 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
let assemblyFormat = "`<` struct(params) `>`";
}

// OpenMP dialect attribute (mnemonic "target") made of two string
// parameters: the target CPU name and the target features string.
// It is printed/parsed as `<` followed by the struct of its parameters
// and a closing `>`, e.g. #omp.target<target_cpu = "...", target_features = "...">.
def TargetAttr : OpenMP_Attr<"Target", "target"> {
let parameters = (ins
StringRefParameter<>:$target_cpu,
StringRefParameter<>:$target_features
);

let assemblyFormat = "`<` struct(params) `>`";
}


class OpenMP_Op<string mnemonic, list<Trait> traits = []> :
Op<OpenMP_Dialect, mnemonic, traits>;
Expand Down
28 changes: 0 additions & 28 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -205,34 +205,6 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
assumeTeamsOversubscription, assumeThreadsOversubscription,
assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
}]>,
InterfaceMethod<
/*description=*/[{
Get the Target attribute on the current module if it exists
and return the attribute, if it doesn't exist it returns a nullptr.
}],
/*retTy=*/"mlir::omp::TargetAttr",
/*methodName=*/"getTarget",
(ins), [{}], [{
if (Attribute flags = $_op->getAttr("omp.target"))
return ::llvm::dyn_cast_or_null<mlir::omp::TargetAttr>(flags);
return nullptr;
}]>,
InterfaceMethod<
/*description=*/[{
Set the attribute target on the current module with the
specified string arguments - name of cpu and corresponding features.
}],
/*retTy=*/"void",
/*methodName=*/"setTarget",
(ins "llvm::StringRef":$targetCPU,
"llvm::StringRef":$targetFeatures), [{}], [{
if (targetCPU.empty())
return;
$_op->setAttr(("omp." + mlir::omp::TargetAttr::getMnemonic()).str(),
mlir::omp::TargetAttr::get($_op->getContext(),
targetCPU.str(),
targetFeatures.str()));
}]>,
InterfaceMethod<
/*description=*/[{
Set a StringAttr on the current module containing the host IR file path. This
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2336,6 +2336,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!targetOpSupported(opInst))
return failure();

auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto targetOp = cast<omp::TargetOp>(opInst);
auto &targetRegion = targetOp.getRegion();
DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
Expand All @@ -2345,6 +2346,22 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codeGenIP) -> InsertPointTy {
// Forward target-cpu and target-features function attributes from the
// original function to the new outlined function.
llvm::Function *llvmParentFn =
moduleTranslation.lookupFunction(parentFn.getName());
llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent();
assert(llvmParentFn && llvmOutlinedFn &&
"Both parent and outlined functions must exist at this point");

if (auto attr = llvmParentFn->getFnAttribute("target-cpu");
attr.isStringAttribute())
llvmOutlinedFn->addFnAttr(attr);

if (auto attr = llvmParentFn->getFnAttribute("target-features");
attr.isStringAttribute())
llvmOutlinedFn->addFnAttr(attr);

builder.restoreIP(codeGenIP);
unsigned argIndex = 0;
for (auto &mapOp : mapOperands) {
Expand All @@ -2363,7 +2380,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
};

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
StringRef parentName = opInst.getParentOfType<LLVM::LLVMFuncOp>().getName();
StringRef parentName = parentFn.getName();

llvm::TargetRegionEntryInfo entryInfo;

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// The aim of the test is to check the LLVM IR codegen for the device
// for omp target parallel construct

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target<target_cpu = "gfx90a", target_features = "">} {
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
%0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
omp.target map_entries(%0 -> %arg2 : !llvm.ptr) {
Expand Down
23 changes: 23 additions & 0 deletions mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Test that the target_features and target_cpu llvm.func attributes are
// forwarded to outlined target region functions.

// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

module attributes {omp.is_target_device = false} {
llvm.func @omp_target_region() attributes {
target_cpu = "x86-64",
target_features = #llvm.target_features<["+mmx", "+sse"]>
} {
omp.target {
omp.terminator
}
llvm.return
}
}

// CHECK: define void @omp_target_region() #[[ATTRS:.*]] {
// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS]] {

// CHECK: attributes #[[ATTRS]] = {
// CHECK-SAME: "target-cpu"="x86-64"
// CHECK-SAME: "target-features"="+mmx,+sse"