Skip to content

Commit fbbb8ad

Browse files
committed
[mlir][gpu] Add passes to attach (NVVM|ROCDL) target attributes to GPU Modules
Adds the passes `nvvm-attach-target` & `rocdl-attach-target` for attaching `nvvm.target` & `rocdl.target` attributes to GPU Modules. These passes search GPU Modules in the immediate region of the Op being acted on, attaching the target attribute to the module. Modules can be selected using a regex string, allowing fine-grained attachment of targets; see the test `attach-target.mlir` for an example. Depends on D154153 Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D157351
1 parent a7cdea7 commit fbbb8ad

File tree

5 files changed

+316
-0
lines changed

5 files changed

+316
-0
lines changed

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,109 @@ def GpuModuleToBinaryPass
8282
];
8383
}
8484

85+
// Declares the `nvvm-attach-target` pass. The empty second template argument
// leaves the pass unanchored to a specific operation type; every option below
// becomes a member of the generated pass base class.
def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
  let summary = "Attaches an NVVM target attribute to a GPU Module.";
  let description = [{
    This pass searches for all GPU Modules in the immediate regions and attaches
    an NVVM target if the module matches the name specified by the `module` argument.
    If `module` is left empty, the target is attached to every GPU Module found.

    Example:
    ```
    // File: in.mlir:
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 {...}
    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
    gpu.module @rocdl_module_1 {...}
    ```
  }];
  let options = [
    // Module selection.
    Option<"moduleMatcher", "module", "std::string",
           /*default=*/ [{""}],
           "Regex used to identify the modules to attach the target to. "
           "An empty regex selects every module.">,
    // Fields of the attached `#nvvm.target` attribute.
    Option<"triple", "triple", "std::string",
           /*default=*/ "\"nvptx64-nvidia-cuda\"",
           "Target triple.">,
    Option<"chip", "chip", "std::string",
           /*default=*/"\"sm_50\"",
           "Target chip.">,
    Option<"features", "features", "std::string",
           /*default=*/"\"+ptx60\"",
           "Target features.">,
    Option<"optLevel", "O", "unsigned",
           /*default=*/"2",
           "Optimization level.">,
    // Boolean switches; each enabled switch is recorded in the target's flags.
    Option<"fastFlag", "fast", "bool",
           /*default=*/"false",
           "Enable fast math mode.">,
    Option<"ftzFlag", "ftz", "bool",
           /*default=*/"false",
           "Enable flush to zero for denormals.">,
    ListOption<"linkLibs", "l", "std::string",
           "Extra bitcode libraries paths to link to.">,
  ];
}
129+
130+
// Declares the `rocdl-attach-target` pass. The empty second template argument
// leaves the pass unanchored to a specific operation type; every option below
// becomes a member of the generated pass base class.
def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
  let description = [{
    This pass searches for all GPU Modules in the immediate regions and attaches
    a ROCDL target if the module matches the name specified by the `module` argument.
    If `module` is left empty, the target is attached to every GPU Module found.

    Example:
    ```
    // File: in.mlir:
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 {...}
    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
    gpu.module @nvvm_module_1 {...}
    gpu.module @nvvm_module_2 {...}
    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
    ```
  }];
  let options = [
    // Module selection.
    Option<"moduleMatcher", "module", "std::string",
           /*default=*/ [{""}],
           "Regex used to identify the modules to attach the target to. "
           "An empty regex selects every module.">,
    // Fields of the attached `#rocdl.target` attribute.
    Option<"triple", "triple", "std::string",
           /*default=*/ "\"amdgcn-amd-amdhsa\"",
           "Target triple.">,
    Option<"chip", "chip", "std::string",
           /*default=*/"\"gfx900\"",
           "Target chip.">,
    Option<"features", "features", "std::string",
           /*default=*/"\"\"",
           "Target features.">,
    Option<"abiVersion", "abi", "std::string",
           /*default=*/"\"400\"",
           "ABI version.">,
    Option<"optLevel", "O", "unsigned",
           /*default=*/"2",
           "Optimization level.">,
    // Boolean switches. `wave64` and `correct-sqrt` default to true; the pass
    // records their negation (`no_wave64`, `unsafe_sqrt`) when they are
    // disabled.
    Option<"wave64Flag", "wave64", "bool",
           /*default=*/"true",
           "Use Wave64 mode.">,
    Option<"fastFlag", "fast", "bool",
           /*default=*/"false",
           "Enable fast relaxed math opt.">,
    Option<"dazFlag", "daz", "bool",
           /*default=*/"false",
           "Enable denormals are zero opt.">,
    Option<"finiteOnlyFlag", "finite-only", "bool",
           /*default=*/"false",
           "Enable finite only opt.">,
    Option<"unsafeMathFlag", "unsafe-math", "bool",
           /*default=*/"false",
           "Enable unsafe math opt.">,
    Option<"correctSqrtFlag", "correct-sqrt", "bool",
           /*default=*/"true",
           "Enable correct rounded sqrt.">,
    ListOption<"linkLibs", "l", "std::string",
           "Extra bitcode libraries paths to link to.">,
  ];
}
189+
85190
#endif // MLIR_DIALECT_GPU_PASSES

mlir/lib/Dialect/GPU/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,13 @@ add_mlir_dialect_library(MLIRGPUTransforms
5252
Transforms/KernelOutlining.cpp
5353
Transforms/MemoryPromotion.cpp
5454
Transforms/ModuleToBinary.cpp
55+
Transforms/NVVMAttachTarget.cpp
5556
Transforms/ParallelLoopMapper.cpp
5657
Transforms/SerializeToBlob.cpp
5758
Transforms/SerializeToCubin.cpp
5859
Transforms/SerializeToHsaco.cpp
5960
Transforms/ShuffleRewriter.cpp
61+
Transforms/ROCDLAttachTarget.cpp
6062

6163
ADDITIONAL_HEADER_DIRS
6264
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
//===- NVVMAttachTarget.cpp - Attach an NVVM target -----------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements the `GpuNVVMAttachTarget` pass, attaching `#nvvm.target`
10+
// attributes to GPU modules.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mlir/Dialect/GPU/Transforms/Passes.h"
15+
16+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
17+
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
18+
#include "mlir/IR/Builders.h"
19+
#include "mlir/Pass/Pass.h"
20+
#include "mlir/Target/LLVM/NVVM/Target.h"
21+
#include "llvm/Support/Regex.h"
22+
23+
namespace mlir {
24+
#define GEN_PASS_DEF_GPUNVVMATTACHTARGET
25+
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
26+
} // namespace mlir
27+
28+
using namespace mlir;
29+
using namespace mlir::NVVM;
30+
31+
namespace {
32+
struct NVVMAttachTarget
33+
: public impl::GpuNVVMAttachTargetBase<NVVMAttachTarget> {
34+
using Base::Base;
35+
36+
DictionaryAttr getFlags(OpBuilder &builder) const;
37+
38+
void runOnOperation() override;
39+
40+
void getDependentDialects(DialectRegistry &registry) const override {
41+
registerNVVMTarget(registry);
42+
}
43+
};
44+
} // namespace
45+
46+
/// Collects the boolean pass options that are switched on into a dictionary
/// of unit attributes keyed by flag name (`fast`, `ftz`).
///
/// Returns a null attribute when no flag is enabled.
DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
  SmallVector<NamedAttribute, 2> enabled;
  UnitAttr unit = builder.getUnitAttr();
  if (fastFlag)
    enabled.push_back(builder.getNamedAttr("fast", unit));
  if (ftzFlag)
    enabled.push_back(builder.getNamedAttr("ftz", unit));
  // No flag requested: hand back a null attribute rather than an empty
  // dictionary.
  if (enabled.empty())
    return nullptr;
  return builder.getDictionaryAttr(enabled);
}
60+
61+
void NVVMAttachTarget::runOnOperation() {
62+
OpBuilder builder(&getContext());
63+
ArrayRef<std::string> libs(linkLibs);
64+
SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
65+
auto target = builder.getAttr<NVVMTargetAttr>(
66+
optLevel, triple, chip, features, getFlags(builder),
67+
filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
68+
llvm::Regex matcher(moduleMatcher);
69+
for (Region &region : getOperation()->getRegions())
70+
for (Block &block : region.getBlocks())
71+
for (auto module : block.getOps<gpu::GPUModuleOp>()) {
72+
// Check if the name of the module matches.
73+
if (!moduleMatcher.empty() && !matcher.match(module.getName()))
74+
continue;
75+
// Create the target array.
76+
SmallVector<Attribute> targets;
77+
if (std::optional<ArrayAttr> attrs = module.getTargets())
78+
targets.append(attrs->getValue().begin(), attrs->getValue().end());
79+
targets.push_back(target);
80+
// Remove any duplicate targets.
81+
targets.erase(std::unique(targets.begin(), targets.end()),
82+
targets.end());
83+
// Update the target attribute array.
84+
module.setTargetsAttr(builder.getArrayAttr(targets));
85+
}
86+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
//===- ROCDLAttachTarget.cpp - Attach a ROCDL target ----------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements the `GpuROCDLAttachTarget` pass, attaching
10+
// `#rocdl.target` attributes to GPU modules.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mlir/Dialect/GPU/Transforms/Passes.h"
15+
16+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
17+
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18+
#include "mlir/IR/Builders.h"
19+
#include "mlir/Pass/Pass.h"
20+
#include "mlir/Target/LLVM/ROCDL/Target.h"
21+
#include "llvm/Support/Regex.h"
22+
23+
namespace mlir {
24+
#define GEN_PASS_DEF_GPUROCDLATTACHTARGET
25+
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
26+
} // namespace mlir
27+
28+
using namespace mlir;
29+
using namespace mlir::ROCDL;
30+
31+
namespace {
32+
struct ROCDLAttachTarget
33+
: public impl::GpuROCDLAttachTargetBase<ROCDLAttachTarget> {
34+
using Base::Base;
35+
36+
DictionaryAttr getFlags(OpBuilder &builder) const;
37+
38+
void runOnOperation() override;
39+
40+
void getDependentDialects(DialectRegistry &registry) const override {
41+
registerROCDLTarget(registry);
42+
}
43+
};
44+
} // namespace
45+
46+
/// Translates the boolean pass options into a dictionary of unit attributes.
///
/// `wave64` and `correct-sqrt` are recorded in negated form (`no_wave64`,
/// `unsafe_sqrt`) when they are switched off; the remaining options are
/// recorded under their own name when switched on. Returns a null attribute
/// when nothing is recorded.
DictionaryAttr ROCDLAttachTarget::getFlags(OpBuilder &builder) const {
  SmallVector<NamedAttribute, 6> enabled;
  UnitAttr unit = builder.getUnitAttr();
  auto record = [&](StringRef name) {
    enabled.push_back(builder.getNamedAttr(name, unit));
  };

  // Options that are recorded only when disabled.
  if (!wave64Flag)
    record("no_wave64");
  // Options that are recorded only when enabled.
  if (fastFlag)
    record("fast");
  if (dazFlag)
    record("daz");
  if (finiteOnlyFlag)
    record("finite_only");
  if (unsafeMathFlag)
    record("unsafe_math");
  // Recorded only when disabled, like `wave64` above.
  if (!correctSqrtFlag)
    record("unsafe_sqrt");

  if (enabled.empty())
    return nullptr;
  return builder.getDictionaryAttr(enabled);
}
68+
69+
void ROCDLAttachTarget::runOnOperation() {
70+
OpBuilder builder(&getContext());
71+
ArrayRef<std::string> libs(linkLibs);
72+
SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
73+
auto target = builder.getAttr<ROCDLTargetAttr>(
74+
optLevel, triple, chip, features, abiVersion, getFlags(builder),
75+
filesToLink.size() ? builder.getStrArrayAttr(filesToLink) : nullptr);
76+
llvm::Regex matcher(moduleMatcher);
77+
for (Region &region : getOperation()->getRegions())
78+
for (Block &block : region.getBlocks())
79+
for (auto module : block.getOps<gpu::GPUModuleOp>()) {
80+
// Check if the name of the module matches.
81+
if (!moduleMatcher.empty() && !matcher.match(module.getName()))
82+
continue;
83+
// Create the target array.
84+
SmallVector<Attribute> targets;
85+
if (std::optional<ArrayAttr> attrs = module.getTargets())
86+
targets.append(attrs->getValue().begin(), attrs->getValue().end());
87+
targets.push_back(target);
88+
// Remove any duplicate targets.
89+
targets.erase(std::unique(targets.begin(), targets.end()),
90+
targets.end());
91+
// Update the target attribute array.
92+
module.setTargetsAttr(builder.getArrayAttr(targets));
93+
}
94+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s
2+
// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS
3+
4+
module attributes {gpu.container_module} {
5+
// Verify the target is attached to a module that has no targets yet.
6+
// CHECK: @nvvm_module_1 [#nvvm.target<O = 3, chip = "sm_90">] {
7+
gpu.module @nvvm_module_1 {
8+
}
9+
// Verify the target is appended after the module's pre-existing target.
10+
// CHECK: @nvvm_module_2 [#nvvm.target<chip = "sm_60">, #nvvm.target<O = 3, chip = "sm_90">] {
11+
gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_60">] {
12+
}
13+
// Verify the target is not added again when the module already carries it.
14+
// CHECK: @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
15+
gpu.module @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
16+
}
17+
// Verify the NVVM target is not added as it fails to match the regex, but the ROCDL does get appended.
18+
// CHECK: @rocdl_module [#rocdl.target<O = 3, chip = "gfx90a">] {
19+
gpu.module @rocdl_module {
20+
}
21+
// Check the options were added.
22+
// CHECK_OPTS: @options_module_1 [#nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>] {
23+
gpu.module @options_module_1 {
24+
}
25+
// Check the options were added and that the first target was preserved.
26+
// CHECK_OPTS: @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">, #nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>] {
27+
gpu.module @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">] {
28+
}
29+
}

0 commit comments

Comments
 (0)