[mlir][gpu] Add passes to attach (NVVM|ROCDL) target attributes to GPU Modules

fabianmcg · fabianmcg · commit fbbb8adef15c · 2023-08-12T00:45:26.000Z
Adds the passes `nvvm-attach-target` & `rocdl-attach-target` for attaching `nvvm.target` & `rocdl.target` attributes to GPU Modules. These passes search for GPU Modules in the immediate regions of the Op being acted on and attach the target attribute to each selected module. Modules can be selected using a regex string, allowing fine-grained attachment of targets; see the test `attach-targets.mlir` for an example. Depends on D154153. Reviewed By: mehdi_amini. Differential Revision: https://reviews.llvm.org/D157351
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -82,4 +82,109 @@ def GpuModuleToBinaryPass
   ];
 }
 
+def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
+  let summary = "Attaches an NVVM target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    an NVVM target to those whose name matches the `module` regex (default: all).
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
+    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @rocdl_module_1 {...}
+    ```
+  }];
+  let options = [
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/"\"\"",
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/"\"nvptx64-nvidia-cuda\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"sm_50\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"+ptx60\"",
+           "Target features.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast math mode.">,
+    Option<"ftzFlag", "ftz", "bool",
+           /*default=*/"false",
+           "Enable flush to zero for denormals.">,
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">,
+  ];
+}
+
+def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
+  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    a ROCDL target to those whose name matches the `module` regex (default: all).
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
+    ```
+  }];
+  let options = [
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/"\"\"",
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/"\"amdgcn-amd-amdhsa\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"gfx900\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"\"",
+           "Target features.">,
+    Option<"abiVersion", "abi", "std::string",
+           /*default=*/"\"400\"",
+           "ABI version.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    Option<"wave64Flag", "wave64", "bool",
+           /*default=*/"true",
+           "Use Wave64 mode.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast relaxed math opt.">,
+    Option<"dazFlag", "daz", "bool",
+           /*default=*/"false",
+           "Enable denormals are zero opt.">,
+    Option<"finiteOnlyFlag", "finite-only", "bool",
+           /*default=*/"false",
+           "Enable finite only opt.">,
+    Option<"unsafeMathFlag", "unsafe-math", "bool",
+           /*default=*/"false",
+           "Enable unsafe math opt.">,
+    Option<"correctSqrtFlag", "correct-sqrt", "bool",
+           /*default=*/"true",
+           "Enable correct rounded sqrt.">,
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">,
+  ];
+}
+
 #endif // MLIR_DIALECT_GPU_PASSES
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -52,11 +52,13 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/KernelOutlining.cpp
   Transforms/MemoryPromotion.cpp
   Transforms/ModuleToBinary.cpp
+  Transforms/NVVMAttachTarget.cpp
   Transforms/ParallelLoopMapper.cpp
+  Transforms/ROCDLAttachTarget.cpp
   Transforms/SerializeToBlob.cpp
   Transforms/SerializeToCubin.cpp
   Transforms/SerializeToHsaco.cpp
   Transforms/ShuffleRewriter.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
diff --git a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
@@ -0,0 +1,96 @@
+//===- NVVMAttachTarget.cpp - Attach an NVVM target -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuNVVMAttachTarget` pass, attaching `#nvvm.target`
+// attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUNVVMATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+namespace {
+/// Pass that builds an `#nvvm.target` attribute from the pass options and
+/// attaches it to the GPU modules selected by the `module` option.
+struct NVVMAttachTarget
+    : public impl::GpuNVVMAttachTargetBase<NVVMAttachTarget> {
+  using Base::Base;
+
+  /// Returns the enabled boolean options as a dictionary of unit attributes,
+  /// or a null attribute when none of them is enabled.
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+
+  /// Attaches the target to every selected `gpu.module` nested directly in
+  /// the regions of the root operation.
+  void runOnOperation() override;
+
+  /// Registers the NVVM target with `registry`.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerNVVMTarget(registry);
+  }
+};
+} // namespace
+
+DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
+  UnitAttr unitAttr = builder.getUnitAttr();
+  SmallVector<NamedAttribute, 2> flags;
+  auto addFlag = [&](StringRef flag) {
+    flags.push_back(builder.getNamedAttr(flag, unitAttr));
+  };
+  if (fastFlag)
+    addFlag("fast");
+  if (ftzFlag)
+    addFlag("ftz");
+  // No flag set: return a null attribute rather than an empty dictionary.
+  if (flags.empty())
+    return nullptr;
+  return builder.getDictionaryAttr(flags);
+}
+
+void NVVMAttachTarget::runOnOperation() {
+  OpBuilder builder(&getContext());
+  // View the `l` option strings as `StringRef`s for `getStrArrayAttr`.
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  // The flags and the link files are passed as null when there are none.
+  auto target = builder.getAttr<NVVMTargetAttr>(
+      optLevel, triple, chip, features, getFlags(builder),
+      filesToLink.empty() ? nullptr : builder.getStrArrayAttr(filesToLink));
+  llvm::Regex matcher(moduleMatcher);
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // An empty `module` option selects every GPU module.
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Gather the existing targets followed by the new one. The set vector
+        // keeps first-seen order and rejects repeats wherever they occur,
+        // unlike `std::unique`, which only collapses adjacent duplicates.
+        llvm::SmallSetVector<Attribute, 4> targets;
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          targets.insert(attrs->getValue().begin(), attrs->getValue().end());
+        targets.insert(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets.getArrayRef()));
+      }
+}
diff --git a/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
@@ -0,0 +1,106 @@
+//===- ROCDLAttachTarget.cpp - Attach an ROCDL target ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuROCDLAttachTarget` pass, attaching
+// `#rocdl.target` attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/ROCDL/Target.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUROCDLATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+namespace {
+/// Pass that builds a `#rocdl.target` attribute from the pass options and
+/// attaches it to the GPU modules selected by the `module` option.
+struct ROCDLAttachTarget
+    : public impl::GpuROCDLAttachTargetBase<ROCDLAttachTarget> {
+  using Base::Base;
+
+  /// Returns the active flags as a dictionary of unit attributes, or a null
+  /// attribute when no flag has to be recorded.
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+
+  /// Attaches the target to every selected `gpu.module` nested directly in
+  /// the regions of the root operation.
+  void runOnOperation() override;
+
+  /// Registers the ROCDL target with `registry`.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerROCDLTarget(registry);
+  }
+};
+} // namespace
+
+DictionaryAttr ROCDLAttachTarget::getFlags(OpBuilder &builder) const {
+  UnitAttr unitAttr = builder.getUnitAttr();
+  SmallVector<NamedAttribute, 6> flags;
+  auto addFlag = [&](StringRef flag) {
+    flags.push_back(builder.getNamedAttr(flag, unitAttr));
+  };
+  // `wave64` and `correct-sqrt` are recorded only when disabled, under the
+  // negated names `no_wave64` and `unsafe_sqrt`.
+  if (!wave64Flag)
+    addFlag("no_wave64");
+  if (fastFlag)
+    addFlag("fast");
+  if (dazFlag)
+    addFlag("daz");
+  if (finiteOnlyFlag)
+    addFlag("finite_only");
+  if (unsafeMathFlag)
+    addFlag("unsafe_math");
+  if (!correctSqrtFlag)
+    addFlag("unsafe_sqrt");
+  // No flag set: return a null attribute rather than an empty dictionary.
+  if (flags.empty())
+    return nullptr;
+  return builder.getDictionaryAttr(flags);
+}
+
+void ROCDLAttachTarget::runOnOperation() {
+  OpBuilder builder(&getContext());
+  // View the `l` option strings as `StringRef`s for `getStrArrayAttr`.
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  // The flags and the link files are passed as null when there are none.
+  auto target = builder.getAttr<ROCDLTargetAttr>(
+      optLevel, triple, chip, features, abiVersion, getFlags(builder),
+      filesToLink.empty() ? nullptr : builder.getStrArrayAttr(filesToLink));
+  llvm::Regex matcher(moduleMatcher);
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // An empty `module` option selects every GPU module.
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Gather the existing targets followed by the new one. The set vector
+        // keeps first-seen order and rejects repeats wherever they occur,
+        // unlike `std::unique`, which only collapses adjacent duplicates.
+        llvm::SmallSetVector<Attribute, 4> targets;
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          targets.insert(attrs->getValue().begin(), attrs->getValue().end());
+        targets.insert(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets.getArrayRef()));
+      }
+}
diff --git a/mlir/test/Dialect/LLVMIR/attach-targets.mlir b/mlir/test/Dialect/LLVMIR/attach-targets.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s
+// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS
+
+module attributes {gpu.container_module} {
+// Verify the target is attached to a module that has no targets yet.
+// CHECK: @nvvm_module_1 [#nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_1 {
+}
+// Verify the target is appended after an existing, different target.
+// CHECK: @nvvm_module_2 [#nvvm.target<chip = "sm_60">, #nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_60">] {
+}
+// Verify a target identical to the one already present is not added again.
+// CHECK: @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
+}
+// Verify the NVVM target is not added because the module name fails to match its regex, while the ROCDL target is attached.
+// CHECK: @rocdl_module [#rocdl.target<O = 3, chip = "gfx90a">] {
+gpu.module @rocdl_module {
+}
+// Check the pass options of the second RUN line show up in both attached targets.
+// CHECK_OPTS: @options_module_1 [#nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>]  {
+gpu.module @options_module_1 {
+}
+// Check the options were added and that the pre-existing first target was preserved.
+// CHECK_OPTS: @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">, #nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>]  {
+gpu.module @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">] {
+}
+}