llvm · chelini · Mar 6, 2025 · Mar 5, 2025
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -35,25 +35,6 @@ class FuncOp;
 #define GEN_PASS_DECL
 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
 
-/// Pass that moves ops which are likely an index computation into gpu.launch
-/// body.
-std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();
-
-/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
-/// a separate kernel function.
-std::unique_ptr<OperationPass<ModuleOp>>
-createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
-
-/// Rewrites a function region so that GPU ops execute asynchronously.
-std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
-
-/// Maps the parallel loops found in the given function to workgroups. The first
-/// loop encountered will be mapped to the global workgroup and the second loop
-/// encountered to the local workgroup. Within each mapping, the first three
-/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
-/// mapped to sequential loops.
-std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
-
 /// Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
 void populateGpuGlobalIdPatterns(RewritePatternSet &patterns);
 
@@ -110,9 +91,6 @@ LogicalResult transformGpuModulesToBinaries(
 /// Collect a set of patterns to decompose memrefs ops.
 void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);
 
-/// Pass decomposes memref ops inside `gpu.launch` body.
-std::unique_ptr<Pass> createGpuDecomposeMemrefsPass();
-
 /// Erase barriers that do not enforce conflicting memory side effects.
 void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns);
 

diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -11,29 +11,35 @@
 
 include "mlir/Pass/PassBase.td"
 
-def GpuLaunchSinkIndexComputations : Pass<"gpu-launch-sink-index-computations"> {
+def GpuLaunchSinkIndexComputationsPass
+    : Pass<"gpu-launch-sink-index-computations"> {
   let summary = "Sink index computations into gpu.launch body";
-  let constructor = "mlir::createGpuLauchSinkIndexComputationsPass()";
   let dependentDialects = ["mlir::gpu::GPUDialect"];
 }
 
-def GpuKernelOutlining : Pass<"gpu-kernel-outlining", "ModuleOp"> {
+def GpuKernelOutliningPass : Pass<"gpu-kernel-outlining", "ModuleOp"> {
   let summary = "Outline gpu.launch bodies to kernel functions";
-  let constructor = "mlir::createGpuKernelOutliningPass()";
   let dependentDialects = ["mlir::DLTIDialect", "cf::ControlFlowDialect"];
+  let options = [Option<"dataLayoutStr", "data-layout-str", "std::string",
+                        /*default=*/"",
+                        "Data layout specification for the GPU kernel module">];
 }
 
 def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
   let summary = "Make GPU ops async";
-  let constructor = "mlir::createGpuAsyncRegionPass()";
   let dependentDialects = ["async::AsyncDialect"];
 }
 
 def GpuMapParallelLoopsPass
     : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
   let summary = "Greedily maps loops to GPU hardware dimensions.";
-  let constructor = "mlir::createGpuMapParallelLoopsPass()";
-  let description = "Greedily maps loops to GPU hardware dimensions.";
+  let description = [{
+    Maps the parallel loops found in the given function to workgroups. The first
+    loop encountered will be mapped to the global workgroup and the second loop
+    encountered to the local workgroup. Within each mapping, the first three
+    dimensions are mapped to x/y/z hardware ids and all following dimensions are
+    mapped to sequential loops.
+  }];
   let dependentDialects = ["mlir::gpu::GPUDialect"];
 }
 
@@ -66,7 +72,6 @@ def GpuDecomposeMemrefsPass : Pass<"gpu-decompose-memrefs"> {
     and sizes/strides for dynamically-sized memrefs are not available inside
     `gpu.launch`.
   }];
-  let constructor = "mlir::createGpuDecomposeMemrefsPass()";
   let dependentDialects = [
     "mlir::gpu::GPUDialect", "mlir::memref::MemRefDialect",
     "mlir::affine::AffineDialect"

diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -347,7 +347,3 @@ void GpuAsyncRegionPass::runOnOperation() {
   // Makes each !gpu.async.token returned from async.execute op have single use.
   getOperation().getRegion().walk(SingleTokenUseCallback());
 }
-
-std::unique_ptr<OperationPass<func::FuncOp>> mlir::createGpuAsyncRegionPass() {
-  return std::make_unique<GpuAsyncRegionPass>();
-}
diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
@@ -238,7 +238,3 @@ void mlir::populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns) {
   patterns.insert<FlattenLoad, FlattenStore, FlattenSubview>(
       patterns.getContext());
 }
-
-std::unique_ptr<Pass> mlir::createGpuDecomposeMemrefsPass() {
-  return std::make_unique<GpuDecomposeMemrefsPass>();
-}
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -30,8 +30,8 @@
 #include <limits>
 
 namespace mlir {
-#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS
-#define GEN_PASS_DEF_GPUKERNELOUTLINING
+#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONSPASS
+#define GEN_PASS_DEF_GPUKERNELOUTLININGPASS
 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
 } // namespace mlir
 
@@ -302,7 +302,7 @@ namespace {
 /// Pass that moves ops which are likely an index computation into gpu.launch
 /// body.
 class GpuLaunchSinkIndexComputationsPass
-    : public impl::GpuLaunchSinkIndexComputationsBase<
+    : public impl::GpuLaunchSinkIndexComputationsPassBase<
           GpuLaunchSinkIndexComputationsPass> {
 public:
   void runOnOperation() override {
@@ -329,17 +329,9 @@ class GpuLaunchSinkIndexComputationsPass
 /// a separate pass. The external functions can then be annotated with the
 /// symbol of the cubin accessor function.
 class GpuKernelOutliningPass
-    : public impl::GpuKernelOutliningBase<GpuKernelOutliningPass> {
+    : public impl::GpuKernelOutliningPassBase<GpuKernelOutliningPass> {
 public:
-  GpuKernelOutliningPass(StringRef dlStr) {
-    if (!dlStr.empty() && !dataLayoutStr.hasValue())
-      dataLayoutStr = dlStr.str();
-  }
-
-  GpuKernelOutliningPass(const GpuKernelOutliningPass &other)
-      : GpuKernelOutliningBase(other), dataLayoutSpec(other.dataLayoutSpec) {
-    dataLayoutStr = other.dataLayoutStr.getValue();
-  }
+  using Base::Base;
 
   LogicalResult initialize(MLIRContext *context) override {
     // Initialize the data layout specification from the data layout string.
@@ -457,21 +449,7 @@ class GpuKernelOutliningPass
     return kernelModule;
   }
 
-  Option<std::string> dataLayoutStr{
-      *this, "data-layout-str",
-      llvm::cl::desc("String containing the data layout specification to be "
-                     "attached to the GPU kernel module")};
-
   DataLayoutSpecInterface dataLayoutSpec;
 };
 
 } // namespace
-
-std::unique_ptr<Pass> mlir::createGpuLauchSinkIndexComputationsPass() {
-  return std::make_unique<GpuLaunchSinkIndexComputationsPass>();
-}
-
-std::unique_ptr<OperationPass<ModuleOp>>
-mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) {
-  return std::make_unique<GpuKernelOutliningPass>(dataLayoutStr);
-}
diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
@@ -146,8 +146,3 @@ struct GpuMapParallelLoopsPass
 } // namespace
 } // namespace gpu
 } // namespace mlir
-
-std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
-mlir::createGpuMapParallelLoopsPass() {
-  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
-}