
Commit 3c58380

[mlir][MLProgram] Add MLProgram to MemRef bufferization pass

There is currently no lowering out of `ml_program` in the LLVM repository. This change adds a lowering to `memref` so that it can be lowered all the way to LLVM. The lowering was taken from the reference backend in torch-mlir: llvm/torch-mlir@f416953. I had tried implementing the `BufferizableOpInterface` instead of adding a new pass, but that did not work because OneShotBufferize does not visit module-level ops like `ml_program.global`.
1 parent 97efd8a commit 3c58380
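
For illustration, a minimal before/after sketch of the lowering this pass performs on a mutable global and a load of it (adapted from the test added below; the symbol names here are illustrative, and the exact printed form of `bufferization.to_tensor` can vary between MLIR versions):

ml_program.global private mutable @state(dense<0> : tensor<i64>) : tensor<i64>

func.func @read() -> tensor<i64> {
  %0 = ml_program.global_load @state : tensor<i64>
  return %0 : tensor<i64>
}

becomes roughly:

memref.global "private" @state : memref<i64> = dense<0>

func.func @read() -> tensor<i64> {
  %0 = memref.get_global @state : memref<i64>
  // Copy into a fresh allocation so the produced tensor does not alias the global.
  %alloc = memref.alloc() : memref<i64>
  memref.copy %0, %alloc : memref<i64> to memref<i64>
  %1 = bufferization.to_tensor %alloc restrict : memref<i64>
  return %1 : tensor<i64>
}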

File tree

4 files changed: +241 −0 lines

mlir/include/mlir/Dialect/MLProgram/Transforms/Passes.td

Lines changed: 17 additions & 0 deletions
@@ -11,6 +11,23 @@

include "mlir/Pass/PassBase.td"

def MLProgramBufferize: Pass<"mlprogram-bufferize", "ModuleOp"> {
  let summary = "Bufferize the MLProgram dialect ops";
  let description = [{
    This pass bufferizes ops in the `ml_program` dialect. It is implemented as a
    standalone pass because One-Shot Bufferize does not handle module-level ops
    like `ml_program.global`. Therefore, this pass should run just before
    One-Shot Bufferize.

    This pass is intended to be a generic lowering of `ml_program` ops to allow
    for them to be lowered all the way to LLVM. Users may want a more specialized
    lowering depending on how they manage global state in their system.
  }];
  let dependentDialects = [
    "bufferization::BufferizationDialect", "memref::MemRefDialect",
  ];
}

def MLProgramPipelineGlobals : Pass<"mlprogram-pipeline-globals", "ModuleOp"> {
  let summary = "Optimize `ml_program` global operations for read and store";
  let description = [{
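
As the description notes, the new pass is meant to be scheduled immediately before One-Shot Bufferize. A sketch of a typical test/driver invocation (the exact One-Shot Bufferize options depend on the surrounding pipeline):

// RUN: mlir-opt %s --mlprogram-bufferize --one-shot-bufferize | FileCheck %s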
mlir/lib/Dialect/MLProgram/Transforms/Bufferize.cpp

Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,142 @@

//===- Bufferize.cpp - MLProgram bufferize pass ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a bufferization pass for the MLProgram dialect
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/MLProgram/Transforms/Passes.h"

#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/MLProgram/IR/MLProgram.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinTypes.h"

namespace mlir {
namespace ml_program {
#define GEN_PASS_DEF_MLPROGRAMBUFFERIZE
#include "mlir/Dialect/MLProgram/Transforms/Passes.h.inc"

static LogicalResult bufferizeMLProgramGlobalOp(GlobalOp globalOp,
                                                OpBuilder &builder) {
  if (!globalOp.getValue().has_value())
    return globalOp.emitError("global op must have a value");

  auto tensorType = cast<RankedTensorType>(globalOp.getType());
  auto memrefType =
      MemRefType::get(tensorType.getShape(), tensorType.getElementType());

  builder.setInsertionPointToStart(
      globalOp->getParentOfType<ModuleOp>().getBody());
  builder.create<memref::GlobalOp>(
      globalOp.getLoc(), globalOp.getSymName(),
      /*sym_visibility=*/globalOp.getSymVisibilityAttr(),
      /*type=*/memrefType,
      /*initial_value=*/globalOp.getValue().value(),
      /*constant=*/!globalOp.getIsMutable(),
      /*alignment=*/nullptr);
  return success();
}

static LogicalResult bufferizeMLProgramGlobalLoadOp(GlobalLoadOp globalLoadOp,
                                                    OpBuilder &builder) {
  auto loc = globalLoadOp.getLoc();
  auto tensorType = cast<RankedTensorType>(globalLoadOp.getType());
  auto memrefType =
      MemRefType::get(tensorType.getShape(), tensorType.getElementType());

  builder.setInsertionPoint(globalLoadOp);
  Value globalVal = builder.create<memref::GetGlobalOp>(
      loc, memrefType, globalLoadOp.getGlobalAttr().getLeafReference());

  // We need a copy to guarantee that the produced tensor does not alias with
  // any other buffer.
  Value alloc = builder.create<memref::AllocOp>(loc, memrefType, ValueRange{});
  builder.create<memref::CopyOp>(globalLoadOp->getLoc(), globalVal, alloc);

  globalVal = builder.create<bufferization::ToTensorOp>(loc, tensorType, alloc,
                                                        /*restrict=*/true);
  globalLoadOp->getResult(0).replaceAllUsesWith(globalVal);
  return success();
}

static LogicalResult
bufferizeMLProgramGlobalStoreOp(GlobalStoreOp globalStoreOp,
                                OpBuilder &builder) {
  auto loc = globalStoreOp.getLoc();
  auto tensorType = cast<RankedTensorType>(globalStoreOp.getValue().getType());
  auto memrefType =
      MemRefType::get(tensorType.getShape(), tensorType.getElementType());

  builder.setInsertionPoint(globalStoreOp);
  Value memref = builder.create<memref::GetGlobalOp>(
      loc, memrefType, globalStoreOp.getGlobalAttr().getLeafReference());
  Value copyValue = builder.create<bufferization::ToMemrefOp>(
      loc, memrefType, globalStoreOp.getValue());
  builder.create<memref::CopyOp>(loc, copyValue, memref);
  return success();
}

namespace {
/// Converts MLProgram operations that work on tensor-type operands or results
/// to work on buffers.
class MLProgramBufferize
    : public impl::MLProgramBufferizeBase<MLProgramBufferize> {
  void runOnOperation() override {
    auto module = getOperation();
    OpBuilder builder(module.getBodyRegion());
    SmallVector<Operation *> toErase;

    auto walkResult = module.walk([&](GlobalOp op) {
      if (auto type = dyn_cast<RankedTensorType>(op.getType())) {
        if (!type.hasStaticShape()) {
          // If the ml_program.global has a dynamically shaped tensor.
          op.emitError(
              "unimplemented: global op bufferization with dynamic shape");
          return WalkResult::interrupt();
        }
      } else {
        // If the ml_program.global is of non-tensor type.
        op.emitError("unsupported global op type");
        return WalkResult::interrupt();
      }

      if (failed(bufferizeMLProgramGlobalOp(op, builder))) {
        op.emitError("bufferization for this op failed");
        return WalkResult::interrupt();
      }
      toErase.push_back(op);
      return WalkResult::advance();
    });

    if (walkResult.wasInterrupted())
      return signalPassFailure();

    module.walk([&](GlobalLoadOp op) {
      if (failed(bufferizeMLProgramGlobalLoadOp(op, builder))) {
        op.emitError("bufferization for this op failed");
        return;
      }
      toErase.push_back(op);
    });

    module.walk([&](GlobalStoreOp op) {
      if (failed(bufferizeMLProgramGlobalStoreOp(op, builder))) {
        op.emitError("bufferization for this op failed");
        return;
      }
      toErase.push_back(op);
    });

    for (auto *op : llvm::reverse(toErase))
      op->erase();
  }
};
} // namespace
} // namespace ml_program
} // namespace mlir
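
For the store side, `bufferizeMLProgramGlobalStoreOp` materializes the stored tensor as a memref and copies it into the global's buffer. A rough sketch of the resulting IR for a single store (illustrative names; the exact `bufferization.to_memref` spelling can differ between MLIR versions):

ml_program.global_store @state = %t : tensor<i64>

becomes roughly:

// Fetch the global's buffer, turn the tensor into a memref, and copy it in.
%dst = memref.get_global @state : memref<i64>
%m = bufferization.to_memref %t : memref<i64>
memref.copy %m, %dst : memref<i64> to memref<i64>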

mlir/lib/Dialect/MLProgram/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
add_mlir_dialect_library(MLIRMLProgramTransforms
  Bufferize.cpp
  PipelineGlobalOps.cpp

  ADDITIONAL_HEADER_DIRS
Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@

// RUN: mlir-opt %s --mlprogram-bufferize -split-input-file -verify-diagnostics | FileCheck %s

// CHECK-LABEL: @global
ml_program.global private mutable @global(dense<0> : tensor<i64>) : tensor<i64>

// CHECK-LABEL: @global_load_store
func.func @global_load_store() -> i64 {
  // CHECK-DAG: %[[CST127:.+]] = arith.constant 127
  // CHECK-DAG: %[[GLOBAL_1:.+]] = memref.get_global @global
  // CHECK-DAG: %[[NEW_ALLOC:.+]] = memref.alloc
  // CHECK: memref.copy %[[GLOBAL_1]], %[[NEW_ALLOC]]
  // CHECK: %[[TENSOR:.+]] = bufferization.to_tensor %[[NEW_ALLOC]]
  // CHECK: %[[EXTRACTED:.+]] = tensor.extract %[[TENSOR]][]
  // CHECK: %[[NEW_VALUE:.+]] = arith.muli %[[EXTRACTED]], %[[CST127]]
  // CHECK: %[[INSERTED:.+]] = tensor.insert %[[NEW_VALUE]] into %[[TENSOR]][]
  // CHECK: %[[GLOBAL_2:.+]] = memref.get_global @global
  // CHECK: %[[MEMREF:.+]] = bufferization.to_memref %[[INSERTED]]
  // CHECK: memref.copy %[[MEMREF]], %[[GLOBAL_2]]
  // CHECK: return %[[NEW_VALUE]]
  %c127_i64 = arith.constant 127 : i64
  %0 = ml_program.global_load @global : tensor<i64>
  %extracted = tensor.extract %0[] : tensor<i64>
  %1 = arith.muli %extracted, %c127_i64 : i64
  %inserted = tensor.insert %1 into %0[] : tensor<i64>
  ml_program.global_store @global = %inserted : tensor<i64>
  return %1 : i64
}

// -----

// expected-error @below {{unsupported global op type}}
ml_program.global private mutable @global(0 : i64) : i64

func.func @global_scalar() -> i64 {
  %c127_i64 = arith.constant 127 : i64
  %0 = ml_program.global_load @global : i64
  %1 = arith.muli %0, %c127_i64 : i64
  ml_program.global_store @global = %1 : i64
  return %1 : i64
}

// -----

// expected-error @below {{unsupported global op type}}
ml_program.global private mutable @global(dense<0> : memref<i64>) : memref<i64>

func.func @global_memref() -> i64 {
  %c127_i64 = arith.constant 127 : i64
  %0 = ml_program.global_load @global : memref<i64>
  %extracted = memref.load %0[] : memref<i64>
  %1 = arith.muli %extracted, %c127_i64 : i64
  memref.store %1, %0[] : memref<i64>
  ml_program.global_store @global = %0 : memref<i64>
  return %1 : i64
}

// -----

// expected-error @below {{invalid tensor element type}}
ml_program.global private mutable @global(dense<0> : tensor<memref<i64>>) : tensor<memref<i64>>

func.func @global_tensor_of_memref() -> i64 {
  %c127_i64 = arith.constant 127 : i64
  return %c127_i64 : i64
}

// -----

// expected-error @below {{unimplemented: global op bufferization with dynamic shape}}
ml_program.global private mutable @global(dense<0> : tensor<1xi64>) : tensor<?xi64>

func.func @global_dynamic_shape() -> i64 {
  %c127_i64 = arith.constant 127 : i64
  %c0 = arith.constant 0 : index
  %0 = ml_program.global_load @global : tensor<?xi64>
  %extracted = tensor.extract %0[%c0] : tensor<?xi64>
  %1 = arith.muli %extracted, %c127_i64 : i64
  %inserted = tensor.insert %1 into %0[%c0] : tensor<?xi64>
  ml_program.global_store @global = %inserted : tensor<?xi64>
  return %1 : i64
}