
Commit 5ee1278

Longsheng Du committed
add lowering

1 parent 4e629e5

File tree

4 files changed: +306 −0 lines changed

include/gc-dialects/Passes.td

Lines changed: 14 additions & 0 deletions

@@ -17,4 +17,18 @@ def TileLinalgNamed : Pass<"tile-named-linalg", "func::FuncOp"> {
     ["linalg::LinalgDialect", "scf::SCFDialect", "tensor::TensorDialect"];
 }
 
+def ConvertOneDNNGraphToLinalg : Pass<"convert-onednn-graph-to-linalg"> {
+  let summary = "Lower the operations from the oneDNN Graph dialect into Linalg";
+  let description = [{
+    Lowers the `onednn_graph` ops to `linalg` ops.
+  }];
+  let dependentDialects = [
+    "func::FuncDialect",
+    "math::MathDialect",
+    "arith::ArithDialect",
+    "tensor::TensorDialect",
+    "linalg::LinalgDialect"
+  ];
+}
+
 #endif // GC_DIALECT_GC_PASSES
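A pass declared this way can be driven from the gc-opt command line (as the lit test at the bottom of this commit does) or added to a pipeline programmatically. A minimal sketch of the latter, assuming the conventional tablegen-generated factory name mlir::gc::createConvertOneDNNGraphToLinalg(), which this diff does not show:

#include "gc-dialects/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/PassManager.h"

// Run the lowering on every function in the module. The factory name is the
// usual tablegen convention and is an assumption, not part of this diff.
void buildLoweringPipeline(mlir::PassManager &pm) {
  pm.addNestedPass<mlir::func::FuncOp>(
      mlir::gc::createConvertOneDNNGraphToLinalg());
}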

lib/gc-dialects/Transforms/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -1,4 +1,5 @@
 add_mlir_library(GCPasses
+  OneDNNGraphToLinalg.cpp
   TileNamed.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -9,6 +10,7 @@ add_mlir_library(GCPasses
 
   LINK_LIBS PUBLIC
     ${mlir_dialect_libs}
+    MLIROneDNNGraph
     MLIRIR
     MLIRSupport
     MLIRBufferizationToMemRef
lib/gc-dialects/Transforms/OneDNNGraphToLinalg.cpp (new file)

Lines changed: 250 additions & 0 deletions

@@ -0,0 +1,250 @@
//===- OneDNNGraphToLinalg.cpp - OneDNN Graph To Linalg Lowering -*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <numeric>
#include <vector>

#include "gc-dialects/OneDNNGraph/OneDNNGraphDialect.h"
#include "gc-dialects/OneDNNGraph/OneDNNGraphOps.h"
#include "gc-dialects/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Rewrite/FrozenRewritePatternSet.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir::onednn_graph;

namespace mlir {
namespace gc {
#define GEN_PASS_DEF_CONVERTONEDNNGRAPHTOLINALG
#include "gc-dialects/Passes.h.inc"

namespace {
//===----------------------------------------------------------------------===//
// Util funcs
//===----------------------------------------------------------------------===//

// Broadcast `op` up to the target shape `ty` with a linalg.broadcast;
// returns `op` unchanged when the shapes already match.
Value createBroadcastOperand(Location loc, PatternRewriter &rewriter,
                             TensorType ty, Value op) {
  auto opTy = dyn_cast<TensorType>(op.getType());
  llvm::ArrayRef<int64_t> bcastShape = ty.getShape();
  llvm::ArrayRef<int64_t> opShape = opTy.getShape();
  int64_t diff = bcastShape.size() - opShape.size();

  if (bcastShape.equals(opShape)) {
    return op;
  } else {
    // get broadcast dimensions
    llvm::SmallVector<int64_t> bcastDims;
    for (int64_t i = 0; i < (int64_t)bcastShape.size(); i++) {
      int64_t idxOp = i - diff;
      if (idxOp < 0) {
        bcastDims.push_back(i);
      } else if (bcastShape[i] != opShape[idxOp]) {
        bcastDims.push_back(i);
      }
    }
    // create a new output tensor
    Value initTensor =
        rewriter.create<tensor::EmptyOp>(loc, bcastShape, ty.getElementType());
    return rewriter
        .create<linalg::BroadcastOp>(
            /*location=*/loc,
            /*inputs=*/op,
            /*inits=*/initTensor,
            /*dimensions=*/bcastDims)
        .getResults()
        .front();
  }
}
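To see what bcastDims ends up holding, here is the same index arithmetic replayed on plain vectors as a standalone sketch, with the shapes taken from the @add_bcast test below (128x256 result, 256 operand):

#include <cstdint>
#include <iostream>
#include <vector>

// Standalone replica of the bcastDims loop above: a dimension is broadcast
// if it has no counterpart in the operand (idxOp < 0) or the sizes differ.
int main() {
  std::vector<int64_t> bcastShape = {128, 256}; // target shape
  std::vector<int64_t> opShape = {256};         // operand shape
  int64_t diff = (int64_t)bcastShape.size() - (int64_t)opShape.size();

  std::vector<int64_t> bcastDims;
  for (int64_t i = 0; i < (int64_t)bcastShape.size(); i++) {
    int64_t idxOp = i - diff;
    if (idxOp < 0 || bcastShape[i] != opShape[idxOp])
      bcastDims.push_back(i);
  }
  // prints: 0 -- linalg.broadcast duplicates the 256-vector along dim 0
  for (int64_t d : bcastDims)
    std::cout << d << "\n";
}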
// Hooks for preparing a pattern's operands: either forward (and broadcast)
// an original operand, or materialize a constant tensor.
typedef Value (*OperandGet)(Operation *, PatternRewriter &, TensorType);

// Forward operand I of the matched op, broadcast to the result shape.
template <unsigned I>
Value OriginalOperand(Operation *op, PatternRewriter &b, TensorType ty) {
  return createBroadcastOperand(op->getLoc(), b, ty, op->getOperand(I));
}

// Materialize a zero-filled tensor of the result shape.
static Value ConstZeroOperand(Operation *op, PatternRewriter &b,
                              TensorType ty) {
  auto loc = op->getLoc();
  Value zero =
      b.create<arith::ConstantOp>(loc, b.getZeroAttr(ty.getElementType()));
  Value newTensor =
      b.create<tensor::EmptyOp>(loc, ty.getShape(), ty.getElementType());
  return b.create<linalg::FillOp>(loc, zero, newTensor).getResult(0);
}
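These two helpers are the only OperandGet implementations the commit needs, but the hook makes other prepared operands cheap to add. A hypothetical variant that materializes a splat of one, sketched here only for illustration (it is not part of this diff, and it assumes Builder::getOneAttr, which recent MLIR provides):

// Hypothetical OperandGet hook: a tensor filled with ones. Mirrors
// ConstZeroOperand above; not part of this commit.
static Value ConstOneOperand(Operation *op, PatternRewriter &b,
                             TensorType ty) {
  auto loc = op->getLoc();
  Value one =
      b.create<arith::ConstantOp>(loc, b.getOneAttr(ty.getElementType()));
  Value newTensor =
      b.create<tensor::EmptyOp>(loc, ty.getShape(), ty.getElementType());
  return b.create<linalg::FillOp>(loc, one, newTensor).getResult(0);
}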
//===----------------------------------------------------------------------===//
// Elemwise lowering
//===----------------------------------------------------------------------===//

// Generate elementwise op using linalg named ops
template <typename LoweredOp>
Value createElemwiseOp(Location loc, PatternRewriter &rewriter, TensorType ty,
                       llvm::ArrayRef<Value> inputs) {
  // create a new output tensor
  Value outTensor =
      rewriter.create<tensor::EmptyOp>(loc, ty.getShape(), ty.getElementType());

  auto elemwiseOp = rewriter.create<LoweredOp>(
      /*location=*/loc,
      /*resultTensorTypes=*/outTensor.getType(),
      /*inputs=*/inputs,
      /*outputs=*/outTensor);

  return elemwiseOp.getResult(0);
}

template <typename UnaryOp, typename LoweredOp>
struct UnaryElemwiseLowering : public OpRewritePattern<UnaryOp> {
  using OpRewritePattern<UnaryOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(UnaryOp op,
                                PatternRewriter &rewriter) const final {
    auto loc = op->getLoc();
    auto resultTy = dyn_cast<TensorType>(op->getResultTypes().front());
    auto unaryOp = createElemwiseOp<LoweredOp>(loc, rewriter, resultTy, //
                                               {op->getOperand(0)});
    rewriter.replaceOp(op, unaryOp);
    return success();
  }
};

template <typename BinaryOp, typename LoweredOp, OperandGet GetOperandLHS,
          OperandGet GetOperandRHS>
struct BinaryElemwiseLowering : public OpRewritePattern<BinaryOp> {
  using OpRewritePattern<BinaryOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(BinaryOp op,
                                PatternRewriter &rewriter) const final {
    auto loc = op->getLoc();
    auto resultTy = dyn_cast<TensorType>(op->getResultTypes().front());
    auto lhsOp = GetOperandLHS(op, rewriter, resultTy);
    auto rhsOp = GetOperandRHS(op, rewriter, resultTy);
    auto binaryOp = createElemwiseOp<LoweredOp>(loc, rewriter, resultTy, //
                                                {lhsOp, rhsOp});
    rewriter.replaceOp(op, binaryOp);
    return success();
  }
};

//===----------------------------------------------------------------------===//
// Op lowering
//===----------------------------------------------------------------------===//

// ReLU(x) = max(x, 0): reuse the binary template with a constant-zero RHS.
using ReLUOpLowering = BinaryElemwiseLowering< //
    onednn_graph::ReLUOp, linalg::MaxOp, OriginalOperand<0>, ConstZeroOperand>;

using AddOpLowering = BinaryElemwiseLowering< //
    onednn_graph::AddOp, linalg::AddOp, OriginalOperand<0>, OriginalOperand<1>>;
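The two aliases above are the whole extension surface: a new elementwise lowering is one using declaration plus a patterns.add call in the pass below. For example, a multiply lowering could look like the following sketch, assuming an onednn_graph::MultiplyOp exists and lowers to linalg.mul; neither is added by this diff:

// Hypothetical: not part of this commit.
using MultiplyOpLowering = BinaryElemwiseLowering< //
    onednn_graph::MultiplyOp, linalg::MulOp, OriginalOperand<0>,
    OriginalOperand<1>>;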
//===----------------------------------------------------------------------===//
// MatMulOp lowering
//===----------------------------------------------------------------------===//

struct MatMulOpLowering : public OpRewritePattern<MatMulOp> {
  using OpRewritePattern<MatMulOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(MatMulOp op,
                                PatternRewriter &rewriter) const final {
    auto loc = op->getLoc();
    auto resultTy = dyn_cast<TensorType>(op->getResultTypes().front());
    // zero-filled init tensor for the matmul accumulator
    Value newTensor = rewriter.create<tensor::EmptyOp>(
        loc, resultTy.getShape(), resultTy.getElementType());
    Value zero = rewriter.create<arith::ConstantOp>(
        loc, rewriter.getZeroAttr(resultTy.getElementType()));
    Value outTensor =
        rewriter.create<linalg::FillOp>(loc, zero, newTensor).getResult(0);

    bool transposeA = op.getTransposeA();
    bool transposeB = op.getTransposeB();
    Operation *newOp;
    if (!transposeA && !transposeB) {
      // A * B
      newOp = rewriter.create<linalg::MatmulOp>(
          /*location=*/loc,
          /*resultTensorTypes=*/resultTy,
          /*inputs=*/ValueRange{op.getInputA(), op.getInputB()},
          /*outputs=*/outTensor);
    } else if (transposeA && !transposeB) {
      // T(A) * B
      newOp = rewriter.create<linalg::MatmulTransposeAOp>(
          /*location=*/loc,
          /*resultTensorTypes=*/resultTy,
          /*inputs=*/ValueRange{op.getInputA(), op.getInputB()},
          /*outputs=*/outTensor);
    } else if (!transposeA && transposeB) {
      // A * T(B)
      newOp = rewriter.create<linalg::MatmulTransposeBOp>(
          /*location=*/loc,
          /*resultTensorTypes=*/resultTy,
          /*inputs=*/ValueRange{op.getInputA(), op.getInputB()},
          /*outputs=*/outTensor);
    } else {
      // T(A) * T(B) = T(B * A); the intermediate B * A has the result shape
      // with its last two dims swapped, so it needs its own init tensor.
      int64_t rank = resultTy.getRank();
      SmallVector<int64_t> permutation(rank);
      std::iota(std::begin(permutation), std::end(permutation), 0);
      std::swap(permutation[rank - 2], permutation[rank - 1]);
      SmallVector<int64_t> transShape = llvm::to_vector(resultTy.getShape());
      std::swap(transShape[rank - 2], transShape[rank - 1]);
      Value transTensor = rewriter.create<tensor::EmptyOp>(
          loc, transShape, resultTy.getElementType());
      Value transOut =
          rewriter.create<linalg::FillOp>(loc, zero, transTensor).getResult(0);
      auto matmulOp = rewriter.create<linalg::MatmulOp>(
          /*location=*/loc,
          /*resultTensorTypes=*/transOut.getType(),
          /*inputs=*/ValueRange{op.getInputB(), op.getInputA()},
          /*outputs=*/transOut);
      newOp = rewriter.create<linalg::TransposeOp>(
          /*location=*/loc,
          /*inputs=*/matmulOp.getResult(0),
          /*outputs=*/outTensor,
          /*permutation=*/permutation);
    }

    if (op.getBias()) {
      // bias add: out = matmul_result + broadcast(bias)
      auto bias = createBroadcastOperand(loc, rewriter, resultTy, op.getBias());
      newOp = rewriter.create<linalg::AddOp>(
          /*location=*/loc,
          /*resultTensorTypes=*/outTensor.getType(),
          /*inputs=*/ValueRange{newOp->getResult(0), bias},
          /*outputs=*/outTensor);
    }

    rewriter.replaceOp(op, newOp);
    return success();
  }
};
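The last branch leans on the identity T(A) * T(B) = T(B * A), which saves one of the two transposes an naive lowering would emit. A quick numeric sanity check of that identity on 2x2 matrices, as a standalone sketch:

#include <array>
#include <iostream>

// Numeric check of the identity used above: T(A) * T(B) == T(B * A).
using Mat2 = std::array<std::array<int, 2>, 2>;

Mat2 mul(const Mat2 &x, const Mat2 &y) {
  Mat2 r{};
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      for (int k = 0; k < 2; k++)
        r[i][j] += x[i][k] * y[k][j];
  return r;
}

Mat2 transpose(const Mat2 &x) {
  Mat2 r{};
  for (int i = 0; i < 2; i++)
    for (int j = 0; j < 2; j++)
      r[i][j] = x[j][i];
  return r;
}

int main() {
  Mat2 a = {{{1, 2}, {3, 4}}};
  Mat2 b = {{{5, 6}, {7, 8}}};
  Mat2 lhs = mul(transpose(a), transpose(b));
  Mat2 rhs = transpose(mul(b, a));
  std::cout << (lhs == rhs ? "identity holds" : "mismatch") << "\n";
}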
//===----------------------------------------------------------------------===//
// Pass define
//===----------------------------------------------------------------------===//

struct ConvertOneDNNGraphToLinalg
    : public impl::ConvertOneDNNGraphToLinalgBase<ConvertOneDNNGraphToLinalg> {

  void runOnOperation() final {
    auto *ctx = &getContext();
    RewritePatternSet patterns(ctx);
    patterns.add<AddOpLowering>(ctx);
    patterns.add<ReLUOpLowering>(ctx);
    patterns.add<MatMulOpLowering>(ctx);

    if (failed(applyPatternsAndFoldGreedily(getOperation(),
                                            std::move(patterns)))) {
      signalPassFailure();
    }
  }
};

} // namespace
} // namespace gc
} // namespace mlir
Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
// RUN: gc-opt --split-input-file -pass-pipeline="builtin.module(func.func(convert-onednn-graph-to-linalg))" %s -verify-diagnostics -o - | FileCheck %s

// CHECK-LABEL: @matmul
func.func @matmul(%arg0: tensor<128x512xbf16>, %arg1: tensor<512x256xbf16>) -> tensor<128x256xbf16> {
  // CHECK: [[C0:%.+]] = arith.constant 0
  // CHECK: [[INIT:%.+]] = tensor.empty()
  // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : bf16) outs([[INIT]] : tensor<128x256xbf16>) -> tensor<128x256xbf16>
  // CHECK: linalg.matmul ins(%arg0, %arg1 : tensor<128x512xbf16>, tensor<512x256xbf16>) outs([[FILLED]] : tensor<128x256xbf16>) -> tensor<128x256xbf16>
  %0 = onednn_graph.matmul %arg0, %arg1 : (tensor<128x512xbf16>, tensor<512x256xbf16>) -> tensor<128x256xbf16>
  return %0 : tensor<128x256xbf16>
}

// CHECK-LABEL: @add
func.func @add(%arg0: tensor<128x256xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> {
  // CHECK: tensor.empty()
  // CHECK: linalg.add
  %0 = onednn_graph.add %arg0, %arg1 : (tensor<128x256xf32>, tensor<128x256xf32>) -> tensor<128x256xf32>
  return %0 : tensor<128x256xf32>
}

// CHECK-LABEL: @add_bcast
func.func @add_bcast(%arg0: tensor<128x256xf32>, %arg1: tensor<256xf32>) -> tensor<128x256xf32> {
  // CHECK: tensor.empty()
  // CHECK: linalg.broadcast
  // CHECK: tensor.empty()
  // CHECK: linalg.add
  %0 = onednn_graph.add %arg0, %arg1 : (tensor<128x256xf32>, tensor<256xf32>) -> tensor<128x256xf32>
  return %0 : tensor<128x256xf32>
}

// CHECK-LABEL: @relu
func.func @relu(%arg0: tensor<128x256xf32>) -> tensor<128x256xf32> {
  // CHECK: arith.constant 0
  // CHECK: tensor.empty()
  // CHECK: linalg.fill
  // CHECK: tensor.empty()
  // CHECK: linalg.max
  %0 = onednn_graph.relu %arg0 : (tensor<128x256xf32>) -> tensor<128x256xf32>
  return %0 : tensor<128x256xf32>
}
