@@ -173,6 +173,35 @@ static void createDepthwiseConvCollapseMap(
173
173
rewriter.getAffineDimExpr (outputRank));
174
174
}
175
175
176
+ static FailureOr<Value> collapseValue (OpBuilder &rewriter, Location loc,
177
+ Value value, ShapedType type) {
178
+ auto reassociationMap = getReassociationIndicesForReshape (
179
+ cast<ShapedType>(value.getType ()), type);
180
+ if (!reassociationMap.has_value ())
181
+ return failure ();
182
+
183
+ return Value (rewriter.create <tensor::CollapseShapeOp>(
184
+ loc, type, value, reassociationMap.value ()));
185
+ }
186
+
187
+ static FailureOr<SmallVector<Value>>
188
+ collapseValues (OpBuilder &rewriter, Location loc, SmallVector<Value> values,
189
+ SmallVector<ShapedType> newTys, bool useMatmulForBatchOne) {
190
+ if (!useMatmulForBatchOne)
191
+ return values;
192
+
193
+ SmallVector<Value> newValues;
194
+ for (auto [idx, value] : llvm::enumerate (values)) {
195
+
196
+ auto newValue = collapseValue (rewriter, loc, value, newTys[idx]);
197
+ if (failed (newValue))
198
+ return failure ();
199
+
200
+ newValues.push_back (*newValue);
201
+ }
202
+ return newValues;
203
+ }
204
+
176
205
namespace {
177
206
178
207
template <typename TosaConvOp, typename LinalgConvOp, typename LinalgConvQOp>
@@ -498,6 +527,9 @@ class DepthwiseConvConverter
498
527
499
528
class MatMulConverter : public OpConversionPattern <tosa::MatMulOp> {
500
529
public:
530
+ // Constructor capturing whether a single-batch tosa.matmul should lower to
+ // linalg.matmul rather than linalg.batch_matmul; the flag is stored in the
+ // `useMatmulForSingleBatch` member consulted during matchAndRewrite.
+ MatMulConverter (MLIRContext *ctx, bool useMatmulForSingleBatch)
531
+ : OpConversionPattern<tosa::MatMulOp>(ctx),
532
+ useMatmulForSingleBatch (useMatmulForSingleBatch) {}
501
533
using OpConversionPattern<tosa::MatMulOp>::OpConversionPattern;
502
534
LogicalResult
503
535
matchAndRewrite (tosa::MatMulOp op, OpAdaptor adaptor,
@@ -525,20 +557,55 @@ class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
525
557
dynDims[2 ] = rewriter.create <tensor::DimOp>(loc, op->getOperand (1 ), 2 );
526
558
}
527
559
560
+ auto getTypeWithoutBatch = [&](ShapedType ty) {
561
+ auto shape2D = {ty.getDimSize (1 ), ty.getDimSize (2 )};
562
+ return RankedTensorType::get (shape2D, ty.getElementType ());
563
+ };
564
+
528
565
SmallVector<Value> filteredDims = condenseValues (dynDims);
529
566
567
+ bool useMatmulForBatchOne =
568
+ outputTy.getDimSize (0 ) == 1 && this ->useMatmulForSingleBatch ;
569
+
570
+ auto newInput1Type = getTypeWithoutBatch (firstOperandTy);
571
+ auto newInput2Type = getTypeWithoutBatch (secondOperandTy);
572
+ auto newOutputType = getTypeWithoutBatch (outputTy);
573
+
574
+ SmallVector<Value> inputs = {adaptor.getA (), adaptor.getB ()};
575
+ auto inputsOrFailure =
576
+ collapseValues (rewriter, loc, inputs, {newInput1Type, newInput2Type},
577
+ useMatmulForBatchOne);
578
+ auto matmulMap = getReassociationIndicesForReshape (newOutputType, outputTy);
579
+
580
+ // If any of the reassociations of indices failed, don't use matmul.
581
+ if (failed (inputsOrFailure) || !matmulMap.has_value ()) {
582
+ useMatmulForBatchOne = false ;
583
+ } else {
584
+ inputs = *inputsOrFailure;
585
+ }
586
+
530
587
auto zeroAttr = rewriter.getZeroAttr (outputElementTy);
531
588
Value zero = rewriter.create <arith::ConstantOp>(loc, zeroAttr);
532
- auto emptyTensor = rewriter.create <tensor::EmptyOp>(
533
- loc, outputTy.getShape (), outputTy.getElementType (), filteredDims);
589
+
590
+ Value emptyTensor = rewriter.create <tensor::EmptyOp>(
591
+ loc,
592
+ useMatmulForBatchOne ? newOutputType.getShape () : outputTy.getShape (),
593
+ outputElementTy, filteredDims);
594
+
534
595
Value zeroTensor = rewriter
535
596
.create <linalg::FillOp>(loc, ValueRange{zero},
536
597
ValueRange{emptyTensor})
537
598
.result ();
599
+
538
600
if (!op.getQuantizationInfo ()) {
539
- rewriter.replaceOpWithNewOp <linalg::BatchMatmulOp>(
540
- op, TypeRange{op.getType ()},
541
- ValueRange{adaptor.getA (), adaptor.getB ()}, ValueRange{zeroTensor});
601
+ if (useMatmulForBatchOne) {
602
+ auto matmul = rewriter.create <linalg::MatmulOp>(
603
+ loc, TypeRange{newOutputType}, inputs, ValueRange{zeroTensor});
604
+ rewriter.replaceOpWithNewOp <tensor::ExpandShapeOp>(
605
+ op, outputTy, matmul->getResult (0 ), matmulMap.value ());
606
+ } else
607
+ rewriter.replaceOpWithNewOp <linalg::BatchMatmulOp>(
608
+ op, TypeRange{op.getType ()}, inputs, ValueRange{zeroTensor});
542
609
return success ();
543
610
}
544
611
@@ -547,12 +614,22 @@ class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
547
614
loc, rewriter.getI32IntegerAttr (quantizationInfo.getAZp ()));
548
615
auto bZp = rewriter.create <arith::ConstantOp>(
549
616
loc, rewriter.getI32IntegerAttr (quantizationInfo.getBZp ()));
550
- rewriter.replaceOpWithNewOp <linalg::QuantizedBatchMatmulOp>(
551
- op, TypeRange{op.getType ()},
552
- ValueRange{adaptor.getA (), adaptor.getB (), aZp, bZp}, zeroTensor);
617
+ if (useMatmulForBatchOne) {
618
+ auto matmul = rewriter.create <linalg::QuantizedMatmulOp>(
619
+ loc, TypeRange{newOutputType},
620
+ ValueRange{inputs[0 ], inputs[1 ], aZp, bZp}, zeroTensor);
621
+ rewriter.replaceOpWithNewOp <tensor::ExpandShapeOp>(
622
+ op, outputTy, matmul->getResult (0 ), matmulMap.value ());
623
+ } else
624
+ rewriter.replaceOpWithNewOp <linalg::QuantizedBatchMatmulOp>(
625
+ op, TypeRange{op.getType ()},
626
+ ValueRange{inputs[0 ], inputs[1 ], aZp, bZp}, zeroTensor);
553
627
554
628
return success ();
555
629
}
630
+
631
+ private:
632
+ bool useMatmulForSingleBatch;
556
633
};
557
634
558
635
class FullyConnectedConverter
@@ -974,15 +1051,16 @@ class AvgPool2dConverter : public OpRewritePattern<tosa::AvgPool2dOp> {
974
1051
} // namespace
975
1052
976
1053
void mlir::tosa::populateTosaToLinalgNamedConversionPatterns (
977
- RewritePatternSet *patterns) {
1054
+ RewritePatternSet *patterns, bool useMatmulForSingleBatch ) {
978
1055
patterns->add <
979
1056
// clang-format off
980
1057
ConvConverter<tosa::Conv2DOp, linalg::Conv2DNhwcHwcfOp, linalg::Conv2DNhwcHwcfQOp>,
981
1058
ConvConverter<tosa::Conv3DOp, linalg::Conv3DNdhwcDhwcfOp, linalg::Conv3DNdhwcDhwcfQOp>,
982
1059
DepthwiseConvConverter,
983
- MatMulConverter,
984
1060
MaxPool2dConverter,
985
1061
AvgPool2dConverter,
986
1062
FullyConnectedConverter>(patterns->getContext ());
1063
+ patterns->add <
1064
+ MatMulConverter>(patterns->getContext (), useMatmulForSingleBatch);
987
1065
// clang-format on
988
1066
}
0 commit comments