
Commit 59b56bf

mgehre-amd authored and GitHub Enterprise committed
Merge pull request #6 from ACT/tina.tosareciprocalfolding
[FXML-1727] Implement folding for constant reciprocals
2 parents 41597d5 + e2c13ec, commit 59b56bf

File tree: 5 files changed, 251 additions and 0 deletions
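
Roughly, the new pattern replaces a TOSA reciprocal of a constant tensor with a constant holding the elementwise reciprocals. A minimal before/after sketch (illustrative only, not taken from the patch; the shape and value are made up but mirror the splat test below):

// Before --tosa-layerwise-constant-fold:
%0 = "tosa.const"() {value = dense<4.0> : tensor<2xf32>} : () -> tensor<2xf32>
%1 = "tosa.reciprocal"(%0) : (tensor<2xf32>) -> tensor<2xf32>

// After folding, the reciprocal disappears and only a constant remains:
%1 = "tosa.const"() {value = dense<2.500000e-01> : tensor<2xf32>} : () -> tensor<2xf32>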

mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h

Lines changed: 2 additions & 0 deletions

@@ -29,6 +29,8 @@ void populateTosaDecomposeTransposeConv(MLIRContext *ctx,
                                         RewritePatternSet &patterns);
 void populateTosaDecomposeDepthwise(MLIRContext *ctx,
                                     RewritePatternSet &patterns);
+void populateTosaFoldConstantReciprocalPatterns(MLIRContext *ctx,
+                                                RewritePatternSet &patterns);
 void populateTosaFoldConstantTransposePatterns(MLIRContext *ctx,
                                                RewritePatternSet &patterns);

mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
   TosaDecomposeTransposeConv.cpp
   TosaDecomposeConv2D.cpp
   TosaDecomposeDepthwise.cpp
+  TosaFoldConstantReciprocal.cpp
   TosaFoldConstantTranspose.cpp
   TosaInferShapes.cpp
   TosaLayerwiseConstantFoldPass.cpp
mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantReciprocal.cpp (new file)

Lines changed: 130 additions & 0 deletions

//===- TosaFoldConstantReciprocal.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Fold TOSA Reciprocal operation on constant data
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Tosa/Transforms/Passes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include <llvm/ADT/APFloat.h>
#include <llvm/ADT/FloatingPointMode.h>
#include <llvm/ADT/SmallVector.h>
#include <mlir/IR/BuiltinAttributes.h>
#include <mlir/Support/LogicalResult.h>

using namespace mlir;
using namespace mlir::tosa;

namespace {

struct TosaFoldConstantReciprocal : public OpRewritePattern<ReciprocalOp> {

  using OpRewritePattern::OpRewritePattern;

  static constexpr llvm::RoundingMode reciprocalRoundingMode =
      APFloat::rmNearestTiesToEven;

  APFloat computeReciprocal(const APFloat &floatVal, Type floatTy) const {
    auto recipAttr = FloatAttr::get(floatTy, 1.0);
    APFloat recip = recipAttr.getValue();
    recip.divide(floatVal, reciprocalRoundingMode);

    return recip;
  }

  DenseElementsAttr
  replaceTensorWithReciprocal(ConstOp tensorToReplace,
                              const DenseElementsAttr &inputValues) const {
    // TODO: it would be nicer to do this in-place

    // Compute the reciprocal for each tensor element.
    llvm::SmallVector<APFloat, 1> transformedValues;
    // We already know the number of values we will insert, so reserve space
    // for all of them to avoid dynamic resizing.
    transformedValues.reserve(inputValues.getNumElements());
    for (auto val : inputValues.getValues<APFloat>()) {
      auto recipVal = computeReciprocal(val, inputValues.getElementType());
      transformedValues.push_back(recipVal);
    }

    // Replace the current tensor with one containing the computed reciprocals.
    auto newTensor =
        DenseElementsAttr::get(inputValues.getType(), transformedValues);
    return newTensor;
  }

  LogicalResult matchAndRewrite(ReciprocalOp recip,
                                PatternRewriter &rewriter) const override {
    auto inputTensor = recip.getInput1();
    auto elemType = inputTensor.getType().getElementType();
    // TOSA only allows floats as inputs to the reciprocal operation, so bail
    // out if anything else is contained.
    if (!isa<FloatType>(elemType)) {
      return rewriter.notifyMatchFailure(recip,
                                         "Unexpected input tensor type: the "
                                         "TOSA spec only allows floats");
    }

    // Check whether the tensor is constant and dense.
    DenseElementsAttr inputValues;
    if (!matchPattern(inputTensor, m_Constant(&inputValues))) {
      return rewriter.notifyMatchFailure(
          recip, "Non-const or non-dense input to reciprocal");
    }

    // In case we have a splat, we only need to calculate the reciprocal once
    // and update the tensor to the transformed splat value.
    if (auto splatAttrs = dyn_cast<SplatElementsAttr>(inputValues)) {
      // Transform the splat value.
      auto splatVal = splatAttrs.getSplatValue<APFloat>();
      auto newSplatRecipAttr = computeReciprocal(splatVal, elemType);

      // Create a tensor with the transformed splat value.
      auto newSplatTensor =
          DenseElementsAttr::get(splatAttrs.getType(), newSplatRecipAttr);

      // Replace the reciprocal op with the newly constructed tensor.
      rewriter.replaceOpWithNewOp<ConstOp>(recip, newSplatTensor.getType(),
                                           newSplatTensor);
      return success();
    }

    if (!isa<ConstOp>(inputTensor.getDefiningOp())) {
      return rewriter.notifyMatchFailure(recip,
                                         "The reciprocal can only be folded if "
                                         "it operates on a TOSA constant");
    }
    auto definingConstOp = cast<ConstOp>(inputTensor.getDefiningOp());

    // Our transformation replaces the input tensor with the transformed
    // tensor. If the input has several users, we would need to keep the
    // original input as well, which can significantly increase memory usage,
    // so we currently refrain from applying the transformation in that case.
    if (!definingConstOp->hasOneUse()) {
      return rewriter.notifyMatchFailure(
          recip, "Currently, reciprocals will only be folded if the input "
                 "tensor has a single user");
    }

    // Create a new tensor with the updated values.
    auto newTensor = replaceTensorWithReciprocal(definingConstOp, inputValues);

    // Replace the use of the reciprocal with the transformed tensor.
    rewriter.replaceOpWithNewOp<ConstOp>(recip, newTensor.getType(), newTensor);
    return success();
  }
};

} // namespace

void mlir::tosa::populateTosaFoldConstantReciprocalPatterns(
    MLIRContext *ctx, RewritePatternSet &patterns) {
  patterns.add<TosaFoldConstantReciprocal>(ctx);
}
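
One restriction worth noting in the pattern above: for non-splat constants, folding only happens when the constant has a single user, to avoid keeping both the original and the folded tensor alive. A hypothetical snippet (not among the patch's tests) that the pattern would therefore leave untouched:

// %cst is non-splat and feeds both the reciprocal and the add, so folding
// would duplicate the tensor data; the pattern reports a match failure.
%cst = "tosa.const"() {value = dense<[1.0, 2.0, 4.0]> : tensor<3xf32>} : () -> tensor<3xf32>
%rec = "tosa.reciprocal"(%cst) : (tensor<3xf32>) -> tensor<3xf32>
%sum = "tosa.add"(%cst, %rec) : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>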

mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp

Lines changed: 1 addition & 0 deletions

@@ -50,6 +50,7 @@ struct TosaLayerwiseConstantFoldPass
   RewritePatternSet patterns(ctx);
   auto func = getOperation();

+  mlir::tosa::populateTosaFoldConstantReciprocalPatterns(ctx, patterns);
   mlir::tosa::populateTosaFoldConstantTransposePatterns(ctx, patterns);
   populateTosaOpsCanonicalizationPatterns(ctx, patterns);

Lines changed: 117 additions & 0 deletions

// RUN: mlir-opt --split-input-file --tosa-layerwise-constant-fold %s | FileCheck %s

// CHECK-LABEL: @reciprocal_fold_single_valued
func.func @reciprocal_fold_single_valued() -> tensor<f32> {
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}2.5{{0*}}e-01{{.*}}tensor<f32>
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<4.0> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_fold_splat
func.func @reciprocal_fold_splat() -> tensor<12x7xf32> {
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}2.5{{0*}}e-01{{.*}}tensor<12x7xf32>
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<4.0> : tensor<12x7xf32>} : () -> tensor<12x7xf32>
  %1 = "tosa.reciprocal"(%0) : (tensor<12x7xf32>) -> tensor<12x7xf32>
  return %1 : tensor<12x7xf32>
}

// CHECK-LABEL: @reciprocal_div_zero
func.func @reciprocal_div_zero() -> tensor<f32> {
  // 0x7F800000 is the value for +infinity
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}0x7F800000
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<0.0> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_div_neg_zero
func.func @reciprocal_div_neg_zero() -> tensor<f32> {
  // 0xFF800000 is the value for -infinity
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}0xFF800000
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<-0.0> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_div_nan
func.func @reciprocal_div_nan() -> tensor<f32> {
  // 0x7FC00000 is the value for NaN
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}0x7FC00000
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<0x7FC00000> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_div_infinity
func.func @reciprocal_div_infinity() -> tensor<f32> {
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}<0.{{0*}}e+00>
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<0x7F800000> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_div_neg_infinity
func.func @reciprocal_div_neg_infinity() -> tensor<f32> {
  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}<-0.{{0*}}e+00>
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() {value = dense<0xFF800000> : tensor<f32>} : () -> tensor<f32>
  %1 = "tosa.reciprocal"(%0) : (tensor<f32>) -> tensor<f32>
  return %1 : tensor<f32>
}

// CHECK-LABEL: @reciprocal_no_fold
// The folding optimization works only intra-procedurally, so we won't be able
// to fold anything here
func.func @reciprocal_no_fold(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: tosa.reciprocal
  // CHECK-NEXT: return
  %0 = "tosa.reciprocal"(%arg0) : (tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: @reciprocal_fold
func.func @reciprocal_fold() -> tensor<4x6xf32> {
  // CHECK: [[RES:]] ={{.*}}tosa.const
  // CHECK-SAME{LITERAL}: [[5.68828249, 11.4416485, 1.6880486, 0.680272102, -0.875350117, 0.342313349],
  // CHECK-SAME{LITERAL}: [-4.81231928, 0.698080301, 0.65432179, -82.6446304, -4.33651352, -0.747551739],
  // CHECK-SAME{LITERAL}: [-12.4378109, 13.140605, 1.89501607, 0.885582745, 4.08830738, 1.4396776],
  // CHECK-SAME{LITERAL}: [2.02880907, -1.53280187, 0.552730501, 7.15819644, 0.64495325, -0.973709881]]
  // CHECK-NOT: tosa.reciprocal
  // CHECK: return [[RES]]
  %0 = "tosa.const"() { value = dense<[
                        [ 0.1758, 0.0874, 0.5924, 1.4700, -1.1424, 2.9213],
                        [-0.2078, 1.4325, 1.5283, -0.0121, -0.2306, -1.3377],
                        [-0.0804, 0.0761, 0.5277, 1.1292, 0.2446, 0.6946],
                        [ 0.4929, -0.6524, 1.8092, 0.1397, 1.5505, -1.0270]]>
                        : tensor<4x6xf32>
                      } : () -> tensor<4x6xf32>
  %1 = "tosa.reciprocal"(%0) : (tensor<4x6xf32>) -> tensor<4x6xf32>
  return %1 : tensor<4x6xf32>
}

// CHECK-LABEL: @reciprocal_of_const_sparse
// Sparse tensors are currently not supported
func.func @reciprocal_of_const_sparse() -> tensor<32xbf16> {
  // CHECK: tosa.const
  // CHECK: tosa.reciprocal
  %0 = "tosa.const"() { value = sparse<
        [[0], [3], [11], [17], [20], [23], [25], [30], [31]],
        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]>
        : tensor<32xbf16> } : () -> tensor<32xbf16>
  %1 = "tosa.reciprocal"(%0) : (tensor<32xbf16>) -> tensor<32xbf16>
  return %1 : tensor<32xbf16>
}
