Implement folding for the power operation.

TinaAMD · TinaAMD · commit a8712f6ddfd8 · 2023-03-27T12:07:47.000+01:00
Use the TOSA spec for POW ([0]+[1]). [0] https://www.mlplatform.org/tosa/tosa_spec.html#_pow [1] https://www.mlplatform.org/tosa/tosa_spec.html#_main_inference_profile tosa.pow can be applied to tensors of different shapes, in which case broadcasting is applied. Implement TOSA broadcasting helpers as specified here [2] as well. [2] https://www.mlplatform.org/tosa/tosa_spec.html#_tensor_access_helpers
diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h
@@ -29,6 +29,8 @@ void populateTosaDecomposeTransposeConv(MLIRContext *ctx,
                                         RewritePatternSet &patterns);
 void populateTosaDecomposeDepthwise(MLIRContext *ctx,
                                     RewritePatternSet &patterns);
+void populateTosaFoldConstantPowPatterns(MLIRContext *ctx,
+                                         RewritePatternSet &patterns);
 void populateTosaFoldConstantReciprocalPatterns(MLIRContext *ctx,
                                                 RewritePatternSet &patterns);
 void populateTosaFoldConstantRSQRTPatterns(MLIRContext *ctx,
diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/TosaFoldCommon.h b/mlir/include/mlir/Dialect/Tosa/Transforms/TosaFoldCommon.h
@@ -13,18 +13,32 @@
 #define MLIR_DIALECT_TOSA_TRANSFORMS_TOSA_FOLD_COMMON_H
 
 #include <llvm/ADT/APFloat.h>
+#include <llvm/ADT/ArrayRef.h>
 #include <functional>
 #include <mlir/Dialect/Tosa/IR/TosaOps.h>
 #include <mlir/IR/PatternMatch.h>
 
 namespace mlir {
 namespace tosa {
 
+/// Type that represents tensor dimensions.
+using DimensionType = ArrayRef<int64_t>;
+
+/// Type for tensor offsets.
+using OffsetType = size_t;
+
 /// Transform a tensor with the given transformation function.
 DenseElementsAttr applyElementWise(
     const DenseElementsAttr &toTransform,
     const std::function<llvm::APFloat(const llvm::APFloat &, Type)> &toApply);
 
+/// Apply the given transformation function on the elements of the given
+/// tensors. If the input tensors do not match \p targetType, broadcasting is
+/// applied.
+DenseElementsAttr applyElementWise(
+    const DenseElementsAttr &, const DenseElementsAttr &, TensorType targetType,
+    const std::function<APFloat(const APFloat &, const APFloat &)> &toApply);
+
 /// Function that checks if \p toCheck is a dense TOSA constant float tensor.
 LogicalResult notifyIfNotConstantFloatTosaTensor(TypedValue<TensorType> toCheck,
                                                  TosaOp location,
@@ -39,6 +53,23 @@ LogicalResult notifyIfNoTosaDenseConstantTensor(TypedValue<TensorType> toCheck,
 LogicalResult notifyIfNotFloat(TypedValue<TensorType> toCheck, TosaOp location,
                                PatternRewriter &);
 
+/// Compute the offset in \p shape which corresponds to the given \p index.
+OffsetType indexToOffset(DimensionType shape, DimensionType index);
+
+/// Compute the index into \p shape which corresponds to the given \p offset.
+SmallVector<int64_t> offsetToIndex(DimensionType shape, OffsetType offset);
+
+/// Given an \p index into \p desiredShape, compute the corresponding index into
+/// \p toBeBroadcasted.
+SmallVector<int64_t> getBroadcastedIndex(DimensionType desiredShape,
+                                         DimensionType toBeBroadcasted,
+                                         DimensionType index);
+/// Given an \p offset into \p desiredShape, compute the corresponding offset
+/// into \p toBeBroadcasted.
+OffsetType getBroadcastedOffset(DimensionType desiredShape,
+                                DimensionType toBeBroadcasted,
+                                OffsetType offset);
+
 /// Function to compute the reciprocal.
 APFloat computeReciprocal(const APFloat &, Type);
 
diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_dialect_library(MLIRTosaTransforms
   TosaDecomposeConv2D.cpp
   TosaDecomposeDepthwise.cpp
   TosaFoldCommon.cpp
+  TosaFoldConstantPow.cpp
   TosaFoldConstantReciprocal.cpp
   TosaFoldConstantRSQRT.cpp
   TosaFoldConstantTranspose.cpp
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaFoldCommon.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaFoldCommon.cpp
@@ -12,7 +12,9 @@
 
 #include "mlir/Dialect/Tosa/Transforms/TosaFoldCommon.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
+#include <algorithm>
 #include <llvm/ADT/APFloat.h>
+#include <llvm/ADT/SmallVector.h>
 #include <mlir/IR/BuiltinAttributes.h>
 #include <mlir/IR/BuiltinTypes.h>
 #include <mlir/IR/Matchers.h>
@@ -44,6 +46,42 @@ DenseElementsAttr mlir::tosa::applyElementWise(
   return newTensor;
 }
 
+DenseElementsAttr mlir::tosa::applyElementWise(
+    const DenseElementsAttr &first, const DenseElementsAttr &second,
+    TensorType targetType,
+    const std::function<APFloat(const APFloat &, const APFloat &)> &toApply) {
+  // Make sure to use the correct values in case broadcasting is required
+  SmallVector<APFloat> transformedValues;
+  // We already know the amount of values we will insert, reserve space for
+  // all of them to avoid dynamic resizing
+  auto targetSize = 1;
+  auto targetShape = targetType.getShape();
+  for (const auto &dimSize : targetShape) {
+    targetSize *= dimSize;
+  }
+  transformedValues.reserve(targetSize);
+
+  // Apply the given function to each pair of values from the input tensors.
+  // Make sure to broadcast the offsets properly.
+  auto firstIt = first.getValues<APFloat>();
+  auto firstShape = first.getType().getShape();
+  auto secondIt = second.getValues<APFloat>();
+  auto secondShape = second.getType().getShape();
+  for (auto offset = 0; offset < targetSize; offset++) {
+    OffsetType offsetInTargetFirst =
+        getBroadcastedOffset(targetShape, firstShape, offset);
+    OffsetType offsetInTargetSecond =
+        getBroadcastedOffset(targetShape, secondShape, offset);
+    auto res =
+        toApply(firstIt[offsetInTargetFirst], secondIt[offsetInTargetSecond]);
+    transformedValues.push_back(res);
+  }
+
+  // Generate a tensor with the computed values.
+  auto newTensor = DenseElementsAttr::get(targetType, transformedValues);
+  return newTensor;
+}
+
 LogicalResult
 mlir::tosa::notifyIfNotConstantFloatTosaTensor(TypedValue<TensorType> toCheck,
                                                TosaOp location,
@@ -91,6 +129,55 @@ LogicalResult mlir::tosa::notifyIfNotFloat(TypedValue<TensorType> toCheck,
                                      "TOSA spec only allows floats");
 }
 
+OffsetType mlir::tosa::indexToOffset(DimensionType shape, DimensionType index) {
+  OffsetType offset = 0;
+  for (size_t i = 0; i < shape.size(); i++) {
+    offset = offset * shape[i] + index[i];
+  }
+  return offset;
+}
+
+SmallVector<int64_t> mlir::tosa::offsetToIndex(DimensionType shape,
+                                               OffsetType offset) {
+  auto rank = shape.size();
+  // The rank of the index will be equal to the rank of the shape
+  SmallVector<int64_t> resultIndex;
+  resultIndex.reserve(rank);
+  // Compute all the index values from the last to the first one, reverse the
+  // vector afterwards as there is no convenient push_front.
+  for (int32_t i = rank - 1; i >= 0; i--) {
+    resultIndex.push_back(offset % shape[i]);
+    offset /= shape[i];
+  }
+  std::reverse(resultIndex.begin(), resultIndex.end());
+  return resultIndex;
+}
+
+SmallVector<int64_t>
+mlir::tosa::getBroadcastedIndex(DimensionType desiredShape,
+                                DimensionType toBeBroadcasted,
+                                DimensionType index) {
+  SmallVector<int64_t> broadCasted;
+  broadCasted.reserve(desiredShape.size());
+  for (size_t i = 0; i < desiredShape.size(); i++) {
+    auto toInsert = 0;
+    if (toBeBroadcasted[i] == desiredShape[i]) {
+      toInsert = index[i];
+    }
+    broadCasted.push_back(toInsert);
+  }
+  return broadCasted;
+}
+
+OffsetType mlir::tosa::getBroadcastedOffset(DimensionType desiredShape,
+                                            DimensionType toBeBroadcasted,
+                                            OffsetType offset) {
+  auto indexInTarget = offsetToIndex(desiredShape, offset);
+  auto indexBroadcasted =
+      getBroadcastedIndex(desiredShape, toBeBroadcasted, indexInTarget);
+  return indexToOffset(toBeBroadcasted, indexBroadcasted);
+}
+
 APFloat mlir::tosa::computeReciprocal(const APFloat &floatVal, Type floatTy) {
   auto recipAttr = FloatAttr::get(floatTy, 1.0);
   APFloat recip = recipAttr.getValue();
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantPow.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantPow.cpp
@@ -0,0 +1,110 @@
+//===- TosaFoldConstantPow.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Fold TOSA Pow operation on constant data
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tosa/IR/TosaOps.h"
+#include "mlir/Dialect/Tosa/Transforms/Passes.h"
+#include "mlir/Dialect/Tosa/Transforms/TosaFoldCommon.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Pass/Pass.h"
+#include <cmath>
+#include <llvm/ADT/APFloat.h>
+#include <llvm/ADT/FloatingPointMode.h>
+#include <llvm/ADT/SmallVector.h>
+#include <mlir/IR/BuiltinAttributes.h>
+#include <mlir/Support/LogicalResult.h>
+
+using namespace mlir;
+using namespace mlir::tosa;
+
+namespace {
+
+struct TosaFoldConstantPow : public OpRewritePattern<PowOp> {
+
+  using OpRewritePattern::OpRewritePattern;
+
+  static APFloat computePower(const APFloat &base, const APFloat &exp) {
+    // Propagate NaN
+    if (base.isNaN() || exp.isNaN()) {
+      return APFloat::getNaN(base.getSemantics());
+    }
+    // TOSA defines 0.0**0.0 as NaN
+    if (base.isZero() && exp.isZero()) {
+      return APFloat::getNaN(base.getSemantics());
+    }
+    // In case the value is negative, the exponent needs to be an integer
+    if (base.isNegative() && !base.isZero()) {
+      if (!exp.isInteger()) {
+        return APFloat::getNaN(base.getSemantics());
+      }
+    }
+
+    // Actually compute base**exp. Special cases for [-]infinity and [-]0 are
+    // already handled in accordance with the TOSA spec.
+    auto powFloat = std::pow(base.convertToFloat(), exp.convertToFloat());
+    auto res = APFloat(powFloat);
+
+    bool lostPrecision;
+    res.convert(base.getSemantics(), APFloat::rmNearestTiesToEven,
+                &lostPrecision);
+    return res;
+  }
+
+  LogicalResult matchAndRewrite(PowOp powOp,
+                                PatternRewriter &rewriter) const override {
+    auto baseOp = powOp.getInput1();
+    auto expOp = powOp.getInput2();
+
+    // Check if both tensors are constant
+    auto baseIsConstCheck =
+        notifyIfNotConstantFloatTosaTensor(baseOp, powOp, rewriter);
+    if (failed(baseIsConstCheck)) {
+      return baseIsConstCheck;
+    }
+    auto expIsConstCheck =
+        notifyIfNotConstantFloatTosaTensor(expOp, powOp, rewriter);
+    if (failed(expIsConstCheck)) {
+      return expIsConstCheck;
+    }
+
+    // Extract the tensor values
+    DenseElementsAttr baseValues;
+    matchPattern(baseOp, m_Constant(&baseValues));
+
+    DenseElementsAttr expValues;
+    matchPattern(expOp, m_Constant(&expValues));
+
+    // If both tensors are splat, we don't care for the number of users
+    if (!isa<SplatElementsAttr>(baseValues) ||
+        !isa<SplatElementsAttr>(expValues)) {
+      // Make sure that at least one of the constant input tensors can be
+      // replaced (i.e. only has a single user)
+      if (!baseOp.hasOneUse() && !expOp.hasOneUse()) {
+        return rewriter.notifyMatchFailure(
+            powOp, "Currently, pows will only be folded if at least one input "
+                   "tensor only has a single user");
+      }
+    }
+
+    auto newTensor =
+        applyElementWise(baseValues, expValues, powOp.getType(), &computePower);
+    rewriter.replaceOpWithNewOp<ConstOp>(powOp, newTensor.getType(), newTensor);
+
+    return success();
+  }
+};
+
+} // namespace
+
+void mlir::tosa::populateTosaFoldConstantPowPatterns(
+    MLIRContext *ctx, RewritePatternSet &patterns) {
+  patterns.add<TosaFoldConstantPow>(ctx);
+}
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaLayerwiseConstantFoldPass.cpp
@@ -50,6 +50,7 @@ struct TosaLayerwiseConstantFoldPass
     RewritePatternSet patterns(ctx);
     auto func = getOperation();
 
+    mlir::tosa::populateTosaFoldConstantPowPatterns(ctx, patterns);
     mlir::tosa::populateTosaFoldConstantReciprocalPatterns(ctx, patterns);
     mlir::tosa::populateTosaFoldConstantRSQRTPatterns(ctx, patterns);
     mlir::tosa::populateTosaFoldConstantTransposePatterns(ctx, patterns);
diff --git a/mlir/test/Dialect/Tosa/constant-pow-opt.mlir b/mlir/test/Dialect/Tosa/constant-pow-opt.mlir
@@ -0,0 +1,92 @@
+// RUN: mlir-opt --split-input-file --tosa-layerwise-constant-fold %s | FileCheck %s
+
+// CHECK-LABEL: @pow_fold_tiny
+func.func @pow_fold_tiny() -> tensor<f32> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}1.6{{0*}}e+01{{.*}}tensor<f32>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<4.0> : tensor<f32>} : () -> tensor<f32>
+  %1 = "tosa.const"() {value = dense<2.0> : tensor<f32>} : () -> tensor<f32>
+  %2 = "tosa.pow"(%0, %1) : (tensor<f32>, tensor<f32>) -> tensor<f32>
+  return %2 : tensor<f32>
+}
+
+// CHECK-LABEL: @pow_fold_tensor
+func.func @pow_fold_tensor() -> tensor<3xf16> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}2.56{{0*}}e+02, 1.191410e+00, -3.099610e+00{{.*}}tensor<3xf16>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[4.0, 2.22, -3.1]> : tensor<3xf16>} : () -> tensor<3xf16>
+  %1 = "tosa.const"() {value = dense<[4.0, 0.22, 1.0]> : tensor<3xf16>} : () -> tensor<3xf16>
+  %2 = "tosa.pow"(%0, %1) : (tensor<3xf16>, tensor<3xf16>) -> tensor<3xf16>
+  return %2 : tensor<3xf16>
+}
+
+// CHECK-LABEL: @pow_fold_overflow
+func.func @pow_fold_overflow() -> tensor<2xf16> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}0x7C00, 0xFC00{{.*}}tensor<2xf16>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[65500.0, -65500.0]> : tensor<2xf16>} : () -> tensor<2xf16>
+  %1 = "tosa.const"() {value = dense<[2.0, 3.0]> : tensor<2xf16>} : () -> tensor<2xf16>
+  %2 = "tosa.pow"(%0, %1) : (tensor<2xf16>, tensor<2xf16>) -> tensor<2xf16>
+  return %2 : tensor<2xf16>
+}
+
+// CHECK-LABEL: @pow_fold_underflow
+func.func @pow_fold_underflow() -> tensor<2xf16> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}[0.0{{0*}}e+00, -0.0{{0*}}e+00{{.*}}tensor<2xf16>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[0.000001, -0.000001]> : tensor<2xf16>} : () -> tensor<2xf16>
+  %1 = "tosa.const"() {value = dense<[10.0, 9.0]> : tensor<2xf16>} : () -> tensor<2xf16>
+  %2 = "tosa.pow"(%0, %1) : (tensor<2xf16>, tensor<2xf16>) -> tensor<2xf16>
+  return %2 : tensor<2xf16>
+}
+
+// CHECK-LABEL: @pow_fold_nan_cases
+func.func @pow_fold_nan_cases() -> tensor<3xf32> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}<0x7FC00000>{{.*}}tensor<3xf32>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[0.0, -1.25, 0x7FC00000]> : tensor<3xf32>} : () -> tensor<3xf32>
+  %1 = "tosa.const"() {value = dense<[0.0, 0.745, 2.0]> : tensor<3xf32>} : () -> tensor<3xf32>
+  %2 = "tosa.pow"(%0, %1) : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
+  return %2 : tensor<3xf32>
+}
+
+// CHECK-LABEL: @pow_fold_tensor_broadcast_exp
+func.func @pow_fold_tensor_broadcast_exp() -> tensor<3xf16> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}1.6{{0*}}e+01, 4.929690e+00, 9.609370e+00{{.*}}tensor<3xf16>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[4.0, 2.22, -3.1]> : tensor<3xf16>} : () -> tensor<3xf16>
+  %1 = "tosa.const"() {value = dense<2.0> : tensor<1xf16>} : () -> tensor<1xf16>
+  %2 = "tosa.pow"(%0, %1) : (tensor<3xf16>, tensor<1xf16>) -> tensor<3xf16>
+  return %2 : tensor<3xf16>
+}
+
+// CHECK-LABEL: @pow_fold_tensor_broadcast_base
+func.func @pow_fold_tensor_broadcast_base() -> tensor<3xf16> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const{{.*}}1.6{{0*}}e+01, 4.660160e+00, 1.166380e-01{{.*}}tensor<3xf16>
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[4.0, 2.22, -3.1]> : tensor<3xf16>} : () -> tensor<3xf16>
+  %1 = "tosa.const"() {value = dense<2.0> : tensor<1xf16>} : () -> tensor<1xf16>
+  %2 = "tosa.pow"(%1, %0) : (tensor<1xf16>, tensor<3xf16>) -> tensor<3xf16>
+  return %2 : tensor<3xf16>
+}
+
+// CHECK-LABEL: @pow_fold_broadcast_two_dimensions
+func.func @pow_fold_broadcast_two_dimensions() -> tensor<3x3xf32> {
+  // CHECK: [[RES:]] ={{.*}}tosa.const
+  // CHECK-SAME{LITERAL}: [[388.023529, 1.102940e+03, 2554.37329],
+  // CHECK-SAME{LITERAL}:  [75281.1328, 538664.813, 0x4A1FF040],
+  // CHECK-SAME{LITERAL}:  [24.2514629, 42.4044418, 66.4508896]]
+  // CHECK-NOT: tosa.pow
+  // CHECK: return [[RES]]
+  %0 = "tosa.const"() {value = dense<[[4.0, 5.1, 6.2]]> : tensor<1x3xf32>} : () -> tensor<1x3xf32>
+  %1 = "tosa.const"() {value = dense<[[4.3], [8.1], [2.3]]> : tensor<3x1xf32>} : () -> tensor<3x1xf32>
+  %2 = "tosa.pow"(%0, %1) : (tensor<1x3xf32>, tensor<3x1xf32>) -> tensor<3x3xf32>
+  return %2 : tensor<3x3xf32>
+}