Merge pull request #407 from Xilinx/matthias.fix_non_monotonic_slice_params

mgehre-amd · web-flow · commit 573742add912 · 2024-12-03T15:33:54.000+01:00
computeSliceParameters: use full slice if affine exprs are non-monotonic
diff --git a/mlir/include/mlir/IR/AffineExpr.h b/mlir/include/mlir/IR/AffineExpr.h
@@ -110,6 +110,11 @@ class AffineExpr {
   /// floordiv, ceildiv, and mod is only allowed w.r.t constants.
   bool isPureAffine() const;
 
+  /// Returns true if this expression is monotonicically increasing with respect
+  /// to the AffineDimExprs, i.e. increasing the value of any AffineDimExpr will
+  /// never decrease the value of the result.
+  bool isMonotonicallyIncreasing() const;
+
   /// Returns the greatest known integral divisor of this affine expression. The
   /// result is always positive.
   int64_t getLargestKnownDivisor() const;
diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h
@@ -364,6 +364,10 @@ class AffineMap {
   /// Returns true if the AffineMap represents a symbol-less permutation map.
   bool isPermutation() const;
 
+  // Returns true if every result is monotonically increasing.
+  // See AffineExpr::isMonotonicallyIncreasing().
+  bool isComponentWiseMonotonicallyIncreasing() const;
+
   /// Returns the map consisting of the `resultPos` subset.
   AffineMap getSubMap(ArrayRef<unsigned> resultPos) const;
 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -616,7 +616,11 @@ computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
   sliceParams.strides.reserve(rank);
   for (unsigned r = 0; r < rank; ++r) {
     LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
-    if (!isTiled(map.getSubMap({r}), tileSizes)) {
+    auto m = map.getSubMap({r});
+    // The offset & size computation below only handles the case when
+    // the map is monotonically increasing, i.e. the min and max values are
+    // attained at the lower and upper bounds of the iteration domain.
+    if (!isTiled(m, tileSizes) || !m.isComponentWiseMonotonicallyIncreasing()) {
       sliceParams.offsets.push_back(builder.getIndexAttr(0));
       OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
       sliceParams.sizes.push_back(dim);
@@ -628,7 +632,6 @@ computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
 
     // Tiling creates a new slice at the proper index, the slice step is 1
     // (i.e. the op does not subsample, stepping occurs in the loop).
-    auto m = map.getSubMap({r});
     LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
     IRRewriter rewriter(builder);
     OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp
@@ -239,6 +239,42 @@ bool AffineExpr::isPureAffine() const {
   llvm_unreachable("Unknown AffineExpr");
 }
 
+static bool isNonNegativeConstant(AffineExpr expr) {
+  auto constant = dyn_cast<AffineConstantExpr>(expr);
+  return constant && constant.getValue() >= 0;
+}
+
+bool AffineExpr::isMonotonicallyIncreasing() const {
+  switch (getKind()) {
+  case AffineExprKind::SymbolId:
+  case AffineExprKind::DimId:
+  case AffineExprKind::Constant:
+    return true;
+  case AffineExprKind::Add: {
+    auto op = llvm::cast<AffineBinaryOpExpr>(*this);
+    return op.getLHS().isMonotonicallyIncreasing() &&
+           op.getRHS().isMonotonicallyIncreasing();
+  }
+  case AffineExprKind::Mul: {
+    // One operand must be a non-negative constant.
+    auto op = llvm::cast<AffineBinaryOpExpr>(*this);
+    return op.getLHS().isMonotonicallyIncreasing() &&
+           op.getRHS().isMonotonicallyIncreasing() &&
+           (isNonNegativeConstant(op.getLHS()) ||
+            isNonNegativeConstant(op.getRHS()));
+  }
+  case AffineExprKind::FloorDiv:
+  case AffineExprKind::CeilDiv: {
+    auto op = llvm::cast<AffineBinaryOpExpr>(*this);
+    return op.getLHS().isMonotonicallyIncreasing() &&
+           isNonNegativeConstant(op.getRHS());
+  }
+  case AffineExprKind::Mod:
+    return false;
+  }
+  llvm_unreachable("Unknown AffineExpr");
+}
+
 // Returns the greatest known integral divisor of this affine expression.
 int64_t AffineExpr::getLargestKnownDivisor() const {
   AffineBinaryOpExpr binExpr(nullptr);
diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp
@@ -628,6 +628,11 @@ bool AffineMap::isPermutation() const {
   return isProjectedPermutation();
 }
 
+bool AffineMap::isComponentWiseMonotonicallyIncreasing() const {
+  return all_of(getResults(),
+                [](auto expr) { return expr.isMonotonicallyIncreasing(); });
+}
+
 AffineMap AffineMap::getSubMap(ArrayRef<unsigned> resultPos) const {
   SmallVector<AffineExpr, 4> exprs;
   exprs.reserve(resultPos.size());
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -167,3 +167,35 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+// CHECK-LABEL: func @non_monotonic_affine_expr
+//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
+func.func @non_monotonic_affine_expr(%arg0 : tensor<?xf32>) -> tensor<?xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
+  %empty = tensor.empty(%0) : tensor<?xf32>
+
+  // CHECK: scf.for
+  // CHECK: %[[SIZE:[a-zA-Z0-9_]+]] = tensor.dim %[[ARG0]],
+  // CHECK: tensor.extract_slice %[[ARG0]][0] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>
+  %generic = linalg.generic
+    {indexing_maps = [affine_map<(d0) -> (d0 mod 3)>,
+                      affine_map<(d0) -> (d0)>],
+     iterator_types = ["parallel"]}
+    ins(%arg0: tensor<?xf32>)
+    outs(%empty : tensor<?xf32>) {
+    ^bb0(%in : f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<?xf32>
+  return %generic : tensor<?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [100] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}