Skip to content

Commit 954de25

Browse files
committed
[MLIR] TilingInterface: Avoid map when tile divides iteration domain
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D131080
1 parent ea50901 commit 954de25

File tree

2 files changed

+28
-10
lines changed

2 files changed

+28
-10
lines changed

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,20 @@ static bool isPermutation(ArrayRef<unsigned> interchange) {
9090
// TileUsingSCFForOp pattern implementation.
9191
//===----------------------------------------------------------------------===//
9292

93+
// Check if `stride` evenly divides the trip count `size - offset`.
94+
static bool tileDividesIterationDomain(Range loopRange) {
95+
Optional<int64_t> offsetAsInt = getConstantIntValue(loopRange.offset);
96+
if (!offsetAsInt)
97+
return false;
98+
Optional<int64_t> sizeAsInt = getConstantIntValue(loopRange.size);
99+
if (!sizeAsInt)
100+
return false;
101+
Optional<int64_t> strideAsInt = getConstantIntValue(loopRange.stride);
102+
if (!strideAsInt)
103+
return false;
104+
return ((sizeAsInt.value() - offsetAsInt.value()) % strideAsInt.value() == 0);
105+
}
106+
93107
/// Generate an empty loop nest that represents the tiled loop nest shell.
94108
/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
95109
/// - `tileSizeVals` are the tile sizes to use. A zero represents an untiled loop.
@@ -134,9 +148,15 @@ generateTileLoopNest(OpBuilder &builder, Location loc,
134148
loc, offset, size, tileSizeVals[loopRange.index()], ValueRange{},
135149
[&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
136150
ValueRange /*iterArgs*/) {
137-
Value boundedTileSize = builder.create<AffineMinOp>(
138-
bodyLoc, minMap,
139-
ValueRange{iv, tileSizeVals[loopRange.index()], size});
151+
bool canAvoidMap = tileDividesIterationDomain(
152+
Range{loopRange.value().offset, loopRange.value().size,
153+
tileSizeVals[loopRange.index()]});
154+
Value boundedTileSize =
155+
(canAvoidMap)
156+
? tileSizeVals[loopRange.index()]
157+
: builder.create<AffineMinOp>(
158+
bodyLoc, minMap,
159+
ValueRange{iv, tileSizeVals[loopRange.index()], size});
140160
sizes[loopRange.index()] = boundedTileSize;
141161
builder.create<scf::YieldOp>(loc);
142162
});

mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ func.func @multi_result(%arg0 : tensor<128x200x300xf32>) -> (tensor<128x300x200x
101101
return %0#0, %0#1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>
102102
}
103103
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
104-
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
105104
// CHECK: func.func @multi_result(
106105
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x200x300xf32>)
107106
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
@@ -116,20 +115,19 @@ func.func @multi_result(%arg0 : tensor<128x200x300xf32>) -> (tensor<128x300x200x
116115
// CHECK: %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[C128]]]
117116
// CHECK: %[[INNER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[C300]] step %[[C20]]
118117
// CHECK-SAME: iter_args(%[[ARG3:[a-zA-Z0-9]+]] = %[[ARG1]], %[[ARG4:[a-zA-Z0-9]+]] = %[[ARG2]])
119-
// CHECK: %[[TS_X:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[C300]]]
120118
// CHECK-DAG: %[[ARG_TILE:.+]] = tensor.extract_slice %[[ARG0]]
121-
// CHECK-SAME: [%[[IV0]], 0, %[[IV1]]] [%[[TS_Y]], 200, %[[TS_X]]] [1, 1, 1]
119+
// CHECK-SAME: [%[[IV0]], 0, %[[IV1]]] [%[[TS_Y]], 200, 20] [1, 1, 1]
122120
// CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[ARG3]]
123-
// CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], %[[TS_X]], 200] [1, 1, 1]
121+
// CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
124122
// CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ARG4]]
125-
// CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [%[[TS_X]], %[[TS_Y]], 200] [1, 1, 1]
123+
// CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
126124
// CHECK: %[[RESULT_TILE:.+]]:2 = linalg.generic
127125
// CHECK-SAME: ins(%[[ARG_TILE]] :
128126
// CHECK-SAME: outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
129127
// CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ARG3]]
130-
// CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], %[[TS_X]], 200] [1, 1, 1]
128+
// CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
131129
// CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ARG4]]
132-
// CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [%[[TS_X]], %[[TS_Y]], 200] [1, 1, 1]
130+
// CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
133131
// CHECK: scf.yield %[[UPDATE0]], %[[UPDATE1]]
134132
// CHECK: scf.yield %[[INNER]]#0, %[[INNER]]#1
135133
// CHECK: return %[[OUTER]]#0, %[[OUTER]]#1

0 commit comments

Comments
 (0)