Skip to content

Commit 876704f

Browse files
Add method to normalize scf.forall op.
1 parent c3e8ca9 commit 876704f

File tree

10 files changed

+199
-81
lines changed

10 files changed

+199
-81
lines changed

mlir/include/mlir/Dialect/SCF/Utils/Utils.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target,
195195
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source,
196196
RewriterBase &rewriter);
197197

198+
/// Normalize an `scf.forall` operation. Returns `failure()` if normalization fails.
199+
/// On `success()` returns the
200+
/// newly created operation with all uses of the original operation replaced
201+
/// with results of the new operation.
202+
FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
203+
scf::ForallOp forallOp);
204+
198205
} // namespace mlir
199206

200207
#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

Lines changed: 100 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "mlir/Dialect/Affine/IR/AffineOps.h"
1414
#include "mlir/Dialect/Arith/IR/Arith.h"
15+
#include "mlir/Dialect/Arith/Utils/Utils.h"
1516
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
1617
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
1718
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -3151,6 +3152,94 @@ void transform::TileUsingForallOp::build(OpBuilder &builder,
31513152
/*mapping=*/mapping);
31523153
}
31533154

3155+
/// Given `lbs`, `ubs` and `steps` of loops, return (for each loop), the
3156+
/// normalized upper bound.
3157+
static SmallVector<OpFoldResult>
3158+
normalizeUpperBounds(RewriterBase &rewriter, Location loc,
3159+
ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
3160+
ArrayRef<OpFoldResult> steps) {
3161+
AffineExpr s0, s1, s2;
3162+
bindSymbols(rewriter.getContext(), s0, s1, s2);
3163+
AffineExpr normalizedUbExpr = (s1 - s0).ceilDiv(s2);
3164+
SmallVector<OpFoldResult> normalizedUbs;
3165+
for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
3166+
OpFoldResult normalizedUb = affine::makeComposedFoldedAffineApply(
3167+
rewriter, loc, normalizedUbExpr, {lb, ub, step});
3168+
normalizedUbs.push_back(normalizedUb);
3169+
}
3170+
return normalizedUbs;
3171+
}
3172+
3173+
/// When a loop is normalized, the uses of the induction variable within the
3174+
/// loop need to be replaced with `original_lb + old_iv * original_step`.
3175+
static SmallVector<Value> denormalizeIndVar(RewriterBase &rewriter,
3176+
Location loc, ValueRange ivs,
3177+
ArrayRef<OpFoldResult> lbs,
3178+
ArrayRef<OpFoldResult> steps) {
3179+
AffineExpr s0, s1;
3180+
AffineExpr d0;
3181+
bindSymbols(rewriter.getContext(), s0, s1);
3182+
bindDims(rewriter.getContext(), d0);
3183+
AffineExpr denormExpr = s0 + d0 * s1;
3184+
SmallVector<Value> denormalizedIvs;
3185+
3186+
for (auto [iv, lb, step] : llvm::zip_equal(ivs, lbs, steps)) {
3187+
OpFoldResult denormValue = affine::makeComposedFoldedAffineApply(
3188+
rewriter, loc, denormExpr, ArrayRef<OpFoldResult>{iv, lb, step});
3189+
denormalizedIvs.push_back(
3190+
getValueOrCreateConstantIndexOp(rewriter, loc, denormValue));
3191+
}
3192+
return denormalizedIvs;
3193+
}
3194+
3195+
/// Given a `scf.forall` loop, return a loop op with the loop bounds
3196+
/// normalized.
3197+
/// TODO: Replace this with a general utility to normalize `scf.forall`.
3198+
/// At the time of writing, this wasn't done since adding this to `scf`
3199+
/// dialect would disallow the use of `affine.apply` operations due
3200+
/// to cyclic dependencies. To avoid churn in lit tests
3201+
/// with the change this was added with, defer that to a follow up.
3202+
static scf::ForallOp normalizeForallLoopOp(RewriterBase &rewriter,
3203+
scf::ForallOp loop) {
3204+
SmallVector<OpFoldResult> lbs = loop.getMixedLowerBound();
3205+
SmallVector<OpFoldResult> ubs = loop.getMixedUpperBound();
3206+
SmallVector<OpFoldResult> steps = loop.getMixedStep();
3207+
3208+
if (llvm::all_of(
3209+
lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) &&
3210+
llvm::all_of(
3211+
steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) {
3212+
return loop;
3213+
}
3214+
3215+
Location loc = loop.getLoc();
3216+
SmallVector<OpFoldResult> normalizedUbs =
3217+
normalizeUpperBounds(rewriter, loc, lbs, ubs, steps);
3218+
SmallVector<OpFoldResult> normalizedLbs(normalizedUbs.size(),
3219+
rewriter.getIndexAttr(0));
3220+
SmallVector<OpFoldResult> normalizedSteps(normalizedUbs.size(),
3221+
rewriter.getIndexAttr(1));
3222+
3223+
auto normalizedForallOp = rewriter.create<scf::ForallOp>(
3224+
loc, normalizedLbs, normalizedUbs, normalizedSteps, loop.getOutputs(),
3225+
loop.getMapping(), [](OpBuilder &, Location, ValueRange) {});
3226+
3227+
auto normalizedLoopIvs = normalizedForallOp.getInductionVars();
3228+
OpBuilder::InsertionGuard g(rewriter);
3229+
Block *normalizedLoopBlock = normalizedForallOp.getBody();
3230+
rewriter.setInsertionPointToStart(normalizedLoopBlock);
3231+
3232+
SmallVector<Value> argValues =
3233+
denormalizeIndVar(rewriter, loc, normalizedLoopIvs, lbs, steps);
3234+
argValues.append(normalizedForallOp.getRegionIterArgs().begin(),
3235+
normalizedForallOp.getRegionIterArgs().end());
3236+
Block *origLoopBlock = loop.getBody();
3237+
rewriter.mergeBlocks(origLoopBlock, normalizedLoopBlock, argValues);
3238+
3239+
rewriter.replaceOp(loop, normalizedForallOp);
3240+
return normalizedForallOp;
3241+
}
3242+
31543243
DiagnosedSilenceableFailure transform::tileToForallOpImpl(
31553244
RewriterBase &rewriter, transform::TransformState &state,
31563245
TransformOpInterface transformOp, Operation *target,
@@ -3172,23 +3261,6 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
31723261
if (!mixedNumThreads.empty()) {
31733262
options.setNumThreads(mixedNumThreads);
31743263
} else {
3175-
SmallVector<Range> loopRanges = tileableOp.getIterationDomain(rewriter);
3176-
unsigned nLoops = loopRanges.size();
3177-
SmallVector<OpFoldResult> numThreads;
3178-
numThreads.reserve(nLoops);
3179-
AffineExpr s0, s1;
3180-
bindSymbols(rewriter.getContext(), s0, s1);
3181-
AffineExpr divExpr = s0.ceilDiv(s1);
3182-
for (int i = 0, e = std::min(mixedTileSizes.size(), loopRanges.size());
3183-
i < e; ++i) {
3184-
OpFoldResult numTiles = mixedTileSizes[i];
3185-
if (!isConstantIntValue(numTiles, 0))
3186-
numTiles = affine::makeComposedFoldedAffineApply(
3187-
rewriter, tileableOp.getLoc(), divExpr,
3188-
{loopRanges[i].size, numTiles});
3189-
numThreads.push_back(numTiles);
3190-
}
3191-
options.setNumThreads(numThreads);
31923264
options.setTileSizes(mixedTileSizes);
31933265
}
31943266
if (mapping) {
@@ -3199,9 +3271,20 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
31993271

32003272
if (failed(maybeTilingResult))
32013273
return transformOp.emitDefaultSilenceableFailure(tileableOp);
3274+
32023275
rewriter.replaceOp(tileableOp, maybeTilingResult->replacements);
32033276

32043277
tilingResult = *maybeTilingResult;
3278+
3279+
if (mixedNumThreads.empty()) {
3280+
auto generatedForallOp = cast<scf::ForallOp>(tilingResult.loops.front());
3281+
OpBuilder::InsertionGuard g(rewriter);
3282+
rewriter.setInsertionPoint(generatedForallOp);
3283+
scf::ForallOp normalizedForallOp =
3284+
normalizeForallLoopOp(rewriter, generatedForallOp);
3285+
tilingResult.loops.front() = normalizedForallOp;
3286+
}
3287+
32053288
return DiagnosedSilenceableFailure::success();
32063289
}
32073290

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,10 @@ static OpFoldResult getBoundedTileSize(OpBuilder &b, Location loc,
217217
AffineExpr s0, s1, d0;
218218
bindDims(b.getContext(), d0);
219219
bindSymbols(b.getContext(), s0, s1);
220-
AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, b.getContext());
220+
AffineMap minMap = AffineMap::get(1, 2, {s0 - d0, s1}, b.getContext());
221221
Value size = getValueOrCreateConstantIndexOp(b, loc, loopRange.size);
222222
return affine::makeComposedFoldedAffineMin(
223-
b, loc, minMap, SmallVector<OpFoldResult>{offset, tileSize, size});
223+
b, loc, minMap, SmallVector<OpFoldResult>{offset, size, tileSize});
224224
}
225225

226226
/// Returns true if the maximum tile offset `tileSize * numThreads-1` is less

mlir/lib/Dialect/SCF/Utils/Utils.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,3 +1363,37 @@ scf::ForOp mlir::fuseIndependentSiblingForLoops(scf::ForOp target,
13631363

13641364
return fusedLoop;
13651365
}
1366+
1367+
FailureOr<scf::ForallOp> mlir::normalizeForallOp(RewriterBase &rewriter,
1368+
scf::ForallOp forallOp) {
1369+
SmallVector<OpFoldResult> lbs = forallOp.getMixedLowerBound();
1370+
SmallVector<OpFoldResult> ubs = forallOp.getMixedUpperBound();
1371+
SmallVector<OpFoldResult> steps = forallOp.getMixedStep();
1372+
1373+
if (llvm::all_of(
1374+
lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) &&
1375+
llvm::all_of(
1376+
steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) {
1377+
return forallOp;
1378+
}
1379+
1380+
SmallVector<OpFoldResult> newLbs, newUbs, newSteps;
1381+
for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1382+
LoopParams normalizedLoopParams =
1383+
emitNormalizedLoopBounds(rewriter, forallOp.getLoc(), lb, ub, step);
1384+
newLbs.push_back(normalizedLoopParams.lowerBound);
1385+
newUbs.push_back(normalizedLoopParams.upperBound);
1386+
newSteps.push_back(normalizedLoopParams.step);
1387+
}
1388+
1389+
auto normalizedForallOp = rewriter.create<scf::ForallOp>(
1390+
forallOp.getLoc(), newLbs, newUbs, newSteps, forallOp.getOutputs(),
1391+
forallOp.getMapping(), [](OpBuilder &, Location, ValueRange) {});
1392+
1393+
rewriter.inlineRegionBefore(forallOp.getBodyRegion(),
1394+
normalizedForallOp.getBodyRegion(),
1395+
normalizedForallOp.getBodyRegion().begin());
1396+
1397+
rewriter.replaceAllOpUsesWith(forallOp, normalizedForallOp);
1398+
return normalizedForallOp;
1399+
}

mlir/test/Dialect/Linalg/tile-tensors.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ module attributes {transform.with_named_sequence} {
119119

120120
// -----
121121

122-
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
122+
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
123123

124124
// CHECK: fold_extract_slice
125125
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>

mlir/test/Dialect/Linalg/tile-to-forall.mlir

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,10 @@ func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
196196
// CHECK: %[[NT0:.+]] = affine.apply #[[$map0]]()[%[[M]]]
197197
// CHECK: %[[NT1:.+]] = affine.apply #[[$map1]]()[%[[N]]]
198198
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
199-
// CHECK: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
200-
// CHECK: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
201-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
202-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
199+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
200+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
201+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
202+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
203203
// CHECK: tensor.extract_slice %[[A]]
204204
// CHECK: tensor.extract_slice %[[B]]
205205
// CHECK: tensor.extract_slice %[[C_BLK]]
@@ -233,11 +233,11 @@ module attributes {transform.with_named_sequence} {
233233
// CHECK-SAME: %[[C:[0-9a-z]+]]: tensor
234234
func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: tensor<100x300xf32>) -> tensor<100x300xf32> {
235235
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (10, 15) shared_outs(%[[C_BLK:.*]] = %[[C]])
236-
// CHECK: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]])
236+
// CHECK-DAG: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]])
237+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
238+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
237239
// CHECK-NOT: affine.max
238240
// CHECK-NOT: affine.min
239-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
240-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
241241
// CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
242242
// CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
243243
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
@@ -452,10 +452,9 @@ module attributes {transform.with_named_sequence} {
452452
// CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0] -> (s0 ceildiv 10)>
453453
// CHECK-DAG: #[[$map1:.+]] = affine_map<()[s0] -> (s0 ceildiv 20)>
454454
// CHECK-DAG: #[[$map2:.+]] = affine_map<(d0)[s0] -> (d0 * -10 + s0, 10)>
455-
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0) -> (0, d0)>
456-
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
457-
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 10)>
458-
// CHECK-DAG: #[[$map6:.+]] = affine_map<(d0) -> (d0 * 20)>
455+
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
456+
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0) -> (d0 * 10)>
457+
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 20)>
459458

460459
// CHECK-LABEL: matmul_tile_size_dynamic(
461460
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<?x?xf32>
@@ -464,18 +463,16 @@ module attributes {transform.with_named_sequence} {
464463
func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
465464
// CHECK: %[[c1:.*]] = arith.constant 1 : index
466465
// CHECK: %[[c0:.*]] = arith.constant 0 : index
467-
// CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
468-
// CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
469-
// CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
470-
// CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
471-
// CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
466+
// CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
467+
// CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
468+
// CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
469+
// CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
470+
// CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
472471
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
473-
// CHECK: %[[TSMIN0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
474-
// CHECK: %[[TS0:.+]] = affine.max #[[$map3]](%[[TSMIN0]])
475-
// CHECK: %[[TSMIN1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
476-
// CHECK: %[[TS1:.+]] = affine.max #[[$map3]](%[[TSMIN1]])
477-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
478-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
472+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
473+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]]
474+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]])
475+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]])
479476
// CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] :
480477
// CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] :
481478
// CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] :
@@ -523,10 +520,9 @@ module attributes {transform.with_named_sequence} {
523520
// CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0] -> (s0 ceildiv 10)>
524521
// CHECK-DAG: #[[$map1:.+]] = affine_map<()[s0] -> (s0 ceildiv 20)>
525522
// CHECK-DAG: #[[$map2:.+]] = affine_map<(d0)[s0] -> (d0 * -10 + s0, 10)>
526-
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0) -> (0, d0)>
527-
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
528-
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 10)>
529-
// CHECK-DAG: #[[$map6:.+]] = affine_map<(d0) -> (d0 * 20)>
523+
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
524+
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0) -> (d0 * 10)>
525+
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 20)>
530526

531527
// CHECK-LABEL: matmul_tile_size_dynamic(
532528
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<?x?xf32>
@@ -535,18 +531,16 @@ module attributes {transform.with_named_sequence} {
535531
func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
536532
// CHECK: %[[c1:.*]] = arith.constant 1 : index
537533
// CHECK: %[[c0:.*]] = arith.constant 0 : index
538-
// CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
539-
// CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
540-
// CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
541-
// CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
542-
// CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
534+
// CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
535+
// CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
536+
// CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
537+
// CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
538+
// CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
543539
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
544-
// CHECK: %[[TSMIN0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
545-
// CHECK: %[[TS0:.+]] = affine.max #[[$map3]](%[[TSMIN0]])
546-
// CHECK: %[[TSMIN1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
547-
// CHECK: %[[TS1:.+]] = affine.max #[[$map3]](%[[TSMIN1]])
548-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
549-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
540+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
541+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]]
542+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]])
543+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]])
550544
// CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] :
551545
// CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] :
552546
// CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] :

mlir/test/Dialect/Linalg/transform-op-tile.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ module {
184184
// CHECK: %[[VS:.*]] = vector.vscale
185185
// CHECK: %[[STEP:.*]] = arith.muli %[[VEC_SIZE]], %[[VS]] : index
186186
// CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[DIM]] step %[[STEP]] iter_args(%[[VAL:.*]] = %[[ARG_2]]) -> (tensor<?xf32>) {
187-
// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%[[IV]])[%[[STEP]], %[[DIM]]]
187+
// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)>(%[[IV]])[%[[DIM]], %[[STEP]]]
188188
// CHECK: %[[SLICE_ARG0:.*]] = tensor.extract_slice %[[ARG_0]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>
189189
// CHECK: %[[SLICE_ARG1:.*]] = tensor.extract_slice %[[ARG_1]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>
190190
// CHECK: %[[SLICE_ARG2:.*]] = tensor.extract_slice %[[VAL]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} {
428428
transform.yield
429429
}
430430
}
431-
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
431+
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)>
432432
// CHECK: func @matmul_sequence_fusion(
433433
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
434434
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>

0 commit comments

Comments
 (0)