Skip to content

Commit 86e3d91

Browse files
bondhugulafrederik-h
authored andcommitted
[MLIR][Affine] Fix affine data copy generation copy placement for missing memref definition check (llvm#130750)
This was exposed with the test case previously added but when performing generation with limited memory capacity.
1 parent edff463 commit 86e3d91

File tree

3 files changed

+54
-15
lines changed

3 files changed

+54
-15
lines changed

mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,23 +1774,37 @@ findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
17741774
SmallVector<Value, 4> symbols;
17751775
cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
17761776

1777-
SmallVector<AffineForOp, 4> enclosingFors;
1778-
getAffineForIVs(*block.begin(), &enclosingFors);
1777+
SmallVector<Operation *, 4> enclosingAffineOps;
1778+
getEnclosingAffineOps(*block.begin(), &enclosingAffineOps);
17791779
// Walk up loop parents till we find an IV on which this region is
1780-
// symbolic/variant.
1781-
auto it = enclosingFors.rbegin();
1782-
for (auto e = enclosingFors.rend(); it != e; ++it) {
1780+
// symbolic/variant or we hit `hoistGuard`.
1781+
auto it = enclosingAffineOps.rbegin();
1782+
AffineForOp lastInvariantFor;
1783+
for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
1784+
Operation *enclosingOp = *it;
1785+
// We can't hoist past the definition of the memref being copied.
1786+
Value memref = region.memref;
1787+
if (!memref.getParentRegion()->isAncestor(enclosingOp->getParentRegion())) {
1788+
LLVM_DEBUG(
1789+
llvm::dbgs()
1790+
<< "memref definition will end up not dominating hoist location\n");
1791+
break;
1792+
}
1793+
1794+
auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
1795+
if (!affineFor)
1796+
break;
17831797
// TODO: also need to be checking this for regions symbols that
17841798
// aren't loop IVs, whether we are within their resp. defs' dominance scope.
1785-
if (llvm::is_contained(symbols, it->getInductionVar()))
1799+
if (llvm::is_contained(symbols, affineFor.getInductionVar()))
17861800
break;
1801+
lastInvariantFor = affineFor;
17871802
}
17881803

1789-
if (it != enclosingFors.rbegin()) {
1790-
auto lastInvariantIV = *std::prev(it);
1791-
*copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation());
1804+
if (it != enclosingAffineOps.rbegin()) {
1805+
*copyInPlacementStart = Block::iterator(lastInvariantFor);
17921806
*copyOutPlacementStart = std::next(*copyInPlacementStart);
1793-
*copyPlacementBlock = lastInvariantIV->getBlock();
1807+
*copyPlacementBlock = lastInvariantFor->getBlock();
17941808
} else {
17951809
*copyInPlacementStart = begin;
17961810
*copyOutPlacementStart = end;

mlir/test/Dialect/Affine/affine-data-copy.mlir

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
// affine.load op in the innermost loop as a filter.
99
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
1010
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
11+
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='capacity-kib=32' | FileCheck %s --check-prefix=LIMITED-MEM
1112

1213
// -copy-skip-non-stride-loops forces the copies to be placed right inside the
1314
// tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -23,6 +24,7 @@
2324

2425
// CHECK-LABEL: func @matmul
2526
// FILTER-LABEL: func @matmul
27+
// LIMITED-MEM-LABEL: func @matmul
2628
func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<4096x4096xf32>) -> memref<4096x4096xf32> {
2729
affine.for %i = 0 to 4096 step 128 {
2830
affine.for %j = 0 to 4096 step 128 {
@@ -43,6 +45,7 @@ func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memr
4345
}
4446
}
4547
return %C : memref<4096x4096xf32>
48+
// LIMITED-MEM: return
4649
}
4750

4851
// Buffers of size 128x128 get created here for all three matrices.
@@ -421,6 +424,7 @@ func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
421424
}
422425

423426
// CHECK-LABEL: func @memref_def_inside
427+
// LIMITED-MEM-LABEL: func @memref_def_inside
424428
func.func @memref_def_inside(%arg0: index) {
425429
%0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
426430
// No copy generation can happen at this depth given the definition inside.
@@ -429,5 +433,17 @@ func.func @memref_def_inside(%arg0: index) {
429433
// CHECK: affine.store {{.*}} : memref<1xf32>
430434
affine.store %0, %alloc_7[0] : memref<1xf32>
431435
}
436+
437+
// With the limited capacity specified, buffer generation happens at the
438+
// innermost depth. Tests that copy-placement is proper and respects the
439+
// memref definition.
440+
441+
// LIMITED-MEM: affine.for %{{.*}} = 0 to 29
442+
// LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
443+
// LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
444+
// LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
445+
// LIMITED-MEM-NEXT: affine.load %{{.*}}[%c0{{.*}}] : memref<1xf32>
446+
// LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
447+
// LIMITED-MEM-NEXT: memref.dealloc %{{.*}} : memref<1xf32>
432448
return
433449
}

mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ struct TestAffineDataCopy
5353
*this, "for-memref-region",
5454
llvm::cl::desc("Test copy generation for a single memref region"),
5555
llvm::cl::init(false)};
56+
Option<uint64_t> clCapacityLimit{
57+
*this, "capacity-kib",
58+
llvm::cl::desc("Test copy generation enforcing a limit of capacity "
59+
"(default: unlimited)"),
60+
llvm::cl::init(UINT64_MAX)};
5661
};
5762

5863
} // namespace
@@ -73,24 +78,24 @@ void TestAffineDataCopy::runOnOperation() {
7378
auto loopNest = depthToLoops[0][0];
7479
auto innermostLoop = depthToLoops[innermostLoopIdx][0];
7580
AffineLoadOp load;
81+
// For simplicity, these options are tested on the first memref being loaded
82+
// from in the innermost loop.
7683
if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
77-
// Gather MemRef filter. For simplicity, we use the first loaded memref
78-
// found in the innermost loop.
7984
for (auto &op : *innermostLoop.getBody()) {
8085
if (auto ld = dyn_cast<AffineLoadOp>(op)) {
8186
load = ld;
8287
break;
8388
}
8489
}
90+
if (!load)
91+
return;
8592
}
86-
if (!load)
87-
return;
8893

8994
AffineCopyOptions copyOptions = {/*generateDma=*/false,
9095
/*slowMemorySpace=*/0,
9196
/*fastMemorySpace=*/0,
9297
/*tagMemorySpace=*/0,
93-
/*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
98+
/*fastMemCapacityBytes=*/clCapacityLimit};
9499
DenseSet<Operation *> copyNests;
95100
if (clMemRefFilter) {
96101
if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(),
@@ -103,6 +108,10 @@ void TestAffineDataCopy::runOnOperation() {
103108
return;
104109
if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result)))
105110
return;
111+
} else if (failed(affineDataCopyGenerate(loopNest, copyOptions,
112+
/*filterMemref=*/std::nullopt,
113+
copyNests))) {
114+
return;
106115
}
107116

108117
// Promote any single iteration loops in the copy nests and simplify

0 commit comments

Comments
 (0)