-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][Affine] Fix affine data copy generation copy placement for missing memref definition check #130750
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…sing memref definition check This was exposed with the test case previously added but when performing generation with limited memory capacity.
@llvm/pr-subscribers-mlir-affine @llvm/pr-subscribers-mlir Author: Uday Bondhugula (bondhugula) ChangesThis was exposed with the test case previously added but when performing Full diff: https://github.com/llvm/llvm-project/pull/130750.diff 3 Files Affected:
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 5c94ec2985c3d..b58bf3f271d47 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1774,23 +1774,37 @@ findHighestBlockForPlacement(const MemRefRegion ®ion, Block &block,
SmallVector<Value, 4> symbols;
cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
- SmallVector<AffineForOp, 4> enclosingFors;
- getAffineForIVs(*block.begin(), &enclosingFors);
+ SmallVector<Operation *, 4> enclosingAffineOps;
+ getEnclosingAffineOps(*block.begin(), &enclosingAffineOps);
// Walk up loop parents till we find an IV on which this region is
- // symbolic/variant.
- auto it = enclosingFors.rbegin();
- for (auto e = enclosingFors.rend(); it != e; ++it) {
+ // symbolic/variant or we hit `hoistGuard`.
+ auto it = enclosingAffineOps.rbegin();
+ AffineForOp lastInvariantFor;
+ for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
+ Operation *enclosingOp = *it;
+ // We can't hoist past the definition of the memref being copied.
+ Value memref = region.memref;
+ if (!memref.getParentRegion()->isAncestor(enclosingOp->getParentRegion())) {
+ LLVM_DEBUG(
+ llvm::dbgs()
+ << "memref definition will end up not dominating hoist location\n");
+ break;
+ }
+
+ auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
+ if (!affineFor)
+ break;
// TODO: also need to be checking this for regions symbols that
// aren't loop IVs, whether we are within their resp. defs' dominance scope.
- if (llvm::is_contained(symbols, it->getInductionVar()))
+ if (llvm::is_contained(symbols, affineFor.getInductionVar()))
break;
+ lastInvariantFor = affineFor;
}
- if (it != enclosingFors.rbegin()) {
- auto lastInvariantIV = *std::prev(it);
- *copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation());
+ if (it != enclosingAffineOps.rbegin()) {
+ *copyInPlacementStart = Block::iterator(lastInvariantFor);
*copyOutPlacementStart = std::next(*copyInPlacementStart);
- *copyPlacementBlock = lastInvariantIV->getBlock();
+ *copyPlacementBlock = lastInvariantFor->getBlock();
} else {
*copyInPlacementStart = begin;
*copyOutPlacementStart = end;
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index 1f3aac3401bea..a1f0d952e7c63 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -8,6 +8,7 @@
// affine.load op in the innermost loop as a filter.
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
+// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='capacity-kib=32' | FileCheck %s --check-prefix=LIMITED-MEM
// -copy-skip-non-stride-loops forces the copies to be placed right inside the
// tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -23,6 +24,7 @@
// CHECK-LABEL: func @matmul
// FILTER-LABEL: func @matmul
+// LIMITED-MEM-LABEL: func @matmul
func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<4096x4096xf32>) -> memref<4096x4096xf32> {
affine.for %i = 0 to 4096 step 128 {
affine.for %j = 0 to 4096 step 128 {
@@ -43,6 +45,7 @@ func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memr
}
}
return %C : memref<4096x4096xf32>
+ // LIMITED-MEM: return
}
// Buffers of size 128x128 get created here for all three matrices.
@@ -421,6 +424,7 @@ func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
}
// CHECK-LABEL: func @memref_def_inside
+// LIMITED-MEM-LABEL: func @memref_def_inside
func.func @memref_def_inside(%arg0: index) {
%0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
// No copy generation can happen at this depth given the definition inside.
@@ -429,5 +433,17 @@ func.func @memref_def_inside(%arg0: index) {
// CHECK: affine.store {{.*}} : memref<1xf32>
affine.store %0, %alloc_7[0] : memref<1xf32>
}
+
+ // With the limited capacity specified, buffer generation happens at the
+ // innermost depth. Tests that copy-placement is proper and respects the
+ // memref definition.
+
+ // LIMITED-MEM: affine.for %{{.*}} = 0 to 29
+ // LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
+ // LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.load %{{.*}}[%c0{{.*}}] : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+ // LIMITED-MEM-NEXT: memref.dealloc %{{.*}} : memref<1xf32>
return
}
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 404f34ebee17a..d6aaa6faf94cb 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -53,6 +53,11 @@ struct TestAffineDataCopy
*this, "for-memref-region",
llvm::cl::desc("Test copy generation for a single memref region"),
llvm::cl::init(false)};
+ Option<uint64_t> clCapacityLimit{
+ *this, "capacity-kib",
+ llvm::cl::desc("Test copy generation enforcing a limit of capacity "
+ "(default: unlimited)"),
+ llvm::cl::init(UINT64_MAX)};
};
} // namespace
@@ -73,24 +78,24 @@ void TestAffineDataCopy::runOnOperation() {
auto loopNest = depthToLoops[0][0];
auto innermostLoop = depthToLoops[innermostLoopIdx][0];
AffineLoadOp load;
+ // For simplicity, these options are tested on the first memref being loaded
+ // from in the innermost loop.
if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
- // Gather MemRef filter. For simplicity, we use the first loaded memref
- // found in the innermost loop.
for (auto &op : *innermostLoop.getBody()) {
if (auto ld = dyn_cast<AffineLoadOp>(op)) {
load = ld;
break;
}
}
+ if (!load)
+ return;
}
- if (!load)
- return;
AffineCopyOptions copyOptions = {/*generateDma=*/false,
/*slowMemorySpace=*/0,
/*fastMemorySpace=*/0,
/*tagMemorySpace=*/0,
- /*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
+ /*fastMemCapacityBytes=*/clCapacityLimit};
DenseSet<Operation *> copyNests;
if (clMemRefFilter) {
if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(),
@@ -103,6 +108,10 @@ void TestAffineDataCopy::runOnOperation() {
return;
if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result)))
return;
+ } else if (failed(affineDataCopyGenerate(loopNest, copyOptions,
+ /*filterMemref=*/std::nullopt,
+ copyNests))) {
+ return;
}
// Promote any single iteration loops in the copy nests and simplify
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
…sing memref definition check (llvm#130750) This was exposed with the test case previously added but when performing generation with limited memory capacity.
This was exposed with the test case previously added but when performing
generation with limited memory capacity.