-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir][affine] implement promoteIfSingleIteration
for AffineForOp
#72547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
9a749e3
to
bdbe7c8
Compare
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-affine Author: Maksim Levental (makslevental) ChangesRecently someone on discord asked if Full diff: https://github.com/llvm/llvm-project/pull/72547.diff 10 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
index 92f3d5a2c4925b1..8bd061f344f128e 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
@@ -43,6 +43,10 @@ void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map,
/// constant trip count in non-trivial cases.
std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);
+/// Helper to replace uses of loop carried values (iter_args) and loop
+/// yield values while promoting single iteration affine.for ops.
+void replaceIterArgsAndYieldResults(AffineForOp forOp);
+
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
index f9578cf37d5d768..b4ea6122ed4c0e0 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
@@ -121,7 +121,7 @@ def AffineForOp : Affine_Op<"for",
ImplicitAffineTerminator, ConditionallySpeculatable,
RecursiveMemoryEffects, DeclareOpInterfaceMethods<LoopLikeOpInterface,
["getSingleInductionVar", "getSingleLowerBound", "getSingleStep",
- "getSingleUpperBound", "getYieldedValuesMutable",
+ "getSingleUpperBound", "getYieldedValuesMutable", "promoteIfSingleIteration",
"replaceWithAdditionalYields"]>,
DeclareOpInterfaceMethods<RegionBranchOpInterface,
["getEntrySuccessorOperands"]>]> {
diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
index 723a262f24acc51..1e3b3bffea7b838 100644
--- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -83,10 +83,6 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
uint64_t unrollJamFactor);
-/// Promotes the loop body of a AffineForOp to its containing block if the loop
-/// was known to have a single iteration.
-LogicalResult promoteIfSingleIteration(AffineForOp forOp);
-
/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
/// their body into the containing Block.
void promoteSingleIterationLoops(func::FuncOp f);
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
index e645afe7cd3e8fa..602f8f7cc2ce3c6 100644
--- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -107,6 +107,20 @@ std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
return tripCount;
}
+void mlir::affine::replaceIterArgsAndYieldResults(AffineForOp forOp) {
+ // Replace uses of iter arguments with iter operands (initial values).
+ auto iterOperands = forOp.getInits();
+ auto iterArgs = forOp.getRegionIterArgs();
+ for (auto e : llvm::zip(iterOperands, iterArgs))
+ std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
+
+ // Replace uses of loop results with the values yielded by the loop.
+ auto outerResults = forOp.getResults();
+ auto innerResults = forOp.getBody()->getTerminator()->getOperands();
+ for (auto e : llvm::zip(outerResults, innerResults))
+ std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
+}
+
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 05496e70716a2a1..314bfbe6968f3ec 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/IRMapping.h"
@@ -2440,6 +2442,53 @@ std::optional<OpFoldResult> AffineForOp::getSingleUpperBound() {
return OpFoldResult(b.getI64IntegerAttr(getConstantUpperBound()));
}
+/// Promotes the loop body of a forOp to its containing block if the forOp
+/// was known to have a single iteration.
+LogicalResult AffineForOp::promoteIfSingleIteration(RewriterBase &rewriter) {
+ auto forOp = cast<AffineForOp>(getOperation());
+ std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+ if (!tripCount || *tripCount != 1)
+ return failure();
+
+ // TODO: extend this for arbitrary affine bounds.
+ if (forOp.getLowerBoundMap().getNumResults() != 1)
+ return failure();
+
+ // Replaces all IV uses to its single iteration value.
+ auto iv = forOp.getInductionVar();
+ auto *parentBlock = forOp->getBlock();
+ if (!iv.use_empty()) {
+ if (forOp.hasConstantLowerBound()) {
+ OpBuilder topBuilder(forOp->getParentOfType<func::FuncOp>().getBody());
+ auto constOp = topBuilder.create<arith::ConstantIndexOp>(
+ forOp.getLoc(), forOp.getConstantLowerBound());
+ iv.replaceAllUsesWith(constOp);
+ } else {
+ auto lbOperands = forOp.getLowerBoundOperands();
+ auto lbMap = forOp.getLowerBoundMap();
+ OpBuilder builder(forOp);
+ if (lbMap == builder.getDimIdentityMap()) {
+ // No need of generating an affine.apply.
+ iv.replaceAllUsesWith(lbOperands[0]);
+ } else {
+ auto affineApplyOp =
+ builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
+ iv.replaceAllUsesWith(affineApplyOp);
+ }
+ }
+ }
+
+ replaceIterArgsAndYieldResults(forOp);
+
+ // Move the loop body operations, except for its terminator, to the loop's
+ // containing block.
+ forOp.getBody()->back().erase();
+ parentBlock->getOperations().splice(Block::iterator(forOp),
+ forOp.getBody()->getOperations());
+ forOp.erase();
+ return success();
+}
+
FailureOr<LoopLikeOpInterface> AffineForOp::replaceWithAdditionalYields(
RewriterBase &rewriter, ValueRange newInitOperands,
bool replaceInitOperandUsesInLoop,
@@ -2905,8 +2954,7 @@ static void composeSetAndOperands(IntegerSet &set,
}
/// Canonicalize an affine if op's conditional (integer set + operands).
-LogicalResult AffineIfOp::fold(FoldAdaptor,
- SmallVectorImpl<OpFoldResult> &) {
+LogicalResult AffineIfOp::fold(FoldAdaptor, SmallVectorImpl<OpFoldResult> &) {
auto set = getIntegerSet();
SmallVector<Value, 4> operands(getOperands());
composeSetAndOperands(set, operands);
@@ -2997,11 +3045,11 @@ static LogicalResult
verifyMemoryOpIndexing(Operation *op, AffineMapAttr mapAttr,
Operation::operand_range mapOperands,
MemRefType memrefType, unsigned numIndexOperands) {
- AffineMap map = mapAttr.getValue();
- if (map.getNumResults() != memrefType.getRank())
- return op->emitOpError("affine map num results must equal memref rank");
- if (map.getNumInputs() != numIndexOperands)
- return op->emitOpError("expects as many subscripts as affine map inputs");
+ AffineMap map = mapAttr.getValue();
+ if (map.getNumResults() != memrefType.getRank())
+ return op->emitOpError("affine map num results must equal memref rank");
+ if (map.getNumInputs() != numIndexOperands)
+ return op->emitOpError("expects as many subscripts as affine map inputs");
Region *scope = getAffineScope(op);
for (auto idx : mapOperands) {
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index 331b0f1b2c2b1c6..a5a4740a7334df7 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -224,9 +224,10 @@ void AffineDataCopyGeneration::runOnOperation() {
// With a post order walk, the erasure of loops does not affect
// continuation of the walk or the collection of load/store ops.
nest->walk([&](Operation *op) {
- if (auto forOp = dyn_cast<AffineForOp>(op))
- (void)promoteIfSingleIteration(forOp);
- else if (isa<AffineLoadOp, AffineStoreOp>(op))
+ if (auto forOp = dyn_cast<AffineForOp>(op)) {
+ IRRewriter rewriter(f.getContext());
+ (void)forOp.promoteIfSingleIteration(rewriter);
+ } else if (isa<AffineLoadOp, AffineStoreOp>(op))
copyOps.push_back(op);
});
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
index 5053b08ee0834cd..8c486cbc3731a51 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
@@ -464,9 +464,11 @@ void mlir::affine::fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
// Patch reduction loop - only ones that are sibling-fused with the
// destination loop - into the parent loop.
(void)promoteSingleIterReductionLoop(forOp, true);
- else
+ else {
+ IRRewriter rewriter(forOp.getContext());
// Promote any single iteration slice loops.
- (void)promoteIfSingleIteration(forOp);
+ (void)forOp.promoteIfSingleIteration(rewriter);
+ }
}
}
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 3794ef2dabe1e0a..b1e88a633e682b3 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -110,68 +110,6 @@ getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
lb.erase();
}
-/// Helper to replace uses of loop carried values (iter_args) and loop
-/// yield values while promoting single iteration affine.for ops.
-static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
- // Replace uses of iter arguments with iter operands (initial values).
- auto iterOperands = forOp.getInits();
- auto iterArgs = forOp.getRegionIterArgs();
- for (auto e : llvm::zip(iterOperands, iterArgs))
- std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
-
- // Replace uses of loop results with the values yielded by the loop.
- auto outerResults = forOp.getResults();
- auto innerResults = forOp.getBody()->getTerminator()->getOperands();
- for (auto e : llvm::zip(outerResults, innerResults))
- std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
-}
-
-/// Promotes the loop body of a forOp to its containing block if the forOp
-/// was known to have a single iteration.
-LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
- std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (!tripCount || *tripCount != 1)
- return failure();
-
- // TODO: extend this for arbitrary affine bounds.
- if (forOp.getLowerBoundMap().getNumResults() != 1)
- return failure();
-
- // Replaces all IV uses to its single iteration value.
- auto iv = forOp.getInductionVar();
- auto *parentBlock = forOp->getBlock();
- if (!iv.use_empty()) {
- if (forOp.hasConstantLowerBound()) {
- OpBuilder topBuilder(forOp->getParentOfType<func::FuncOp>().getBody());
- auto constOp = topBuilder.create<arith::ConstantIndexOp>(
- forOp.getLoc(), forOp.getConstantLowerBound());
- iv.replaceAllUsesWith(constOp);
- } else {
- auto lbOperands = forOp.getLowerBoundOperands();
- auto lbMap = forOp.getLowerBoundMap();
- OpBuilder builder(forOp);
- if (lbMap == builder.getDimIdentityMap()) {
- // No need of generating an affine.apply.
- iv.replaceAllUsesWith(lbOperands[0]);
- } else {
- auto affineApplyOp =
- builder.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
- iv.replaceAllUsesWith(affineApplyOp);
- }
- }
- }
-
- replaceIterArgsAndYieldResults(forOp);
-
- // Move the loop body operations, except for its terminator, to the loop's
- // containing block.
- forOp.getBody()->back().erase();
- parentBlock->getOperations().splice(Block::iterator(forOp),
- forOp.getBody()->getOperations());
- forOp.erase();
- return success();
-}
-
/// Generates an affine.for op with the specified lower and upper bounds
/// while generating the right IV remappings to realize shifts for operations in
/// its body. The operations that go into the loop body are specified in
@@ -218,7 +156,9 @@ static AffineForOp generateShiftedLoop(
for (auto *op : ops)
bodyBuilder.clone(*op, operandMap);
};
- if (succeeded(promoteIfSingleIteration(loopChunk)))
+
+ IRRewriter rewriter(loopChunk.getContext());
+ if (succeeded(loopChunk.promoteIfSingleIteration(rewriter)))
return AffineForOp();
return loopChunk;
}
@@ -896,8 +836,10 @@ LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) {
uint64_t tripCount = *mayBeConstantTripCount;
if (tripCount == 0)
return success();
- if (tripCount == 1)
- return promoteIfSingleIteration(forOp);
+ if (tripCount == 1) {
+ IRRewriter rewriter(forOp.getContext());
+ return forOp.promoteIfSingleIteration(rewriter);
+ }
return loopUnrollByFactor(forOp, tripCount);
}
return failure();
@@ -1003,7 +945,8 @@ static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
// Promote the loop body up if this has turned into a single iteration loop.
- (void)promoteIfSingleIteration(cleanupForOp);
+ IRRewriter rewriter(cleanupForOp.getContext());
+ (void)cleanupForOp.promoteIfSingleIteration(rewriter);
// Adjust upper bound of the original loop; this is the same as the lower
// bound of the cleanup loop.
@@ -1021,9 +964,11 @@ LogicalResult mlir::affine::loopUnrollByFactor(
std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
if (unrollFactor == 1) {
- if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
- failed(promoteIfSingleIteration(forOp)))
- return failure();
+ if (mayBeConstantTripCount && *mayBeConstantTripCount == 1) {
+ IRRewriter rewriter(forOp.getContext());
+ if (failed(forOp.promoteIfSingleIteration(rewriter)))
+ return failure();
+ }
return success();
}
@@ -1076,7 +1021,8 @@ LogicalResult mlir::affine::loopUnrollByFactor(
/*iterArgs=*/iterArgs, /*yieldedValues=*/yieldedValues);
// Promote the loop body up if this has turned into a single iteration loop.
- (void)promoteIfSingleIteration(forOp);
+ IRRewriter rewriter(forOp.getContext());
+ (void)forOp.promoteIfSingleIteration(rewriter);
return success();
}
@@ -1137,9 +1083,11 @@ LogicalResult mlir::affine::loopUnrollJamByFactor(AffineForOp forOp,
std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
if (unrollJamFactor == 1) {
- if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 &&
- failed(promoteIfSingleIteration(forOp)))
- return failure();
+ if (mayBeConstantTripCount && *mayBeConstantTripCount == 1) {
+ IRRewriter rewriter(forOp.getContext());
+ if (failed(forOp.promoteIfSingleIteration(rewriter)))
+ return failure();
+ }
return success();
}
@@ -1321,7 +1269,7 @@ LogicalResult mlir::affine::loopUnrollJamByFactor(AffineForOp forOp,
}
// Promote the loop body up if this has turned into a single iteration loop.
- (void)promoteIfSingleIteration(forOp);
+ (void)forOp.promoteIfSingleIteration(rewriter);
return success();
}
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 50a052fb8b74e70..4a1013fea4dddc8 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -552,8 +552,11 @@ void mlir::affine::normalizeAffineParallel(AffineParallelOp op) {
LogicalResult mlir::affine::normalizeAffineFor(AffineForOp op,
bool promoteSingleIter) {
- if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
- return success();
+ if (promoteSingleIter) {
+ IRRewriter rewriter(op.getContext());
+ if (succeeded(op.promoteIfSingleIteration(rewriter)))
+ return success();
+ }
// Check if the forop is already normalized.
if (op.hasConstantLowerBound() && (op.getConstantLowerBound() == 0) &&
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index b418a457473a8ec..6354504d6dac315 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -112,9 +112,10 @@ void TestAffineDataCopy::runOnOperation() {
// With a post order walk, the erasure of loops does not affect
// continuation of the walk or the collection of load/store ops.
nest->walk([&](Operation *op) {
- if (auto forOp = dyn_cast<AffineForOp>(op))
- (void)promoteIfSingleIteration(forOp);
- else if (auto loadOp = dyn_cast<AffineLoadOp>(op))
+ if (auto forOp = dyn_cast<AffineForOp>(op)) {
+ IRRewriter rewriter(forOp.getContext());
+ (void)forOp.promoteIfSingleIteration(rewriter);
+ } else if (auto loadOp = dyn_cast<AffineLoadOp>(op))
copyOps.push_back(loadOp);
else if (auto storeOp = dyn_cast<AffineStoreOp>(op))
copyOps.push_back(storeOp);
|
bdbe7c8
to
f8e5e5b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just some cosmetics that stood out to me. Would be nice to fix, but I'm fine with leaving them as-is for the exiting code that has been moved across files if you really don't want to change this.
I'm all for sprucing things up while I'm here. |
430261b
to
cbcfb86
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks
9fd828c
to
bf5646f
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
bf5646f
to
2a075ce
Compare
2a075ce
to
3e638af
Compare
…neForOp` (llvm#72547)" This reverts commit aa6be2f.
…neForOp` (llvm#72547)" This reverts commit aa6be2f.
Recently someone on discord asked if
AffineForOp
could be refactored to just implement thepromoteIfSingleIteration
interface method onLoopLikeOpInterface
(rather than have a dangling method). Went more smoothly than I thought so here it is.