Skip to content

Commit 98fa615

Browse files
committed
[MLIR] move loopUnrollJamBy*Factor to loop transforms utils
The declarations for these were already part of transforms utils, but the definitions were left in affine transforms. Move definitions to loop transforms utils. Signed-off-by: Uday Bondhugula <[email protected]> Differential Revision: https://reviews.llvm.org/D76633
1 parent ca69be2 commit 98fa615

File tree

2 files changed

+129
-129
lines changed

2 files changed

+129
-129
lines changed

mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp

Lines changed: 0 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -101,134 +101,5 @@ LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
101101
return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
102102
}
103103

104-
/// Unrolls and jams `forOp` by `unrollJamFactor`, or by the loop's constant
/// trip count when that trip count is known and smaller than the factor.
///
/// Returns the result of `loopUnrollJamByFactor` for the chosen factor. A loop
/// whose constant trip count is zero is left untouched and `failure()` is
/// returned: forwarding a factor of zero would violate the
/// `unrollJamFactor >= 1` precondition asserted by `loopUnrollJamByFactor`.
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
                                            uint64_t unrollJamFactor) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);

  // Trip count not known to be constant: use the requested factor as is.
  if (!mayBeConstantTripCount.hasValue())
    return loopUnrollJamByFactor(forOp, unrollJamFactor);

  uint64_t tripCount = mayBeConstantTripCount.getValue();
  // A zero trip count must not be forwarded as the unroll-jam factor.
  if (tripCount == 0)
    return failure();

  // Clamp the factor to the trip count.
  return loopUnrollJamByFactor(
      forOp, tripCount < unrollJamFactor ? tripCount : unrollJamFactor);
}
113-
114-
/// Unrolls and jams this loop by the specified factor.
115-
LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
116-
uint64_t unrollJamFactor) {
117-
// Gathers all maximal sub-blocks of operations that do not themselves
118-
// include a for op (a operation could have a descendant for op though
119-
// in its tree). Ignore the block terminators.
120-
struct JamBlockGatherer {
121-
// Store iterators to the first and last op of each sub-block found.
122-
std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
123-
124-
// This is a linear time walk.
125-
void walk(Operation *op) {
126-
for (auto &region : op->getRegions())
127-
for (auto &block : region)
128-
walk(block);
129-
}
130-
void walk(Block &block) {
131-
for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
132-
auto subBlockStart = it;
133-
while (it != e && !isa<AffineForOp>(&*it))
134-
++it;
135-
if (it != subBlockStart)
136-
subBlocks.push_back({subBlockStart, std::prev(it)});
137-
// Process all for insts that appear next.
138-
while (it != e && isa<AffineForOp>(&*it))
139-
walk(&*it++);
140-
}
141-
}
142-
};
143-
144-
assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
145-
146-
if (unrollJamFactor == 1)
147-
return promoteIfSingleIteration(forOp);
148-
149-
if (forOp.getBody()->empty() ||
150-
forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
151-
return failure();
152-
153-
// Loops where both lower and upper bounds are multi-result maps won't be
154-
// unrolled (since the trip can't be expressed as an affine function in
155-
// general).
156-
// TODO(mlir-team): this may not be common, but we could support the case
157-
// where the lower bound is a multi-result map and the ub is a single result
158-
// one.
159-
if (forOp.getLowerBoundMap().getNumResults() != 1)
160-
return failure();
161-
162-
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
163-
// If the trip count is lower than the unroll jam factor, no unroll jam.
164-
if (mayBeConstantTripCount.hasValue() &&
165-
mayBeConstantTripCount.getValue() < unrollJamFactor)
166-
return failure();
167-
168-
auto *forInst = forOp.getOperation();
169-
170-
// Gather all sub-blocks to jam upon the loop being unrolled.
171-
JamBlockGatherer jbg;
172-
jbg.walk(forInst);
173-
auto &subBlocks = jbg.subBlocks;
174-
175-
// Generate the cleanup loop if trip count isn't a multiple of
176-
// unrollJamFactor.
177-
if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
178-
// Insert the cleanup loop right after 'forOp'.
179-
OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
180-
auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
181-
// Adjust the lower bound of the cleanup loop; its upper bound is the same
182-
// as the original loop's upper bound.
183-
AffineMap cleanupMap;
184-
SmallVector<Value, 4> cleanupOperands;
185-
getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
186-
&cleanupOperands, builder);
187-
cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
188-
189-
// Promote the cleanup loop if it has turned into a single iteration loop.
190-
promoteIfSingleIteration(cleanupAffineForOp);
191-
192-
// Adjust the upper bound of the original loop - it will be the same as the
193-
// cleanup loop's lower bound. Its lower bound remains unchanged.
194-
forOp.setUpperBound(cleanupOperands, cleanupMap);
195-
}
196-
197-
// Scale the step of loop being unroll-jammed by the unroll-jam factor.
198-
int64_t step = forOp.getStep();
199-
forOp.setStep(step * unrollJamFactor);
200-
201-
auto forOpIV = forOp.getInductionVar();
202-
// Unroll and jam (appends unrollJamFactor - 1 additional copies).
203-
for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
204-
// Operand map persists across all sub-blocks.
205-
BlockAndValueMapping operandMapping;
206-
for (auto &subBlock : subBlocks) {
207-
// Builder to insert unroll-jammed bodies. Insert right at the end of
208-
// sub-block.
209-
OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
210-
211-
// If the induction variable is used, create a remapping to the value for
212-
// this unrolled instance.
213-
if (!forOpIV.use_empty()) {
214-
// iv' = iv + i, i = 1 to unrollJamFactor-1.
215-
auto d0 = builder.getAffineDimExpr(0);
216-
auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
217-
auto ivUnroll =
218-
builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
219-
operandMapping.map(forOpIV, ivUnroll);
220-
}
221-
// Clone the sub-block being unroll-jammed.
222-
for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
223-
builder.clone(*it, operandMapping);
224-
}
225-
}
226-
}
227-
228-
// Promote the loop body up if this has turned into a single iteration loop.
229-
promoteIfSingleIteration(forOp);
230-
return success();
231-
}
232-
233104
// File-scope registration object for the LoopUnrollAndJam pass.
// NOTE(review): the two string arguments are presumably the pass's
// command-line flag and its description -- confirm against PassRegistration.
static PassRegistration<LoopUnrollAndJam> pass("affine-loop-unroll-jam",
234105
"Unroll and jam loops");

mlir/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,135 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
486486
return success();
487487
}
488488

489+
/// Unrolls and jams `forOp` by `unrollJamFactor`, or by the loop's constant
/// trip count when that trip count is known and smaller than the factor.
///
/// Returns the result of `loopUnrollJamByFactor` for the chosen factor. A loop
/// whose constant trip count is zero is left untouched and `failure()` is
/// returned: forwarding a factor of zero would violate the
/// `unrollJamFactor >= 1` precondition asserted by `loopUnrollJamByFactor`.
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
                                            uint64_t unrollJamFactor) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);

  // Trip count not known to be constant: use the requested factor as is.
  if (!mayBeConstantTripCount.hasValue())
    return loopUnrollJamByFactor(forOp, unrollJamFactor);

  uint64_t tripCount = mayBeConstantTripCount.getValue();
  // A zero trip count must not be forwarded as the unroll-jam factor.
  if (tripCount == 0)
    return failure();

  // Clamp the factor to the trip count.
  return loopUnrollJamByFactor(
      forOp, tripCount < unrollJamFactor ? tripCount : unrollJamFactor);
}
498+
499+
/// Unrolls and jams this loop by the specified factor.
500+
LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
501+
uint64_t unrollJamFactor) {
502+
// Gathers all maximal sub-blocks of operations that do not themselves
503+
// include a for op (a operation could have a descendant for op though
504+
// in its tree). Ignore the block terminators.
505+
struct JamBlockGatherer {
506+
// Store iterators to the first and last op of each sub-block found.
507+
std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
508+
509+
// This is a linear time walk.
510+
void walk(Operation *op) {
511+
for (auto &region : op->getRegions())
512+
for (auto &block : region)
513+
walk(block);
514+
}
515+
void walk(Block &block) {
516+
for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
517+
auto subBlockStart = it;
518+
while (it != e && !isa<AffineForOp>(&*it))
519+
++it;
520+
if (it != subBlockStart)
521+
subBlocks.push_back({subBlockStart, std::prev(it)});
522+
// Process all for insts that appear next.
523+
while (it != e && isa<AffineForOp>(&*it))
524+
walk(&*it++);
525+
}
526+
}
527+
};
528+
529+
assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
530+
531+
if (unrollJamFactor == 1)
532+
return promoteIfSingleIteration(forOp);
533+
534+
if (forOp.getBody()->empty() ||
535+
forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
536+
return failure();
537+
538+
// Loops where both lower and upper bounds are multi-result maps won't be
539+
// unrolled (since the trip can't be expressed as an affine function in
540+
// general).
541+
// TODO(mlir-team): this may not be common, but we could support the case
542+
// where the lower bound is a multi-result map and the ub is a single result
543+
// one.
544+
if (forOp.getLowerBoundMap().getNumResults() != 1)
545+
return failure();
546+
547+
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
548+
// If the trip count is lower than the unroll jam factor, no unroll jam.
549+
if (mayBeConstantTripCount.hasValue() &&
550+
mayBeConstantTripCount.getValue() < unrollJamFactor)
551+
return failure();
552+
553+
auto *forInst = forOp.getOperation();
554+
555+
// Gather all sub-blocks to jam upon the loop being unrolled.
556+
JamBlockGatherer jbg;
557+
jbg.walk(forInst);
558+
auto &subBlocks = jbg.subBlocks;
559+
560+
// Generate the cleanup loop if trip count isn't a multiple of
561+
// unrollJamFactor.
562+
if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
563+
// Insert the cleanup loop right after 'forOp'.
564+
OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
565+
auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
566+
// Adjust the lower bound of the cleanup loop; its upper bound is the same
567+
// as the original loop's upper bound.
568+
AffineMap cleanupMap;
569+
SmallVector<Value, 4> cleanupOperands;
570+
getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
571+
&cleanupOperands, builder);
572+
cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
573+
574+
// Promote the cleanup loop if it has turned into a single iteration loop.
575+
promoteIfSingleIteration(cleanupAffineForOp);
576+
577+
// Adjust the upper bound of the original loop - it will be the same as the
578+
// cleanup loop's lower bound. Its lower bound remains unchanged.
579+
forOp.setUpperBound(cleanupOperands, cleanupMap);
580+
}
581+
582+
// Scale the step of loop being unroll-jammed by the unroll-jam factor.
583+
int64_t step = forOp.getStep();
584+
forOp.setStep(step * unrollJamFactor);
585+
586+
auto forOpIV = forOp.getInductionVar();
587+
// Unroll and jam (appends unrollJamFactor - 1 additional copies).
588+
for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
589+
// Operand map persists across all sub-blocks.
590+
BlockAndValueMapping operandMapping;
591+
for (auto &subBlock : subBlocks) {
592+
// Builder to insert unroll-jammed bodies. Insert right at the end of
593+
// sub-block.
594+
OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
595+
596+
// If the induction variable is used, create a remapping to the value for
597+
// this unrolled instance.
598+
if (!forOpIV.use_empty()) {
599+
// iv' = iv + i, i = 1 to unrollJamFactor-1.
600+
auto d0 = builder.getAffineDimExpr(0);
601+
auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
602+
auto ivUnroll =
603+
builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
604+
operandMapping.map(forOpIV, ivUnroll);
605+
}
606+
// Clone the sub-block being unroll-jammed.
607+
for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
608+
builder.clone(*it, operandMapping);
609+
}
610+
}
611+
}
612+
613+
// Promote the loop body up if this has turned into a single iteration loop.
614+
promoteIfSingleIteration(forOp);
615+
return success();
616+
}
617+
489618
/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
490619
/// nested within 'forOpA' as the only non-terminator operation in its block.
491620
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {

0 commit comments

Comments
 (0)