Skip to content

Commit cca3f3d

Browse files
author
Frank Laub
committed
[MLIR] Add affine.parallel folder and normalizer
Add a folder to the affine.parallel op so that loop bounds expressions are canonicalized. Additionally, a new AffineParallelNormalizePass is added to adjust affine.parallel ops so that the lower bound is always 0 and the upper bound always represents a range with a step size of 1. Differential Revision: https://reviews.llvm.org/D84998
1 parent 4e266ea commit cca3f3d

File tree

10 files changed

+266
-15
lines changed

10 files changed

+266
-15
lines changed

mlir/include/mlir/Dialect/Affine/IR/AffineOps.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -621,11 +621,6 @@ def AffineParallelOp : Affine_Op<"parallel",
621621
/// Get the number of dimensions.
622622
unsigned getNumDims();
623623

624-
operand_range getLowerBoundsOperands();
625-
operand_range getUpperBoundsOperands();
626-
627-
AffineValueMap getLowerBoundsValueMap();
628-
AffineValueMap getUpperBoundsValueMap();
629624
AffineValueMap getRangesValueMap();
630625

631626
/// Get ranges as constants, may fail in dynamic case.
@@ -636,13 +631,27 @@ def AffineParallelOp : Affine_Op<"parallel",
636631
MutableArrayRef<BlockArgument> getIVs() {
637632
return getBody()->getArguments();
638633
}
634+
635+
operand_range getLowerBoundsOperands();
636+
AffineValueMap getLowerBoundsValueMap();
637+
void setLowerBounds(ValueRange operands, AffineMap map);
638+
void setLowerBoundsMap(AffineMap map);
639+
640+
operand_range getUpperBoundsOperands();
641+
AffineValueMap getUpperBoundsValueMap();
642+
void setUpperBounds(ValueRange operands, AffineMap map);
643+
void setUpperBoundsMap(AffineMap map);
644+
645+
SmallVector<int64_t, 8> getSteps();
639646
void setSteps(ArrayRef<int64_t> newSteps);
640647

641648
static StringRef getReductionsAttrName() { return "reductions"; }
642649
static StringRef getLowerBoundsMapAttrName() { return "lowerBoundsMap"; }
643650
static StringRef getUpperBoundsMapAttrName() { return "upperBoundsMap"; }
644651
static StringRef getStepsAttrName() { return "steps"; }
645652
}];
653+
654+
let hasFolder = 1;
646655
}
647656

648657
def AffinePrefetchOp : Affine_Op<"prefetch"> {

mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ class AffineValueMap {
7474
ArrayRef<Value> getOperands() const;
7575
AffineMap getAffineMap() const;
7676

77+
/// Attempts to canonicalize the map and operands. Returns success if the map
78+
/// and/or operands have been modified.
79+
LogicalResult canonicalize();
80+
7781
private:
7882
// A mutable affine map.
7983
MutableAffineMap map;

mlir/include/mlir/Dialect/Affine/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ createAffineLoopInvariantCodeMotionPass();
3535
/// ops.
3636
std::unique_ptr<OperationPass<FuncOp>> createAffineParallelizePass();
3737

38+
/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
39+
std::unique_ptr<OperationPass<FuncOp>> createAffineParallelNormalizePass();
40+
3841
/// Performs packing (or explicit copying) of accessed memref regions into
3942
/// buffers in the specified faster memory space through either pointwise copies
4043
/// or DMA operations.

mlir/include/mlir/Dialect/Affine/Passes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ def AffineParallelize : FunctionPass<"affine-parallelize"> {
118118
let constructor = "mlir::createAffineParallelizePass()";
119119
}
120120

121+
def AffineParallelNormalize : FunctionPass<"affine-parallel-normalize"> {
122+
let summary = "Normalize affine.parallel ops so that lower bounds are 0 and "
123+
"steps are 1";
124+
let constructor = "mlir::createAffineParallelNormalizePass()";
125+
}
126+
121127
def SimplifyAffineStructures : FunctionPass<"simplify-affine-structures"> {
122128
let summary = "Simplify affine expressions in maps/sets and normalize "
123129
"memrefs";

mlir/include/mlir/Dialect/Affine/Utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ void vectorizeAffineLoops(
4343
llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
4444
ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern);
4545

46+
/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
47+
/// As currently implemented, this transformation cannot fail and will return
48+
/// early if the op is already in a normalized form.
49+
void normalizeAffineParallel(AffineParallelOp op);
50+
4651
} // namespace mlir
4752

4853
#endif // MLIR_DIALECT_AFFINE_UTILS_H

mlir/lib/Dialect/Affine/IR/AffineOps.cpp

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,9 +2505,58 @@ OpBuilder AffineParallelOp::getBodyBuilder() {
25052505
return OpBuilder(getBody(), std::prev(getBody()->end()));
25062506
}
25072507

2508+
void AffineParallelOp::setLowerBounds(ValueRange lbOperands, AffineMap map) {
2509+
assert(lbOperands.size() == map.getNumInputs() &&
2510+
"operands to map must match number of inputs");
2511+
assert(map.getNumResults() >= 1 && "bounds map has at least one result");
2512+
2513+
auto ubOperands = getUpperBoundsOperands();
2514+
2515+
SmallVector<Value, 4> newOperands(lbOperands);
2516+
newOperands.append(ubOperands.begin(), ubOperands.end());
2517+
getOperation()->setOperands(newOperands);
2518+
2519+
lowerBoundsMapAttr(AffineMapAttr::get(map));
2520+
}
2521+
2522+
void AffineParallelOp::setUpperBounds(ValueRange ubOperands, AffineMap map) {
2523+
assert(ubOperands.size() == map.getNumInputs() &&
2524+
"operands to map must match number of inputs");
2525+
assert(map.getNumResults() >= 1 && "bounds map has at least one result");
2526+
2527+
SmallVector<Value, 4> newOperands(getLowerBoundsOperands());
2528+
newOperands.append(ubOperands.begin(), ubOperands.end());
2529+
getOperation()->setOperands(newOperands);
2530+
2531+
upperBoundsMapAttr(AffineMapAttr::get(map));
2532+
}
2533+
2534+
void AffineParallelOp::setLowerBoundsMap(AffineMap map) {
2535+
AffineMap lbMap = lowerBoundsMap();
2536+
assert(lbMap.getNumDims() == map.getNumDims() &&
2537+
lbMap.getNumSymbols() == map.getNumSymbols());
2538+
(void)lbMap;
2539+
lowerBoundsMapAttr(AffineMapAttr::get(map));
2540+
}
2541+
2542+
void AffineParallelOp::setUpperBoundsMap(AffineMap map) {
2543+
AffineMap ubMap = upperBoundsMap();
2544+
assert(ubMap.getNumDims() == map.getNumDims() &&
2545+
ubMap.getNumSymbols() == map.getNumSymbols());
2546+
(void)ubMap;
2547+
upperBoundsMapAttr(AffineMapAttr::get(map));
2548+
}
2549+
2550+
SmallVector<int64_t, 8> AffineParallelOp::getSteps() {
2551+
SmallVector<int64_t, 8> result;
2552+
for (Attribute attr : steps()) {
2553+
result.push_back(attr.cast<IntegerAttr>().getInt());
2554+
}
2555+
return result;
2556+
}
2557+
25082558
void AffineParallelOp::setSteps(ArrayRef<int64_t> newSteps) {
2509-
assert(newSteps.size() == getNumDims() && "steps & num dims mismatch");
2510-
setAttr(getStepsAttrName(), getBodyBuilder().getI64ArrayAttr(newSteps));
2559+
stepsAttr(getBodyBuilder().getI64ArrayAttr(newSteps));
25112560
}
25122561

25132562
static LogicalResult verify(AffineParallelOp op) {
@@ -2541,6 +2590,41 @@ static LogicalResult verify(AffineParallelOp op) {
25412590
return success();
25422591
}
25432592

2593+
LogicalResult AffineValueMap::canonicalize() {
2594+
SmallVector<Value, 4> newOperands{operands};
2595+
auto newMap = getAffineMap();
2596+
composeAffineMapAndOperands(&newMap, &newOperands);
2597+
if (newMap == getAffineMap() && newOperands == operands)
2598+
return failure();
2599+
reset(newMap, newOperands);
2600+
return success();
2601+
}
2602+
2603+
/// Canonicalize the bounds of the given loop.
2604+
static LogicalResult canonicalizeLoopBounds(AffineParallelOp op) {
2605+
AffineValueMap lb = op.getLowerBoundsValueMap();
2606+
bool lbCanonicalized = succeeded(lb.canonicalize());
2607+
2608+
AffineValueMap ub = op.getUpperBoundsValueMap();
2609+
bool ubCanonicalized = succeeded(ub.canonicalize());
2610+
2611+
// Any canonicalization change always leads to updated map(s).
2612+
if (!lbCanonicalized && !ubCanonicalized)
2613+
return failure();
2614+
2615+
if (lbCanonicalized)
2616+
op.setLowerBounds(lb.getOperands(), lb.getAffineMap());
2617+
if (ubCanonicalized)
2618+
op.setUpperBounds(ub.getOperands(), ub.getAffineMap());
2619+
2620+
return success();
2621+
}
2622+
2623+
LogicalResult AffineParallelOp::fold(ArrayRef<Attribute> operands,
2624+
SmallVectorImpl<OpFoldResult> &results) {
2625+
return canonicalizeLoopBounds(*this);
2626+
}
2627+
25442628
static void print(OpAsmPrinter &p, AffineParallelOp op) {
25452629
p << op.getOperationName() << " (" << op.getBody()->getArguments() << ") = (";
25462630
p.printAffineMapOfSSAIds(op.lowerBoundsMapAttr(),
@@ -2549,13 +2633,8 @@ static void print(OpAsmPrinter &p, AffineParallelOp op) {
25492633
p.printAffineMapOfSSAIds(op.upperBoundsMapAttr(),
25502634
op.getUpperBoundsOperands());
25512635
p << ')';
2552-
SmallVector<int64_t, 4> steps;
2553-
bool elideSteps = true;
2554-
for (auto attr : op.steps()) {
2555-
auto step = attr.cast<IntegerAttr>().getInt();
2556-
elideSteps &= (step == 1);
2557-
steps.push_back(step);
2558-
}
2636+
SmallVector<int64_t, 8> steps = op.getSteps();
2637+
bool elideSteps = llvm::all_of(steps, [](int64_t step) { return step == 1; });
25592638
if (!elideSteps) {
25602639
p << " step (";
25612640
llvm::interleaveComma(steps, p);
@@ -2641,7 +2720,7 @@ static ParseResult parseAffineParallelOp(OpAsmParser &parser,
26412720
}
26422721

26432722
// Parse optional clause of the form: `reduce ("addf", "maxf")`, where the
2644-
// quoted strings a member of the enum AtomicRMWKind.
2723+
// quoted strings are a member of the enum AtomicRMWKind.
26452724
SmallVector<Attribute, 4> reductions;
26462725
if (succeeded(parser.parseOptionalKeyword("reduce"))) {
26472726
if (parser.parseLParen())
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
//===- AffineParallelNormalize.cpp - AffineParallelNormalize Pass ---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a normalizer for affine parallel loops.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "PassDetail.h"
14+
#include "mlir/Dialect/Affine/IR/AffineOps.h"
15+
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
16+
#include "mlir/Dialect/Affine/Passes.h"
17+
#include "mlir/IR/PatternMatch.h"
18+
19+
using namespace mlir;
20+
21+
void normalizeAffineParallel(AffineParallelOp op) {
22+
AffineMap lbMap = op.lowerBoundsMap();
23+
SmallVector<int64_t, 8> steps = op.getSteps();
24+
// No need to do any work if the parallel op is already normalized.
25+
bool isAlreadyNormalized =
26+
llvm::all_of(llvm::zip(steps, lbMap.getResults()), [](auto tuple) {
27+
int64_t step = std::get<0>(tuple);
28+
auto lbExpr =
29+
std::get<1>(tuple).template dyn_cast<AffineConstantExpr>();
30+
return lbExpr && lbExpr.getValue() == 0 && step == 1;
31+
});
32+
if (isAlreadyNormalized)
33+
return;
34+
35+
AffineValueMap ranges = op.getRangesValueMap();
36+
auto builder = OpBuilder::atBlockBegin(op.getBody());
37+
auto zeroExpr = builder.getAffineConstantExpr(0);
38+
SmallVector<AffineExpr, 8> lbExprs;
39+
SmallVector<AffineExpr, 8> ubExprs;
40+
for (unsigned i = 0, e = steps.size(); i < e; ++i) {
41+
int64_t step = steps[i];
42+
43+
// Adjust the lower bound to be 0.
44+
lbExprs.push_back(zeroExpr);
45+
46+
// Adjust the upper bound expression: 'range / step'.
47+
AffineExpr ubExpr = ranges.getResult(i).ceilDiv(step);
48+
ubExprs.push_back(ubExpr);
49+
50+
// Adjust the corresponding IV: 'lb + i * step'.
51+
BlockArgument iv = op.getBody()->getArgument(i);
52+
AffineExpr lbExpr = lbMap.getResult(i);
53+
unsigned nDims = lbMap.getNumDims();
54+
auto expr = lbExpr + builder.getAffineDimExpr(nDims) * step;
55+
auto map = AffineMap::get(/*dimCount=*/nDims + 1,
56+
/*symbolCount=*/lbMap.getNumSymbols(), expr);
57+
58+
// Use an 'affine.apply' op that will be simplified later in subsequent
59+
// canonicalizations.
60+
OperandRange lbOperands = op.getLowerBoundsOperands();
61+
OperandRange dimOperands = lbOperands.take_front(nDims);
62+
OperandRange symbolOperands = lbOperands.drop_front(nDims);
63+
SmallVector<Value, 8> applyOperands{dimOperands};
64+
applyOperands.push_back(iv);
65+
applyOperands.append(symbolOperands.begin(), symbolOperands.end());
66+
auto apply = builder.create<AffineApplyOp>(op.getLoc(), map, applyOperands);
67+
iv.replaceAllUsesExcept(apply, SmallPtrSet<Operation *, 1>{apply});
68+
}
69+
70+
SmallVector<int64_t, 8> newSteps(op.getNumDims(), 1);
71+
op.setSteps(newSteps);
72+
auto newLowerMap = AffineMap::get(
73+
/*dimCount=*/0, /*symbolCount=*/0, lbExprs, op.getContext());
74+
op.setLowerBounds({}, newLowerMap);
75+
auto newUpperMap = AffineMap::get(ranges.getNumDims(), ranges.getNumSymbols(),
76+
ubExprs, op.getContext());
77+
op.setUpperBounds(ranges.getOperands(), newUpperMap);
78+
}
79+
80+
namespace {
81+
82+
/// Normalize affine.parallel ops so that lower bounds are 0 and steps are 1.
83+
/// As currently implemented, this pass cannot fail, but it might skip over ops
84+
/// that are already in a normalized form.
85+
struct AffineParallelNormalizePass
86+
: public AffineParallelNormalizeBase<AffineParallelNormalizePass> {
87+
88+
void runOnFunction() override { getFunction().walk(normalizeAffineParallel); }
89+
};
90+
91+
} // namespace
92+
93+
std::unique_ptr<OperationPass<FuncOp>>
94+
mlir::createAffineParallelNormalizePass() {
95+
return std::make_unique<AffineParallelNormalizePass>();
96+
}

mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIRAffineTransforms
22
AffineDataCopyGeneration.cpp
33
AffineLoopInvariantCodeMotion.cpp
44
AffineParallelize.cpp
5+
AffineParallelNormalize.cpp
56
LoopTiling.cpp
67
LoopUnroll.cpp
78
LoopUnrollAndJam.cpp
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: mlir-opt %s -affine-parallel-normalize -split-input-file | FileCheck %s
2+
3+
// Normalize steps to 1 and lower bounds to 0.
4+
5+
// CHECK-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 * 3)>
6+
// CHECK-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 * 2 + 1)>
7+
// CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)>
8+
9+
// CHECK-LABEL: func @normalize_parallel()
10+
func @normalize_parallel() {
11+
%cst = constant 1.0 : f32
12+
%0 = alloc() : memref<2x4xf32>
13+
// CHECK: affine.parallel (%[[i0:.*]], %[[j0:.*]]) = (0, 0) to (4, 2)
14+
affine.parallel (%i, %j) = (0, 1) to (10, 5) step (3, 2) {
15+
// CHECK: %[[i1:.*]] = affine.apply [[$MAP0]](%[[i0]])
16+
// CHECK: %[[j1:.*]] = affine.apply [[$MAP1]](%[[j0]])
17+
// CHECK: affine.parallel (%[[k0:.*]]) = (0) to (%[[j1]] - %[[i1]])
18+
affine.parallel (%k) = (%i) to (%j) {
19+
// CHECK: %[[k1:.*]] = affine.apply [[$MAP2]](%[[i1]], %[[k0]])
20+
// CHECK: affine.store %{{.*}}, %{{.*}}[%[[i1]], %[[k1]]] : memref<2x4xf32>
21+
affine.store %cst, %0[%i, %k] : memref<2x4xf32>
22+
}
23+
}
24+
return
25+
}

mlir/test/Dialect/Affine/canonicalize.mlir

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,3 +604,26 @@ func @drop_duplicate_bounds(%N : index) {
604604
}
605605
return
606606
}
607+
608+
// -----
609+
610+
// Ensure affine.parallel bounds expressions are canonicalized.
611+
612+
#map3 = affine_map<(d0) -> (d0 * 5)>
613+
614+
// CHECK-LABEL: func @affine_parallel_const_bounds
615+
func @affine_parallel_const_bounds() {
616+
%cst = constant 1.0 : f32
617+
%c0 = constant 0 : index
618+
%c4 = constant 4 : index
619+
%0 = alloc() : memref<4xf32>
620+
// CHECK: affine.parallel (%{{.*}}) = (0) to (4)
621+
affine.parallel (%i) = (%c0) to (%c0 + %c4) {
622+
%1 = affine.apply #map3(%i)
623+
// CHECK: affine.parallel (%{{.*}}) = (0) to (%{{.*}} * 5)
624+
affine.parallel (%j) = (%c0) to (%1) {
625+
affine.store %cst, %0[%j] : memref<4xf32>
626+
}
627+
}
628+
return
629+
}

0 commit comments

Comments
 (0)