@@ -37,14 +37,6 @@ struct TiledLinalgOp {
37
37
SmallVector<Value, 4 > tensorResults;
38
38
};
39
39
40
- struct TiledAndFusedLinalgOps {
41
- LinalgOp op;
42
- SmallVector<LinalgOp, 1 > fusedProducers;
43
- SmallVector<LinalgOp, 1 > originalProducers;
44
- SmallVector<Operation *, 4 > fusedLoops;
45
- SmallVector<Operation *, 4 > unfusedLoops;
46
- };
47
-
48
40
// / Populates patterns for vectorization of all ConvN-D ops.
49
41
void populateConvVectorizationPatterns (
50
42
MLIRContext *context, SmallVectorImpl<OwningRewritePatternList> &patterns,
@@ -73,14 +65,11 @@ void populateLinalgBufferizePatterns(MLIRContext *context,
73
65
Optional<TiledLinalgOp> tileLinalgOp (OpBuilder &b, LinalgOp op,
74
66
const LinalgTilingOptions &options);
75
67
76
- // / Tile and fuse the `op` with its producers. The tile and fuse proceeds in
77
- // / three steps
78
- // / - Find tile loops that are fusable with its producer tile loops (a.k.a. tile
79
- // / + fuse loops).
80
- // / - Tile just these loops of the consumer (root operation) and fuse with
81
- // / the producer.
82
- // / - Tile again the tiled consumer operation produced above to do rest of
83
- // / the tiling specified by the `tilingOptions`.
68
+ // / Fuse a sequence of linalg operations (`ops`) using tile-and-fuse. This
69
+ // / proceeds as follows:
70
+ // / - Find outer parallel loops in these ops that can be fused.
71
+ // / - Tile fusable outer parallel loops of the last operation in the sequence.
72
+ // / - Fuse the remaining operations with the tiled operation.
84
73
// /
85
74
// / For example, consider the sequence of matmul below
86
75
// /
@@ -107,36 +96,39 @@ Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
107
96
// / : memref<256x32xf32> to memref<16x32xf32, #map0>
108
97
// / %3 = subview %arg1[0, 0] [32, 32] [1, 1]
109
98
// / : memref<32x32xf32> to memref<32x32xf32, #map1>
99
+ // / %4 = subview %arg3[0, 0] [32, 32] [1, 1]
100
+ // / : memref<32x32xf32> to memref<32x32xf32, #map1>
110
101
// / linalg.matmul
111
102
// / ins(%2, %3 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
112
103
// / outs(%0 : memref<16x32xf32, #map0>)
113
- // / scf.parallel (%arg6) = (%c0) to (%c32) step (%c8) {
114
- // / scf.for %arg7 = %c0 to %c32 step %c4 {
115
- // / %4 = subview %0[0, %arg7] [16, 4] [1, 1]
116
- // / : memref<16x32xf32, #map0> to memref<16x4xf32, #map0>
117
- // / %5 = subview %arg3[%arg7, %arg6] [4, 8] [1, 1]
118
- // / : memref<32x32xf32> to memref<4x8xf32, #map0>
119
- // / %6 = subview %1[0, %arg6] [16, 8] [1, 1]
120
- // / : memref<16x32xf32, #map0> to memref<16x8xf32, #map0>
121
- // / linalg.matmul
122
- // / ins(%4, %5 : memref<16x4xf32, #map0>, memref<4x8xf32, #map0>)
123
- // / outs(%6 : memref<16x8xf32, #map0>)
124
- // / }
125
- // / scf.yield
126
- // / }
127
- // / scf.yield
104
+ // / linalg.matmul
105
+ // / ins(%0, %4 : memref<16x32xf32, #map0>, memref<32x32xf32, #map1>)
106
+ // / outs(%1 : memref<16x32xf32, #map0>)
128
107
// / }
129
108
// /
130
- // / The following tiling options are handled differently in tile+fuse (compared
131
- // / to tile only)
109
+ // / `tilingOptions` are used to tile the corresponding operation in `ops` (the
110
+ // / size of the former should be the same as the size of the latter). Based on how
111
+ // / tile+fuse is implemented, the fused loops are generated based on the last
112
+ // / operation in the sequence. For example, the tile sizes for the fused loops
113
+ // / are obtained from `tilingOptions.back()`. The following tiling options are
114
+ // / handled differently in tile+fuse (compared to tile only)
132
115
// / - Interchange of the tiling loops is not supported right now.
133
- // / - Distribution is only done for the tile+fuse loops. The tiled loops
134
- // / generated by the second tiling is not distributed.
116
+ // / - Only the fused loops are distributed.
117
+ struct TiledAndFusedLinalgOps {
118
+ // / Operation obtained by tiling the last operation in the sequence of `ops`
119
+ // / passed to `tileAndFuseLinalgOps`.
120
+ LinalgOp op;
121
+ // / The dimensions of the loops that are fused.
122
+ std::set<unsigned > fusedLoopDims;
123
+ // / The generated fused operations (created within the fused loops).
124
+ SmallVector<LinalgOp, 1 > fusedProducers;
125
+ // / The fused loops generated.
126
+ SmallVector<Operation *, 4 > fusedLoops;
127
+ };
135
128
Optional<TiledAndFusedLinalgOps>
136
- tileAndFuseLinalgOps (PatternRewriter &rewriter, LinalgOp op ,
129
+ tileAndFuseLinalgOps (OpBuilder &builder, ArrayRef< LinalgOp> ops ,
137
130
const LinalgDependenceGraph &dependenceGraph,
138
- const LinalgTilingOptions &tilingOptions,
139
- const LinalgFusionOptions &fusionOptions);
131
+ const LinalgTilingOptions &tilingOptions);
140
132
141
133
// / Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`.
142
134
// / This is an in-place transformation controlled by `interchangeVector`.
0 commit comments