@@ -30,6 +30,9 @@ namespace looputils {
30
30
struct InductionVariableInfo {
31
31
// / The operation allocating memory for iteration variable.
32
32
mlir::Operation *iterVarMemDef;
33
+ // / the operation(s) updating the iteration variable with the current
34
+ // / iteration number.
35
+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
33
36
};
34
37
35
38
using LoopNestToIndVarMap =
@@ -87,6 +90,47 @@ mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) {
87
90
return result.getDefiningOp ();
88
91
}
89
92
93
+ // / Collects the op(s) responsible for updating a loop's iteration variable with
94
+ // / the current iteration number. For example, for the input IR:
95
+ // / ```
96
+ // / %i = fir.alloca i32 {bindc_name = "i"}
97
+ // / %i_decl:2 = hlfir.declare %i ...
98
+ // / ...
99
+ // / fir.do_loop %i_iv = %lb to %ub step %step unordered {
100
+ // / %1 = fir.convert %i_iv : (index) -> i32
101
+ // / fir.store %1 to %i_decl#1 : !fir.ref<i32>
102
+ // / ...
103
+ // / }
104
+ // / ```
105
+ // / this function would return the first 2 ops in the `fir.do_loop`'s region.
106
+ llvm::SetVector<mlir::Operation *>
107
+ extractIndVarUpdateOps (fir::DoLoopOp doLoop) {
108
+ mlir::Value indVar = doLoop.getInductionVar ();
109
+ llvm::SetVector<mlir::Operation *> indVarUpdateOps;
110
+
111
+ llvm::SmallVector<mlir::Value> toProcess;
112
+ toProcess.push_back (indVar);
113
+
114
+ llvm::DenseSet<mlir::Value> done;
115
+
116
+ while (!toProcess.empty ()) {
117
+ mlir::Value val = toProcess.back ();
118
+ toProcess.pop_back ();
119
+
120
+ if (!done.insert (val).second )
121
+ continue ;
122
+
123
+ for (mlir::Operation *user : val.getUsers ()) {
124
+ indVarUpdateOps.insert (user);
125
+
126
+ for (mlir::Value result : user->getResults ())
127
+ toProcess.push_back (result);
128
+ }
129
+ }
130
+
131
+ return std::move (indVarUpdateOps);
132
+ }
133
+
90
134
// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
91
135
// / there are no operations in \p outerloop's body other than:
92
136
// /
@@ -183,7 +227,9 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
183
227
while (true ) {
184
228
loopNest.insert (
185
229
{currentLoop,
186
- InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
230
+ InductionVariableInfo{
231
+ findLoopIterationVarMemDecl (currentLoop),
232
+ std::move (looputils::extractIndVarUpdateOps (currentLoop))}});
187
233
188
234
llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
189
235
@@ -210,6 +256,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
210
256
211
257
return mlir::success ();
212
258
}
259
+
260
+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
261
+ // / particular, this function would take this input IR:
262
+ // / ```
263
+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
264
+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
265
+ // / %j_lb = arith.constant 1 : i32
266
+ // / %j_ub = arith.constant 10 : i32
267
+ // / %j_step = arith.constant 1 : index
268
+ // /
269
+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
270
+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
271
+ // / ...
272
+ // / }
273
+ // / }
274
+ // / ```
275
+ // /
276
+ // / into the following form (using generic op form since the result is
277
+ // / technically an invalid `fir.do_loop` op:
278
+ // /
279
+ // / ```
280
+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
281
+ // / ^bb0(%i_iv: index):
282
+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
283
+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
284
+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
285
+ // /
286
+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
287
+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
288
+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
289
+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
290
+ // / ...
291
+ // / })
292
+ // / ```
293
+ // /
294
+ // / What happened to the loop nest is the following:
295
+ // /
296
+ // / * the innermost loop's entry block was updated from having one operand to
297
+ // / having `n` operands where `n` is the number of loops in the nest,
298
+ // /
299
+ // / * the outer loop(s)' ops that update the IVs were sank inside the innermost
300
+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
301
+ // /
302
+ // / * the innermost loop's entry block's arguments were mapped in order from the
303
+ // / outermost to the innermost IV.
304
+ // /
305
+ // / With this IR change, we can directly inline the innermost loop's region into
306
+ // / the newly generated `omp.loop_nest` op.
307
+ // /
308
+ // / Note that this function has a pre-condition that \p loopNest consists of
309
+ // / perfectly nested loops; i.e. there are no in-between ops between 2 nested
310
+ // / loops except for the ops to setup the inner loop's LB, UB, and step. These
311
+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
312
+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
313
+ looputils::LoopNestToIndVarMap &loopNest) {
314
+ if (loopNest.size () <= 1 )
315
+ return ;
316
+
317
+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
318
+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
319
+
320
+ llvm::SmallVector<mlir::Type> argTypes;
321
+ llvm::SmallVector<mlir::Location> argLocs;
322
+
323
+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
324
+ // Sink the IV update ops to the innermost loop. We need to do for all loops
325
+ // except for the innermost one, hence the `drop_end` usage above.
326
+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
327
+ op->moveBefore (&innermostFirstOp);
328
+
329
+ argTypes.push_back (doLoop.getInductionVar ().getType ());
330
+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
331
+ }
332
+
333
+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
334
+ // Extend the innermost entry block with arguments to represent the outer IVs.
335
+ innermmostRegion.addArguments (argTypes, argLocs);
336
+
337
+ unsigned idx = 1 ;
338
+ // In reverse, remap the IVs of the loop nest from the old values to the new
339
+ // ones. We do that in reverse since the first argument before this loop is
340
+ // the old IV for the innermost loop. Therefore, we want to replace it first
341
+ // before the old value (1st argument in the block) is remapped to be the IV
342
+ // of the outermost loop in the nest.
343
+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
344
+ doLoop.getInductionVar ().replaceAllUsesWith (
345
+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
346
+ ++idx;
347
+ }
348
+ }
213
349
} // namespace looputils
214
350
215
351
class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -236,6 +372,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
236
372
" Some `do concurent` loops are not perfectly-nested. "
237
373
" These will be serialized." );
238
374
375
+ looputils::sinkLoopIVArgs (rewriter, loopNest);
239
376
mlir::IRMapping mapper;
240
377
genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
241
378
mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments