@@ -245,6 +245,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
245
245
246
246
return mlir::success ();
247
247
}
248
+
249
+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
250
+ // / particular, this function would take this input IR:
251
+ // / ```
252
+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
253
+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
254
+ // / %j_lb = arith.constant 1 : i32
255
+ // / %j_ub = arith.constant 10 : i32
256
+ // / %j_step = arith.constant 1 : index
257
+ // /
258
+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
259
+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
260
+ // / ...
261
+ // / }
262
+ // / }
263
+ // / ```
264
+ // /
265
+ // / into the following form (using generic op form since the result is
266
+ // / technically an invalid `fir.do_loop` op):
267
+ // /
268
+ // / ```
269
+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
270
+ // / ^bb0(%i_iv: index):
271
+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
272
+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
273
+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
274
+ // /
275
+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
276
+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
277
+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
278
+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
279
+ // / ...
280
+ // / })
281
+ // / ```
282
+ // /
283
+ // / What happened to the loop nest is the following:
284
+ // /
285
+ // / * the innermost loop's entry block was updated from having one operand to
286
+ // / having `n` operands where `n` is the number of loops in the nest,
287
+ // /
288
+ // / * the outer loop(s)' ops that update the IVs were sunk inside the innermost
289
+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
290
+ // /
291
+ // / * the innermost loop's entry block's arguments were mapped in order from the
292
+ // / outermost to the innermost IV.
293
+ // /
294
+ // / With this IR change, we can directly inline the innermost loop's region into
295
+ // / the newly generated `omp.loop_nest` op.
296
+ // /
297
+ // / Note that this function has a pre-condition that \p loopNest consists of
298
+ // / perfectly nested loops; i.e. there are no in-between ops between 2 nested
299
+ // / loops except for the ops to setup the inner loop's LB, UB, and step. These
300
+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
301
+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
302
+ looputils::LoopNestToIndVarMap &loopNest) {
303
+ if (loopNest.size () <= 1 )
304
+ return ;
305
+
306
+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
307
+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
308
+
309
+ llvm::SmallVector<mlir::Type> argTypes;
310
+ llvm::SmallVector<mlir::Location> argLocs;
311
+
312
+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
313
+ // Sink the IV update ops to the innermost loop. We need to do for all loops
314
+ // except for the innermost one, hence the `drop_end` usage above.
315
+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
316
+ op->moveBefore (&innermostFirstOp);
317
+
318
+ argTypes.push_back (doLoop.getInductionVar ().getType ());
319
+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
320
+ }
321
+
322
+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
323
+ // Extend the innermost entry block with arguments to represent the outer IVs.
324
+ innermmostRegion.addArguments (argTypes, argLocs);
325
+
326
+ unsigned idx = 1 ;
327
+ // In reverse, remap the IVs of the loop nest from the old values to the new
328
+ // ones. We do that in reverse since the first argument before this loop is
329
+ // the old IV for the innermost loop. Therefore, we want to replace it first
330
+ // before the old value (1st argument in the block) is remapped to be the IV
331
+ // of the outermost loop in the nest.
332
+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
333
+ doLoop.getInductionVar ().replaceAllUsesWith (
334
+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
335
+ ++idx;
336
+ }
337
+ }
248
338
} // namespace looputils
249
339
250
340
class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -267,6 +357,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
267
357
" Some `do concurent` loops are not perfectly-nested. "
268
358
" These will be serialzied." );
269
359
360
+ looputils::sinkLoopIVArgs (rewriter, loopNest);
270
361
mlir::IRMapping mapper;
271
362
genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
272
363
mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments