11
11
#include " flang/Optimizer/OpenMP/Utils.h"
12
12
#include " mlir/Analysis/SliceAnalysis.h"
13
13
#include " mlir/Dialect/OpenMP/OpenMPDialect.h"
14
+ #include " mlir/IR/IRMapping.h"
14
15
#include " mlir/Transforms/DialectConversion.h"
15
16
#include " mlir/Transforms/RegionUtils.h"
16
17
@@ -24,7 +25,82 @@ namespace flangomp {
24
25
25
26
namespace {
26
27
namespace looputils {
27
- using LoopNest = llvm::SetVector<fir::DoLoopOp>;
28
+ // / Stores info needed about the induction/iteration variable for each `do
29
+ // / concurrent` in a loop nest.
30
+ struct InductionVariableInfo {
31
+ // / the operation allocating memory for iteration variable,
32
+ mlir::Operation *iterVarMemDef;
33
+ };
34
+
35
+ using LoopNestToIndVarMap =
36
+ llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
37
+
38
+ // / Given an operation `op`, this returns true if one of `op`'s operands is
39
+ // / "ultimately" the loop's induction variable. This helps in cases where the
40
+ // / induction variable's use is "hidden" behind a convert/cast.
41
+ // /
42
+ // / For example, give the following loop:
43
+ // / ```
44
+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
45
+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
46
+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
47
+ // / ...
48
+ // / }
49
+ // / ```
50
+ // /
51
+ // / If \p op is the `fir.store` operation, then this function will return true
52
+ // / since the IV is the "ultimate" operand to the `fir.store` op through the
53
+ // / `%ind_var_conv` -> `%ind_var` conversion sequence.
54
+ // /
55
+ // / For why this is useful, see its use in `findLoopIndVarMemDecl`.
56
+ bool isIndVarUltimateOperand (mlir::Operation *op, fir::DoLoopOp doLoop) {
57
+ while (op != nullptr && op->getNumOperands () > 0 ) {
58
+ auto ivIt = llvm::find_if (op->getOperands (), [&](mlir::Value operand) {
59
+ return operand == doLoop.getInductionVar ();
60
+ });
61
+
62
+ if (ivIt != op->getOperands ().end ())
63
+ return true ;
64
+
65
+ op = op->getOperand (0 ).getDefiningOp ();
66
+ }
67
+
68
+ return false ;
69
+ }
70
+
71
+ // / For the \p doLoop parameter, find the operation that declares its iteration
72
+ // / variable or allocates memory for it.
73
+ // /
74
+ // / For example, give the following loop:
75
+ // / ```
76
+ // / ...
77
+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
78
+ // / ...
79
+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
80
+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
81
+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
82
+ // / ...
83
+ // / }
84
+ // / ```
85
+ // /
86
+ // / This function returns the `hlfir.declare` op for `%i`.
87
+ mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
88
+ mlir::Value result = nullptr ;
89
+ mlir::visitUsedValuesDefinedAbove (
90
+ doLoop.getRegion (), [&](mlir::OpOperand *operand) {
91
+ if (result)
92
+ return ;
93
+
94
+ if (isIndVarUltimateOperand (operand->getOwner (), doLoop)) {
95
+ assert (result == nullptr &&
96
+ " loop can have only one induction variable" );
97
+ result = operand->get ();
98
+ }
99
+ });
100
+
101
+ assert (result != nullptr && result.getDefiningOp () != nullptr );
102
+ return result.getDefiningOp ();
103
+ }
28
104
29
105
// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
30
106
// / there are no operations in \p outerloop's body other than:
@@ -116,11 +192,14 @@ bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) {
116
192
// / fails to recognize a certain nested loop as part of the nest it just returns
117
193
// / the parent loops it discovered before.
118
194
mlir::LogicalResult collectLoopNest (fir::DoLoopOp currentLoop,
119
- LoopNest &loopNest) {
195
+ LoopNestToIndVarMap &loopNest) {
120
196
assert (currentLoop.getUnordered ());
121
197
122
198
while (true ) {
123
- loopNest.insert (currentLoop);
199
+ loopNest.insert (
200
+ {currentLoop,
201
+ InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
202
+
124
203
llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
125
204
126
205
for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
@@ -152,26 +231,136 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
152
231
public:
153
232
using mlir::OpConversionPattern<fir::DoLoopOp>::OpConversionPattern;
154
233
155
- DoConcurrentConversion (mlir::MLIRContext *context, bool mapToDevice)
156
- : OpConversionPattern(context), mapToDevice(mapToDevice) {}
234
+ DoConcurrentConversion (mlir::MLIRContext *context, bool mapToDevice,
235
+ llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip)
236
+ : OpConversionPattern(context), mapToDevice(mapToDevice),
237
+ concurrentLoopsToSkip (concurrentLoopsToSkip) {}
157
238
158
239
/// Converts the loop nest rooted at \p doLoop: collects the nest, emits an
/// `mlir::omp::ParallelOp` holding clones of the iteration-variable
/// declarations, then an `mlir::omp::WsloopOp` wrapping an
/// `mlir::omp::LoopNestOp` into which the innermost collected loop's body is
/// cloned. The original loop is erased afterwards.
///
/// A warning is emitted when `collectLoopNest` reports failure; conversion
/// still proceeds with the loops that were collected. Always returns success.
mlir::LogicalResult
matchAndRewrite(fir::DoLoopOp doLoop, OpAdaptor adaptor,
                mlir::ConversionPatternRewriter &rewriter) const override {
  const mlir::Location loc = doLoop.getLoc();

  looputils::LoopNestToIndVarMap loopNest;
  if (failed(looputils::collectLoopNest(doLoop, loopNest)))
    mlir::emitWarning(loc,
                      "Some `do concurent` loops are not perfectly-nested. "
                      "These will be serialized.");

  mlir::IRMapping mapper;
  genParallelOp(loc, rewriter, loopNest, mapper);

  mlir::omp::LoopNestOperands loopNestClauseOps;
  genLoopNestClauseOps(loc, rewriter, loopNest, mapper, loopNestClauseOps);

  // The body that gets cloned is the one of the last loop recorded in the
  // nest.
  fir::DoLoopOp lastLoopInNest = loopNest.back().first;
  mlir::omp::LoopNestOp ompLoopNest =
      genWsLoopOp(rewriter, lastLoopInNest, mapper, loopNestClauseOps,
                  /*isComposite=*/mapToDevice);

  rewriter.eraseOp(doLoop);

  // Any `unordered` loop still present inside the generated nest was not
  // mapped to OpenMP. Record it so that the legality check of the
  // `ConversionTarget` skips it.
  ompLoopNest->walk([&](fir::DoLoopOp remainingLoop) {
    if (remainingLoop.getUnordered())
      concurrentLoopsToSkip.insert(remainingLoop);
  });

  return mlir::success();
}
173
273
274
+ private:
275
+ mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
276
+ mlir::ConversionPatternRewriter &rewriter,
277
+ looputils::LoopNestToIndVarMap &loopNest,
278
+ mlir::IRMapping &mapper) const {
279
+ auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
280
+ rewriter.createBlock (¶llelOp.getRegion ());
281
+ rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
282
+
283
+ genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
284
+ return parallelOp;
285
+ }
286
+
287
+ void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
288
+ looputils::LoopNestToIndVarMap &loopNest,
289
+ mlir::IRMapping &mapper) const {
290
+
291
+ for (auto &[_, indVarInfo] : loopNest)
292
+ genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
293
+ }
294
+
295
+ mlir::Operation *
296
+ genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
297
+ mlir::Operation *indVarMemDef,
298
+ mlir::IRMapping &mapper) const {
299
+ assert (
300
+ indVarMemDef != nullptr &&
301
+ " Induction variable memdef is expected to have a defining operation." );
302
+
303
+ llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
304
+ for (auto operand : indVarMemDef->getOperands ())
305
+ indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
306
+ indVarDeclareAndAlloc.insert (indVarMemDef);
307
+
308
+ mlir::Operation *result;
309
+ for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
310
+ result = rewriter.clone (*opToClone, mapper);
311
+
312
+ return result;
313
+ }
314
+
315
+ void genLoopNestClauseOps (
316
+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
317
+ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
318
+ mlir::omp::LoopNestOperands &loopNestClauseOps) const {
319
+ assert (loopNestClauseOps.loopLowerBounds .empty () &&
320
+ " Loop nest bounds were already emitted!" );
321
+
322
+ auto populateBounds = [&](mlir::Value var,
323
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
324
+ bounds.push_back (var.getDefiningOp ()->getResult (0 ));
325
+ };
326
+
327
+ for (auto &[doLoop, _] : loopNest) {
328
+ populateBounds (doLoop.getLowerBound (), loopNestClauseOps.loopLowerBounds );
329
+ populateBounds (doLoop.getUpperBound (), loopNestClauseOps.loopUpperBounds );
330
+ populateBounds (doLoop.getStep (), loopNestClauseOps.loopSteps );
331
+ }
332
+
333
+ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
334
+ }
335
+
336
+ mlir::omp::LoopNestOp
337
+ genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
338
+ mlir::IRMapping &mapper,
339
+ const mlir::omp::LoopNestOperands &clauseOps,
340
+ bool isComposite) const {
341
+
342
+ auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
343
+ wsloopOp.setComposite (isComposite);
344
+ rewriter.createBlock (&wsloopOp.getRegion ());
345
+
346
+ auto loopNestOp =
347
+ rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
348
+
349
+ // Clone the loop's body inside the loop nest construct using the
350
+ // mapped values.
351
+ rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
352
+ loopNestOp.getRegion ().begin (), mapper);
353
+
354
+ mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
355
+ rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
356
+ rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
357
+ rewriter.eraseOp (terminator);
358
+
359
+ return loopNestOp;
360
+ }
361
+
174
362
bool mapToDevice;
363
+ llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip;
175
364
};
176
365
177
366
class DoConcurrentConversionPass
@@ -200,16 +389,18 @@ class DoConcurrentConversionPass
200
389
return ;
201
390
}
202
391
392
+ llvm::DenseSet<fir::DoLoopOp> concurrentLoopsToSkip;
203
393
mlir::RewritePatternSet patterns (context);
204
394
patterns.insert <DoConcurrentConversion>(
205
- context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device);
395
+ context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device,
396
+ concurrentLoopsToSkip);
206
397
mlir::ConversionTarget target (*context);
207
398
target.addDynamicallyLegalOp <fir::DoLoopOp>([&](fir::DoLoopOp op) {
208
399
// The goal is to handle constructs that eventually get lowered to
209
400
// `fir.do_loop` with the `unordered` attribute (e.g. array expressions).
210
401
// Currently, this is only enabled for the `do concurrent` construct since
211
402
// the pass runs early in the pipeline.
212
- return !op.getUnordered ();
403
+ return !op.getUnordered () || concurrentLoopsToSkip. contains (op) ;
213
404
});
214
405
target.markUnknownOpDynamicallyLegal (
215
406
[](mlir::Operation *) { return true ; });
0 commit comments