@@ -161,18 +161,71 @@ void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder,
161
161
namespace {
162
162
namespace looputils {
163
163
// / Stores info needed about the induction/iteration variable for each `do
164
- // / concurrent` in a loop nest. This includes:
165
- // / * the operation allocating memory for iteration variable,
166
- // / * the operation(s) updating the iteration variable with the current
167
- // / iteration number.
164
+ // / concurrent` in a loop nest.
168
165
struct InductionVariableInfo {
166
+ // / The operation allocating memory for iteration variable.
169
167
mlir::Operation *iterVarMemDef;
168
+
169
+ // / the operation(s) updating the iteration variable with the current
170
+ // / iteration number.
170
171
llvm::SetVector<mlir::Operation *> indVarUpdateOps;
171
172
};
172
173
173
174
using LoopNestToIndVarMap =
174
175
llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
175
176
177
+ // / For the \p doLoop parameter, find the operation that declares its iteration
178
+ // / variable or allocates memory for it.
179
+ // /
180
+ // / For example, give the following loop:
181
+ // / ```
182
+ // / ...
183
+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
184
+ // / ...
185
+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
186
+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
187
+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
188
+ // / ...
189
+ // / }
190
+ // / ```
191
+ // /
192
+ // / This function returns the `hlfir.declare` op for `%i`.
193
+ // /
194
+ // / Note: The current implementation is dependent on how flang emits loop
195
+ // / bodies; which is sufficient for the current simple test/use cases. If this
196
+ // / proves to be insufficient, this should be made more generic.
197
+ mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
198
+ mlir::Value result = nullptr ;
199
+
200
+ // Checks if a StoreOp is updating the memref of the loop's iteration
201
+ // variable.
202
+ auto isStoringIV = [&](fir::StoreOp storeOp) {
203
+ // Direct store into the IV memref.
204
+ if (storeOp.getValue () == doLoop.getInductionVar ())
205
+ return true ;
206
+
207
+ // Indirect store into the IV memref.
208
+ if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
209
+ storeOp.getValue ().getDefiningOp ())) {
210
+ if (convertOp.getOperand () == doLoop.getInductionVar ())
211
+ return true ;
212
+ }
213
+
214
+ return false ;
215
+ };
216
+
217
+ for (mlir::Operation &op : doLoop) {
218
+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
219
+ if (isStoringIV (storeOp)) {
220
+ result = storeOp.getMemref ();
221
+ break ;
222
+ }
223
+ }
224
+
225
+ assert (result != nullptr && result.getDefiningOp () != nullptr );
226
+ return result.getDefiningOp ();
227
+ }
228
+
176
229
// / Given an operation `op`, this returns true if `op`'s operand is ultimately
177
230
// / the loop's induction variable. Detecting this helps finding the live-in
178
231
// / value corresponding to the induction variable in case the induction variable
@@ -412,7 +465,7 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
412
465
loopNest.insert (
413
466
{currentLoop,
414
467
InductionVariableInfo{
415
- findLoopIndVarMemDecl (currentLoop),
468
+ findLoopIterationVarMemDecl (currentLoop),
416
469
std::move (looputils::extractIndVarUpdateOps (currentLoop))}});
417
470
llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
418
471
@@ -715,6 +768,104 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
715
768
using LiveInShapeInfoMap =
716
769
llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
717
770
771
+ mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
772
+ mlir::ConversionPatternRewriter &rewriter,
773
+ looputils::LoopNestToIndVarMap &loopNest,
774
+ mlir::IRMapping &mapper) const {
775
+ auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
776
+ rewriter.createBlock (¶llelOp.getRegion ());
777
+ rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
778
+
779
+ genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
780
+ return parallelOp;
781
+ }
782
+
783
+ void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
784
+ looputils::LoopNestToIndVarMap &loopNest,
785
+ mlir::IRMapping &mapper) const {
786
+
787
+ for (auto &[_, indVarInfo] : loopNest)
788
+ genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
789
+ }
790
+
791
+ mlir::Operation *
792
+ genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
793
+ mlir::Operation *indVarMemDef,
794
+ mlir::IRMapping &mapper) const {
795
+ assert (
796
+ indVarMemDef != nullptr &&
797
+ " Induction variable memdef is expected to have a defining operation." );
798
+
799
+ llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
800
+ for (auto operand : indVarMemDef->getOperands ())
801
+ indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
802
+ indVarDeclareAndAlloc.insert (indVarMemDef);
803
+
804
+ mlir::Operation *result;
805
+ for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
806
+ result = rewriter.clone (*opToClone, mapper);
807
+
808
+ return result;
809
+ }
810
+
811
+ void genLoopNestClauseOps (
812
+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
813
+ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
814
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
815
+ mlir::omp::TargetOperands *targetClauseOps = nullptr ) const {
816
+ assert (loopNestClauseOps.loopLowerBounds .empty () &&
817
+ " Loop nest bounds were already emitted!" );
818
+
819
+ auto populateBounds = [](mlir::Value var,
820
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
821
+ bounds.push_back (var.getDefiningOp ()->getResult (0 ));
822
+ };
823
+
824
+ auto hostEvalCapture = [&](mlir::Value var,
825
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
826
+ populateBounds (var, bounds);
827
+
828
+ if (targetClauseOps)
829
+ targetClauseOps->hostEvalVars .push_back (var);
830
+ };
831
+
832
+ for (auto &[doLoop, _] : loopNest) {
833
+ hostEvalCapture (doLoop.getLowerBound (),
834
+ loopNestClauseOps.loopLowerBounds );
835
+ hostEvalCapture (doLoop.getUpperBound (),
836
+ loopNestClauseOps.loopUpperBounds );
837
+ hostEvalCapture (doLoop.getStep (), loopNestClauseOps.loopSteps );
838
+ }
839
+
840
+ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
841
+ }
842
+
843
+ mlir::omp::LoopNestOp
844
+ genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
845
+ mlir::IRMapping &mapper,
846
+ const mlir::omp::LoopNestOperands &clauseOps,
847
+ bool isComposite) const {
848
+
849
+ auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
850
+ wsloopOp.setComposite (isComposite);
851
+ rewriter.createBlock (&wsloopOp.getRegion ());
852
+
853
+ auto loopNestOp =
854
+ rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
855
+
856
+ // Clone the loop's body inside the loop nest construct using the
857
+ // mapped values.
858
+ rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
859
+ loopNestOp.getRegion ().begin (), mapper);
860
+
861
+ mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
862
+ rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
863
+ rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
864
+ rewriter.eraseOp (terminator);
865
+
866
+ return loopNestOp;
867
+ }
868
+
718
869
void
719
870
genBoundsOps (mlir::ConversionPatternRewriter &rewriter, mlir::Location loc,
720
871
mlir::Value shape, llvm::SmallVectorImpl<mlir::Value> &boundsOps,
@@ -983,51 +1134,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
983
1134
return teamsOp;
984
1135
}
985
1136
986
- void genLoopNestClauseOps (
987
- mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
988
- looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
989
- mlir::omp::LoopNestOperands &loopNestClauseOps,
990
- mlir::omp::TargetOperands *targetClauseOps = nullptr ) const {
991
- assert (loopNestClauseOps.loopLowerBounds .empty () &&
992
- " Loop nest bounds were already emitted!" );
993
-
994
- // Clones the chain of ops defining a certain loop bound or its step into
995
- // the parallel region. For example, if the value of a bound is defined by a
996
- // `fir.convert`op, this lambda clones the `fir.convert` as well as the
997
- // value it converts from. We do this since `omp.target` regions are
998
- // isolated from above.
999
- auto cloneBoundOrStepOpChain =
1000
- [&](mlir::Operation *operation) -> mlir::Operation * {
1001
- llvm::SetVector<mlir::Operation *> opChain;
1002
- looputils::collectIndirectConstOpChain (operation, opChain);
1003
-
1004
- mlir::Operation *result;
1005
- for (mlir::Operation *link : opChain)
1006
- result = rewriter.clone (*link, mapper);
1007
-
1008
- return result;
1009
- };
1010
-
1011
- auto hostEvalCapture = [&](mlir::Value var,
1012
- llvm::SmallVectorImpl<mlir::Value> &bounds) {
1013
- var = cloneBoundOrStepOpChain (var.getDefiningOp ())->getResult (0 );
1014
- bounds.push_back (var);
1015
-
1016
- if (targetClauseOps)
1017
- targetClauseOps->hostEvalVars .push_back (var);
1018
- };
1019
-
1020
- for (auto &[doLoop, _] : loopNest) {
1021
- hostEvalCapture (doLoop.getLowerBound (),
1022
- loopNestClauseOps.loopLowerBounds );
1023
- hostEvalCapture (doLoop.getUpperBound (),
1024
- loopNestClauseOps.loopUpperBounds );
1025
- hostEvalCapture (doLoop.getStep (), loopNestClauseOps.loopSteps );
1026
- }
1027
-
1028
- loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
1029
- }
1030
-
1031
1137
mlir::omp::DistributeOp
1032
1138
genDistributeOp (mlir::Location loc,
1033
1139
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1038,72 +1144,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
1038
1144
return distOp;
1039
1145
}
1040
1146
1041
- void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
1042
- looputils::LoopNestToIndVarMap &loopNest,
1043
- mlir::IRMapping &mapper) const {
1044
-
1045
- for (auto &[_, indVarInfo] : loopNest)
1046
- genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
1047
- }
1048
-
1049
- mlir::Operation *
1050
- genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
1051
- mlir::Operation *indVarMemDef,
1052
- mlir::IRMapping &mapper) const {
1053
- assert (
1054
- indVarMemDef != nullptr &&
1055
- " Induction variable memdef is expected to have a defining operation." );
1056
-
1057
- llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
1058
- for (auto operand : indVarMemDef->getOperands ())
1059
- indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
1060
- indVarDeclareAndAlloc.insert (indVarMemDef);
1061
-
1062
- mlir::Operation *result;
1063
- for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
1064
- result = rewriter.clone (*opToClone, mapper);
1065
-
1066
- return result;
1067
- }
1068
-
1069
- mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
1070
- mlir::ConversionPatternRewriter &rewriter,
1071
- looputils::LoopNestToIndVarMap &loopNest,
1072
- mlir::IRMapping &mapper) const {
1073
- auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
1074
- rewriter.createBlock (¶llelOp.getRegion ());
1075
- rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
1076
-
1077
- genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
1078
- return parallelOp;
1079
- }
1080
-
1081
- mlir::omp::LoopNestOp
1082
- genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
1083
- mlir::IRMapping &mapper,
1084
- const mlir::omp::LoopNestOperands &clauseOps,
1085
- bool isComposite) const {
1086
-
1087
- auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
1088
- wsloopOp.setComposite (isComposite);
1089
- rewriter.createBlock (&wsloopOp.getRegion ());
1090
-
1091
- auto loopNestOp =
1092
- rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
1093
-
1094
- // Clone the loop's body inside the loop nest construct using the
1095
- // mapped values.
1096
- rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
1097
- loopNestOp.getRegion ().begin (), mapper);
1098
-
1099
- mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
1100
- rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
1101
- rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
1102
- rewriter.eraseOp (terminator);
1103
-
1104
- return loopNestOp;
1105
- }
1106
-
1107
1147
bool mapToDevice;
1108
1148
llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip;
1109
1149
};
@@ -1152,8 +1192,6 @@ class DoConcurrentConversionPass
1152
1192
1153
1193
if (mlir::failed (mlir::applyFullConversion (getOperation (), target,
1154
1194
std::move (patterns)))) {
1155
- mlir::emitError (mlir::UnknownLoc::get (context),
1156
- " error in converting do-concurrent op" );
1157
1195
signalPassFailure ();
1158
1196
}
1159
1197
}
0 commit comments