Skip to content

Commit 202917f

Browse files
committed
[Flang] Move genMinMaxlocReductionLoop to a common location.
The shared-library build fails on references to genMinMaxlocReductionLoop, which is defined in Optimizer/Transforms, from HLFIR/Optimizer/Transforms. For the moment I've moved the code into the header file, where it can be shared like the other functions in Utils.h.
1 parent b0b7be2 commit 202917f

File tree

2 files changed

+128
-135
lines changed

2 files changed

+128
-135
lines changed

flang/include/flang/Optimizer/Support/Utils.h

Lines changed: 128 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "flang/Optimizer/Builder/Todo.h"
1919
#include "flang/Optimizer/Dialect/FIROps.h"
2020
#include "flang/Optimizer/Dialect/FIRType.h"
21+
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2122
#include "flang/Optimizer/Support/FatalError.h"
2223
#include "mlir/Dialect/Arith/IR/Arith.h"
2324
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -144,13 +145,133 @@ using AddrGeneratorTy = llvm::function_ref<mlir::Value(
144145
mlir::Value)>;
145146

146147
// Produces a loop nest for a Minloc/Maxloc intrinsic reduction over `array`.
// `rank` is the number of nested loops to generate. `initVal` supplies the
// starting value of the reduction and `genBody` emits the innermost loop
// body. `getAddrFn` is used to address the elements of `resultArr`, and
// `maskMayBeLogicalScalar` tells the generator that the mask may be a scalar
// logical.
147-
void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
148-
InitValGeneratorTy initVal,
149-
MinlocBodyOpGeneratorTy genBody,
150-
fir::AddrGeneratorTy getAddrFn, unsigned rank,
151-
mlir::Type elementType, mlir::Location loc,
152-
mlir::Type maskElemType, mlir::Value resultArr,
153-
bool maskMayBeLogicalScalar);
148+
// Emits the FIR loop nest implementing a MINLOC/MAXLOC reduction over `array`,
// followed by a fix-up for the case where the reduction value is still equal
// to its initial value after the loops.
//
//  - `initVal` yields the starting reduction value for `elementType`. It is
//    called once before the loops and once more for the final comparison.
//  - `genBody` emits the innermost loop body and returns the updated reduction
//    value. It receives the converted array, the flag reference, the current
//    reduction value and the induction variables ordered <dim-0, dim-1, ...>.
//  - `getAddrFn` returns the address of the `resultArr` element at an index.
//  - `rank` is the number of loops to generate; it must not be zero.
//  - `maskMayBeLogicalScalar` allows the generator to close an enclosing
//    fir.if when the insertion point ends up inside one after the loop nest.
//  - `maskElemType` is not read by this function.
//
// On return the insertion point is placed after the final fix-up fir.if.
inline void genMinMaxlocReductionLoop(
    fir::FirOpBuilder &builder, mlir::Value array,
    fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
    fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
    mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
    bool maskMayBeLogicalScalar) {
  mlir::IndexType indexTy = builder.getIndexType();
  mlir::Value lowerBound = builder.createIntegerConstant(loc, indexTy, 0);

  // Re-type the input as a box of a `rank`-dimensional array whose extents
  // are all unknown.
  fir::SequenceType::Shape unknownShape(rank,
                                        fir::SequenceType::getUnknownExtent());
  mlir::Type seqTy = fir::SequenceType::get(unknownShape, elementType);
  mlir::Type boxedSeqTy = fir::BoxType::get(seqTy);
  array = builder.create<fir::ConvertOp>(loc, boxedSeqTy, array);

  // The flag lives in a temporary of the result element type and starts at 0.
  // It is handed to `genBody` and inspected again once the loops are done.
  mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
  mlir::Value flagOne = builder.createIntegerConstant(loc, resultElemTy, 1);
  mlir::Value flagZero = builder.createIntegerConstant(loc, resultElemTy, 0);
  mlir::Value flagRef = builder.createTemporary(loc, resultElemTy);
  builder.create<fir::StoreOp>(loc, flagZero, flagRef);

  mlir::Value carried = initVal(builder, loc, elementType);

  assert(rank > 0 && "rank cannot be zero");
  mlir::Value one = builder.createIntegerConstant(loc, indexTy, 1);

  // Materialize every upper bound ahead of the loop nest. The loops are
  // zero-based, so each bound is the dimension's extent minus one.
  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> upperBounds;
  for (unsigned dim = 0; dim != rank; ++dim) {
    mlir::Value dimIdx = builder.createIntegerConstant(loc, indexTy, dim);
    auto boxDims = builder.create<fir::BoxDimsOp>(loc, indexTy, indexTy,
                                                  indexTy, array, dimIdx);
    mlir::Value extent = boxDims.getResult(1);
    mlir::Value lastIdx = builder.create<mlir::arith::SubIOp>(loc, extent, one);
    upperBounds.push_back(lastIdx);
  }

  // Build the nest from the outermost loop (highest dimension) down to the
  // innermost one (dimension 0), threading the reduction value through the
  // loops' iteration arguments. Each induction variable is stored at its own
  // dimension's slot, so the vector ends up ordered <dim-0, dim-1, ...>.
  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> inductionVars(rank);
  for (unsigned dim = rank; dim-- > 0;) {
    auto doLoop = builder.create<fir::DoLoopOp>(loc, lowerBound,
                                                upperBounds[dim], one, false,
                                                /*finalCountValue=*/false,
                                                carried);
    carried = doLoop.getRegionIterArgs()[0];
    inductionVars[dim] = doLoop.getInductionVar();
    // Descend into the body so the next loop nests inside this one.
    builder.setInsertionPointToStart(doLoop.getBody());
  }

  mlir::Value reductionVal = genBody(builder, loc, elementType, array, flagRef,
                                     carried, inductionVars);

  // Walk back out of the nest: terminate each level with a fir.result that
  // forwards the reduction value, then continue right after that loop.
  for (unsigned level = 0; level != rank; ++level) {
    auto yield = builder.create<fir::ResultOp>(loc, reductionVal);
    auto enclosingLoop = mlir::cast<fir::DoLoopOp>(yield->getParentOp());
    reductionVal = enclosingLoop.getResult(0);
    builder.setInsertionPointAfter(enclosingLoop.getOperation());
  }

  // The insertion point is now just past the outermost loop. If that spot is
  // inside a fir.if, close the fir.if as well and pick up its result.
  if (maskMayBeLogicalScalar) {
    auto enclosingIf =
        mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
    if (enclosingIf) {
      builder.create<fir::ResultOp>(loc, reductionVal);
      builder.setInsertionPointAfter(enclosingIf);
      // Re-create the constant so that it is defined outside of the fir.if.
      flagOne = builder.createIntegerConstant(loc, resultElemTy, 1);
      reductionVal = enclosingIf.getResult(0);
    }
  }

  // Handle the case where the array was full of max values. The flag is 0 if
  // the mask was never true and 1 if it was true at some point; checking it
  // first avoids catching cases where no element was accessed at all,
  // e.g. mask=.FALSE.
  mlir::Value flagLoaded =
      builder.create<fir::LoadOp>(loc, resultElemTy, flagRef);
  mlir::Value flagIsSet = builder.create<mlir::arith::CmpIOp>(
      loc, mlir::arith::CmpIPredicate::eq, flagLoaded, flagOne);
  fir::IfOp ifMaskTrueOp =
      builder.create<fir::IfOp>(loc, flagIsSet, /*withElseRegion=*/false);
  builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());

  // Compare the final reduction value against a fresh copy of the initial
  // value, using the comparison that fits the element type.
  mlir::Value freshInit = initVal(builder, loc, elementType);
  mlir::Value stillInit;
  if (elementType.isa<mlir::FloatType>())
    stillInit = builder.create<mlir::arith::CmpFOp>(
        loc, mlir::arith::CmpFPredicate::OEQ, freshInit, reductionVal);
  else
    stillInit = builder.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::eq, freshInit, reductionVal);
  fir::IfOp ifStillInitOp =
      builder.create<fir::IfOp>(loc, stillInit, /*withElseRegion*/ false);
  builder.setInsertionPointToStart(&ifStillInitOp.getThenRegion().front());

  // Fill the output array with 1s instead of 0s.
  for (unsigned pos = 0; pos != rank; ++pos) {
    mlir::Value posIdx = builder.createIntegerConstant(loc, indexTy, pos);
    mlir::Value elemAddr =
        getAddrFn(builder, loc, resultElemTy, resultArr, posIdx);
    builder.create<fir::StoreOp>(loc, flagOne, elemAddr);
  }
  builder.setInsertionPointAfter(ifMaskTrueOp);
}
154275

155276
} // namespace fir
156277

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

Lines changed: 0 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -353,134 +353,6 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
353353
builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
354354
}
355355

356-
// Emits the FIR loop nest implementing a MINLOC/MAXLOC reduction over `array`,
// followed by a fix-up for the case where the reduction value is still equal
// to its initial value after the loops.
//
//  - `initVal` yields the starting reduction value for `elementType`. It is
//    called once before the loops and once more for the final comparison.
//  - `genBody` emits the innermost loop body and returns the updated reduction
//    value. It receives the converted array, the flag reference, the current
//    reduction value and the induction variables ordered <dim-0, dim-1, ...>.
//  - `getAddrFn` returns the address of the `resultArr` element at an index.
//  - `rank` is the number of loops to generate; it must not be zero.
//  - `maskMayBeLogicalScalar` allows the generator to close an enclosing
//    fir.if when the insertion point ends up inside one after the loop nest.
//  - `maskElemType` is not read by this function.
//
// On return the insertion point is placed after the final fix-up fir.if.
void fir::genMinMaxlocReductionLoop(
    fir::FirOpBuilder &builder, mlir::Value array,
    fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
    fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
    mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
    bool maskMayBeLogicalScalar) {
  mlir::IndexType indexTy = builder.getIndexType();
  mlir::Value lowerBound = builder.createIntegerConstant(loc, indexTy, 0);

  // Re-type the input as a box of a `rank`-dimensional array whose extents
  // are all unknown.
  fir::SequenceType::Shape unknownShape(rank,
                                        fir::SequenceType::getUnknownExtent());
  mlir::Type seqTy = fir::SequenceType::get(unknownShape, elementType);
  mlir::Type boxedSeqTy = fir::BoxType::get(seqTy);
  array = builder.create<fir::ConvertOp>(loc, boxedSeqTy, array);

  // The flag lives in a temporary of the result element type and starts at 0.
  // It is handed to `genBody` and inspected again once the loops are done.
  mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
  mlir::Value flagOne = builder.createIntegerConstant(loc, resultElemTy, 1);
  mlir::Value flagZero = builder.createIntegerConstant(loc, resultElemTy, 0);
  mlir::Value flagRef = builder.createTemporary(loc, resultElemTy);
  builder.create<fir::StoreOp>(loc, flagZero, flagRef);

  mlir::Value carried = initVal(builder, loc, elementType);

  assert(rank > 0 && "rank cannot be zero");
  mlir::Value one = builder.createIntegerConstant(loc, indexTy, 1);

  // Materialize every upper bound ahead of the loop nest. The loops are
  // zero-based, so each bound is the dimension's extent minus one.
  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> upperBounds;
  for (unsigned dim = 0; dim != rank; ++dim) {
    mlir::Value dimIdx = builder.createIntegerConstant(loc, indexTy, dim);
    auto boxDims = builder.create<fir::BoxDimsOp>(loc, indexTy, indexTy,
                                                  indexTy, array, dimIdx);
    mlir::Value extent = boxDims.getResult(1);
    mlir::Value lastIdx = builder.create<mlir::arith::SubIOp>(loc, extent, one);
    upperBounds.push_back(lastIdx);
  }

  // Build the nest from the outermost loop (highest dimension) down to the
  // innermost one (dimension 0), threading the reduction value through the
  // loops' iteration arguments. Each induction variable is stored at its own
  // dimension's slot, so the vector ends up ordered <dim-0, dim-1, ...>.
  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> inductionVars(rank);
  for (unsigned dim = rank; dim-- > 0;) {
    auto doLoop = builder.create<fir::DoLoopOp>(loc, lowerBound,
                                                upperBounds[dim], one, false,
                                                /*finalCountValue=*/false,
                                                carried);
    carried = doLoop.getRegionIterArgs()[0];
    inductionVars[dim] = doLoop.getInductionVar();
    // Descend into the body so the next loop nests inside this one.
    builder.setInsertionPointToStart(doLoop.getBody());
  }

  mlir::Value reductionVal = genBody(builder, loc, elementType, array, flagRef,
                                     carried, inductionVars);

  // Walk back out of the nest: terminate each level with a fir.result that
  // forwards the reduction value, then continue right after that loop.
  for (unsigned level = 0; level != rank; ++level) {
    auto yield = builder.create<fir::ResultOp>(loc, reductionVal);
    auto enclosingLoop = mlir::cast<fir::DoLoopOp>(yield->getParentOp());
    reductionVal = enclosingLoop.getResult(0);
    builder.setInsertionPointAfter(enclosingLoop.getOperation());
  }

  // The insertion point is now just past the outermost loop. If that spot is
  // inside a fir.if, close the fir.if as well and pick up its result.
  if (maskMayBeLogicalScalar) {
    auto enclosingIf =
        mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
    if (enclosingIf) {
      builder.create<fir::ResultOp>(loc, reductionVal);
      builder.setInsertionPointAfter(enclosingIf);
      // Re-create the constant so that it is defined outside of the fir.if.
      flagOne = builder.createIntegerConstant(loc, resultElemTy, 1);
      reductionVal = enclosingIf.getResult(0);
    }
  }

  // Handle the case where the array was full of max values. The flag is 0 if
  // the mask was never true and 1 if it was true at some point; checking it
  // first avoids catching cases where no element was accessed at all,
  // e.g. mask=.FALSE.
  mlir::Value flagLoaded =
      builder.create<fir::LoadOp>(loc, resultElemTy, flagRef);
  mlir::Value flagIsSet = builder.create<mlir::arith::CmpIOp>(
      loc, mlir::arith::CmpIPredicate::eq, flagLoaded, flagOne);
  fir::IfOp ifMaskTrueOp =
      builder.create<fir::IfOp>(loc, flagIsSet, /*withElseRegion=*/false);
  builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());

  // Compare the final reduction value against a fresh copy of the initial
  // value, using the comparison that fits the element type.
  mlir::Value freshInit = initVal(builder, loc, elementType);
  mlir::Value stillInit;
  if (elementType.isa<mlir::FloatType>())
    stillInit = builder.create<mlir::arith::CmpFOp>(
        loc, mlir::arith::CmpFPredicate::OEQ, freshInit, reductionVal);
  else
    stillInit = builder.create<mlir::arith::CmpIOp>(
        loc, mlir::arith::CmpIPredicate::eq, freshInit, reductionVal);
  fir::IfOp ifStillInitOp =
      builder.create<fir::IfOp>(loc, stillInit, /*withElseRegion*/ false);
  builder.setInsertionPointToStart(&ifStillInitOp.getThenRegion().front());

  // Fill the output array with 1s instead of 0s.
  for (unsigned pos = 0; pos != rank; ++pos) {
    mlir::Value posIdx = builder.createIntegerConstant(loc, indexTy, pos);
    mlir::Value elemAddr =
        getAddrFn(builder, loc, resultElemTy, resultArr, posIdx);
    builder.create<fir::StoreOp>(loc, flagOne, elemAddr);
  }
  builder.setInsertionPointAfter(ifMaskTrueOp);
}
483-
484356
static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
485357
mlir::Location loc,
486358
mlir::Value reductionVal) {

0 commit comments

Comments
 (0)