Skip to content

Commit 815a846

Browse files
authored
[Flang] Move genMinMaxlocReductionLoop to Transforms/Utils.cpp (#81380)
This is one option for attempting to move genMinMaxlocReductionLoop to a better location. It moves it into Transforms and makes HLFIRTransforms depend upon FIRTransforms. It passes a build locally, both with and without -DBUILD_SHARED_LIBS, and does OK on the Windows CI.
1 parent 070848c commit 815a846

File tree

6 files changed

+170
-142
lines changed

6 files changed

+170
-142
lines changed

flang/include/flang/Optimizer/Support/Utils.h

Lines changed: 0 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include "flang/Optimizer/Builder/Todo.h"
1919
#include "flang/Optimizer/Dialect/FIROps.h"
2020
#include "flang/Optimizer/Dialect/FIRType.h"
21-
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2221
#include "flang/Optimizer/Support/FatalError.h"
2322
#include "mlir/Dialect/Arith/IR/Arith.h"
2423
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -135,144 +134,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
135134
" in " + intrinsicName);
136135
}
137136

138-
using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
139-
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
140-
mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
141-
using InitValGeneratorTy = llvm::function_ref<mlir::Value(
142-
fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
143-
using AddrGeneratorTy = llvm::function_ref<mlir::Value(
144-
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
145-
mlir::Value)>;
146-
147-
// Produces a loop nest for a Minloc intrinsic.
148-
inline void genMinMaxlocReductionLoop(
149-
fir::FirOpBuilder &builder, mlir::Value array,
150-
fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
151-
fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
152-
mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
153-
bool maskMayBeLogicalScalar) {
154-
mlir::IndexType idxTy = builder.getIndexType();
155-
156-
mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
157-
158-
fir::SequenceType::Shape flatShape(rank,
159-
fir::SequenceType::getUnknownExtent());
160-
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
161-
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
162-
array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
163-
164-
mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
165-
mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
166-
mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
167-
mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
168-
builder.create<fir::StoreOp>(loc, zero, flagRef);
169-
170-
mlir::Value init = initVal(builder, loc, elementType);
171-
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
172-
173-
assert(rank > 0 && "rank cannot be zero");
174-
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
175-
176-
// Compute all the upper bounds before the loop nest.
177-
// It is not strictly necessary for performance, since the loop nest
178-
// does not have any store operations and any LICM optimization
179-
// should be able to optimize the redundancy.
180-
for (unsigned i = 0; i < rank; ++i) {
181-
mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
182-
auto dims =
183-
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
184-
mlir::Value len = dims.getResult(1);
185-
// We use C indexing here, so len-1 as loopcount
186-
mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
187-
bounds.push_back(loopCount);
188-
}
189-
// Create a loop nest consisting of OP operations.
190-
// Collect the loops' induction variables into indices array,
191-
// which will be used in the innermost loop to load the input
192-
// array's element.
193-
// The loops are generated such that the innermost loop processes
194-
// the 0 dimension.
195-
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
196-
for (unsigned i = rank; 0 < i; --i) {
197-
mlir::Value step = one;
198-
mlir::Value loopCount = bounds[i - 1];
199-
auto loop =
200-
builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
201-
/*finalCountValue=*/false, init);
202-
init = loop.getRegionIterArgs()[0];
203-
indices.push_back(loop.getInductionVar());
204-
// Set insertion point to the loop body so that the next loop
205-
// is inserted inside the current one.
206-
builder.setInsertionPointToStart(loop.getBody());
207-
}
208-
209-
// Reverse the indices such that they are ordered as:
210-
// <dim-0-idx, dim-1-idx, ...>
211-
std::reverse(indices.begin(), indices.end());
212-
mlir::Value reductionVal =
213-
genBody(builder, loc, elementType, array, flagRef, init, indices);
214-
215-
// Unwind the loop nest and insert ResultOp on each level
216-
// to return the updated value of the reduction to the enclosing
217-
// loops.
218-
for (unsigned i = 0; i < rank; ++i) {
219-
auto result = builder.create<fir::ResultOp>(loc, reductionVal);
220-
// Proceed to the outer loop.
221-
auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
222-
reductionVal = loop.getResult(0);
223-
// Set insertion point after the loop operation that we have
224-
// just processed.
225-
builder.setInsertionPointAfter(loop.getOperation());
226-
}
227-
// End of loop nest. The insertion point is after the outermost loop.
228-
if (maskMayBeLogicalScalar) {
229-
if (fir::IfOp ifOp =
230-
mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
231-
builder.create<fir::ResultOp>(loc, reductionVal);
232-
builder.setInsertionPointAfter(ifOp);
233-
// Redefine flagSet to escape scope of ifOp
234-
flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
235-
reductionVal = ifOp.getResult(0);
236-
}
237-
}
238-
239-
// Check for case where array was full of max values.
240-
// flag will be 0 if mask was never true, 1 if mask was true at some point,
241-
// this is needed to avoid catching cases where we didn't access any elements
242-
// e.g. mask=.FALSE.
243-
mlir::Value flagValue =
244-
builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
245-
mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
246-
loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
247-
fir::IfOp ifMaskTrueOp =
248-
builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
249-
builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
250-
251-
mlir::Value testInit = initVal(builder, loc, elementType);
252-
fir::IfOp ifMinSetOp;
253-
if (elementType.isa<mlir::FloatType>()) {
254-
mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
255-
loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
256-
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
257-
/*withElseRegion*/ false);
258-
} else {
259-
mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
260-
loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
261-
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
262-
/*withElseRegion*/ false);
263-
}
264-
builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
265-
266-
// Load output array with 1s instead of 0s
267-
for (unsigned int i = 0; i < rank; ++i) {
268-
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
269-
mlir::Value resultElemAddr =
270-
getAddrFn(builder, loc, resultElemType, resultArr, index);
271-
builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
272-
}
273-
builder.setInsertionPointAfter(ifMaskTrueOp);
274-
}
275-
276137
inline fir::CUDADataAttributeAttr
277138
getCUDADataAttribute(mlir::MLIRContext *mlirContext,
278139
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===-- Optimizer/Transforms/Utils.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H
14+
#define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H
15+
16+
namespace fir {
17+
18+
using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
19+
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
20+
mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
21+
using InitValGeneratorTy = llvm::function_ref<mlir::Value(
22+
fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
23+
using AddrGeneratorTy = llvm::function_ref<mlir::Value(
24+
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
25+
mlir::Value)>;
26+
27+
// Produces a loop nest for a Minloc intrinsic.
28+
void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
29+
fir::InitValGeneratorTy initVal,
30+
fir::MinlocBodyOpGeneratorTy genBody,
31+
fir::AddrGeneratorTy getAddrFn, unsigned rank,
32+
mlir::Type elementType, mlir::Location loc,
33+
mlir::Type maskElemType, mlir::Value resultArr,
34+
bool maskMayBeLogicalScalar);
35+
36+
} // namespace fir
37+
38+
#endif // FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H

flang/lib/Optimizer/Dialect/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ add_flang_library(FIRDialect
1313
CanonicalizationPatternsIncGen
1414
MLIRIR
1515
FIROpsIncGen
16-
HLFIROpsIncGen
1716
intrinsics_gen
1817

1918
LINK_LIBS

flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_flang_library(HLFIRTransforms
2121
FIRBuilder
2222
FIRDialectSupport
2323
FIRSupport
24+
FIRTransforms
2425
HLFIRDialect
2526
MLIRIR
2627
${dialect_libs}

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
2121
#include "flang/Optimizer/HLFIR/HLFIROps.h"
2222
#include "flang/Optimizer/HLFIR/Passes.h"
23-
#include "flang/Optimizer/Support/Utils.h"
23+
#include "flang/Optimizer/Transforms/Utils.h"
2424
#include "mlir/Dialect/Func/IR/FuncOps.h"
2525
#include "mlir/IR/Dominance.h"
2626
#include "mlir/IR/PatternMatch.h"

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
#include "flang/Optimizer/Dialect/FIRType.h"
3232
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
3333
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
34-
#include "flang/Optimizer/Support/Utils.h"
3534
#include "flang/Optimizer/Transforms/Passes.h"
35+
#include "flang/Optimizer/Transforms/Utils.h"
3636
#include "flang/Runtime/entry-names.h"
3737
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
3838
#include "mlir/IR/Matchers.h"
@@ -558,6 +558,135 @@ static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder,
558558
{boxRefType, boxType, boxType}, {});
559559
}
560560

561+
// Produces a loop nest for a Minloc intrinsic.
562+
void fir::genMinMaxlocReductionLoop(
563+
fir::FirOpBuilder &builder, mlir::Value array,
564+
fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
565+
fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
566+
mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
567+
bool maskMayBeLogicalScalar) {
568+
mlir::IndexType idxTy = builder.getIndexType();
569+
570+
mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
571+
572+
fir::SequenceType::Shape flatShape(rank,
573+
fir::SequenceType::getUnknownExtent());
574+
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
575+
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
576+
array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
577+
578+
mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
579+
mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
580+
mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
581+
mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
582+
builder.create<fir::StoreOp>(loc, zero, flagRef);
583+
584+
mlir::Value init = initVal(builder, loc, elementType);
585+
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
586+
587+
assert(rank > 0 && "rank cannot be zero");
588+
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
589+
590+
// Compute all the upper bounds before the loop nest.
591+
// It is not strictly necessary for performance, since the loop nest
592+
// does not have any store operations and any LICM optimization
593+
// should be able to optimize the redundancy.
594+
for (unsigned i = 0; i < rank; ++i) {
595+
mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
596+
auto dims =
597+
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
598+
mlir::Value len = dims.getResult(1);
599+
// We use C indexing here, so len-1 as loopcount
600+
mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
601+
bounds.push_back(loopCount);
602+
}
603+
// Create a loop nest consisting of OP operations.
604+
// Collect the loops' induction variables into indices array,
605+
// which will be used in the innermost loop to load the input
606+
// array's element.
607+
// The loops are generated such that the innermost loop processes
608+
// the 0 dimension.
609+
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
610+
for (unsigned i = rank; 0 < i; --i) {
611+
mlir::Value step = one;
612+
mlir::Value loopCount = bounds[i - 1];
613+
auto loop =
614+
builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
615+
/*finalCountValue=*/false, init);
616+
init = loop.getRegionIterArgs()[0];
617+
indices.push_back(loop.getInductionVar());
618+
// Set insertion point to the loop body so that the next loop
619+
// is inserted inside the current one.
620+
builder.setInsertionPointToStart(loop.getBody());
621+
}
622+
623+
// Reverse the indices such that they are ordered as:
624+
// <dim-0-idx, dim-1-idx, ...>
625+
std::reverse(indices.begin(), indices.end());
626+
mlir::Value reductionVal =
627+
genBody(builder, loc, elementType, array, flagRef, init, indices);
628+
629+
// Unwind the loop nest and insert ResultOp on each level
630+
// to return the updated value of the reduction to the enclosing
631+
// loops.
632+
for (unsigned i = 0; i < rank; ++i) {
633+
auto result = builder.create<fir::ResultOp>(loc, reductionVal);
634+
// Proceed to the outer loop.
635+
auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
636+
reductionVal = loop.getResult(0);
637+
// Set insertion point after the loop operation that we have
638+
// just processed.
639+
builder.setInsertionPointAfter(loop.getOperation());
640+
}
641+
// End of loop nest. The insertion point is after the outermost loop.
642+
if (maskMayBeLogicalScalar) {
643+
if (fir::IfOp ifOp =
644+
mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
645+
builder.create<fir::ResultOp>(loc, reductionVal);
646+
builder.setInsertionPointAfter(ifOp);
647+
// Redefine flagSet to escape scope of ifOp
648+
flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
649+
reductionVal = ifOp.getResult(0);
650+
}
651+
}
652+
653+
// Check for case where array was full of max values.
654+
// flag will be 0 if mask was never true, 1 if mask was true at some point,
655+
// this is needed to avoid catching cases where we didn't access any elements
656+
// e.g. mask=.FALSE.
657+
mlir::Value flagValue =
658+
builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
659+
mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
660+
loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
661+
fir::IfOp ifMaskTrueOp =
662+
builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
663+
builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
664+
665+
mlir::Value testInit = initVal(builder, loc, elementType);
666+
fir::IfOp ifMinSetOp;
667+
if (elementType.isa<mlir::FloatType>()) {
668+
mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
669+
loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
670+
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
671+
/*withElseRegion*/ false);
672+
} else {
673+
mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
674+
loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
675+
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
676+
/*withElseRegion*/ false);
677+
}
678+
builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
679+
680+
// Load output array with 1s instead of 0s
681+
for (unsigned int i = 0; i < rank; ++i) {
682+
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
683+
mlir::Value resultElemAddr =
684+
getAddrFn(builder, loc, resultElemType, resultArr, index);
685+
builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
686+
}
687+
builder.setInsertionPointAfter(ifMaskTrueOp);
688+
}
689+
561690
static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
562691
mlir::func::FuncOp &funcOp, bool isMax,
563692
unsigned rank, int maskRank,

0 commit comments

Comments
 (0)