Skip to content

Commit 5f268d0

Browse files
authored
[flang] Code generation for fir.pack/unpack_array. (#132080)
The code generation relies on the `ShallowCopyDirect` runtime function to copy data between the original and the temporary arrays (in both directions). The allocations are done by compiler-generated code. The heap allocations could have been handed off to the `ShallowCopy` runtime function, but I decided to expose the allocations so that the temporary descriptor passed to `ShallowCopyDirect` is `nocapture`, which may be better for LLVM optimizations.
1 parent 0ac8cb1 commit 5f268d0

File tree

15 files changed

+1665
-79
lines changed

15 files changed

+1665
-79
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,40 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
268268
mlir::ValueRange lenParams = {},
269269
llvm::ArrayRef<mlir::NamedAttribute> attrs = {});
270270

271+
/// Sample genDeclare callback for createArrayTemp() below.
272+
/// It creates fir.declare operation using the given operands.
273+
/// \p memref is the base of the allocated temporary,
274+
/// which may be !fir.ref<!fir.array<>> or !fir.ref<!fir.box/class<>>.
275+
static mlir::Value genTempDeclareOp(fir::FirOpBuilder &builder,
276+
mlir::Location loc, mlir::Value memref,
277+
llvm::StringRef name, mlir::Value shape,
278+
llvm::ArrayRef<mlir::Value> typeParams,
279+
fir::FortranVariableFlagsAttr attrs);
280+
281+
/// Create a temporary array with the given \p arrayType,
282+
/// \p shape, \p extents and \p typeParams. An optional
283+
/// \p polymorphicMold specifies the entity whose dynamic type
284+
/// has to be used for the allocation.
285+
/// \p genDeclare callback generates a declare operation
286+
/// for the created temporary. FIR passes may use genTempDeclareOp()
287+
/// function above that creates fir.declare.
288+
/// HLFIR passes may provide their own callback that generates
289+
/// hlfir.declare. Some passes may provide a callback that
290+
/// just passes through the base of the temporary.
291+
/// If \p useStack is true, the function will try to do the allocation
292+
/// in stack memory (which is not always possible currently).
293+
/// The first return value is the base of the temporary object,
294+
/// which may be !fir.ref<!fir.array<>> or !fir.ref<!fir.box/class<>>.
295+
/// The second return value is true, if the actual allocation
296+
/// was done in heap memory.
297+
std::pair<mlir::Value, bool>
298+
createArrayTemp(mlir::Location loc, fir::SequenceType arrayType,
299+
mlir::Value shape, llvm::ArrayRef<mlir::Value> extents,
300+
llvm::ArrayRef<mlir::Value> typeParams,
301+
const std::function<decltype(genTempDeclareOp)> &genDeclare,
302+
mlir::Value polymorphicMold, bool useStack = false,
303+
llvm::StringRef tmpName = ".tmp.array");
304+
271305
/// Create an LLVM stack save intrinsic op. Returns the saved stack pointer.
272306
/// The stack address space is fetched from the data layout of the current
273307
/// module.
@@ -596,6 +630,15 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
596630
return result;
597631
}
598632

633+
/// Compare two pointer-like values using the given predicate.
/// Both \p ptr1 and \p ptr2 are first converted to the index type
/// with createConvert(), and the returned value is the result of
/// an arith.cmpi operation applied to the converted operands
/// with \p predicate.
634+
mlir::Value genPtrCompare(mlir::Location loc,
635+
mlir::arith::CmpIPredicate predicate,
636+
mlir::Value ptr1, mlir::Value ptr2) {
637+
// Bring both operands to the same integer-like (index) type so that
// they can be used as operands of the integer comparison below.
ptr1 = createConvert(loc, getIndexType(), ptr1);
638+
ptr2 = createConvert(loc, getIndexType(), ptr2);
639+
return create<mlir::arith::CmpIOp>(loc, predicate, ptr1, ptr2);
640+
}
641+
599642
private:
600643
/// Set attributes (e.g. FastMathAttr) to \p op operation
601644
/// based on the current attributes setting.
@@ -850,6 +893,17 @@ llvm::SmallVector<mlir::Value> deduceOptimalExtents(mlir::ValueRange extents1,
850893
/// %result1 = arith.select %p4, %c0, %e1 : index
851894
llvm::SmallVector<mlir::Value> updateRuntimeExtentsForEmptyArrays(
852895
fir::FirOpBuilder &builder, mlir::Location loc, mlir::ValueRange extents);
896+
897+
/// Given \p box of type fir::BaseBoxType representing an array,
898+
/// the function generates code to fetch the lower bounds,
899+
/// the extents and the strides from the box. The values are returned via
900+
/// \p lbounds, \p extents and \p strides.
901+
void genDimInfoFromBox(fir::FirOpBuilder &builder, mlir::Location loc,
902+
mlir::Value box,
903+
llvm::SmallVectorImpl<mlir::Value> *lbounds,
904+
llvm::SmallVectorImpl<mlir::Value> *extents,
905+
llvm::SmallVectorImpl<mlir::Value> *strides);
906+
853907
} // namespace fir::factory
854908

855909
#endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H

flang/include/flang/Optimizer/CodeGen/CGPasses.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,15 @@ def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
9999
];
100100
}
101101

102+
def LowerRepackArraysPass : Pass<"lower-repack-arrays", "mlir::ModuleOp"> {
103+
let summary = "Convert fir.pack/unpack_array to other FIR operations";
104+
let description = [{
105+
Convert fir.pack/unpack_array operations to other FIR operations
106+
and Fortran runtime calls that implement the semantics
107+
of packing/unpacking.
108+
}];
109+
let dependentDialects = ["fir::FIROpsDialect", "mlir::arith::ArithDialect",
110+
"mlir::func::FuncDialect"];
111+
}
112+
102113
#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES

flang/include/flang/Optimizer/CodeGen/CodeGen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct NameUniquer;
2626
#define GEN_PASS_DECL_CODEGENREWRITE
2727
#define GEN_PASS_DECL_TARGETREWRITEPASS
2828
#define GEN_PASS_DECL_BOXEDPROCEDUREPASS
29+
#define GEN_PASS_DECL_LOWERREPACKARRAYSPASS
2930
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
3031

3132
/// FIR to LLVM translation pass options.

flang/lib/Optimizer/Builder/FIRBuilder.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "flang/Optimizer/Builder/Character.h"
1212
#include "flang/Optimizer/Builder/Complex.h"
1313
#include "flang/Optimizer/Builder/MutableBox.h"
14+
#include "flang/Optimizer/Builder/Runtime/Allocatable.h"
1415
#include "flang/Optimizer/Builder/Runtime/Assign.h"
1516
#include "flang/Optimizer/Builder/Runtime/Derived.h"
1617
#include "flang/Optimizer/Builder/Todo.h"
@@ -362,6 +363,72 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary(
362363
name, dynamicLength, dynamicShape, attrs);
363364
}
364365

366+
// Create a temporary array of type \p arrayType and declare it through
// the \p genDeclare callback.
//
// Two code paths are generated:
//  * If \p polymorphicMold is set, box storage is created with
//    genNullBoxStorage(), declared with the `allocatable` flag, and then
//    set up and allocated through the Allocatable* runtime builders.
//    \p shape and \p useStack are not used on this path, and the second
//    member of the result is always true.
//  * Otherwise, the storage comes from createTemporary() when \p useStack
//    is true, or from createHeapTemporary() when it is false, and it is
//    declared with \p shape, \p typeParams and default-constructed
//    variable flags. The second member of the result is `!useStack`.
//
// The first member of the result is whatever \p genDeclare returned.
std::pair<mlir::Value, bool> fir::FirOpBuilder::createArrayTemp(
367+
mlir::Location loc, fir::SequenceType arrayType, mlir::Value shape,
368+
llvm::ArrayRef<mlir::Value> extents, llvm::ArrayRef<mlir::Value> typeParams,
369+
const std::function<decltype(FirOpBuilder::genTempDeclareOp)> &genDeclare,
370+
mlir::Value polymorphicMold, bool useStack, llvm::StringRef tmpName) {
371+
if (polymorphicMold) {
372+
// Create *allocated* polymorphic temporary using the dynamic type
373+
// of the mold and the provided shape/extents. The created temporary
374+
// array will be written element per element, that is why it has to be
375+
// allocated.
376+
mlir::Type boxHeapType = fir::HeapType::get(arrayType);
377+
// The storage holds a !fir.class<!fir.heap<arrayType>> descriptor.
mlir::Value alloc = fir::factory::genNullBoxStorage(
378+
*this, loc, fir::ClassType::get(boxHeapType));
379+
fir::FortranVariableFlagsAttr declAttrs =
380+
fir::FortranVariableFlagsAttr::get(
381+
getContext(), fir::FortranVariableFlagsEnum::allocatable);
382+
383+
// The declare operation is generated on the descriptor storage before
// the runtime calls that set up and allocate it are emitted below.
// No shape operand is passed to the callback on this path.
mlir::Value base = genDeclare(*this, loc, alloc, tmpName,
384+
/*shape=*/nullptr, typeParams, declAttrs);
385+
386+
// The rank passed to the runtime is the number of provided extents.
int rank = extents.size();
387+
fir::runtime::genAllocatableApplyMold(*this, loc, alloc, polymorphicMold,
388+
rank);
389+
if (!extents.empty()) {
390+
mlir::Type idxTy = getIndexType();
391+
mlir::Value one = createIntegerConstant(loc, idxTy, 1);
392+
unsigned dim = 0;
393+
for (mlir::Value extent : extents) {
394+
// Dimensions are numbered from zero in the order of \p extents.
mlir::Value dimIndex = createIntegerConstant(loc, idxTy, dim++);
395+
// NOTE(review): presumably `one` is the lower bound and `extent`
// the upper bound of the dimension - confirm against the
// genAllocatableSetBounds() declaration.
fir::runtime::genAllocatableSetBounds(*this, loc, alloc, dimIndex, one,
396+
extent);
397+
}
398+
}
399+
if (!typeParams.empty()) {
400+
// We should call AllocatableSetDerivedLength() here.
401+
// TODO: does the mold provide the length parameters or
402+
// the operation itself or should they be in sync?
403+
TODO(loc, "polymorphic type with length parameters");
404+
}
405+
fir::runtime::genAllocatableAllocate(*this, loc, alloc);
406+
407+
return {base, /*isHeapAllocation=*/true};
408+
}
409+
// Non-polymorphic case: the storage is created directly by
// one of the FirOpBuilder temporary helpers.
mlir::Value allocmem;
410+
if (useStack)
411+
allocmem = createTemporary(loc, arrayType, tmpName, extents, typeParams);
412+
else
413+
allocmem =
414+
createHeapTemporary(loc, arrayType, tmpName, extents, typeParams);
415+
mlir::Value base = genDeclare(*this, loc, allocmem, tmpName, shape,
416+
typeParams, fir::FortranVariableFlagsAttr{});
417+
// The heap flag simply mirrors the request: createHeapTemporary()
// was used if and only if \p useStack is false.
return {base, !useStack};
418+
}
419+
420+
// Create a fir.declare operation for \p memref using \p builder.
// The result type of the operation is the type of \p memref itself.
// \p name is attached as a string attribute, \p shape and \p typeParams
// are forwarded as operands, and \p fortranAttrs are used as the Fortran
// variable flags. No dummy scope is provided and the CUF data attribute
// is default-constructed.
mlir::Value fir::FirOpBuilder::genTempDeclareOp(
421+
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value memref,
422+
llvm::StringRef name, mlir::Value shape,
423+
llvm::ArrayRef<mlir::Value> typeParams,
424+
fir::FortranVariableFlagsAttr fortranAttrs) {
425+
auto nameAttr = mlir::StringAttr::get(builder.getContext(), name);
426+
return builder.create<fir::DeclareOp>(loc, memref.getType(), memref, shape,
427+
typeParams,
428+
/*dummy_scope=*/nullptr, nameAttr,
429+
fortranAttrs, cuf::DataAttributeAttr{});
430+
}
431+
365432
mlir::Value fir::FirOpBuilder::genStackSave(mlir::Location loc) {
366433
mlir::Type voidPtr = mlir::LLVM::LLVMPointerType::get(
367434
getContext(), fir::factory::getAllocaAddressSpace(&getDataLayout()));
@@ -1825,3 +1892,29 @@ llvm::SmallVector<mlir::Value> fir::factory::updateRuntimeExtentsForEmptyArrays(
18251892
}
18261893
return newExtents;
18271894
}
1895+
1896+
// Generate one fir.box_dims operation per dimension of \p box and
// append its results to the output vectors.
//
// \p lbounds, \p extents and \p strides are optional: a null pointer
// means that the corresponding values are not collected. If all three
// are null, no operations are generated. Values are appended with
// push_back(), so any elements already present in the vectors are kept.
//
// The type of \p box must be a fir::BaseBoxType and its rank, as reported
// by fir::getBoxRank(), must be non-zero (both are checked with asserts).
void fir::factory::genDimInfoFromBox(
1897+
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value box,
1898+
llvm::SmallVectorImpl<mlir::Value> *lbounds,
1899+
llvm::SmallVectorImpl<mlir::Value> *extents,
1900+
llvm::SmallVectorImpl<mlir::Value> *strides) {
1901+
auto boxType = mlir::dyn_cast<fir::BaseBoxType>(box.getType());
1902+
assert(boxType && "must be a box");
1903+
// Nothing was requested: return before creating any operations.
// Note that the box type assertion above is still checked in this case.
if (!lbounds && !extents && !strides)
1904+
return;
1905+
1906+
unsigned rank = fir::getBoxRank(boxType);
1907+
assert(rank != 0 && "must be an array of known rank");
1908+
mlir::Type idxTy = builder.getIndexType();
1909+
for (unsigned i = 0; i < rank; ++i) {
1910+
// The dimension operand is a zero-based index constant.
mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
1911+
// A single fir.box_dims yields all three values (as index type),
// so only one operation is created per dimension regardless of
// which outputs were requested.
auto dimInfo =
1912+
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
1913+
if (lbounds)
1914+
lbounds->push_back(dimInfo.getLowerBound());
1915+
if (extents)
1916+
extents->push_back(dimInfo.getExtent());
1917+
if (strides)
1918+
// The stride is the one returned by getByteStride().
strides->push_back(dimInfo.getByteStride());
1919+
}
1920+
}

flang/lib/Optimizer/Builder/HLFIRTools.cpp

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -95,24 +95,6 @@ getExplicitLbounds(fir::FortranVariableOpInterface var) {
9595
return {};
9696
}
9797

98-
static void
99-
genLboundsAndExtentsFromBox(mlir::Location loc, fir::FirOpBuilder &builder,
100-
hlfir::Entity boxEntity,
101-
llvm::SmallVectorImpl<mlir::Value> &lbounds,
102-
llvm::SmallVectorImpl<mlir::Value> *extents) {
103-
assert(mlir::isa<fir::BaseBoxType>(boxEntity.getType()) && "must be a box");
104-
mlir::Type idxTy = builder.getIndexType();
105-
const int rank = boxEntity.getRank();
106-
for (int i = 0; i < rank; ++i) {
107-
mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
108-
auto dimInfo = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
109-
boxEntity, dim);
110-
lbounds.push_back(dimInfo.getLowerBound());
111-
if (extents)
112-
extents->push_back(dimInfo.getExtent());
113-
}
114-
}
115-
11698
static llvm::SmallVector<mlir::Value>
11799
getNonDefaultLowerBounds(mlir::Location loc, fir::FirOpBuilder &builder,
118100
hlfir::Entity entity) {
@@ -128,8 +110,8 @@ getNonDefaultLowerBounds(mlir::Location loc, fir::FirOpBuilder &builder,
128110
if (entity.isMutableBox())
129111
entity = hlfir::derefPointersAndAllocatables(loc, builder, entity);
130112
llvm::SmallVector<mlir::Value> lowerBounds;
131-
genLboundsAndExtentsFromBox(loc, builder, entity, lowerBounds,
132-
/*extents=*/nullptr);
113+
fir::factory::genDimInfoFromBox(builder, loc, entity, &lowerBounds,
114+
/*extents=*/nullptr, /*strides=*/nullptr);
133115
return lowerBounds;
134116
}
135117

@@ -1149,8 +1131,8 @@ static fir::ExtendedValue translateVariableToExtendedValue(
11491131
variable.mayHaveNonDefaultLowerBounds()) {
11501132
// This special case avoids generating two sets of identical
11511133
// fir.box_dim to get both the lower bounds and extents.
1152-
genLboundsAndExtentsFromBox(loc, builder, variable, nonDefaultLbounds,
1153-
&extents);
1134+
fir::factory::genDimInfoFromBox(builder, loc, variable, &nonDefaultLbounds,
1135+
&extents, /*strides=*/nullptr);
11541136
} else {
11551137
extents = getVariableExtents(loc, builder, variable);
11561138
nonDefaultLbounds = getNonDefaultLowerBounds(loc, builder, variable);

flang/lib/Optimizer/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_flang_library(FIRCodeGen
44
CodeGen.cpp
55
CodeGenOpenMP.cpp
66
FIROpPatterns.cpp
7+
LowerRepackArrays.cpp
78
PreCGRewrite.cpp
89
TBAABuilder.cpp
910
Target.cpp

0 commit comments

Comments
 (0)