Skip to content

[flang-rt] Added ShallowCopy API. #131702

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions flang-rt/lib/runtime/transformational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "flang-rt/runtime/descriptor.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/tools.h"
#include "flang-rt/runtime/type-info.h"
#include "flang/Common/float128.h"

namespace Fortran::runtime {
Expand Down Expand Up @@ -323,6 +324,71 @@ static inline RT_API_ATTRS void DoBesselYnX0(Descriptor &result, int32_t n1,
}
}

// Verifies that descriptors d1 and d2 are compatible operands of a shallow
// copy. Any mismatch is reported through terminator.Crash() with a message
// that names funcName and the offending argument(s) d1Name/d2Name.
// The following properties are compared, in this order:
//  - rank;
//  - shape (delegated to CheckConformability);
//  - element byte length;
//  - type code;
//  - derived type description (compared by pointer identity);
//  - values of the derived type's LEN parameters, if there is a derived type.
static inline RT_API_ATTRS void CheckConformabilityForShallowCopy(
    const Descriptor &d1, const Descriptor &d2, Terminator &terminator,
    const char *funcName, const char *d1Name, const char *d2Name) {
  if (d1.rank() != d2.rank()) {
    // The second rank belongs to d2, so it must be labelled with d2Name.
    terminator.Crash(
        "Incompatible arguments to %s: %s has rank %d, %s has rank %d",
        funcName, d1Name, d1.rank(), d2Name, d2.rank());
  }

  // Check that the shapes conform.
  CheckConformability(d1, d2, terminator, funcName, d1Name, d2Name);

  if (d1.ElementBytes() != d2.ElementBytes()) {
    terminator.Crash("Incompatible arguments to %s: %s has element byte length "
                     "%zd, %s has length %zd",
        funcName, d1Name, d1.ElementBytes(), d2Name, d2.ElementBytes());
  }
  if (d1.type() != d2.type()) {
    terminator.Crash("Incompatible arguments to %s: %s has type code %d, %s "
                     "has type code %d",
        funcName, d1Name, d1.type(), d2Name, d2.type());
  }
  // A descriptor without an addendum is treated as having no derived type.
  const DescriptorAddendum *d1Addendum{d1.Addendum()};
  const typeInfo::DerivedType *d1Derived{
      d1Addendum ? d1Addendum->derivedType() : nullptr};
  const DescriptorAddendum *d2Addendum{d2.Addendum()};
  const typeInfo::DerivedType *d2Derived{
      d2Addendum ? d2Addendum->derivedType() : nullptr};
  if (d1Derived != d2Derived) {
    terminator.Crash(
        "Incompatible arguments to %s: %s and %s have different derived types",
        funcName, d1Name, d2Name);
  }
  if (d2Derived) {
    // Both derived type pointers are equal and non-null at this point, so
    // both addenda exist and may be dereferenced.
    // Compare LEN parameters.
    std::size_t lenParms{d2Derived->LenParameters()};
    for (std::size_t j{0}; j < lenParms; ++j) {
      if (d1Addendum->LenParameterValue(j) !=
          d2Addendum->LenParameterValue(j)) {
        terminator.Crash("Incompatible arguments to %s: type length parameter "
                         "%zd for %s is %zd, for %s is %zd",
            funcName, j, d1Name,
            static_cast<std::size_t>(d1Addendum->LenParameterValue(j)), d2Name,
            static_cast<std::size_t>(d2Addendum->LenParameterValue(j)));
      }
    }
  }
}

// Common implementation of the ShallowCopy entry points: transfers the
// elements of source into result by calling ShallowCopy(result, source).
//  - IS_ALLOCATING == true: result is first set up by AllocateResult() from
//    the rank and shape obtained from source.
//  - IS_ALLOCATING == false: result is taken as provided by the caller (hence
//    the const reference) and is only checked against source for
//    compatibility before the copy.
template <bool IS_ALLOCATING>
static inline RT_API_ATTRS void DoShallowCopy(
    std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
    const Descriptor &source, Terminator &terminator, const char *funcName) {
  if constexpr (!IS_ALLOCATING) {
    // Caller-provided storage: make sure it matches the source.
    CheckConformabilityForShallowCopy(
        result, source, terminator, funcName, "RESULT=", "SOURCE=");
  } else {
    // Allocate the result with the same rank and shape as the source.
    SubscriptValue shape[maxRank];
    source.GetShape(shape);
    AllocateResult(result, source, source.rank(), shape, terminator, funcName);
  }

  ShallowCopy(result, source);
}

extern "C" {
RT_EXT_API_GROUP_BEGIN

Expand Down Expand Up @@ -815,6 +881,19 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
}
}

// ShallowCopy: allocating variant; forwards to DoShallowCopy<true>.
void RTDEF(ShallowCopy)(Descriptor &result, const Descriptor &source,
    const char *sourceFile, int line) {
  // The terminator is built from the caller-supplied source position.
  Terminator crashHandler{sourceFile, line};
  DoShallowCopy</*IS_ALLOCATING=*/true>(
      result, source, crashHandler, "ShallowCopy");
}

// ShallowCopyDirect: variant for a caller-provided result; forwards to
// DoShallowCopy<false>, which checks result against source before copying.
void RTDEF(ShallowCopyDirect)(const Descriptor &result,
    const Descriptor &source, const char *sourceFile, int line) {
  // The terminator is built from the caller-supplied source position.
  Terminator crashHandler{sourceFile, line};
  DoShallowCopy</*IS_ALLOCATING=*/false>(
      result, source, crashHandler, "ShallowCopyDirect");
}

// SPREAD
void RTDEF(Spread)(Descriptor &result, const Descriptor &source, int dim,
std::int64_t ncopies, const char *sourceFile, int line) {
Expand Down
64 changes: 64 additions & 0 deletions flang-rt/unittests/Runtime/Transformational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,3 +550,67 @@ TEST(Transformational, TransposeReal10) {
result.Destroy();
}
#endif

// Exercises both runtime entry points, ShallowCopy (allocating) and
// ShallowCopyDirect (pre-allocated result), first on a 2x3 CHARACTER array
// with 2-byte elements and then on a rank-0 INTEGER(4) value.
TEST(Transformational, ShallowCopy) {
// Source array: shape {2, 3}, element length 2, with non-default lower
// bounds (-1 and 3) so that bounds handling in the result can be observed.
auto charArray{MakeArray<TypeCategory::Character, 1>(std::vector<int>{2, 3},
std::vector<std::string>{"ab", "cd", "ef", "gh", "ij", "kl"}, 2)};
charArray->GetDimension(0).SetBounds(-1, 0);
charArray->GetDimension(1).SetBounds(3, 5);
StaticDescriptor<2> staticCharResult;
Descriptor &charResult{staticCharResult.descriptor()};

// Test allocating ShallowCopy.
// The result is expected to be allocated, contiguous, of the same type and
// extents as the source, and to have lower bounds of 1 in each dimension.
RTNAME(ShallowCopy)(charResult, *charArray);
ASSERT_TRUE(charResult.IsAllocated());
ASSERT_TRUE(charResult.IsContiguous());
ASSERT_EQ(charResult.type(), charArray->type());
ASSERT_EQ(charResult.ElementBytes(), 2u);
EXPECT_EQ(charResult.GetDimension(0).LowerBound(), 1);
EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 1);
EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
// The six 2-character elements, concatenated in element order.
std::string expectedCharResult{"abcdefghijkl"};
EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
expectedCharResult.data(), expectedCharResult.length()),
0);

// Test ShallowCopyDirect with pre-allocated result.
// Remember the storage address and overwrite the data with 'x' so that the
// second copy is observable and a re-allocation can be detected.
char *allocatedPtr = charResult.OffsetElement<char>(0);
std::memset(
charResult.OffsetElement<char>(0), 'x', expectedCharResult.length());
// Set new lower bounds for charResult.
charResult.GetDimension(0).SetBounds(-2, -1);
charResult.GetDimension(1).SetBounds(2, 4);
RTNAME(ShallowCopyDirect)(charResult, *charArray);
ASSERT_TRUE(charResult.IsAllocated());
ASSERT_TRUE(charResult.IsContiguous());
ASSERT_EQ(charResult.type(), charArray->type());
ASSERT_EQ(charResult.ElementBytes(), 2u);
// The lower bounds set on the result above must be preserved.
EXPECT_EQ(charResult.GetDimension(0).LowerBound(), -2);
EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 2);
EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
// Test that the result was not re-allocated.
EXPECT_EQ(allocatedPtr, charResult.OffsetElement<char>(0));
EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
expectedCharResult.data(), expectedCharResult.length()),
0);
charResult.Destroy();

// Rank-0 case: an INTEGER(4) value of -1 built with an empty shape.
auto intScalar{MakeArray<TypeCategory::Integer, 4>(
std::vector<int>{}, std::vector<std::int32_t>{-1})};
StaticDescriptor<0> staticIntResult;
Descriptor &intResult{staticIntResult.descriptor()};
RTNAME(ShallowCopy)(intResult, *intScalar);
ASSERT_TRUE(intResult.IsAllocated());
ASSERT_EQ(intResult.rank(), 0);
ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
// Clobber the copied value, then check that ShallowCopyDirect restores it
// in the same storage.
*intResult.ZeroBasedIndexedElement<std::int32_t>(0) = 0;
allocatedPtr = intResult.OffsetElement<char>(0);
RTNAME(ShallowCopyDirect)(intResult, *intScalar);
ASSERT_TRUE(intResult.IsAllocated());
ASSERT_EQ(intResult.rank(), 0);
ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
EXPECT_EQ(allocatedPtr, intResult.OffsetElement<char>(0));
intResult.Destroy();
}
19 changes: 18 additions & 1 deletion flang/docs/ArrayRepacking.md
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,24 @@ Lowering of the new operations (after all the optimizations) might be done in a

### Runtime

[TBD] define the runtime APIs.
The goal of packing a non-contiguous array into a contiguous temporary is to allow cache-efficient access to the elements of the array. With this in mind, elements of derived types may be copied without following the regular Fortran assignment semantics for allocatable components, which may imply memory allocations and data copies for those components. Making just a shallow copy of the original array can therefore be faster than the corresponding deep copy using the Fortran `Assign` runtime.

The following API is proposed in flang-rt:

```C++
void RTDECL(ShallowCopyDirect)(
const Descriptor &result,
const Descriptor &source,
const char *sourceFile = nullptr,
int line = 0);
```

It copies values from the `source` array into the pre-allocated `result` array. Its semantics differ from those of the `Assign` runtime for derived types, because it does not perform recursive assignment of the components of derived types. For example, ALLOCATABLE component descriptors are copied without creating a new allocation and copying the data (essentially, they are treated as POINTER components).

The arrays must be conforming, i.e. they must have:
* Same rank.
* Same extents.
* Same size and type of elements (including the type parameters).

### Optimization passes

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ void genPack(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value arrayBox, mlir::Value maskBox,
mlir::Value vectorBox);

/// Generate a call to the ShallowCopy or ShallowCopyDirect runtime routine,
/// passing \p resultBox and \p arrayBox followed by the source file name and
/// line number derived from \p loc.
/// ShallowCopyDirect is called iff \p resultIsAllocated is true; otherwise
/// ShallowCopy is called.
void genShallowCopy(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value arrayBox,
bool resultIsAllocated);

void genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value sourceBox,
mlir::Value shapeBox, mlir::Value padBox, mlir::Value orderBox);
Expand Down
21 changes: 21 additions & 0 deletions flang/include/flang/Runtime/transformational.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,27 @@ void RTDECL(Pack)(Descriptor &result, const Descriptor &source,
const Descriptor &mask, const Descriptor *vector = nullptr,
const char *sourceFile = nullptr, int line = 0);

/// Produce a shallow copy of the \p source in \p result.
/// The \p source may have any type and rank.
/// Unless \p source is unallocated, the \p result will
/// be allocated using the same shape and dynamic type,
/// and will contain the same top-level values as the \p source.
/// The \p result will have the default lower bounds, if it is an array.
/// As the name suggests, it is different from the Assign runtime,
/// because it does not perform recursive assign actions
/// for the components of the derived types.
/// \p sourceFile and \p line give the caller's source position; the
/// implementation uses them to construct the Terminator.
void RTDECL(ShallowCopy)(Descriptor &result, const Descriptor &source,
const char *sourceFile = nullptr, int line = 0);

/// Same as ShallowCopy, where the caller provides a pre-allocated
/// \p result. The \p source and \p result must be conforming:
///  * Same rank.
///  * Same extents.
///  * Same size and type of elements (including the type parameters).
///  * Same derived type, if any.
/// A violation of these requirements is reported through the runtime
/// Terminator's Crash(), which is constructed from \p sourceFile and \p line.
/// If \p result is an array, its lower bounds are not affected.
void RTDECL(ShallowCopyDirect)(const Descriptor &result,
const Descriptor &source, const char *sourceFile = nullptr, int line = 0);

void RTDECL(Spread)(Descriptor &result, const Descriptor &source, int dim,
std::int64_t ncopies, const char *sourceFile = nullptr, int line = 0);

Expand Down
20 changes: 20 additions & 0 deletions flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,26 @@ void fir::runtime::genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
builder.create<fir::CallOp>(loc, func, args);
}

/// Emit a call to one of the two shallow-copy runtime entry points.
/// ShallowCopy is selected when \p resultIsAllocated is false, and
/// ShallowCopyDirect when it is true. The call receives \p resultBox,
/// \p arrayBox, and the file name and line number derived from \p loc.
void fir::runtime::genShallowCopy(fir::FirOpBuilder &builder,
                                  mlir::Location loc, mlir::Value resultBox,
                                  mlir::Value arrayBox,
                                  bool resultIsAllocated) {
  auto callee =
      !resultIsAllocated
          ? fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopy)>(loc, builder)
          : fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopyDirect)>(loc,
                                                                     builder);
  auto calleeType = callee.getFunctionType();
  auto fileName = fir::factory::locationToFilename(builder, loc);
  // Input #3 is the trailing line-number parameter of both routines.
  auto lineNo =
      fir::factory::locationToLineNo(builder, loc, calleeType.getInput(3));
  auto callArgs = fir::runtime::createArguments(
      builder, loc, calleeType, resultBox, arrayBox, fileName, lineNo);
  builder.create<fir::CallOp>(loc, callee, callArgs);
}

/// Generate call to Spread intrinsic runtime routine.
void fir::runtime::genSpread(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value sourceBox,
Expand Down