Skip to content

Commit 7d7b58b

Browse files
authored
[flang-rt] Added ShallowCopy API. (#131702)
This API will be used for copying non-contiguous arrays into contiguous temporaries to support `-frepack-arrays`. The builder factory API will be used in the following commits.
1 parent 4ab011a commit 7d7b58b

File tree

6 files changed

+206
-1
lines changed

6 files changed

+206
-1
lines changed

flang-rt/lib/runtime/transformational.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "flang-rt/runtime/descriptor.h"
2222
#include "flang-rt/runtime/terminator.h"
2323
#include "flang-rt/runtime/tools.h"
24+
#include "flang-rt/runtime/type-info.h"
2425
#include "flang/Common/float128.h"
2526

2627
namespace Fortran::runtime {
@@ -323,6 +324,71 @@ static inline RT_API_ATTRS void DoBesselYnX0(Descriptor &result, int32_t n1,
323324
}
324325
}
325326

327+
// Verifies that descriptors d1 and d2 are compatible operands for a shallow
// copy. Reports a crash through `terminator` when they differ in any of:
//   * rank,
//   * shape (delegated to CheckConformability),
//   * element byte size,
//   * type code,
//   * derived type description,
//   * derived type LEN parameter values.
// `funcName` names the calling runtime routine in the diagnostics, and
// `d1Name`/`d2Name` name the two arguments.
static inline RT_API_ATTRS void CheckConformabilityForShallowCopy(
    const Descriptor &d1, const Descriptor &d2, Terminator &terminator,
    const char *funcName, const char *d1Name, const char *d2Name) {
  if (d1.rank() != d2.rank()) {
    // Each rank must be reported under its own argument name.
    terminator.Crash(
        "Incompatible arguments to %s: %s has rank %d, %s has rank %d",
        funcName, d1Name, d1.rank(), d2Name, d2.rank());
  }

  // Check that the shapes conform.
  CheckConformability(d1, d2, terminator, funcName, d1Name, d2Name);

  // NOTE(review): the messages below print std::size_t values with %zd;
  // %zu would be the matching specifier - confirm what Crash() supports.
  if (d1.ElementBytes() != d2.ElementBytes()) {
    terminator.Crash("Incompatible arguments to %s: %s has element byte length "
                     "%zd, %s has length %zd",
        funcName, d1Name, d1.ElementBytes(), d2Name, d2.ElementBytes());
  }
  if (d1.type() != d2.type()) {
    terminator.Crash("Incompatible arguments to %s: %s has type code %d, %s "
                     "has type code %d",
        funcName, d1Name, d1.type(), d2Name, d2.type());
  }

  // A descriptor without an addendum is treated as having no derived type.
  const DescriptorAddendum *d1Addendum{d1.Addendum()};
  const typeInfo::DerivedType *d1Derived{
      d1Addendum ? d1Addendum->derivedType() : nullptr};
  const DescriptorAddendum *d2Addendum{d2.Addendum()};
  const typeInfo::DerivedType *d2Derived{
      d2Addendum ? d2Addendum->derivedType() : nullptr};
  if (d1Derived != d2Derived) {
    terminator.Crash(
        "Incompatible arguments to %s: %s and %s have different derived types",
        funcName, d1Name, d2Name);
  }
  if (d2Derived) {
    // Compare LEN parameters.
    // d2Addendum is non-null because d2Derived is. d1Addendum is non-null
    // only because d1Derived == d2Derived here, which presumably relies on
    // Crash() above not returning - TODO confirm.
    std::size_t lenParms{d2Derived->LenParameters()};
    for (std::size_t j{0}; j < lenParms; ++j) {
      if (d1Addendum->LenParameterValue(j) !=
          d2Addendum->LenParameterValue(j)) {
        terminator.Crash("Incompatible arguments to %s: type length parameter "
                         "%zd for %s is %zd, for %s is %zd",
            funcName, j, d1Name,
            static_cast<std::size_t>(d1Addendum->LenParameterValue(j)), d2Name,
            static_cast<std::size_t>(d2Addendum->LenParameterValue(j)));
      }
    }
  }
}
375+
376+
template <bool IS_ALLOCATING>
377+
static inline RT_API_ATTRS void DoShallowCopy(
378+
std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
379+
const Descriptor &source, Terminator &terminator, const char *funcName) {
380+
if constexpr (IS_ALLOCATING) {
381+
SubscriptValue extent[maxRank];
382+
source.GetShape(extent);
383+
AllocateResult(result, source, source.rank(), extent, terminator, funcName);
384+
} else {
385+
CheckConformabilityForShallowCopy(
386+
result, source, terminator, funcName, "RESULT=", "SOURCE=");
387+
}
388+
389+
ShallowCopy(result, source);
390+
}
391+
326392
extern "C" {
327393
RT_EXT_API_GROUP_BEGIN
328394

@@ -815,6 +881,19 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
815881
}
816882
}
817883

884+
// ShallowCopy
885+
void RTDEF(ShallowCopy)(Descriptor &result, const Descriptor &source,
886+
const char *sourceFile, int line) {
887+
Terminator terminator{sourceFile, line};
888+
DoShallowCopy<true>(result, source, terminator, "ShallowCopy");
889+
}
890+
891+
void RTDEF(ShallowCopyDirect)(const Descriptor &result,
892+
const Descriptor &source, const char *sourceFile, int line) {
893+
Terminator terminator{sourceFile, line};
894+
DoShallowCopy<false>(result, source, terminator, "ShallowCopyDirect");
895+
}
896+
818897
// SPREAD
819898
void RTDEF(Spread)(Descriptor &result, const Descriptor &source, int dim,
820899
std::int64_t ncopies, const char *sourceFile, int line) {

flang-rt/unittests/Runtime/Transformational.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,3 +550,67 @@ TEST(Transformational, TransposeReal10) {
550550
result.Destroy();
551551
}
552552
#endif
553+
554+
// Exercises both ShallowCopy entry points on a 2x3 CHARACTER(2) array with
// non-default lower bounds and on an INTEGER(4) scalar.
TEST(Transformational, ShallowCopy) {
  // Source array: 2x3 elements of two characters each, bounds (-1:0, 3:5).
  auto charSource{MakeArray<TypeCategory::Character, 1>(std::vector<int>{2, 3},
      std::vector<std::string>{"ab", "cd", "ef", "gh", "ij", "kl"}, 2)};
  charSource->GetDimension(0).SetBounds(-1, 0);
  charSource->GetDimension(1).SetBounds(3, 5);
  StaticDescriptor<2> charStorage;
  Descriptor &charCopy{charStorage.descriptor()};

  // Allocating ShallowCopy: the result must be allocated, contiguous,
  // have lower bounds of 1 and hold the same bytes as the source.
  RTNAME(ShallowCopy)(charCopy, *charSource);
  ASSERT_TRUE(charCopy.IsAllocated());
  ASSERT_TRUE(charCopy.IsContiguous());
  ASSERT_EQ(charCopy.type(), charSource->type());
  ASSERT_EQ(charCopy.ElementBytes(), 2u);
  EXPECT_EQ(charCopy.GetDimension(0).LowerBound(), 1);
  EXPECT_EQ(charCopy.GetDimension(0).Extent(), 2);
  EXPECT_EQ(charCopy.GetDimension(1).LowerBound(), 1);
  EXPECT_EQ(charCopy.GetDimension(1).Extent(), 3);
  std::string expectedChars{"abcdefghijkl"};
  EXPECT_EQ(std::memcmp(charCopy.OffsetElement<char>(0), expectedChars.data(),
                expectedChars.length()),
      0);

  // ShallowCopyDirect into the already allocated result: scrub the data
  // first so that the copy is observable.
  char *baseAddress{charCopy.OffsetElement<char>(0)};
  std::memset(baseAddress, 'x', expectedChars.length());
  // Give the result new lower bounds; the copy must leave them alone.
  charCopy.GetDimension(0).SetBounds(-2, -1);
  charCopy.GetDimension(1).SetBounds(2, 4);
  RTNAME(ShallowCopyDirect)(charCopy, *charSource);
  ASSERT_TRUE(charCopy.IsAllocated());
  ASSERT_TRUE(charCopy.IsContiguous());
  ASSERT_EQ(charCopy.type(), charSource->type());
  ASSERT_EQ(charCopy.ElementBytes(), 2u);
  EXPECT_EQ(charCopy.GetDimension(0).LowerBound(), -2);
  EXPECT_EQ(charCopy.GetDimension(0).Extent(), 2);
  EXPECT_EQ(charCopy.GetDimension(1).LowerBound(), 2);
  EXPECT_EQ(charCopy.GetDimension(1).Extent(), 3);
  // The result must not have been re-allocated.
  EXPECT_EQ(baseAddress, charCopy.OffsetElement<char>(0));
  EXPECT_EQ(std::memcmp(charCopy.OffsetElement<char>(0), expectedChars.data(),
                expectedChars.length()),
      0);
  charCopy.Destroy();

  // Scalar case: rank-0 INTEGER(4) holding -1.
  auto intSource{MakeArray<TypeCategory::Integer, 4>(
      std::vector<int>{}, std::vector<std::int32_t>{-1})};
  StaticDescriptor<0> intStorage;
  Descriptor &intCopy{intStorage.descriptor()};
  RTNAME(ShallowCopy)(intCopy, *intSource);
  ASSERT_TRUE(intCopy.IsAllocated());
  ASSERT_EQ(intCopy.rank(), 0);
  ASSERT_EQ(*intCopy.ZeroBasedIndexedElement<std::int32_t>(0), -1);
  // Overwrite the value so that the direct copy below is observable.
  *intCopy.ZeroBasedIndexedElement<std::int32_t>(0) = 0;
  baseAddress = intCopy.OffsetElement<char>(0);
  RTNAME(ShallowCopyDirect)(intCopy, *intSource);
  ASSERT_TRUE(intCopy.IsAllocated());
  ASSERT_EQ(intCopy.rank(), 0);
  ASSERT_EQ(*intCopy.ZeroBasedIndexedElement<std::int32_t>(0), -1);
  EXPECT_EQ(baseAddress, intCopy.OffsetElement<char>(0));
  intCopy.Destroy();
}

flang/docs/ArrayRepacking.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,24 @@ Lowering of the new operations (after all the optimizations) might be done in a
400400

401401
### Runtime
402402

403-
[TBD] define the runtime APIs.
403+
The goal of packing a non-contiguous array into a contiguous temporary is to allow cache-efficient accesses to the elements of the array. With this in mind, elements of derived types may be copied without following the regular Fortran assignment semantics for allocatable components, which may imply memory allocations and data copies for those components. Making just a shallow copy of the original array can therefore be faster than the corresponding deep copy performed by the Fortran `Assign` runtime.
404+
405+
The following API is proposed in flang-rt:
406+
407+
```C++
408+
void RTDECL(ShallowCopyDirect)(
409+
const Descriptor &result,
410+
const Descriptor &source,
411+
const char *sourceFile = nullptr,
412+
int line = 0);
413+
```
414+
415+
It copies values from the `source` array into the pre-allocated `result` array. The semantics differ from those of the `Assign` runtime for derived types, because it does not perform the recursive assignment actions for the components of derived types. For example, ALLOCATABLE component descriptors are copied without creating a new allocation and copying the data (essentially, they are treated as POINTER components).
416+
417+
The arrays must be conforming, i.e. they must have:
418+
* Same rank.
419+
* Same extents.
420+
* Same size and type of elements (including the type parameters).
404421
405422
### Optimization passes
406423

flang/include/flang/Optimizer/Builder/Runtime/Transformational.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ void genPack(fir::FirOpBuilder &builder, mlir::Location loc,
6363
mlir::Value resultBox, mlir::Value arrayBox, mlir::Value maskBox,
6464
mlir::Value vectorBox);
6565

66+
void genShallowCopy(fir::FirOpBuilder &builder, mlir::Location loc,
67+
mlir::Value resultBox, mlir::Value arrayBox,
68+
bool resultIsAllocated);
69+
6670
void genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
6771
mlir::Value resultBox, mlir::Value sourceBox,
6872
mlir::Value shapeBox, mlir::Value padBox, mlir::Value orderBox);

flang/include/flang/Runtime/transformational.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,27 @@ void RTDECL(Pack)(Descriptor &result, const Descriptor &source,
146146
const Descriptor &mask, const Descriptor *vector = nullptr,
147147
const char *sourceFile = nullptr, int line = 0);
148148

149+
/// Produce a shallow copy of the \p source in \p result.
150+
/// The \p source may have any type and rank.
151+
/// Unless \p source is unallocated, the \p result will
152+
/// be allocated using the same shape and dynamic type,
153+
/// and will contain the same top-level values as the \p source.
154+
/// The \p result will have the default lower bounds, if it is an array.
155+
/// As the name suggests, it is different from the Assign runtime,
156+
/// because it does not perform recursive assign actions
157+
/// for the components of the derived types.
158+
void RTDECL(ShallowCopy)(Descriptor &result, const Descriptor &source,
159+
const char *sourceFile = nullptr, int line = 0);
160+
161+
/// Same as ShallowCopy, where the caller provides a pre-allocated
162+
/// \p result. The \p source and \p result must be conforming:
163+
/// * Same rank.
164+
/// * Same extents.
165+
/// * Same size and type of elements (including the type parameters).
166+
/// If \p result is an array, its lower bounds are not affected.
167+
void RTDECL(ShallowCopyDirect)(const Descriptor &result,
168+
const Descriptor &source, const char *sourceFile = nullptr, int line = 0);
169+
149170
void RTDECL(Spread)(Descriptor &result, const Descriptor &source, int dim,
150171
std::int64_t ncopies, const char *sourceFile = nullptr, int line = 0);
151172

flang/lib/Optimizer/Builder/Runtime/Transformational.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,26 @@ void fir::runtime::genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
474474
builder.create<fir::CallOp>(loc, func, args);
475475
}
476476

477+
/// Generate call to ShallowCopy[Direct] runtime routine.
478+
/// ShallowCopyDirect is used iff \p resultIsAllocated is true.
479+
void fir::runtime::genShallowCopy(fir::FirOpBuilder &builder,
480+
mlir::Location loc, mlir::Value resultBox,
481+
mlir::Value arrayBox,
482+
bool resultIsAllocated) {
483+
auto packFunc =
484+
resultIsAllocated
485+
? fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopyDirect)>(loc,
486+
builder)
487+
: fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopy)>(loc, builder);
488+
auto fTy = packFunc.getFunctionType();
489+
auto sourceFile = fir::factory::locationToFilename(builder, loc);
490+
auto sourceLine =
491+
fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
492+
auto args = fir::runtime::createArguments(builder, loc, fTy, resultBox,
493+
arrayBox, sourceFile, sourceLine);
494+
builder.create<fir::CallOp>(loc, packFunc, args);
495+
}
496+
477497
/// Generate call to Spread intrinsic runtime routine.
478498
void fir::runtime::genSpread(fir::FirOpBuilder &builder, mlir::Location loc,
479499
mlir::Value resultBox, mlir::Value sourceBox,

0 commit comments

Comments
 (0)