7 | 7 | //===----------------------------------------------------------------------===//
8 | 8 |
9 | 9 | #include "copy.h"
| 10 | +#include "stack.h"
10 | 11 | #include "terminator.h"
11 | 12 | #include "type-info.h"
12 | 13 | #include "flang/Runtime/allocatable.h"
13 | 14 | #include "flang/Runtime/descriptor.h"
14 | 15 | #include <cstring>
15 | 16 |
16 | 17 | namespace Fortran::runtime {
| 18 | +namespace {
| 19 | +using StaticDescTy = StaticDescriptor<maxRank, true, 0>;
| 20 | +
| 21 | +// A structure describing the data copy that needs to be done
| 22 | +// from one descriptor to another. It is a helper structure
| 23 | +// for CopyElement.
| 24 | +struct CopyDescriptor {
| 25 | +  // A constructor specifying all members explicitly.
| 26 | +  RT_API_ATTRS CopyDescriptor(const Descriptor &to, const SubscriptValue toAt[],
| 27 | +      const Descriptor &from, const SubscriptValue fromAt[],
| 28 | +      std::size_t elements, bool usesStaticDescriptors = false)
| 29 | +      : to_(to), from_(from), elements_(elements),
| 30 | +        usesStaticDescriptors_(usesStaticDescriptors) {
| 31 | +    for (int dim{0}; dim < to.rank(); ++dim) {
| 32 | +      toAt_[dim] = toAt[dim];
| 33 | +    }
| 34 | +    for (int dim{0}; dim < from.rank(); ++dim) {
| 35 | +      fromAt_[dim] = fromAt[dim];
| 36 | +    }
| 37 | +  }
| 38 | +  // The number of elements to copy is initialized from the to descriptor.
| 39 | +  // The current element subscripts are initialized from the lower bounds
| 40 | +  // of the to and from descriptors.
| 41 | +  RT_API_ATTRS CopyDescriptor(const Descriptor &to, const Descriptor &from,
| 42 | +      bool usesStaticDescriptors = false)
| 43 | +      : to_(to), from_(from), elements_(to.Elements()),
| 44 | +        usesStaticDescriptors_(usesStaticDescriptors) {
| 45 | +    to.GetLowerBounds(toAt_);
| 46 | +    from.GetLowerBounds(fromAt_);
| 47 | +  }
| 48 | +
| 49 | +  // Descriptor of the destination.
| 50 | +  const Descriptor &to_;
| 51 | +  // A subscript specifying the current element position to copy to.
| 52 | +  SubscriptValue toAt_[maxRank];
| 53 | +  // Descriptor of the source.
| 54 | +  const Descriptor &from_;
| 55 | +  // A subscript specifying the current element position to copy from.
| 56 | +  SubscriptValue fromAt_[maxRank];
| 57 | +  // Number of elements left to copy.
| 58 | +  std::size_t elements_;
| 59 | +  // Must be true if the to and from descriptors are allocated
| 60 | +  // by the CopyElement runtime. The allocated memory belongs
| 61 | +  // to a separate stack that needs to be popped in correspondence
| 62 | +  // with popping such a CopyDescriptor node.
| 63 | +  bool usesStaticDescriptors_;
| 64 | +};
| 65 | +
| 66 | +// A pair of StaticDescTy elements.
| 67 | +struct StaticDescriptorsPair {
| 68 | +  StaticDescTy to;
| 69 | +  StaticDescTy from;
| 70 | +};
| 71 | +} // namespace
| 72 | +
17 | 73 | RT_OFFLOAD_API_GROUP_BEGIN
18 | 74 |
19 | 75 | RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[],
20 | 76 |     const Descriptor &from, const SubscriptValue fromAt[],
21 | 77 |     Terminator &terminator) {
22 | | -  char *toPtr{to.Element<char>(toAt)};
23 | | -  char *fromPtr{from.Element<char>(fromAt)};
24 | | -  RUNTIME_CHECK(terminator, to.ElementBytes() == from.ElementBytes());
25 | | -  std::memcpy(toPtr, fromPtr, to.ElementBytes());
26 | | -  // Deep copy allocatable and automatic components if any.
27 | | -  if (const auto *addendum{to.Addendum()}) {
28 | | -    if (const auto *derived{addendum->derivedType()};
29 | | -        derived && !derived->noDestructionNeeded()) {
30 | | -      RUNTIME_CHECK(terminator,
31 | | -          from.Addendum() && derived == from.Addendum()->derivedType());
32 | | -      const Descriptor &componentDesc{derived->component()};
33 | | -      const typeInfo::Component *component{
34 | | -          componentDesc.OffsetElement<typeInfo::Component>()};
35 | | -      std::size_t nComponents{componentDesc.Elements()};
36 | | -      for (std::size_t j{0}; j < nComponents; ++j, ++component) {
37 | | -        if (component->genre() == typeInfo::Component::Genre::Allocatable ||
38 | | -            component->genre() == typeInfo::Component::Genre::Automatic) {
39 | | -          Descriptor &toDesc{
40 | | -              *reinterpret_cast<Descriptor *>(toPtr + component->offset())};
41 | | -          if (toDesc.raw().base_addr != nullptr) {
42 | | -            toDesc.set_base_addr(nullptr);
43 | | -            RUNTIME_CHECK(terminator, toDesc.Allocate() == CFI_SUCCESS);
44 | | -            const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
45 | | -                fromPtr + component->offset())};
46 | | -            CopyArray(toDesc, fromDesc, terminator);
47 | | -          }
48 | | -        } else if (component->genre() == typeInfo::Component::Genre::Data &&
49 | | -            component->derivedType() &&
50 | | -            !component->derivedType()->noDestructionNeeded()) {
51 | | -          SubscriptValue extents[maxRank];
52 | | -          const typeInfo::Value *bounds{component->bounds()};
53 | | -          for (int dim{0}; dim < component->rank(); ++dim) {
54 | | -            SubscriptValue lb{bounds[2 * dim].GetValue(&to).value_or(0)};
55 | | -            SubscriptValue ub{bounds[2 * dim + 1].GetValue(&to).value_or(0)};
56 | | -            extents[dim] = ub >= lb ? ub - lb + 1 : 0;
| 78 | +#if !defined(RT_DEVICE_COMPILATION)
| 79 | +  constexpr unsigned copyStackReserve{16};
| 80 | +  constexpr unsigned descriptorStackReserve{6};
| 81 | +#else
| 82 | +  // Always use dynamic allocation on the device to avoid
| 83 | +  // big stack sizes. This may be tuned as needed.
| 84 | +  constexpr unsigned copyStackReserve{0};
| 85 | +  constexpr unsigned descriptorStackReserve{0};
| 86 | +#endif
| 87 | +  // Keep a stack of CopyDescriptors to avoid recursive calls.
| 88 | +  Stack<CopyDescriptor, copyStackReserve> copyStack{terminator};
| 89 | +  // Keep a separate stack of StaticDescTy pairs. These descriptors
| 90 | +  // may be used for representing copies of Component::Genre::Data
| 91 | +  // components (since they do not have their descriptors allocated
| 92 | +  // in memory).
| 93 | +  Stack<StaticDescriptorsPair, descriptorStackReserve> descriptorsStack{
| 94 | +      terminator};
| 95 | +  copyStack.emplace(to, toAt, from, fromAt, /*elements=*/std::size_t{1});
| 96 | +
| 97 | +  while (!copyStack.empty()) {
| 98 | +    CopyDescriptor &currentCopy{copyStack.top()};
| 99 | +    std::size_t &elements{currentCopy.elements_};
| 100 | +    if (elements == 0) {
| 101 | +      // This copy has been exhausted.
| 102 | +      if (currentCopy.usesStaticDescriptors_) {
| 103 | +        // Pop the static descriptors, if they were used
| 104 | +        // for the current copy.
| 105 | +        descriptorsStack.pop();
| 106 | +      }
| 107 | +      copyStack.pop();
| 108 | +      continue;
| 109 | +    }
| 110 | +    const Descriptor &curTo{currentCopy.to_};
| 111 | +    SubscriptValue *curToAt{currentCopy.toAt_};
| 112 | +    const Descriptor &curFrom{currentCopy.from_};
| 113 | +    SubscriptValue *curFromAt{currentCopy.fromAt_};
| 114 | +    char *toPtr{curTo.Element<char>(curToAt)};
| 115 | +    char *fromPtr{curFrom.Element<char>(curFromAt)};
| 116 | +    RUNTIME_CHECK(terminator, curTo.ElementBytes() == curFrom.ElementBytes());
| 117 | +    // TODO: the memcpy can be optimized when both to and from are contiguous.
| 118 | +    // Moreover, if we came here from a Component::Genre::Data component,
| 119 | +    // all the per-element copies are redundant, because the parent
| 120 | +    // has already been copied as a whole.
| 121 | +    std::memcpy(toPtr, fromPtr, curTo.ElementBytes());
| 122 | +    --elements;
| 123 | +    if (elements != 0) {
| 124 | +      curTo.IncrementSubscripts(curToAt);
| 125 | +      curFrom.IncrementSubscripts(curFromAt);
| 126 | +    }
| 127 | +
| 128 | +    // Deep copy allocatable and automatic components if any.
| 129 | +    if (const auto *addendum{curTo.Addendum()}) {
| 130 | +      if (const auto *derived{addendum->derivedType()};
| 131 | +          derived && !derived->noDestructionNeeded()) {
| 132 | +        RUNTIME_CHECK(terminator,
| 133 | +            curFrom.Addendum() && derived == curFrom.Addendum()->derivedType());
| 134 | +        const Descriptor &componentDesc{derived->component()};
| 135 | +        const typeInfo::Component *component{
| 136 | +            componentDesc.OffsetElement<typeInfo::Component>()};
| 137 | +        std::size_t nComponents{componentDesc.Elements()};
| 138 | +        for (std::size_t j{0}; j < nComponents; ++j, ++component) {
| 139 | +          if (component->genre() == typeInfo::Component::Genre::Allocatable ||
| 140 | +              component->genre() == typeInfo::Component::Genre::Automatic) {
| 141 | +            Descriptor &toDesc{
| 142 | +                *reinterpret_cast<Descriptor *>(toPtr + component->offset())};
| 143 | +            if (toDesc.raw().base_addr != nullptr) {
| 144 | +              toDesc.set_base_addr(nullptr);
| 145 | +              RUNTIME_CHECK(terminator, toDesc.Allocate() == CFI_SUCCESS);
| 146 | +              const Descriptor &fromDesc{*reinterpret_cast<const Descriptor *>(
| 147 | +                  fromPtr + component->offset())};
| 148 | +              copyStack.emplace(toDesc, fromDesc);
| 149 | +            }
| 150 | +          } else if (component->genre() == typeInfo::Component::Genre::Data &&
| 151 | +              component->derivedType() &&
| 152 | +              !component->derivedType()->noDestructionNeeded()) {
| 153 | +            SubscriptValue extents[maxRank];
| 154 | +            const typeInfo::Value *bounds{component->bounds()};
| 155 | +            std::size_t elements{1};
| 156 | +            for (int dim{0}; dim < component->rank(); ++dim) {
| 157 | +              SubscriptValue lb{bounds[2 * dim].GetValue(&curTo).value_or(0)};
| 158 | +              SubscriptValue ub{
| 159 | +                  bounds[2 * dim + 1].GetValue(&curTo).value_or(0)};
| 160 | +              extents[dim] = ub >= lb ? ub - lb + 1 : 0;
| 161 | +              elements *= extents[dim];
| 162 | +            }
| 163 | +            if (elements != 0) {
| 164 | +              const typeInfo::DerivedType &compType{*component->derivedType()};
| 165 | +              // Place a pair of static descriptors onto the descriptors stack.
| 166 | +              descriptorsStack.emplace();
| 167 | +              StaticDescriptorsPair &descs{descriptorsStack.top()};
| 168 | +              Descriptor &toCompDesc{descs.to.descriptor()};
| 169 | +              toCompDesc.Establish(compType, toPtr + component->offset(),
| 170 | +                  component->rank(), extents);
| 171 | +              Descriptor &fromCompDesc{descs.from.descriptor()};
| 172 | +              fromCompDesc.Establish(compType, fromPtr + component->offset(),
| 173 | +                  component->rank(), extents);
| 174 | +              copyStack.emplace(toCompDesc, fromCompDesc,
| 175 | +                  /*usesStaticDescriptors=*/true);
| 176 | +            }
57 | 177 |           }
58 | | -          const typeInfo::DerivedType &compType{*component->derivedType()};
59 | | -          StaticDescriptor<maxRank, true, 0> toStaticDescriptor;
60 | | -          Descriptor &toCompDesc{toStaticDescriptor.descriptor()};
61 | | -          toCompDesc.Establish(compType, toPtr + component->offset(),
62 | | -              component->rank(), extents);
63 | | -          StaticDescriptor<maxRank, true, 0> fromStaticDescriptor;
64 | | -          Descriptor &fromCompDesc{fromStaticDescriptor.descriptor()};
65 | | -          fromCompDesc.Establish(compType, fromPtr + component->offset(),
66 | | -              component->rank(), extents);
67 | | -          CopyArray(toCompDesc, fromCompDesc, terminator);
68 | 178 |         }
69 | 179 |       }
70 | 180 |     }
71 | 181 |   }
72 | 182 | }
73 | | -
74 | | -RT_API_ATTRS void CopyArray(
75 | | -    const Descriptor &to, const Descriptor &from, Terminator &terminator) {
76 | | -  std::size_t elements{to.Elements()};
77 | | -  RUNTIME_CHECK(terminator, elements == from.Elements());
78 | | -  SubscriptValue toAt[maxRank], fromAt[maxRank];
79 | | -  to.GetLowerBounds(toAt);
80 | | -  from.GetLowerBounds(fromAt);
81 | | -  while (elements-- > 0) {
82 | | -    CopyElement(to, toAt, from, fromAt, terminator);
83 | | -    to.IncrementSubscripts(toAt);
84 | | -    from.IncrementSubscripts(fromAt);
85 | | -  }
86 | | -}
87 | | -
88 | 183 | RT_OFFLOAD_API_GROUP_END
89 | 184 | } // namespace Fortran::runtime
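
Note: the crux of the change above is replacing recursion over derived-type components (the removed CopyElement/CopyArray pair, which called each other) with an explicit worklist: each pending (to, from) copy is pushed as a CopyDescriptor onto copyStack and processed iteratively. The stand-alone sketch below illustrates that pattern in isolation; the Value and Work types and the DeepCopy function are illustrative stand-ins only, not part of the flang runtime.

#include <cstddef>
#include <vector>

// Toy stand-in for an object whose components may themselves need a deep copy.
struct Value {
  int data{};
  std::vector<Value> components;
};

// Iterative deep copy: pending (to, from) pairs are pushed onto an explicit
// stack and drained in a loop, the same shape as the copyStack loop in the
// patch, instead of letting each component trigger a recursive call.
void DeepCopy(Value &to, const Value &from) {
  struct Work {
    Value *to;
    const Value *from;
  };
  std::vector<Work> stack;
  stack.push_back(Work{&to, &from});
  while (!stack.empty()) {
    Work w{stack.back()};
    stack.pop_back();
    w.to->data = w.from->data; // copy the element itself
    w.to->components.resize(w.from->components.size());
    // Queue each component for a later iteration rather than recursing now.
    for (std::size_t i{0}; i < w.from->components.size(); ++i) {
      stack.push_back(Work{&w.to->components[i], &w.from->components[i]});
    }
  }
}

As the in-code comments indicate, the device build sets both reserve counts to zero so that copyStack and descriptorsStack use dynamic allocation rather than large statically reserved frames.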