Skip to content

Commit 2b86fb2

Browse files
authored
[flang][runtime] Avoid recursive calls in F18 runtime CUDA build. (#87428)
Recursion in the call graph (even if the recursive calls are never executed) prevents computing the minimal stack size required for a kernel execution. This change disables some functionality of F18 I/O to avoid recursive calls. A couple of functions are rewritten to work without using recursion.
1 parent 324436c commit 2b86fb2

File tree

8 files changed

+118
-30
lines changed

8 files changed

+118
-30
lines changed

flang/include/flang/Common/api-attrs.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,18 @@
133133
#undef RT_DEVICE_COMPILATION
134134
#endif
135135

136+
/*
137+
* Recursion in the call graph prevents computing the minimal stack size
138+
* required for a kernel execution. This macro can be used to disable
139+
* some F18 runtime functionality that is implemented using recursive
140+
* function calls or to use an alternative implementation.
141+
*/
142+
#if (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
143+
#define RT_DEVICE_AVOID_RECURSION 1
144+
#else
145+
#undef RT_DEVICE_AVOID_RECURSION
146+
#endif
147+
136148
#if defined(__CUDACC__)
137149
#define RT_DIAG_PUSH _Pragma("nv_diagnostic push")
138150
#define RT_DIAG_POP _Pragma("nv_diagnostic pop")

flang/runtime/descriptor-io.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ static RT_API_ATTRS bool DefaultComponentIO(IoStatementState &io,
250250
const typeInfo::Component &component, const Descriptor &origDescriptor,
251251
const SubscriptValue origSubscripts[], Terminator &terminator,
252252
const NonTbpDefinedIoTable *table) {
253+
#if !defined(RT_DEVICE_AVOID_RECURSION)
253254
if (component.genre() == typeInfo::Component::Genre::Data) {
254255
// Create a descriptor for the component
255256
StaticDescriptor<maxRank, true, 16 /*?*/> statDesc;
@@ -266,6 +267,9 @@ static RT_API_ATTRS bool DefaultComponentIO(IoStatementState &io,
266267
const Descriptor &compDesc{*reinterpret_cast<const Descriptor *>(pointer)};
267268
return DescriptorIO<DIR>(io, compDesc, table);
268269
}
270+
#else
271+
terminator.Crash("not yet implemented: component IO");
272+
#endif
269273
}
270274

271275
template <Direction DIR>

flang/runtime/edit-output.cpp

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -751,43 +751,50 @@ RT_API_ATTRS bool RealOutputEditing<KIND>::EditEXOutput(const DataEdit &edit) {
751751

752752
template <int KIND>
753753
RT_API_ATTRS bool RealOutputEditing<KIND>::Edit(const DataEdit &edit) {
754-
switch (edit.descriptor) {
754+
const DataEdit *editPtr{&edit};
755+
DataEdit newEdit;
756+
if (editPtr->descriptor == 'G') {
757+
// Avoid recursive call as in Edit(EditForGOutput(edit)).
758+
newEdit = EditForGOutput(*editPtr);
759+
editPtr = &newEdit;
760+
RUNTIME_CHECK(io_.GetIoErrorHandler(), editPtr->descriptor != 'G');
761+
}
762+
switch (editPtr->descriptor) {
755763
case 'D':
756-
return EditEorDOutput(edit);
764+
return EditEorDOutput(*editPtr);
757765
case 'E':
758-
if (edit.variation == 'X') {
759-
return EditEXOutput(edit);
766+
if (editPtr->variation == 'X') {
767+
return EditEXOutput(*editPtr);
760768
} else {
761-
return EditEorDOutput(edit);
769+
return EditEorDOutput(*editPtr);
762770
}
763771
case 'F':
764-
return EditFOutput(edit);
772+
return EditFOutput(*editPtr);
765773
case 'B':
766-
return EditBOZOutput<1>(io_, edit,
774+
return EditBOZOutput<1>(io_, *editPtr,
767775
reinterpret_cast<const unsigned char *>(&x_),
768776
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
769777
case 'O':
770-
return EditBOZOutput<3>(io_, edit,
778+
return EditBOZOutput<3>(io_, *editPtr,
771779
reinterpret_cast<const unsigned char *>(&x_),
772780
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
773781
case 'Z':
774-
return EditBOZOutput<4>(io_, edit,
782+
return EditBOZOutput<4>(io_, *editPtr,
775783
reinterpret_cast<const unsigned char *>(&x_),
776784
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
777-
case 'G':
778-
return Edit(EditForGOutput(edit));
779785
case 'L':
780-
return EditLogicalOutput(io_, edit, *reinterpret_cast<const char *>(&x_));
786+
return EditLogicalOutput(
787+
io_, *editPtr, *reinterpret_cast<const char *>(&x_));
781788
case 'A': // legacy extension
782789
return EditCharacterOutput(
783-
io_, edit, reinterpret_cast<char *>(&x_), sizeof x_);
790+
io_, *editPtr, reinterpret_cast<char *>(&x_), sizeof x_);
784791
default:
785-
if (edit.IsListDirected()) {
786-
return EditListDirectedOutput(edit);
792+
if (editPtr->IsListDirected()) {
793+
return EditListDirectedOutput(*editPtr);
787794
}
788795
io_.GetIoErrorHandler().SignalError(IostatErrorInFormat,
789796
"Data edit descriptor '%c' may not be used with a REAL data item",
790-
edit.descriptor);
797+
editPtr->descriptor);
791798
return false;
792799
}
793800
return false;

flang/runtime/emit-encoded.h

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,26 @@
1818

1919
namespace Fortran::runtime::io {
2020

21-
template <typename CONTEXT, typename CHAR>
21+
template <typename CONTEXT, typename CHAR, bool NL_ADVANCES_RECORD = true>
2222
RT_API_ATTRS bool EmitEncoded(
2323
CONTEXT &to, const CHAR *data, std::size_t chars) {
2424
ConnectionState &connection{to.GetConnectionState()};
25-
if (connection.access == Access::Stream &&
26-
connection.internalIoCharKind == 0) {
27-
// Stream output: treat newlines as record advancements so that the left tab
28-
// limit is correctly managed
29-
while (const CHAR * nl{FindCharacter(data, CHAR{'\n'}, chars)}) {
30-
auto pos{static_cast<std::size_t>(nl - data)};
31-
if (!EmitEncoded(to, data, pos)) {
32-
return false;
25+
if constexpr (NL_ADVANCES_RECORD) {
26+
if (connection.access == Access::Stream &&
27+
connection.internalIoCharKind == 0) {
28+
// Stream output: treat newlines as record advancements so that the left
29+
// tab limit is correctly managed
30+
while (const CHAR * nl{FindCharacter(data, CHAR{'\n'}, chars)}) {
31+
auto pos{static_cast<std::size_t>(nl - data)};
32+
// The range [data, data + pos) does not contain a newline,
33+
// so we can avoid the recursion by calling the proper specialization.
34+
if (!EmitEncoded<CONTEXT, CHAR, false>(to, data, pos)) {
35+
return false;
36+
}
37+
data += pos + 1;
38+
chars -= pos + 1;
39+
to.AdvanceRecord();
3340
}
34-
data += pos + 1;
35-
chars -= pos + 1;
36-
to.AdvanceRecord();
3741
}
3842
}
3943
if (connection.useUTF8<CHAR>()) {

flang/runtime/io-stmt.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,11 @@ ExternalIoStatementBase::ExternalIoStatementBase(
220220

221221
MutableModes &ExternalIoStatementBase::mutableModes() {
222222
if (const ChildIo * child{unit_.GetChildIo()}) {
223+
#if !defined(RT_DEVICE_AVOID_RECURSION)
223224
return child->parent().mutableModes();
225+
#else
226+
ReportUnsupportedChildIo();
227+
#endif
224228
}
225229
return unit_.modes;
226230
}
@@ -891,17 +895,29 @@ ChildIoStatementState<DIR>::ChildIoStatementState(
891895

892896
template <Direction DIR>
893897
MutableModes &ChildIoStatementState<DIR>::mutableModes() {
898+
#if !defined(RT_DEVICE_AVOID_RECURSION)
894899
return child_.parent().mutableModes();
900+
#else
901+
ReportUnsupportedChildIo();
902+
#endif
895903
}
896904

897905
template <Direction DIR>
898906
ConnectionState &ChildIoStatementState<DIR>::GetConnectionState() {
907+
#if !defined(RT_DEVICE_AVOID_RECURSION)
899908
return child_.parent().GetConnectionState();
909+
#else
910+
ReportUnsupportedChildIo();
911+
#endif
900912
}
901913

902914
template <Direction DIR>
903915
ExternalFileUnit *ChildIoStatementState<DIR>::GetExternalFileUnit() const {
916+
#if !defined(RT_DEVICE_AVOID_RECURSION)
904917
return child_.parent().GetExternalFileUnit();
918+
#else
919+
ReportUnsupportedChildIo();
920+
#endif
905921
}
906922

907923
template <Direction DIR> int ChildIoStatementState<DIR>::EndIoStatement() {
@@ -914,22 +930,38 @@ template <Direction DIR> int ChildIoStatementState<DIR>::EndIoStatement() {
914930
template <Direction DIR>
915931
bool ChildIoStatementState<DIR>::Emit(
916932
const char *data, std::size_t bytes, std::size_t elementBytes) {
933+
#if !defined(RT_DEVICE_AVOID_RECURSION)
917934
return child_.parent().Emit(data, bytes, elementBytes);
935+
#else
936+
ReportUnsupportedChildIo();
937+
#endif
918938
}
919939

920940
template <Direction DIR>
921941
std::size_t ChildIoStatementState<DIR>::GetNextInputBytes(const char *&p) {
942+
#if !defined(RT_DEVICE_AVOID_RECURSION)
922943
return child_.parent().GetNextInputBytes(p);
944+
#else
945+
ReportUnsupportedChildIo();
946+
#endif
923947
}
924948

925949
template <Direction DIR>
926950
void ChildIoStatementState<DIR>::HandleAbsolutePosition(std::int64_t n) {
951+
#if !defined(RT_DEVICE_AVOID_RECURSION)
927952
return child_.parent().HandleAbsolutePosition(n);
953+
#else
954+
ReportUnsupportedChildIo();
955+
#endif
928956
}
929957

930958
template <Direction DIR>
931959
void ChildIoStatementState<DIR>::HandleRelativePosition(std::int64_t n) {
960+
#if !defined(RT_DEVICE_AVOID_RECURSION)
932961
return child_.parent().HandleRelativePosition(n);
962+
#else
963+
ReportUnsupportedChildIo();
964+
#endif
933965
}
934966

935967
template <Direction DIR, typename CHAR>
@@ -957,13 +989,21 @@ int ChildFormattedIoStatementState<DIR, CHAR>::EndIoStatement() {
957989

958990
template <Direction DIR, typename CHAR>
959991
bool ChildFormattedIoStatementState<DIR, CHAR>::AdvanceRecord(int n) {
992+
#if !defined(RT_DEVICE_AVOID_RECURSION)
960993
return this->child().parent().AdvanceRecord(n);
994+
#else
995+
this->ReportUnsupportedChildIo();
996+
#endif
961997
}
962998

963999
template <Direction DIR>
9641000
bool ChildUnformattedIoStatementState<DIR>::Receive(
9651001
char *data, std::size_t bytes, std::size_t elementBytes) {
1002+
#if !defined(RT_DEVICE_AVOID_RECURSION)
9661003
return this->child().parent().Receive(data, bytes, elementBytes);
1004+
#else
1005+
this->ReportUnsupportedChildIo();
1006+
#endif
9671007
}
9681008

9691009
template <Direction DIR> int ChildListIoStatementState<DIR>::EndIoStatement() {

flang/runtime/io-stmt.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,10 @@ class IoStatementBase : public IoErrorHandler {
296296

297297
RT_API_ATTRS void BadInquiryKeywordHashCrash(InquiryKeywordHash);
298298

299+
RT_API_ATTRS void ReportUnsupportedChildIo() const {
300+
Crash("not yet implemented: child IO");
301+
}
302+
299303
protected:
300304
bool completedOperation_{false};
301305
};

flang/runtime/unit.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ bool ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) {
206206
if (anyWriteSinceLastPositioning_ && access == Access::Sequential) {
207207
// Most Fortran implementations allow a READ after a WRITE;
208208
// the read then just hits an EOF.
209-
DoEndfile(handler);
209+
DoEndfile<false, Direction::Input>(handler);
210210
}
211211
recordLength.reset();
212212
RUNTIME_CHECK(handler, isUnformatted.has_value());
@@ -671,13 +671,23 @@ void ExternalFileUnit::DoImpliedEndfile(IoErrorHandler &handler) {
671671
impliedEndfile_ = false;
672672
}
673673

674+
template <bool ANY_DIR, Direction DIR>
674675
void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) {
675676
if (IsRecordFile() && access != Access::Direct) {
676677
furthestPositionInRecord =
677678
std::max(positionInRecord, furthestPositionInRecord);
678679
if (leftTabLimit) { // last I/O was non-advancing
679680
if (access == Access::Sequential && direction_ == Direction::Output) {
680-
AdvanceRecord(handler);
681+
if constexpr (ANY_DIR || DIR == Direction::Output) {
682+
// When DoEndfile() is called from BeginReadingRecord(),
683+
// this call to AdvanceRecord() may appear as a recursion
684+
// though it may never happen. Expose the call only
685+
// under the constexpr direction check.
686+
AdvanceRecord(handler);
687+
} else {
688+
// This check always fails if we are here.
689+
RUNTIME_CHECK(handler, direction_ != Direction::Output);
690+
}
681691
} else { // Access::Stream or input
682692
leftTabLimit.reset();
683693
++currentRecordNumber;
@@ -695,6 +705,12 @@ void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) {
695705
anyWriteSinceLastPositioning_ = false;
696706
}
697707

708+
template void ExternalFileUnit::DoEndfile(IoErrorHandler &handler);
709+
template void ExternalFileUnit::DoEndfile<false, Direction::Output>(
710+
IoErrorHandler &handler);
711+
template void ExternalFileUnit::DoEndfile<false, Direction::Input>(
712+
IoErrorHandler &handler);
713+
698714
void ExternalFileUnit::CommitWrites() {
699715
frameOffsetInFile_ +=
700716
recordOffsetInFrame_ + recordLength.value_or(furthestPositionInRecord);

flang/runtime/unit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ class ExternalFileUnit : public ConnectionState,
204204
RT_API_ATTRS void BackspaceVariableFormattedRecord(IoErrorHandler &);
205205
RT_API_ATTRS bool SetVariableFormattedRecordLength();
206206
RT_API_ATTRS void DoImpliedEndfile(IoErrorHandler &);
207+
template <bool ANY_DIR = true, Direction DIR = Direction::Output>
207208
RT_API_ATTRS void DoEndfile(IoErrorHandler &);
208209
RT_API_ATTRS void CommitWrites();
209210
RT_API_ATTRS bool CheckDirectAccess(IoErrorHandler &);

0 commit comments

Comments
 (0)