Skip to content

[flang][runtime] Formatted input optimizations #134715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions flang-rt/include/flang-rt/runtime/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,36 @@ struct ConnectionAttributes {
};

struct ConnectionState : public ConnectionAttributes {
RT_API_ATTRS bool
IsAtEOF() const; // true when read has hit EOF or endfile record
RT_API_ATTRS bool
IsAfterEndfile() const; // true after ENDFILE until repositioned
RT_API_ATTRS bool IsAtEOF() const {
  // True when a read has hit EOF or an endfile record: an endfile record
  // number must be known and the current record must be at or beyond it.
  if (!endfileRecordNumber) {
    return false;
  }
  return currentRecordNumber >= *endfileRecordNumber;
}
RT_API_ATTRS bool IsAfterEndfile() const {
  // True after ENDFILE until repositioned: an endfile record number must be
  // known and the current record must be strictly beyond it.
  if (!endfileRecordNumber) {
    return false;
  }
  return currentRecordNumber > *endfileRecordNumber;
}

// All positions and measurements are always in units of bytes,
// not characters. Multi-byte character encodings are possible in
// both internal I/O (when the character kind of the variable is 2 or 4)
// and external formatted I/O (when the encoding is UTF-8).
RT_API_ATTRS std::size_t RemainingSpaceInRecord() const;
RT_API_ATTRS bool NeedAdvance(std::size_t) const;
RT_API_ATTRS void HandleAbsolutePosition(std::int64_t);
RT_API_ATTRS void HandleRelativePosition(std::int64_t);
RT_API_ATTRS std::size_t RemainingSpaceInRecord() const {
  // The effective record length is recordLength when set, otherwise openRecl,
  // otherwise the environment's list-directed output line length limit.
  auto limit{recordLength.value_or(openRecl.value_or(
      executionEnvironment.listDirectedOutputLineLengthLimit))};
  // Nothing remains once the position has reached or passed that length.
  if (positionInRecord >= limit) {
    return 0;
  }
  return limit - positionInRecord;
}
RT_API_ATTRS bool NeedAdvance(std::size_t width) const {
  // At the very start of a record there is never a need to advance,
  // no matter how wide the item is.
  if (!(positionInRecord > 0)) {
    return false;
  }
  // Otherwise advance only when "width" bytes will not fit in what remains.
  return width > RemainingSpaceInRecord();
}
RT_API_ATTRS void HandleAbsolutePosition(std::int64_t n) {
  // Negative targets are clamped to zero; the result is then offset by the
  // left tab limit (zero when none is set).
  const std::int64_t offset{n < 0 ? 0 : n};
  positionInRecord = offset + leftTabLimit.value_or(0);
}
RT_API_ATTRS void HandleRelativePosition(std::int64_t n) {
  // Moves the position in the record by n bytes (n may be negative),
  // never moving left of the left tab limit (zero when none is set).
  auto least{leftTabLimit.value_or(0)};
  auto newPos{positionInRecord + n};
  positionInRecord = newPos < least ? least : newPos;
}

RT_API_ATTRS void BeginRecord() {
positionInRecord = 0;
Expand Down
117 changes: 103 additions & 14 deletions flang-rt/include/flang-rt/runtime/io-stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,94 @@ class IoStatementState {
}

// Vacant after the end of the current record
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentCharSlow(
std::size_t &byteCount);

// For faster formatted input editing, this structure can be built by
// GetUpcomingFastAsciiField() and used to save significant time in
// GetCurrentChar, NextInField() and other input utilities when the input
// is buffered, does not require UTF-8 conversion, and comprises only
// single byte characters.
// Cursor over a buffered run of single-byte input characters.  While it
// holds a buffer (at_ != nullptr), characters are read directly from memory;
// otherwise callers must fall back to the slow path.
class FastAsciiField {
public:
  // Constructs a field with no buffer; MustUseSlowPath() will be true.
  RT_API_ATTRS FastAsciiField(ConnectionState &connection)
      : connection_{connection} {}
  // Constructs a field over the "bytes" bytes that begin at "start".
  RT_API_ATTRS FastAsciiField(
      ConnectionState &connection, const char *start, std::size_t bytes)
      : connection_{connection}, at_{start}, limit_{start + bytes} {
    CheckForAsterisk();
  }
  RT_API_ATTRS ConnectionState &connection() { return connection_; }
  // Running total of the "gotten" counts passed to Advance().
  RT_API_ATTRS std::size_t got() const { return got_; }

  // True when there is no buffer to read from directly.
  RT_API_ATTRS bool MustUseSlowPath() const { return at_ == nullptr; }

  // Returns the character at the cursor without consuming it, or an empty
  // optional when there is no buffer or the buffer is exhausted.
  RT_API_ATTRS Fortran::common::optional<char32_t> Next() const {
    if (at_ && at_ < limit_) {
      return *at_;
    } else {
      // Use the Fortran::common spelling so that this matches the
      // Fortran::common::optional return type in every build configuration.
      return Fortran::common::nullopt;
    }
  }
  // Re-targets the buffer after the I/O statement has moved to a new record.
  // Does nothing when already on the slow path; drops to the slow path when
  // no input bytes are available.
  RT_API_ATTRS void NextRecord(IoStatementState &io) {
    if (at_) {
      // NOTE(review): GetNextInputBytes() presumably repoints at_ at the new
      // record's data and returns its length -- confirm against its
      // definition.
      if (std::size_t bytes{io.GetNextInputBytes(at_)}) {
        limit_ = at_ + bytes;
        CheckForAsterisk();
      } else {
        at_ = limit_ = nullptr;
      }
    }
  }
  // Consumes one buffered character (when one is available), adding
  // "gotten" to the count reported by got(), and always moves the
  // connection's position in the record by "bytes".
  RT_API_ATTRS void Advance(int gotten, std::size_t bytes) {
    if (at_ && at_ < limit_) {
      ++at_;
      got_ += gotten;
    }
    connection_.HandleRelativePosition(bytes);
  }
  // Conservative: without a buffer the answer is unknown, so report true.
  RT_API_ATTRS bool MightHaveAsterisk() const { return !at_ || hasAsterisk_; }

private:
  // Records whether a '*' appears anywhere in the unread part of the buffer.
  RT_API_ATTRS void CheckForAsterisk() {
    hasAsterisk_ =
        at_ && at_ < limit_ && std::memchr(at_, '*', limit_ - at_) != nullptr;
  }

  ConnectionState &connection_;
  const char *at_{nullptr}; // next unread byte; null means slow path
  const char *limit_{nullptr}; // one past the last buffered byte
  std::size_t got_{0}; // for READ(..., SIZE=)
  bool hasAsterisk_{false};
};

// Builds the FastAsciiField used to speed up formatted input editing.
// NOTE(review): defined outside this view; presumably the result reports
// MustUseSlowPath() when the input is not buffered, needs UTF-8 conversion,
// or may contain multi-byte characters -- confirm against the definition.
RT_API_ATTRS FastAsciiField GetUpcomingFastAsciiField();

// Returns the current input character without consuming it, or an empty
// optional after the end of the current record.  When "field" is supplied
// and holds buffered data, the character comes straight from that buffer
// and byteCount is set to 1; otherwise GetCurrentCharSlow() does the work.
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
    std::size_t &byteCount, FastAsciiField *field = nullptr) {
  if (field) {
    if (auto ch{field->Next()}) {
      // Fast-path characters are always exactly one byte wide.
      byteCount = 1;
      return ch;
    } else if (!field->MustUseSlowPath()) {
      // The buffered field is exhausted; byteCount is left untouched.
      return Fortran::common::nullopt;
    }
  }
  return GetCurrentCharSlow(byteCount);
}

// The result of CueUpInput() and the "remaining" arguments to SkipSpaces()
// and NextInField() are always in units of bytes, not characters; the
// distinction matters for internal input from CHARACTER(KIND=2 and 4).

// For fixed-width fields, return the number of remaining bytes.
// Skip over leading blanks.
RT_API_ATTRS Fortran::common::optional<int> CueUpInput(const DataEdit &edit) {
RT_API_ATTRS Fortran::common::optional<int> CueUpInput(
const DataEdit &edit, FastAsciiField *fastField = nullptr) {
Fortran::common::optional<int> remaining;
if (edit.IsListDirected()) {
std::size_t byteCount{0};
GetNextNonBlank(byteCount);
GetNextNonBlank(byteCount, fastField);
} else {
if (edit.width.value_or(0) > 0) {
remaining = *edit.width;
Expand All @@ -152,16 +226,17 @@ class IoStatementState {
*remaining *= bytesPerChar;
}
}
SkipSpaces(remaining);
SkipSpaces(remaining, fastField);
}
return remaining;
}

RT_API_ATTRS Fortran::common::optional<char32_t> SkipSpaces(
Fortran::common::optional<int> &remaining) {
Fortran::common::optional<int> &remaining,
FastAsciiField *fastField = nullptr) {
while (!remaining || *remaining > 0) {
std::size_t byteCount{0};
if (auto ch{GetCurrentChar(byteCount)}) {
if (auto ch{GetCurrentChar(byteCount, fastField)}) {
if (*ch != ' ' && *ch != '\t') {
return ch;
}
Expand All @@ -172,7 +247,11 @@ class IoStatementState {
GotChar(byteCount);
*remaining -= byteCount;
}
HandleRelativePosition(byteCount);
if (fastField) {
fastField->Advance(0, byteCount);
} else {
HandleRelativePosition(byteCount);
}
} else {
break;
}
Expand All @@ -183,25 +262,35 @@ class IoStatementState {
// Acquires the next input character, respecting any applicable field width
// or separator character.
RT_API_ATTRS Fortran::common::optional<char32_t> NextInField(
Fortran::common::optional<int> &remaining, const DataEdit &);
Fortran::common::optional<int> &remaining, const DataEdit &,
FastAsciiField *field = nullptr);

// Detect and signal any end-of-record condition after input.
// Returns true if at EOR and remaining input should be padded with blanks.
RT_API_ATTRS bool CheckForEndOfRecord(std::size_t afterReading);
RT_API_ATTRS bool CheckForEndOfRecord(
std::size_t afterReading, const ConnectionState &);

// Skips spaces, advances records, and ignores NAMELIST comments
RT_API_ATTRS Fortran::common::optional<char32_t> GetNextNonBlank(
std::size_t &byteCount) {
auto ch{GetCurrentChar(byteCount)};
std::size_t &byteCount, FastAsciiField *fastField = nullptr) {
auto ch{GetCurrentChar(byteCount, fastField)};
bool inNamelist{mutableModes().inNamelist};
while (!ch || *ch == ' ' || *ch == '\t' || *ch == '\n' ||
(inNamelist && *ch == '!')) {
if (ch && (*ch == ' ' || *ch == '\t' || *ch == '\n')) {
HandleRelativePosition(byteCount);
} else if (!AdvanceRecord()) {
if (fastField) {
fastField->Advance(0, byteCount);
} else {
HandleRelativePosition(byteCount);
}
} else if (AdvanceRecord()) {
if (fastField) {
fastField->NextRecord(*this);
}
} else {
return Fortran::common::nullopt;
}
ch = GetCurrentChar(byteCount);
ch = GetCurrentChar(byteCount, fastField);
}
return ch;
}
Expand Down
27 changes: 0 additions & 27 deletions flang-rt/lib/runtime/connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,10 @@
#include "flang-rt/runtime/connection.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/io-stmt.h"
#include <algorithm>

namespace Fortran::runtime::io {
RT_OFFLOAD_API_GROUP_BEGIN

RT_API_ATTRS std::size_t ConnectionState::RemainingSpaceInRecord() const {
auto recl{recordLength.value_or(openRecl.value_or(
executionEnvironment.listDirectedOutputLineLengthLimit))};
return positionInRecord >= recl ? 0 : recl - positionInRecord;
}

RT_API_ATTRS bool ConnectionState::NeedAdvance(std::size_t width) const {
return positionInRecord > 0 && width > RemainingSpaceInRecord();
}

RT_API_ATTRS bool ConnectionState::IsAtEOF() const {
return endfileRecordNumber && currentRecordNumber >= *endfileRecordNumber;
}

RT_API_ATTRS bool ConnectionState::IsAfterEndfile() const {
return endfileRecordNumber && currentRecordNumber > *endfileRecordNumber;
}

RT_API_ATTRS void ConnectionState::HandleAbsolutePosition(std::int64_t n) {
positionInRecord = std::max(n, std::int64_t{0}) + leftTabLimit.value_or(0);
}

RT_API_ATTRS void ConnectionState::HandleRelativePosition(std::int64_t n) {
positionInRecord = std::max(leftTabLimit.value_or(0), positionInRecord + n);
}

SavedPosition::SavedPosition(IoStatementState &io) : io_{io} {
ConnectionState &conn{io_.GetConnectionState()};
saved_ = conn;
Expand Down
39 changes: 26 additions & 13 deletions flang-rt/lib/runtime/edit-input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,17 +169,18 @@ static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
// Prepares input from a field, and returns the sign, if any, else '\0'.
static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
const DataEdit &edit, Fortran::common::optional<char32_t> &next,
Fortran::common::optional<int> &remaining) {
remaining = io.CueUpInput(edit);
next = io.NextInField(remaining, edit);
Fortran::common::optional<int> &remaining,
IoStatementState::FastAsciiField *fastField = nullptr) {
remaining = io.CueUpInput(edit, fastField);
next = io.NextInField(remaining, edit, fastField);
char sign{'\0'};
if (next) {
if (*next == '-' || *next == '+') {
sign = *next;
if (!edit.IsListDirected()) {
io.SkipSpaces(remaining);
io.SkipSpaces(remaining, fastField);
}
next = io.NextInField(remaining, edit);
next = io.NextInField(remaining, edit, fastField);
}
}
return sign;
Expand Down Expand Up @@ -213,17 +214,18 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
}
Fortran::common::optional<int> remaining;
Fortran::common::optional<char32_t> next;
char sign{ScanNumericPrefix(io, edit, next, remaining)};
auto fastField{io.GetUpcomingFastAsciiField()};
char sign{ScanNumericPrefix(io, edit, next, remaining, &fastField)};
if (sign == '-' && !isSigned) {
io.GetIoErrorHandler().SignalError("Negative sign in UNSIGNED input field");
return false;
}
common::UnsignedInt128 value{0};
common::uint128_t value{0};
bool any{!!sign};
bool overflow{false};
const char32_t comma{GetSeparatorChar(edit)};
static constexpr auto maxu128{~common::UnsignedInt128{0}};
for (; next; next = io.NextInField(remaining, edit)) {
static constexpr auto maxu128{~common::uint128_t{0}};
for (; next; next = io.NextInField(remaining, edit, &fastField)) {
char32_t ch{*next};
if (ch == ' ' || ch == '\t') {
if (edit.modes.editingFlags & blankZero) {
Expand All @@ -243,7 +245,7 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
// input, like a few other Fortran compilers do.
// TODO: also process exponents? Some compilers do, but they obviously
// can't just be ignored.
while ((next = io.NextInField(remaining, edit))) {
while ((next = io.NextInField(remaining, edit, &fastField))) {
if (*next < '0' || *next > '9') {
break;
}
Expand Down Expand Up @@ -271,7 +273,7 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
return false;
}
if (isSigned) {
auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
auto maxForKind{common::uint128_t{1} << ((8 * kind) - 1)};
overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
} else {
auto maxForKind{maxu128 >> (((16 - kind) * 8) + (isSigned ? 1 : 0))};
Expand All @@ -287,7 +289,16 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
}
if (any || !io.GetIoErrorHandler().InError()) {
// The value is stored in the lower order bits on big endian platform.
// When memcpy, shift the value to the higher order bit.
// For memcpy, shift the value to the highest order bits.
#if USING_NATIVE_INT128_T
auto shft{static_cast<int>(sizeof value - kind)};
if (!isHostLittleEndian && shft >= 0) {
auto l{value << shft};
std::memcpy(n, &l, kind);
} else {
std::memcpy(n, &value, kind); // a blank field means zero
}
#else
auto shft{static_cast<int>(sizeof(value.low())) - kind};
// For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
if (!isHostLittleEndian && shft >= 0) {
Expand All @@ -296,6 +307,8 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
} else {
std::memcpy(n, &value, kind); // a blank field means zero
}
#endif
io.GotChar(fastField.got());
return true;
} else {
return false;
Expand Down Expand Up @@ -1070,7 +1083,7 @@ RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
readyBytes = io.GetNextInputBytes(input);
if (readyBytes == 0 ||
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
if (io.CheckForEndOfRecord(readyBytes)) {
if (io.CheckForEndOfRecord(readyBytes, connection)) {
if (readyBytes == 0) {
// PAD='YES' and no more data
Fortran::runtime::fill_n(x, lengthChars, ' ');
Expand Down
15 changes: 8 additions & 7 deletions flang-rt/lib/runtime/io-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,14 +1057,15 @@ bool IODEF(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) {
}

bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) {
if (!cookie->CheckFormattedStmtType<Direction::Input>("InputInteger")) {
return false;
IoStatementState &io{*cookie};
if (io.BeginReadingRecord()) {
if (auto edit{io.GetNextDataEdit()}) {
return edit->descriptor == DataEdit::ListDirectedNullValue ||
EditIntegerInput(io, *edit, reinterpret_cast<void *>(&n), kind,
/*isSigned=*/true);
}
}
StaticDescriptor<0> staticDescriptor;
Descriptor &descriptor{staticDescriptor.descriptor()};
descriptor.Establish(
TypeCategory::Integer, kind, reinterpret_cast<void *>(&n), 0);
return descr::DescriptorIO<Direction::Input>(*cookie, descriptor);
return false;
}

bool IODEF(InputReal32)(Cookie cookie, float &x) {
Expand Down
Loading