Skip to content

[flang][runtime] Fix fixed-width field internal wide character input #74683

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 53 additions & 48 deletions flang/runtime/edit-input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,48 +894,52 @@ static bool EditListDirectedCharacterInput(
}

template <typename CHAR>
bool EditCharacterInput(
IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
std::size_t lengthChars) {
switch (edit.descriptor) {
case DataEdit::ListDirected:
return EditListDirectedCharacterInput(io, x, length, edit);
return EditListDirectedCharacterInput(io, x, lengthChars, edit);
case 'A':
case 'G':
break;
case 'B':
return EditBOZInput<1>(io, edit, x, length * sizeof *x);
return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
case 'O':
return EditBOZInput<3>(io, edit, x, length * sizeof *x);
return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
case 'Z':
return EditBOZInput<4>(io, edit, x, length * sizeof *x);
return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
edit.descriptor);
return false;
}
const ConnectionState &connection{io.GetConnectionState()};
std::size_t remaining{length};
std::size_t remainingChars{lengthChars};
// Skip leading characters.
// Their bytes don't count towards INQUIRE(IOLENGTH=).
std::size_t skipChars{0};
if (edit.width && *edit.width > 0) {
remaining = *edit.width;
remainingChars = *edit.width;
if (remainingChars > lengthChars) {
skipChars = remainingChars - lengthChars;
}
}
// When the field is wider than the variable, we drop the leading
// characters. When the variable is wider than the field, there can be
// trailing padding or an EOR condition.
const char *input{nullptr};
std::size_t ready{0};
// Skip leading bytes.
// These bytes don't count towards INQUIRE(IOLENGTH=).
std::size_t skip{remaining > length ? remaining - length : 0};
std::size_t readyBytes{0};
// Transfer payload bytes; these do count.
while (remaining > 0) {
if (ready == 0) {
ready = io.GetNextInputBytes(input);
if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
if (io.CheckForEndOfRecord(ready)) {
if (ready == 0) {
while (remainingChars > 0) {
if (readyBytes == 0) {
readyBytes = io.GetNextInputBytes(input);
if (readyBytes == 0 ||
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
if (io.CheckForEndOfRecord(readyBytes)) {
if (readyBytes == 0) {
// PAD='YES' and no more data
std::fill_n(x, length, ' ');
std::fill_n(x, lengthChars, ' ');
return !io.GetIoErrorHandler().InError();
} else {
// Do partial read(s) then pad on last iteration
Expand All @@ -945,63 +949,64 @@ bool EditCharacterInput(
}
}
}
std::size_t chunk;
bool skipping{skip > 0};
std::size_t chunkBytes;
std::size_t chunkChars{1};
bool skipping{skipChars > 0};
if (connection.isUTF8) {
chunk = MeasureUTF8Bytes(*input);
chunkBytes = MeasureUTF8Bytes(*input);
if (skipping) {
--skip;
--skipChars;
} else if (auto ucs{DecodeUTF8(input)}) {
*x++ = *ucs;
--length;
} else if (chunk == 0) {
--lengthChars;
} else if (chunkBytes == 0) {
// error recovery: skip bad encoding
chunk = 1;
chunkBytes = 1;
}
--remaining;
} else if (connection.internalIoCharKind > 1) {
// Reading from non-default character internal unit
chunk = connection.internalIoCharKind;
chunkBytes = connection.internalIoCharKind;
if (skipping) {
--skip;
--skipChars;
} else {
char32_t buffer{0};
std::memcpy(&buffer, input, chunk);
std::memcpy(&buffer, input, chunkBytes);
*x++ = buffer;
--length;
--lengthChars;
}
--remaining;
} else if constexpr (sizeof *x > 1) {
// Read single byte with expansion into multi-byte CHARACTER
chunk = 1;
chunkBytes = 1;
if (skipping) {
--skip;
--skipChars;
} else {
*x++ = static_cast<unsigned char>(*input);
--length;
--lengthChars;
}
--remaining;
} else { // single bytes -> default CHARACTER
if (skipping) {
chunk = std::min<std::size_t>(skip, ready);
skip -= chunk;
chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
chunkChars = chunkBytes;
skipChars -= chunkChars;
} else {
chunk = std::min<std::size_t>(remaining, ready);
std::memcpy(x, input, chunk);
x += chunk;
length -= chunk;
chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
chunkChars = chunkBytes;
std::memcpy(x, input, chunkBytes);
x += chunkBytes;
lengthChars -= chunkChars;
}
remaining -= chunk;
}
input += chunk;
input += chunkBytes;
remainingChars -= chunkChars;
if (!skipping) {
io.GotChar(chunk);
io.GotChar(chunkBytes);
}
io.HandleRelativePosition(chunk);
ready -= chunk;
io.HandleRelativePosition(chunkBytes);
readyBytes -= chunkBytes;
}
// Pad the remainder of the input variable, if any.
std::fill_n(x, length, ' ');
std::fill_n(x, lengthChars, ' ');
return CheckCompleteListDirectedField(io, edit);
}

Expand Down
14 changes: 11 additions & 3 deletions flang/runtime/io-stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ class IoStatementState {
std::size_t GetNextInputBytes(const char *&);
bool AdvanceRecord(int = 1);
void BackspaceRecord();
void HandleRelativePosition(std::int64_t);
void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
void HandleRelativePosition(std::int64_t byteOffset);
void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
bool BeginReadingRecord();
Expand Down Expand Up @@ -124,7 +124,11 @@ class IoStatementState {
// Vacant after the end of the current record
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);

// For fixed-width fields, return the number of remaining characters.
// The "remaining" arguments to CueUpInput(), SkipSpaces(), and NextInField()
// are always in units of bytes, not characters; the distinction matters
// for internal input from CHARACTER(KIND=2) and CHARACTER(KIND=4) variables.

// For fixed-width fields, return the number of remaining bytes.
// Skip over leading blanks.
std::optional<int> CueUpInput(const DataEdit &edit) {
std::optional<int> remaining;
Expand All @@ -134,6 +138,10 @@ class IoStatementState {
} else {
if (edit.width.value_or(0) > 0) {
remaining = *edit.width;
if (int bytesPerChar{GetConnectionState().internalIoCharKind};
bytesPerChar > 1) {
*remaining *= bytesPerChar;
}
}
SkipSpaces(remaining);
}
Expand Down