Skip to content

Commit 353d56d

Browse files
authored
[flang][runtime] Fix fixed-width field internal wide character input (#74683)
The handling of the amount of input remaining in fixed-width formatted input fields confused two units: bytes and characters. Clarify that any variable or parameter in the I/O runtime that counts the "remaining" space in a field is always in units of bytes, and fix the places where it was not. This fixes the bug(s) in llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03, although that test still won't pass because it depends on gfortran's list-directed output spacing.
1 parent 4f9cb79 commit 353d56d

File tree

2 files changed

+64
-51
lines changed

2 files changed

+64
-51
lines changed

flang/runtime/edit-input.cpp

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -916,48 +916,52 @@ static bool EditListDirectedCharacterInput(
916916
}
917917

918918
template <typename CHAR>
919-
bool EditCharacterInput(
920-
IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
919+
bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
920+
std::size_t lengthChars) {
921921
switch (edit.descriptor) {
922922
case DataEdit::ListDirected:
923-
return EditListDirectedCharacterInput(io, x, length, edit);
923+
return EditListDirectedCharacterInput(io, x, lengthChars, edit);
924924
case 'A':
925925
case 'G':
926926
break;
927927
case 'B':
928-
return EditBOZInput<1>(io, edit, x, length * sizeof *x);
928+
return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
929929
case 'O':
930-
return EditBOZInput<3>(io, edit, x, length * sizeof *x);
930+
return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
931931
case 'Z':
932-
return EditBOZInput<4>(io, edit, x, length * sizeof *x);
932+
return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
933933
default:
934934
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
935935
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
936936
edit.descriptor);
937937
return false;
938938
}
939939
const ConnectionState &connection{io.GetConnectionState()};
940-
std::size_t remaining{length};
940+
std::size_t remainingChars{lengthChars};
941+
// Skip leading characters.
942+
// Their bytes don't count towards INQUIRE(IOLENGTH=).
943+
std::size_t skipChars{0};
941944
if (edit.width && *edit.width > 0) {
942-
remaining = *edit.width;
945+
remainingChars = *edit.width;
946+
if (remainingChars > lengthChars) {
947+
skipChars = remainingChars - lengthChars;
948+
}
943949
}
944950
// When the field is wider than the variable, we drop the leading
945951
// characters. When the variable is wider than the field, there can be
946952
// trailing padding or an EOR condition.
947953
const char *input{nullptr};
948-
std::size_t ready{0};
949-
// Skip leading bytes.
950-
// These bytes don't count towards INQUIRE(IOLENGTH=).
951-
std::size_t skip{remaining > length ? remaining - length : 0};
954+
std::size_t readyBytes{0};
952955
// Transfer payload bytes; these do count.
953-
while (remaining > 0) {
954-
if (ready == 0) {
955-
ready = io.GetNextInputBytes(input);
956-
if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
957-
if (io.CheckForEndOfRecord(ready)) {
958-
if (ready == 0) {
956+
while (remainingChars > 0) {
957+
if (readyBytes == 0) {
958+
readyBytes = io.GetNextInputBytes(input);
959+
if (readyBytes == 0 ||
960+
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
961+
if (io.CheckForEndOfRecord(readyBytes)) {
962+
if (readyBytes == 0) {
959963
// PAD='YES' and no more data
960-
std::fill_n(x, length, ' ');
964+
std::fill_n(x, lengthChars, ' ');
961965
return !io.GetIoErrorHandler().InError();
962966
} else {
963967
// Do partial read(s) then pad on last iteration
@@ -967,63 +971,64 @@ bool EditCharacterInput(
967971
}
968972
}
969973
}
970-
std::size_t chunk;
971-
bool skipping{skip > 0};
974+
std::size_t chunkBytes;
975+
std::size_t chunkChars{1};
976+
bool skipping{skipChars > 0};
972977
if (connection.isUTF8) {
973-
chunk = MeasureUTF8Bytes(*input);
978+
chunkBytes = MeasureUTF8Bytes(*input);
974979
if (skipping) {
975-
--skip;
980+
--skipChars;
976981
} else if (auto ucs{DecodeUTF8(input)}) {
977982
*x++ = *ucs;
978-
--length;
979-
} else if (chunk == 0) {
983+
--lengthChars;
984+
} else if (chunkBytes == 0) {
980985
// error recovery: skip bad encoding
981-
chunk = 1;
986+
chunkBytes = 1;
982987
}
983-
--remaining;
984988
} else if (connection.internalIoCharKind > 1) {
985989
// Reading from non-default character internal unit
986-
chunk = connection.internalIoCharKind;
990+
chunkBytes = connection.internalIoCharKind;
987991
if (skipping) {
988-
--skip;
992+
--skipChars;
989993
} else {
990994
char32_t buffer{0};
991-
std::memcpy(&buffer, input, chunk);
995+
std::memcpy(&buffer, input, chunkBytes);
992996
*x++ = buffer;
993-
--length;
997+
--lengthChars;
994998
}
995-
--remaining;
996999
} else if constexpr (sizeof *x > 1) {
9971000
// Read single byte with expansion into multi-byte CHARACTER
998-
chunk = 1;
1001+
chunkBytes = 1;
9991002
if (skipping) {
1000-
--skip;
1003+
--skipChars;
10011004
} else {
10021005
*x++ = static_cast<unsigned char>(*input);
1003-
--length;
1006+
--lengthChars;
10041007
}
1005-
--remaining;
10061008
} else { // single bytes -> default CHARACTER
10071009
if (skipping) {
1008-
chunk = std::min<std::size_t>(skip, ready);
1009-
skip -= chunk;
1010+
chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
1011+
chunkChars = chunkBytes;
1012+
skipChars -= chunkChars;
10101013
} else {
1011-
chunk = std::min<std::size_t>(remaining, ready);
1012-
std::memcpy(x, input, chunk);
1013-
x += chunk;
1014-
length -= chunk;
1014+
chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
1015+
chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
1016+
chunkChars = chunkBytes;
1017+
std::memcpy(x, input, chunkBytes);
1018+
x += chunkBytes;
1019+
lengthChars -= chunkChars;
10151020
}
1016-
remaining -= chunk;
10171021
}
1018-
input += chunk;
1022+
input += chunkBytes;
1023+
remainingChars -= chunkChars;
10191024
if (!skipping) {
1020-
io.GotChar(chunk);
1025+
io.GotChar(chunkBytes);
10211026
}
1022-
io.HandleRelativePosition(chunk);
1023-
ready -= chunk;
1027+
io.HandleRelativePosition(chunkBytes);
1028+
readyBytes -= chunkBytes;
10241029
}
10251030
// Pad the remainder of the input variable, if any.
1026-
std::fill_n(x, length, ' ');
1031+
std::fill_n(x, lengthChars, ' ');
10271032
return CheckCompleteListDirectedField(io, edit);
10281033
}
10291034

flang/runtime/io-stmt.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class IoStatementState {
9292
std::size_t GetNextInputBytes(const char *&);
9393
bool AdvanceRecord(int = 1);
9494
void BackspaceRecord();
95-
void HandleRelativePosition(std::int64_t);
96-
void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
95+
void HandleRelativePosition(std::int64_t byteOffset);
96+
void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
9797
std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
9898
ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
9999
bool BeginReadingRecord();
@@ -124,7 +124,11 @@ class IoStatementState {
124124
// Vacant after the end of the current record
125125
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
126126

127-
// For fixed-width fields, return the number of remaining characters.
127+
// The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
128+
// are always in units of bytes, not characters; the distinction matters
129+
// for internal input from CHARACTER(KIND=2 and 4).
130+
131+
// For fixed-width fields, return the number of remaining bytes.
128132
// Skip over leading blanks.
129133
std::optional<int> CueUpInput(const DataEdit &edit) {
130134
std::optional<int> remaining;
@@ -134,6 +138,10 @@ class IoStatementState {
134138
} else {
135139
if (edit.width.value_or(0) > 0) {
136140
remaining = *edit.width;
141+
if (int bytesPerChar{GetConnectionState().internalIoCharKind};
142+
bytesPerChar > 1) {
143+
*remaining *= bytesPerChar;
144+
}
137145
}
138146
SkipSpaces(remaining);
139147
}

0 commit comments

Comments
 (0)