Skip to content

Commit c4741dc

Browse files
committed
[flang][runtime] Fix fixed-width field internal wide character input
There was some confusion about units (bytes vs. characters) in the handling of the amount of input remaining in fixed-width formatted input fields. Clarify that any variable or parameter counting "remaining" space in a field in the I/O runtime is always in units of bytes, and make it so where it wasn't. Rename many local variables so that their units (characters or bytes) are clearer. Fixes the bug(s) in llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03, although the test still won't pass because it depends on gfortran's list-directed output spacing.
1 parent 8f6f5ec commit c4741dc

File tree

2 files changed

+64
-51
lines changed

2 files changed

+64
-51
lines changed

flang/runtime/edit-input.cpp

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -894,48 +894,52 @@ static bool EditListDirectedCharacterInput(
894894
}
895895

896896
template <typename CHAR>
897-
bool EditCharacterInput(
898-
IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
897+
bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
898+
std::size_t lengthChars) {
899899
switch (edit.descriptor) {
900900
case DataEdit::ListDirected:
901-
return EditListDirectedCharacterInput(io, x, length, edit);
901+
return EditListDirectedCharacterInput(io, x, lengthChars, edit);
902902
case 'A':
903903
case 'G':
904904
break;
905905
case 'B':
906-
return EditBOZInput<1>(io, edit, x, length * sizeof *x);
906+
return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
907907
case 'O':
908-
return EditBOZInput<3>(io, edit, x, length * sizeof *x);
908+
return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
909909
case 'Z':
910-
return EditBOZInput<4>(io, edit, x, length * sizeof *x);
910+
return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
911911
default:
912912
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
913913
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
914914
edit.descriptor);
915915
return false;
916916
}
917917
const ConnectionState &connection{io.GetConnectionState()};
918-
std::size_t remaining{length};
918+
std::size_t remainingChars{lengthChars};
919+
// Skip leading characters.
920+
// Their bytes don't count towards INQUIRE(IOLENGTH=).
921+
std::size_t skipChars{0};
919922
if (edit.width && *edit.width > 0) {
920-
remaining = *edit.width;
923+
remainingChars = *edit.width;
924+
if (remainingChars > lengthChars) {
925+
skipChars = remainingChars - lengthChars;
926+
}
921927
}
922928
// When the field is wider than the variable, we drop the leading
923929
// characters. When the variable is wider than the field, there can be
924930
// trailing padding or an EOR condition.
925931
const char *input{nullptr};
926-
std::size_t ready{0};
927-
// Skip leading bytes.
928-
// These bytes don't count towards INQUIRE(IOLENGTH=).
929-
std::size_t skip{remaining > length ? remaining - length : 0};
932+
std::size_t readyBytes{0};
930933
// Transfer payload bytes; these do count.
931-
while (remaining > 0) {
932-
if (ready == 0) {
933-
ready = io.GetNextInputBytes(input);
934-
if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
935-
if (io.CheckForEndOfRecord(ready)) {
936-
if (ready == 0) {
934+
while (remainingChars > 0) {
935+
if (readyBytes == 0) {
936+
readyBytes = io.GetNextInputBytes(input);
937+
if (readyBytes == 0 ||
938+
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
939+
if (io.CheckForEndOfRecord(readyBytes)) {
940+
if (readyBytes == 0) {
937941
// PAD='YES' and no more data
938-
std::fill_n(x, length, ' ');
942+
std::fill_n(x, lengthChars, ' ');
939943
return !io.GetIoErrorHandler().InError();
940944
} else {
941945
// Do partial read(s) then pad on last iteration
@@ -945,63 +949,64 @@ bool EditCharacterInput(
945949
}
946950
}
947951
}
948-
std::size_t chunk;
949-
bool skipping{skip > 0};
952+
std::size_t chunkBytes;
953+
std::size_t chunkChars{1};
954+
bool skipping{skipChars > 0};
950955
if (connection.isUTF8) {
951-
chunk = MeasureUTF8Bytes(*input);
956+
chunkBytes = MeasureUTF8Bytes(*input);
952957
if (skipping) {
953-
--skip;
958+
--skipChars;
954959
} else if (auto ucs{DecodeUTF8(input)}) {
955960
*x++ = *ucs;
956-
--length;
957-
} else if (chunk == 0) {
961+
--lengthChars;
962+
} else if (chunkBytes == 0) {
958963
// error recovery: skip bad encoding
959-
chunk = 1;
964+
chunkBytes = 1;
960965
}
961-
--remaining;
962966
} else if (connection.internalIoCharKind > 1) {
963967
// Reading from non-default character internal unit
964-
chunk = connection.internalIoCharKind;
968+
chunkBytes = connection.internalIoCharKind;
965969
if (skipping) {
966-
--skip;
970+
--skipChars;
967971
} else {
968972
char32_t buffer{0};
969-
std::memcpy(&buffer, input, chunk);
973+
std::memcpy(&buffer, input, chunkBytes);
970974
*x++ = buffer;
971-
--length;
975+
--lengthChars;
972976
}
973-
--remaining;
974977
} else if constexpr (sizeof *x > 1) {
975978
// Read single byte with expansion into multi-byte CHARACTER
976-
chunk = 1;
979+
chunkBytes = 1;
977980
if (skipping) {
978-
--skip;
981+
--skipChars;
979982
} else {
980983
*x++ = static_cast<unsigned char>(*input);
981-
--length;
984+
--lengthChars;
982985
}
983-
--remaining;
984986
} else { // single bytes -> default CHARACTER
985987
if (skipping) {
986-
chunk = std::min<std::size_t>(skip, ready);
987-
skip -= chunk;
988+
chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
989+
chunkChars = chunkBytes;
990+
skipChars -= chunkChars;
988991
} else {
989-
chunk = std::min<std::size_t>(remaining, ready);
990-
std::memcpy(x, input, chunk);
991-
x += chunk;
992-
length -= chunk;
992+
chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
993+
chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
994+
chunkChars = chunkBytes;
995+
std::memcpy(x, input, chunkBytes);
996+
x += chunkBytes;
997+
lengthChars -= chunkChars;
993998
}
994-
remaining -= chunk;
995999
}
996-
input += chunk;
1000+
input += chunkBytes;
1001+
remainingChars -= chunkChars;
9971002
if (!skipping) {
998-
io.GotChar(chunk);
1003+
io.GotChar(chunkBytes);
9991004
}
1000-
io.HandleRelativePosition(chunk);
1001-
ready -= chunk;
1005+
io.HandleRelativePosition(chunkBytes);
1006+
readyBytes -= chunkBytes;
10021007
}
10031008
// Pad the remainder of the input variable, if any.
1004-
std::fill_n(x, length, ' ');
1009+
std::fill_n(x, lengthChars, ' ');
10051010
return CheckCompleteListDirectedField(io, edit);
10061011
}
10071012

flang/runtime/io-stmt.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class IoStatementState {
9292
std::size_t GetNextInputBytes(const char *&);
9393
bool AdvanceRecord(int = 1);
9494
void BackspaceRecord();
95-
void HandleRelativePosition(std::int64_t);
96-
void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
95+
void HandleRelativePosition(std::int64_t byteOffset);
96+
void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
9797
std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
9898
ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
9999
bool BeginReadingRecord();
@@ -124,7 +124,11 @@ class IoStatementState {
124124
// Vacant after the end of the current record
125125
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
126126

127-
// For fixed-width fields, return the number of remaining characters.
127+
// The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
128+
// are always in units of bytes, not characters; the distinction matters
129+
// for internal input from CHARACTER(KIND=2 and 4).
130+
131+
// For fixed-width fields, return the number of remaining bytes.
128132
// Skip over leading blanks.
129133
std::optional<int> CueUpInput(const DataEdit &edit) {
130134
std::optional<int> remaining;
@@ -134,6 +138,10 @@ class IoStatementState {
134138
} else {
135139
if (edit.width.value_or(0) > 0) {
136140
remaining = *edit.width;
141+
if (int bytesPerChar{GetConnectionState().internalIoCharKind};
142+
bytesPerChar > 1) {
143+
*remaining *= bytesPerChar;
144+
}
137145
}
138146
SkipSpaces(remaining);
139147
}

0 commit comments

Comments
 (0)