Skip to content

Commit 18fe012

Browse files
authored
[flang][runtime] Formatted input optimizations (llvm#134715)
Make some minor tweaks (inlining, caching) to the formatted input path to improve integer input in a SPEC code. (None of the I/O library has been tuned yet for performance, and there are some easy optimizations for common cases.) Input integer values are now calculated with native C/C++ 128-bit integers. A benchmark that only reads about 5M lines of three integer values each speeds up from over 8 seconds to under 3 in my environment with these changes. If this works out, the code here can be used to optimize the formatted input paths for real and character data, too. Fixes llvm#134026.
1 parent 750d009 commit 18fe012

File tree

8 files changed

+219
-94
lines changed

8 files changed

+219
-94
lines changed

flang-rt/include/flang-rt/runtime/connection.h

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,36 @@ struct ConnectionAttributes {
4545
};
4646

4747
struct ConnectionState : public ConnectionAttributes {
48-
RT_API_ATTRS bool
49-
IsAtEOF() const; // true when read has hit EOF or endfile record
50-
RT_API_ATTRS bool
51-
IsAfterEndfile() const; // true after ENDFILE until repositioned
48+
// True when a read has hit EOF or an endfile record, i.e. an endfile record
// number is known and the current record number has reached or passed it.
RT_API_ATTRS bool IsAtEOF() const {
  if (!endfileRecordNumber) {
    return false; // no endfile record is known
  }
  return currentRecordNumber >= *endfileRecordNumber;
}
52+
// True after ENDFILE until repositioned, i.e. an endfile record number is
// known and the current record number is strictly beyond it.
RT_API_ATTRS bool IsAfterEndfile() const {
  if (!endfileRecordNumber) {
    return false; // no endfile record is known
  }
  return currentRecordNumber > *endfileRecordNumber;
}
5256

5357
// All positions and measurements are always in units of bytes,
5458
// not characters. Multi-byte character encodings are possible in
5559
// both internal I/O (when the character kind of the variable is 2 or 4)
5660
// and external formatted I/O (when the encoding is UTF-8).
57-
RT_API_ATTRS std::size_t RemainingSpaceInRecord() const;
58-
RT_API_ATTRS bool NeedAdvance(std::size_t) const;
59-
RT_API_ATTRS void HandleAbsolutePosition(std::int64_t);
60-
RT_API_ATTRS void HandleRelativePosition(std::int64_t);
61+
// Returns how many bytes are left in the current record after
// positionInRecord, or zero once the position is at or past the limit.
// The limit is recordLength when set, else openRecl when set, else the
// environment's list-directed output line length limit.
RT_API_ATTRS std::size_t RemainingSpaceInRecord() const {
  auto limit{recordLength.value_or(openRecl.value_or(
      executionEnvironment.listDirectedOutputLineLengthLimit))};
  if (positionInRecord >= limit) {
    return 0;
  }
  return limit - positionInRecord;
}
66+
// True when the record already has content (position past its start) and
// `width` more bytes would not fit in the space that remains.
RT_API_ATTRS bool NeedAdvance(std::size_t width) const {
  if (!(positionInRecord > 0)) {
    return false; // still at the beginning of the record
  }
  return width > RemainingSpaceInRecord();
}
69+
// Moves to absolute position `n` (negative values are treated as zero),
// measured from the left tab limit when one is set.
RT_API_ATTRS void HandleAbsolutePosition(std::int64_t n) {
  const std::int64_t nonNegative{n < 0 ? 0 : n};
  positionInRecord = nonNegative + leftTabLimit.value_or(0);
}
72+
// Moves the position in the current record by `n` bytes (which may be
// negative), never moving to the left of the left tab limit (or of zero
// when no left tab limit is set).
RT_API_ATTRS void HandleRelativePosition(std::int64_t n) {
  auto least{leftTabLimit.value_or(0)};
  auto newPos{positionInRecord + n};
  // Clamp without std::max so that no <algorithm> dependency is needed here.
  positionInRecord = newPos < least ? least : newPos;
}
6178

6279
RT_API_ATTRS void BeginRecord() {
6380
positionInRecord = 0;

flang-rt/include/flang-rt/runtime/io-stmt.h

Lines changed: 103 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -130,20 +130,94 @@ class IoStatementState {
130130
}
131131

132132
// Vacant after the end of the current record
133-
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
133+
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentCharSlow(
134134
std::size_t &byteCount);
135135

136+
// For faster formatted input editing, this structure can be built by
137+
// GetUpcomingFastAsciiField() and used to save significant time in
138+
// GetCurrentChar, NextInField() and other input utilities when the input
139+
// is buffered, does not require UTF-8 conversion, and comprises only
140+
// single byte characters.
141+
class FastAsciiField {
142+
public:
143+
RT_API_ATTRS FastAsciiField(ConnectionState &connection)
144+
: connection_{connection} {}
145+
RT_API_ATTRS FastAsciiField(
146+
ConnectionState &connection, const char *start, std::size_t bytes)
147+
: connection_{connection}, at_{start}, limit_{start + bytes} {
148+
CheckForAsterisk();
149+
}
150+
RT_API_ATTRS ConnectionState &connection() { return connection_; }
151+
RT_API_ATTRS std::size_t got() const { return got_; }
152+
153+
RT_API_ATTRS bool MustUseSlowPath() const { return at_ == nullptr; }
154+
155+
RT_API_ATTRS Fortran::common::optional<char32_t> Next() const {
156+
if (at_ && at_ < limit_) {
157+
return *at_;
158+
} else {
159+
return std::nullopt;
160+
}
161+
}
162+
RT_API_ATTRS void NextRecord(IoStatementState &io) {
163+
if (at_) {
164+
if (std::size_t bytes{io.GetNextInputBytes(at_)}) {
165+
limit_ = at_ + bytes;
166+
CheckForAsterisk();
167+
} else {
168+
at_ = limit_ = nullptr;
169+
}
170+
}
171+
}
172+
RT_API_ATTRS void Advance(int gotten, std::size_t bytes) {
173+
if (at_ && at_ < limit_) {
174+
++at_;
175+
got_ += gotten;
176+
}
177+
connection_.HandleRelativePosition(bytes);
178+
}
179+
RT_API_ATTRS bool MightHaveAsterisk() const { return !at_ || hasAsterisk_; }
180+
181+
private:
182+
RT_API_ATTRS void CheckForAsterisk() {
183+
hasAsterisk_ =
184+
at_ && at_ < limit_ && std::memchr(at_, '*', limit_ - at_) != nullptr;
185+
}
186+
187+
ConnectionState &connection_;
188+
const char *at_{nullptr};
189+
const char *limit_{nullptr};
190+
std::size_t got_{0}; // for READ(..., SIZE=)
191+
bool hasAsterisk_{false};
192+
};
193+
194+
RT_API_ATTRS FastAsciiField GetUpcomingFastAsciiField();
195+
196+
// Returns the current input character without consuming it.
// When `field` is supplied and can serve the request, the character comes
// straight from its buffer and byteCount is set to 1 (the fast path only
// holds single-byte characters).  When the field's buffer is exhausted, the
// result is vacant and byteCount is left unmodified.  When there is no
// field, or the field requires the slow path, this defers to
// GetCurrentCharSlow().
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
    std::size_t &byteCount, FastAsciiField *field = nullptr) {
  if (field) {
    if (auto ch{field->Next()}) {
      byteCount = 1; // ch is known to be present in this branch
      return ch;
    } else if (!field->MustUseSlowPath()) {
      // Fast buffer exhausted.  Use the project's nullopt to match the
      // declared Fortran::common::optional return type.
      return Fortran::common::nullopt;
    }
  }
  return GetCurrentCharSlow(byteCount);
}
208+
136209
// The result of CueUpInput() and the "remaining" arguments to SkipSpaces()
137210
// and NextInField() are always in units of bytes, not characters; the
138211
// distinction matters for internal input from CHARACTER(KIND=2 and 4).
139212

140213
// For fixed-width fields, return the number of remaining bytes.
141214
// Skip over leading blanks.
142-
RT_API_ATTRS Fortran::common::optional<int> CueUpInput(const DataEdit &edit) {
215+
RT_API_ATTRS Fortran::common::optional<int> CueUpInput(
216+
const DataEdit &edit, FastAsciiField *fastField = nullptr) {
143217
Fortran::common::optional<int> remaining;
144218
if (edit.IsListDirected()) {
145219
std::size_t byteCount{0};
146-
GetNextNonBlank(byteCount);
220+
GetNextNonBlank(byteCount, fastField);
147221
} else {
148222
if (edit.width.value_or(0) > 0) {
149223
remaining = *edit.width;
@@ -152,16 +226,17 @@ class IoStatementState {
152226
*remaining *= bytesPerChar;
153227
}
154228
}
155-
SkipSpaces(remaining);
229+
SkipSpaces(remaining, fastField);
156230
}
157231
return remaining;
158232
}
159233

160234
RT_API_ATTRS Fortran::common::optional<char32_t> SkipSpaces(
161-
Fortran::common::optional<int> &remaining) {
235+
Fortran::common::optional<int> &remaining,
236+
FastAsciiField *fastField = nullptr) {
162237
while (!remaining || *remaining > 0) {
163238
std::size_t byteCount{0};
164-
if (auto ch{GetCurrentChar(byteCount)}) {
239+
if (auto ch{GetCurrentChar(byteCount, fastField)}) {
165240
if (*ch != ' ' && *ch != '\t') {
166241
return ch;
167242
}
@@ -172,7 +247,11 @@ class IoStatementState {
172247
GotChar(byteCount);
173248
*remaining -= byteCount;
174249
}
175-
HandleRelativePosition(byteCount);
250+
if (fastField) {
251+
fastField->Advance(0, byteCount);
252+
} else {
253+
HandleRelativePosition(byteCount);
254+
}
176255
} else {
177256
break;
178257
}
@@ -183,25 +262,35 @@ class IoStatementState {
183262
// Acquires the next input character, respecting any applicable field width
184263
// or separator character.
185264
RT_API_ATTRS Fortran::common::optional<char32_t> NextInField(
186-
Fortran::common::optional<int> &remaining, const DataEdit &);
265+
Fortran::common::optional<int> &remaining, const DataEdit &,
266+
FastAsciiField *field = nullptr);
187267

188268
// Detect and signal any end-of-record condition after input.
189269
// Returns true if at EOR and remaining input should be padded with blanks.
190-
RT_API_ATTRS bool CheckForEndOfRecord(std::size_t afterReading);
270+
RT_API_ATTRS bool CheckForEndOfRecord(
271+
std::size_t afterReading, const ConnectionState &);
191272

192273
// Skips spaces, advances records, and ignores NAMELIST comments
193274
RT_API_ATTRS Fortran::common::optional<char32_t> GetNextNonBlank(
194-
std::size_t &byteCount) {
195-
auto ch{GetCurrentChar(byteCount)};
275+
std::size_t &byteCount, FastAsciiField *fastField = nullptr) {
276+
auto ch{GetCurrentChar(byteCount, fastField)};
196277
bool inNamelist{mutableModes().inNamelist};
197278
while (!ch || *ch == ' ' || *ch == '\t' || *ch == '\n' ||
198279
(inNamelist && *ch == '!')) {
199280
if (ch && (*ch == ' ' || *ch == '\t' || *ch == '\n')) {
200-
HandleRelativePosition(byteCount);
201-
} else if (!AdvanceRecord()) {
281+
if (fastField) {
282+
fastField->Advance(0, byteCount);
283+
} else {
284+
HandleRelativePosition(byteCount);
285+
}
286+
} else if (AdvanceRecord()) {
287+
if (fastField) {
288+
fastField->NextRecord(*this);
289+
}
290+
} else {
202291
return Fortran::common::nullopt;
203292
}
204-
ch = GetCurrentChar(byteCount);
293+
ch = GetCurrentChar(byteCount, fastField);
205294
}
206295
return ch;
207296
}

flang-rt/lib/runtime/connection.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,10 @@
99
#include "flang-rt/runtime/connection.h"
1010
#include "flang-rt/runtime/environment.h"
1111
#include "flang-rt/runtime/io-stmt.h"
12-
#include <algorithm>
1312

1413
namespace Fortran::runtime::io {
1514
RT_OFFLOAD_API_GROUP_BEGIN
1615

17-
RT_API_ATTRS std::size_t ConnectionState::RemainingSpaceInRecord() const {
18-
auto recl{recordLength.value_or(openRecl.value_or(
19-
executionEnvironment.listDirectedOutputLineLengthLimit))};
20-
return positionInRecord >= recl ? 0 : recl - positionInRecord;
21-
}
22-
23-
RT_API_ATTRS bool ConnectionState::NeedAdvance(std::size_t width) const {
24-
return positionInRecord > 0 && width > RemainingSpaceInRecord();
25-
}
26-
27-
RT_API_ATTRS bool ConnectionState::IsAtEOF() const {
28-
return endfileRecordNumber && currentRecordNumber >= *endfileRecordNumber;
29-
}
30-
31-
RT_API_ATTRS bool ConnectionState::IsAfterEndfile() const {
32-
return endfileRecordNumber && currentRecordNumber > *endfileRecordNumber;
33-
}
34-
35-
RT_API_ATTRS void ConnectionState::HandleAbsolutePosition(std::int64_t n) {
36-
positionInRecord = std::max(n, std::int64_t{0}) + leftTabLimit.value_or(0);
37-
}
38-
39-
RT_API_ATTRS void ConnectionState::HandleRelativePosition(std::int64_t n) {
40-
positionInRecord = std::max(leftTabLimit.value_or(0), positionInRecord + n);
41-
}
42-
4316
SavedPosition::SavedPosition(IoStatementState &io) : io_{io} {
4417
ConnectionState &conn{io_.GetConnectionState()};
4518
saved_ = conn;

flang-rt/lib/runtime/edit-input.cpp

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -169,17 +169,18 @@ static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
169169
// Prepares input from a field, and returns the sign, if any, else '\0'.
170170
static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
171171
const DataEdit &edit, Fortran::common::optional<char32_t> &next,
172-
Fortran::common::optional<int> &remaining) {
173-
remaining = io.CueUpInput(edit);
174-
next = io.NextInField(remaining, edit);
172+
Fortran::common::optional<int> &remaining,
173+
IoStatementState::FastAsciiField *fastField = nullptr) {
174+
remaining = io.CueUpInput(edit, fastField);
175+
next = io.NextInField(remaining, edit, fastField);
175176
char sign{'\0'};
176177
if (next) {
177178
if (*next == '-' || *next == '+') {
178179
sign = *next;
179180
if (!edit.IsListDirected()) {
180-
io.SkipSpaces(remaining);
181+
io.SkipSpaces(remaining, fastField);
181182
}
182-
next = io.NextInField(remaining, edit);
183+
next = io.NextInField(remaining, edit, fastField);
183184
}
184185
}
185186
return sign;
@@ -213,17 +214,18 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
213214
}
214215
Fortran::common::optional<int> remaining;
215216
Fortran::common::optional<char32_t> next;
216-
char sign{ScanNumericPrefix(io, edit, next, remaining)};
217+
auto fastField{io.GetUpcomingFastAsciiField()};
218+
char sign{ScanNumericPrefix(io, edit, next, remaining, &fastField)};
217219
if (sign == '-' && !isSigned) {
218220
io.GetIoErrorHandler().SignalError("Negative sign in UNSIGNED input field");
219221
return false;
220222
}
221-
common::UnsignedInt128 value{0};
223+
common::uint128_t value{0};
222224
bool any{!!sign};
223225
bool overflow{false};
224226
const char32_t comma{GetSeparatorChar(edit)};
225-
static constexpr auto maxu128{~common::UnsignedInt128{0}};
226-
for (; next; next = io.NextInField(remaining, edit)) {
227+
static constexpr auto maxu128{~common::uint128_t{0}};
228+
for (; next; next = io.NextInField(remaining, edit, &fastField)) {
227229
char32_t ch{*next};
228230
if (ch == ' ' || ch == '\t') {
229231
if (edit.modes.editingFlags & blankZero) {
@@ -243,7 +245,7 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
243245
// input, like a few other Fortran compilers do.
244246
// TODO: also process exponents? Some compilers do, but they obviously
245247
// can't just be ignored.
246-
while ((next = io.NextInField(remaining, edit))) {
248+
while ((next = io.NextInField(remaining, edit, &fastField))) {
247249
if (*next < '0' || *next > '9') {
248250
break;
249251
}
@@ -271,7 +273,7 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
271273
return false;
272274
}
273275
if (isSigned) {
274-
auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
276+
auto maxForKind{common::uint128_t{1} << ((8 * kind) - 1)};
275277
overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
276278
} else {
277279
auto maxForKind{maxu128 >> (((16 - kind) * 8) + (isSigned ? 1 : 0))};
@@ -287,7 +289,16 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
287289
}
288290
if (any || !io.GetIoErrorHandler().InError()) {
289291
// The value is stored in the lower order bits on big endian platform.
290-
// When memcpy, shift the value to the higher order bit.
292+
// For memcpy, shift the value to the highest order bits.
293+
#if USING_NATIVE_INT128_T
294+
auto shft{static_cast<int>(sizeof value - kind)};
295+
if (!isHostLittleEndian && shft >= 0) {
296+
auto l{value << shft};
297+
std::memcpy(n, &l, kind);
298+
} else {
299+
std::memcpy(n, &value, kind); // a blank field means zero
300+
}
301+
#else
291302
auto shft{static_cast<int>(sizeof(value.low())) - kind};
292303
// For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
293304
if (!isHostLittleEndian && shft >= 0) {
@@ -296,6 +307,8 @@ RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
296307
} else {
297308
std::memcpy(n, &value, kind); // a blank field means zero
298309
}
310+
#endif
311+
io.GotChar(fastField.got());
299312
return true;
300313
} else {
301314
return false;
@@ -1070,7 +1083,7 @@ RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
10701083
readyBytes = io.GetNextInputBytes(input);
10711084
if (readyBytes == 0 ||
10721085
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1073-
if (io.CheckForEndOfRecord(readyBytes)) {
1086+
if (io.CheckForEndOfRecord(readyBytes, connection)) {
10741087
if (readyBytes == 0) {
10751088
// PAD='YES' and no more data
10761089
Fortran::runtime::fill_n(x, lengthChars, ' ');

flang-rt/lib/runtime/io-api.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,14 +1057,15 @@ bool IODEF(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) {
10571057
}
10581058

10591059
bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) {
1060-
if (!cookie->CheckFormattedStmtType<Direction::Input>("InputInteger")) {
1061-
return false;
1060+
IoStatementState &io{*cookie};
1061+
if (io.BeginReadingRecord()) {
1062+
if (auto edit{io.GetNextDataEdit()}) {
1063+
return edit->descriptor == DataEdit::ListDirectedNullValue ||
1064+
EditIntegerInput(io, *edit, reinterpret_cast<void *>(&n), kind,
1065+
/*isSigned=*/true);
1066+
}
10621067
}
1063-
StaticDescriptor<0> staticDescriptor;
1064-
Descriptor &descriptor{staticDescriptor.descriptor()};
1065-
descriptor.Establish(
1066-
TypeCategory::Integer, kind, reinterpret_cast<void *>(&n), 0);
1067-
return descr::DescriptorIO<Direction::Input>(*cookie, descriptor);
1068+
return false;
10681069
}
10691070

10701071
bool IODEF(InputReal32)(Cookie cookie, float &x) {

0 commit comments

Comments
 (0)