Skip to content

Commit bec051b

Browse files
committed
[libc++] Optimize std::getline
```
-----------------------------------------------
Benchmark                   old             new
-----------------------------------------------
BM_getline_string        318 ns         32.4 ns
```
1 parent f2f4eac commit bec051b

File tree

6 files changed

+198
-106
lines changed

6 files changed

+198
-106
lines changed

libcxx/docs/ReleaseNotes/20.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ Improvements and New Features
120120

121121
- Added :ref:`hardening mode <hardening>` support for ``forward_list`` and ``bitset``.
122122

123+
- The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.
124+
123125
Deprecations and Removals
124126
-------------------------
125127

libcxx/include/istream

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,41 +1263,70 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
12631263
getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
12641264
ios_base::iostate __state = ios_base::goodbit;
12651265
typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
1266-
if (__sen) {
1266+
if (!__sen)
1267+
return __is;
12671268
# if _LIBCPP_HAS_EXCEPTIONS
1268-
try {
1269+
try {
12691270
# endif
1270-
__str.clear();
1271-
streamsize __extr = 0;
1272-
while (true) {
1273-
typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
1274-
if (_Traits::eq_int_type(__i, _Traits::eof())) {
1275-
__state |= ios_base::eofbit;
1276-
break;
1271+
__str.clear();
1272+
1273+
auto& __buffer = *__is.rdbuf();
1274+
1275+
auto __next = __buffer.sgetc();
1276+
for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) {
1277+
const auto* __first = __buffer.gptr();
1278+
const auto* __last = __buffer.egptr();
1279+
_CharT __1buf;
1280+
1281+
if (__first == __last) {
1282+
__1buf = __next;
1283+
__first = std::addressof(__1buf);
1284+
__last = std::addressof(__1buf) + 1;
1285+
}
1286+
1287+
auto __bump_stream = [&](ptrdiff_t __diff) {
1288+
if (__first == std::addressof(__1buf)) {
1289+
_LIBCPP_ASSERT_INTERNAL(__diff == 0 || __diff == 1, "trying to bump stream further than buffer size");
1290+
if (__diff != 0)
1291+
__buffer.sbumpc();
1292+
} else {
1293+
__buffer.__gbump_ptrdiff(__diff);
12771294
}
1278-
++__extr;
1279-
_CharT __ch = _Traits::to_char_type(__i);
1280-
if (_Traits::eq(__ch, __dlm))
1281-
break;
1282-
__str.push_back(__ch);
1283-
if (__str.size() == __str.max_size()) {
1284-
__state |= ios_base::failbit;
1295+
};
1296+
1297+
const auto* const __match = _Traits::find(__first, __last - __first, __dlm);
1298+
if (__match)
1299+
__last = __match;
1300+
1301+
if (auto __cap = __str.max_size() - __str.size(); __cap > static_cast<size_t>(__last - __first)) {
1302+
__str.append(__first, __last);
1303+
__bump_stream(__last - __first);
1304+
1305+
if (__match) {
1306+
__bump_stream(1); // Remove the matched character
12851307
break;
12861308
}
1287-
}
1288-
if (__extr == 0)
1309+
} else {
1310+
__str.append(__first, __cap);
1311+
__bump_stream(__cap);
12891312
__state |= ios_base::failbit;
1290-
# if _LIBCPP_HAS_EXCEPTIONS
1291-
} catch (...) {
1292-
__state |= ios_base::badbit;
1293-
__is.__setstate_nothrow(__state);
1294-
if (__is.exceptions() & ios_base::badbit) {
1295-
throw;
1313+
break;
12961314
}
12971315
}
1298-
# endif
1299-
__is.setstate(__state);
1316+
1317+
if (_Traits::eq_int_type(__next, _Traits::eof()))
1318+
__state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit);
1319+
1320+
# if _LIBCPP_HAS_EXCEPTIONS
1321+
} catch (...) {
1322+
__state |= ios_base::badbit;
1323+
__is.__setstate_nothrow(__state);
1324+
if (__is.exceptions() & ios_base::badbit) {
1325+
throw;
1326+
}
13001327
}
1328+
# endif
1329+
__is.setstate(__state);
13011330
return __is;
13021331
}
13031332

libcxx/include/streambuf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ protected:
267267

268268
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }
269269

270+
// gbump takes an int, so it might not be able to represent the offset we want to add.
271+
_LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }
272+
270273
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
271274
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
272275
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
@@ -370,6 +373,10 @@ private:
370373
char_type* __bout_ = nullptr;
371374
char_type* __nout_ = nullptr;
372375
char_type* __eout_ = nullptr;
376+
377+
template <class _CharT2, class _Traits2, class _Allocator>
378+
_LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
379+
getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
373380
};
374381

375382
extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_streambuf<char>;
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03
10+
11+
#include <istream>
12+
#include <sstream>
13+
14+
#include <benchmark/benchmark.h>
15+
16+
// Benchmarks std::getline(istream&, string&) reading one long line from an
// in-memory stream. The input contains no '\n', so each extraction consumes the
// whole buffer; seekg(0) repositions the stream at the start for the next
// iteration. The destination string is pre-reserved so that allocation is not
// part of what gets measured.
void BM_getline_string(benchmark::State& state) {
  std::istringstream input(
      "A long string to let getline do some more work, making sure that longer strings are parsed fast enough");

  std::string line;
  line.reserve(128);

  for (auto _ : state) {
    // Keep the optimizer from reasoning about the stream's contents.
    benchmark::DoNotOptimize(input);

    std::getline(input, line);
    benchmark::DoNotOptimize(line);
    input.seekg(0);
  }
}
31+
32+
BENCHMARK(BM_getline_string);
33+
34+
BENCHMARK_MAIN();

libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line.pass.cpp

Lines changed: 56 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -13,55 +13,42 @@
1313
// getline(basic_istream<charT,traits>& is,
1414
// basic_string<charT,traits,Allocator>& str);
1515

16-
#include <string>
17-
#include <sstream>
1816
#include <cassert>
17+
#include <sstream>
18+
#include <string>
1919

20+
#include "make_string.h"
2021
#include "min_allocator.h"
22+
#include "stream_types.h"
2123
#include "test_macros.h"
2224

23-
template <template <class> class Alloc>
24-
void test_string() {
25-
using S = std::basic_string<char, std::char_traits<char>, Alloc<char> >;
26-
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
27-
using WS = std::basic_string<wchar_t, std::char_traits<wchar_t>, Alloc<wchar_t> >;
28-
#endif
29-
{
30-
std::istringstream in(" abc\n def\n ghij");
31-
S s("initial text");
32-
std::getline(in, s);
33-
assert(in.good());
34-
assert(s == " abc");
35-
std::getline(in, s);
36-
assert(in.good());
37-
assert(s == " def");
38-
std::getline(in, s);
39-
assert(in.eof());
40-
assert(s == " ghij");
41-
}
42-
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
25+
template <class CharT, class Alloc, class Stream, class Streambuf>
26+
void test() {
27+
using string_type = std::basic_string<CharT, std::char_traits<CharT>, Alloc>;
28+
using stream_type = std::basic_istream<CharT>;
29+
using streambuf_type = Streambuf;
30+
4331
{
44-
std::wistringstream in(L" abc\n def\n ghij");
45-
WS s(L"initial text");
32+
streambuf_type sb(MAKE_CSTRING(CharT, " abc\n def\n ghij"));
33+
stream_type in(&sb);
34+
string_type s(MAKE_CSTRING(CharT, "initial text"));
4635
std::getline(in, s);
4736
assert(in.good());
48-
assert(s == L" abc");
37+
assert(s == MAKE_CSTRING(CharT, " abc"));
4938
std::getline(in, s);
5039
assert(in.good());
51-
assert(s == L" def");
40+
assert(s == MAKE_CSTRING(CharT, " def"));
5241
std::getline(in, s);
5342
assert(in.eof());
54-
assert(s == L" ghij");
43+
assert(s == MAKE_CSTRING(CharT, " ghij"));
5544
}
56-
#endif
57-
5845
#ifndef TEST_HAS_NO_EXCEPTIONS
5946
{
60-
std::basic_stringbuf<char> sb("hello");
61-
std::basic_istream<char> is(&sb);
47+
streambuf_type sb(MAKE_CSTRING(CharT, "hello"));
48+
stream_type is(&sb);
6249
is.exceptions(std::ios_base::eofbit);
6350

64-
S s;
51+
string_type s;
6552
bool threw = false;
6653
try {
6754
std::getline(is, s);
@@ -73,36 +60,14 @@ void test_string() {
7360
assert(!is.fail());
7461
assert(is.eof());
7562
assert(threw);
76-
assert(s == "hello");
63+
assert(s == MAKE_CSTRING(CharT, "hello"));
7764
}
78-
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
7965
{
80-
std::basic_stringbuf<wchar_t> sb(L"hello");
81-
std::basic_istream<wchar_t> is(&sb);
82-
is.exceptions(std::ios_base::eofbit);
83-
84-
WS s;
85-
bool threw = false;
86-
try {
87-
std::getline(is, s);
88-
} catch (std::ios::failure const&) {
89-
threw = true;
90-
}
91-
92-
assert(!is.bad());
93-
assert(!is.fail());
94-
assert(is.eof());
95-
assert(threw);
96-
assert(s == L"hello");
97-
}
98-
# endif
99-
100-
{
101-
std::basic_stringbuf<char> sb;
102-
std::basic_istream<char> is(&sb);
66+
streambuf_type sb(MAKE_CSTRING(CharT, ""));
67+
stream_type is(&sb);
10368
is.exceptions(std::ios_base::failbit);
10469

105-
S s;
70+
string_type s;
10671
bool threw = false;
10772
try {
10873
std::getline(is, s);
@@ -114,37 +79,48 @@ void test_string() {
11479
assert(is.fail());
11580
assert(is.eof());
11681
assert(threw);
117-
assert(s == "");
82+
assert(s == MAKE_CSTRING(CharT, ""));
11883
}
119-
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
120-
{
121-
std::basic_stringbuf<wchar_t> sb;
122-
std::basic_istream<wchar_t> is(&sb);
123-
is.exceptions(std::ios_base::failbit);
84+
#endif // TEST_HAS_NO_EXCEPTIONS
85+
}
12486

125-
WS s;
126-
bool threw = false;
127-
try {
128-
std::getline(is, s);
129-
} catch (std::ios::failure const&) {
130-
threw = true;
131-
}
87+
template <template <class> class Alloc>
88+
void test_alloc() {
89+
test<char, Alloc<char>, std::basic_istringstream<char>, std::basic_stringbuf<char> >();
90+
test<char, Alloc<char>, std::basic_istringstream<char>, non_buffering_streambuf<char> >();
91+
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
92+
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, std::basic_stringbuf<wchar_t> >();
93+
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, non_buffering_streambuf<wchar_t> >();
94+
#endif
95+
}
13296

133-
assert(!is.bad());
134-
assert(is.fail());
135-
assert(is.eof());
136-
assert(threw);
137-
assert(s == L"");
97+
void test_tiny_allocator() {
98+
{
99+
std::string in_str =
100+
"this is a too long line for the string that has to be longer because the implementation is broken\n";
101+
std::istringstream iss(in_str);
102+
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char> > str;
103+
std::getline(iss, str);
104+
assert(iss.rdstate() & std::ios::failbit);
105+
assert(str == in_str.substr(0, str.max_size()).c_str());
106+
}
107+
{
108+
std::string in_str =
109+
"this is a too long line for the string that has to be longer because the implementation is broken";
110+
std::istringstream iss(in_str);
111+
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char> > str;
112+
std::getline(iss, str);
113+
assert(iss.rdstate() & std::ios::failbit);
114+
assert(str == in_str.substr(0, str.max_size()).c_str());
138115
}
139-
# endif
140-
#endif // TEST_HAS_NO_EXCEPTIONS
141116
}
142117

143118
int main(int, char**) {
144-
test_string<std::allocator>();
119+
test_alloc<std::allocator>();
145120
#if TEST_STD_VER >= 11
146-
test_string<min_allocator>();
121+
test_alloc<min_allocator>();
147122
#endif
123+
test_tiny_allocator();
148124

149125
return 0;
150126
}

libcxx/test/support/stream_types.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef TEST_SUPPORT_STREAM_TYPES_H
10+
#define TEST_SUPPORT_STREAM_TYPES_H
11+
12+
#include <streambuf>
13+
#include <string>
14+
#include <utility>
15+
16+
// A minimal read-only stream buffer that serves characters from an in-memory
// string without ever installing a get area (setg() is never called). Every
// read therefore goes through the virtual underflow()/uflow() hooks, which
// lets tests exercise the one-character-at-a-time path of stream algorithms.
template <class CharT>
class non_buffering_streambuf : public std::basic_streambuf<CharT> {
  using char_type   = CharT;
  using traits_type = std::char_traits<CharT>;
  using int_type    = typename traits_type::int_type;

public:
  // Takes ownership of the characters to serve; reading starts at index 0.
  non_buffering_streambuf(std::basic_string<char_type> underlying_data)
      : underlying_data_(std::move(underlying_data)), index_(0) {}

protected:
  // Returns the current character without consuming it, or eof() once all
  // characters have been read.
  int_type underflow() override {
    if (index_ != underlying_data_.size())
      // to_int_type() is required here: a plain conversion would turn a
      // negative char (e.g. '\xff' where char is signed) into eof().
      return traits_type::to_int_type(underlying_data_[index_]);
    return traits_type::eof();
  }

  // Returns the current character and advances past it, or eof() once all
  // characters have been read.
  int_type uflow() override {
    if (index_ != underlying_data_.size())
      return traits_type::to_int_type(underlying_data_[index_++]);
    return traits_type::eof();
  }

private:
  std::basic_string<char_type> underlying_data_; // characters still to be served
  size_t index_;                                 // position of the next character
};
43+
44+
#endif // TEST_SUPPORT_STREAM_TYPES_H

0 commit comments

Comments
 (0)