Skip to content

[libc++] Optimize std::getline #121346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libcxx/docs/ReleaseNotes/20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ Improvements and New Features

- Added :ref:`hardening mode <hardening>` support for ``forward_list`` and ``bitset``.

- The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@philnik777 Should this change be cherry-picked on to release/20.x branch or will that happen automatically?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would we cherry-pick this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's release-noted in the LLVM 20 release notes, but I guess it landed later than expected so it should actually have been documented in the LLVM 21 release notes. Is that possible? If so, let's move the release notes to the right file.


Deprecations and Removals
-------------------------

Expand Down
81 changes: 55 additions & 26 deletions libcxx/include/istream
Original file line number Diff line number Diff line change
Expand Up @@ -1263,41 +1263,70 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
ios_base::iostate __state = ios_base::goodbit;
typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
if (__sen) {
if (!__sen)
return __is;
# if _LIBCPP_HAS_EXCEPTIONS
try {
try {
# endif
__str.clear();
streamsize __extr = 0;
while (true) {
typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
if (_Traits::eq_int_type(__i, _Traits::eof())) {
__state |= ios_base::eofbit;
break;
__str.clear();

auto& __buffer = *__is.rdbuf();

auto __next = __buffer.sgetc();
for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) {
const auto* __first = __buffer.gptr();
const auto* __last = __buffer.egptr();
_CharT __1buf;

if (__first == __last) {
__1buf = __next;
__first = std::addressof(__1buf);
__last = std::addressof(__1buf) + 1;
}

auto __bump_stream = [&](ptrdiff_t __diff) {
if (__first == std::addressof(__1buf)) {
_LIBCPP_ASSERT_INTERNAL(__diff == 0 || __diff == 1, "trying to bump stream further than buffer size");
if (__diff != 0)
__buffer.sbumpc();
} else {
__buffer.__gbump_ptrdiff(__diff);
}
++__extr;
_CharT __ch = _Traits::to_char_type(__i);
if (_Traits::eq(__ch, __dlm))
break;
__str.push_back(__ch);
if (__str.size() == __str.max_size()) {
__state |= ios_base::failbit;
};

const auto* const __match = _Traits::find(__first, __last - __first, __dlm);
if (__match)
__last = __match;

if (auto __cap = __str.max_size() - __str.size(); __cap > static_cast<size_t>(__last - __first)) {
__str.append(__first, __last);
__bump_stream(__last - __first);

if (__match) {
__bump_stream(1); // Remove the matched character
break;
}
}
if (__extr == 0)
} else {
__str.append(__first, __cap);
__bump_stream(__cap);
__state |= ios_base::failbit;
# if _LIBCPP_HAS_EXCEPTIONS
} catch (...) {
__state |= ios_base::badbit;
__is.__setstate_nothrow(__state);
if (__is.exceptions() & ios_base::badbit) {
throw;
break;
}
}
# endif
__is.setstate(__state);

if (_Traits::eq_int_type(__next, _Traits::eof()))
__state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit);

# if _LIBCPP_HAS_EXCEPTIONS
} catch (...) {
__state |= ios_base::badbit;
__is.__setstate_nothrow(__state);
if (__is.exceptions() & ios_base::badbit) {
throw;
}
}
# endif
__is.setstate(__state);
return __is;
}

Expand Down
7 changes: 7 additions & 0 deletions libcxx/include/streambuf
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ protected:

inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }

// gbump takes an int, so it might not be able to represent the offset we want to add.
_LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }

inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
Expand Down Expand Up @@ -370,6 +373,10 @@ private:
char_type* __bout_ = nullptr;
char_type* __nout_ = nullptr;
char_type* __eout_ = nullptr;

template <class _CharT2, class _Traits2, class _Allocator>
_LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
};

extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_streambuf<char>;
Expand Down
34 changes: 34 additions & 0 deletions libcxx/test/benchmarks/streams/getline.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03

#include <istream>
#include <sstream>

#include <benchmark/benchmark.h>

void BM_getline_string(benchmark::State& state) {
std::istringstream iss;

std::string str;
str.reserve(128);
iss.str("A long string to let getline do some more work, making sure that longer strings are parsed fast enough");

for (auto _ : state) {
benchmark::DoNotOptimize(iss);

std::getline(iss, str);
benchmark::DoNotOptimize(str);
iss.seekg(0);
}
}

BENCHMARK(BM_getline_string);

BENCHMARK_MAIN();
Original file line number Diff line number Diff line change
Expand Up @@ -13,55 +13,42 @@
// getline(basic_istream<charT,traits>& is,
// basic_string<charT,traits,Allocator>& str);

#include <string>
#include <sstream>
#include <cassert>
#include <sstream>
#include <string>

#include "make_string.h"
#include "min_allocator.h"
#include "stream_types.h"
#include "test_macros.h"

template <template <class> class Alloc>
void test_string() {
using S = std::basic_string<char, std::char_traits<char>, Alloc<char> >;
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
using WS = std::basic_string<wchar_t, std::char_traits<wchar_t>, Alloc<wchar_t> >;
#endif
{
std::istringstream in(" abc\n def\n ghij");
S s("initial text");
std::getline(in, s);
assert(in.good());
assert(s == " abc");
std::getline(in, s);
assert(in.good());
assert(s == " def");
std::getline(in, s);
assert(in.eof());
assert(s == " ghij");
}
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
template <class CharT, class Alloc, class Stream, class Streambuf>
void test() {
using string_type = std::basic_string<CharT, std::char_traits<CharT>, Alloc>;
using stream_type = std::basic_istream<CharT>;
using streambuf_type = Streambuf;

{
std::wistringstream in(L" abc\n def\n ghij");
WS s(L"initial text");
streambuf_type sb(MAKE_CSTRING(CharT, " abc\n def\n ghij"));
stream_type in(&sb);
string_type s(MAKE_CSTRING(CharT, "initial text"));
std::getline(in, s);
assert(in.good());
assert(s == L" abc");
assert(s == MAKE_CSTRING(CharT, " abc"));
std::getline(in, s);
assert(in.good());
assert(s == L" def");
assert(s == MAKE_CSTRING(CharT, " def"));
std::getline(in, s);
assert(in.eof());
assert(s == L" ghij");
assert(s == MAKE_CSTRING(CharT, " ghij"));
}
#endif

#ifndef TEST_HAS_NO_EXCEPTIONS
{
std::basic_stringbuf<char> sb("hello");
std::basic_istream<char> is(&sb);
streambuf_type sb(MAKE_CSTRING(CharT, "hello"));
stream_type is(&sb);
is.exceptions(std::ios_base::eofbit);

S s;
string_type s;
bool threw = false;
try {
std::getline(is, s);
Expand All @@ -73,36 +60,14 @@ void test_string() {
assert(!is.fail());
assert(is.eof());
assert(threw);
assert(s == "hello");
assert(s == MAKE_CSTRING(CharT, "hello"));
}
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
std::basic_stringbuf<wchar_t> sb(L"hello");
std::basic_istream<wchar_t> is(&sb);
is.exceptions(std::ios_base::eofbit);

WS s;
bool threw = false;
try {
std::getline(is, s);
} catch (std::ios::failure const&) {
threw = true;
}

assert(!is.bad());
assert(!is.fail());
assert(is.eof());
assert(threw);
assert(s == L"hello");
}
# endif

{
std::basic_stringbuf<char> sb;
std::basic_istream<char> is(&sb);
streambuf_type sb(MAKE_CSTRING(CharT, ""));
stream_type is(&sb);
is.exceptions(std::ios_base::failbit);

S s;
string_type s;
bool threw = false;
try {
std::getline(is, s);
Expand All @@ -114,37 +79,48 @@ void test_string() {
assert(is.fail());
assert(is.eof());
assert(threw);
assert(s == "");
assert(s == MAKE_CSTRING(CharT, ""));
}
# ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
std::basic_stringbuf<wchar_t> sb;
std::basic_istream<wchar_t> is(&sb);
is.exceptions(std::ios_base::failbit);
#endif // TEST_HAS_NO_EXCEPTIONS
}

WS s;
bool threw = false;
try {
std::getline(is, s);
} catch (std::ios::failure const&) {
threw = true;
}
template <template <class> class Alloc>
void test_alloc() {
test<char, Alloc<char>, std::basic_istringstream<char>, std::basic_stringbuf<char> >();
test<char, Alloc<char>, std::basic_istringstream<char>, non_buffering_streambuf<char> >();
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, std::basic_stringbuf<wchar_t> >();
test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, non_buffering_streambuf<wchar_t> >();
#endif
}

assert(!is.bad());
assert(is.fail());
assert(is.eof());
assert(threw);
assert(s == L"");
void test_tiny_allocator() {
{
std::string in_str =
"this is a too long line for the string that has to be longer because the implementation is broken\n";
std::istringstream iss(in_str);
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char> > str;
std::getline(iss, str);
assert(iss.rdstate() & std::ios::failbit);
assert(str == in_str.substr(0, str.max_size()).c_str());
}
{
std::string in_str =
"this is a too long line for the string that has to be longer because the implementation is broken";
std::istringstream iss(in_str);
std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char> > str;
std::getline(iss, str);
assert(iss.rdstate() & std::ios::failbit);
assert(str == in_str.substr(0, str.max_size()).c_str());
}
# endif
#endif // TEST_HAS_NO_EXCEPTIONS
}

int main(int, char**) {
test_string<std::allocator>();
test_alloc<std::allocator>();
#if TEST_STD_VER >= 11
test_string<min_allocator>();
test_alloc<min_allocator>();
#endif
test_tiny_allocator();

return 0;
}
44 changes: 44 additions & 0 deletions libcxx/test/support/stream_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef TEST_SUPPORT_STREAM_TYPES_H
#define TEST_SUPPORT_STREAM_TYPES_H

#include <streambuf>
#include <string>
#include <utility>

template <class CharT>
class non_buffering_streambuf : public std::basic_streambuf<CharT> {
using char_type = CharT;
using traits_type = std::char_traits<CharT>;
using int_type = typename traits_type::int_type;

public:
non_buffering_streambuf(std::basic_string<char_type> underlying_data)
: underlying_data_(std::move(underlying_data)), index_(0) {}

protected:
int_type underflow() override {
if (index_ != underlying_data_.size())
return underlying_data_[index_];
return traits_type::eof();
}

int_type uflow() override {
if (index_ != underlying_data_.size())
return underlying_data_[index_++];
return traits_type::eof();
}

private:
std::basic_string<char_type> underlying_data_;
size_t index_;
};

#endif // TEST_SUPPORT_STREAM_TYPES_H
Loading