Skip to content

[libc++] Optimize bitset::to_string #128832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ Improvements and New Features
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.

- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.

Deprecations and Removals
-------------------------

Expand Down
48 changes: 42 additions & 6 deletions libcxx/include/bitset
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ template <size_t N> struct hash<std::bitset<N>>;
# include <__algorithm/fill_n.h>
# include <__algorithm/find.h>
# include <__assert>
# include <__bit/countr.h>
# include <__bit/invert_if.h>
# include <__bit_reference>
# include <__config>
# include <__cstddef/ptrdiff_t.h>
Expand Down Expand Up @@ -228,6 +230,21 @@ protected:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT;
_LIBCPP_HIDE_FROM_ABI size_t __hash_code() const _NOEXCEPT;

// Builds the string form of the stored bits, visiting only the bits that differ from the fill character.
//
// The result starts as _Size copies of the "majority" character (__zero when _Sparse, __one otherwise).
// Each storage word is inverted when !_Sparse, so in both modes the set bits of the working word are
// exactly the positions that must be overwritten with the "minority" character.
// Bit position p is written to string index _Size - 1 - p.
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
__to_string(_CharT __zero, _CharT __one) const {
  const _CharT __fill = _Sparse ? __zero : __one;
  const _CharT __mark = _Sparse ? __one : __zero;
  basic_string<_CharT, _Traits, _Allocator> __str(_Size, __fill);

  for (size_t __w = 0; __w != _N_words; ++__w) {
    // Index of the first bit held by this word.
    const size_t __offset    = __w * __bits_per_word;
    const size_t __remaining = _Size - __offset;

    __storage_type __pending = std::__invert_if<!_Sparse>(__first_[__w]);
    // A partially used last word: discard the bits at positions >= _Size so they are never written.
    if (__w + 1 == _N_words && __remaining < __bits_per_word)
      __pending &= (__storage_type(1) << __remaining) - 1;

    // Consume the set bits from lowest to highest; `x & (x - 1)` clears the lowest set bit.
    while (__pending) {
      __str[_Size - 1 - (__offset + std::__countr_zero(__pending))] = __mark;
      __pending &= (__pending - 1);
    }
  }

  return __str;
}

private:
# ifdef _LIBCPP_CXX03_LANG
void __init(unsigned long long __v, false_type) _NOEXCEPT;
Expand Down Expand Up @@ -483,6 +500,20 @@ protected:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const;
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const;

// Single-word counterpart of the string conversion: only the bits that differ from the fill
// character are visited.
//
// The result starts as _Size copies of __zero (when _Sparse) or __one (otherwise); the word is
// inverted when !_Sparse so that its set bits mark the positions to overwrite.
// Bit position p is written to string index _Size - 1 - p.
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
__to_string(_CharT __zero, _CharT __one) const {
  const _CharT __fill = _Sparse ? __zero : __one;
  const _CharT __mark = _Sparse ? __one : __zero;
  basic_string<_CharT, _Traits, _Allocator> __str(_Size, __fill);

  __storage_type __pending = std::__invert_if<!_Sparse>(__first_);
  // Discard the bits at positions >= _Size when the word is only partially used.
  if (_Size < __bits_per_word)
    __pending &= (__storage_type(1) << _Size) - 1;

  // Consume the set bits from lowest to highest; `x & (x - 1)` clears the lowest set bit.
  while (__pending) {
    const size_t __bit      = std::__countr_zero(__pending);
    __str[_Size - 1 - __bit] = __mark;
    __pending &= (__pending - 1);
  }
  return __str;
}

_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT;
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT;

Expand Down Expand Up @@ -594,6 +625,12 @@ protected:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const { return 0; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const { return 0; }

// String conversion for the storage variant that holds no bits: both character arguments are
// ignored and a value-initialized (empty) string is returned.
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
__to_string(_CharT, _CharT) const {
  typedef basic_string<_CharT, _Traits, _Allocator> __string_type;
  return __string_type();
}

_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT { return true; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT { return false; }

Expand Down Expand Up @@ -847,12 +884,11 @@ template <size_t _Size>
template <class _CharT, class _Traits, class _Allocator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
bitset<_Size>::to_string(_CharT __zero, _CharT __one) const {
// Returns a string of _Size characters in which bit i is rendered at index _Size - 1 - i,
// using __one for set bits and __zero for clear bits.
//
// NOTE(review): this hunk is reported as "-847,12 +884,11" in a file with 6 deletions, so the six
// lines from here through `return __r;` appear to be the removed pre-change implementation shown
// inline without diff markers, and the lines after it the replacement -- confirm against the
// actual header before treating this text as a single function body.
//
// Previous implementation: fill with __zero, then test every bit individually.
basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero);
for (size_t __i = 0; __i != _Size; ++__i) {
if ((*this)[__i])
__r[_Size - 1 - __i] = __one;
}
return __r;
// Replacement implementation: count the set bits once, then dispatch to the base-class
// __to_string specialization whose first template argument says whether fewer than half of the
// bits are set (strictly fewer than _Size / 2 counts as sparse).
bool __sparse = size_t(std::count(__base::__make_iter(0), __base::__make_iter(_Size), true)) < _Size / 2;
if (__sparse)
return __base::template __to_string<true, _CharT, _Traits, _Allocator>(__zero, __one);
else
return __base::template __to_string<false, _CharT, _Traits, _Allocator>(__zero, __one);
}

template <size_t _Size>
Expand Down
106 changes: 106 additions & 0 deletions libcxx/test/benchmarks/bitset.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03

#include "benchmark/benchmark.h"
#include <bitset>
#include <cmath>
#include <cstddef>
#include <random>

// Factory for randomly populated std::bitset<N> values used as benchmark inputs.
template <std::size_t N>
struct GenerateBitset {
  // Returns an N-bit bitset whose bits are each set independently with probability p.
  // p <= 0 yields an all-zero bitset and p >= 1 an all-one bitset, both without drawing
  // any random numbers. Otherwise the engine is seeded from std::random_device, so the
  // generated pattern differs from call to call.
  static std::bitset<N> generate(double p) {
    std::bitset<N> bits;
    if (p <= 0.0)
      return bits;
    if (p >= 1.0)
      return bits.set();

    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::bernoulli_distribution coin(p);
    for (std::size_t pos = 0; pos != N; ++pos)
      bits.set(pos, coin(engine));

    return bits;
  }

  // Convenience presets: roughly 10%, 90% and 50% of the bits set, respectively.
  static std::bitset<N> sparse() { return generate(0.1); }
  static std::bitset<N> dense() { return generate(0.9); }
  static std::bitset<N> uniform() { return generate(0.5); }
};

// Measures std::bitset<N>::to_string() on a randomly generated bitset.
// The benchmark argument (state.range(0)) is the percentage of bits expected to be set.
template <std::size_t N>
static void BM_BitsetToString(benchmark::State& state) {
  const double probability = state.range(0) / 100.0;
  // Generate the input once, outside the timed loop, and hide it from the optimizer.
  auto bits = GenerateBitset<N>::generate(probability);
  benchmark::DoNotOptimize(bits);

  for (auto _ : state) {
    // Keep the returned string alive so the conversion is not optimized away.
    benchmark::DoNotOptimize(bits.to_string());
  }
}

// Sparse bitset: Arg(10) is divided by 100 inside BM_BitsetToString, so each bit is set with
// probability 0.10. Sizes double from 32 bits up to 1048576 bits, and every registration gets an
// explicit name so the size and density show up in the benchmark report.
BENCHMARK(BM_BitsetToString<32>)->Arg(10)->Name("BM_BitsetToString<32>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<64>)->Arg(10)->Name("BM_BitsetToString<64>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<128>)->Arg(10)->Name("BM_BitsetToString<128>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<256>)->Arg(10)->Name("BM_BitsetToString<256>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<512>)->Arg(10)->Name("BM_BitsetToString<512>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<1024>)->Arg(10)->Name("BM_BitsetToString<1024>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<2048>)->Arg(10)->Name("BM_BitsetToString<2048>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<4096>)->Arg(10)->Name("BM_BitsetToString<4096>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<8192>)->Arg(10)->Name("BM_BitsetToString<8192>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<16384>)->Arg(10)->Name("BM_BitsetToString<16384>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<32768>)->Arg(10)->Name("BM_BitsetToString<32768>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<65536>)->Arg(10)->Name("BM_BitsetToString<65536>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<131072>)->Arg(10)->Name("BM_BitsetToString<131072>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<262144>)->Arg(10)->Name("BM_BitsetToString<262144>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<524288>)->Arg(10)->Name("BM_BitsetToString<524288>/Sparse (10%)");
BENCHMARK(BM_BitsetToString<1048576>)->Arg(10)->Name("BM_BitsetToString<1048576>/Sparse (10%)"); // 1 << 20

// Dense bitset: Arg(90) is divided by 100 inside BM_BitsetToString, so each bit is set with
// probability 0.90. The same power-of-two sizes (32 to 1048576 bits) are covered.
BENCHMARK(BM_BitsetToString<32>)->Arg(90)->Name("BM_BitsetToString<32>/Dense (90%)");
BENCHMARK(BM_BitsetToString<64>)->Arg(90)->Name("BM_BitsetToString<64>/Dense (90%)");
BENCHMARK(BM_BitsetToString<128>)->Arg(90)->Name("BM_BitsetToString<128>/Dense (90%)");
BENCHMARK(BM_BitsetToString<256>)->Arg(90)->Name("BM_BitsetToString<256>/Dense (90%)");
BENCHMARK(BM_BitsetToString<512>)->Arg(90)->Name("BM_BitsetToString<512>/Dense (90%)");
BENCHMARK(BM_BitsetToString<1024>)->Arg(90)->Name("BM_BitsetToString<1024>/Dense (90%)");
BENCHMARK(BM_BitsetToString<2048>)->Arg(90)->Name("BM_BitsetToString<2048>/Dense (90%)");
BENCHMARK(BM_BitsetToString<4096>)->Arg(90)->Name("BM_BitsetToString<4096>/Dense (90%)");
BENCHMARK(BM_BitsetToString<8192>)->Arg(90)->Name("BM_BitsetToString<8192>/Dense (90%)");
BENCHMARK(BM_BitsetToString<16384>)->Arg(90)->Name("BM_BitsetToString<16384>/Dense (90%)");
BENCHMARK(BM_BitsetToString<32768>)->Arg(90)->Name("BM_BitsetToString<32768>/Dense (90%)");
BENCHMARK(BM_BitsetToString<65536>)->Arg(90)->Name("BM_BitsetToString<65536>/Dense (90%)");
BENCHMARK(BM_BitsetToString<131072>)->Arg(90)->Name("BM_BitsetToString<131072>/Dense (90%)");
BENCHMARK(BM_BitsetToString<262144>)->Arg(90)->Name("BM_BitsetToString<262144>/Dense (90%)");
BENCHMARK(BM_BitsetToString<524288>)->Arg(90)->Name("BM_BitsetToString<524288>/Dense (90%)");
BENCHMARK(BM_BitsetToString<1048576>)->Arg(90)->Name("BM_BitsetToString<1048576>/Dense (90%)"); // 1 << 20

// Uniform bitset: Arg(50) is divided by 100 inside BM_BitsetToString, so each bit is set with
// probability 0.50. The same power-of-two sizes (32 to 1048576 bits) are covered.
BENCHMARK(BM_BitsetToString<32>)->Arg(50)->Name("BM_BitsetToString<32>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<64>)->Arg(50)->Name("BM_BitsetToString<64>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<128>)->Arg(50)->Name("BM_BitsetToString<128>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<256>)->Arg(50)->Name("BM_BitsetToString<256>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<512>)->Arg(50)->Name("BM_BitsetToString<512>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<1024>)->Arg(50)->Name("BM_BitsetToString<1024>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<2048>)->Arg(50)->Name("BM_BitsetToString<2048>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<4096>)->Arg(50)->Name("BM_BitsetToString<4096>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<8192>)->Arg(50)->Name("BM_BitsetToString<8192>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<16384>)->Arg(50)->Name("BM_BitsetToString<16384>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<32768>)->Arg(50)->Name("BM_BitsetToString<32768>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<65536>)->Arg(50)->Name("BM_BitsetToString<65536>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<131072>)->Arg(50)->Name("BM_BitsetToString<131072>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<262144>)->Arg(50)->Name("BM_BitsetToString<262144>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<524288>)->Arg(50)->Name("BM_BitsetToString<524288>/Uniform (50%)");
BENCHMARK(BM_BitsetToString<1048576>)->Arg(50)->Name("BM_BitsetToString<1048576>/Uniform (50%)"); // 1 << 20

// Expands to the program entry point that runs every benchmark registered above.
BENCHMARK_MAIN();
Loading