Skip to content

Commit 81b8135

Browse files
authored
[libc++] Optimize bitset::to_string (#128832)
This patch optimizes `bitset::to_string` by replacing the existing bit-by-bit processing with a more efficient bit traversal strategy. Instead of checking each bit sequentially, we leverage `std::__countr_zero` to efficiently locate the next set bit, skipping over consecutive zero bits. This greatly accelerates the conversion process, especially for sparse `bitset`s where zero bits dominate. To ensure similar improvements for dense `bitset`s, we exploit symmetry by inverting the bit pattern, allowing us to apply the same optimized traversal technique. Even for uniformly distributed `bitset`s, the proposed approach offers measurable performance gains over the existing implementation. Benchmarks demonstrate substantial improvements, achieving up to 13.5x speedup for sparse `bitset`s with `Pr(true bit) = 0.1`, 16.1x for dense `bitset`s with `Pr(true bit) = 0.9`, and 8.3x for uniformly distributed `bitset`s with `Pr(true bit) = 0.5`.
1 parent fc9898c commit 81b8135

File tree

3 files changed

+151
-6
lines changed

3 files changed

+151
-6
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ Improvements and New Features
7373
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
7474
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
7575

76+
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
77+
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
78+
7679
Deprecations and Removals
7780
-------------------------
7881

libcxx/include/bitset

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ template <size_t N> struct hash<std::bitset<N>>;
137137
# include <__algorithm/fill_n.h>
138138
# include <__algorithm/find.h>
139139
# include <__assert>
140+
# include <__bit/countr.h>
141+
# include <__bit/invert_if.h>
140142
# include <__bit_reference>
141143
# include <__config>
142144
# include <__cstddef/ptrdiff_t.h>
@@ -228,6 +230,21 @@ protected:
228230
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT;
229231
_LIBCPP_HIDE_FROM_ABI size_t __hash_code() const _NOEXCEPT;
230232

233+
// Builds the string form of a bitset stored in several words (__first_[0 .. _N_words - 1]).
// The result has _Size characters and bit 0 is written to the last character.
// With _Sparse == true the string starts as all __zero and __one is written for each visited bit;
// with _Sparse == false the roles of __zero and __one are swapped and the words are passed through
// std::__invert_if<true> first (presumably a bitwise complement -- confirm against <__bit/invert_if.h>).
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
234+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
235+
__to_string(_CharT __zero, _CharT __one) const {
236+
// Pre-fill every position; the loops below only overwrite the positions of the visited bits.
basic_string<_CharT, _Traits, _Allocator> __r(_Size, _Sparse ? __zero : __one);
237+
// __bits is the index of the first bit held by word __i.
for (size_t __i = 0, __bits = 0; __i < _N_words; ++__i, __bits += __bits_per_word) {
238+
__storage_type __word = std::__invert_if<!_Sparse>(__first_[__i]);
239+
// Only the last word can be partially used: keep its low (_Size - __bits) bits so that
// positions at or beyond _Size are never visited.
if (__i == _N_words - 1 && _Size - __bits < __bits_per_word)
240+
__word &= (__storage_type(1) << (_Size - __bits)) - 1;
241+
// `__word &= (__word - 1)` clears the lowest set bit, so the loop runs once per set bit.
for (; __word; __word &= (__word - 1))
242+
__r[_Size - 1 - (__bits + std::__countr_zero(__word))] = _Sparse ? __one : __zero;
243+
}
244+
245+
return __r;
246+
}
247+
231248
private:
232249
# ifdef _LIBCPP_CXX03_LANG
233250
void __init(unsigned long long __v, false_type) _NOEXCEPT;
@@ -483,6 +500,20 @@ protected:
483500
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const;
484501
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const;
485502

503+
// Builds the string form of a bitset whose bits all live in the single word __first_.
// The result has _Size characters and bit 0 is written to the last character.
// With _Sparse == true the string starts as all __zero and __one is written for each visited bit;
// with _Sparse == false the roles are swapped and the word is passed through std::__invert_if<true>
// first (presumably a bitwise complement -- confirm against <__bit/invert_if.h>).
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
504+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
505+
__to_string(_CharT __zero, _CharT __one) const {
506+
// Pre-fill every position; the loop below only overwrites the positions of the visited bits.
basic_string<_CharT, _Traits, _Allocator> __r(_Size, _Sparse ? __zero : __one);
507+
__storage_type __word = std::__invert_if<!_Sparse>(__first_);
508+
// Keep only the low _Size bits when the word is not fully used, so no position >= _Size is visited.
if (_Size < __bits_per_word)
509+
__word &= (__storage_type(1) << _Size) - 1;
510+
// `__word &= (__word - 1)` clears the lowest set bit, so the loop runs once per set bit.
for (; __word; __word &= (__word - 1)) {
511+
size_t __pos = std::__countr_zero(__word);
512+
__r[_Size - 1 - __pos] = _Sparse ? __one : __zero;
513+
}
514+
return __r;
515+
}
516+
486517
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT;
487518
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT;
488519

@@ -594,6 +625,12 @@ protected:
594625
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const { return 0; }
595626
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const { return 0; }
596627

628+
// Counterpart of __to_string for the storage specialization that holds no bits:
// both character arguments are ignored and a default-constructed (empty) string is returned.
template <bool _Sparse, class _CharT, class _Traits, class _Allocator>
629+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
630+
__to_string(_CharT, _CharT) const {
631+
return basic_string<_CharT, _Traits, _Allocator>();
632+
}
633+
597634
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT { return true; }
598635
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT { return false; }
599636

@@ -847,12 +884,11 @@ template <size_t _Size>
847884
// Returns the bitset as a string of _Size characters, __one for set bits and __zero for clear bits,
// with bit 0 in the last character.
// This hunk shows both versions of the body: gutter lines ending in '-' precede the removed
// bit-by-bit loop, gutter lines ending in '+' precede its replacement.
template <class _CharT, class _Traits, class _Allocator>
848885
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
849886
bitset<_Size>::to_string(_CharT __zero, _CharT __one) const {
850-
basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero);
851-
for (size_t __i = 0; __i != _Size; ++__i) {
852-
if ((*this)[__i])
853-
__r[_Size - 1 - __i] = __one;
854-
}
855-
return __r;
887+
// Count the set bits once: fewer than _Size / 2 selects the _Sparse = true helper,
// anything else selects the _Sparse = false helper.
bool __sparse = size_t(std::count(__base::__make_iter(0), __base::__make_iter(_Size), true)) < _Size / 2;
888+
if (__sparse)
889+
return __base::template __to_string<true, _CharT, _Traits, _Allocator>(__zero, __one);
890+
else
891+
return __base::template __to_string<false, _CharT, _Traits, _Allocator>(__zero, __one);
856892
}
857893

858894
template <size_t _Size>
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03
10+
11+
#include "benchmark/benchmark.h"
12+
#include <bitset>
13+
#include <cmath>
14+
#include <cstddef>
15+
#include <random>
16+
17+
// Helper that produces std::bitset<N> inputs for the benchmarks in this file.
template <std::size_t N>
18+
struct GenerateBitset {
19+
// Construct a bitset with N bits, where each bit is set with probability p.
// p <= 0 yields an all-clear bitset and p >= 1 yields an all-set bitset, without using the RNG.
20+
static std::bitset<N> generate(double p) {
21+
std::bitset<N> b;
22+
if (p <= 0.0)
23+
return b;
24+
if (p >= 1.0)
25+
return ~b;
26+
27+
// The engine is seeded from std::random_device on every call, so the generated bit
// pattern is not expected to be reproducible between calls or runs.
std::random_device rd;
28+
std::mt19937 gen(rd());
29+
std::bernoulli_distribution d(p);
30+
for (std::size_t i = 0; i < N; ++i)
31+
b[i] = d(gen);
32+
33+
return b;
34+
}
35+
36+
// Convenience wrappers around generate() for p = 0.1, 0.9 and 0.5 respectively.
static std::bitset<N> sparse() { return generate(0.1); }
37+
static std::bitset<N> dense() { return generate(0.9); }
38+
static std::bitset<N> uniform() { return generate(0.5); }
39+
};
40+
41+
// Benchmarks std::bitset<N>::to_string().
// state.range(0) is interpreted as a percentage: it is divided by 100 to obtain the probability
// that each bit of the input bitset is set.
template <std::size_t N>
42+
static void BM_BitsetToString(benchmark::State& state) {
43+
double p = state.range(0) / 100.0;
44+
// The input is generated once, before the measured loop, so only to_string() is timed.
std::bitset<N> b = GenerateBitset<N>::generate(p);
45+
benchmark::DoNotOptimize(b);
46+
47+
for (auto _ : state) {
48+
// Pass the result to DoNotOptimize so the call is not discarded as unused.
benchmark::DoNotOptimize(b.to_string());
49+
}
50+
}
51+
52+
// Sparse bitset: Arg(10) is read by BM_BitsetToString as a percentage, i.e. each bit is set with
// probability 0.10. One registration per power-of-two size from 32 up to 1 << 20.
53+
BENCHMARK(BM_BitsetToString<32>)->Arg(10)->Name("BM_BitsetToString<32>/Sparse (10%)");
54+
BENCHMARK(BM_BitsetToString<64>)->Arg(10)->Name("BM_BitsetToString<64>/Sparse (10%)");
55+
BENCHMARK(BM_BitsetToString<128>)->Arg(10)->Name("BM_BitsetToString<128>/Sparse (10%)");
56+
BENCHMARK(BM_BitsetToString<256>)->Arg(10)->Name("BM_BitsetToString<256>/Sparse (10%)");
57+
BENCHMARK(BM_BitsetToString<512>)->Arg(10)->Name("BM_BitsetToString<512>/Sparse (10%)");
58+
BENCHMARK(BM_BitsetToString<1024>)->Arg(10)->Name("BM_BitsetToString<1024>/Sparse (10%)");
59+
BENCHMARK(BM_BitsetToString<2048>)->Arg(10)->Name("BM_BitsetToString<2048>/Sparse (10%)");
60+
BENCHMARK(BM_BitsetToString<4096>)->Arg(10)->Name("BM_BitsetToString<4096>/Sparse (10%)");
61+
BENCHMARK(BM_BitsetToString<8192>)->Arg(10)->Name("BM_BitsetToString<8192>/Sparse (10%)");
62+
BENCHMARK(BM_BitsetToString<16384>)->Arg(10)->Name("BM_BitsetToString<16384>/Sparse (10%)");
63+
BENCHMARK(BM_BitsetToString<32768>)->Arg(10)->Name("BM_BitsetToString<32768>/Sparse (10%)");
64+
BENCHMARK(BM_BitsetToString<65536>)->Arg(10)->Name("BM_BitsetToString<65536>/Sparse (10%)");
65+
BENCHMARK(BM_BitsetToString<131072>)->Arg(10)->Name("BM_BitsetToString<131072>/Sparse (10%)");
66+
BENCHMARK(BM_BitsetToString<262144>)->Arg(10)->Name("BM_BitsetToString<262144>/Sparse (10%)");
67+
BENCHMARK(BM_BitsetToString<524288>)->Arg(10)->Name("BM_BitsetToString<524288>/Sparse (10%)");
68+
BENCHMARK(BM_BitsetToString<1048576>)->Arg(10)->Name("BM_BitsetToString<1048576>/Sparse (10%)"); // 1 << 20
69+
70+
// Dense bitset: Arg(90) is read by BM_BitsetToString as a percentage, i.e. each bit is set with
// probability 0.90. One registration per power-of-two size from 32 up to 1 << 20.
71+
BENCHMARK(BM_BitsetToString<32>)->Arg(90)->Name("BM_BitsetToString<32>/Dense (90%)");
72+
BENCHMARK(BM_BitsetToString<64>)->Arg(90)->Name("BM_BitsetToString<64>/Dense (90%)");
73+
BENCHMARK(BM_BitsetToString<128>)->Arg(90)->Name("BM_BitsetToString<128>/Dense (90%)");
74+
BENCHMARK(BM_BitsetToString<256>)->Arg(90)->Name("BM_BitsetToString<256>/Dense (90%)");
75+
BENCHMARK(BM_BitsetToString<512>)->Arg(90)->Name("BM_BitsetToString<512>/Dense (90%)");
76+
BENCHMARK(BM_BitsetToString<1024>)->Arg(90)->Name("BM_BitsetToString<1024>/Dense (90%)");
77+
BENCHMARK(BM_BitsetToString<2048>)->Arg(90)->Name("BM_BitsetToString<2048>/Dense (90%)");
78+
BENCHMARK(BM_BitsetToString<4096>)->Arg(90)->Name("BM_BitsetToString<4096>/Dense (90%)");
79+
BENCHMARK(BM_BitsetToString<8192>)->Arg(90)->Name("BM_BitsetToString<8192>/Dense (90%)");
80+
BENCHMARK(BM_BitsetToString<16384>)->Arg(90)->Name("BM_BitsetToString<16384>/Dense (90%)");
81+
BENCHMARK(BM_BitsetToString<32768>)->Arg(90)->Name("BM_BitsetToString<32768>/Dense (90%)");
82+
BENCHMARK(BM_BitsetToString<65536>)->Arg(90)->Name("BM_BitsetToString<65536>/Dense (90%)");
83+
BENCHMARK(BM_BitsetToString<131072>)->Arg(90)->Name("BM_BitsetToString<131072>/Dense (90%)");
84+
BENCHMARK(BM_BitsetToString<262144>)->Arg(90)->Name("BM_BitsetToString<262144>/Dense (90%)");
85+
BENCHMARK(BM_BitsetToString<524288>)->Arg(90)->Name("BM_BitsetToString<524288>/Dense (90%)");
86+
BENCHMARK(BM_BitsetToString<1048576>)->Arg(90)->Name("BM_BitsetToString<1048576>/Dense (90%)"); // 1 << 20
87+
88+
// Uniform bitset: Arg(50) is read by BM_BitsetToString as a percentage, i.e. each bit is set with
// probability 0.50. One registration per power-of-two size from 32 up to 1 << 20.
89+
BENCHMARK(BM_BitsetToString<32>)->Arg(50)->Name("BM_BitsetToString<32>/Uniform (50%)");
90+
BENCHMARK(BM_BitsetToString<64>)->Arg(50)->Name("BM_BitsetToString<64>/Uniform (50%)");
91+
BENCHMARK(BM_BitsetToString<128>)->Arg(50)->Name("BM_BitsetToString<128>/Uniform (50%)");
92+
BENCHMARK(BM_BitsetToString<256>)->Arg(50)->Name("BM_BitsetToString<256>/Uniform (50%)");
93+
BENCHMARK(BM_BitsetToString<512>)->Arg(50)->Name("BM_BitsetToString<512>/Uniform (50%)");
94+
BENCHMARK(BM_BitsetToString<1024>)->Arg(50)->Name("BM_BitsetToString<1024>/Uniform (50%)");
95+
BENCHMARK(BM_BitsetToString<2048>)->Arg(50)->Name("BM_BitsetToString<2048>/Uniform (50%)");
96+
BENCHMARK(BM_BitsetToString<4096>)->Arg(50)->Name("BM_BitsetToString<4096>/Uniform (50%)");
97+
BENCHMARK(BM_BitsetToString<8192>)->Arg(50)->Name("BM_BitsetToString<8192>/Uniform (50%)");
98+
BENCHMARK(BM_BitsetToString<16384>)->Arg(50)->Name("BM_BitsetToString<16384>/Uniform (50%)");
99+
BENCHMARK(BM_BitsetToString<32768>)->Arg(50)->Name("BM_BitsetToString<32768>/Uniform (50%)");
100+
BENCHMARK(BM_BitsetToString<65536>)->Arg(50)->Name("BM_BitsetToString<65536>/Uniform (50%)");
101+
BENCHMARK(BM_BitsetToString<131072>)->Arg(50)->Name("BM_BitsetToString<131072>/Uniform (50%)");
102+
BENCHMARK(BM_BitsetToString<262144>)->Arg(50)->Name("BM_BitsetToString<262144>/Uniform (50%)");
103+
BENCHMARK(BM_BitsetToString<524288>)->Arg(50)->Name("BM_BitsetToString<524288>/Uniform (50%)");
104+
BENCHMARK(BM_BitsetToString<1048576>)->Arg(50)->Name("BM_BitsetToString<1048576>/Uniform (50%)"); // 1 << 20
105+
106+
// Program entry point supplied by the benchmark library's macro (presumably defines main() and
// runs the benchmarks registered above -- confirm against benchmark/benchmark.h).
BENCHMARK_MAIN();

0 commit comments

Comments
 (0)