Skip to content

[libc++] Optimize std::{,ranges}::{fill,fill_n} for segmented iterators #132665

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ Improvements and New Features
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.

- The ``std::{fill, fill_n}`` and ``std::ranges::{fill, fill_n}`` algorithms have been optimized for segmented
iterators, resulting in a performance improvement of at least 10x for ``std::deque<int>`` iterators and for
``std::ranges::join_view`` iterators over a ``std::vector<std::vector<int>>``.

Deprecations and Removals
-------------------------

Expand Down
38 changes: 30 additions & 8 deletions libcxx/include/__algorithm/fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@
#define _LIBCPP___ALGORITHM_FILL_H

#include <__algorithm/fill_n.h>
#include <__algorithm/for_each_segment.h>
#include <__config>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__type_traits/enable_if.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand All @@ -21,23 +24,42 @@ _LIBCPP_BEGIN_NAMESPACE_STD

// fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.

template <class _ForwardIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) {
// Generic fallback: walks the range one element at a time, assigning __value to each
// element, and hands back the iterator that compared equal to the sentinel.
template <class _ForwardIterator, class _Sentinel, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
__fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __value) {
  while (__first != __last) {
    *__first = __value;
    ++__first;
  }
  return __first;
}

template <class _RandomAccessIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) {
std::fill_n(__first, __last - __first, __value);
// Overload selected for random-access iterators that are not segmented iterators.
// The length of the range is computed with `__last - __first` and the work is
// forwarded to std::__fill_n, whose result (the iterator past the filled range) is returned.
template <class _RandomAccessIterator,
class _Tp,
__enable_if_t<__has_random_access_iterator_category<_RandomAccessIterator>::value &&
!__is_segmented_iterator<_RandomAccessIterator>::value,
int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
return std::__fill_n(__first, __last - __first, __value);
}

#ifndef _LIBCPP_CXX03_LANG
// Overload for segmented iterators: the range is handed to std::__for_each_segment together
// with a callback that fills one local (per-segment) sub-range via std::__fill.
// The end iterator supplied by the caller is returned unchanged.
template <class _SegmentedIterator,
          class _Tp,
          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
__fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
  using __local_iter = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
  // Only __value is needed inside the callback; it is captured by reference.
  auto __fill_segment = [&__value](__local_iter __seg_first, __local_iter __seg_last) {
    std::__fill(__seg_first, __seg_last, __value);
  };
  std::__for_each_segment(__first, __last, __fill_segment);
  return __last;
}
#endif // !_LIBCPP_CXX03_LANG

// Public std::fill entry point: assigns __value to the elements of [__first, __last)
// by delegating to the internal std::__fill overload set. Returns nothing.
template <class _ForwardIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
std::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category());
std::__fill(__first, __last, __value);
}

_LIBCPP_END_NAMESPACE_STD
Expand Down
48 changes: 38 additions & 10 deletions libcxx/include/__algorithm/fill_n.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@
#ifndef _LIBCPP___ALGORITHM_FILL_N_H
#define _LIBCPP___ALGORITHM_FILL_N_H

#include <__algorithm/for_each_n_segment.h>
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/disjunction.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
Expand All @@ -26,9 +32,39 @@ _LIBCPP_BEGIN_NAMESPACE_STD

// fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.

// Generic __fill_n: assigns __value to __n consecutive elements starting at __first, one
// element per loop iteration, and returns the iterator past the last element written.
// Outside C++03 mode this overload is disabled for segmented iterators whose local
// iterator is random access (the enable_if below is true only when the iterator is not
// segmented, or its local iterator is not random access).
template <class _OutputIterator, class _Size, class _Tp>
template <class _OutputIterator,
class _Size,
class _Tp
#ifndef _LIBCPP_CXX03_LANG
,
__enable_if_t<_Or< _Not<__is_segmented_iterator<_OutputIterator> >,
_Not<__has_random_access_local_iterator<_OutputIterator> > >::value,
int> = 0
#endif
>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
// The (void) cast keeps the comma between the two increments from picking up an overloaded operator,.
for (; __n > 0; ++__first, (void)--__n)
*__first = __value;
return __first;
}

#ifndef _LIBCPP_CXX03_LANG
// __fill_n overload for segmented iterators whose local (per-segment) iterator is random access.
//
// The iterator and count are handed to std::__for_each_n_segment together with a callback that
// fills one local sub-range [__lfirst, __llast) through std::__fill_n; the value returned by
// std::__for_each_n_segment is returned to the caller.
// NOTE(review): presumably that is the iterator one past the last filled element -- confirm
// against __algorithm/for_each_n_segment.h.
//
// This is constexpr since C++20, like every other __fill_n / __fill overload: the per-segment
// std::__fill_n it calls is only constexpr from C++20 on, so an earlier marking could never be
// honoured in a constant expression.
template < class _OutputIterator,
           class _Size,
           class _Tp,
           __enable_if_t<__is_segmented_iterator<_OutputIterator>::value &&
                             __has_random_access_iterator_category<
                                 typename __segmented_iterator_traits<_OutputIterator>::__local_iterator>::value,
                         int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
  using __local_iterator_t = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator;
  return std::__for_each_n_segment(__first, __n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
    // The local iterators are random access, so the segment length is a plain subtraction.
    std::__fill_n(__lfirst, __llast - __lfirst, __value);
  });
}
#endif // !_LIBCPP_CXX03_LANG

template <bool _FillVal, class _Cp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
Expand Down Expand Up @@ -68,14 +104,6 @@ __fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
return __first + __n;
}

// Element-wise __fill_n: writes __value through __first exactly __n times (nothing is
// written when __n is not positive) and returns the advanced iterator.
template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
  while (__n > 0) {
    *__first = __value;
    ++__first;
    --__n;
  }
  return __first;
}

template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
Expand Down
13 changes: 7 additions & 6 deletions libcxx/include/__algorithm/ranges_fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H
#define _LIBCPP___ALGORITHM_RANGES_FILL_H

#include <__algorithm/ranges_fill_n.h>
#include <__algorithm/fill.h>
#include <__algorithm/fill_n.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
#include <__utility/move.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand All @@ -31,12 +33,11 @@ namespace ranges {
struct __fill {
// Iterator/sentinel overload of ranges::fill: assigns __value to every element of
// [__first, __last) and returns an iterator to the end of the filled range.
// When the sentinel is sized, the element count is obtained with `__last - __first` and a
// counted fill is used; otherwise the range is walked until the sentinel is reached.
template <class _Type, output_iterator<const _Type&> _Iter, sentinel_for<_Iter> _Sent>
_LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const {
if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
return ranges::fill_n(__first, __last - __first, __value);
if constexpr (sized_sentinel_for<_Sent, _Iter>) {
// Compute the distance before __first is moved from.
auto __n = __last - __first;
return std::__fill_n(std::move(__first), __n, __value);
} else {
for (; __first != __last; ++__first)
*__first = __value;
return __first;
return std::__fill(std::move(__first), std::move(__last), __value);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <array>
#include <cassert>
#include <cstddef>
#include <deque>
#include <ranges>
#include <vector>

#include "sized_allocator.h"
Expand Down Expand Up @@ -93,6 +95,13 @@ TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
return true;
}

/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
  // Verify std::fill on std::deque iterators (segmented iterators).
  //
  // A single small deque is unlikely to span more than one storage block, so on its own it
  // cannot exercise the hand-off from one segment to the next. Cover empty, tiny and large
  // deques; the sizes around 1024 and 2048 are meant to straddle block boundaries (the block
  // size is implementation-defined, so this is a best-effort choice).
  //
  // Written without C++11 features because this test is also built in C++03 mode.
  const std::size_t sizes[] = {0, 1, 2, 20, 1023, 1024, 1025, 2047, 2048, 2049, 5000};
  for (std::size_t i = 0; i != sizeof(sizes) / sizeof(sizes[0]); ++i) {
    const std::size_t size = sizes[i];

    { // Fill the whole container.
      std::deque<int> in(size);
      std::deque<int> expected(in.size(), 42);
      std::fill(in.begin(), in.end(), 42);
      assert(in == expected);
    }

    if (size >= 2) { // Fill an inner sub-range: the first and last elements must stay untouched.
      std::deque<int> in(size, 1);
      std::deque<int> expected(in.size(), 42);
      expected.front() = 1;
      expected.back()  = 1;
      std::fill(in.begin() + 1, in.end() - 1, 42);
      assert(in == expected);
    }
  }
}

TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<char*>(), Test<char>());
types::for_each(types::forward_iterator_list<int*>(), Test<int>());
Expand Down Expand Up @@ -138,6 +147,20 @@ TEST_CONSTEXPR_CXX20 bool test() {
}
}

if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_deque();

#if TEST_STD_VER >= 20
{ // Verify that join_view of vectors work properly.
std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
auto jv = std::ranges::join_view(v);
std::fill(jv.begin(), jv.end(), 42);
for (const auto& vec : v)
for (auto n : vec)
assert(n == 42);
}
#endif

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <array>
#include <cassert>
#include <cstddef>
#include <deque>
#include <ranges>
#include <vector>

#include "sized_allocator.h"
Expand Down Expand Up @@ -126,6 +128,13 @@ TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
return true;
}

/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
  // Verify std::fill_n on std::deque iterators (segmented iterators).
  //
  // A single small deque is unlikely to span more than one storage block, so on its own it
  // cannot exercise the hand-off from one segment to the next. Cover empty, tiny and large
  // deques; the sizes around 1024 and 2048 are meant to straddle block boundaries (the block
  // size is implementation-defined, so this is a best-effort choice).
  //
  // Written without C++11 features because this test is also built in C++03 mode.
  const std::size_t sizes[] = {0, 1, 2, 20, 1023, 1024, 1025, 2047, 2048, 2049, 5000};
  for (std::size_t i = 0; i != sizeof(sizes) / sizeof(sizes[0]); ++i) {
    const std::size_t size = sizes[i];

    { // Fill the whole container; the result must be the end iterator.
      std::deque<int> in(size);
      std::deque<int> expected(in.size(), 42);
      assert(std::fill_n(in.begin(), in.size(), 42) == in.end());
      assert(in == expected);
    }

    if (size >= 2) { // Fill an inner sub-range: the first and last elements must stay untouched.
      std::deque<int> in(size, 1);
      std::deque<int> expected(in.size(), 42);
      expected.front() = 1;
      expected.back()  = 1;
      assert(std::fill_n(in.begin() + 1, size - 2, 42) == in.end() - 1);
      assert(in == expected);
    }
  }
}

TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<char*>(), Test<char>());
types::for_each(types::forward_iterator_list<int*>(), Test<int>());
Expand Down Expand Up @@ -221,6 +230,20 @@ TEST_CONSTEXPR_CXX20 bool test() {
}
}

if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_deque();

#if TEST_STD_VER >= 20
{
std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
auto jv = std::ranges::join_view(v);
std::fill_n(jv.begin(), std::distance(jv.begin(), jv.end()), 42);
for (const auto& vec : v)
for (auto n : vec)
assert(n == 42);
}
#endif

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <deque>
#include <ranges>
#include <string>
#include <vector>
Expand Down Expand Up @@ -128,6 +129,13 @@ constexpr bool test_vector_bool(std::size_t N) {
}
#endif

/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
  // Verify std::ranges::fill on std::deque (segmented iterators).
  //
  // A single small deque is unlikely to span more than one storage block, so on its own it
  // cannot exercise the hand-off from one segment to the next. Cover empty, tiny and large
  // deques; the sizes around 1024 and 2048 are meant to straddle block boundaries (the block
  // size is implementation-defined, so this is a best-effort choice).
  const std::size_t sizes[] = {0, 1, 2, 20, 1023, 1024, 1025, 2047, 2048, 2049, 5000};
  for (const std::size_t size : sizes) {
    { // Range overload over the whole container; the result must be the end iterator.
      std::deque<int> in(size);
      std::deque<int> expected(in.size(), 42);
      auto ret = std::ranges::fill(in, 42);
      assert(ret == in.end());
      assert(in == expected);
    }

    if (size >= 2) { // Iterator overload over an inner sub-range: both ends must stay untouched.
      std::deque<int> in(size, 1);
      std::deque<int> expected(in.size(), 42);
      expected.front() = 1;
      expected.back()  = 1;
      auto ret         = std::ranges::fill(in.begin() + 1, in.end() - 1, 42);
      assert(ret == in.end() - 1);
      assert(in == expected);
    }
  }
}

constexpr bool test() {
test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
Expand Down Expand Up @@ -227,6 +235,20 @@ constexpr bool test() {
}
#endif

if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_deque();

#if TEST_STD_VER >= 20
{
std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
auto jv = std::ranges::join_view(v);
std::ranges::fill(jv, 42);
for (const auto& vec : v)
for (auto n : vec)
assert(n == 42);
}
#endif

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <deque>
#include <ranges>
#include <string>
#include <vector>
Expand Down Expand Up @@ -101,6 +102,13 @@ constexpr bool test_vector_bool(std::size_t N) {
}
#endif

/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
  // Verify std::ranges::fill_n on std::deque iterators (segmented iterators).
  //
  // A single small deque is unlikely to span more than one storage block, so on its own it
  // cannot exercise the hand-off from one segment to the next. Cover empty, tiny and large
  // deques; the sizes around 1024 and 2048 are meant to straddle block boundaries (the block
  // size is implementation-defined, so this is a best-effort choice).
  const std::size_t sizes[] = {0, 1, 2, 20, 1023, 1024, 1025, 2047, 2048, 2049, 5000};
  for (const std::size_t size : sizes) {
    { // Fill the whole container; the result must be the end iterator.
      std::deque<int> in(size);
      std::deque<int> expected(in.size(), 42);
      auto ret = std::ranges::fill_n(std::ranges::begin(in), std::ranges::size(in), 42);
      assert(ret == in.end());
      assert(in == expected);
    }

    if (size >= 2) { // Fill an inner sub-range: the first and last elements must stay untouched.
      std::deque<int> in(size, 1);
      std::deque<int> expected(in.size(), 42);
      expected.front() = 1;
      expected.back()  = 1;
      auto ret         = std::ranges::fill_n(std::ranges::begin(in) + 1, std::ranges::ssize(in) - 2, 42);
      assert(ret == in.end() - 1);
      assert(in == expected);
    }
  }
}

constexpr bool test() {
test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
Expand Down Expand Up @@ -175,6 +183,20 @@ constexpr bool test() {
}
#endif

if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
test_deque();

#if TEST_STD_VER >= 20
{
std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
auto jv = std::ranges::join_view(v);
std::ranges::fill_n(std::ranges::begin(jv), std::ranges::distance(jv), 42);
for (const auto& vec : v)
for (auto n : vec)
assert(n == 42);
}
#endif

return true;
}

Expand Down
Loading