Skip to content

[libc++] Extend the scope of radix sorting inside std::stable_sort to floating-point types #129452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ Improvements and New Features

- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.

- The ``std::stable_sort`` algorithm now uses radix sort for floating-point types, which can improve performance by
  up to 10x, depending on the type of the sorted elements and the initial state of the sorted array.

Deprecations and Removals
-------------------------

Expand Down
100 changes: 98 additions & 2 deletions libcxx/include/__algorithm/radix_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@

#include <__algorithm/for_each.h>
#include <__algorithm/move.h>
#include <__bit/bit_cast.h>
#include <__bit/bit_log2.h>
#include <__bit/countl.h>
#include <__config>
#include <__cstddef/size_t.h>
#include <__functional/identity.h>
#include <__iterator/access.h>
#include <__iterator/distance.h>
Expand All @@ -44,9 +45,12 @@
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_assignable.h>
#include <__type_traits/is_enum.h>
#include <__type_traits/is_integral.h>
#include <__type_traits/is_unsigned.h>
#include <__type_traits/make_unsigned.h>
#include <__type_traits/void_t.h>
#include <__utility/declval.h>
#include <__utility/forward.h>
#include <__utility/integer_sequence.h>
#include <__utility/move.h>
Expand Down Expand Up @@ -298,6 +302,96 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) {
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
}

// Maps a size in bytes (`_Size`) to an unsigned integer type via the nested `type` alias.
// The primary template is only declared, never defined, so there is no `type` member for
// sizes that lack an explicit specialization below.
template <size_t _Size>
struct __unsigned_integer_of_size;

// 1 byte -> uint8_t
template <>
struct __unsigned_integer_of_size<1> {
using type _LIBCPP_NODEBUG = uint8_t;
};

// 2 bytes -> uint16_t
template <>
struct __unsigned_integer_of_size<2> {
using type _LIBCPP_NODEBUG = uint16_t;
};

// 4 bytes -> uint32_t
template <>
struct __unsigned_integer_of_size<4> {
using type _LIBCPP_NODEBUG = uint32_t;
};

// 8 bytes -> uint64_t
template <>
struct __unsigned_integer_of_size<8> {
using type _LIBCPP_NODEBUG = uint64_t;
};

// 16 bytes -> unsigned __int128; this specialization exists only when `_LIBCPP_HAS_INT128` is non-zero.
# if _LIBCPP_HAS_INT128
template <>
struct __unsigned_integer_of_size<16> {
using type _LIBCPP_NODEBUG = unsigned __int128;
};
# endif

// Shorthand for `typename __unsigned_integer_of_size<_Size>::type`.
template <size_t _Size>
using __unsigned_integer_of_size_t _LIBCPP_NODEBUG = typename __unsigned_integer_of_size<_Size>::type;

// The unsigned integer type selected by `sizeof(_Sc)`, i.e. `__unsigned_integer_of_size_t<sizeof(_Sc)>`.
template <class _Sc>
using __unsigned_representation_for_t _LIBCPP_NODEBUG = __unsigned_integer_of_size_t<sizeof(_Sc)>;

// The function `__to_ordered_integral` is defined for integers and IEEE 754 floating-point numbers.
// Returns an integer representation such that for any `x` and `y` such that `x < y`, the expression
// `__to_ordered_integral(x) < __to_ordered_integral(y)` is true, where `x`, `y` are integers or IEEE 754 floats.
// An overload for integers: participates in overload resolution only when `is_integral<_Integral>::value` is true,
// and returns its argument unchanged.
template <class _Integral, enable_if_t< is_integral<_Integral>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Integral __n) {
return __n;
}

// An overload for IEEE 754 floating-point numbers

// For floats conforming to the IEEE 754 (IEC 559) standard, we know that:
// 1. The bit representation of positive floats directly reflects their order:
// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are
// equal, the one with the larger mantissa is greater.
// 2. The bit representation of negative floats reflects their reverse order (for the same reasons).
// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw
// bit representation, any negative number will be greater than any positive number.

// The only exception to this rule is `NaN`, which is unordered by definition.

// Based on the above, to obtain correctly ordered integral representation of floating-point numbers, we need to:
// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct
// order;
// 2. Invert the sign bit for positive floats.

// Thus, in final integral representation, we have reversed the order for negative floats and made all negative floats
// smaller than all positive numbers (by inverting the sign bit).
// Converts an IEEE 754 floating-point value into an unsigned integer key of the same size such that
//   - `x < y`  implies `key(x) < key(y)`, and
//   - `x == y` implies `key(x) == key(y)` (for non-NaN values).
//
// The second property matters because the keys drive a radix sort used by `std::stable_sort`: elements that
// are equivalent under `operator<` must receive the same key, otherwise the sort would reorder them and lose
// stability. The only pair of distinct bit patterns that compare equal is `-0.0` / `+0.0`, so `-0.0` (sign bit
// set, all other bits clear) is explicitly given the key of `+0.0`.
//
// Participates in overload resolution only when `numeric_limits<_Floating>::is_iec559` is true.
// `NaN` values are unordered; no particular key ordering is promised for them.
template <class _Floating, enable_if_t< numeric_limits<_Floating>::is_iec559, int> = 0>
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Floating __f) {
  using __integral_type          = __unsigned_representation_for_t<_Floating>;
  constexpr auto __bit_count     = std::numeric_limits<__integral_type>::digits;
  constexpr auto __sign_bit_mask = static_cast<__integral_type>(__integral_type{1} << (__bit_count - 1));

  const auto __u = std::__bit_cast<__integral_type>(__f);

  // `-0.0`: only the sign bit is set. `+0.0` (all bits clear) maps to `0 ^ __sign_bit_mask == __sign_bit_mask`,
  // so return that same key to keep the two zeros indistinguishable for the sort.
  if (__u == __sign_bit_mask)
    return static_cast<__integral_type>(__sign_bit_mask);

  // Negative values: invert every bit (reverses their order and clears the sign bit).
  // Non-negative values: set the sign bit so that they sort after all negative values.
  return static_cast<__integral_type>(__u & __sign_bit_mask ? ~__u : __u ^ __sign_bit_mask);
}

// Enumerations are deliberately rejected: a user-defined comparison may exist for an enum, so enums cannot be
// compared just like integers. This overload is selected for any enum argument (`is_enum<_Enum>::value`) and is
// deleted, which makes such a call ill-formed.
template <class _Enum, enable_if_t< is_enum<_Enum>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Enum __e) = delete;

// `long double` is deliberately rejected: its representation varies significantly across platforms and
// compilers, making it practically impossible to determine its actual bit width for conversion to an ordered
// integer. The overload taking `long double` is therefore deleted.
inline _LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(long double) = delete;

// `__is_ordered_integer_representable_v<_Tp>` is `true` when the expression
// `std::__to_ordered_integral(std::declval<_Tp>())` is well-formed, and `false` otherwise.
//
// Primary template: the fallback used when the partial specialization below is not viable.
template <class _Tp, class = void>
inline const bool __is_ordered_integer_representable_v = false;

// Partial specialization: viable only when the `decltype` of the call to `std::__to_ordered_integral` can be
// formed, in which case `__void_t<...>` matches the defaulted second template argument.
template <class _Tp>
inline const bool
__is_ordered_integer_representable_v<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> =
true;

struct __low_byte_fn {
template <class _Ip>
_LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
Expand All @@ -314,7 +408,9 @@ __radix_sort(_RandomAccessIterator1 __first,
_RandomAccessIterator2 __buffer,
_Map __map,
_Radix __radix) {
auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); };
auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) {
return std::__shift_to_unsigned(__map(std::__to_ordered_integral(__x)));
};
std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
}

Expand Down
15 changes: 7 additions & 8 deletions libcxx/include/__algorithm/stable_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_integral.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
Expand Down Expand Up @@ -201,7 +200,7 @@ struct __stable_sort_switch {
#if _LIBCPP_STD_VER >= 17
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
static_assert(is_integral<_Tp>::value);
static_assert(__is_ordered_integer_representable_v<_Tp>);
if constexpr (sizeof(_Tp) == 1) {
return 1 << 8;
}
Expand All @@ -211,7 +210,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() {
static_assert(is_integral<_Tp>::value);
static_assert(__is_ordered_integer_representable_v<_Tp>);
if constexpr (sizeof(_Tp) >= 8) {
return 1 << 15;
}
Expand Down Expand Up @@ -245,11 +244,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort(
}

#if _LIBCPP_STD_VER >= 17
constexpr auto __default_comp = __desugars_to_v<__totally_ordered_less_tag, _Compare, value_type, value_type >;
constexpr auto __integral_value =
is_integral_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
constexpr auto __allowed_radix_sort = __default_comp && __integral_value;
if constexpr (__allowed_radix_sort) {
constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >;
constexpr auto __radix_sortable =
__is_ordered_integer_representable_v<value_type> &&
is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
if constexpr (__default_comp && __radix_sortable) {
if (__len <= __buff_size && __len >= static_cast<difference_type>(std::__radix_sort_min_bound<value_type>()) &&
__len <= static_cast<difference_type>(std::__radix_sort_max_bound<value_type>())) {
if (__libcpp_is_constant_evaluated()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <random>
#include <type_traits>
#include <vector>

#include "count_new.h"
Expand Down Expand Up @@ -68,6 +70,13 @@ TEST_CONSTEXPR_CXX26 std::vector<T> generate_sawtooth(int N, int M) {
if (++x == M)
x = 0;
}

if (std::is_signed<T>::value) {
for (auto& a : v) {
a -= (M / 2);
}
}

return v;
}

Expand Down Expand Up @@ -193,12 +202,60 @@ TEST_CONSTEXPR_CXX26 bool test() {
return true;
}

// Sorts a shuffled mix of ordinary sawtooth values plus 256 copies each of +0.0, -0.0, +infinity and
// -infinity with std::stable_sort, and asserts that the result is sorted. Always returns true.
template <class T>
bool test_floating_special_values() {
  static_assert(std::is_floating_point<T>::value, "");

  const T inf        = std::numeric_limits<T>::infinity();
  const T specials[] = {static_cast<T>(0.0), static_cast<T>(-0.0), inf, -inf};

  // Start from the regular sawtooth data, then append each special value in the same order as listed above.
  std::vector<T> values = generate_sawtooth<T>(1024, 512);
  for (const T& special : specials)
    values.insert(values.end(), 256, special);

  // Default-seeded engine keeps the permutation reproducible from run to run.
  std::mt19937 rng;
  std::shuffle(values.begin(), values.end(), rng);

  std::stable_sort(values.begin(), values.end());
  assert(std::is_sorted(values.begin(), values.end()));

  return true;
}

// Runs the generic stable_sort test for T and, only if that returns true, the special-values test.
// Returns the conjunction of both results.
template <class T>
bool test_floating() {
  if (!test<T>())
    return false;
  return test_floating_special_values<T>();
}

// Scoped enumeration with a user-provided operator< that orders enumerators by DESCENDING underlying value,
// i.e. the opposite of comparing the underlying integers directly.
enum struct Enum : int {
  a,
  b,
  c,
  d,
  e,
  f,
  g,
  h
};

TEST_CONSTEXPR_CXX26 bool operator<(Enum x, Enum y) {
  // `x` precedes `y` exactly when the underlying value of `y` is smaller than that of `x`.
  return static_cast<int>(y) < static_cast<int>(x);
}

// Builds a vector holding 128 copies of each enumerator from Enum::a through Enum::h (in that order),
// stable-sorts it with the default comparison, and asserts the result is sorted. Always returns true.
TEST_CONSTEXPR_CXX26 bool test_enum() {
  const Enum enumerators[] = {Enum::a, Enum::b, Enum::c, Enum::d, Enum::e, Enum::f, Enum::g, Enum::h};

  std::vector<Enum> v;
  for (Enum enumerator : enumerators)
    v.resize(v.size() + 128, enumerator);

  // Enum's operator< is reversed relative to the underlying integers, so both the sort and the check
  // below go through that user-defined ordering.
  std::stable_sort(v.begin(), v.end());
  assert(std::is_sorted(v.begin(), v.end()));

  return true;
}

int main(int, char**) {
test<int>();
test<float>();
test_floating<float>();
test_floating<double>();
test_floating<long double>();
test_enum();
#if TEST_STD_VER >= 26
static_assert(test<int>());
static_assert(test<float>());
static_assert(test<double>());
// test constexprness of radix sort branch
static_assert(test<char>());
#endif
Expand Down