-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc++] Extend the scope of radix sorting inside std::stable_sort to floating-point types #129452
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc++] Extend the scope of radix sorting inside std::stable_sort to floating-point types #129452
Conversation
@llvm/pr-subscribers-libcxx Author: Дмитрий Изволов (izvolov) Changes: These changes speed up `std::stable_sort` in the case of sorting floating-point types. Why is this worth doing?
Comparison for only `std::stable_sort`:
Full diff: https://github.com/llvm/llvm-project/pull/129452.diff 3 Files Affected:
diff --git a/libcxx/include/__algorithm/radix_sort.h b/libcxx/include/__algorithm/radix_sort.h
index f6d9fb1ad7ca9..f957b37f3628e 100644
--- a/libcxx/include/__algorithm/radix_sort.h
+++ b/libcxx/include/__algorithm/radix_sort.h
@@ -29,9 +29,11 @@
#include <__algorithm/for_each.h>
#include <__algorithm/move.h>
+#include <__bit/bit_cast.h>
#include <__bit/bit_log2.h>
#include <__bit/countl.h>
#include <__config>
+#include <__cstddef/size_t.h>
#include <__functional/identity.h>
#include <__iterator/access.h>
#include <__iterator/distance.h>
@@ -44,9 +46,12 @@
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_assignable.h>
+#include <__type_traits/is_enum.h>
+#include <__type_traits/is_floating_point.h>
#include <__type_traits/is_integral.h>
#include <__type_traits/is_unsigned.h>
#include <__type_traits/make_unsigned.h>
+#include <__type_traits/underlying_type.h>
#include <__utility/forward.h>
#include <__utility/integer_sequence.h>
#include <__utility/move.h>
@@ -298,6 +303,94 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) {
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
}
+template <size_t _Size>
+struct __unsigned_integer_of_size {};
+
+template <>
+struct __unsigned_integer_of_size<1> {
+ using type = uint8_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<2> {
+ using type = uint16_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<4> {
+ using type = uint32_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<8> {
+ using type = uint64_t;
+};
+
+template <>
+struct __unsigned_integer_of_size<16> {
+# if _LIBCPP_HAS_INT128
+ using type = __int128;
+# endif
+};
+
+template <size_t _Size>
+using __unsigned_integer_of_size_t = typename __unsigned_integer_of_size<_Size>::type;
+
+template <class _Sc>
+using __unsigned_representation_for_t = __unsigned_integer_of_size_t<sizeof(_Sc)>;
+
+// Represent a scalar type as an ordered integer
+
+// The function is defined for ordered scalar types: integers, floating-point numbers, pointers, and enums.
+// Returns an integer representation such that for any `x` and `y` such that `x < y`, the expression
+// `__to_ordered_integral(x) < __to_ordered_integral(y)` is true, where `x`, `y` are values of the `Scalar` type.
+// __unsigned_representation_for_t<_Scalar> __to_ordered_integral(_Scalar);
+
+template <class _Integral, enable_if_t< is_integral_v<_Integral>, int> = 0>
+constexpr auto __to_ordered_integral(_Integral __n) {
+ return __n;
+}
+
+// An overload for floating-point numbers
+
+// From the IEEE 754 standard, we know that:
+// 1. The bit representation of positive floats directly reflects their order:
+// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are
+// equal, the one with the larger mantissa is greater.
+// 2. The bit representation of negative floats reflects their reverse order (for the same reasons).
+// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw
+// bit representation, any negative number will be greater than any positive number.
+
+// The only exception from this rule is `NaN`, which is unordered by definition.
+
+// Based on the above, to obtain correctly ordered integral representation of floating-point numbers, we need to:
+// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct
+// order;
+// 2. Invert the sign bit for positive floats.
+
+// Thus, in final integral representation, we have reversed the order for negative floats and made all negative floats
+// smaller than all positive numbers (by inverting the sign bit).
+template <class _Floating, enable_if_t< is_floating_point_v<_Floating>, int> = 0>
+constexpr auto __to_ordered_integral(_Floating __f) {
+ using __integral_type = __unsigned_representation_for_t<_Floating>;
+ constexpr auto __bit_count = std::numeric_limits<__integral_type>::digits;
+ constexpr auto __sign_bit_mask = static_cast<__integral_type>(__integral_type{1} << (__bit_count - 1));
+
+ const auto __u = std::__bit_cast<__integral_type>(__f);
+
+ return static_cast<__integral_type>(__u & __sign_bit_mask ? ~__u : __u ^ __sign_bit_mask);
+}
+
+template <class _Enum, enable_if_t< is_enum_v<_Enum>, int> = 0>
+constexpr auto __to_ordered_integral(_Enum __e) {
+ return static_cast<std::underlying_type_t<_Enum>>(__e);
+}
+
+template <class _Pointer>
+constexpr auto __to_ordered_integral(_Pointer* __ptr) {
+ return std::__bit_cast<__unsigned_representation_for_t<_Pointer*>>(__ptr);
+}
+
struct __low_byte_fn {
template <class _Ip>
_LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
@@ -314,7 +407,9 @@ __radix_sort(_RandomAccessIterator1 __first,
_RandomAccessIterator2 __buffer,
_Map __map,
_Radix __radix) {
- auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); };
+ auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) {
+ return std::__shift_to_unsigned(__map(__to_ordered_integral(__x)));
+ };
std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
}
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 76d9e5557008f..2eb62b5230b32 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -25,9 +25,10 @@
#include <__memory/unique_temporary_buffer.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
#include <__type_traits/is_constant_evaluated.h>
-#include <__type_traits/is_integral.h>
#include <__type_traits/is_same.h>
+#include <__type_traits/is_scalar.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__type_traits/remove_cvref.h>
#include <__utility/move.h>
@@ -202,7 +203,7 @@ struct __stable_sort_switch {
#if _LIBCPP_STD_VER >= 17
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
- static_assert(is_integral<_Tp>::value);
+ static_assert(is_scalar<_Tp>::value);
if constexpr (sizeof(_Tp) == 1) {
return 1 << 8;
}
@@ -212,13 +213,14 @@ _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() {
- static_assert(is_integral<_Tp>::value);
+ static_assert(is_scalar<_Tp>::value);
if constexpr (sizeof(_Tp) >= 8) {
return 1 << 15;
}
return 1 << 16;
}
+
#endif // _LIBCPP_STD_VER >= 17
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
@@ -246,12 +248,12 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort(
}
#if _LIBCPP_STD_VER >= 17
- constexpr auto __default_comp =
- __desugars_to_v<__totally_ordered_less_tag, __remove_cvref_t<_Compare>, value_type, value_type >;
- constexpr auto __integral_value =
- is_integral_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
- constexpr auto __allowed_radix_sort = __default_comp && __integral_value;
- if constexpr (__allowed_radix_sort) {
+ constexpr auto __default_comp = __desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, value_type, value_type >;
+ constexpr auto __scalar_value =
+ is_scalar_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
+ // There are non-comparable scalars (std::nullptr_t, pointers to members), so we need to exclude them.
+ constexpr auto __comparable_value = is_invocable_r_v<bool, _Compare, value_type, value_type>;
+ if constexpr (__default_comp && __scalar_value && __comparable_value) {
if (__len <= __buff_size && __len >= static_cast<difference_type>(std::__radix_sort_min_bound<value_type>()) &&
__len <= static_cast<difference_type>(std::__radix_sort_max_bound<value_type>())) {
if (__libcpp_is_constant_evaluated()) {
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
index 4ee1d795a23b2..7fda6c3d7b966 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
@@ -68,6 +68,13 @@ TEST_CONSTEXPR_CXX26 std::vector<T> generate_sawtooth(int N, int M) {
if (++x == M)
x = 0;
}
+
+ if constexpr (std::is_signed_v<T>) {
+ for (auto& a : v) {
+ a -= (M / 2);
+ }
+ }
+
return v;
}
@@ -193,12 +200,38 @@ TEST_CONSTEXPR_CXX26 bool test() {
return true;
}
+template <class T>
+bool test_floating_special_values() {
+ static_assert(std::is_floating_point_v<T>);
+
+ auto v = generate_sawtooth<T>(1024, 512);
+ v.insert(v.end(), 256, T{0.0});
+ v.insert(v.end(), 256, T{-0.0});
+ v.insert(v.end(), 256, std::numeric_limits<T>::infinity());
+ v.insert(v.end(), 256, -std::numeric_limits<T>::infinity());
+
+ std::mt19937 randomness;
+ std::shuffle(v.begin(), v.end(), randomness);
+
+ std::stable_sort(v.begin(), v.end());
+ assert(std::is_sorted(v.begin(), v.end()));
+
+ return true;
+}
+
+template <class T>
+bool test_floating() {
+ return test<T>() && test_floating_special_values<T>();
+}
+
int main(int, char**) {
test<int>();
- test<float>();
+ test_floating<float>();
+ test_floating<double>();
#if TEST_STD_VER >= 26
static_assert(test<int>());
static_assert(test<float>());
+ static_assert(test<double>());
// test constexprness of radix sort branch
static_assert(test<char>());
#endif
|
803b546
to
d06a762
Compare
// From the IEEE 754 standard, we know that: | ||
// 1. The bit representation of positive floats directly reflects their order: | ||
// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are | ||
// equal, the one with the larger mantissa is greater. | ||
// 2. The bit representation of negative floats reflects their reverse order (for the same reasons). | ||
// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw | ||
// bit representation, any negative number will be greater than any positive number. | ||
|
||
// The only exception from this rule is `NaN`, which is unordered by definition. | ||
|
||
// Based on the above, to obtain correctly ordered integral representation of floating-point numbers, we need to: | ||
// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct | ||
// order; | ||
// 2. Invert the sign bit for positive floats. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this just signed integer comparison? Also, this doesn't exclude non-IEEE floating point types or types with padding.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, because the bit representation of positive floats is ascending, but the bit representation of negative floats is descending (the larger the integer, the smaller the float), so we need to reverse the order of the negative ones.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which is also the case with two's complement integers? 0b1111 == -1
, 0b1110 == -2
and so on. It's not clear to me though what you mean by "larger" and "smaller", since you can interpret these words in very different ways in this context.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the straight order:
x = 0b1110 = -2
y = 0b1110 + 1 = 0b1111 = -1
x < y
But:
0b1111
represents a float which is bigger than 0b1110
in magnitude, but smaller in actual (signed) value, because both are negative.
So bitcast<float>(x) > bitcast<float>(y)
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that floating-point values in C++ are not required to be IEEE-754. Whether or not they are is determined by numeric_limits<T>::is_iec559
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
determined by numeric_limits::is_iec559
Wow, thanks.
template <class _Enum, enable_if_t< is_enum<_Enum>::value, int> = 0> | ||
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Enum __e) { | ||
return static_cast<std::underlying_type_t<_Enum>>(__e); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This optimization is incorrect. You're allowed to define your own comparison operators for enums. Please add a regression test to make sure people don't try to add this again.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've missed that, thanks.
So we can allow only arithmetic types and pointers, am I right?
Do we also need to exclude pointers to functions?
return static_cast<std::underlying_type_t<_Enum>>(__e); | ||
} | ||
|
||
template <class _Pointer> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
template <class _Pointer> | |
template <class _Tp> |
since it's not a pointer but the pointee type.
|
||
template <class _Pointer> | ||
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Pointer* __ptr) { | ||
return std::__bit_cast<__unsigned_representation_for_t<_Pointer*>>(__ptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't we just use uintptr_t
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, such cast is not constant-evaluation-compatible. The constexpr
should be dropped and we should make sure that the cast is not performed in constant evaluation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean that, although compilers allow this function to be constexpr
, the standard forbids it?
Also, why use uintptr_t
since it is defined as optional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean that, despite compilers allow this function to be
constexpr
, the standard forbids it?
The function can be marked constexpr, but you can't evaluate it during constant evaluation.
Also why to use
uintptr_t
since it is defined as optional?
It's defined as such, but reality disagrees.
|
|
|
I think I've addressed all the issues, can you take a look, please? |
ping |
@@ -298,6 +301,84 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) { | |||
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value); | |||
} | |||
|
|||
template <size_t _Size> | |||
struct __unsigned_integer_of_size {}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
struct __unsigned_integer_of_size {}; | |
struct __unsigned_integer_of_size; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
template <> | ||
struct __unsigned_integer_of_size<16> { | ||
# if _LIBCPP_HAS_INT128 | ||
using type = __int128; | ||
# endif | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
template <> | |
struct __unsigned_integer_of_size<16> { | |
# if _LIBCPP_HAS_INT128 | |
using type = __int128; | |
# endif | |
}; | |
template <> | |
# if _LIBCPP_HAS_INT128 | |
struct __unsigned_integer_of_size<16> { | |
using type = __int128; | |
}; | |
# endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
4e339ee
to
0a0513d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks almost good. Just a few nits.
libcxx/docs/ReleaseNotes/21.rst
Outdated
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance | ||
up to 10 times, depending on type of sorted elements and the initial state of the sorted array. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance | |
up to 10 times, depending on type of sorted elements and the initial state of the sorted array. | |
- The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance | |
up to 10x, depending on type of sorted elements and the initial state of the sorted array. |
Since we're using it in the other release notes as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
template <class _Tp, class = void> | ||
struct __is_ordered_integer_representable : false_type {}; | ||
|
||
template <class _Tp> | ||
struct __is_ordered_integer_representable<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> | ||
: true_type {}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
template <class _Tp, class = void> | |
struct __is_ordered_integer_representable : false_type {}; | |
template <class _Tp> | |
struct __is_ordered_integer_representable<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> | |
: true_type {}; | |
template <class _Tp, class = void> | |
inline const bool __is_ordered_integer_representable_v = false; | |
template <class _Tp> | |
inline const bool __is_ordered_integer_representable_v<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> | |
= true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
a095229
to
6ec578c
Compare
6ec578c
to
3964314
Compare
3964314
to
80afedd
Compare
@philnik777 ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm merging this.
Thanks! |
… floating-point types (llvm#129452) These changes speed up `std::stable_sort` in the case of sorting floating-point types. This applies only to IEEE 754 floats. The speedup is similar to that achieved for integers in PR llvm#104683 (see benchmarks below). Why does this worth doing? Previously, `std::stable_sort` had almost no chance of beating `std::sort`. Now there are cases when `std::stable_sort` is preferrable, and the difference is significant. ``` --------------------------------------------------------------------------- Benchmark | std::stable_sort | std::sort | std::stable_sort | without radix_sort | | with radix_sort --------------------------------------------------------------------------- float_Random_1 | 1.62 ns | 2.15 ns | 1.61 ns float_Random_4 | 18.0 ns | 2.71 ns | 16.3 ns float_Random_16 | 118 ns | 113 ns | 112 ns float_Random_64 | 751 ns | 647 ns | 730 ns float_Random_256 | 4715 ns | 2937 ns | 4669 ns float_Random_1024 | 25713 ns | 13172 ns | 5959 ns <-- float_Random_4096 | 131307 ns | 56870 ns | 19294 ns <-- float_Random_16384 | 624996 ns | 242953 ns | 64264 ns <-- float_Random_65536 | 2895661 ns | 1027279 ns | 288553 ns <-- float_Random_262144 | 13285372 ns | 4342593 ns | 3022377 ns <-- float_Random_1048576 | 60595871 ns | 19087591 ns | 18690457 ns <-- float_Random_2097152 | 131336117 ns | 38800396 ns | 52325016 ns float_Random_4194304 | 270043042 ns | 79978019 ns | 102907726 ns double_Random_1 | 1.60 ns | 2.15 ns | 1.61 ns double_Random_4 | 15.2 ns | 2.70 ns | 16.9 ns double_Random_16 | 104 ns | 112 ns | 119 ns double_Random_64 | 712 ns | 614 ns | 755 ns double_Random_256 | 4496 ns | 2966 ns | 4820 ns double_Random_1024 | 24722 ns | 12679 ns | 6189 ns <-- double_Random_4096 | 126075 ns | 54484 ns | 20999 ns <-- double_Random_16384 | 613782 ns | 232557 ns | 110276 ns <-- double_Random_65536 | 2894972 ns | 988531 ns | 774302 ns <-- double_Random_262144 | 13460273 ns | 4278059 ns | 5115123 ns double_Random_1048576 | 61119996 ns | 18408462 ns | 
27166574 ns double_Random_2097152 | 132511525 ns | 37986158 ns | 54423869 ns double_Random_4194304 | 272949862 ns | 77912616 ns | 147670834 ns ``` Comparison for only `std::stable_sort`: ``` Benchmark Time Time Old Time New -------------------------------------------------------------------------------------------------- BM_StableSort_float_Random_1024 -0.7997 25438 5096 BM_StableSort_float_Random_4096 -0.8731 128157 16260 BM_StableSort_float_Random_16384 -0.9024 621271 60623 BM_StableSort_float_Random_65536 -0.9081 2922413 268619 BM_StableSort_float_Random_262144 -0.7766 13386345 2990408 BM_StableSort_float_Random_1048576 -0.6954 60673010 18481751 BM_StableSort_float_Random_2097152 -0.6026 130977358 52052182 BM_StableSort_float_Random_4194304 -0.6252 271556583 101770500 BM_StableSort_float_Ascending_1024 -0.6430 6711 2396 BM_StableSort_float_Ascending_4096 -0.7979 38460 7773 BM_StableSort_float_Ascending_16384 -0.8471 191069 29222 BM_StableSort_float_Ascending_65536 -0.8683 882321 116194 BM_StableSort_float_Ascending_262144 -0.8346 3868552 639937 BM_StableSort_float_Ascending_1048576 -0.7460 16521233 4195953 BM_StableSort_float_Ascending_2097152 -0.5439 21757532 9922776 BM_StableSort_float_Ascending_4194304 -0.7525 67847496 16791582 BM_StableSort_float_Descending_1024 -0.6359 15038 5475 BM_StableSort_float_Descending_4096 -0.7090 62810 18278 BM_StableSort_float_Descending_16384 -0.7763 311844 69750 BM_StableSort_float_Descending_65536 -0.7228 1270513 352202 BM_StableSort_float_Descending_262144 -0.6785 5484173 1763045 BM_StableSort_float_Descending_1048576 -0.5084 20223149 9941852 BM_StableSort_float_Descending_2097152 -0.7646 60523254 14247014 BM_StableSort_float_Descending_4194304 -0.5638 95706839 41748858 BM_StableSort_float_SingleElement_1024 +0.3715 1732 2375 BM_StableSort_float_SingleElement_4096 -0.1685 9357 7781 BM_StableSort_float_SingleElement_16384 -0.3793 47307 29362 BM_StableSort_float_SingleElement_65536 -0.4925 227666 115536 
BM_StableSort_float_SingleElement_262144 -0.4271 1075853 616387 BM_StableSort_float_SingleElement_1048576 -0.3736 5097599 3193279 BM_StableSort_float_SingleElement_2097152 -0.2470 9854161 7420158 BM_StableSort_float_SingleElement_4194304 -0.3384 22175964 14670720 BM_StableSort_float_PipeOrgan_1024 -0.4885 10664 5455 BM_StableSort_float_PipeOrgan_4096 -0.6340 50095 18337 BM_StableSort_float_PipeOrgan_16384 -0.7078 238700 69739 BM_StableSort_float_PipeOrgan_65536 -0.6740 1102419 359378 BM_StableSort_float_PipeOrgan_262144 -0.7460 4698739 1193511 BM_StableSort_float_PipeOrgan_1048576 -0.5657 18493972 8032392 BM_StableSort_float_PipeOrgan_2097152 -0.7116 41089206 11850349 BM_StableSort_float_PipeOrgan_4194304 -0.6650 83445011 27955737 BM_StableSort_float_QuickSortAdversary_1024 -0.6863 17402 5460 BM_StableSort_float_QuickSortAdversary_4096 -0.7715 79864 18247 BM_StableSort_float_QuickSortAdversary_16384 -0.7800 317480 69839 BM_StableSort_float_QuickSortAdversary_65536 -0.7400 1357601 352967 BM_StableSort_float_QuickSortAdversary_262144 -0.6450 5662094 2009769 BM_StableSort_float_QuickSortAdversary_1048576 -0.5092 21173627 10392107 BM_StableSort_float_QuickSortAdversary_2097152 -0.7333 61748178 16469993 BM_StableSort_float_QuickSortAdversary_4194304 -0.5607 98459863 43250182 BM_StableSort_double_Random_1024 -0.7657 24769 5802 BM_StableSort_double_Random_4096 -0.8441 126449 19717 BM_StableSort_double_Random_16384 -0.8269 614910 106447 BM_StableSort_double_Random_65536 -0.7413 2905000 751427 BM_StableSort_double_Random_262144 -0.6287 13449514 4994348 BM_StableSort_double_Random_1048576 -0.5635 60863246 26568349 BM_StableSort_double_Random_2097152 -0.5959 130293892 52654532 BM_StableSort_double_Random_4194304 -0.4772 272616445 142526267 BM_StableSort_double_Ascending_1024 -0.4870 6757 3466 BM_StableSort_double_Ascending_4096 -0.7360 37592 9923 BM_StableSort_double_Ascending_16384 -0.7971 183967 37324 BM_StableSort_double_Ascending_65536 -0.7465 897116 227398 
BM_StableSort_double_Ascending_262144 -0.6764 4020980 1301033 BM_StableSort_double_Ascending_1048576 -0.6407 16421799 5900751 BM_StableSort_double_Ascending_2097152 -0.6380 29347139 10622419 BM_StableSort_double_Ascending_4194304 -0.5934 70439925 28644185 BM_StableSort_double_Descending_1024 -0.5988 15216 6105 BM_StableSort_double_Descending_4096 -0.6857 65069 20449 BM_StableSort_double_Descending_16384 -0.6922 329321 101381 BM_StableSort_double_Descending_65536 -0.7038 1367970 405242 BM_StableSort_double_Descending_262144 -0.6472 5361644 1891429 BM_StableSort_double_Descending_1048576 -0.6656 22031404 7366459 BM_StableSort_double_Descending_2097152 -0.7593 68922467 16591242 BM_StableSort_double_Descending_4194304 -0.6392 96283643 34743223 BM_StableSort_double_SingleElement_1024 +0.9128 1895 3625 BM_StableSort_double_SingleElement_4096 +0.1475 10013 11490 BM_StableSort_double_SingleElement_16384 -0.1901 52382 42424 BM_StableSort_double_SingleElement_65536 -0.2096 254698 201313 BM_StableSort_double_SingleElement_262144 -0.1833 1248478 1019648 BM_StableSort_double_SingleElement_1048576 -0.1741 5703397 4710603 BM_StableSort_double_SingleElement_2097152 -0.1751 10922197 9009835 BM_StableSort_double_SingleElement_4194304 -0.1538 26571923 22485137 BM_StableSort_double_PipeOrgan_1024 -0.4406 10752 6014 BM_StableSort_double_PipeOrgan_4096 -0.5917 49456 20195 BM_StableSort_double_PipeOrgan_16384 -0.6258 270515 101221 BM_StableSort_double_PipeOrgan_65536 -0.7098 1159462 336457 BM_StableSort_double_PipeOrgan_262144 -0.6591 4735711 1614433 BM_StableSort_double_PipeOrgan_1048576 -0.6620 19353110 6541172 BM_StableSort_double_PipeOrgan_2097152 -0.7288 49131812 13323391 BM_StableSort_double_PipeOrgan_4194304 -0.5988 81958974 32878171 BM_StableSort_double_QuickSortAdversary_1024 -0.6516 17948 6254 BM_StableSort_double_QuickSortAdversary_4096 -0.7527 82359 20363 BM_StableSort_double_QuickSortAdversary_16384 -0.7009 340410 101811 BM_StableSort_double_QuickSortAdversary_65536 -0.6854 
1487480 467928 BM_StableSort_double_QuickSortAdversary_262144 -0.6386 5648460 2041377 BM_StableSort_double_QuickSortAdversary_1048576 -0.6127 22859142 8852587 BM_StableSort_double_QuickSortAdversary_2097152 -0.7161 68693975 19499381 BM_StableSort_double_QuickSortAdversary_4194304 -0.5909 95532179 39077491 OVERALL_GEOMEAN -0.6472 0 0 ```
These changes speed up
std::stable_sort
in the case of sorting floating-point types. This applies only to IEEE 754 floats.
The speedup is similar to that achieved for integers in PR #104683 (see benchmarks below).
Why is this worth doing?
Previously,
std::stable_sort
had almost no chance of beating std::sort
. Now there are cases when
std::stable_sort
is preferable, and the difference is significant. Comparison for only
std::stable_sort
: