Skip to content

Commit 80afedd

Browse files
committed
radix-sort-floats
1 parent 761787d commit 80afedd

File tree

4 files changed

+166
-11
lines changed

4 files changed

+166
-11
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ Improvements and New Features
6363

6464
- The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x.
6565

66+
- The ``std::stable_sort`` algorithm now uses radix sort for floating-point types, which can improve performance by
67+
up to 10x, depending on the type of the sorted elements and the initial state of the sorted array.
68+
6669
Deprecations and Removals
6770
-------------------------
6871

libcxx/include/__algorithm/radix_sort.h

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929

3030
#include <__algorithm/for_each.h>
3131
#include <__algorithm/move.h>
32+
#include <__bit/bit_cast.h>
3233
#include <__bit/bit_log2.h>
33-
#include <__bit/countl.h>
3434
#include <__config>
35+
#include <__cstddef/size_t.h>
3536
#include <__functional/identity.h>
3637
#include <__iterator/access.h>
3738
#include <__iterator/distance.h>
@@ -44,9 +45,12 @@
4445
#include <__type_traits/enable_if.h>
4546
#include <__type_traits/invoke.h>
4647
#include <__type_traits/is_assignable.h>
48+
#include <__type_traits/is_enum.h>
4749
#include <__type_traits/is_integral.h>
4850
#include <__type_traits/is_unsigned.h>
4951
#include <__type_traits/make_unsigned.h>
52+
#include <__type_traits/void_t.h>
53+
#include <__utility/declval.h>
5054
#include <__utility/forward.h>
5155
#include <__utility/integer_sequence.h>
5256
#include <__utility/move.h>
@@ -298,6 +302,96 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) {
298302
return static_cast<make_unsigned_t<_Ip> >(__n ^ __min_value);
299303
}
300304

305+
// Maps a size in bytes (`_Size`) to an unsigned integer type via the nested `type` alias.
// The primary template is declared but not defined; only the sizes specialized below (1, 2, 4, 8 and,
// when `_LIBCPP_HAS_INT128` is set, 16) provide `type`.
template <size_t _Size>
306+
struct __unsigned_integer_of_size;
307+
308+
// 1 byte -> uint8_t
template <>
309+
struct __unsigned_integer_of_size<1> {
310+
using type _LIBCPP_NODEBUG = uint8_t;
311+
};
312+
313+
// 2 bytes -> uint16_t
template <>
314+
struct __unsigned_integer_of_size<2> {
315+
using type _LIBCPP_NODEBUG = uint16_t;
316+
};
317+
318+
// 4 bytes -> uint32_t
template <>
319+
struct __unsigned_integer_of_size<4> {
320+
using type _LIBCPP_NODEBUG = uint32_t;
321+
};
322+
323+
// 8 bytes -> uint64_t
template <>
324+
struct __unsigned_integer_of_size<8> {
325+
using type _LIBCPP_NODEBUG = uint64_t;
326+
};
327+
328+
// 16 bytes -> unsigned __int128, only when the configuration reports 128-bit integer support.
# if _LIBCPP_HAS_INT128
329+
template <>
330+
struct __unsigned_integer_of_size<16> {
331+
using type _LIBCPP_NODEBUG = unsigned __int128;
332+
};
333+
# endif
334+
335+
// Convenience alias for `__unsigned_integer_of_size<_Size>::type`.
template <size_t _Size>
336+
using __unsigned_integer_of_size_t _LIBCPP_NODEBUG = typename __unsigned_integer_of_size<_Size>::type;
337+
338+
// The unsigned integer type that has the same `sizeof` as `_Sc`.
template <class _Sc>
339+
using __unsigned_representation_for_t _LIBCPP_NODEBUG = __unsigned_integer_of_size_t<sizeof(_Sc)>;
340+
341+
// The function `__to_ordered_integral` is defined for integers and IEEE 754 floating-point numbers.
342+
// It returns an integer representation such that, for any `x` and `y` with `x < y`, the expression
343+
// `__to_ordered_integral(x) < __to_ordered_integral(y)` is true, where `x`, `y` are integers or IEEE 754 floats.
//
// This overload handles integral types: an integer already is its own ordered representation, so the argument is
// returned unchanged.
template <class _Integral, enable_if_t< is_integral<_Integral>::value, int> = 0>
345+
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Integral __n) {
346+
return __n;
347+
}
348+
349+
// An overload for IEEE 754 floating-point numbers
350+
351+
// For the floats conforming to IEEE 754 (IEC 559) standard, we know that:
352+
// 1. The bit representation of positive floats directly reflects their order:
353+
// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are
354+
// equal, the one with the larger mantissa is greater.
355+
// 2. The bit representation of negative floats reflects their reverse order (for the same reasons).
356+
// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw
357+
// bit representation, any negative number will be greater than any positive number.
358+
359+
// The only exception to this rule is `NaN`, which is unordered by definition.
360+
361+
// Based on the above, to obtain correctly ordered integral representation of floating-point numbers, we need to:
362+
// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct
363+
// order;
364+
// 2. Invert only the sign bit of positive floats (those whose sign bit is zero).
365+
366+
// Thus, in final integral representation, we have reversed the order for negative floats and made all negative floats
367+
// smaller than all positive numbers (by inverting the sign bit).
368+
// Overload for floating-point types for which `numeric_limits<_Floating>::is_iec559` is true.
// Returns an unsigned integer of the same size as `_Floating`, computed from the bit pattern of `__f`:
// all bits are inverted when the sign bit is set, otherwise only the sign bit is inverted.
//
// NOTE(review): under IEEE 754, `-0.0` (only the sign bit set) and `+0.0` (all bits zero) compare equal with
// `operator<`, yet this function gives `-0.0` a strictly smaller result than `+0.0`. A stable sort keyed on this
// value would therefore place every `-0.0` before every `+0.0` regardless of input order -- confirm this is
// acceptable for the callers.
template <class _Floating, enable_if_t< numeric_limits<_Floating>::is_iec559, int> = 0>
369+
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Floating __f) {
370+
// Unsigned integer type with the same `sizeof` as `_Floating`.
using __integral_type = __unsigned_representation_for_t<_Floating>;
371+
constexpr auto __bit_count = std::numeric_limits<__integral_type>::digits;
372+
// Mask with only the most significant bit set, i.e. the position of the sign bit.
constexpr auto __sign_bit_mask = static_cast<__integral_type>(__integral_type{1} << (__bit_count - 1));
373+
374+
// Reinterpret the bits of the float as an unsigned integer.
const auto __u = std::__bit_cast<__integral_type>(__f);
375+
376+
// Sign bit set: invert every bit. Sign bit clear: invert the sign bit only.
// The outer cast converts the result back to `__integral_type`, since `~` and `^` promote operands that are
// narrower than `int`.
return static_cast<__integral_type>(__u & __sign_bit_mask ? ~__u : __u ^ __sign_bit_mask);
377+
}
378+
379+
// An enumeration may have a user-defined comparison, so enums cannot be compared as if they were plain integers.
// This overload is deleted, which makes a call to `__to_ordered_integral` with an enumeration argument ill-formed.
template <class _Enum, enable_if_t< is_enum<_Enum>::value, int> = 0>
381+
_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Enum __e) = delete;
382+
383+
// `long double` varies significantly across platforms and compilers, making it practically
384+
// impossible to determine its actual bit width for conversion to an ordered integer.
// A `long double` argument matches this non-template overload exactly, and the overload is deleted, so the
// conversion is unavailable for `long double`.
inline _LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(long double) = delete;
386+
387+
// Detects whether `std::__to_ordered_integral` can be called with a value of type `_Tp`.
// Primary template: `false` for any type that does not match the partial specialization below.
template <class _Tp, class = void>
388+
inline const bool __is_ordered_integer_representable_v = false;
389+
390+
// Partial specialization: chosen only when the call expression inside `decltype` is well-formed, in which case
// the value is `true`. A call that resolves to a deleted overload is not well-formed, so such types keep `false`.
template <class _Tp>
391+
inline const bool
392+
__is_ordered_integer_representable_v<_Tp, __void_t<decltype(std::__to_ordered_integral(std::declval<_Tp>()))>> =
393+
true;
394+
301395
struct __low_byte_fn {
302396
template <class _Ip>
303397
_LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const {
@@ -314,7 +408,9 @@ __radix_sort(_RandomAccessIterator1 __first,
314408
_RandomAccessIterator2 __buffer,
315409
_Map __map,
316410
_Radix __radix) {
317-
auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); };
411+
auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) {
412+
return std::__shift_to_unsigned(__map(std::__to_ordered_integral(__x)));
413+
};
318414
std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix);
319415
}
320416

libcxx/include/__algorithm/stable_sort.h

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include <__type_traits/desugars_to.h>
2727
#include <__type_traits/enable_if.h>
2828
#include <__type_traits/is_constant_evaluated.h>
29-
#include <__type_traits/is_integral.h>
3029
#include <__type_traits/is_same.h>
3130
#include <__type_traits/is_trivially_assignable.h>
3231
#include <__utility/move.h>
@@ -201,7 +200,7 @@ struct __stable_sort_switch {
201200
#if _LIBCPP_STD_VER >= 17
202201
template <class _Tp>
203202
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
204-
static_assert(is_integral<_Tp>::value);
203+
static_assert(__is_ordered_integer_representable_v<_Tp>);
205204
if constexpr (sizeof(_Tp) == 1) {
206205
return 1 << 8;
207206
}
@@ -211,7 +210,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() {
211210

212211
template <class _Tp>
213212
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() {
214-
static_assert(is_integral<_Tp>::value);
213+
static_assert(__is_ordered_integer_representable_v<_Tp>);
215214
if constexpr (sizeof(_Tp) >= 8) {
216215
return 1 << 15;
217216
}
@@ -245,11 +244,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort(
245244
}
246245

247246
#if _LIBCPP_STD_VER >= 17
248-
constexpr auto __default_comp = __desugars_to_v<__totally_ordered_less_tag, _Compare, value_type, value_type >;
249-
constexpr auto __integral_value =
250-
is_integral_v<value_type > && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
251-
constexpr auto __allowed_radix_sort = __default_comp && __integral_value;
252-
if constexpr (__allowed_radix_sort) {
247+
constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >;
248+
constexpr auto __radix_sortable =
249+
__is_ordered_integer_representable_v<value_type> &&
250+
is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
251+
if constexpr (__default_comp && __radix_sortable) {
253252
if (__len <= __buff_size && __len >= static_cast<difference_type>(std::__radix_sort_min_bound<value_type>()) &&
254253
__len <= static_cast<difference_type>(std::__radix_sort_max_bound<value_type>())) {
255254
if (__libcpp_is_constant_evaluated()) {

libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
#include <algorithm>
1919
#include <cassert>
2020
#include <iterator>
21+
#include <limits>
2122
#include <random>
23+
#include <type_traits>
2224
#include <vector>
2325

2426
#include "count_new.h"
@@ -68,6 +70,13 @@ TEST_CONSTEXPR_CXX26 std::vector<T> generate_sawtooth(int N, int M) {
6870
if (++x == M)
6971
x = 0;
7072
}
73+
74+
if (std::is_signed<T>::value) {
75+
for (auto& a : v) {
76+
a -= (M / 2);
77+
}
78+
}
79+
7180
return v;
7281
}
7382

@@ -193,12 +202,60 @@ TEST_CONSTEXPR_CXX26 bool test() {
193202
return true;
194203
}
195204

205+
// Checks `std::stable_sort` on a floating-point vector that mixes sawtooth data with special values:
// positive zero, negative zero, positive infinity and negative infinity (256 copies of each).
// NaN is not part of the input. Always returns true; failures are reported through `assert`.
template <class T>
206+
bool test_floating_special_values() {
207+
static_assert(std::is_floating_point<T>::value, "");
208+
209+
auto v = generate_sawtooth<T>(1024, 512);
210+
v.insert(v.end(), 256, static_cast<T>(0.0));
211+
v.insert(v.end(), 256, static_cast<T>(-0.0));
212+
v.insert(v.end(), 256, std::numeric_limits<T>::infinity());
213+
v.insert(v.end(), 256, -std::numeric_limits<T>::infinity());
214+
215+
// Default-constructed engine: the shuffle uses the same seed on every run.
std::mt19937 randomness;
216+
std::shuffle(v.begin(), v.end(), randomness);
217+
218+
std::stable_sort(v.begin(), v.end());
219+
// NOTE(review): `is_sorted` only checks ordering under `operator<`; it does not verify that equivalent
// elements (for example `-0.0` and `+0.0`) kept their relative order.
assert(std::is_sorted(v.begin(), v.end()));
220+
221+
return true;
222+
}
223+
224+
// Runs the generic `test<T>()` and then `test_floating_special_values<T>()`.
// Because of `&&`, the second call is made only if the first one returns true.
template <class T>
225+
bool test_floating() {
226+
return test<T>() && test_floating_special_values<T>();
227+
}
228+
229+
// Scoped enumeration with a user-defined `operator<` that compares the underlying integers with `>`,
// i.e. the enum order is the reverse of the integer order.
enum struct Enum : int { a, b, c, d, e, f, g, h };
230+
TEST_CONSTEXPR_CXX26 bool operator<(Enum x, Enum y) { return static_cast<int>(x) > static_cast<int>(y); }
231+
232+
// Checks that `std::stable_sort` honours the user-defined `operator<` of `Enum`.
// Always returns true; failures are reported through `assert`.
TEST_CONSTEXPR_CXX26 bool test_enum() {
233+
// Build 128 copies of each enumerator, in a..h order (1024 elements in total).
auto v = std::vector<Enum>(128, Enum::a);
234+
v.resize(v.size() + 128, Enum::b);
235+
v.resize(v.size() + 128, Enum::c);
236+
v.resize(v.size() + 128, Enum::d);
237+
v.resize(v.size() + 128, Enum::e);
238+
v.resize(v.size() + 128, Enum::f);
239+
v.resize(v.size() + 128, Enum::g);
240+
v.resize(v.size() + 128, Enum::h);
241+
242+
// `operator<` for `Enum` is reversed, so the a..h input is in descending order with respect to it.
// A sort that ordered the elements by their underlying integer value instead would fail the check below.
std::stable_sort(v.begin(), v.end());
244+
assert(std::is_sorted(v.begin(), v.end()));
245+
246+
return true;
247+
}
248+
196249
int main(int, char**) {
197250
test<int>();
198-
test<float>();
251+
test_floating<float>();
252+
test_floating<double>();
253+
test_floating<long double>();
254+
test_enum();
199255
#if TEST_STD_VER >= 26
200256
static_assert(test<int>());
201257
static_assert(test<float>());
258+
static_assert(test<double>());
202259
// test constexprness of radix sort branch
203260
static_assert(test<char>());
204261
#endif

0 commit comments

Comments
 (0)