Skip to content

Commit aecca52

Browse files
committed
Apply review comments
1 parent efb9857 commit aecca52

File tree

11 files changed

+202
-152
lines changed

11 files changed

+202
-152
lines changed

libc/src/stdlib/qsort.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,10 @@ namespace LIBC_NAMESPACE_DECL {
1818
LLVM_LIBC_FUNCTION(void, qsort,
1919
(void *array, size_t array_size, size_t elem_size,
2020
int (*compare)(const void *, const void *))) {
21-
internal::unstable_sort(
22-
array, array_size, elem_size,
23-
[compare](const void *a, const void *b) noexcept -> bool {
24-
return compare(a, b) < 0;
25-
});
21+
internal::unstable_sort(array, array_size, elem_size,
22+
[compare](const void *a, const void *b) -> bool {
23+
return compare(a, b) < 0;
24+
});
2625
}
2726

2827
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdlib/qsort_data.h

Lines changed: 28 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -19,47 +19,25 @@
1919
namespace LIBC_NAMESPACE_DECL {
2020
namespace internal {
2121

22-
// Returns the max amount of bytes deemed reasonable - based on the target
23-
// properties - for use in local stack arrays.
24-
constexpr size_t max_stack_array_size() {
25-
// Uses target pointer size as heuristic how much memory is available and
26-
// unlikely to run into stack overflow and perf problems.
27-
constexpr size_t ptr_diff_size = sizeof(ptrdiff_t);
28-
29-
if constexpr (ptr_diff_size >= 8) {
30-
return 4096;
31-
}
32-
33-
if constexpr (ptr_diff_size == 4) {
34-
return 512;
35-
}
36-
37-
// 8-bit platforms are just not gonna work well with libc, qsort
38-
// won't be the problem.
39-
// 16-bit platforms ought to be able to store 64 bytes on the stack.
40-
return 64;
41-
}
42-
4322
class ArrayGenericSize {
44-
uint8_t *array_base;
23+
cpp::byte *array_base;
4524
size_t array_len;
4625
size_t elem_size;
4726

48-
uint8_t *get_internal(size_t i) const noexcept {
27+
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
4928
return array_base + (i * elem_size);
5029
}
5130

5231
public:
53-
ArrayGenericSize(uint8_t *a, size_t s, size_t e) noexcept
54-
: array_base(a), array_len(s), elem_size(e) {}
32+
ArrayGenericSize(void *a, size_t s, size_t e)
33+
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
34+
elem_size(e) {}
5535

5636
static constexpr bool has_fixed_size() { return false; }
5737

58-
void *get(size_t i) const noexcept {
59-
return reinterpret_cast<void *>(get_internal(i));
60-
}
38+
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
6139

62-
void swap(size_t i, size_t j) const noexcept {
40+
void swap(size_t i, size_t j) const {
6341
// It's possible to use 8 byte blocks with `uint64_t`, but that
6442
// generates more machine code as the remainder loop gets
6543
// unrolled, plus 4 byte operations are more likely to be
@@ -70,8 +48,8 @@ class ArrayGenericSize {
7048
using block_t = uint32_t;
7149
constexpr size_t BLOCK_SIZE = sizeof(block_t);
7250

73-
uint8_t *elem_i = get_internal(i);
74-
uint8_t *elem_j = get_internal(j);
51+
cpp::byte *elem_i = get_internal(i);
52+
cpp::byte *elem_j = get_internal(j);
7553

7654
const size_t elem_size_rem = elem_size % BLOCK_SIZE;
7755
const block_t *elem_i_block_end =
@@ -88,74 +66,73 @@ class ArrayGenericSize {
8866
elem_j_block += 1;
8967
}
9068

91-
elem_i = reinterpret_cast<uint8_t *>(elem_i_block);
92-
elem_j = reinterpret_cast<uint8_t *>(elem_j_block);
69+
elem_i = reinterpret_cast<cpp::byte *>(elem_i_block);
70+
elem_j = reinterpret_cast<cpp::byte *>(elem_j_block);
9371
for (size_t n = 0; n < elem_size_rem; ++n) {
94-
uint8_t tmp = elem_i[n];
72+
cpp::byte tmp = elem_i[n];
9573
elem_i[n] = elem_j[n];
9674
elem_j[n] = tmp;
9775
}
9876
}
9977

100-
size_t len() const noexcept { return array_len; }
78+
LIBC_INLINE size_t len() const { return array_len; }
10179

10280
// Make an Array starting at index |i| and length |s|.
103-
ArrayGenericSize make_array(size_t i, size_t s) const noexcept {
81+
LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
10482
return ArrayGenericSize(get_internal(i), s, elem_size);
10583
}
10684

10785
// Reset this Array to point at a different interval of the same
10886
// items starting at index |i|.
109-
void reset_bounds(size_t i, size_t s) noexcept {
87+
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
11088
array_base = get_internal(i);
11189
array_len = s;
11290
}
11391
};
11492

115-
// Having a specialized Array type for sorting that knowns at
93+
// Having a specialized Array type for sorting that knows at
11694
// compile-time what the size of the element is, allows for much more
11795
// efficient swapping and for cheaper offset calculations.
11896
template <size_t ELEM_SIZE> class ArrayFixedSize {
119-
uint8_t *array_base;
97+
cpp::byte *array_base;
12098
size_t array_len;
12199

122-
uint8_t *get_internal(size_t i) const noexcept {
100+
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
123101
return array_base + (i * ELEM_SIZE);
124102
}
125103

126104
public:
127-
ArrayFixedSize(uint8_t *a, size_t s) noexcept : array_base(a), array_len(s) {}
105+
ArrayFixedSize(void *a, size_t s)
106+
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
128107

129108
// Beware this function is used a heuristic for cheap to swap types, so
130109
// instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
131110
// idea perf wise.
132111
static constexpr bool has_fixed_size() { return true; }
133112

134-
void *get(size_t i) const noexcept {
135-
return reinterpret_cast<void *>(get_internal(i));
136-
}
113+
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
137114

138-
void swap(size_t i, size_t j) const noexcept {
139-
alignas(32) uint8_t tmp[ELEM_SIZE];
115+
LIBC_INLINE void swap(size_t i, size_t j) const {
116+
alignas(32) cpp::byte tmp[ELEM_SIZE];
140117

141-
uint8_t *elem_i = get_internal(i);
142-
uint8_t *elem_j = get_internal(j);
118+
cpp::byte *elem_i = get_internal(i);
119+
cpp::byte *elem_j = get_internal(j);
143120

144121
inline_memcpy(tmp, elem_i, ELEM_SIZE);
145122
inline_memmove(elem_i, elem_j, ELEM_SIZE);
146123
inline_memcpy(elem_j, tmp, ELEM_SIZE);
147124
}
148125

149-
size_t len() const noexcept { return array_len; }
126+
LIBC_INLINE size_t len() const { return array_len; }
150127

151128
// Make an Array starting at index |i| and length |s|.
152-
ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const noexcept {
129+
LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
153130
return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
154131
}
155132

156133
// Reset this Array to point at a different interval of the same
157134
// items starting at index |i|.
158-
void reset_bounds(size_t i, size_t s) noexcept {
135+
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
159136
array_base = get_internal(i);
160137
array_len = s;
161138
}

libc/src/stdlib/qsort_pivot.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
1010
#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
1111

12-
#include "src/stdlib/qsort_pivot.h"
13-
1412
#include <stdint.h>
1513

1614
namespace LIBC_NAMESPACE_DECL {
@@ -75,11 +73,10 @@ size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) {
7573
// If x=y=1 then a < b, c. In this case we want to return min(b, c).
7674
// By toggling the outcome of b < c using XOR x we get this behavior.
7775
const bool z = is_less(b_ptr, c_ptr);
78-
if (z ^ x) {
76+
if (z ^ x)
7977
return c;
80-
} else {
78+
else
8179
return b;
82-
}
8380
} else {
8481
// Either c <= a < b or b <= a < c, thus a is our median.
8582
return a;

libc/src/stdlib/qsort_r.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
2020
int (*compare)(const void *, const void *, void *),
2121
void *arg)) {
2222

23-
internal::unstable_sort(
24-
array, array_size, elem_size,
25-
[compare, arg](const void *a, const void *b) noexcept -> bool {
26-
return compare(a, b, arg) < 0;
27-
});
23+
internal::unstable_sort(array, array_size, elem_size,
24+
[compare, arg](const void *a, const void *b) -> bool {
25+
return compare(a, b, arg) < 0;
26+
});
2827
}
2928

3029
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdlib/qsort_util.h

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@
1515
#define LIBC_QSORT_QUICK_SORT 1
1616
#define LIBC_QSORT_HEAP_SORT 2
1717

18+
#ifdef LIBC_OPTIMIZE_FOR_SIZE
19+
#define LIBC_QSORT_IMPL LIBC_QSORT_HEAP_SORT
20+
#else
1821
#ifndef LIBC_QSORT_IMPL
1922
#define LIBC_QSORT_IMPL LIBC_QSORT_QUICK_SORT
2023
#endif // LIBC_QSORT_IMPL
24+
#endif // LIBC_OPTIMIZE_FOR_SIZE
2125

2226
#if (LIBC_QSORT_IMPL != LIBC_QSORT_QUICK_SORT && \
2327
LIBC_QSORT_IMPL != LIBC_QSORT_HEAP_SORT)
@@ -27,45 +31,50 @@
2731
namespace LIBC_NAMESPACE_DECL {
2832
namespace internal {
2933

30-
template <typename A, typename F> void sort_inst(A &array, const F &is_less) {
31-
#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT
32-
quick_sort(array, is_less);
33-
#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT
34-
heap_sort(array, is_less);
35-
#endif
34+
template <bool USE_QUICKSORT, typename F>
35+
void unstable_sort_impl(void *array, size_t array_len, size_t elem_size,
36+
const F &is_less) {
37+
if (array == nullptr || array_len == 0 || elem_size == 0)
38+
return;
39+
40+
if constexpr (USE_QUICKSORT) {
41+
switch (elem_size) {
42+
case 4: {
43+
auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len);
44+
quick_sort(arr_fixed_size, is_less);
45+
return;
46+
}
47+
case 8: {
48+
auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len);
49+
quick_sort(arr_fixed_size, is_less);
50+
return;
51+
}
52+
case 16: {
53+
auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len);
54+
quick_sort(arr_fixed_size, is_less);
55+
return;
56+
}
57+
default:
58+
auto arr_generic_size =
59+
internal::ArrayGenericSize(array, array_len, elem_size);
60+
quick_sort(arr_generic_size, is_less);
61+
return;
62+
}
63+
} else {
64+
auto arr_generic_size =
65+
internal::ArrayGenericSize(array, array_len, elem_size);
66+
heap_sort(arr_generic_size, is_less);
67+
}
3668
}
3769

3870
template <typename F>
3971
void unstable_sort(void *array, size_t array_len, size_t elem_size,
4072
const F &is_less) {
41-
if (array == nullptr || array_len == 0 || elem_size == 0) {
42-
return;
43-
}
44-
45-
uint8_t *array_base = reinterpret_cast<uint8_t *>(array);
46-
47-
switch (elem_size) {
48-
case 4: {
49-
auto arr_fixed_size = internal::ArrayFixedSize<4>(array_base, array_len);
50-
sort_inst(arr_fixed_size, is_less);
51-
return;
52-
}
53-
case 8: {
54-
auto arr_fixed_size = internal::ArrayFixedSize<8>(array_base, array_len);
55-
sort_inst(arr_fixed_size, is_less);
56-
return;
57-
}
58-
case 16: {
59-
auto arr_fixed_size = internal::ArrayFixedSize<16>(array_base, array_len);
60-
sort_inst(arr_fixed_size, is_less);
61-
return;
62-
}
63-
default:
64-
auto arr_generic_size =
65-
internal::ArrayGenericSize(array_base, array_len, elem_size);
66-
sort_inst(arr_generic_size, is_less);
67-
return;
68-
}
73+
#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT
74+
unstable_sort_impl<true, F>(array, array_len, elem_size, is_less);
75+
#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT
76+
unstable_sort_impl<false, F>(array, array_len, elem_size, is_less);
77+
#endif
6978
}
7079

7180
} // namespace internal

libc/src/stdlib/quick_sort.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
#ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
1010
#define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
1111

12+
#include "src/__support/CPP/bit.h"
1213
#include "src/__support/CPP/cstddef.h"
13-
#include "src/__support/big_int.h"
1414
#include "src/__support/macros/config.h"
1515
#include "src/stdlib/qsort_pivot.h"
1616

@@ -90,7 +90,7 @@ size_t partition(const A &array, size_t pivot_index, const F &is_less) {
9090
size_t num_lt;
9191
if constexpr (A::has_fixed_size()) {
9292
// Branchless Lomuto avoid branch misprediction penalties, but
93-
// it also swaps more often which only is faster if the swap a
93+
// it also swaps more often which is only faster if the swap is a fast
9494
// constant operation.
9595
num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
9696
} else {
@@ -130,7 +130,7 @@ void quick_sort_impl(A &array, const void *ancestor_pivot, size_t limit,
130130
if (!is_less(ancestor_pivot, array.get(pivot_index))) {
131131
const size_t num_lt =
132132
partition(array, pivot_index,
133-
[is_less](const void *a, const void *b) noexcept -> bool {
133+
[is_less](const void *a, const void *b) -> bool {
134134
return !is_less(b, a);
135135
});
136136

libc/test/src/stdlib/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,11 +313,11 @@ add_libc_test(
313313
)
314314

315315
add_libc_test(
316-
qsort_test
316+
quick_sort_test
317317
SUITE
318318
libc-stdlib-tests
319319
SRCS
320-
qsort_test.cpp
320+
quick_sort_test.cpp
321321
HDRS
322322
SortingTest.h
323323
DEPENDS

0 commit comments

Comments
 (0)