Skip to content

Commit 9cc8154

Browse files
[libc++] Implement std::gcd using the binary version
The binary version is four times faster than the current implementation in my setup, and is generally considered a better implementation. The code is inspired by https://en.algorithmica.org/hpc/algorithms/gcd/, which is itself inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/. Fixes #77648.
1 parent 18bc5b2 commit 9cc8154

File tree

9 files changed

+144
-6
lines changed

9 files changed

+144
-6
lines changed

libcxx/include/__bit/countr.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <__bit/rotate.h>
1313
#include <__concepts/arithmetic.h>
1414
#include <__config>
15+
#include <__type_traits/is_unsigned_integer.h>
1516
#include <limits>
1617

1718
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -35,10 +36,11 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
3536
return __builtin_ctzll(__x);
3637
}
3738

38-
#if _LIBCPP_STD_VER >= 20
39+
#if _LIBCPP_STD_VER >= 17
3940

40-
template <__libcpp_unsigned_integer _Tp>
41-
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept {
41+
template <typename _Tp>
42+
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
43+
static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countr_zero requires an unsigned integer type");
4244
if (__t == 0)
4345
return numeric_limits<_Tp>::digits;
4446

@@ -59,6 +61,15 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) n
5961
}
6062
}
6163

64+
#endif
65+
66+
#if _LIBCPP_STD_VER >= 20
67+
68+
// Public C++20 entry point for counting trailing zero bits. It only forwards to
// std::__countr_zero, the internal helper guarded by _LIBCPP_STD_VER >= 17 above,
// so that pre-C++20 library code (e.g. std::__gcd) can share the same implementation.
template <__libcpp_unsigned_integer _Tp>
69+
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept {
70+
return std::__countr_zero(__t);
71+
}
72+
6273
template <__libcpp_unsigned_integer _Tp>
6374
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept {
6475
return __t != numeric_limits<_Tp>::max() ? std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits;

libcxx/include/__numeric/gcd_lcm.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
1111
#define _LIBCPP___NUMERIC_GCD_LCM_H
1212

13+
#include <__algorithm/min.h>
1314
#include <__assert>
15+
#include <__bit/countr.h>
1416
#include <__config>
1517
#include <__type_traits/common_type.h>
1618
#include <__type_traits/is_integral.h>
@@ -50,9 +52,25 @@ struct __ct_abs<_Result, _Source, false> {
5052
};
5153

5254
// Greatest common divisor of two values of a non-signed type _Tp, computed with
// the binary GCD algorithm: only comparisons, subtractions, shifts and
// count-trailing-zeros are used, no modulo.
//
// In this diff view the `-` gutter lines are the previous recursive Euclidean
// implementation (parameters __m/__n); the `+` lines are its replacement.
//
// Returns the other operand when one operand is 0 (so __gcd(0, 0) == 0).
template <class _Tp>
53-
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
55+
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
5456
static_assert((!is_signed<_Tp>::value), "");
55-
return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
57+
if (__a == 0) // gcd(0, b) == b; also keeps __countr_zero away from a zero operand below
58+
return __b;
59+
if (__b == 0) // gcd(a, 0) == a
60+
return __a;
61+
62+
int __az = std::__countr_zero(__a); // power of two dividing __a
63+
int __bz = std::__countr_zero(__b); // power of two dividing __b
64+
int __shift = std::min(__az, __bz); // power of two common to both; re-applied to the result at the end
65+
__b >>= __bz; // __b is non-zero here, so it is odd from now on
66+
while (__a != 0) {
67+
__a >>= __az; // drop the factors of two of __a: both operands are now odd
68+
_Tp __absdiff = __a > __b ? __a - __b : __b - __a; // |__a - __b|, ordered so the subtraction never goes negative
69+
__b = std::min(__a, __b); // keep the smaller (still odd) operand
70+
__a = __absdiff; // difference of two odd values: even, or 0 when they were equal
71+
__az = std::__countr_zero(__absdiff); // for 0 this yields numeric_limits<_Tp>::digits, but the loop exits before it is used as a shift count
72+
}
73+
return __b << __shift; // odd part of the gcd times the common power of two
5674
}
5775

5876
template <class _Tp, class _Up>

libcxx/test/libcxx/transitive_includes/cxx03.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ numeric cstddef
549549
numeric cstdint
550550
numeric execution
551551
numeric functional
552+
numeric initializer_list
552553
numeric iterator
553554
numeric limits
554555
numeric new

libcxx/test/libcxx/transitive_includes/cxx11.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ numeric cstddef
554554
numeric cstdint
555555
numeric execution
556556
numeric functional
557+
numeric initializer_list
557558
numeric iterator
558559
numeric limits
559560
numeric new

libcxx/test/libcxx/transitive_includes/cxx14.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,7 @@ numeric cstddef
556556
numeric cstdint
557557
numeric execution
558558
numeric functional
559+
numeric initializer_list
559560
numeric iterator
560561
numeric limits
561562
numeric new

libcxx/test/libcxx/transitive_includes/cxx17.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,7 @@ numeric cstddef
556556
numeric cstdint
557557
numeric execution
558558
numeric functional
559+
numeric initializer_list
559560
numeric iterator
560561
numeric limits
561562
numeric new

libcxx/test/libcxx/transitive_includes/cxx20.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ numeric cstddef
561561
numeric cstdint
562562
numeric execution
563563
numeric functional
564+
numeric initializer_list
564565
numeric iterator
565566
numeric limits
566567
numeric new

libcxx/test/libcxx/transitive_includes/cxx26.csv

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,29 @@ experimental/simd limits
173173
experimental/type_traits initializer_list
174174
experimental/type_traits type_traits
175175
experimental/utility utility
176+
experimental/vector experimental/memory_resource
177+
experimental/vector vector
178+
ext/hash_map algorithm
179+
ext/hash_map cmath
180+
ext/hash_map cstddef
181+
ext/hash_map cstdint
182+
ext/hash_map cstring
183+
ext/hash_map functional
184+
ext/hash_map initializer_list
185+
ext/hash_map limits
186+
ext/hash_map new
187+
ext/hash_map stdexcept
188+
ext/hash_map string
189+
ext/hash_set algorithm
190+
ext/hash_set cmath
191+
ext/hash_set cstddef
192+
ext/hash_set cstdint
193+
ext/hash_set cstring
194+
ext/hash_set functional
195+
ext/hash_set initializer_list
196+
ext/hash_set limits
197+
ext/hash_set new
198+
ext/hash_set string
176199
filesystem compare
177200
filesystem cstddef
178201
filesystem cstdint

libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <cassert>
1818
#include <climits>
1919
#include <cstdint>
20+
#include <random>
2021
#include <type_traits>
2122

2223
#include "test_macros.h"
@@ -48,6 +49,68 @@ constexpr bool test0(int in1, int in2, int out)
4849
return true;
4950
}
5051

52+
// Reference oracle: textbook recursive Euclidean algorithm.
// Intended to be called with an unsigned type T (see basic_gcd below).
template <typename T>
T basic_gcd_(T m, T n) {
  return n == 0 ? m : basic_gcd_<T>(n, m % n);
}

// Reference gcd for any integer type T.
// Negative inputs are replaced by their absolute value first. The most negative
// value cannot be negated in T, so it is left untouched and relies on the cast
// to the unsigned counterpart to produce its magnitude.
// The result is computed in make_unsigned_t<T> and converted back to T.
template <typename T>
T basic_gcd(T m, T n) {
  using Tp = std::make_unsigned_t<T>;
  if (m < 0 && m != std::numeric_limits<T>::min())
    m = -m;
  if (n < 0 && n != std::numeric_limits<T>::min())
    n = -n;
  return basic_gcd_(static_cast<Tp>(m), static_cast<Tp>(n));
}

// Compares std::gcd against the reference basic_gcd on pseudo-random pairs.
// The generator uses a fixed seed, so every run checks the same sequence.
template <typename Input>
void do_fuzzy_tests() {
  // std::uniform_int_distribution is only specified for short, int, long,
  // long long and their unsigned counterparts; instantiating it with a
  // character-sized type such as int8_t/uint8_t is undefined. Draw 1-byte
  // inputs from a (unsigned) short distribution restricted to Input's range.
  using DistType =
      std::conditional_t<sizeof(Input) == 1,
                         std::conditional_t<std::is_signed<Input>::value, short, unsigned short>,
                         Input>;

  // Cover negative values too for signed types (a default-constructed
  // distribution would only produce [0, max]). The most negative value is
  // excluded because its magnitude is not representable in Input, which
  // std::gcd requires.
  constexpr Input hi = std::numeric_limits<Input>::max();
  constexpr Input lo =
      static_cast<Input>(std::numeric_limits<Input>::min() + (std::is_signed<Input>::value ? 1 : 0));

  std::mt19937 gen(1938);
  std::uniform_int_distribution<DistType> distrib(lo, hi);

  constexpr int nb_rounds = 10000;
  for (int i = 0; i < nb_rounds; ++i) {
    const Input n = static_cast<Input>(distrib(gen));
    const Input m = static_cast<Input>(distrib(gen));
    assert(std::gcd(n, m) == basic_gcd(n, m));
  }
}
79+
80+
// Checks std::gcd against the reference basic_gcd on every ordered pair (n, m)
// taken from a fixed table of boundary values: the type's extremes, 0..10 and
// -1..-10 converted to Input.
template <typename Input>
81+
void do_limit_tests() {
82+
Input inputs[] = {
83+
std::numeric_limits<Input>::min(), // NOTE(review): for signed Input, |min()| is not representable in Input, which the standard's std::gcd precondition excludes; presumably this relies on the library handling it - confirm
84+
std::numeric_limits<Input>::max(),
85+
0,
86+
1,
87+
2,
88+
3,
89+
4,
90+
5,
91+
6,
92+
7,
93+
8,
94+
9,
95+
10,
96+
(Input)-1, // for unsigned Input the negative literals wrap around: max(), max() - 1, ...
97+
(Input)-2,
98+
(Input)-3,
99+
(Input)-4,
100+
(Input)-5,
101+
(Input)-6,
102+
(Input)-7,
103+
(Input)-8,
104+
(Input)-9,
105+
(Input)-10,
106+
};
107+
108+
for (auto n : inputs) { // full cross product, so both argument orders are exercised
109+
for (auto m : inputs) {
110+
assert(std::gcd(n, m) == basic_gcd(n, m));
111+
}
112+
}
113+
}
51114

52115
template <typename Input1, typename Input2 = Input1>
53116
constexpr bool do_test(int = 0)
@@ -143,5 +206,23 @@ int main(int argc, char**)
143206
assert(res == 2);
144207
}
145208

146-
return 0;
209+
do_fuzzy_tests<std::int8_t>();
210+
do_fuzzy_tests<std::int16_t>();
211+
do_fuzzy_tests<std::int32_t>();
212+
do_fuzzy_tests<std::int64_t>();
213+
do_fuzzy_tests<std::uint8_t>();
214+
do_fuzzy_tests<std::uint16_t>();
215+
do_fuzzy_tests<std::uint32_t>();
216+
do_fuzzy_tests<std::uint64_t>();
217+
218+
do_limit_tests<std::int8_t>();
219+
do_limit_tests<std::int16_t>();
220+
do_limit_tests<std::int32_t>();
221+
do_limit_tests<std::int64_t>();
222+
do_limit_tests<std::uint8_t>();
223+
do_limit_tests<std::uint16_t>();
224+
do_limit_tests<std::uint32_t>();
225+
do_limit_tests<std::uint64_t>();
226+
227+
return 0;
147228
}

0 commit comments

Comments
 (0)