Skip to content

Commit 27a062e

Browse files
[libc++] Implement std::gcd using the binary version (#77747)
The binary version is four times faster than current implementation in my setup, and generally considered a better implementation. Code inspired by https://en.algorithmica.org/hpc/algorithms/gcd/ which itself is inspired by https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/ Fix #77648
1 parent b5afda8 commit 27a062e

File tree

10 files changed

+213
-4
lines changed

10 files changed

+213
-4
lines changed

libcxx/benchmarks/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ endif()
122122
add_library( cxx-benchmarks-flags-libcxx INTERFACE)
123123
target_link_libraries( cxx-benchmarks-flags-libcxx INTERFACE cxx-benchmarks-flags)
124124
target_compile_options(cxx-benchmarks-flags-libcxx INTERFACE ${SANITIZER_FLAGS} -Wno-user-defined-literals -Wno-suggest-override)
125-
target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
125+
target_link_options( cxx-benchmarks-flags-libcxx INTERFACE -lm -nostdlib++ "-L${BENCHMARK_LIBCXX_INSTALL}/lib" "-L${BENCHMARK_LIBCXX_INSTALL}/lib64" ${SANITIZER_FLAGS})
126126

127127
set(libcxx_benchmark_targets)
128128

@@ -220,6 +220,7 @@ set(BENCHMARK_TESTS
220220
lexicographical_compare_three_way.bench.cpp
221221
map.bench.cpp
222222
monotonic_buffer.bench.cpp
223+
numeric/gcd.bench.cpp
223224
ordered_set.bench.cpp
224225
shared_mutex_vs_mutex.bench.cpp
225226
stop_token.bench.cpp
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <array>
10+
#include <benchmark/benchmark.h>
11+
#include <cstring>
12+
#include <numeric>
13+
#include <random>
14+
15+
template <class T>
16+
static std::array<T, 1000> generate(std::uniform_int_distribution<T> distribution = std::uniform_int_distribution<T>{
17+
std::numeric_limits<T>::min() + 1, std::numeric_limits<T>::max()}) {
18+
std::mt19937 generator;
19+
std::array<T, 1000> result;
20+
std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
21+
return result;
22+
}
23+
24+
static void bm_gcd_random(benchmark::State& state) {
25+
std::array data = generate<int>();
26+
while (state.KeepRunningBatch(data.size()))
27+
for (auto v0 : data)
28+
for (auto v1 : data)
29+
benchmark::DoNotOptimize(std::gcd(v0, v1));
30+
}
31+
BENCHMARK(bm_gcd_random);
32+
33+
static void bm_gcd_trivial(benchmark::State& state) {
34+
int lhs = ~static_cast<int>(0), rhs = 1;
35+
for (auto _ : state) {
36+
benchmark::DoNotOptimize(lhs);
37+
benchmark::DoNotOptimize(rhs);
38+
benchmark::DoNotOptimize(std::gcd(lhs, rhs));
39+
}
40+
}
41+
BENCHMARK(bm_gcd_trivial);
42+
43+
static void bm_gcd_complex(benchmark::State& state) {
44+
int lhs = 2971215073, rhs = 1836311903;
45+
for (auto _ : state) {
46+
benchmark::DoNotOptimize(lhs);
47+
benchmark::DoNotOptimize(rhs);
48+
benchmark::DoNotOptimize(std::gcd(lhs, rhs));
49+
}
50+
}
51+
BENCHMARK(bm_gcd_complex);
52+
53+
BENCHMARK_MAIN();

libcxx/include/__numeric/gcd_lcm.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
1111
#define _LIBCPP___NUMERIC_GCD_LCM_H
1212

13+
#include <__algorithm/min.h>
1314
#include <__assert>
15+
#include <__bit/countr.h>
1416
#include <__config>
1517
#include <__type_traits/common_type.h>
1618
#include <__type_traits/is_integral.h>
@@ -50,9 +52,47 @@ struct __ct_abs<_Result, _Source, false> {
5052
};
5153

5254
template <class _Tp>
53-
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __m, _Tp __n) {
55+
_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
5456
static_assert((!is_signed<_Tp>::value), "");
55-
return __n == 0 ? __m : std::__gcd<_Tp>(__n, __m % __n);
57+
58+
// From: https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor
59+
//
60+
// If power of two divides both numbers, we can push it out.
61+
// - gcd( 2^x * a, 2^x * b) = 2^x * gcd(a, b)
62+
//
63+
// If and only if exactly one number is even, we can divide that number by that power.
64+
// - if a, b are odd, then gcd(2^x * a, b) = gcd(a, b)
65+
//
66+
// And standard gcd algorithm where instead of modulo, minus is used.
67+
68+
if (__a < __b) {
69+
_Tp __tmp = __b;
70+
__b = __a;
71+
__a = __tmp;
72+
}
73+
if (__b == 0)
74+
return __a;
75+
__a %= __b; // Make both argument of the same size, and early result in the easy case.
76+
if (__a == 0)
77+
return __b;
78+
79+
int __az = std::__countr_zero(__a);
80+
int __bz = std::__countr_zero(__b);
81+
int __shift = std::min(__az, __bz);
82+
__a >>= __az;
83+
__b >>= __bz;
84+
do {
85+
_Tp __diff = __a - __b;
86+
if (__a > __b) {
87+
__a = __b;
88+
__b = __diff;
89+
} else {
90+
__b = __b - __a;
91+
}
92+
if (__diff != 0)
93+
__b >>= std::__countr_zero(__diff);
94+
} while (__b != 0);
95+
return __a << __shift;
5696
}
5797

5898
template <class _Tp, class _Up>

libcxx/test/libcxx/transitive_includes/cxx03.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ numeric cstddef
570570
numeric cstdint
571571
numeric execution
572572
numeric functional
573+
numeric initializer_list
573574
numeric iterator
574575
numeric limits
575576
numeric new

libcxx/test/libcxx/transitive_includes/cxx11.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ numeric cstddef
575575
numeric cstdint
576576
numeric execution
577577
numeric functional
578+
numeric initializer_list
578579
numeric iterator
579580
numeric limits
580581
numeric new

libcxx/test/libcxx/transitive_includes/cxx14.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ numeric cstddef
578578
numeric cstdint
579579
numeric execution
580580
numeric functional
581+
numeric initializer_list
581582
numeric iterator
582583
numeric limits
583584
numeric new

libcxx/test/libcxx/transitive_includes/cxx17.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ numeric cstddef
578578
numeric cstdint
579579
numeric execution
580580
numeric functional
581+
numeric initializer_list
581582
numeric iterator
582583
numeric limits
583584
numeric new

libcxx/test/libcxx/transitive_includes/cxx20.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ numeric cstddef
589589
numeric cstdint
590590
numeric execution
591591
numeric functional
592+
numeric initializer_list
592593
numeric iterator
593594
numeric limits
594595
numeric new

libcxx/test/libcxx/transitive_includes/cxx26.csv

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,29 @@ experimental/simd limits
176176
experimental/type_traits initializer_list
177177
experimental/type_traits type_traits
178178
experimental/utility utility
179+
experimental/vector experimental/memory_resource
180+
experimental/vector vector
181+
ext/hash_map algorithm
182+
ext/hash_map cmath
183+
ext/hash_map cstddef
184+
ext/hash_map cstdint
185+
ext/hash_map cstring
186+
ext/hash_map functional
187+
ext/hash_map initializer_list
188+
ext/hash_map limits
189+
ext/hash_map new
190+
ext/hash_map stdexcept
191+
ext/hash_map string
192+
ext/hash_set algorithm
193+
ext/hash_set cmath
194+
ext/hash_set cstddef
195+
ext/hash_set cstdint
196+
ext/hash_set cstring
197+
ext/hash_set functional
198+
ext/hash_set initializer_list
199+
ext/hash_set limits
200+
ext/hash_set new
201+
ext/hash_set string
179202
filesystem compare
180203
filesystem cstddef
181204
filesystem cstdint

libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <cassert>
1818
#include <climits>
1919
#include <cstdint>
20+
#include <random>
2021
#include <type_traits>
2122

2223
#include "test_macros.h"
@@ -48,6 +49,74 @@ constexpr bool test0(int in1, int in2, int out)
4849
return true;
4950
}
5051

52+
template <typename T>
53+
T basic_gcd_(T m, T n) {
54+
return n == 0 ? m : basic_gcd_<T>(n, m % n);
55+
}
56+
57+
template <typename T>
58+
T basic_gcd(T m, T n) {
59+
using Tp = std::make_unsigned_t<T>;
60+
if (m < 0 && m != std::numeric_limits<T>::min())
61+
m = -m;
62+
if (n < 0 && n != std::numeric_limits<T>::min())
63+
n = -n;
64+
return basic_gcd_(static_cast<Tp>(m), static_cast<Tp>(n));
65+
}
66+
67+
template <typename Input>
68+
void do_fuzzy_tests() {
69+
std::mt19937 gen(1938);
70+
std::uniform_int_distribution<Input> distrib;
71+
72+
constexpr int nb_rounds = 10000;
73+
for (int i = 0; i < nb_rounds; ++i) {
74+
Input n = distrib(gen);
75+
Input m = distrib(gen);
76+
assert(std::gcd(n, m) == basic_gcd(n, m));
77+
}
78+
}
79+
80+
template <typename Input>
81+
void do_limit_tests() {
82+
Input inputs[] = {
83+
// The behavior of std::gcd is undefined if the absolute value of one of its
84+
// operand is not representable in the result type.
85+
std::numeric_limits<Input>::min() + (std::is_signed<Input>::value ? 3 : 0),
86+
std::numeric_limits<Input>::min() + 1,
87+
std::numeric_limits<Input>::min() + 2,
88+
std::numeric_limits<Input>::max(),
89+
std::numeric_limits<Input>::max() - 1,
90+
std::numeric_limits<Input>::max() - 2,
91+
0,
92+
1,
93+
2,
94+
3,
95+
4,
96+
5,
97+
6,
98+
7,
99+
8,
100+
9,
101+
10,
102+
(Input)-1,
103+
(Input)-2,
104+
(Input)-3,
105+
(Input)-4,
106+
(Input)-5,
107+
(Input)-6,
108+
(Input)-7,
109+
(Input)-8,
110+
(Input)-9,
111+
(Input)-10,
112+
};
113+
114+
for (auto n : inputs) {
115+
for (auto m : inputs) {
116+
assert(std::gcd(n, m) == basic_gcd(n, m));
117+
}
118+
}
119+
}
51120

52121
template <typename Input1, typename Input2 = Input1>
53122
constexpr bool do_test(int = 0)
@@ -143,5 +212,23 @@ int main(int argc, char**)
143212
assert(res == 2);
144213
}
145214

146-
return 0;
215+
do_fuzzy_tests<std::int8_t>();
216+
do_fuzzy_tests<std::int16_t>();
217+
do_fuzzy_tests<std::int32_t>();
218+
do_fuzzy_tests<std::int64_t>();
219+
do_fuzzy_tests<std::uint8_t>();
220+
do_fuzzy_tests<std::uint16_t>();
221+
do_fuzzy_tests<std::uint32_t>();
222+
do_fuzzy_tests<std::uint64_t>();
223+
224+
do_limit_tests<std::int8_t>();
225+
do_limit_tests<std::int16_t>();
226+
do_limit_tests<std::int32_t>();
227+
do_limit_tests<std::int64_t>();
228+
do_limit_tests<std::uint8_t>();
229+
do_limit_tests<std::uint16_t>();
230+
do_limit_tests<std::uint32_t>();
231+
do_limit_tests<std::uint64_t>();
232+
233+
return 0;
147234
}

0 commit comments

Comments
 (0)