Skip to content

Commit 0b58bac

Browse files
bpo-37295: More direct computation of power-of-two factor in math.comb (GH-30313)
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent e18d815 commit 0b58bac

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

Modules/mathmodule.c

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3462,7 +3462,7 @@ Python code to generate the values:
34623462
reduced_fac_odd_part = fac_odd_part % (2**64)
34633463
print(f"{reduced_fac_odd_part:#018x}u")
34643464
*/
3465-
static uint64_t reduced_factorial_odd_part[] = {
3465+
static const uint64_t reduced_factorial_odd_part[] = {
34663466
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
34673467
0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
34683468
0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
@@ -3494,7 +3494,7 @@ Python code to generate the values:
34943494
inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
34953495
print(f"{inverted_fac_odd_part:#018x}u")
34963496
*/
3497-
static uint64_t inverted_factorial_odd_part[] = {
3497+
static const uint64_t inverted_factorial_odd_part[] = {
34983498
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
34993499
0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
35003500
0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
@@ -3514,6 +3514,25 @@ static uint64_t inverted_factorial_odd_part[] = {
35143514
0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
35153515
};
35163516

3517+
/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3518+
3519+
Python code to generate the values:
3520+
3521+
import math
3522+
3523+
for n in range(68):
3524+
fac = math.factorial(n)
3525+
fac_trailing_zeros = (fac & -fac).bit_length() - 1
3526+
print(fac_trailing_zeros)
3527+
*/
3528+
3529+
static const uint8_t factorial_trailing_zeros[] = {
3530+
0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
3531+
15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
3532+
31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
3533+
46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
3534+
63, 63, 64, 64, // 64-67
3535+
};
35173536

35183537
/*[clinic input]
35193538
math.comb
@@ -3588,15 +3607,14 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
35883607
where 2**shift is the largest power of two dividing comb(n, k)
35893608
and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
35903609
calculated efficiently via arithmetic modulo 2**64, using three
3591-
lookups and two uint64_t multiplications, while the necessary
3592-
shift can be computed via Kummer's theorem: it's the number of
3593-
carries when adding k to n - k in binary, which in turn is the
3594-
number of set bits of n ^ k ^ (n - k).
3610+
lookups and two uint64_t multiplications.
35953611
*/
35963612
uint64_t comb_odd_part = reduced_factorial_odd_part[ni]
35973613
* inverted_factorial_odd_part[ki]
35983614
* inverted_factorial_odd_part[ni - ki];
3599-
int shift = _Py_popcount32((uint32_t)(ni ^ ki ^ (ni - ki)));
3615+
int shift = factorial_trailing_zeros[ni]
3616+
- factorial_trailing_zeros[ki]
3617+
- factorial_trailing_zeros[ni - ki];
36003618
result = PyLong_FromUnsignedLongLong(comb_odd_part << shift);
36013619
goto done;
36023620
}

0 commit comments

Comments
 (0)