Skip to content

[WIP] ext/bcmath: optimized bcsqrt() #18771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions ext/bcmath/libbcmath/src/div.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,3 +429,27 @@ bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
*quot = bc_copy_num(BCG(_zero_));
return true;
}

/* Divide one BC_VECTOR array by another and store the quotient in quot_vectors.
 *
 * The most significant divisor word (index divisor_arr_size - 1) must be
 * non-zero, and quot_arr_size must equal
 * numerator_arr_size - divisor_arr_size + 1; both conditions are asserted.
 * A single-word divisor is handed to bc_fast_div(), anything longer to
 * bc_standard_div(). Always returns true. */
bool bc_divide_vector(
	BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
	const BC_VECTOR *divisor_vectors, size_t divisor_arr_size,
	BC_VECTOR *quot_vectors, size_t quot_arr_size
) {
	const size_t top_index = divisor_arr_size - 1;

	ZEND_ASSERT(divisor_vectors[top_index] != 0);
	ZEND_ASSERT(quot_arr_size == numerator_arr_size - divisor_arr_size + 1);

	/* Single-word divisor: the digit count is not needed. */
	if (divisor_arr_size == 1) {
		bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
		return true;
	}

	/* Count the decimal digits of the divisor: every word below the top one
	 * contributes BC_VECTOR_SIZE digits, the top word only its significant ones. */
	size_t divisor_size = top_index * BC_VECTOR_SIZE;
	for (BC_VECTOR top = divisor_vectors[top_index]; top > 0; top /= BASE) {
		divisor_size++;
	}

	bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_size, quot_vectors, quot_arr_size);
	return true;
}
4 changes: 4 additions & 0 deletions ext/bcmath/libbcmath/src/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ bc_num _bc_do_sub (bc_num n1, bc_num n2);
void bc_multiply_vector(
const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
BC_VECTOR *prod_vector, size_t prod_arr_size);
/* Divides the numerator vector array by the divisor vector array and writes
 * the quotient to quot_vectors.
 * The top divisor word (index divisor_arr_size - 1) must be non-zero and
 * quot_arr_size must equal numerator_arr_size - divisor_arr_size + 1; the
 * definition asserts both. The definition always returns true.
 * NOTE(review): numerator_vectors is non-const and the sqrt caller passes a
 * scratch copy, so the numerator is presumably clobbered - confirm. */
bool bc_divide_vector(
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size,
BC_VECTOR *quot_vectors, size_t quot_arr_size);
void _bc_rm_leading_zeros (bc_num num);

#endif
260 changes: 206 additions & 54 deletions ext/bcmath/libbcmath/src/sqrt.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,224 @@
*************************************************************************/

#include "bcmath.h"
#include "convert.h"
#include <stdbool.h>
#include <stddef.h>
#include "private.h"

/* Take the square root of NUM and return it in NUM with SCALE digits
after the decimal place. */

/* Returns 10^exponent as a BC_VECTOR.
 *
 * The value is assembled from BC_POW_10_LUT entries: one factor for the
 * remainder of exponent modulo 8, then one 10^8 factor per full group of
 * eight. No overflow check is performed. */
static inline BC_VECTOR bc_sqrt_get_pow_10(size_t exponent)
{
	BC_VECTOR power = BC_POW_10_LUT[exponent % 8];

	for (size_t groups = exponent / 8; groups > 0; groups--) {
		power *= BC_POW_10_LUT[8];
	}
	return power;
}

/* Returns an integer approximation of the square root of n_vector.
 *
 * A seed is obtained by manipulating the bit pattern of n_vector converted to
 * double, then refined with integer Newton steps until two successive guesses
 * differ by at most 1. Returns 0 when n_vector is 0. */
static BC_VECTOR bc_fast_sqrt_vector(BC_VECTOR n_vector)
{
	/* sqrt(0) is 0. Without this guard a zero input yields a seed that
	 * truncates to 0, and the first Newton step would divide by zero. */
	if (n_vector == 0) {
		return 0;
	}

	/* Use a bitwise method for approximating the square root
	 * as the initial guess for Newton's method.
	 * Shifting the bit pattern right by one roughly halves the exponent field;
	 * the added/subtracted constants re-bias it.
	 * NOTE(review): assumes double is IEEE-754 binary64 - confirm for all targets. */
	union {
		uint64_t i;
		double d;
	} u;
	u.d = (double) n_vector;
	u.i = (1ULL << 61) + (u.i >> 1) - (1ULL << 50);
	BC_VECTOR guess_vector = (BC_VECTOR) u.d;

	/* Newton's algorithm: guess = (guess + n / guess) / 2, in integer arithmetic. */
	BC_VECTOR guess1_vector;
	size_t diff;
	do {
		guess1_vector = guess_vector;
		guess_vector = n_vector / guess_vector;
		guess_vector += guess1_vector;
		guess_vector /= 2;
		/* Absolute difference between the previous and the new guess. */
		diff = guess1_vector > guess_vector ? guess1_vector - guess_vector : guess_vector - guess1_vector;
	} while (diff > 1);
	return guess_vector;
}

/* Square root for inputs small enough to be handled in a single BC_VECTOR.
 *
 * Replaces *num with a freshly allocated number holding its square root with
 * rscale fractional digits; the previous *num is released with bc_free_num().
 * NOTE(review): relies on the scaled radicand fitting in BC_VECTOR; bc_sqrt()
 * only takes this path when num_calc_full_len < MAX_LENGTH_OF_LONG - confirm
 * that bound is sufficient for every BC_VECTOR width. */
static inline void bc_fast_sqrt(bc_num *num, size_t rscale)
{
	const bc_num src = *num;
	const size_t src_digits = src->n_len + src->n_scale;

	/* Fold all digits of the input (integer and fractional) into one integer. */
	BC_VECTOR radicand = 0;
	for (size_t pos = 0; pos < src_digits; pos++) {
		radicand = radicand * BASE + src->n_value[pos];
	}

	/* The root is computed with rscale + 1 fractional digits using integer
	 * arithmetic only, so the radicand needs twice that many fractional
	 * digits. Append the zeros that are missing beyond the input's own scale. */
	radicand *= bc_sqrt_get_pow_10((rscale + 1) * 2 - src->n_scale);

	BC_VECTOR root = bc_fast_sqrt_vector(radicand);

	/* Number of decimal digits in the root (at least one). */
	size_t root_digits = 1;
	for (BC_VECTOR rest = root / BASE; rest > 0; rest /= BASE) {
		root_digits++;
	}

	/* Integer-part length of the result; a lone zero when the root is below 1. */
	size_t int_len = 1;
	if (root_digits > rscale + 1) {
		int_len = root_digits - (rscale + 1);
	}

	bc_num ret = bc_new_num_nonzeroed(int_len, rscale);
	char *digits = ret->n_value;

	/* The root carries rscale + 1 fractional digits; drop the extra one. */
	root /= BASE;

	/* Emit the digits from least to most significant. */
	for (size_t pos = int_len + rscale; pos > 0; pos--) {
		digits[pos - 1] = root % BASE;
		root /= BASE;
	}

	bc_free_num(num);
	*num = ret;
}

/* Square root for inputs that do not fit the single-BC_VECTOR fast path.
 *
 * Runs Newton's method, guess = (guess + n / guess) / 2, on BC_VECTOR arrays.
 * The radicand is the digit string of *num extended with zeros to
 * num_calc_full_len digits (bc_sqrt() passes n_len + (rscale + 1) * 2), and
 * the guess carries rscale + 1 fractional digits. On return *num is replaced
 * by a newly allocated result with rscale fractional digits and the previous
 * *num is released with bc_free_num().
 *
 * NOTE(review): if the leading digits of *num are all zero, n_top (or the top
 * word of the initial guess) can be zero, which bc_divide_vector() asserts
 * against - confirm how very small fractional inputs are meant to be handled. */
static inline void bc_standard_sqrt(bc_num *num, size_t rscale, size_t num_calc_full_len)
{
	/* allocate memory */
	size_t n_arr_size = BC_ARR_SIZE_FROM_LEN(num_calc_full_len);

	size_t guess_len = ((*num)->n_len + 1) / 2;
	size_t guess_scale = rscale + 1;
	size_t guess_full_len = guess_len + guess_scale;
	/* Since add the old guess and the new guess together during the calculation,
	 * there is a chance of overflow, so allocate an extra size. */
	size_t guess_arr_size = BC_ARR_SIZE_FROM_LEN(guess_full_len) + 1;

	/* One allocation, carved into: radicand, scratch copy of the radicand,
	 * current guess, previous guess, and the division result. */
	size_t allocate_size = n_arr_size * 2 + guess_arr_size * 3;
	BC_VECTOR *buf = safe_emalloc(allocate_size, sizeof(BC_VECTOR), 0);

	BC_VECTOR *n_vector = buf;
	BC_VECTOR *n_vector_copy = n_vector + n_arr_size;
	BC_VECTOR *guess_vector = n_vector_copy + n_arr_size;
	BC_VECTOR *guess1_vector = guess_vector + guess_arr_size;
	BC_VECTOR *tmp_div_ret_vector = guess1_vector + guess_arr_size;

	/* convert num to n_vector */
	size_t n_full_len = (*num)->n_len + (*num)->n_scale;
	const char *nend = (*num)->n_value + n_full_len - 1;
	size_t n_extend_zeros = num_calc_full_len - n_full_len;

	bc_convert_to_vector_with_zero_pad(n_vector, nend, n_full_len, n_extend_zeros);

	/* Prepare guess_vector. Use bc_fast_sqrt_vector to quickly obtain a highly accurate initial value.
	 * NOTE(review): confirm that this many decimal digits always fit in BC_VECTOR
	 * on builds where SIZEOF_SIZE_T != 8. */
	size_t n_top_len_for_initial_guess = SIZEOF_SIZE_T == 8 ? 18 : 10;

	/* Set the number of digits of num to be used as the initial value for Newton's method.
	 * Just as the square roots of 1000 and 100 differ significantly, the number of digits
	 * to "ignore" here must be even. */
	if (num_calc_full_len & 1) {
		n_top_len_for_initial_guess--;
	}

	/* Collect the leading digits of the zero-extended radicand. The stored
	 * number may have fewer digits than requested; positions past its end are
	 * the appended zeros, so they are filled with 0 instead of being read from
	 * beyond n_value. */
	const char *nptr = (*num)->n_value;
	BC_VECTOR n_top = 0;
	for (size_t i = 0; i < n_top_len_for_initial_guess; i++) {
		n_top *= BASE;
		if (i < n_full_len) {
			n_top += *nptr++;
		}
	}

	for (size_t i = 0; i < guess_arr_size; i++) {
		guess_vector[i] = 0;
	}
	BC_VECTOR initial_guess = bc_fast_sqrt_vector(n_top);

	/* Place the initial guess at the top of guess_vector: its leading digits
	 * fill the top used word (index guess_arr_size - 2), the remaining digits
	 * go to the head of the word below. Index guess_arr_size - 1 is the spare
	 * overflow word. */
	size_t initial_guess_len = SIZEOF_SIZE_T == 8 ? 9 : 5;
	size_t guess_top_vector_len = guess_full_len % BC_VECTOR_SIZE == 0 ? BC_VECTOR_SIZE : guess_full_len % BC_VECTOR_SIZE;
	size_t guess_len_diff = initial_guess_len - guess_top_vector_len;
	guess_vector[guess_arr_size - 2] = initial_guess / BC_POW_10_LUT[guess_len_diff];
	initial_guess %= BC_POW_10_LUT[guess_len_diff];
	guess_vector[guess_arr_size - 3] = initial_guess * BC_POW_10_LUT[BC_VECTOR_SIZE - guess_len_diff];

	guess1_vector[guess_arr_size - 1] = 0;

	/* Matches the size bc_divide_vector() asserts for a divisor of
	 * guess_arr_size - 1 words. */
	size_t quot_size = n_arr_size - (guess_arr_size - 1) + 1;

	BC_VECTOR two[1] = { 2 };

	/* Newton's algorithm. */
	bool done = false;
	do {
		/* Divide a scratch copy so that n_vector stays intact for the next round. */
		for (size_t i = 0; i < n_arr_size; i++) {
			n_vector_copy[i] = n_vector[i];
		}
		bool div_ret = bc_divide_vector(n_vector_copy, n_arr_size, guess_vector, guess_arr_size - 1, tmp_div_ret_vector, quot_size);
		ZEND_ASSERT(div_ret);

		/* Swap buffers: the current guess becomes the previous guess. */
		BC_VECTOR *tmp_vptr = guess1_vector;
		guess1_vector = guess_vector;
		guess_vector = tmp_vptr;

		/* guess = previous guess + n / previous guess, propagating the carry
		 * between words; the final carry lands in the spare top word. */
		int carry = 0;
		for (size_t i = 0; i < guess_arr_size - 1; i++) {
			guess_vector[i] = guess1_vector[i] + tmp_div_ret_vector[i] + carry;
			if (guess_vector[i] >= BC_VECTOR_BOUNDARY_NUM) {
				guess_vector[i] -= BC_VECTOR_BOUNDARY_NUM;
				carry = 1;
			} else {
				carry = 0;
			}
		}
		guess_vector[guess_arr_size - 1] = carry;

		/* guess /= 2 */
		div_ret = bc_divide_vector(guess_vector, guess_arr_size, two, 1, tmp_div_ret_vector, guess_arr_size);
		ZEND_ASSERT(div_ret);

		for (size_t i = 0; i < guess_arr_size; i++) {
			guess_vector[i] = tmp_div_ret_vector[i];
		}

		/* Converged when the lowest word differs by at most 1 and every other
		 * used word is unchanged. */
		size_t diff = guess_vector[0] > guess1_vector[0] ? guess_vector[0] - guess1_vector[0] : guess1_vector[0] - guess_vector[0];
		if (diff <= 1) {
			bool is_same = true;
			for (size_t i = 1; i < guess_arr_size - 1; i++) {
				if (guess_vector[i] != guess1_vector[i]) {
					is_same = false;
					break;
				}
			}
			done = is_same;
		}
	} while (!done);

	bc_num ret = bc_new_num_nonzeroed(guess_len, guess_scale);
	char *rptr = ret->n_value;
	char *rend = rptr + guess_full_len - 1;

	bc_convert_vector_to_char(guess_vector, rptr, rend, guess_arr_size - 1);

	/* The guess was computed with one extra fractional digit; shrink the
	 * reported scale back to rscale. */
	ret->n_scale = rscale;
	_bc_rm_leading_zeros(ret);

	bc_free_num(num);
	*num = ret;

	efree(buf);
}

bool bc_sqrt(bc_num *num, size_t scale)
{
const bc_num local_num = *num;
/* Initial checks. */
if (bc_is_neg(local_num)) {
if (bc_is_neg(*num)) {
/* Cannot take the square root of a negative number */
return false;
}
/* Square root of 0 is 0 */
if (bc_is_zero(local_num)) {
if (bc_is_zero(*num)) {
bc_free_num (num);
*num = bc_copy_num(BCG(_zero_));
return true;
}

bcmath_compare_result num_cmp_one = bc_compare(local_num, BCG(_one_), local_num->n_scale);
bcmath_compare_result num_cmp_one = bc_compare(*num, BCG(_one_), (*num)->n_scale);
/* Square root of 1 is 1 */
if (num_cmp_one == BCMATH_EQUAL) {
bc_free_num (num);
Expand All @@ -60,58 +256,14 @@ bool bc_sqrt(bc_num *num, size_t scale)
}

/* Initialize the variables. */
size_t cscale;
bc_num guess, guess1, point5, diff;
size_t rscale = MAX(scale, local_num->n_scale);

bc_init_num(&guess1);
bc_init_num(&diff);
point5 = bc_new_num (1, 1);
point5->n_value[1] = 5;


/* Calculate the initial guess. */
if (num_cmp_one == BCMATH_RIGHT_GREATER) {
/* The number is between 0 and 1. Guess should start at 1. */
guess = bc_copy_num(BCG(_one_));
cscale = local_num->n_scale;
} else {
/* The number is greater than 1. Guess should start at 10^(exp/2). */
bc_init_num(&guess);
bc_int2num(&guess, 10);

bc_int2num(&guess1, local_num->n_len);
bc_multiply_ex(guess1, point5, &guess1, 0);
guess1->n_scale = 0;
bc_raise_bc_exponent(guess, guess1, &guess, 0);
bc_free_num (&guess1);
cscale = 3;
}
size_t rscale = MAX(scale, (*num)->n_scale);
size_t num_calc_full_len = (*num)->n_len + (rscale + 1) * 2;

/* Find the square root using Newton's algorithm. */
bool done = false;
while (!done) {
bc_free_num (&guess1);
guess1 = bc_copy_num(guess);
bc_divide(*num, guess, &guess, cscale);
bc_add_ex(guess, guess1, &guess, 0);
bc_multiply_ex(guess, point5, &guess, cscale);
bc_sub_ex(guess, guess1, &diff, cscale + 1);
if (bc_is_near_zero(diff, cscale)) {
if (cscale < rscale + 1) {
cscale = MIN (cscale * 3, rscale + 1);
} else {
done = true;
}
}
if (num_calc_full_len < MAX_LENGTH_OF_LONG) {
bc_fast_sqrt(num, rscale);
} else {
bc_standard_sqrt(num, rscale, num_calc_full_len);
}

/* Assign the number and clean up. */
bc_free_num (num);
bc_divide(guess, BCG(_one_), num, rscale);
bc_free_num (&guess);
bc_free_num (&guess1);
bc_free_num (&point5);
bc_free_num (&diff);
return true;
}
Loading