Skip to content

Commit d34b3c9

Browse files
[libc] Add max length argument to decimal to float (#84091)
The from_chars implementation in libcxx may use our decimal to float utilities, but for that to work we need to support limiting the length of the string to be parsed. This patch adds that length limit to decimal_exp_to_float, as well as to the functions it calls (high precision decimal, str to integer).
1 parent f8c5a68 commit d34b3c9

File tree

7 files changed

+418
-108
lines changed

7 files changed

+418
-108
lines changed

libc/src/__support/high_precision_decimal.h

Lines changed: 63 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
1010
#define LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
1111

12+
#include "src/__support/CPP/limits.h"
1213
#include "src/__support/ctype_utils.h"
1314
#include "src/__support/str_to_integer.h"
1415
#include <stdint.h>
@@ -115,9 +116,10 @@ class HighPrecisionDecimal {
115116
uint8_t digits[MAX_NUM_DIGITS];
116117

117118
private:
118-
bool should_round_up(int32_t roundToDigit, RoundDirection round) {
119-
if (roundToDigit < 0 ||
120-
static_cast<uint32_t>(roundToDigit) >= this->num_digits) {
119+
LIBC_INLINE bool should_round_up(int32_t round_to_digit,
120+
RoundDirection round) {
121+
if (round_to_digit < 0 ||
122+
static_cast<uint32_t>(round_to_digit) >= this->num_digits) {
121123
return false;
122124
}
123125

@@ -133,8 +135,8 @@ class HighPrecisionDecimal {
133135
// Else round to nearest.
134136

135137
// If we're right in the middle and there are no extra digits
136-
if (this->digits[roundToDigit] == 5 &&
137-
static_cast<uint32_t>(roundToDigit + 1) == this->num_digits) {
138+
if (this->digits[round_to_digit] == 5 &&
139+
static_cast<uint32_t>(round_to_digit + 1) == this->num_digits) {
138140

139141
// Round up if we've truncated (since that means the result is slightly
140142
// higher than what's represented.)
@@ -143,22 +145,22 @@ class HighPrecisionDecimal {
143145
}
144146

145147
// If this is exactly halfway, round to even.
146-
if (roundToDigit == 0)
148+
if (round_to_digit == 0)
147149
// When the input is ".5".
148150
return false;
149-
return this->digits[roundToDigit - 1] % 2 != 0;
151+
return this->digits[round_to_digit - 1] % 2 != 0;
150152
}
151-
// If there are digits after roundToDigit, they must be non-zero since we
153+
// If there are digits after round_to_digit, they must be non-zero since we
152154
// trim trailing zeroes after all operations that change digits.
153-
return this->digits[roundToDigit] >= 5;
155+
return this->digits[round_to_digit] >= 5;
154156
}
155157

156158
// Takes an amount to left shift and returns the number of new digits needed
157159
// to store the result based on LEFT_SHIFT_DIGIT_TABLE.
158-
uint32_t get_num_new_digits(uint32_t lShiftAmount) {
160+
LIBC_INLINE uint32_t get_num_new_digits(uint32_t lshift_amount) {
159161
const char *power_of_five =
160-
LEFT_SHIFT_DIGIT_TABLE[lShiftAmount].power_of_five;
161-
uint32_t new_digits = LEFT_SHIFT_DIGIT_TABLE[lShiftAmount].new_digits;
162+
LEFT_SHIFT_DIGIT_TABLE[lshift_amount].power_of_five;
163+
uint32_t new_digits = LEFT_SHIFT_DIGIT_TABLE[lshift_amount].new_digits;
162164
uint32_t digit_index = 0;
163165
while (power_of_five[digit_index] != 0) {
164166
if (digit_index >= this->num_digits) {
@@ -176,7 +178,7 @@ class HighPrecisionDecimal {
176178
}
177179

178180
// Trim all trailing 0s
179-
void trim_trailing_zeroes() {
181+
LIBC_INLINE void trim_trailing_zeroes() {
180182
while (this->num_digits > 0 && this->digits[this->num_digits - 1] == 0) {
181183
--this->num_digits;
182184
}
@@ -186,19 +188,19 @@ class HighPrecisionDecimal {
186188
}
187189

188190
// Perform a digitwise binary non-rounding right shift on this value by
189-
// shiftAmount. The shiftAmount can't be more than MAX_SHIFT_AMOUNT to prevent
190-
// overflow.
191-
void right_shift(uint32_t shiftAmount) {
191+
// shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
192+
// prevent overflow.
193+
LIBC_INLINE void right_shift(uint32_t shift_amount) {
192194
uint32_t read_index = 0;
193195
uint32_t write_index = 0;
194196

195197
uint64_t accumulator = 0;
196198

197-
const uint64_t shift_mask = (uint64_t(1) << shiftAmount) - 1;
199+
const uint64_t shift_mask = (uint64_t(1) << shift_amount) - 1;
198200

199201
// Warm Up phase: we don't have enough digits to start writing, so just
200202
// read them into the accumulator.
201-
while (accumulator >> shiftAmount == 0) {
203+
while (accumulator >> shift_amount == 0) {
202204
uint64_t read_digit = 0;
203205
// If there are still digits to read, read the next one, else the digit is
204206
// assumed to be 0.
@@ -217,7 +219,7 @@ class HighPrecisionDecimal {
217219
// read. Keep reading until we run out of digits.
218220
while (read_index < this->num_digits) {
219221
uint64_t read_digit = this->digits[read_index];
220-
uint64_t write_digit = accumulator >> shiftAmount;
222+
uint64_t write_digit = accumulator >> shift_amount;
221223
accumulator &= shift_mask;
222224
this->digits[write_index] = static_cast<uint8_t>(write_digit);
223225
accumulator = accumulator * 10 + read_digit;
@@ -228,7 +230,7 @@ class HighPrecisionDecimal {
228230
// Cool Down phase: All of the readable digits have been read, so just write
229231
// the remainder, while treating any more digits as 0.
230232
while (accumulator > 0) {
231-
uint64_t write_digit = accumulator >> shiftAmount;
233+
uint64_t write_digit = accumulator >> shift_amount;
232234
accumulator &= shift_mask;
233235
if (write_index < MAX_NUM_DIGITS) {
234236
this->digits[write_index] = static_cast<uint8_t>(write_digit);
@@ -243,10 +245,10 @@ class HighPrecisionDecimal {
243245
}
244246

245247
// Perform a digitwise binary non-rounding left shift on this value by
246-
// shiftAmount. The shiftAmount can't be more than MAX_SHIFT_AMOUNT to prevent
247-
// overflow.
248-
void left_shift(uint32_t shiftAmount) {
249-
uint32_t new_digits = this->get_num_new_digits(shiftAmount);
248+
// shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
249+
// prevent overflow.
250+
LIBC_INLINE void left_shift(uint32_t shift_amount) {
251+
uint32_t new_digits = this->get_num_new_digits(shift_amount);
250252

251253
int32_t read_index = this->num_digits - 1;
252254
uint32_t write_index = this->num_digits + new_digits;
@@ -260,7 +262,7 @@ class HighPrecisionDecimal {
260262
// writing.
261263
while (read_index >= 0) {
262264
accumulator += static_cast<uint64_t>(this->digits[read_index])
263-
<< shiftAmount;
265+
<< shift_amount;
264266
uint64_t next_accumulator = accumulator / 10;
265267
uint64_t write_digit = accumulator - (10 * next_accumulator);
266268
--write_index;
@@ -296,45 +298,52 @@ class HighPrecisionDecimal {
296298
}
297299

298300
public:
299-
// numString is assumed to be a string of numeric characters. It doesn't
301+
// num_string is assumed to be a string of numeric characters. It doesn't
300302
// handle leading spaces.
301-
HighPrecisionDecimal(const char *__restrict numString) {
303+
LIBC_INLINE
304+
HighPrecisionDecimal(
305+
const char *__restrict num_string,
306+
const size_t num_len = cpp::numeric_limits<size_t>::max()) {
302307
bool saw_dot = false;
308+
size_t num_cur = 0;
303309
// This counts the digits in the number, even if there isn't space to store
304310
// them all.
305311
uint32_t total_digits = 0;
306-
while (isdigit(*numString) || *numString == '.') {
307-
if (*numString == '.') {
312+
while (num_cur < num_len &&
313+
(isdigit(num_string[num_cur]) || num_string[num_cur] == '.')) {
314+
if (num_string[num_cur] == '.') {
308315
if (saw_dot) {
309316
break;
310317
}
311318
this->decimal_point = total_digits;
312319
saw_dot = true;
313320
} else {
314-
if (*numString == '0' && this->num_digits == 0) {
321+
if (num_string[num_cur] == '0' && this->num_digits == 0) {
315322
--this->decimal_point;
316-
++numString;
323+
++num_cur;
317324
continue;
318325
}
319326
++total_digits;
320327
if (this->num_digits < MAX_NUM_DIGITS) {
321328
this->digits[this->num_digits] =
322-
static_cast<uint8_t>(*numString - '0');
329+
static_cast<uint8_t>(num_string[num_cur] - '0');
323330
++this->num_digits;
324-
} else if (*numString != '0') {
331+
} else if (num_string[num_cur] != '0') {
325332
this->truncated = true;
326333
}
327334
}
328-
++numString;
335+
++num_cur;
329336
}
330337

331338
if (!saw_dot)
332339
this->decimal_point = total_digits;
333340

334-
if ((*numString | 32) == 'e') {
335-
++numString;
336-
if (isdigit(*numString) || *numString == '+' || *numString == '-') {
337-
auto result = strtointeger<int32_t>(numString, 10);
341+
if (num_cur < num_len && ((num_string[num_cur] | 32) == 'e')) {
342+
++num_cur;
343+
if (isdigit(num_string[num_cur]) || num_string[num_cur] == '+' ||
344+
num_string[num_cur] == '-') {
345+
auto result =
346+
strtointeger<int32_t>(num_string + num_cur, 10, num_len - num_cur);
338347
if (result.has_error()) {
339348
// TODO: handle error
340349
}
@@ -358,33 +367,34 @@ class HighPrecisionDecimal {
358367
this->trim_trailing_zeroes();
359368
}
360369

361-
// Binary shift left (shiftAmount > 0) or right (shiftAmount < 0)
362-
void shift(int shiftAmount) {
363-
if (shiftAmount == 0) {
370+
// Binary shift left (shift_amount > 0) or right (shift_amount < 0)
371+
LIBC_INLINE void shift(int shift_amount) {
372+
if (shift_amount == 0) {
364373
return;
365374
}
366375
// Left
367-
else if (shiftAmount > 0) {
368-
while (static_cast<uint32_t>(shiftAmount) > MAX_SHIFT_AMOUNT) {
376+
else if (shift_amount > 0) {
377+
while (static_cast<uint32_t>(shift_amount) > MAX_SHIFT_AMOUNT) {
369378
this->left_shift(MAX_SHIFT_AMOUNT);
370-
shiftAmount -= MAX_SHIFT_AMOUNT;
379+
shift_amount -= MAX_SHIFT_AMOUNT;
371380
}
372-
this->left_shift(shiftAmount);
381+
this->left_shift(shift_amount);
373382
}
374383
// Right
375384
else {
376-
while (static_cast<uint32_t>(shiftAmount) < -MAX_SHIFT_AMOUNT) {
385+
while (static_cast<uint32_t>(shift_amount) < -MAX_SHIFT_AMOUNT) {
377386
this->right_shift(MAX_SHIFT_AMOUNT);
378-
shiftAmount += MAX_SHIFT_AMOUNT;
387+
shift_amount += MAX_SHIFT_AMOUNT;
379388
}
380-
this->right_shift(-shiftAmount);
389+
this->right_shift(-shift_amount);
381390
}
382391
}
383392

384393
// Round the number represented to the closest value of unsigned int type T.
385394
// This is done ignoring overflow.
386395
template <class T>
387-
T round_to_integer_type(RoundDirection round = RoundDirection::Nearest) {
396+
LIBC_INLINE T
397+
round_to_integer_type(RoundDirection round = RoundDirection::Nearest) {
388398
T result = 0;
389399
uint32_t cur_digit = 0;
390400

@@ -404,10 +414,10 @@ class HighPrecisionDecimal {
404414

405415
// Extra functions for testing.
406416

407-
uint8_t *get_digits() { return this->digits; }
408-
uint32_t get_num_digits() { return this->num_digits; }
409-
int32_t get_decimal_point() { return this->decimal_point; }
410-
void set_truncated(bool trunc) { this->truncated = trunc; }
417+
LIBC_INLINE uint8_t *get_digits() { return this->digits; }
418+
LIBC_INLINE uint32_t get_num_digits() { return this->num_digits; }
419+
LIBC_INLINE int32_t get_decimal_point() { return this->decimal_point; }
420+
LIBC_INLINE void set_truncated(bool trunc) { this->truncated = trunc; }
411421
};
412422

413423
} // namespace internal

libc/src/__support/str_to_float.h

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -313,14 +313,15 @@ constexpr int32_t NUM_POWERS_OF_TWO =
313313
// on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
314314
// link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
315315
template <class T>
316-
LIBC_INLINE FloatConvertReturn<T>
317-
simple_decimal_conversion(const char *__restrict numStart,
318-
RoundDirection round = RoundDirection::Nearest) {
316+
LIBC_INLINE FloatConvertReturn<T> simple_decimal_conversion(
317+
const char *__restrict numStart,
318+
const size_t num_len = cpp::numeric_limits<size_t>::max(),
319+
RoundDirection round = RoundDirection::Nearest) {
319320
using FPBits = typename fputil::FPBits<T>;
320321
using StorageType = typename FPBits::StorageType;
321322

322323
int32_t exp2 = 0;
323-
HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart);
324+
HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart, num_len);
324325

325326
FloatConvertReturn<T> output;
326327

@@ -600,13 +601,17 @@ clinger_fast_path(ExpandedFloat<T> init_num,
600601
// non-inf result for this size of float. The value is
601602
// log10(2^(exponent bias)).
602603
// The generic approximation uses the fact that log10(2^x) ~= x/3
603-
template <typename T> constexpr int32_t get_upper_bound() {
604+
template <typename T> LIBC_INLINE constexpr int32_t get_upper_bound() {
604605
return fputil::FPBits<T>::EXP_BIAS / 3;
605606
}
606607

607-
template <> constexpr int32_t get_upper_bound<float>() { return 39; }
608+
template <> LIBC_INLINE constexpr int32_t get_upper_bound<float>() {
609+
return 39;
610+
}
608611

609-
template <> constexpr int32_t get_upper_bound<double>() { return 309; }
612+
template <> LIBC_INLINE constexpr int32_t get_upper_bound<double>() {
613+
return 309;
614+
}
610615

611616
// The lower bound is the largest negative base-10 exponent that could possibly
612617
// give a non-zero result for this size of float. The value is
@@ -616,18 +621,18 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
616621
// low base 10 exponent with a very high intermediate mantissa can cancel each
617622
// other out, and subnormal numbers allow for the result to be at the very low
618623
// end of the final mantissa.
619-
template <typename T> constexpr int32_t get_lower_bound() {
624+
template <typename T> LIBC_INLINE constexpr int32_t get_lower_bound() {
620625
using FPBits = typename fputil::FPBits<T>;
621626
return -((FPBits::EXP_BIAS +
622627
static_cast<int32_t>(FPBits::FRACTION_LEN + FPBits::STORAGE_LEN)) /
623628
3);
624629
}
625630

626-
template <> constexpr int32_t get_lower_bound<float>() {
631+
template <> LIBC_INLINE constexpr int32_t get_lower_bound<float>() {
627632
return -(39 + 6 + 10);
628633
}
629634

630-
template <> constexpr int32_t get_lower_bound<double>() {
635+
template <> LIBC_INLINE constexpr int32_t get_lower_bound<double>() {
631636
return -(309 + 15 + 20);
632637
}
633638

@@ -637,9 +642,10 @@ template <> constexpr int32_t get_lower_bound<double>() {
637642
// accuracy. The resulting mantissa and exponent are returned in the num field
638643
// of the FloatConvertReturn<T> result.
639644
template <class T>
640-
LIBC_INLINE FloatConvertReturn<T>
641-
decimal_exp_to_float(ExpandedFloat<T> init_num, const char *__restrict numStart,
642-
bool truncated, RoundDirection round) {
645+
LIBC_INLINE FloatConvertReturn<T> decimal_exp_to_float(
646+
ExpandedFloat<T> init_num, bool truncated, RoundDirection round,
647+
const char *__restrict numStart,
648+
const size_t num_len = cpp::numeric_limits<size_t>::max()) {
643649
using FPBits = typename fputil::FPBits<T>;
644650
using StorageType = typename FPBits::StorageType;
645651

@@ -701,7 +707,7 @@ decimal_exp_to_float(ExpandedFloat<T> init_num, const char *__restrict numStart,
701707
#endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
702708

703709
#ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
704-
output = simple_decimal_conversion<T>(numStart, round);
710+
output = simple_decimal_conversion<T>(numStart, num_len, round);
705711
#else
706712
#warning "Simple decimal conversion is disabled, result may not be correct."
707713
#endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
@@ -894,6 +900,8 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
894900
if (!seen_digit)
895901
return output;
896902

903+
// TODO: When adding a max length argument, handle the case of a trailing
904+
// EXPONENT_MARKER; see scanf for more details.
897905
if (tolower(src[index]) == EXPONENT_MARKER) {
898906
bool has_sign = false;
899907
if (src[index + 1] == '+' || src[index + 1] == '-') {
@@ -928,7 +936,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
928936
output.value = {0, 0};
929937
} else {
930938
auto temp =
931-
decimal_exp_to_float<T>({mantissa, exponent}, src, truncated, round);
939+
decimal_exp_to_float<T>({mantissa, exponent}, truncated, round, src);
932940
output.value = temp.num;
933941
output.error = temp.error;
934942
}
@@ -1071,6 +1079,8 @@ nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) {
10711079

10721080
// Takes a pointer to a string and a pointer to a string pointer. This function
10731081
// is used as the backend for all of the string to float functions.
1082+
// TODO: Add a src_len parameter to match strtointeger.
1083+
// TODO: Next, move from char* and length to string_view
10741084
template <class T>
10751085
LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
10761086
using FPBits = typename fputil::FPBits<T>;

0 commit comments

Comments
 (0)