Skip to content

Commit 3e15c97

Browse files
[libc++] LWG2381: Inconsistency in parsing floating point numbers (#77948)
This PR implements [LWG2381](https://cplusplus.github.io/LWG/issue2381) by rejecting `'i'`, `'I'`, `'n'`, `'N'` in FP parsing, as parsing of inf and NaN is intentionally rejected by that LWG issue. The source character array used for parsing is `"0123456789abcdefABCDEFxX+-pPiInN"`, whose first 26 or 28 characters are used for parsing integers or floating-point values, respectively. Previously, libc++ used all 32 characters, including `'i'`, `'I'`, `'n'`, `'N'`, for FP parsing, which was inconsistent with LWG2381. This PR also replaces the magic numbers 26 and 28 (formerly 32) with named constants. Drive-by change: when the first character (possibly after a leading `'+'` or `'-'`) is not a decimal digit but is otherwise an acceptable character (e.g., `'p'` or `'e'`), that character is no longer accumulated (per Stage 2 in [facet.num.get.virtuals]/3). #65168 may be rendered invalid, see #65168 (comment). Apple back-deployment targets remain broken, likely due to the dylib. XFAIL is marked in related tests. --------- Co-authored-by: Mark de Wever <[email protected]>
1 parent 0eca265 commit 3e15c97

File tree

5 files changed

+467
-71
lines changed

5 files changed

+467
-71
lines changed

libcxx/docs/Status/Cxx23Issues.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
`3555 <https://wg21.link/LWG3555>`__,"``{transform,elements}_view::iterator::iterator_concept`` should consider const-qualification of the underlying range","June 2021","","","|ranges|"
9999
"","","","","",""
100100
`2191 <https://wg21.link/LWG2191>`__,"Incorrect specification of ``match_results(match_results&&)``","October 2021","|Nothing To Do|",""
101-
`2381 <https://wg21.link/LWG2381>`__,"Inconsistency in parsing floating point numbers","October 2021","",""
101+
`2381 <https://wg21.link/LWG2381>`__,"Inconsistency in parsing floating point numbers","October 2021","|Complete|","19.0"
102102
`2762 <https://wg21.link/LWG2762>`__,"``unique_ptr operator*()`` should be ``noexcept``","October 2021","",""
103103
`3121 <https://wg21.link/LWG3121>`__,"``tuple`` constructor constraints for ``UTypes&&...`` overloads","October 2021","",""
104104
`3123 <https://wg21.link/LWG3123>`__,"``duration`` constructor from representation shouldn't be effectively non-throwing","October 2021","","","|chrono|"

libcxx/include/locale

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,11 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base {
368368
static const int __num_get_buf_sz = 40;
369369

370370
static int __get_base(ios_base&);
371-
static const char __src[33];
371+
static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN"
372+
// count of leading characters in __src used for parsing integers ("012..X+-")
373+
static const size_t __int_chr_cnt = 26;
374+
// count of leading characters in __src used for parsing floating-point values ("012..-pP")
375+
static const size_t __fp_chr_cnt = 28;
372376
};
373377

374378
_LIBCPP_EXPORTED_FROM_ABI void
@@ -431,7 +435,7 @@ private:
431435
template <typename _Tp>
432436
const _Tp* __do_widen_p(ios_base& __iob, _Tp* __atoms) const {
433437
locale __loc = __iob.getloc();
434-
use_facet<ctype<_Tp> >(__loc).widen(__src, __src + 26, __atoms);
438+
use_facet<ctype<_Tp> >(__loc).widen(__src, __src + __int_chr_cnt, __atoms);
435439
return __atoms;
436440
}
437441

@@ -447,7 +451,7 @@ private:
447451
template <class _CharT>
448452
string __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep) {
449453
locale __loc = __iob.getloc();
450-
std::use_facet<ctype<_CharT> >(__loc).widen(__src, __src + 26, __atoms);
454+
std::use_facet<ctype<_CharT> >(__loc).widen(__src, __src + __int_chr_cnt, __atoms);
451455
const numpunct<_CharT>& __np = std::use_facet<numpunct<_CharT> >(__loc);
452456
__thousands_sep = __np.thousands_sep();
453457
return __np.grouping();
@@ -458,7 +462,7 @@ template <class _CharT>
458462
string __num_get<_CharT>::__stage2_float_prep(
459463
ios_base& __iob, _CharT* __atoms, _CharT& __decimal_point, _CharT& __thousands_sep) {
460464
locale __loc = __iob.getloc();
461-
std::use_facet<ctype<_CharT> >(__loc).widen(__src, __src + 32, __atoms);
465+
std::use_facet<ctype<_CharT> >(__loc).widen(__src, __src + __fp_chr_cnt, __atoms);
462466
const numpunct<_CharT>& __np = std::use_facet<numpunct<_CharT> >(__loc);
463467
__decimal_point = __np.decimal_point();
464468
__thousands_sep = __np.thousands_sep();
@@ -490,7 +494,7 @@ __num_get<_CharT>::__stage2_int_loop(_CharT __ct, int __base, char* __a, char*&
490494
}
491495
return 0;
492496
}
493-
ptrdiff_t __f = std::find(__atoms, __atoms + 26, __ct) - __atoms;
497+
ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms;
494498
if (__f >= 24)
495499
return -1;
496500
switch (__base) {
@@ -546,8 +550,8 @@ int __num_get<_CharT>::__stage2_float_loop(
546550
}
547551
return 0;
548552
}
549-
ptrdiff_t __f = std::find(__atoms, __atoms + 32, __ct) - __atoms;
550-
if (__f >= 32)
553+
ptrdiff_t __f = std::find(__atoms, __atoms + __num_get_base::__fp_chr_cnt, __ct) - __atoms;
554+
if (__f >= static_cast<ptrdiff_t>(__num_get_base::__fp_chr_cnt))
551555
return -1;
552556
char __x = __src[__f];
553557
if (__x == '-' || __x == '+') {
@@ -846,7 +850,7 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_signed(
846850
int __base = this->__get_base(__iob);
847851
// Stage 2
848852
char_type __thousands_sep;
849-
const int __atoms_size = 26;
853+
const int __atoms_size = __num_get_base::__int_chr_cnt;
850854
#ifdef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
851855
char_type __atoms1[__atoms_size];
852856
const char_type* __atoms = this->__do_widen(__iob, __atoms1);
@@ -895,7 +899,7 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_unsigned(
895899
int __base = this->__get_base(__iob);
896900
// Stage 2
897901
char_type __thousands_sep;
898-
const int __atoms_size = 26;
902+
const int __atoms_size = __num_get_base::__int_chr_cnt;
899903
#ifdef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
900904
char_type __atoms1[__atoms_size];
901905
const char_type* __atoms = this->__do_widen(__iob, __atoms1);
@@ -942,7 +946,7 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_floating_point(
942946
iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Fp& __v) const {
943947
// Stage 1, nothing to do
944948
// Stage 2
945-
char_type __atoms[32];
949+
char_type __atoms[__num_get_base::__fp_chr_cnt];
946950
char_type __decimal_point;
947951
char_type __thousands_sep;
948952
string __grouping = this->__stage2_float_prep(__iob, __atoms, __decimal_point, __thousands_sep);
@@ -951,10 +955,11 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_floating_point(
951955
char* __a = &__buf[0];
952956
char* __a_end = __a;
953957
unsigned __g[__num_get_base::__num_get_buf_sz];
954-
unsigned* __g_end = __g;
955-
unsigned __dc = 0;
956-
bool __in_units = true;
957-
char __exp = 'E';
958+
unsigned* __g_end = __g;
959+
unsigned __dc = 0;
960+
bool __in_units = true;
961+
char __exp = 'E';
962+
bool __is_leading_parsed = false;
958963
for (; __b != __e; ++__b) {
959964
if (__a_end == __a + __buf.size()) {
960965
size_t __tmp = __buf.size();
@@ -977,6 +982,21 @@ _InputIterator num_get<_CharT, _InputIterator>::__do_get_floating_point(
977982
__dc,
978983
__atoms))
979984
break;
985+
986+
// the leading character excluding the sign must be a decimal digit
987+
if (!__is_leading_parsed) {
988+
if (__a_end - __a >= 1 && __a[0] != '-' && __a[0] != '+') {
989+
if ('0' <= __a[0] && __a[0] <= '9')
990+
__is_leading_parsed = true;
991+
else
992+
break;
993+
} else if (__a_end - __a >= 2 && (__a[0] == '-' || __a[0] == '+')) {
994+
if ('0' <= __a[1] && __a[1] <= '9')
995+
__is_leading_parsed = true;
996+
else
997+
break;
998+
}
999+
}
9801000
}
9811001
if (__grouping.size() != 0 && __in_units && __g_end - __g < __num_get_base::__num_get_buf_sz)
9821002
*__g_end++ = __dc;
@@ -996,10 +1016,11 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get(
9961016
// Stage 1
9971017
int __base = 16;
9981018
// Stage 2
999-
char_type __atoms[26];
1019+
char_type __atoms[__num_get_base::__int_chr_cnt];
10001020
char_type __thousands_sep = char_type();
10011021
string __grouping;
1002-
std::use_facet<ctype<_CharT> >(__iob.getloc()).widen(__num_get_base::__src, __num_get_base::__src + 26, __atoms);
1022+
std::use_facet<ctype<_CharT> >(__iob.getloc())
1023+
.widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms);
10031024
string __buf;
10041025
__buf.resize(__buf.capacity());
10051026
char* __a = &__buf[0];

libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp

Lines changed: 143 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
// The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed
10+
// behavior of FP parsing, while Apple back-deployment targets remain broken due
11+
// to the dylib.
12+
// UNSUPPORTED: using-built-library-before-llvm-19
13+
914
// <locale>
1015

1116
// class num_get<charT, InputIterator>
@@ -116,9 +121,9 @@ int main(int, char**)
116121
f.get(cpp17_input_iterator<const char*>(str),
117122
cpp17_input_iterator<const char*>(str+sizeof(str)),
118123
ios, err, v);
119-
assert(base(iter) == str+sizeof(str)-1);
120-
assert(err == ios.goodbit);
121-
assert(v == INFINITY);
124+
assert(base(iter) == str);
125+
assert(err == ios.failbit);
126+
assert(v == 0.0);
122127
}
123128
{
124129
const char str[] = "INF";
@@ -128,9 +133,9 @@ int main(int, char**)
128133
f.get(cpp17_input_iterator<const char*>(str),
129134
cpp17_input_iterator<const char*>(str+sizeof(str)),
130135
ios, err, v);
131-
assert(base(iter) == str+sizeof(str)-1);
132-
assert(err == ios.goodbit);
133-
assert(v == INFINITY);
136+
assert(base(iter) == str);
137+
assert(err == ios.failbit);
138+
assert(v == 0.0);
134139
}
135140
{
136141
const char str[] = "-inf";
@@ -140,9 +145,9 @@ int main(int, char**)
140145
f.get(cpp17_input_iterator<const char*>(str),
141146
cpp17_input_iterator<const char*>(str+sizeof(str)),
142147
ios, err, v);
143-
assert(base(iter) == str+sizeof(str)-1);
144-
assert(err == ios.goodbit);
145-
assert(v == -INFINITY);
148+
assert(base(iter) == str + 1);
149+
assert(err == ios.failbit);
150+
assert(v == 0.0);
146151
}
147152
{
148153
const char str[] = "-INF";
@@ -152,9 +157,9 @@ int main(int, char**)
152157
f.get(cpp17_input_iterator<const char*>(str),
153158
cpp17_input_iterator<const char*>(str+sizeof(str)),
154159
ios, err, v);
155-
assert(base(iter) == str+sizeof(str)-1);
156-
assert(err == ios.goodbit);
157-
assert(v == -INFINITY);
160+
assert(base(iter) == str + 1);
161+
assert(err == ios.failbit);
162+
assert(v == 0.0);
158163
}
159164
{
160165
const char str[] = "nan";
@@ -164,9 +169,9 @@ int main(int, char**)
164169
f.get(cpp17_input_iterator<const char*>(str),
165170
cpp17_input_iterator<const char*>(str+sizeof(str)),
166171
ios, err, v);
167-
assert(base(iter) == str+sizeof(str)-1);
168-
assert(err == ios.goodbit);
169-
assert(std::isnan(v));
172+
assert(base(iter) == str);
173+
assert(err == ios.failbit);
174+
assert(v == 0.0);
170175
}
171176
{
172177
const char str[] = "NAN";
@@ -176,9 +181,129 @@ int main(int, char**)
176181
f.get(cpp17_input_iterator<const char*>(str),
177182
cpp17_input_iterator<const char*>(str+sizeof(str)),
178183
ios, err, v);
179-
assert(base(iter) == str+sizeof(str)-1);
180-
assert(err == ios.goodbit);
181-
assert(std::isnan(v));
184+
assert(base(iter) == str);
185+
assert(err == ios.failbit);
186+
assert(v == 0.0);
187+
}
188+
{
189+
const char str[] = "p00";
190+
std::hex(ios);
191+
std::ios_base::iostate err = ios.goodbit;
192+
cpp17_input_iterator<const char*> iter = f.get(
193+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
194+
assert(base(iter) == str);
195+
assert(err == ios.failbit);
196+
assert(v == 0.0);
197+
}
198+
{
199+
const char str[] = "P00";
200+
std::hex(ios);
201+
std::ios_base::iostate err = ios.goodbit;
202+
cpp17_input_iterator<const char*> iter = f.get(
203+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
204+
assert(base(iter) == str);
205+
assert(err == ios.failbit);
206+
assert(v == 0.0);
207+
}
208+
{
209+
const char str[] = "+p00";
210+
std::hex(ios);
211+
std::ios_base::iostate err = ios.goodbit;
212+
cpp17_input_iterator<const char*> iter = f.get(
213+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
214+
assert(base(iter) == str + 1);
215+
assert(err == ios.failbit);
216+
assert(v == 0.0);
217+
}
218+
{
219+
const char str[] = "+P00";
220+
std::hex(ios);
221+
std::ios_base::iostate err = ios.goodbit;
222+
cpp17_input_iterator<const char*> iter = f.get(
223+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
224+
assert(base(iter) == str + 1);
225+
assert(err == ios.failbit);
226+
assert(v == 0.0);
227+
}
228+
{
229+
const char str[] = "-p00";
230+
std::hex(ios);
231+
std::ios_base::iostate err = ios.goodbit;
232+
cpp17_input_iterator<const char*> iter = f.get(
233+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
234+
assert(base(iter) == str + 1);
235+
assert(err == ios.failbit);
236+
assert(v == 0.0);
237+
}
238+
{
239+
const char str[] = "-P00";
240+
std::hex(ios);
241+
std::ios_base::iostate err = ios.goodbit;
242+
cpp17_input_iterator<const char*> iter = f.get(
243+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
244+
assert(base(iter) == str + 1);
245+
assert(err == ios.failbit);
246+
assert(v == 0.0);
247+
}
248+
{
249+
const char str[] = "e00";
250+
std::hex(ios);
251+
std::ios_base::iostate err = ios.goodbit;
252+
cpp17_input_iterator<const char*> iter = f.get(
253+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
254+
assert(base(iter) == str);
255+
assert(err == ios.failbit);
256+
assert(v == 0.0);
257+
}
258+
{
259+
const char str[] = "E00";
260+
std::hex(ios);
261+
std::ios_base::iostate err = ios.goodbit;
262+
cpp17_input_iterator<const char*> iter = f.get(
263+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
264+
assert(base(iter) == str);
265+
assert(err == ios.failbit);
266+
assert(v == 0.0);
267+
}
268+
{
269+
const char str[] = "+e00";
270+
std::hex(ios);
271+
std::ios_base::iostate err = ios.goodbit;
272+
cpp17_input_iterator<const char*> iter = f.get(
273+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
274+
assert(base(iter) == str + 1);
275+
assert(err == ios.failbit);
276+
assert(v == 0.0);
277+
}
278+
{
279+
const char str[] = "+E00";
280+
std::hex(ios);
281+
std::ios_base::iostate err = ios.goodbit;
282+
cpp17_input_iterator<const char*> iter = f.get(
283+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
284+
assert(base(iter) == str + 1);
285+
assert(err == ios.failbit);
286+
assert(v == 0.0);
287+
}
288+
{
289+
const char str[] = "-e00";
290+
std::hex(ios);
291+
std::ios_base::iostate err = ios.goodbit;
292+
cpp17_input_iterator<const char*> iter = f.get(
293+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
294+
assert(base(iter) == str + 1);
295+
assert(err == ios.failbit);
296+
assert(v == 0.0);
297+
}
298+
{
299+
const char str[] = "-E00";
300+
std::hex(ios);
301+
std::ios_base::iostate err = ios.goodbit;
302+
cpp17_input_iterator<const char*> iter = f.get(
303+
cpp17_input_iterator<const char*>(str), cpp17_input_iterator<const char*>(str + sizeof(str)), ios, err, v);
304+
assert(base(iter) == str + 1);
305+
assert(err == ios.failbit);
306+
assert(v == 0.0);
182307
}
183308
{
184309
v = -1;

0 commit comments

Comments
 (0)