Skip to content

Commit 45415ef

Browse files
committed
[libcxx] Fix the ctype is (pointer version) function for Windows
Previously, this test snippet would report incorrect information: F::mask m; std::wstring in(L"\u00DA"); // LATIN CAPITAL LETTER U WITH ACUTE f.is(in.data(), in.data() + 1, &m); // m & F::lower would be set The single-character version of the `is` function wasn't affected by this issue though. Define `_LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA` for Windows, as the `alpha` / `_ALPHA` constant is a mask consisting of multiple bits set, which avoids setting `alpha` whenever any of the bits is set, in the `do_is` implementation. On Windows, with the "C" locale, wchars are classified according to their Unicode interpretation, just as in the en_US.UTF-8 locale on all platforms. Due to the differing classification of some characters, the `scan_is` and `scan_not` tests are quite annoying to fix, thus just ifdef out some of the tests for the "C" locale there - the code gets tested with the more standard en_US.UTF-8 locale anyway. Differential Revision: https://reviews.llvm.org/D120796
1 parent 3347e7d commit 45415ef

File tree

5 files changed

+39
-13
lines changed

5 files changed

+39
-13
lines changed

libcxx/include/__locale

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,7 @@ public:
454454
static const mask blank = _BLANK;
455455
static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used
456456
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
457+
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
457458
#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
458459
# ifdef __APPLE__
459460
typedef __uint32_t mask;

libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// bool is(mask m, charT c) const;
1414

1515
// REQUIRES: locale.en_US.UTF-8
16-
// XFAIL: LIBCXX-WINDOWS-FIXME
1716
// XFAIL: libcpp-has-no-wide-characters
1817

1918
#include <locale>
@@ -107,8 +106,15 @@ int main(int, char**)
107106
assert(f.is(F::graph, L'.'));
108107
assert(!f.is(F::graph, L'\x07'));
109108

109+
#if defined(_WIN32)
110+
// On Windows, these wchars are classified according to their
111+
// Unicode interpretation even in the "C" locale.
112+
assert(f.is(F::alpha, L'\x00DA'));
113+
assert(f.is(F::upper, L'\x00DA'));
114+
#else
110115
assert(!f.is(F::alpha, L'\x00DA'));
111116
assert(!f.is(F::upper, L'\x00DA'));
117+
#endif
112118
}
113119
}
114120

libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// const charT* do_is(const charT* low, const charT* high, mask* vec) const;
1414

1515
// REQUIRES: locale.en_US.UTF-8
16-
// XFAIL: LIBCXX-WINDOWS-FIXME
1716
// XFAIL: libcpp-has-no-wide-characters
1817

1918
#include <locale>
@@ -149,17 +148,27 @@ int main(int, char**)
149148

150149
// L'\x00DA'
151150
assert(!(m[0] & F::space));
152-
assert(!(m[0] & F::print));
153151
assert(!(m[0] & F::cntrl));
154-
assert(!(m[0] & F::upper));
155152
assert(!(m[0] & F::lower));
156-
assert(!(m[0] & F::alpha));
157153
assert(!(m[0] & F::digit));
158154
assert(!(m[0] & F::punct));
159155
assert(!(m[0] & F::xdigit));
160156
assert(!(m[0] & F::blank));
157+
#if defined(_WIN32)
158+
// On Windows, these wchars are classified according to their
159+
// Unicode interpretation even in the "C" locale.
160+
assert( (m[0] & F::alpha));
161+
assert( (m[0] & F::upper));
162+
assert( (m[0] & F::print));
163+
assert( (m[0] & F::alnum));
164+
assert( (m[0] & F::graph));
165+
#else
166+
assert(!(m[0] & F::alpha));
167+
assert(!(m[0] & F::upper));
168+
assert(!(m[0] & F::print));
161169
assert(!(m[0] & F::alnum));
162170
assert(!(m[0] & F::graph));
171+
#endif
163172

164173
// L' '
165174
assert( (m[1] & F::space));

libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// const charT* scan_is(mask m, const charT* low, const charT* high) const;
1414

1515
// REQUIRES: locale.en_US.UTF-8
16-
// XFAIL: LIBCXX-WINDOWS-FIXME
1716
// XFAIL: libcpp-has-no-wide-characters
1817

1918
#include <locale>
@@ -57,17 +56,23 @@ int main(int, char**)
5756
const std::wstring in(L"\x00DA A\x07.a1");
5857
std::vector<F::mask> m(in.size());
5958
assert(f.scan_is(F::space, in.data(), in.data() + in.size()) - in.data() == 1);
60-
assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
6159
assert(f.scan_is(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 3);
62-
assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
6360
assert(f.scan_is(F::lower, in.data(), in.data() + in.size()) - in.data() == 5);
64-
assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
6561
assert(f.scan_is(F::digit, in.data(), in.data() + in.size()) - in.data() == 6);
6662
assert(f.scan_is(F::punct, in.data(), in.data() + in.size()) - in.data() == 4);
6763
assert(f.scan_is(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 2);
6864
assert(f.scan_is(F::blank, in.data(), in.data() + in.size()) - in.data() == 1);
65+
#if !defined(_WIN32)
66+
// On Windows, these wchars are classified according to their
67+
// Unicode interpretation even in the "C" locale, where
68+
// the scan_is function returns the same as above for the
69+
// en_US.UTF-8 locale.
70+
assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
71+
assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
72+
assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
6973
assert(f.scan_is(F::alnum, in.data(), in.data() + in.size()) - in.data() == 2);
7074
assert(f.scan_is(F::graph, in.data(), in.data() + in.size()) - in.data() == 2);
75+
#endif
7176
}
7277
}
7378

libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// const charT* scan_not(mask m, const charT* low, const charT* high) const;
1414

1515
// REQUIRES: locale.en_US.UTF-8
16-
// XFAIL: LIBCXX-WINDOWS-FIXME
1716
// XFAIL: libcpp-has-no-wide-characters
1817

1918
#include <locale>
@@ -57,17 +56,23 @@ int main(int, char**)
5756
const std::wstring in(L"\x00DA A\x07.a1");
5857
std::vector<F::mask> m(in.size());
5958
assert(f.scan_not(F::space, in.data(), in.data() + in.size()) - in.data() == 0);
60-
assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
6159
assert(f.scan_not(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 0);
62-
assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
6360
assert(f.scan_not(F::lower, in.data(), in.data() + in.size()) - in.data() == 0);
64-
assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
6561
assert(f.scan_not(F::digit, in.data(), in.data() + in.size()) - in.data() == 0);
6662
assert(f.scan_not(F::punct, in.data(), in.data() + in.size()) - in.data() == 0);
6763
assert(f.scan_not(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 0);
6864
assert(f.scan_not(F::blank, in.data(), in.data() + in.size()) - in.data() == 0);
65+
#if !defined(_WIN32)
66+
// On Windows, these wchars are classified according to their
67+
// Unicode interpretation even in the "C" locale, where
68+
// the scan_not function returns the same as above for the
69+
// en_US.UTF-8 locale.
70+
assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
71+
assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
72+
assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
6973
assert(f.scan_not(F::alnum, in.data(), in.data() + in.size()) - in.data() == 0);
7074
assert(f.scan_not(F::graph, in.data(), in.data() + in.size()) - in.data() == 0);
75+
#endif
7176
}
7277
}
7378

0 commit comments

Comments
 (0)