Skip to content

Commit 18974c3

Browse files
authored
bpo-30647: Check nl_langinfo(CODESET) in locale coercion (GH-2374)
- On some versions of FreeBSD, setting the "UTF-8" locale succeeds, but a subsequent "nl_langinfo(CODESET)" call fails
- Adding a check for this in the coercion logic means that coercion will happen on systems where this check succeeds, and will be skipped otherwise
- That way CPython should automatically adapt to changes in platform behaviour, rather than needing a new release to enable coercion at build time
- This also allows UTF-8 to be re-enabled as a coercion target, restoring the locale coercion behaviour on Mac OS X
1 parent f7d090c commit 18974c3

File tree

2 files changed

+29
-15
lines changed

2 files changed

+29
-15
lines changed

Lib/test/test_c_locale_coercion.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
22

33
import unittest
4+
import locale
45
import os
56
import sys
67
import sysconfig
@@ -32,24 +33,34 @@
3233

3334
# In order to get the warning messages to match up as expected, the candidate
3435
# order here must match the target locale order in Python/pylifecycle.c
35-
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
36-
37-
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
38-
# problems encountered on *BSD systems with those test cases
39-
# For additional details see:
40-
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
41-
# locale handling differences: https://bugs.python.org/issue30672
36+
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
4237

4338
# There's no reliable cross-platform way of checking locale alias
4439
# lists, so the only way of knowing which of these locales will work
4540
# is to try them with locale.setlocale(). We do that in a subprocess
4641
# to avoid altering the locale of the test runner.
42+
#
43+
# If the relevant locale module attributes exist, and we're not on a platform
44+
# where we expect it to always succeed, we also check that
45+
# `locale.nl_langinfo(locale.CODESET)` works, because if it fails, the interpreter
46+
# will skip locale coercion for that particular target locale
47+
_check_nl_langinfo_CODESET = bool(
48+
sys.platform not in ("darwin", "linux") and
49+
hasattr(locale, "nl_langinfo") and
50+
hasattr(locale, "CODESET")
51+
)
52+
4753
def _set_locale_in_subprocess(locale_name):
4854
cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
55+
if _check_nl_langinfo_CODESET:
56+
# If there's no valid CODESET, we expect coercion to be skipped
57+
cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))"
4958
cmd = cmd_fmt.format(locale_name)
5059
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
5160
return result.rc == 0
5261

62+
63+
5364
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
5465
_EncodingDetails = namedtuple("EncodingDetails", _fields)
5566

Python/pylifecycle.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -399,17 +399,10 @@ typedef struct _CandidateLocale {
399399
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
400400
{"C.UTF-8"},
401401
{"C.utf8"},
402-
/* {"UTF-8"}, */
402+
{"UTF-8"},
403403
{NULL}
404404
};
405405

406-
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
407-
* problems encountered on *BSD systems with those test cases
408-
* For additional details see:
409-
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
410-
* locale handling differences: https://bugs.python.org/issue30672
411-
*/
412-
413406
static char *
414407
get_default_standard_stream_error_handler(void)
415408
{
@@ -490,6 +483,16 @@ _Py_CoerceLegacyLocale(void)
490483
const char *new_locale = setlocale(LC_CTYPE,
491484
target->locale_name);
492485
if (new_locale != NULL) {
486+
#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
487+
/* Also ensure that nl_langinfo works in this locale */
488+
char *codeset = nl_langinfo(CODESET);
489+
if (!codeset || *codeset == '\0') {
490+
/* CODESET is not set or empty, so skip coercion */
491+
new_locale = NULL;
492+
setlocale(LC_CTYPE, "");
493+
continue;
494+
}
495+
#endif
493496
/* Successfully configured locale, so make it the default */
494497
_coerce_default_locale_settings(target);
495498
return;

0 commit comments

Comments
 (0)