Skip to content

Commit dbdee00

Browse files
authored
bpo-34589: Add -X coerce_c_locale command line option (GH-9378)
Add a new -X coerce_c_locale command line option to control C locale coercion (PEP 538).
1 parent 7a0791b commit dbdee00

File tree

8 files changed

+160
-52
lines changed

8 files changed

+160
-52
lines changed

Doc/using/cmdline.rst

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,13 +438,22 @@ Miscellaneous options
438438
* Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to
439439
``True``
440440

441-
* ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding
441+
* ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding
442442
the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8
443443
mode (even when it would otherwise activate automatically).
444444
See :envvar:`PYTHONUTF8` for more details.
445445
* ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
446446
tree rooted at the given directory instead of to the code tree. See also
447447
:envvar:`PYTHONPYCACHEPREFIX`.
448+
* ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C
449+
locale (:pep:`538`).
450+
``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX
451+
locales to a more capable UTF-8 based alternative.
452+
``-X coerce_c_locale=warn`` will cause Python to emit warning messages on
453+
``stderr`` if either the locale coercion activates, or else if a locale
454+
that *would* have triggered coercion is still active when the Python
455+
runtime is initialized.
456+
See :envvar:`PYTHONCOERCECLOCALE` for more details.
448457

449458
It also allows passing arbitrary values and retrieving them through the
450459
:data:`sys._xoptions` dictionary.
@@ -464,6 +473,9 @@ Miscellaneous options
464473
.. versionadded:: 3.7
465474
The ``-X importtime``, ``-X dev`` and ``-X utf8`` options.
466475

476+
.. versionadded:: 3.7.1
477+
The ``-X coerce_c_locale`` option.
478+
467479
.. versionadded:: 3.8
468480
The ``-X pycache_prefix`` option.
469481

@@ -850,6 +862,8 @@ conflict.
850862
order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for
851863
system interfaces.
852864

865+
Also available as the :option:`-X` ``coerce_c_locale`` option.
866+
853867
Availability: \*nix
854868

855869
.. versionadded:: 3.7

Doc/whatsnew/3.7.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables,
24942494
while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If
24952495
this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before
24962496
calling :c:func:`Py_Initialize`.
2497+
2498+
:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale
2499+
coercion (:pep:`538`) anymore: it is always disabled. It can now only be
2500+
enabled by the Python program ("python3").
2501+
2502+
New :option:`-X` ``coerce_c_locale`` command line option to control C locale
2503+
coercion (:pep:`538`).

Lib/test/test_c_locale_coercion.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def _handle_output_variations(data):
139139
return data
140140

141141
@classmethod
142-
def get_child_details(cls, env_vars):
142+
def get_child_details(cls, env_vars, xoption=None):
143143
"""Retrieves fsencoding and standard stream details from a child process
144144
145145
Returns (encoding_details, stderr_lines):
@@ -150,10 +150,11 @@ def get_child_details(cls, env_vars):
150150
The child is run in isolated mode if the current interpreter supports
151151
that.
152152
"""
153-
result, py_cmd = run_python_until_end(
154-
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
155-
**env_vars
156-
)
153+
args = []
154+
if xoption:
155+
args.extend(("-X", f"coerce_c_locale={xoption}"))
156+
args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT))
157+
result, py_cmd = run_python_until_end(*args, **env_vars)
157158
if not result.rc == 0:
158159
result.fail(py_cmd)
159160
# All subprocess outputs in this test case should be pure ASCII
@@ -212,15 +213,16 @@ def _check_child_encoding_details(self,
212213
expected_fs_encoding,
213214
expected_stream_encoding,
214215
expected_warnings,
215-
coercion_expected):
216+
coercion_expected,
217+
xoption=None):
216218
"""Check the C locale handling for the given process environment
217219
218220
Parameters:
219221
expected_fs_encoding: expected sys.getfilesystemencoding() result
220222
expected_stream_encoding: expected encoding for standard streams
221223
expected_warnings: stderr output to expect (if any)
222224
"""
223-
result = EncodingDetails.get_child_details(env_vars)
225+
result = EncodingDetails.get_child_details(env_vars, xoption)
224226
encoding_details, stderr_lines = result
225227
expected_details = EncodingDetails.get_expected_details(
226228
coercion_expected,
@@ -290,6 +292,7 @@ def _check_c_locale_coercion(self,
290292
coerce_c_locale,
291293
expected_warnings=None,
292294
coercion_expected=True,
295+
use_xoption=False,
293296
**extra_vars):
294297
"""Check the C locale handling for various configurations
295298
@@ -319,8 +322,12 @@ def _check_c_locale_coercion(self,
319322
"PYTHONCOERCECLOCALE": "",
320323
}
321324
base_var_dict.update(extra_vars)
325+
xoption = None
322326
if coerce_c_locale is not None:
323-
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
327+
if use_xoption:
328+
xoption = coerce_c_locale
329+
else:
330+
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
324331

325332
# Check behaviour for the default locale
326333
with self.subTest(default_locale=True,
@@ -342,7 +349,8 @@ def _check_c_locale_coercion(self,
342349
fs_encoding,
343350
stream_encoding,
344351
_expected_warnings,
345-
_coercion_expected)
352+
_coercion_expected,
353+
xoption=xoption)
346354

347355
# Check behaviour for explicitly configured locales
348356
for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
@@ -357,7 +365,8 @@ def _check_c_locale_coercion(self,
357365
fs_encoding,
358366
stream_encoding,
359367
expected_warnings,
360-
coercion_expected)
368+
coercion_expected,
369+
xoption=xoption)
361370

362371
def test_PYTHONCOERCECLOCALE_not_set(self):
363372
# This should coerce to the first available target locale by default
@@ -404,6 +413,32 @@ def test_LC_ALL_set_to_C(self):
404413
expected_warnings=[LEGACY_LOCALE_WARNING],
405414
coercion_expected=False)
406415

416+
def test_xoption_set_to_1(self):
417+
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1",
418+
use_xoption=True)
419+
420+
def test_xoption_set_to_zero(self):
421+
# The setting "0" should result in the locale coercion being disabled
422+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
423+
EXPECTED_C_LOCALE_STREAM_ENCODING,
424+
coerce_c_locale="0",
425+
coercion_expected=False,
426+
use_xoption=True)
427+
# Setting LC_ALL=C shouldn't make any difference to the behaviour
428+
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
429+
EXPECTED_C_LOCALE_STREAM_ENCODING,
430+
coerce_c_locale="0",
431+
LC_ALL="C",
432+
coercion_expected=False,
433+
use_xoption=True)
434+
435+
def test_xoption_set_to_warn(self):
436+
# -X coerce_c_locale=warn enables runtime warnings for legacy locales
437+
self._check_c_locale_coercion("utf-8", "utf-8",
438+
coerce_c_locale="warn",
439+
expected_warnings=[CLI_COERCION_WARNING],
440+
use_xoption=True)
441+
407442
def test_main():
408443
test.support.run_unittest(
409444
LocaleConfigurationTests,

Lib/test/test_cmd_line.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,16 @@ def test_undecodable_code(self):
159159
env = os.environ.copy()
160160
# Use C locale to get ascii for the locale encoding
161161
env['LC_ALL'] = 'C'
162-
env['PYTHONCOERCECLOCALE'] = '0'
163162
code = (
164163
b'import locale; '
165164
b'print(ascii("' + undecodable + b'"), '
166165
b'locale.getpreferredencoding())')
167166
p = subprocess.Popen(
168-
[sys.executable, "-c", code],
167+
[sys.executable,
168+
# Disable C locale coercion and UTF-8 Mode to not use UTF-8
169+
"-X", "coerce_c_locale=0",
170+
"-X", "utf8=0",
171+
"-c", code],
169172
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
170173
env=env)
171174
stdout, stderr = p.communicate()

Lib/test/test_sys.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,9 +656,8 @@ def test_getfilesystemencoding(self):
656656

657657
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
658658
# Force the POSIX locale
659-
env = os.environ.copy()
659+
env = dict(os.environ)
660660
env["LC_ALL"] = locale
661-
env["PYTHONCOERCECLOCALE"] = "0"
662661
code = '\n'.join((
663662
'import sys',
664663
'def dump(name):',
@@ -668,7 +667,10 @@ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
668667
'dump("stdout")',
669668
'dump("stderr")',
670669
))
671-
args = [sys.executable, "-X", "utf8=0", "-c", code]
670+
args = [sys.executable,
671+
"-X", "utf8=0",
672+
"-X", "coerce_c_locale=0",
673+
"-c", code]
672674
if isolated:
673675
args.append("-I")
674676
if encoding is not None:

Lib/test/test_utf8_mode.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def posix_locale(self):
2727
return (loc in POSIX_LOCALES)
2828

2929
def get_output(self, *args, failure=False, **kw):
30+
# Always disable the C locale coercion (PEP 538)
31+
args = ('-X', 'coerce_c_locale=0', *args)
3032
kw = dict(self.DEFAULT_ENV, **kw)
3133
if failure:
3234
out = assert_python_failure(*args, **kw)
@@ -116,7 +118,6 @@ def test_filesystemencoding(self):
116118
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
117119
# and has the priority over -X utf8 and PYTHONUTF8
118120
out = self.get_output('-X', 'utf8', '-c', code,
119-
PYTHONUTF8='strict',
120121
PYTHONLEGACYWINDOWSFSENCODING='1')
121122
self.assertEqual(out, 'mbcs/replace')
122123

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add a new :option:`-X` ``coerce_c_locale`` command line option to control C
2+
locale coercion (:pep:`538`).

Python/coreconfig.c

Lines changed: 79 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config)
705705
return _Py_INIT_OK();
706706
}
707707

708+
#ifndef MS_WINDOWS
709+
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
710+
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
711+
if (ctype_loc != NULL
712+
&& (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
713+
{
714+
config->utf8_mode = 1;
715+
return _Py_INIT_OK();
716+
}
717+
#endif
718+
708719
return _Py_INIT_OK();
709720
}
710721

@@ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config)
808819
config->malloc_stats = 1;
809820
}
810821

811-
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
812-
if (env) {
813-
if (strcmp(env, "0") == 0) {
814-
if (config->_coerce_c_locale < 0) {
815-
config->_coerce_c_locale = 0;
816-
}
817-
}
818-
else if (strcmp(env, "warn") == 0) {
819-
if (config->_coerce_c_locale_warn < 0) {
820-
config->_coerce_c_locale_warn = 1;
821-
}
822-
}
823-
else {
824-
if (config->_coerce_c_locale < 0) {
825-
config->_coerce_c_locale = 1;
826-
}
827-
}
828-
}
829-
830822
wchar_t *path;
831823
int res = _PyCoreConfig_GetEnvDup(config, &path,
832824
L"PYTHONPATH", "PYTHONPATH");
@@ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config)
966958
}
967959

968960

969-
static void
970-
config_init_locale(_PyCoreConfig *config)
961+
static _PyInitError
962+
config_init_coerce_c_locale(_PyCoreConfig *config)
971963
{
964+
const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
965+
if (xopt) {
966+
wchar_t *sep = wcschr(xopt, L'=');
967+
if (sep) {
968+
xopt = sep + 1;
969+
if (wcscmp(xopt, L"1") == 0) {
970+
if (config->_coerce_c_locale < 0) {
971+
config->_coerce_c_locale = 1;
972+
}
973+
}
974+
else if (wcscmp(xopt, L"0") == 0) {
975+
if (config->_coerce_c_locale < 0) {
976+
config->_coerce_c_locale = 0;
977+
}
978+
}
979+
else if (wcscmp(xopt, L"warn") == 0) {
980+
if (config->_coerce_c_locale_warn < 0) {
981+
config->_coerce_c_locale_warn = 1;
982+
}
983+
}
984+
else {
985+
return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
986+
}
987+
}
988+
else {
989+
if (config->_coerce_c_locale < 0) {
990+
config->_coerce_c_locale = 1;
991+
}
992+
}
993+
994+
if (config->_coerce_c_locale_warn < 0) {
995+
config->_coerce_c_locale_warn = 0;
996+
}
997+
}
998+
999+
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
1000+
if (env) {
1001+
if (strcmp(env, "0") == 0) {
1002+
if (config->_coerce_c_locale < 0) {
1003+
config->_coerce_c_locale = 0;
1004+
}
1005+
}
1006+
else if (strcmp(env, "warn") == 0) {
1007+
if (config->_coerce_c_locale_warn < 0) {
1008+
config->_coerce_c_locale_warn = 1;
1009+
}
1010+
}
1011+
else {
1012+
if (config->_coerce_c_locale < 0) {
1013+
config->_coerce_c_locale = 1;
1014+
}
1015+
}
1016+
1017+
if (config->_coerce_c_locale_warn < 0) {
1018+
config->_coerce_c_locale_warn = 0;
1019+
}
1020+
}
1021+
9721022
if (config->_coerce_c_locale < 0) {
9731023
/* The C locale enables the C locale coercion (PEP 538) */
9741024
if (_Py_LegacyLocaleDetected()) {
9751025
config->_coerce_c_locale = 1;
1026+
return _Py_INIT_OK();
9761027
}
9771028
}
9781029

979-
#ifndef MS_WINDOWS
980-
if (config->utf8_mode < 0) {
981-
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
982-
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
983-
if (ctype_loc != NULL
984-
&& (strcmp(ctype_loc, "C") == 0
985-
|| strcmp(ctype_loc, "POSIX") == 0))
986-
{
987-
config->utf8_mode = 1;
988-
}
989-
}
990-
#endif
1030+
return _Py_INIT_OK();
9911031
}
9921032

9931033

@@ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
12931333
}
12941334
}
12951335

1296-
if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) {
1297-
config_init_locale(config);
1336+
if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
1337+
err = config_init_coerce_c_locale(config);
1338+
if (_Py_INIT_FAILED(err)) {
1339+
return err;
1340+
}
12981341
}
12991342

13001343
if (config->_install_importlib) {
@@ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
13491392
}
13501393

13511394
assert(config->_coerce_c_locale >= 0);
1395+
assert(config->_coerce_c_locale_warn >= 0);
13521396
assert(config->use_environment >= 0);
13531397
assert(config->filesystem_encoding != NULL);
13541398
assert(config->filesystem_errors != NULL);

0 commit comments

Comments
 (0)