Skip to content

Commit 0b9ea4b

Browse files
authored
[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
* bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986). Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX. Fix the error handler of standard streams like sys.stdout: PYTHONIOENCODING=":" is now ignored instead of setting the error handler to "strict". (cherry picked from commit 315877d)
1 parent 98c49c6 commit 0b9ea4b

File tree

5 files changed

+60
-26
lines changed

5 files changed

+60
-26
lines changed

Lib/test/test_sys.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -654,10 +654,10 @@ def test_getfilesystemencoding(self):
654654
expected = None
655655
self.check_fsencoding(fs_encoding, expected)
656656

657-
def c_locale_get_error_handler(self, isolated=False, encoding=None):
657+
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
658658
# Force the requested locale (callers pass "C" or "POSIX")
659659
env = os.environ.copy()
660-
env["LC_ALL"] = "C"
660+
env["LC_ALL"] = locale
661661
env["PYTHONCOERCECLOCALE"] = "0"
662662
code = '\n'.join((
663663
'import sys',
@@ -683,44 +683,50 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None):
683683
stdout, stderr = p.communicate()
684684
return stdout
685685

686-
def test_c_locale_surrogateescape(self):
687-
out = self.c_locale_get_error_handler(isolated=True)
686+
def check_locale_surrogateescape(self, locale):
687+
out = self.c_locale_get_error_handler(locale, isolated=True)
688688
self.assertEqual(out,
689689
'stdin: surrogateescape\n'
690690
'stdout: surrogateescape\n'
691691
'stderr: backslashreplace\n')
692692

693693
# replace the default error handler
694-
out = self.c_locale_get_error_handler(encoding=':ignore')
694+
out = self.c_locale_get_error_handler(locale, encoding=':ignore')
695695
self.assertEqual(out,
696696
'stdin: ignore\n'
697697
'stdout: ignore\n'
698698
'stderr: backslashreplace\n')
699699

700700
# force the encoding
701-
out = self.c_locale_get_error_handler(encoding='iso8859-1')
701+
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
702702
self.assertEqual(out,
703703
'stdin: strict\n'
704704
'stdout: strict\n'
705705
'stderr: backslashreplace\n')
706-
out = self.c_locale_get_error_handler(encoding='iso8859-1:')
706+
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
707707
self.assertEqual(out,
708708
'stdin: strict\n'
709709
'stdout: strict\n'
710710
'stderr: backslashreplace\n')
711711

712712
# has no effect
713-
out = self.c_locale_get_error_handler(encoding=':')
713+
out = self.c_locale_get_error_handler(locale, encoding=':')
714714
self.assertEqual(out,
715-
'stdin: strict\n'
716-
'stdout: strict\n'
715+
'stdin: surrogateescape\n'
716+
'stdout: surrogateescape\n'
717717
'stderr: backslashreplace\n')
718-
out = self.c_locale_get_error_handler(encoding='')
718+
out = self.c_locale_get_error_handler(locale, encoding='')
719719
self.assertEqual(out,
720720
'stdin: surrogateescape\n'
721721
'stdout: surrogateescape\n'
722722
'stderr: backslashreplace\n')
723723

724+
def test_c_locale_surrogateescape(self):
725+
self.check_locale_surrogateescape('C')
726+
727+
def test_posix_locale_surrogateescape(self):
728+
self.check_locale_surrogateescape('POSIX')
729+
724730
def test_implementation(self):
725731
# This test applies to all implementations equally.
726732

Lib/test/test_utf8_mode.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ def test_stdio(self):
146146
out = self.get_output('-X', 'utf8', '-c', code,
147147
PYTHONIOENCODING=":namereplace")
148148
self.assertEqual(out.splitlines(),
149-
['stdin: UTF-8/namereplace',
150-
'stdout: UTF-8/namereplace',
151-
'stderr: UTF-8/backslashreplace'])
149+
['stdin: utf-8/namereplace',
150+
'stdout: utf-8/namereplace',
151+
'stderr: utf-8/backslashreplace'])
152152

153153
def test_io(self):
154154
code = textwrap.dedent('''
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix the error handler of standard streams like sys.stdout:
2+
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
3+
"strict".
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Standard streams like sys.stdout now use the "surrogateescape" error
2+
handler, instead of "strict", on the POSIX locale (when the C locale is not
3+
coerced and the UTF-8 Mode is disabled).

Python/pylifecycle.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -423,13 +423,13 @@ get_default_standard_stream_error_handler(void)
423423
{
424424
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
425425
if (ctype_loc != NULL) {
426-
/* "surrogateescape" is the default in the legacy C locale */
427-
if (strcmp(ctype_loc, "C") == 0) {
426+
/* surrogateescape is the default in the legacy C and POSIX locales */
427+
if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
428428
return "surrogateescape";
429429
}
430430

431431
#ifdef PY_COERCE_C_LOCALE
432-
/* "surrogateescape" is the default in locale coercion target locales */
432+
/* surrogateescape is the default in locale coercion target locales */
433433
const _LocaleCoercionTarget *target = NULL;
434434
for (target = _TARGET_LOCALES; target->locale_name; target++) {
435435
if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -440,7 +440,7 @@ get_default_standard_stream_error_handler(void)
440440
}
441441

442442
/* Otherwise fall back to the "strict" error handler */
443-
return NULL;
443+
return "strict";
444444
}
445445

446446
#ifdef PY_COERCE_C_LOCALE
@@ -1851,20 +1851,42 @@ init_sys_streams(PyInterpreterState *interp)
18511851
if (err) {
18521852
*err = '\0';
18531853
err++;
1854-
if (*err && !errors) {
1855-
errors = err;
1854+
if (!err[0]) {
1855+
err = NULL;
18561856
}
18571857
}
1858-
if (*pythonioencoding && !encoding) {
1859-
encoding = pythonioencoding;
1858+
1859+
/* Does PYTHONIOENCODING contain an encoding? */
1860+
if (pythonioencoding[0]) {
1861+
if (!encoding) {
1862+
encoding = pythonioencoding;
1863+
}
1864+
1865+
/* If the encoding is set but not the error handler,
1866+
use "strict" error handler by default.
1867+
PYTHONIOENCODING=latin1 behaves as
1868+
PYTHONIOENCODING=latin1:strict. */
1869+
if (!err) {
1870+
err = "strict";
1871+
}
1872+
}
1873+
1874+
if (!errors && err != NULL) {
1875+
errors = err;
18601876
}
18611877
}
1862-
else if (interp->core_config.utf8_mode) {
1863-
encoding = "utf-8";
1864-
errors = "surrogateescape";
1878+
1879+
if (interp->core_config.utf8_mode) {
1880+
if (!encoding) {
1881+
encoding = "utf-8";
1882+
}
1883+
if (!errors) {
1884+
errors = "surrogateescape";
1885+
}
18651886
}
18661887

1867-
if (!errors && !pythonioencoding) {
1888+
1889+
if (!errors) {
18681890
/* Choose the default error handler based on the current locale */
18691891
errors = get_default_standard_stream_error_handler();
18701892
}

0 commit comments

Comments
 (0)