Skip to content

Commit 315877d

Browse files
authored
bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX.
1 parent 21786f5 commit 315877d

File tree

3 files changed

+40
-18
lines changed

3 files changed

+40
-18
lines changed

Lib/test/test_sys.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -654,10 +654,10 @@ def test_getfilesystemencoding(self):
654654
expected = None
655655
self.check_fsencoding(fs_encoding, expected)
656656

657-
def c_locale_get_error_handler(self, isolated=False, encoding=None):
657+
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
658658
# Force the POSIX locale
659659
env = os.environ.copy()
660-
env["LC_ALL"] = "C"
660+
env["LC_ALL"] = locale
661661
env["PYTHONCOERCECLOCALE"] = "0"
662662
code = '\n'.join((
663663
'import sys',
@@ -683,44 +683,50 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None):
683683
stdout, stderr = p.communicate()
684684
return stdout
685685

686-
def test_c_locale_surrogateescape(self):
687-
out = self.c_locale_get_error_handler(isolated=True)
686+
def check_locale_surrogateescape(self, locale):
687+
out = self.c_locale_get_error_handler(locale, isolated=True)
688688
self.assertEqual(out,
689689
'stdin: surrogateescape\n'
690690
'stdout: surrogateescape\n'
691691
'stderr: backslashreplace\n')
692692

693693
# replace the default error handler
694-
out = self.c_locale_get_error_handler(encoding=':ignore')
694+
out = self.c_locale_get_error_handler(locale, encoding=':ignore')
695695
self.assertEqual(out,
696696
'stdin: ignore\n'
697697
'stdout: ignore\n'
698698
'stderr: backslashreplace\n')
699699

700700
# force the encoding
701-
out = self.c_locale_get_error_handler(encoding='iso8859-1')
701+
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
702702
self.assertEqual(out,
703703
'stdin: strict\n'
704704
'stdout: strict\n'
705705
'stderr: backslashreplace\n')
706-
out = self.c_locale_get_error_handler(encoding='iso8859-1:')
706+
out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
707707
self.assertEqual(out,
708708
'stdin: strict\n'
709709
'stdout: strict\n'
710710
'stderr: backslashreplace\n')
711711

712712
# has no effect
713-
out = self.c_locale_get_error_handler(encoding=':')
713+
out = self.c_locale_get_error_handler(locale, encoding=':')
714714
self.assertEqual(out,
715715
'stdin: surrogateescape\n'
716716
'stdout: surrogateescape\n'
717717
'stderr: backslashreplace\n')
718-
out = self.c_locale_get_error_handler(encoding='')
718+
out = self.c_locale_get_error_handler(locale, encoding='')
719719
self.assertEqual(out,
720720
'stdin: surrogateescape\n'
721721
'stdout: surrogateescape\n'
722722
'stderr: backslashreplace\n')
723723

724+
def test_c_locale_surrogateescape(self):
725+
self.check_locale_surrogateescape('C')
726+
727+
def test_posix_locale_surrogateescape(self):
728+
self.check_locale_surrogateescape('POSIX')
729+
724730
def test_implementation(self):
725731
# This test applies to all implementations equally.
726732

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Standard streams like sys.stdout now use the "surrogateescape" error
2+
handler, instead of "strict", on the POSIX locale (when the C locale is not
3+
coerced and the UTF-8 Mode is disabled).

Python/pylifecycle.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -345,13 +345,13 @@ get_stdio_errors(void)
345345
{
346346
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
347347
if (ctype_loc != NULL) {
348-
/* "surrogateescape" is the default in the legacy C locale */
349-
if (strcmp(ctype_loc, "C") == 0) {
348+
/* surrogateescape is the default in the legacy C and POSIX locales */
349+
if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
350350
return "surrogateescape";
351351
}
352352

353353
#ifdef PY_COERCE_C_LOCALE
354-
/* "surrogateescape" is the default in locale coercion target locales */
354+
/* surrogateescape is the default in locale coercion target locales */
355355
const _LocaleCoercionTarget *target = NULL;
356356
for (target = _TARGET_LOCALES; target->locale_name; target++) {
357357
if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -1791,16 +1791,29 @@ init_sys_streams(PyInterpreterState *interp)
17911791
if (err) {
17921792
*err = '\0';
17931793
err++;
1794-
if (*err && !errors) {
1795-
errors = err;
1794+
if (!err[0]) {
1795+
err = NULL;
17961796
}
17971797
}
1798-
if (!encoding && *pythonioencoding) {
1799-
encoding = pythonioencoding;
1800-
if (!errors) {
1801-
errors = "strict";
1798+
1799+
/* Does PYTHONIOENCODING contain an encoding? */
1800+
if (pythonioencoding[0]) {
1801+
if (!encoding) {
1802+
encoding = pythonioencoding;
1803+
}
1804+
1805+
/* If the encoding is set but not the error handler,
1806+
use "strict" error handler by default.
1807+
PYTHONIOENCODING=latin1 behaves as
1808+
PYTHONIOENCODING=latin1:strict. */
1809+
if (!err) {
1810+
err = "strict";
18021811
}
18031812
}
1813+
1814+
if (!errors && err != NULL) {
1815+
errors = err;
1816+
}
18041817
}
18051818

18061819
if (interp->core_config.utf8_mode) {

0 commit comments

Comments
 (0)