Skip to content

Commit e251095

Browse files
authored
bpo-36775: Add _Py_FORCE_UTF8_FS_ENCODING macro (GH-13056)
Add the _Py_FORCE_UTF8_LOCALE and _Py_FORCE_UTF8_FS_ENCODING macros to factor out the repeated "#if defined(__ANDROID__) || defined(__VXWORKS__)" and "#if defined(__APPLE__)" checks. Also clean up config_init_fs_encoding().
1 parent c4e78b1 commit e251095

File tree

5 files changed

+48
-57
lines changed

5 files changed

+48
-57
lines changed

Include/pyport.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,4 +819,14 @@ extern _invalid_parameter_handler _Py_silent_invalid_parameter_handler;
819819
# error "Py_TRACE_REFS ABI is not compatible with release and debug ABI"
820820
#endif
821821

822+
#if defined(__ANDROID__) || defined(__VXWORKS__)
823+
/* Ignore the locale encoding: force UTF-8 */
824+
# define _Py_FORCE_UTF8_LOCALE
825+
#endif
826+
827+
#if defined(_Py_FORCE_UTF8_LOCALE) || defined(__APPLE__)
828+
/* Use UTF-8 as filesystem encoding */
829+
# define _Py_FORCE_UTF8_FS_ENCODING
830+
#endif
831+
822832
#endif /* Py_PYPORT_H */

Objects/unicodeobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3506,7 +3506,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
35063506
{
35073507
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
35083508
const _PyCoreConfig *config = &interp->core_config;
3509-
#if defined(__APPLE__)
3509+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
35103510
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
35113511
#else
35123512
/* Bootstrap check: if the filesystem codec is implemented in Python, we
@@ -3730,7 +3730,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
37303730
{
37313731
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
37323732
const _PyCoreConfig *config = &interp->core_config;
3733-
#if defined(__APPLE__)
3733+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
37343734
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
37353735
#else
37363736
/* Bootstrap check: if the filesystem codec is implemented in Python, we

Python/coreconfig.c

Lines changed: 27 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,7 @@ config_get_locale_encoding(char **locale_encoding)
13131313
#ifdef MS_WINDOWS
13141314
char encoding[20];
13151315
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
1316-
#elif defined(__ANDROID__) || defined(__VXWORKS__)
1316+
#elif defined(_Py_FORCE_UTF8_LOCALE)
13171317
const char *encoding = "UTF-8";
13181318
#else
13191319
const char *encoding = nl_langinfo(CODESET);
@@ -1450,81 +1450,63 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
14501450
{
14511451
_PyInitError err;
14521452

1453-
#ifdef MS_WINDOWS
1454-
if (preconfig->legacy_windows_fs_encoding) {
1455-
/* Legacy Windows filesystem encoding: mbcs/replace */
1456-
if (config->filesystem_encoding == NULL) {
1457-
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
1458-
"mbcs");
1459-
if (_Py_INIT_FAILED(err)) {
1460-
return err;
1461-
}
1462-
}
1463-
if (config->filesystem_errors == NULL) {
1464-
err = _PyCoreConfig_SetString(&config->filesystem_errors,
1465-
"replace");
1466-
if (_Py_INIT_FAILED(err)) {
1467-
return err;
1468-
}
1469-
}
1470-
}
1471-
1472-
/* Windows defaults to utf-8/surrogatepass (PEP 529).
1473-
1474-
Note: UTF-8 Mode takes the same code path and the Legacy Windows FS
1475-
encoding has the priority over UTF-8 Mode. */
14761453
if (config->filesystem_encoding == NULL) {
1454+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
14771455
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
14781456
"utf-8");
1479-
if (_Py_INIT_FAILED(err)) {
1480-
return err;
1481-
}
1482-
}
1457+
#else
14831458

1484-
if (config->filesystem_errors == NULL) {
1485-
err = _PyCoreConfig_SetString(&config->filesystem_errors,
1486-
"surrogatepass");
1487-
if (_Py_INIT_FAILED(err)) {
1488-
return err;
1459+
#ifdef MS_WINDOWS
1460+
if (preconfig->legacy_windows_fs_encoding) {
1461+
/* Legacy Windows filesystem encoding: mbcs/replace */
1462+
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
1463+
"mbcs");
14891464
}
1490-
}
1491-
#else
1492-
if (config->filesystem_encoding == NULL) {
1465+
else
1466+
#endif
14931467
if (preconfig->utf8_mode) {
1494-
/* UTF-8 Mode use: utf-8/surrogateescape */
14951468
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
14961469
"utf-8");
1497-
/* errors defaults to surrogateescape above */
14981470
}
1471+
#ifndef MS_WINDOWS
14991472
else if (_Py_GetForceASCII()) {
15001473
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
15011474
"ascii");
15021475
}
1476+
#endif
15031477
else {
1504-
/* macOS and Android use UTF-8,
1505-
other platforms use the locale encoding. */
1506-
#if defined(__APPLE__) || defined(__ANDROID__)
1478+
#ifdef MS_WINDOWS
1479+
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
15071480
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
15081481
"utf-8");
15091482
#else
15101483
err = config_get_locale_encoding(&config->filesystem_encoding);
15111484
#endif
15121485
}
1486+
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
15131487

15141488
if (_Py_INIT_FAILED(err)) {
15151489
return err;
15161490
}
15171491
}
15181492

15191493
if (config->filesystem_errors == NULL) {
1520-
/* by default, use the "surrogateescape" error handler */
1521-
err = _PyCoreConfig_SetString(&config->filesystem_errors,
1522-
"surrogateescape");
1494+
const char *errors;
1495+
#ifdef MS_WINDOWS
1496+
if (preconfig->legacy_windows_fs_encoding) {
1497+
errors = "replace";
1498+
}
1499+
else {
1500+
errors = "surrogatepass";
1501+
}
1502+
#else
1503+
errors = "surrogateescape";
1504+
#endif
1505+
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
15231506
if (_Py_INIT_FAILED(err)) {
15241507
return err;
15251508
}
15261509
}
1527-
#endif
15281510
return _Py_INIT_OK();
15291511
}
15301512

Python/fileutils.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ _Py_device_encoding(int fd)
8585
Py_RETURN_NONE;
8686
}
8787

88-
#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
88+
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
8989

9090
#define USE_FORCE_ASCII
9191

@@ -309,7 +309,7 @@ _Py_ResetForceASCII(void)
309309
{
310310
/* nothing to do */
311311
}
312-
#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
312+
#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
313313

314314

315315
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
@@ -536,15 +536,15 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
536536
int current_locale, _Py_error_handler errors)
537537
{
538538
if (current_locale) {
539-
#if defined(__ANDROID__) || defined(__VXWORKS__)
539+
#ifdef _Py_FORCE_UTF8_LOCALE
540540
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
541541
errors);
542542
#else
543543
return decode_current_locale(arg, wstr, wlen, reason, errors);
544544
#endif
545545
}
546546

547-
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__VXWORKS__)
547+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
548548
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
549549
errors);
550550
#else
@@ -569,7 +569,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
569569
#endif
570570

571571
return decode_current_locale(arg, wstr, wlen, reason, errors);
572-
#endif /* __APPLE__ or __ANDROID__ or __VXWORKS__ */
572+
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
573573
}
574574

575575

@@ -727,7 +727,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
727727
int raw_malloc, int current_locale, _Py_error_handler errors)
728728
{
729729
if (current_locale) {
730-
#ifdef __ANDROID__
730+
#ifdef _Py_FORCE_UTF8_LOCALE
731731
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
732732
raw_malloc, errors);
733733
#else
@@ -736,7 +736,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
736736
#endif
737737
}
738738

739-
#if defined(__APPLE__) || defined(__ANDROID__)
739+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
740740
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
741741
raw_malloc, errors);
742742
#else
@@ -762,7 +762,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
762762

763763
return encode_current_locale(text, str, error_pos, reason,
764764
raw_malloc, errors);
765-
#endif /* __APPLE__ or __ANDROID__ */
765+
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
766766
}
767767

768768
static char*

Python/pylifecycle.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,7 @@ _Py_CoerceLegacyLocale(int warn)
366366
const char *new_locale = setlocale(LC_CTYPE,
367367
target->locale_name);
368368
if (new_locale != NULL) {
369-
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
370-
defined(HAVE_LANGINFO_H) && defined(CODESET)
369+
#if !defined(_Py_FORCE_UTF8_LOCALE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
371370
/* Also ensure that nl_langinfo works in this locale */
372371
char *codeset = nl_langinfo(CODESET);
373372
if (!codeset || *codeset == '\0') {

0 commit comments

Comments
 (0)