Skip to content

Commit 43fc3bb

Browse files
authored
bpo-36775: Add _PyUnicode_InitEncodings() (GH-13057)
Move get_codec_name() and initfsencoding() from pylifecycle.c to unicodeobject.c. Also rename the "init" functions in pylifecycle.c.
1 parent e251095 commit 43fc3bb

File tree

3 files changed

+115
-92
lines changed

3 files changed

+115
-92
lines changed

Include/internal/pycore_pylifecycle.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@ PyAPI_DATA(int) _Py_UnhandledKeyboardInterrupt;
1616

1717
PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
1818

19-
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
19+
extern int _Py_SetFileSystemEncoding(
2020
const char *encoding,
2121
const char *errors);
22-
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
22+
extern void _Py_ClearFileSystemEncoding(void);
23+
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
2324

2425
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
2526

Objects/unicodeobject.c

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4242
#include "Python.h"
4343
#include "pycore_fileutils.h"
4444
#include "pycore_object.h"
45+
#include "pycore_pylifecycle.h"
4546
#include "pycore_pystate.h"
4647
#include "ucnhash.h"
4748
#include "bytes_methods.h"
@@ -15574,6 +15575,102 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
1557415575
}
1557515576

1557615577

15578+
static char*
15579+
get_codec_name(const char *encoding)
15580+
{
15581+
PyObject *codec, *name_obj = NULL;
15582+
15583+
codec = _PyCodec_Lookup(encoding);
15584+
if (!codec)
15585+
goto error;
15586+
15587+
name_obj = PyObject_GetAttrString(codec, "name");
15588+
Py_CLEAR(codec);
15589+
if (!name_obj) {
15590+
goto error;
15591+
}
15592+
15593+
const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
15594+
if (name_utf8 == NULL) {
15595+
goto error;
15596+
}
15597+
15598+
char *name = _PyMem_RawStrdup(name_utf8);
15599+
Py_DECREF(name_obj);
15600+
if (name == NULL) {
15601+
PyErr_NoMemory();
15602+
return NULL;
15603+
}
15604+
return name;
15605+
15606+
error:
15607+
Py_XDECREF(codec);
15608+
Py_XDECREF(name_obj);
15609+
return NULL;
15610+
}
15611+
15612+
15613+
static _PyInitError
15614+
init_stdio_encoding(PyInterpreterState *interp)
15615+
{
15616+
_PyCoreConfig *config = &interp->core_config;
15617+
15618+
char *codec_name = get_codec_name(config->stdio_encoding);
15619+
if (codec_name == NULL) {
15620+
return _Py_INIT_ERR("failed to get the Python codec name "
15621+
"of the stdio encoding");
15622+
}
15623+
PyMem_RawFree(config->stdio_encoding);
15624+
config->stdio_encoding = codec_name;
15625+
return _Py_INIT_OK();
15626+
}
15627+
15628+
15629+
static _PyInitError
15630+
init_fs_encoding(PyInterpreterState *interp)
15631+
{
15632+
_PyCoreConfig *config = &interp->core_config;
15633+
15634+
char *encoding = get_codec_name(config->filesystem_encoding);
15635+
if (encoding == NULL) {
15636+
/* Such an error can only occur in critical situations: no more
15637+
memory, failure to import a module of the standard library, etc. */
15638+
return _Py_INIT_ERR("failed to get the Python codec "
15639+
"of the filesystem encoding");
15640+
}
15641+
15642+
/* Update the filesystem encoding to the normalized Python codec name.
15643+
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
15644+
(Python codec name). */
15645+
PyMem_RawFree(config->filesystem_encoding);
15646+
config->filesystem_encoding = encoding;
15647+
15648+
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
15649+
global configuration variables. */
15650+
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
15651+
config->filesystem_errors) < 0) {
15652+
return _Py_INIT_NO_MEMORY();
15653+
}
15654+
15655+
/* PyUnicode can now use the Python codec rather than the C implementation
15656+
for the filesystem encoding */
15657+
interp->fscodec_initialized = 1;
15658+
return _Py_INIT_OK();
15659+
}
15660+
15661+
15662+
_PyInitError
15663+
_PyUnicode_InitEncodings(PyInterpreterState *interp)
15664+
{
15665+
_PyInitError err = init_fs_encoding(interp);
15666+
if (_Py_INIT_FAILED(err)) {
15667+
return err;
15668+
}
15669+
15670+
return init_stdio_encoding(interp);
15671+
}
15672+
15673+
1557715674
void
1557815675
_PyUnicode_Fini(void)
1557915676
{

Python/pylifecycle.c

Lines changed: 15 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,9 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
5959

6060
/* Forward */
6161
static _PyInitError add_main_module(PyInterpreterState *interp);
62-
static _PyInitError initfsencoding(PyInterpreterState *interp);
63-
static _PyInitError initsite(void);
62+
static _PyInitError init_import_size(void);
6463
static _PyInitError init_sys_streams(PyInterpreterState *interp);
65-
static _PyInitError initsigs(void);
64+
static _PyInitError init_signals(void);
6665
static void call_py_exitfuncs(PyInterpreterState *);
6766
static void wait_for_thread_shutdown(void);
6867
static void call_ll_exitfuncs(_PyRuntimeState *runtime);
@@ -144,42 +143,8 @@ Py_IsInitialized(void)
144143
145144
*/
146145

147-
static char*
148-
get_codec_name(const char *encoding)
149-
{
150-
const char *name_utf8;
151-
char *name_str;
152-
PyObject *codec, *name = NULL;
153-
154-
codec = _PyCodec_Lookup(encoding);
155-
if (!codec)
156-
goto error;
157-
158-
name = _PyObject_GetAttrId(codec, &PyId_name);
159-
Py_CLEAR(codec);
160-
if (!name)
161-
goto error;
162-
163-
name_utf8 = PyUnicode_AsUTF8(name);
164-
if (name_utf8 == NULL)
165-
goto error;
166-
name_str = _PyMem_RawStrdup(name_utf8);
167-
Py_DECREF(name);
168-
if (name_str == NULL) {
169-
PyErr_NoMemory();
170-
return NULL;
171-
}
172-
return name_str;
173-
174-
error:
175-
Py_XDECREF(codec);
176-
Py_XDECREF(name);
177-
return NULL;
178-
}
179-
180-
181146
static _PyInitError
182-
initimport(PyInterpreterState *interp, PyObject *sysmod)
147+
init_importlib(PyInterpreterState *interp, PyObject *sysmod)
183148
{
184149
PyObject *importlib;
185150
PyObject *impmod;
@@ -229,7 +194,7 @@ initimport(PyInterpreterState *interp, PyObject *sysmod)
229194
}
230195

231196
static _PyInitError
232-
initexternalimport(PyInterpreterState *interp)
197+
init_importlib_external(PyInterpreterState *interp)
233198
{
234199
PyObject *value;
235200
value = PyObject_CallMethod(interp->importlib,
@@ -661,7 +626,7 @@ pycore_init_import_warnings(PyInterpreterState *interp, PyObject *sysmod)
661626

662627
/* This call sets up builtin and frozen import support */
663628
if (interp->core_config._install_importlib) {
664-
err = initimport(interp, sysmod);
629+
err = init_importlib(interp, sysmod);
665630
if (_Py_INIT_FAILED(err)) {
666631
return err;
667632
}
@@ -940,7 +905,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
940905
return _Py_INIT_ERR("can't finish initializing sys");
941906
}
942907

943-
_PyInitError err = initexternalimport(interp);
908+
_PyInitError err = init_importlib_external(interp);
944909
if (_Py_INIT_FAILED(err)) {
945910
return err;
946911
}
@@ -951,13 +916,13 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
951916
return err;
952917
}
953918

954-
err = initfsencoding(interp);
919+
err = _PyUnicode_InitEncodings(interp);
955920
if (_Py_INIT_FAILED(err)) {
956921
return err;
957922
}
958923

959924
if (core_config->install_signal_handlers) {
960-
err = initsigs(); /* Signal handling stuff, including initintr() */
925+
err = init_signals();
961926
if (_Py_INIT_FAILED(err)) {
962927
return err;
963928
}
@@ -992,7 +957,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
992957
runtime->initialized = 1;
993958

994959
if (core_config->site_import) {
995-
err = initsite(); /* Module site */
960+
err = init_import_size(); /* Module site */
996961
if (_Py_INIT_FAILED(err)) {
997962
return err;
998963
}
@@ -1497,17 +1462,17 @@ new_interpreter(PyThreadState **tstate_p)
14971462
return err;
14981463
}
14991464

1500-
err = initimport(interp, sysmod);
1465+
err = init_importlib(interp, sysmod);
15011466
if (_Py_INIT_FAILED(err)) {
15021467
return err;
15031468
}
15041469

1505-
err = initexternalimport(interp);
1470+
err = init_importlib_external(interp);
15061471
if (_Py_INIT_FAILED(err)) {
15071472
return err;
15081473
}
15091474

1510-
err = initfsencoding(interp);
1475+
err = _PyUnicode_InitEncodings(interp);
15111476
if (_Py_INIT_FAILED(err)) {
15121477
return err;
15131478
}
@@ -1523,7 +1488,7 @@ new_interpreter(PyThreadState **tstate_p)
15231488
}
15241489

15251490
if (core_config->site_import) {
1526-
err = initsite();
1491+
err = init_import_size();
15271492
if (_Py_INIT_FAILED(err)) {
15281493
return err;
15291494
}
@@ -1649,42 +1614,10 @@ add_main_module(PyInterpreterState *interp)
16491614
return _Py_INIT_OK();
16501615
}
16511616

1652-
static _PyInitError
1653-
initfsencoding(PyInterpreterState *interp)
1654-
{
1655-
_PyCoreConfig *config = &interp->core_config;
1656-
1657-
char *encoding = get_codec_name(config->filesystem_encoding);
1658-
if (encoding == NULL) {
1659-
/* Such error can only occurs in critical situations: no more
1660-
memory, import a module of the standard library failed, etc. */
1661-
return _Py_INIT_ERR("failed to get the Python codec "
1662-
"of the filesystem encoding");
1663-
}
1664-
1665-
/* Update the filesystem encoding to the normalized Python codec name.
1666-
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
1667-
(Python codec name). */
1668-
PyMem_RawFree(config->filesystem_encoding);
1669-
config->filesystem_encoding = encoding;
1670-
1671-
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
1672-
global configuration variables. */
1673-
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
1674-
config->filesystem_errors) < 0) {
1675-
return _Py_INIT_NO_MEMORY();
1676-
}
1677-
1678-
/* PyUnicode can now use the Python codec rather than C implementation
1679-
for the filesystem encoding */
1680-
interp->fscodec_initialized = 1;
1681-
return _Py_INIT_OK();
1682-
}
1683-
16841617
/* Import the site module (not into __main__ though) */
16851618

16861619
static _PyInitError
1687-
initsite(void)
1620+
init_import_size(void)
16881621
{
16891622
PyObject *m;
16901623
m = PyImport_ImportModule("site");
@@ -1880,14 +1813,6 @@ init_sys_streams(PyInterpreterState *interp)
18801813
}
18811814
#endif
18821815

1883-
char *codec_name = get_codec_name(config->stdio_encoding);
1884-
if (codec_name == NULL) {
1885-
return _Py_INIT_ERR("failed to get the Python codec name "
1886-
"of the stdio encoding");
1887-
}
1888-
PyMem_RawFree(config->stdio_encoding);
1889-
config->stdio_encoding = codec_name;
1890-
18911816
/* Hack to avoid a nasty recursion issue when Python is invoked
18921817
in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
18931818
if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) {
@@ -2287,7 +2212,7 @@ Py_Exit(int sts)
22872212
}
22882213

22892214
static _PyInitError
2290-
initsigs(void)
2215+
init_signals(void)
22912216
{
22922217
#ifdef SIGPIPE
22932218
PyOS_setsig(SIGPIPE, SIG_IGN);

0 commit comments

Comments
 (0)