Skip to content

bpo-36775: Add _PyUnicode_InitEncodings() #13057

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Include/internal/pycore_pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ PyAPI_DATA(int) _Py_UnhandledKeyboardInterrupt;

PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);

PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
extern int _Py_SetFileSystemEncoding(
const char *encoding,
const char *errors);
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
extern void _Py_ClearFileSystemEncoding(void);
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);

PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);

Expand Down
97 changes: 97 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "Python.h"
#include "pycore_fileutils.h"
#include "pycore_object.h"
#include "pycore_pylifecycle.h"
#include "pycore_pystate.h"
#include "ucnhash.h"
#include "bytes_methods.h"
Expand Down Expand Up @@ -15574,6 +15575,102 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
}


/* Look up `encoding` with _PyCodec_Lookup() and return a copy of the codec's
   "name" attribute as a C string allocated by _PyMem_RawStrdup().

   Ownership of the returned buffer passes to the caller. Returns NULL when
   the lookup, the attribute access, the UTF-8 conversion or the copy fails;
   in the last case PyErr_NoMemory() is called before returning. */
static char*
get_codec_name(const char *encoding)
{
    PyObject *codec_info = _PyCodec_Lookup(encoding);
    if (codec_info == NULL) {
        return NULL;
    }

    /* Only the "name" attribute is needed: drop the codec right away. */
    PyObject *name_attr = PyObject_GetAttrString(codec_info, "name");
    Py_DECREF(codec_info);
    if (name_attr == NULL) {
        return NULL;
    }

    const char *utf8 = PyUnicode_AsUTF8(name_attr);
    if (utf8 == NULL) {
        Py_DECREF(name_attr);
        return NULL;
    }

    /* Copy the string before releasing the object it was obtained from. */
    char *copy = _PyMem_RawStrdup(utf8);
    Py_DECREF(name_attr);
    if (copy == NULL) {
        PyErr_NoMemory();
    }
    return copy;
}


/* Replace interp->core_config.stdio_encoding with the codec name that
   get_codec_name() reports for it.

   Returns _Py_INIT_OK() on success, or an init error if the codec name
   cannot be obtained (the configuration is left untouched in that case). */
static _PyInitError
init_stdio_encoding(PyInterpreterState *interp)
{
    _PyCoreConfig *core_config = &interp->core_config;
    char *normalized = get_codec_name(core_config->stdio_encoding);

    if (normalized != NULL) {
        /* Release the previous string; the config now owns the new one. */
        PyMem_RawFree(core_config->stdio_encoding);
        core_config->stdio_encoding = normalized;
        return _Py_INIT_OK();
    }

    return _Py_INIT_ERR("failed to get the Python codec name "
                        "of the stdio encoding");
}


/* Normalize interp->core_config.filesystem_encoding to its Python codec
   name, pass the encoding and error handler to _Py_SetFileSystemEncoding(),
   and set interp->fscodec_initialized.

   Returns _Py_INIT_OK() on success, an init error if the codec name cannot
   be obtained, or _Py_INIT_NO_MEMORY() if _Py_SetFileSystemEncoding()
   fails. */
static _PyInitError
init_fs_encoding(PyInterpreterState *interp)
{
    _PyCoreConfig *core_config = &interp->core_config;

    char *codec_name = get_codec_name(core_config->filesystem_encoding);
    if (!codec_name) {
        /* Such an error can only occur in critical situations: out of
           memory, failure to import a standard library module, etc. */
        return _Py_INIT_ERR("failed to get the Python codec "
                            "of the filesystem encoding");
    }

    /* Store the normalized Python codec name in place of the configured
       one, e.g. "ascii" instead of the locale encoding "ANSI_X3.4-1968". */
    PyMem_RawFree(core_config->filesystem_encoding);
    core_config->filesystem_encoding = codec_name;

    /* Propagate the values to the Py_FileSystemDefaultEncoding and
       Py_FileSystemDefaultEncodeErrors global configuration variables. */
    int status = _Py_SetFileSystemEncoding(core_config->filesystem_encoding,
                                           core_config->filesystem_errors);
    if (status < 0) {
        return _Py_INIT_NO_MEMORY();
    }

    /* From now on PyUnicode can use the Python codec rather than the C
       implementation for the filesystem encoding. */
    interp->fscodec_initialized = 1;
    return _Py_INIT_OK();
}


/* Initialize the filesystem encoding, then the stdio encoding, of `interp`.

   The stdio step is skipped when the filesystem step fails; the status of
   the last step that ran is returned. */
_PyInitError
_PyUnicode_InitEncodings(PyInterpreterState *interp)
{
    _PyInitError status = init_fs_encoding(interp);
    if (!_Py_INIT_FAILED(status)) {
        status = init_stdio_encoding(interp);
    }
    return status;
}


void
_PyUnicode_Fini(void)
{
Expand Down
105 changes: 15 additions & 90 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,9 @@ extern grammar _PyParser_Grammar; /* From graminit.c */

/* Forward */
static _PyInitError add_main_module(PyInterpreterState *interp);
static _PyInitError initfsencoding(PyInterpreterState *interp);
static _PyInitError initsite(void);
static _PyInitError init_import_size(void);
static _PyInitError init_sys_streams(PyInterpreterState *interp);
static _PyInitError initsigs(void);
static _PyInitError init_signals(void);
static void call_py_exitfuncs(PyInterpreterState *);
static void wait_for_thread_shutdown(void);
static void call_ll_exitfuncs(_PyRuntimeState *runtime);
Expand Down Expand Up @@ -144,42 +143,8 @@ Py_IsInitialized(void)

*/

/* Return a copy, allocated with _PyMem_RawStrdup(), of the UTF-8 form of the
   name attribute (fetched through PyId_name) of the codec that
   _PyCodec_Lookup() finds for `encoding`.

   Returns NULL on failure; PyErr_NoMemory() is called when the copy cannot
   be allocated. */
static char*
get_codec_name(const char *encoding)
{
const char *name_utf8;
char *name_str;
PyObject *codec, *name = NULL;

codec = _PyCodec_Lookup(encoding);
if (!codec)
goto error;

name = _PyObject_GetAttrId(codec, &PyId_name);
/* The codec is not used past this point; Py_CLEAR also resets the pointer,
   so the Py_XDECREF(codec) under "error:" does not release it twice. */
Py_CLEAR(codec);
if (!name)
goto error;

name_utf8 = PyUnicode_AsUTF8(name);
if (name_utf8 == NULL)
goto error;
/* Copy the string before dropping the reference to `name`. */
name_str = _PyMem_RawStrdup(name_utf8);
Py_DECREF(name);
if (name_str == NULL) {
PyErr_NoMemory();
return NULL;
}
return name_str;

/* Shared failure path: release whatever references are still held. */
error:
Py_XDECREF(codec);
Py_XDECREF(name);
return NULL;
}


static _PyInitError
initimport(PyInterpreterState *interp, PyObject *sysmod)
init_importlib(PyInterpreterState *interp, PyObject *sysmod)
{
PyObject *importlib;
PyObject *impmod;
Expand Down Expand Up @@ -229,7 +194,7 @@ initimport(PyInterpreterState *interp, PyObject *sysmod)
}

static _PyInitError
initexternalimport(PyInterpreterState *interp)
init_importlib_external(PyInterpreterState *interp)
{
PyObject *value;
value = PyObject_CallMethod(interp->importlib,
Expand Down Expand Up @@ -661,7 +626,7 @@ pycore_init_import_warnings(PyInterpreterState *interp, PyObject *sysmod)

/* This call sets up builtin and frozen import support */
if (interp->core_config._install_importlib) {
err = initimport(interp, sysmod);
err = init_importlib(interp, sysmod);
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand Down Expand Up @@ -940,7 +905,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
return _Py_INIT_ERR("can't finish initializing sys");
}

_PyInitError err = initexternalimport(interp);
_PyInitError err = init_importlib_external(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand All @@ -951,13 +916,13 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
return err;
}

err = initfsencoding(interp);
err = _PyUnicode_InitEncodings(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}

if (core_config->install_signal_handlers) {
err = initsigs(); /* Signal handling stuff, including initintr() */
err = init_signals();
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand Down Expand Up @@ -992,7 +957,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
runtime->initialized = 1;

if (core_config->site_import) {
err = initsite(); /* Module site */
err = init_import_size(); /* Module site */
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand Down Expand Up @@ -1497,17 +1462,17 @@ new_interpreter(PyThreadState **tstate_p)
return err;
}

err = initimport(interp, sysmod);
err = init_importlib(interp, sysmod);
if (_Py_INIT_FAILED(err)) {
return err;
}

err = initexternalimport(interp);
err = init_importlib_external(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}

err = initfsencoding(interp);
err = _PyUnicode_InitEncodings(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand All @@ -1523,7 +1488,7 @@ new_interpreter(PyThreadState **tstate_p)
}

if (core_config->site_import) {
err = initsite();
err = init_import_size();
if (_Py_INIT_FAILED(err)) {
return err;
}
Expand Down Expand Up @@ -1649,42 +1614,10 @@ add_main_module(PyInterpreterState *interp)
return _Py_INIT_OK();
}

/* Normalize interp->core_config.filesystem_encoding to its Python codec
   name, hand the encoding and error handler to _Py_SetFileSystemEncoding(),
   and set interp->fscodec_initialized.

   Returns _Py_INIT_OK() on success, an init error if the codec name cannot
   be obtained, or _Py_INIT_NO_MEMORY() if _Py_SetFileSystemEncoding()
   fails. */
static _PyInitError
initfsencoding(PyInterpreterState *interp)
{
_PyCoreConfig *config = &interp->core_config;

char *encoding = get_codec_name(config->filesystem_encoding);
if (encoding == NULL) {
/* Such an error can only occur in critical situations: out of
   memory, failure to import a standard library module, etc. */
return _Py_INIT_ERR("failed to get the Python codec "
"of the filesystem encoding");
}

/* Update the filesystem encoding to the normalized Python codec name.
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
(Python codec name). */
PyMem_RawFree(config->filesystem_encoding);
config->filesystem_encoding = encoding;

/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
global configuration variables. */
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
config->filesystem_errors) < 0) {
return _Py_INIT_NO_MEMORY();
}

/* PyUnicode can now use the Python codec rather than C implementation
for the filesystem encoding */
interp->fscodec_initialized = 1;
return _Py_INIT_OK();
}

/* Import the site module (not into __main__ though) */

static _PyInitError
initsite(void)
init_import_size(void)
{
PyObject *m;
m = PyImport_ImportModule("site");
Expand Down Expand Up @@ -1880,14 +1813,6 @@ init_sys_streams(PyInterpreterState *interp)
}
#endif

char *codec_name = get_codec_name(config->stdio_encoding);
if (codec_name == NULL) {
return _Py_INIT_ERR("failed to get the Python codec name "
"of the stdio encoding");
}
PyMem_RawFree(config->stdio_encoding);
config->stdio_encoding = codec_name;

/* Hack to avoid a nasty recursion issue when Python is invoked
in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) {
Expand Down Expand Up @@ -2287,7 +2212,7 @@ Py_Exit(int sts)
}

static _PyInitError
initsigs(void)
init_signals(void)
{
#ifdef SIGPIPE
PyOS_setsig(SIGPIPE, SIG_IGN);
Expand Down