Skip to content

Commit 91698d8

Browse files
authored
bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) (GH-21142)
Always create the empty bytes string singleton.

Optimize PyBytes_FromStringAndSize(str, 0): it no longer has to check whether the empty string singleton was created or not; the singleton is always available.

Add functions:

* _PyBytes_Init()
* bytes_get_empty(), bytes_new_empty()
* bytes_create_empty_string_singleton()
* unicode_create_empty_string_singleton()

_Py_unicode_state: rename the "empty" structure member to "empty_string".
1 parent 0f8ec1f commit 91698d8

File tree

5 files changed

+107
-53
lines changed

5 files changed

+107
-53
lines changed

Include/internal/pycore_interp.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec {
6666
};
6767

6868
struct _Py_bytes_state {
69+
PyObject *empty_string;
6970
PyBytesObject *characters[256];
70-
PyBytesObject *empty_string;
7171
};
7272

7373
struct _Py_unicode_state {
7474
// The empty Unicode object is a singleton to improve performance.
75-
PyObject *empty;
75+
PyObject *empty_string;
7676
/* Single character Unicode strings in the Latin-1 range are being
7777
shared as well. */
7878
PyObject *latin1[256];

Include/internal/pycore_pylifecycle.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
3232
/* Various one-time initializers */
3333

3434
extern PyStatus _PyUnicode_Init(PyThreadState *tstate);
35+
extern PyStatus _PyBytes_Init(PyThreadState *tstate);
3536
extern int _PyStructSequence_Init(void);
3637
extern int _PyLong_Init(PyThreadState *tstate);
3738
extern PyStatus _PyTuple_Init(PyThreadState *tstate);

Objects/bytesobject.c

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
#include "Python.h"
66
#include "pycore_abstract.h" // _PyIndex_Check()
7-
#include "pycore_bytes_methods.h"
8-
#include "pycore_object.h"
7+
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8+
#include "pycore_initconfig.h" // _PyStatus_OK()
9+
#include "pycore_object.h" // _PyObject_GC_TRACK
910
#include "pycore_pymem.h" // PYMEM_CLEANBYTE
1011

1112
#include "pystrhex.h"
@@ -41,6 +42,44 @@ get_bytes_state(void)
4142
}
4243

4344

45+
// Return a borrowed reference to the empty bytes string singleton.
46+
static inline PyObject* bytes_get_empty(void)
47+
{
48+
struct _Py_bytes_state *state = get_bytes_state();
49+
// bytes_get_empty() must not be called before _PyBytes_Init()
50+
// or after _PyBytes_Fini()
51+
assert(state->empty_string != NULL);
52+
return state->empty_string;
53+
}
54+
55+
56+
// Return a strong reference to the empty bytes string singleton.
57+
static inline PyObject* bytes_new_empty(void)
58+
{
59+
PyObject *empty = bytes_get_empty();
60+
Py_INCREF(empty);
61+
return (PyObject *)empty;
62+
}
63+
64+
65+
static int
66+
bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
67+
{
68+
// Create the empty bytes string singleton
69+
PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
70+
if (op == NULL) {
71+
return -1;
72+
}
73+
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
74+
op->ob_shash = -1;
75+
op->ob_sval[0] = '\0';
76+
77+
assert(state->empty_string == NULL);
78+
state->empty_string = (PyObject *)op;
79+
return 0;
80+
}
81+
82+
4483
/*
4584
For PyBytes_FromString(), the parameter `str' points to a null-terminated
4685
string containing exactly `size' bytes.
@@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
70109
assert(size >= 0);
71110

72111
if (size == 0) {
73-
struct _Py_bytes_state *state = get_bytes_state();
74-
op = state->empty_string;
75-
if (op != NULL) {
76-
Py_INCREF(op);
77-
return (PyObject *)op;
78-
}
112+
return bytes_new_empty();
79113
}
80114

81115
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
@@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
94128
}
95129
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
96130
op->ob_shash = -1;
97-
if (!use_calloc)
131+
if (!use_calloc) {
98132
op->ob_sval[size] = '\0';
99-
/* empty byte string singleton */
100-
if (size == 0) {
101-
struct _Py_bytes_state *state = get_bytes_state();
102-
Py_INCREF(op);
103-
state->empty_string = op;
104133
}
105134
return (PyObject *) op;
106135
}
@@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
122151
return (PyObject *)op;
123152
}
124153
}
154+
if (size == 0) {
155+
return bytes_new_empty();
156+
}
125157

126158
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
127159
if (op == NULL)
@@ -155,11 +187,7 @@ PyBytes_FromString(const char *str)
155187

156188
struct _Py_bytes_state *state = get_bytes_state();
157189
if (size == 0) {
158-
op = state->empty_string;
159-
if (op != NULL) {
160-
Py_INCREF(op);
161-
return (PyObject *)op;
162-
}
190+
return bytes_new_empty();
163191
}
164192
else if (size == 1) {
165193
op = state->characters[*str & UCHAR_MAX];
@@ -178,11 +206,8 @@ PyBytes_FromString(const char *str)
178206
op->ob_shash = -1;
179207
memcpy(op->ob_sval, str, size+1);
180208
/* share short strings */
181-
if (size == 0) {
182-
Py_INCREF(op);
183-
state->empty_string = op;
184-
}
185-
else if (size == 1) {
209+
if (size == 1) {
210+
assert(state->characters[*str & UCHAR_MAX] == NULL);
186211
Py_INCREF(op);
187212
state->characters[*str & UCHAR_MAX] = op;
188213
}
@@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj,
12721297
/* -------------------------------------------------------------------- */
12731298
/* Methods */
12741299

1275-
#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
1300+
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
12761301

12771302
#include "stringlib/stringdefs.h"
12781303

@@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
30533078
goto error;
30543079
}
30553080
if (newsize == 0) {
3056-
*pv = _PyBytes_FromSize(0, 0);
3081+
*pv = bytes_new_empty();
30573082
Py_DECREF(v);
3058-
return (*pv == NULL) ? -1 : 0;
3083+
return 0;
30593084
}
30603085
/* XXX UNREF/NEWREF interface should be more symmetrical */
30613086
#ifdef Py_REF_DEBUG
@@ -3084,6 +3109,18 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
30843109
return -1;
30853110
}
30863111

3112+
3113+
PyStatus
3114+
_PyBytes_Init(PyThreadState *tstate)
3115+
{
3116+
struct _Py_bytes_state *state = &tstate->interp->bytes;
3117+
if (bytes_create_empty_string_singleton(state) < 0) {
3118+
return _PyStatus_NO_MEMORY();
3119+
}
3120+
return _PyStatus_OK();
3121+
}
3122+
3123+
30873124
void
30883125
_PyBytes_Fini(PyThreadState *tstate)
30893126
{

Objects/unicodeobject.c

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4141
#define PY_SSIZE_T_CLEAN
4242
#include "Python.h"
4343
#include "pycore_abstract.h" // _PyIndex_Check()
44-
#include "pycore_bytes_methods.h"
45-
#include "pycore_fileutils.h"
46-
#include "pycore_initconfig.h"
44+
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
45+
#include "pycore_initconfig.h" // _PyStatus_OK()
4746
#include "pycore_interp.h" // PyInterpreterState.fs_codec
48-
#include "pycore_object.h"
49-
#include "pycore_pathconfig.h"
50-
#include "pycore_pylifecycle.h"
47+
#include "pycore_object.h" // _PyObject_GC_TRACK()
48+
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
49+
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
5150
#include "pycore_pystate.h" // _PyInterpreterState_GET()
52-
#include "ucnhash.h"
53-
#include "stringlib/eq.h"
51+
#include "ucnhash.h" // _PyUnicode_Name_CAPI
52+
#include "stringlib/eq.h" // unicode_eq()
5453

5554
#ifdef MS_WINDOWS
5655
#include <windows.h>
@@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void)
236235
struct _Py_unicode_state *state = get_unicode_state();
237236
// unicode_get_empty() must not be called before _PyUnicode_Init()
238237
// or after _PyUnicode_Fini()
239-
assert(state->empty != NULL);
240-
return state->empty;
238+
assert(state->empty_string != NULL);
239+
return state->empty_string;
241240
}
242241

242+
243+
// Return a strong reference to the empty string singleton.
243244
static inline PyObject* unicode_new_empty(void)
244245
{
245246
PyObject *empty = unicode_get_empty();
@@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op)
13851386
}
13861387
#endif
13871388

1389+
static int
1390+
unicode_create_empty_string_singleton(struct _Py_unicode_state *state)
1391+
{
1392+
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
1393+
// optimized to always use state->empty_string without having to check if
1394+
// it is NULL or not.
1395+
PyObject *empty = PyUnicode_New(1, 0);
1396+
if (empty == NULL) {
1397+
return -1;
1398+
}
1399+
PyUnicode_1BYTE_DATA(empty)[0] = 0;
1400+
_PyUnicode_LENGTH(empty) = 0;
1401+
assert(_PyUnicode_CheckConsistency(empty, 1));
1402+
1403+
assert(state->empty_string == NULL);
1404+
state->empty_string = empty;
1405+
return 0;
1406+
}
1407+
1408+
13881409
PyObject *
13891410
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
13901411
{
@@ -1972,7 +1993,7 @@ static int
19721993
unicode_is_singleton(PyObject *unicode)
19731994
{
19741995
struct _Py_unicode_state *state = get_unicode_state();
1975-
if (unicode == state->empty) {
1996+
if (unicode == state->empty_string) {
19761997
return 1;
19771998
}
19781999
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
@@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate)
1554215563
0x2029, /* PARAGRAPH SEPARATOR */
1554315564
};
1554415565

15545-
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
15546-
// optimized to always use state->empty without having to check if it is
15547-
// NULL or not.
15548-
PyObject *empty = PyUnicode_New(1, 0);
15549-
if (empty == NULL) {
15566+
struct _Py_unicode_state *state = &tstate->interp->unicode;
15567+
if (unicode_create_empty_string_singleton(state) < 0) {
1555015568
return _PyStatus_NO_MEMORY();
1555115569
}
15552-
PyUnicode_1BYTE_DATA(empty)[0] = 0;
15553-
_PyUnicode_LENGTH(empty) = 0;
15554-
assert(_PyUnicode_CheckConsistency(empty, 1));
15555-
15556-
struct _Py_unicode_state *state = &tstate->interp->unicode;
15557-
assert(state->empty == NULL);
15558-
state->empty = empty;
1555915570

1556015571
if (_Py_IsMainInterpreter(tstate)) {
1556115572
/* initialize the linebreak bloom filter */
@@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
1622316234
#endif /* __INSURE__ */
1622416235
}
1622516236

16226-
Py_CLEAR(state->empty);
16237+
Py_CLEAR(state->empty_string);
1622716238

1622816239
for (Py_ssize_t i = 0; i < 256; i++) {
1622916240
Py_CLEAR(state->latin1[i]);

Python/pylifecycle.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate)
607607
return status;
608608
}
609609

610+
status = _PyBytes_Init(tstate);
611+
if (_PyStatus_EXCEPTION(status)) {
612+
return status;
613+
}
614+
610615
status = _PyExc_Init(tstate);
611616
if (_PyStatus_EXCEPTION(status)) {
612617
return status;

0 commit comments

Comments
 (0)