Skip to content

Commit c47d574

Browse files
committed
ascii_new
1 parent 73c381e commit c47d574

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

Objects/unicodeobject.c

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,45 @@ _PyUnicode_Dump(PyObject *op)
13051305
}
13061306
#endif
13071307

1308+
// Simplified version of PyUnicode_New() that only creates ASCII strings.
1309+
// This function does not test if size == 0.
1310+
static PyObject *
1311+
ascii_new(Py_ssize_t size)
1312+
{
1313+
PyObject *obj;
1314+
void *data;
1315+
Py_ssize_t struct_size = sizeof(PyASCIIObject);
1316+
1317+
if (size > ((PY_SSIZE_T_MAX - struct_size) - 1))
1318+
return PyErr_NoMemory();
1319+
1320+
/* Duplicated allocation code from _PyObject_New() instead of a call to
1321+
* PyObject_New() so we are able to allocate space for the object and
1322+
* it's data buffer.
1323+
*/
1324+
obj = (PyObject *) PyObject_Malloc(struct_size + (size + 1));
1325+
if (obj == NULL) {
1326+
return PyErr_NoMemory();
1327+
}
1328+
_PyObject_Init(obj, &PyUnicode_Type);
1329+
1330+
data = ((PyASCIIObject*)obj) + 1;
1331+
1332+
_PyUnicode_LENGTH(obj) = size;
1333+
_PyUnicode_HASH(obj) = -1;
1334+
_PyUnicode_STATE(obj).interned = 0;
1335+
_PyUnicode_STATE(obj).kind = PyUnicode_1BYTE_KIND;
1336+
_PyUnicode_STATE(obj).compact = 1;
1337+
_PyUnicode_STATE(obj).ascii = 1;
1338+
_PyUnicode_STATE(obj).statically_allocated = 0;
1339+
((char*)data)[size] = 0;
1340+
1341+
#ifdef Py_DEBUG
1342+
unicode_fill_invalid((PyObject*)unicode, 0);
1343+
#endif
1344+
assert(_PyUnicode_CheckConsistency(obj, 0));
1345+
return obj;
1346+
}
13081347

13091348
PyObject *
13101349
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
@@ -2208,13 +2247,16 @@ _PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
22082247
{
22092248
const unsigned char *s = (const unsigned char *)buffer;
22102249
PyObject *unicode;
2250+
if (size == 0) {
2251+
return unicode_get_empty();
2252+
}
22112253
if (size == 1) {
22122254
#ifdef Py_DEBUG
22132255
assert((unsigned char)s[0] < 128);
22142256
#endif
22152257
return get_latin1_char(s[0]);
22162258
}
2217-
unicode = PyUnicode_New(size, 127);
2259+
unicode = ascii_new(size);
22182260
if (!unicode)
22192261
return NULL;
22202262
memcpy(PyUnicode_1BYTE_DATA(unicode), s, size);
@@ -5297,11 +5339,13 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
52975339

52985340
Py_ssize_t pos = find_first_nonascii(starts, end);
52995341
if (pos == size) { // fast path: ASCII string.
5300-
PyObject *u = PyUnicode_New(size, 127);
5342+
PyObject *u = ascii_new(size);
53015343
if (u == NULL) {
53025344
return NULL;
53035345
}
5304-
memcpy(PyUnicode_1BYTE_DATA(u), s, size);
5346+
// memcpy(PyUnicode_1BYTE_DATA(u), s, size);
5347+
// bypass iscompact & isascii checks.
5348+
memcpy(_Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(u) + 1)), s, size);
53055349
if (consumed) {
53065350
*consumed = size;
53075351
}
@@ -5338,7 +5382,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
53385382
_PyUnicodeWriter writer;
53395383
_PyUnicodeWriter_InitWithBuffer(&writer, u);
53405384
if (maxchr <= 255) {
5341-
memcpy(PyUnicode_1BYTE_DATA(u), s, pos);
5385+
memcpy(_PyUnicode_COMPACT_DATA(u), s, pos);
53425386
s += pos;
53435387
size -= pos;
53445388
writer.pos = pos;
@@ -7419,7 +7463,7 @@ PyUnicode_DecodeASCII(const char *s,
74197463
}
74207464

74217465
// Shortcut for simple case
7422-
PyObject *u = PyUnicode_New(size, 127);
7466+
PyObject *u = ascii_new(size);
74237467
if (u == NULL) {
74247468
return NULL;
74257469
}

0 commit comments

Comments
 (0)