Skip to content

Commit 0cc3fbe

Browse files
committed
Add PyUnicodeWriter_WriteString()
1 parent b2e4305 commit 0cc3fbe

File tree

4 files changed

+86
-12
lines changed

4 files changed

+86
-12
lines changed

Include/cpython/unicodeobject.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -458,10 +458,10 @@ PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
458458
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
459459
PyUnicodeWriter *writer,
460460
Py_UCS4 ch);
461-
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCIIString(
461+
PyAPI_FUNC(int) PyUnicodeWriter_WriteString(
462462
PyUnicodeWriter *writer,
463-
const char *ascii,
464-
Py_ssize_t len);
463+
const char *str,
464+
Py_ssize_t size);
465465
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
466466
PyUnicodeWriter *writer,
467467
PyObject *str);

Include/internal/pycore_unicodeobject.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -345,6 +345,14 @@ struct _PyUnicodeWriter {
345345
unsigned char readonly;
346346
};
347347

348+
static inline void*
349+
_PyUnicodeWriter_GetDataEnd(_PyUnicodeWriter *writer)
350+
{
351+
char *data = writer->data;
352+
data += writer->pos * writer->kind;
353+
return data;
354+
}
355+
348356
// Initialize a Unicode writer.
349357
//
350358
// By default, the minimum buffer size is 0 character and overallocation is

Modules/_testcapi/unicode.c

Lines changed: 63 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -259,8 +259,8 @@ test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
259259
goto error;
260260
}
261261

262-
// test PyUnicodeWriter_WriteASCIIString()
263-
if (PyUnicodeWriter_WriteASCIIString(writer, " value", 6) < 0) {
262+
// test PyUnicodeWriter_WriteString()
263+
if (PyUnicodeWriter_WriteString(writer, " value", 6) < 0) {
264264
goto error;
265265
}
266266

@@ -279,6 +279,65 @@ test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
279279
}
280280

281281

282+
// Check that PyUnicodeWriter_WriteString() accepts UTF-8 input: a pure ASCII
// string, a string ending with a 2-byte sequence (U+00E9) and a string ending
// with a 3-byte sequence (U+20AC), each followed by a single character
// written with PyUnicodeWriter_WriteChar().
//
// Returns None on success, or NULL if creating the writer, any write, or
// PyUnicodeWriter_Finish() fails.
static PyObject *
test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Each chunk is passed with size -1 (NUL-terminated string) and is
    // followed by one trailing character.
    static const struct {
        const char *utf8;
        Py_UCS4 trailer;
    } chunks[] = {
        {"ascii", '-'},
        {"latin1=\xC3\xA9", '-'},
        {"euro=\xE2\x82\xAC", '.'},
    };
    const size_t nchunks = sizeof(chunks) / sizeof(chunks[0]);

    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < nchunks; i++) {
        if (PyUnicodeWriter_WriteString(writer, chunks[i].utf8, -1) < 0
            || PyUnicodeWriter_WriteChar(writer, chunks[i].trailer) < 0)
        {
            // A write failed: release the writer before reporting the error.
            PyUnicodeWriter_Free(writer);
            return NULL;
        }
    }

    // NOTE(review): the writer is not freed after this call, on success or
    // failure -- presumably PyUnicodeWriter_Finish() releases it; confirm.
    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }

    // The final string must be the concatenation of everything written.
    assert(PyUnicode_EqualToUTF8(result,
                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
322+
323+
324+
// Check that PyUnicodeWriter_WriteString() rejects invalid UTF-8: writing a
// string containing the byte 0xFF must fail and set UnicodeDecodeError.
//
// Returns None on success, or NULL if the writer cannot be created.
static PyObject *
test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    // Call the function under test outside of assert(): when NDEBUG is
    // defined, assert() discards its argument, so a call placed inside it
    // would never run and the test would silently check nothing.
    int res = PyUnicodeWriter_WriteString(writer, "invalid=\xFF", -1);
    assert(res < 0);
    (void)res;  // avoid an unused-variable warning when assert() is compiled out
    PyUnicodeWriter_Free(writer);

    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
    // The expected exception is part of the test; do not leak it to the caller.
    PyErr_Clear();

    Py_RETURN_NONE;
}
339+
340+
282341
static PyObject *
283342
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
284343
{
@@ -321,6 +380,8 @@ static PyMethodDef TestMethods[] = {
321380
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
322381
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
323382
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
383+
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
384+
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
324385
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
325386
{NULL},
326387
};

Objects/unicodeobject.c

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4721,8 +4721,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
47214721
const char *p = start;
47224722

47234723
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
4724-
assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
4725-
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
4724+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)
4725+
&& _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T))
4726+
{
47264727
/* Fast path, see in STRINGLIB(utf8_decode) for
47274728
an explanation. */
47284729
/* Help allocation */
@@ -4945,6 +4946,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
49454946
}
49464947

49474948

4949+
// Used by PyUnicodeWriter_WriteString() implementation
49484950
static int
49494951
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
49504952
const char *s, Py_ssize_t size,
@@ -13525,12 +13527,15 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
1352513527
}
1352613528

1352713529
int
13528-
PyUnicodeWriter_WriteASCIIString(PyUnicodeWriter *writer,
13529-
const char *ascii,
13530-
Py_ssize_t len)
13530+
PyUnicodeWriter_WriteString(PyUnicodeWriter *writer,
13531+
const char *str,
13532+
Py_ssize_t size)
1353113533
{
13532-
return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter*)writer,
13533-
ascii, len);
13534+
if (size == -1) {
13535+
size = strlen(str);
13536+
}
13537+
return unicode_decode_utf8_writer((_PyUnicodeWriter*)writer, str, size,
13538+
_Py_ERROR_STRICT, NULL, NULL);
1353413539
}
1353513540

1353613541
int

0 commit comments

Comments
 (0)