Skip to content

Commit 14e739b

Browse files
committed
gh-119182: Add PyUnicodeWriter C API
1 parent e94dbe4 commit 14e739b

File tree

3 files changed

+359
-19
lines changed

3 files changed

+359
-19
lines changed

Include/cpython/unicodeobject.h

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,44 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
444444
Py_ssize_t size);
445445

446446

447-
/* --- _PyUnicodeWriter API ----------------------------------------------- */
447+
/* --- Public PyUnicodeWriter API ----------------------------------------- */
448+
449+
typedef struct PyUnicodeWriter PyUnicodeWriter;
450+
451+
PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(void);
452+
PyAPI_FUNC(void) PyUnicodeWriter_Discard(PyUnicodeWriter *writer);
453+
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
454+
455+
PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
456+
PyUnicodeWriter *writer,
457+
int overallocate);
458+
459+
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
460+
PyUnicodeWriter *writer,
461+
Py_UCS4 ch);
462+
PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
463+
PyUnicodeWriter *writer,
464+
const char *str,
465+
Py_ssize_t size);
466+
467+
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
468+
PyUnicodeWriter *writer,
469+
PyObject *str);
470+
PyAPI_FUNC(int) PyUnicodeWriter_WriteRepr(
471+
PyUnicodeWriter *writer,
472+
PyObject *obj);
473+
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
474+
PyUnicodeWriter *writer,
475+
PyObject *str,
476+
Py_ssize_t start,
477+
Py_ssize_t end);
478+
PyAPI_FUNC(int) PyUnicodeWriter_Format(
479+
PyUnicodeWriter *writer,
480+
const char *format,
481+
...);
482+
483+
484+
/* --- Private _PyUnicodeWriter API --------------------------------------- */
448485

449486
typedef struct {
450487
PyObject *buffer;
@@ -466,7 +503,7 @@ typedef struct {
466503
/* If readonly is 1, buffer is a shared string (cannot be modified)
467504
and size is set to 0. */
468505
unsigned char readonly;
469-
} _PyUnicodeWriter ;
506+
} _PyUnicodeWriter;
470507

471508
// Initialize a Unicode writer.
472509
//

Modules/_testcapi/unicode.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,167 @@ unicode_copycharacters(PyObject *self, PyObject *args)
221221
}
222222

223223

224+
/* Exercise PyUnicodeWriter end to end: SetOverallocate, WriteUTF8, WriteChar,
   WriteSubstring, WriteStr and WriteRepr, then check that the finished string
   equals "var=long value 'repr'".

   Returns None on success and NULL as soon as any API call reports failure. */
static PyObject *
test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyObject *piece;
    PyObject *built;
    int status;
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();

    if (!writer) {
        return NULL;
    }

    // PyUnicodeWriter_SetOverallocate()
    PyUnicodeWriter_SetOverallocate(writer, 1);

    // PyUnicodeWriter_WriteUTF8(), then PyUnicodeWriter_WriteChar();
    // the second call only runs when the first one succeeded.
    if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0
        || PyUnicodeWriter_WriteChar(writer, '=') < 0)
    {
        goto fail;
    }

    // PyUnicodeWriter_WriteSubstring(): characters 1..4 of "[long]"
    piece = PyUnicode_FromString("[long]");
    if (!piece) {
        goto fail;
    }
    status = PyUnicodeWriter_WriteSubstring(writer, piece, 1, 5);
    // Drop the temporary before looking at the status so it is released
    // on both the success and the failure path.
    Py_DECREF(piece);
    if (status < 0) {
        goto fail;
    }

    // PyUnicodeWriter_WriteStr()
    piece = PyUnicode_FromString(" value ");
    if (!piece) {
        goto fail;
    }
    status = PyUnicodeWriter_WriteStr(writer, piece);
    Py_DECREF(piece);
    if (status < 0) {
        goto fail;
    }

    // PyUnicodeWriter_WriteRepr()
    piece = PyUnicode_FromString("repr");
    if (!piece) {
        goto fail;
    }
    status = PyUnicodeWriter_WriteRepr(writer, piece);
    Py_DECREF(piece);
    if (status < 0) {
        goto fail;
    }

    // The writer is not discarded after Finish(), even when it fails.
    // NOTE(review): presumably Finish() releases the writer itself -- confirm.
    built = PyUnicodeWriter_Finish(writer);
    if (!built) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(built, "var=long value 'repr'"));
    Py_DECREF(built);

    Py_RETURN_NONE;

fail:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
291+
292+
293+
/* Check PyUnicodeWriter_WriteUTF8() with ASCII, 2-byte and 3-byte UTF-8
   sequences, each followed by a one-character separator written with
   PyUnicodeWriter_WriteChar().

   Returns None on success and NULL as soon as any API call reports failure. */
static PyObject *
test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Each entry is written as UTF-8 text followed by its separator,
    // in array order.
    static const struct {
        const char *utf8;
        int sep;
    } chunks[] = {
        {"ascii", '-'},
        {"latin1=\xC3\xA9", '-'},
        {"euro=\xE2\x82\xAC", '.'},
    };
    const int nchunks = (int)(sizeof(chunks) / sizeof(chunks[0]));
    PyObject *built;

    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (!writer) {
        return NULL;
    }

    for (int i = 0; i < nchunks; i++) {
        if (PyUnicodeWriter_WriteUTF8(writer, chunks[i].utf8, -1) < 0) {
            goto fail;
        }
        if (PyUnicodeWriter_WriteChar(writer, chunks[i].sep) < 0) {
            goto fail;
        }
    }

    // The writer is not discarded after Finish(), even when it fails.
    // NOTE(review): presumably Finish() releases the writer itself -- confirm.
    built = PyUnicodeWriter_Finish(writer);
    if (!built) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(built,
                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(built);

    Py_RETURN_NONE;

fail:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
333+
334+
335+
/* Check that PyUnicodeWriter_WriteUTF8() rejects an invalid UTF-8 sequence:
   the call must return a negative value and leave UnicodeDecodeError set.

   Returns None after clearing the expected exception, or NULL if the writer
   cannot be created. */
static PyObject *
test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();
    if (writer == NULL) {
        return NULL;
    }

    // Perform the call outside of assert(): the assert() argument is not
    // evaluated when NDEBUG is defined, which would silently skip the very
    // operation under test.
    int ret = PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1);
    assert(ret < 0);
    (void)ret;  // keep 'ret' referenced when assertions are compiled out
    PyUnicodeWriter_Discard(writer);

    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
    PyErr_Clear();

    Py_RETURN_NONE;
}
350+
351+
352+
/* Check PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar():
   the finished string must equal "Hello 123.".

   Returns None on success and NULL as soon as any API call reports failure. */
static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyObject *built;
    PyUnicodeWriter *writer = PyUnicodeWriter_Create();

    if (!writer) {
        return NULL;
    }

    // PyUnicodeWriter_Format(), then PyUnicodeWriter_WriteChar();
    // the second call only runs when the first one succeeded.
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        goto fail;
    }

    // The writer is not discarded after Finish(), even when it fails.
    // NOTE(review): presumably Finish() releases the writer itself -- confirm.
    built = PyUnicodeWriter_Finish(writer);
    if (!built) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(built, "Hello 123."));
    Py_DECREF(built);

    Py_RETURN_NONE;

fail:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
383+
384+
224385
static PyMethodDef TestMethods[] = {
225386
{"unicode_new", unicode_new, METH_VARARGS},
226387
{"unicode_fill", unicode_fill, METH_VARARGS},
@@ -229,6 +390,10 @@ static PyMethodDef TestMethods[] = {
229390
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
230391
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
231392
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
393+
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
394+
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
395+
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
396+
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
232397
{NULL},
233398
};
234399

0 commit comments

Comments
 (0)