Skip to content

Commit 49a46ac

Browse files
committed
gh-119182: Add PyUnicodeWriter C API
Move the private _PyUnicodeWriter API to the internal C API.
1 parent e94dbe4 commit 49a46ac

File tree

4 files changed

+475
-126
lines changed

4 files changed

+475
-126
lines changed

Include/cpython/unicodeobject.h

Lines changed: 29 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -444,121 +444,41 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
444444
Py_ssize_t size);
445445

446446

447-
/* --- _PyUnicodeWriter API ----------------------------------------------- */
447+
/* --- PyUnicodeWriter API ------------------------------------------------ */
448448

449-
typedef struct {
450-
PyObject *buffer;
451-
void *data;
452-
int kind;
453-
Py_UCS4 maxchar;
454-
Py_ssize_t size;
455-
Py_ssize_t pos;
456-
457-
/* minimum number of allocated characters (default: 0) */
458-
Py_ssize_t min_length;
459-
460-
/* minimum character (default: 127, ASCII) */
461-
Py_UCS4 min_char;
449+
typedef struct PyUnicodeWriter PyUnicodeWriter;
462450

463-
/* If non-zero, overallocate the buffer (default: 0). */
464-
unsigned char overallocate;
451+
PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(void);
452+
PyAPI_FUNC(void) PyUnicodeWriter_Discard(PyUnicodeWriter *writer);
453+
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
465454

466-
/* If readonly is 1, buffer is a shared string (cannot be modified)
467-
and size is set to 0. */
468-
unsigned char readonly;
469-
} _PyUnicodeWriter ;
455+
PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
456+
PyUnicodeWriter *writer,
457+
int overallocate);
470458

471-
// Initialize a Unicode writer.
472-
//
473-
// By default, the minimum buffer size is 0 characters and overallocation is
474-
// disabled. Set min_length, min_char and overallocate attributes to control
475-
// the allocation of the buffer.
476-
PyAPI_FUNC(void)
477-
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
478-
479-
/* Prepare the buffer to write 'length' characters
480-
with the specified maximum character.
481-
482-
Return 0 on success, raise an exception and return -1 on error. */
483-
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
484-
(((MAXCHAR) <= (WRITER)->maxchar \
485-
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
486-
? 0 \
487-
: (((LENGTH) == 0) \
488-
? 0 \
489-
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
490-
491-
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
492-
instead. */
493-
PyAPI_FUNC(int)
494-
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
495-
Py_ssize_t length, Py_UCS4 maxchar);
496-
497-
/* Prepare the buffer to have at least the kind KIND.
498-
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
499-
support characters in range U+0000-U+FFFF.
500-
501-
Return 0 on success, raise an exception and return -1 on error. */
502-
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
503-
((KIND) <= (WRITER)->kind \
504-
? 0 \
505-
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
506-
507-
/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
508-
macro instead. */
509-
PyAPI_FUNC(int)
510-
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
511-
int kind);
512-
513-
/* Append a Unicode character.
514-
Return 0 on success, raise an exception and return -1 on error. */
515-
PyAPI_FUNC(int)
516-
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
517-
Py_UCS4 ch
518-
);
519-
520-
/* Append a Unicode string.
521-
Return 0 on success, raise an exception and return -1 on error. */
522-
PyAPI_FUNC(int)
523-
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
524-
PyObject *str /* Unicode string */
525-
);
459+
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
460+
PyUnicodeWriter *writer,
461+
Py_UCS4 ch);
462+
PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
463+
PyUnicodeWriter *writer,
464+
const char *str,
465+
Py_ssize_t size);
526466

527-
/* Append a substring of a Unicode string.
528-
Return 0 on success, raise an exception and return -1 on error. */
529-
PyAPI_FUNC(int)
530-
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
531-
PyObject *str, /* Unicode string */
467+
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
468+
PyUnicodeWriter *writer,
469+
PyObject *str);
470+
PyAPI_FUNC(int) PyUnicodeWriter_WriteRepr(
471+
PyUnicodeWriter *writer,
472+
PyObject *obj);
473+
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
474+
PyUnicodeWriter *writer,
475+
PyObject *str,
532476
Py_ssize_t start,
533-
Py_ssize_t end
534-
);
535-
536-
/* Append an ASCII-encoded byte string.
537-
Return 0 on success, raise an exception and return -1 on error. */
538-
PyAPI_FUNC(int)
539-
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
540-
const char *str, /* ASCII-encoded byte string */
541-
Py_ssize_t len /* number of bytes, or -1 if unknown */
542-
);
543-
544-
/* Append a latin1-encoded byte string.
545-
Return 0 on success, raise an exception and return -1 on error. */
546-
PyAPI_FUNC(int)
547-
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
548-
const char *str, /* latin1-encoded byte string */
549-
Py_ssize_t len /* length in bytes */
550-
);
551-
552-
/* Get the value of the writer as a Unicode string. Clear the
553-
buffer of the writer. Raise an exception and return NULL
554-
on error. */
555-
PyAPI_FUNC(PyObject *)
556-
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
557-
558-
/* Deallocate memory of a writer (clear its internal buffer). */
559-
PyAPI_FUNC(void)
560-
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
561-
477+
Py_ssize_t end);
478+
PyAPI_FUNC(int) PyUnicodeWriter_Format(
479+
PyUnicodeWriter *writer,
480+
const char *format,
481+
...);
562482

563483
/* --- Manage the default encoding ---------------------------------------- */
564484

Include/internal/pycore_unicodeobject.h

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ extern "C" {
1313
#include "pycore_identifier.h" // _Py_Identifier
1414
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
1515

16+
typedef struct _PyUnicodeWriter _PyUnicodeWriter;
17+
1618
/* --- Characters Type APIs ----------------------------------------------- */
1719

1820
extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
@@ -319,6 +321,130 @@ extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
319321
PyAPI_FUNC(const char *) _PyUnicode_AsUTF8NoNUL(PyObject *);
320322

321323

324+
/* --- _PyUnicodeWriter API ----------------------------------------------- */
325+
326+
struct _PyUnicodeWriter {
327+
PyObject *buffer;
328+
void *data;
329+
int kind;
330+
Py_UCS4 maxchar;
331+
Py_ssize_t size;
332+
Py_ssize_t pos;
333+
334+
/* minimum number of allocated characters (default: 0) */
335+
Py_ssize_t min_length;
336+
337+
/* minimum character (default: 127, ASCII) */
338+
Py_UCS4 min_char;
339+
340+
/* If non-zero, overallocate the buffer (default: 0). */
341+
unsigned char overallocate;
342+
343+
/* If readonly is 1, buffer is a shared string (cannot be modified)
344+
and size is set to 0. */
345+
unsigned char readonly;
346+
};
347+
348+
static inline void*
349+
_PyUnicodeWriter_GetDataEnd(_PyUnicodeWriter *writer)
350+
{
351+
char *data = writer->data;
352+
data += writer->pos * writer->kind;
353+
return data;
354+
}
355+
356+
// Initialize a Unicode writer.
357+
//
358+
// By default, the minimum buffer size is 0 characters and overallocation is
359+
// disabled. Set min_length, min_char and overallocate attributes to control
360+
// the allocation of the buffer.
361+
PyAPI_FUNC(void)
362+
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
363+
364+
/* Prepare the buffer to write 'length' characters
365+
with the specified maximum character.
366+
367+
Return 0 on success, raise an exception and return -1 on error. */
368+
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
369+
(((MAXCHAR) <= (WRITER)->maxchar \
370+
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
371+
? 0 \
372+
: (((LENGTH) == 0) \
373+
? 0 \
374+
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
375+
376+
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
377+
instead. */
378+
PyAPI_FUNC(int)
379+
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
380+
Py_ssize_t length, Py_UCS4 maxchar);
381+
382+
/* Prepare the buffer to have at least the kind KIND.
383+
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
384+
support characters in range U+0000-U+FFFF.
385+
386+
Return 0 on success, raise an exception and return -1 on error. */
387+
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
388+
((KIND) <= (WRITER)->kind \
389+
? 0 \
390+
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
391+
392+
/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
393+
macro instead. */
394+
PyAPI_FUNC(int)
395+
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
396+
int kind);
397+
398+
/* Append a Unicode character.
399+
Return 0 on success, raise an exception and return -1 on error. */
400+
PyAPI_FUNC(int)
401+
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
402+
Py_UCS4 ch
403+
);
404+
405+
/* Append a Unicode string.
406+
Return 0 on success, raise an exception and return -1 on error. */
407+
PyAPI_FUNC(int)
408+
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
409+
PyObject *str /* Unicode string */
410+
);
411+
412+
/* Append a substring of a Unicode string.
413+
Return 0 on success, raise an exception and return -1 on error. */
414+
PyAPI_FUNC(int)
415+
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
416+
PyObject *str, /* Unicode string */
417+
Py_ssize_t start,
418+
Py_ssize_t end
419+
);
420+
421+
/* Append an ASCII-encoded byte string.
422+
Return 0 on success, raise an exception and return -1 on error. */
423+
PyAPI_FUNC(int)
424+
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
425+
const char *str, /* ASCII-encoded byte string */
426+
Py_ssize_t len /* number of bytes, or -1 if unknown */
427+
);
428+
429+
/* Append a latin1-encoded byte string.
430+
Return 0 on success, raise an exception and return -1 on error. */
431+
PyAPI_FUNC(int)
432+
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
433+
const char *str, /* latin1-encoded byte string */
434+
Py_ssize_t len /* length in bytes */
435+
);
436+
437+
/* Get the value of the writer as a Unicode string. Clear the
438+
buffer of the writer. Raise an exception and return NULL
439+
on error. */
440+
PyAPI_FUNC(PyObject *)
441+
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
442+
443+
/* Deallocate memory of a writer (clear its internal buffer). */
444+
PyAPI_FUNC(void)
445+
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
446+
447+
322448
#ifdef __cplusplus
323449
}
324450
#endif

0 commit comments

Comments
 (0)