Skip to content

Commit fd85a1b

Browse files
committed
gh-119182: Add PyUnicodeWriter C API
Move the private _PyUnicodeWriter API to the internal C API.
1 parent 9b422fc commit fd85a1b

File tree

3 files changed

+227
-114
lines changed

3 files changed

+227
-114
lines changed

Include/cpython/unicodeobject.h

Lines changed: 25 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -444,121 +444,32 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
444444
Py_ssize_t size);
445445

446446

447-
/* --- _PyUnicodeWriter API ----------------------------------------------- */
448-
449-
typedef struct {
450-
PyObject *buffer;
451-
void *data;
452-
int kind;
453-
Py_UCS4 maxchar;
454-
Py_ssize_t size;
455-
Py_ssize_t pos;
456-
457-
/* minimum number of allocated characters (default: 0) */
458-
Py_ssize_t min_length;
459-
460-
/* minimum character (default: 127, ASCII) */
461-
Py_UCS4 min_char;
462-
463-
/* If non-zero, overallocate the buffer (default: 0). */
464-
unsigned char overallocate;
465-
466-
/* If readonly is 1, buffer is a shared string (cannot be modified)
467-
and size is set to 0. */
468-
unsigned char readonly;
469-
} _PyUnicodeWriter ;
470-
471-
// Initialize a Unicode writer.
472-
//
473-
// By default, the minimum buffer size is 0 characters and overallocation is
474-
// disabled. Set min_length, min_char and overallocate attributes to control
475-
// the allocation of the buffer.
476-
PyAPI_FUNC(void)
477-
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
478-
479-
/* Prepare the buffer to write 'length' characters
480-
with the specified maximum character.
481-
482-
Return 0 on success, raise an exception and return -1 on error. */
483-
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
484-
(((MAXCHAR) <= (WRITER)->maxchar \
485-
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
486-
? 0 \
487-
: (((LENGTH) == 0) \
488-
? 0 \
489-
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
490-
491-
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
492-
instead. */
493-
PyAPI_FUNC(int)
494-
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
495-
Py_ssize_t length, Py_UCS4 maxchar);
496-
497-
/* Prepare the buffer to have at least the kind KIND.
498-
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
499-
support characters in range U+0000-U+FFFF.
500-
501-
Return 0 on success, raise an exception and return -1 on error. */
502-
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
503-
((KIND) <= (WRITER)->kind \
504-
? 0 \
505-
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
506-
507-
/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
508-
macro instead. */
509-
PyAPI_FUNC(int)
510-
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
511-
int kind);
512-
513-
/* Append a Unicode character.
514-
Return 0 on success, raise an exception and return -1 on error. */
515-
PyAPI_FUNC(int)
516-
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
517-
Py_UCS4 ch
518-
);
519-
520-
/* Append a Unicode string.
521-
Return 0 on success, raise an exception and return -1 on error. */
522-
PyAPI_FUNC(int)
523-
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
524-
PyObject *str /* Unicode string */
525-
);
526-
527-
/* Append a substring of a Unicode string.
528-
Return 0 on success, raise an exception and return -1 on error. */
529-
PyAPI_FUNC(int)
530-
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
531-
PyObject *str, /* Unicode string */
447+
/* --- PyUnicodeWriter API ------------------------------------------------ */
448+
449+
typedef struct PyUnicodeWriter PyUnicodeWriter;
450+
451+
PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(void);
452+
PyAPI_FUNC(void) PyUnicodeWriter_Free(PyUnicodeWriter *writer);
453+
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
454+
PyAPI_FUNC(void) PyUnicodeWriter_SetOverallocate(
455+
PyUnicodeWriter *writer,
456+
int overallocate);
457+
458+
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
459+
PyUnicodeWriter *writer,
460+
Py_UCS4 ch);
461+
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCIIString(
462+
PyUnicodeWriter *writer,
463+
const char *ascii,
464+
Py_ssize_t len);
465+
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
466+
PyUnicodeWriter *writer,
467+
PyObject *str);
468+
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
469+
PyUnicodeWriter *writer,
470+
PyObject *str,
532471
Py_ssize_t start,
533-
Py_ssize_t end
534-
);
535-
536-
/* Append an ASCII-encoded byte string.
537-
Return 0 on success, raise an exception and return -1 on error. */
538-
PyAPI_FUNC(int)
539-
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
540-
const char *str, /* ASCII-encoded byte string */
541-
Py_ssize_t len /* number of bytes, or -1 if unknown */
542-
);
543-
544-
/* Append a latin1-encoded byte string.
545-
Return 0 on success, raise an exception and return -1 on error. */
546-
PyAPI_FUNC(int)
547-
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
548-
const char *str, /* latin1-encoded byte string */
549-
Py_ssize_t len /* length in bytes */
550-
);
551-
552-
/* Get the value of the writer as a Unicode string. Clear the
553-
buffer of the writer. Raise an exception and return NULL
554-
on error. */
555-
PyAPI_FUNC(PyObject *)
556-
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
557-
558-
/* Deallocate memory of a writer (clear its internal buffer). */
559-
PyAPI_FUNC(void)
560-
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
561-
472+
Py_ssize_t stop);
562473

563474
/* --- Manage the default encoding ---------------------------------------- */
564475

Include/internal/pycore_unicodeobject.h

Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@ extern "C" {
1313
#include "pycore_identifier.h" // _Py_Identifier
1414
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
1515

16+
typedef struct _PyUnicodeWriter _PyUnicodeWriter;
17+
1618
/* --- Characters Type APIs ----------------------------------------------- */
1719

1820
extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
@@ -319,6 +321,122 @@ extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
319321
PyAPI_FUNC(const char *) _PyUnicode_AsUTF8NoNUL(PyObject *);
320322

321323

324+
/* --- _PyUnicodeWriter API ----------------------------------------------- */
325+
326+
struct _PyUnicodeWriter {
327+
PyObject *buffer;
328+
void *data;
329+
int kind;
330+
Py_UCS4 maxchar;
331+
Py_ssize_t size;
332+
Py_ssize_t pos;
333+
334+
/* minimum number of allocated characters (default: 0) */
335+
Py_ssize_t min_length;
336+
337+
/* minimum character (default: 127, ASCII) */
338+
Py_UCS4 min_char;
339+
340+
/* If non-zero, overallocate the buffer (default: 0). */
341+
unsigned char overallocate;
342+
343+
/* If readonly is 1, buffer is a shared string (cannot be modified)
344+
and size is set to 0. */
345+
unsigned char readonly;
346+
};
347+
348+
// Initialize a Unicode writer.
349+
//
350+
// By default, the minimum buffer size is 0 characters and overallocation is
351+
// disabled. Set min_length, min_char and overallocate attributes to control
352+
// the allocation of the buffer.
353+
PyAPI_FUNC(void)
354+
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
355+
356+
/* Prepare the buffer to write 'length' characters
357+
with the specified maximum character.
358+
359+
Return 0 on success, raise an exception and return -1 on error. */
360+
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
361+
(((MAXCHAR) <= (WRITER)->maxchar \
362+
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
363+
? 0 \
364+
: (((LENGTH) == 0) \
365+
? 0 \
366+
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
367+
368+
/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
369+
instead. */
370+
PyAPI_FUNC(int)
371+
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
372+
Py_ssize_t length, Py_UCS4 maxchar);
373+
374+
/* Prepare the buffer to have at least the kind KIND.
375+
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
376+
support characters in range U+0000-U+FFFF.
377+
378+
Return 0 on success, raise an exception and return -1 on error. */
379+
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
380+
((KIND) <= (WRITER)->kind \
381+
? 0 \
382+
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
383+
384+
/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
385+
macro instead. */
386+
PyAPI_FUNC(int)
387+
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
388+
int kind);
389+
390+
/* Append a Unicode character.
391+
Return 0 on success, raise an exception and return -1 on error. */
392+
PyAPI_FUNC(int)
393+
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
394+
Py_UCS4 ch
395+
);
396+
397+
/* Append a Unicode string.
398+
Return 0 on success, raise an exception and return -1 on error. */
399+
PyAPI_FUNC(int)
400+
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
401+
PyObject *str /* Unicode string */
402+
);
403+
404+
/* Append a substring of a Unicode string.
405+
Return 0 on success, raise an exception and return -1 on error. */
406+
PyAPI_FUNC(int)
407+
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
408+
PyObject *str, /* Unicode string */
409+
Py_ssize_t start,
410+
Py_ssize_t end
411+
);
412+
413+
/* Append an ASCII-encoded byte string.
414+
Return 0 on success, raise an exception and return -1 on error. */
415+
PyAPI_FUNC(int)
416+
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
417+
const char *str, /* ASCII-encoded byte string */
418+
Py_ssize_t len /* number of bytes, or -1 if unknown */
419+
);
420+
421+
/* Append a latin1-encoded byte string.
422+
Return 0 on success, raise an exception and return -1 on error. */
423+
PyAPI_FUNC(int)
424+
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
425+
const char *str, /* latin1-encoded byte string */
426+
Py_ssize_t len /* length in bytes */
427+
);
428+
429+
/* Get the value of the writer as a Unicode string. Clear the
430+
buffer of the writer. Raise an exception and return NULL
431+
on error. */
432+
PyAPI_FUNC(PyObject *)
433+
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
434+
435+
/* Deallocate memory of a writer (clear its internal buffer). */
436+
PyAPI_FUNC(void)
437+
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
438+
439+
322440
#ifdef __cplusplus
323441
}
324442
#endif

0 commit comments

Comments
 (0)