Skip to content

Commit 76e6eac

Browse files
committed
WIP: Add PyUnicodeWriter API
TODO: update API doc.
1 parent 2353a3d commit 76e6eac

File tree

3 files changed

+216
-0
lines changed

3 files changed

+216
-0
lines changed

docs/changelog.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
Changelog
22
=========
33

4+
* 2024-05-23: Add functions:
5+
6+
* ``PyUnicodeWriter_Create()``
7+
* ``PyUnicodeWriter_Free()``
8+
* ``PyUnicodeWriter_Finish()``
9+
* ``PyUnicodeWriter_SetOverallocate()``
10+
* ``PyUnicodeWriter_WriteChar()``
11+
* ``PyUnicodeWriter_WriteUTF8()``
12+
* ``PyUnicodeWriter_WriteString()``
13+
* ``PyUnicodeWriter_WriteSubstring()``
14+
* ``PyUnicodeWriter_Format()``
15+
416
* 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.
517
* 2024-03-29: Add ``PyList_GetItemRef()`` function.
618
* 2024-03-21: Add functions:

pythoncapi_compat.h

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,112 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
13381338
}
13391339
#endif
13401340

1341+
#if PY_VERSION_HEX < 0x030E0000
1342+
typedef struct PyUnicodeWriter PyUnicodeWriter;
1343+
1344+
static inline PyUnicodeWriter* PyUnicodeWriter_Create(void)
1345+
{
1346+
const size_t size = sizeof(_PyUnicodeWriter);
1347+
PyUnicodeWriter *writer = (PyUnicodeWriter *)PyMem_Malloc(size);
1348+
if (writer == _Py_NULL) {
1349+
PyErr_NoMemory();
1350+
return _Py_NULL;
1351+
}
1352+
_PyUnicodeWriter_Init((_PyUnicodeWriter*)writer);
1353+
((_PyUnicodeWriter*)writer)->overallocate = 1;
1354+
return writer;
1355+
}
1356+
1357+
// Discard a writer without producing a string.
//
// Release the writer's internal state with _PyUnicodeWriter_Dealloc() and
// then free the storage obtained in PyUnicodeWriter_Create().
//
// A NULL writer is accepted and ignored, like PyMem_Free(NULL), so that
// error paths can call this function unconditionally.
static inline void PyUnicodeWriter_Free(PyUnicodeWriter *writer)
{
    if (writer == _Py_NULL) {
        return;
    }
    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
    PyMem_Free(writer);
}
1362+
1363+
// Finish the writer and return the result of _PyUnicodeWriter_Finish().
//
// The writer storage is released with PyMem_Free() whether or not a string
// was produced, so the writer must not be used (or freed again) after this
// call.
static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{
    _PyUnicodeWriter *impl = (_PyUnicodeWriter *)writer;
    PyObject *result = _PyUnicodeWriter_Finish(impl);

    PyMem_Free(impl);
    return result;
}
1369+
1370+
static inline void
1371+
PyUnicodeWriter_SetOverallocate(PyUnicodeWriter *writer, int overallocate)
1372+
{
1373+
((_PyUnicodeWriter*)writer)->overallocate = (unsigned char)overallocate;
1374+
}
1375+
1376+
static inline int
1377+
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
1378+
{
1379+
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
1380+
}
1381+
1382+
static inline int
1383+
PyUnicodeWriter_WriteString(PyUnicodeWriter *writer, PyObject *str)
1384+
{
1385+
if (!PyUnicode_Check(str)) {
1386+
PyErr_Format(PyExc_TypeError,
1387+
"expect str, not %s", Py_TYPE(str)->tp_name);
1388+
return -1;
1389+
}
1390+
return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1391+
}
1392+
1393+
static inline int
1394+
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
1395+
const char *str, Py_ssize_t size)
1396+
{
1397+
if (size < 0) {
1398+
size = (Py_ssize_t)strlen(str);
1399+
}
1400+
PyObject *str_obj = PyUnicode_FromStringAndSize(str, size);
1401+
if (str_obj == _Py_NULL) {
1402+
return -1;
1403+
}
1404+
1405+
int res = PyUnicodeWriter_WriteString(writer, str_obj);
1406+
Py_DECREF(str_obj);
1407+
return res;
1408+
}
1409+
1410+
static inline int
1411+
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
1412+
Py_ssize_t start, Py_ssize_t end)
1413+
{
1414+
if (!PyUnicode_Check(str)) {
1415+
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
1416+
return -1;
1417+
}
1418+
if (start < 0 || start > end) {
1419+
PyErr_Format(PyExc_ValueError, "invalid start argument");
1420+
return -1;
1421+
}
1422+
if (end > PyUnicode_GET_LENGTH(str)) {
1423+
PyErr_Format(PyExc_ValueError, "invalid end argument");
1424+
return -1;
1425+
}
1426+
1427+
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
1428+
start, end);
1429+
}
1430+
1431+
static inline int
1432+
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
1433+
{
1434+
va_list vargs;
1435+
va_start(vargs, format);
1436+
PyObject *str = PyUnicode_FromFormatV(format, vargs);
1437+
va_end(vargs);
1438+
if (str == _Py_NULL) {
1439+
return -1;
1440+
}
1441+
1442+
int res = PyUnicodeWriter_WriteString(writer, str);
1443+
Py_DECREF(str);
1444+
return res;
1445+
}
1446+
#endif // PY_VERSION_HEX < 0x030E0000
13411447

13421448
#ifdef __cplusplus
13431449
}

tests/test_pythoncapi_compat_cext.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,6 +1724,102 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
17241724
}
17251725

17261726

1727+
// Exercise SetOverallocate, WriteString, WriteChar, WriteSubstring and
// WriteUTF8, then check the string returned by PyUnicodeWriter_Finish().
//
// Return None on success. On failure, return NULL; if a write step failed,
// the writer is released with PyUnicodeWriter_Free() first.
static PyObject *
test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    // Locals are declared before the first "goto error" so that no jump
    // crosses an initialization (required for C++ builds).
    PyObject *piece;
    int status;
    PyUnicodeWriter *w = PyUnicodeWriter_Create();
    if (!w) {
        return NULL;
    }

    // PyUnicodeWriter_SetOverallocate()
    PyUnicodeWriter_SetOverallocate(w, 1);

    // PyUnicodeWriter_WriteString(): append "var"
    piece = PyUnicode_FromString("var");
    if (!piece) {
        goto error;
    }
    status = PyUnicodeWriter_WriteString(w, piece);
    Py_DECREF(piece);
    piece = NULL;
    if (status < 0) {
        goto error;
    }

    // PyUnicodeWriter_WriteChar(): append "="
    status = PyUnicodeWriter_WriteChar(w, '=');
    if (status < 0) {
        goto error;
    }

    // PyUnicodeWriter_WriteSubstring(): append "long" taken from "[long]"
    piece = PyUnicode_FromString("[long]");
    if (!piece) {
        goto error;
    }
    status = PyUnicodeWriter_WriteSubstring(w, piece, 1, 5);
    Py_DECREF(piece);
    piece = NULL;
    if (status < 0) {
        goto error;
    }

    // PyUnicodeWriter_WriteUTF8(): append a NUL-terminated UTF-8 string
    status = PyUnicodeWriter_WriteUTF8(w, " valu\xC3\xA9", -1);
    if (status < 0) {
        goto error;
    }

    {
        // On the Finish() failure path the writer is not passed to
        // PyUnicodeWriter_Free(): the function returns directly.
        PyObject *joined = PyUnicodeWriter_Finish(w);
        if (!joined) {
            return NULL;
        }
        assert(PyUnicode_EqualToUTF8(joined, "var=long valu\xC3\xA9"));
        Py_DECREF(joined);
    }

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Free(w);
    return NULL;
}
1786+
1787+
1788+
// Exercise PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar()
// and check the string returned by PyUnicodeWriter_Finish().
//
// Return None on success. On failure, return NULL; if a write step failed,
// the writer is released with PyUnicodeWriter_Free() first.
static PyObject *
test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *w = PyUnicodeWriter_Create();
    if (!w) {
        return NULL;
    }

    // Write "Hello 123" with Format(), then "." with WriteChar(). The
    // second call is only made when the first one succeeded.
    if (PyUnicodeWriter_Format(w, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(w, '.') < 0)
    {
        goto error;
    }

    {
        // On the Finish() failure path the writer is not passed to
        // PyUnicodeWriter_Free(): the function returns directly.
        PyObject *joined = PyUnicodeWriter_Finish(w);
        if (!joined) {
            return NULL;
        }
        assert(PyUnicode_EqualToUTF8(joined, "Hello 123."));
        Py_DECREF(joined);
    }

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Free(w);
    return NULL;
}
1821+
1822+
17271823
static struct PyMethodDef methods[] = {
17281824
{"test_object", test_object, METH_NOARGS, _Py_NULL},
17291825
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1762,6 +1858,8 @@ static struct PyMethodDef methods[] = {
17621858
{"test_time", test_time, METH_NOARGS, _Py_NULL},
17631859
#endif
17641860
{"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
1861+
{"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
1862+
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
17651863
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
17661864
};
17671865

0 commit comments

Comments
 (0)