Skip to content

Commit fd84166

Browse files
authored
Revert "bpo-39087: Add _PyUnicode_GetUTF8Buffer() (GH-17659)"
This reverts commit c7ad974.
1 parent c7ad974 commit fd84166

File tree

5 files changed

+6
-284
lines changed

5 files changed

+6
-284
lines changed

Include/cpython/unicodeobject.h

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -734,19 +734,6 @@ PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
734734

735735
/* --- Manage the default encoding ---------------------------------------- */
736736

737-
/* Get a buffer to the UTF-8 encoding of the Unicode object unicode.
738-
Returns -1 on error.
739-
740-
Successful calls must be paired with
741-
calls to PyBuffer_Release.
742-
*/
743-
744-
PyAPI_FUNC(int) _PyUnicode_GetUTF8Buffer(
745-
PyObject *unicode, /* Unicode object */
746-
const char *errors, /* error handling */
747-
Py_buffer *view /* (out) buffer to the UTF-8 encoding */
748-
);
749-
750737
/* Returns a pointer to the default encoding (UTF-8) of the
751738
Unicode object unicode and the size of the encoded representation
752739
in bytes stored in *size.
@@ -759,6 +746,12 @@ PyAPI_FUNC(int) _PyUnicode_GetUTF8Buffer(
759746
760747
_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
761748
support the previous internal function with the same behaviour.
749+
750+
*** This API is for interpreter INTERNAL USE ONLY and will likely
751+
*** be removed or changed in the future.
752+
753+
*** If you need to access the Unicode object as a UTF-8 bytes string,
754+
*** please use PyUnicode_AsUTF8String() instead.
762755
*/
763756

764757
PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(

Lib/test/test_unicode.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2830,28 +2830,6 @@ def test_asucs4(self):
28302830
self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
28312831
self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')
28322832

2833-
# Test _PyUnicode_GetUTF8Buffer()
2834-
@support.cpython_only
2835-
def test_getutf8buffer(self):
2836-
from _testcapi import unicode_getutf8buffer, unicode_test_getutf8buffer
2837-
2838-
# Run tests written in C. An error is raised when a test fails.
2839-
unicode_test_getutf8buffer()
2840-
2841-
ascii_ = "foo"
2842-
bmp = '\u0100'
2843-
bmp2 = '\uffff'
2844-
nonbmp = chr(0x10ffff)
2845-
surrogates = 'a\ud800b\udfffc'
2846-
2847-
self.assertEqual(unicode_getutf8buffer(ascii_), b'foo')
2848-
self.assertEqual(unicode_getutf8buffer(bmp), b'\xc4\x80')
2849-
self.assertEqual(unicode_getutf8buffer(bmp2), b'\xef\xbf\xbf')
2850-
self.assertEqual(unicode_getutf8buffer(nonbmp), b'\xf4\x8f\xbf\xbf')
2851-
self.assertRaises(UnicodeEncodeError, unicode_getutf8buffer, surrogates)
2852-
self.assertEqual(unicode_getutf8buffer(surrogates, "surrogatepass"),
2853-
b'a\xed\xa0\x80b\xed\xbf\xbfc')
2854-
28552833
# Test PyUnicode_AsUTF8()
28562834
@support.cpython_only
28572835
def test_asutf8(self):

Misc/NEWS.d/next/C API/2019-12-19-21-19-53.bpo-39087.l4A11-.rst

Lines changed: 0 additions & 2 deletions
This file was deleted.

Modules/_testcapimodule.c

Lines changed: 0 additions & 212 deletions
Original file line numberDiff line numberDiff line change
@@ -1967,216 +1967,6 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
19671967
return Py_BuildValue("(Nn)", result, utf8_len);
19681968
}
19691969

1970-
static PyObject *
1971-
unicode_getutf8buffer(PyObject *self, PyObject *args)
1972-
{
1973-
PyObject *unicode;
1974-
const char *errors = NULL;
1975-
if(!PyArg_ParseTuple(args, "O|s", &unicode, &errors)) {
1976-
return NULL;
1977-
}
1978-
1979-
Py_buffer buffer;
1980-
if (_PyUnicode_GetUTF8Buffer(unicode, errors, &buffer) < 0) {
1981-
return NULL;
1982-
}
1983-
1984-
assert(buffer.obj != NULL);
1985-
assert(buffer.obj == unicode || PyBytes_CheckExact(buffer.obj));
1986-
1987-
PyObject *result = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
1988-
PyBuffer_Release(&buffer);
1989-
return result;
1990-
}
1991-
1992-
static PyObject *
1993-
unicode_test_getutf8buffer(PyObject *self, PyObject *Py_UNUSED(ignored))
1994-
{
1995-
Py_buffer buf;
1996-
1997-
// Test 1: ASCII string
1998-
PyObject *str = PyUnicode_FromString("hello");
1999-
if (str == NULL) {
2000-
return NULL;
2001-
}
2002-
Py_ssize_t refcnt = Py_REFCNT(str);
2003-
2004-
// _PyUnicode_GetUTF8Buffer() must not fail for ASCII string.
2005-
int ret = _PyUnicode_GetUTF8Buffer(str, NULL, &buf);
2006-
assert(ret == 0);
2007-
2008-
if (buf.obj != str) {
2009-
PyErr_Format(TestError,
2010-
"buf.obj must be equal to str. (%s:%d)",
2011-
__FILE__, __LINE__);
2012-
PyBuffer_Release(&buf);
2013-
Py_DECREF(str);
2014-
return NULL;
2015-
}
2016-
2017-
if (buf.len != PyUnicode_GET_LENGTH(str)) {
2018-
PyErr_Format(TestError,
2019-
"buf.len must be equal to len(str). (%s:%d)",
2020-
__FILE__, __LINE__);
2021-
PyBuffer_Release(&buf);
2022-
Py_DECREF(str);
2023-
return NULL;
2024-
}
2025-
assert(((const char*)buf.buf)[5] == '\0');
2026-
2027-
if ((Py_UCS1*)buf.buf != PyUnicode_1BYTE_DATA(str)) {
2028-
PyErr_Format(TestError,
2029-
"buf.buf must be equal to PyUnicode_1BYTE_DATA(str). (%s:%d)",
2030-
__FILE__, __LINE__);
2031-
PyBuffer_Release(&buf);
2032-
Py_DECREF(str);
2033-
return NULL;
2034-
}
2035-
2036-
if (refcnt + 1 != Py_REFCNT(str)) {
2037-
PyErr_Format(TestError,
2038-
"Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
2039-
refcnt + 1, Py_REFCNT(str),
2040-
__FILE__, __LINE__);
2041-
PyBuffer_Release(&buf);
2042-
Py_DECREF(str);
2043-
return NULL;
2044-
}
2045-
2046-
PyBuffer_Release(&buf);
2047-
2048-
if (refcnt != Py_REFCNT(str)) {
2049-
PyErr_Format(TestError,
2050-
"Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
2051-
refcnt, Py_REFCNT(str),
2052-
__FILE__, __LINE__);
2053-
Py_DECREF(str);
2054-
return NULL;
2055-
}
2056-
2057-
Py_DECREF(str);
2058-
2059-
// Test 2: non-ASCII string
2060-
2061-
// "hello" in Japanese. len(str)==5, len(str.encode()) == 15.
2062-
str = PyUnicode_FromString("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf");
2063-
if (str == NULL) {
2064-
return NULL;
2065-
}
2066-
refcnt = Py_REFCNT(str);
2067-
assert(PyUnicode_GET_LENGTH(str) == 5);
2068-
2069-
if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
2070-
Py_DECREF(str);
2071-
if (!PyErr_Occurred()) {
2072-
PyErr_Format(TestError,
2073-
"_PyUnicode_GetUTF8Buffer() returned nonzero "
2074-
"without exception set. (%s:%d)",
2075-
__FILE__, __LINE__);
2076-
}
2077-
return NULL;
2078-
}
2079-
2080-
if (!PyBytes_CheckExact(buf.obj)) {
2081-
PyErr_Format(TestError,
2082-
"buf.obj must be a bytes object, got %R (%s:%d)",
2083-
buf.obj, __FILE__, __LINE__);
2084-
PyBuffer_Release(&buf);
2085-
Py_DECREF(str);
2086-
return NULL;
2087-
}
2088-
2089-
if (buf.len != 15) {
2090-
PyErr_Format(TestError,
2091-
"Expected buf.len == 15, actual %zd (%s:%d)",
2092-
buf.len, __FILE__, __LINE__);
2093-
PyBuffer_Release(&buf);
2094-
Py_DECREF(str);
2095-
return NULL;
2096-
}
2097-
assert(((const char*)buf.buf)[15] == '\0');
2098-
2099-
if (refcnt != Py_REFCNT(str)) {
2100-
PyErr_Format(TestError,
2101-
"Py_REFCNT(str) must not be changed. (%s:%d)",
2102-
__FILE__, __LINE__);
2103-
// Do not DECREF here because refcnt is broken.
2104-
return NULL;
2105-
}
2106-
2107-
PyBuffer_Release(&buf);
2108-
2109-
// Test 3: There is a UTF-8 cache
2110-
// Reuse str of the previous test.
2111-
2112-
const char *cache = PyUnicode_AsUTF8(str);
2113-
if (cache == NULL) {
2114-
return NULL;
2115-
}
2116-
2117-
if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
2118-
Py_DECREF(str);
2119-
if (!PyErr_Occurred()) {
2120-
PyErr_Format(TestError,
2121-
"_PyUnicode_GetUTF8Buffer() returned nonzero "
2122-
"without exception set. (%s:%d)",
2123-
__FILE__, __LINE__);
2124-
}
2125-
return NULL;
2126-
}
2127-
2128-
if (buf.obj != str) {
2129-
PyErr_Format(TestError,
2130-
"buf.obj must be equal to str. (%s:%d)",
2131-
__FILE__, __LINE__);
2132-
PyBuffer_Release(&buf);
2133-
Py_DECREF(str);
2134-
return NULL;
2135-
}
2136-
2137-
if (buf.buf != cache) {
2138-
PyErr_Format(TestError,
2139-
"buf.buf must be equal to the UTF-8 cache (%s:%d)",
2140-
__FILE__, __LINE__);
2141-
PyBuffer_Release(&buf);
2142-
Py_DECREF(str);
2143-
return NULL;
2144-
}
2145-
2146-
if (buf.len != 15) {
2147-
PyErr_Format(TestError,
2148-
"Expected buf.len == 15, actual %zd (%s:%d)",
2149-
buf.len, __FILE__, __LINE__);
2150-
PyBuffer_Release(&buf);
2151-
Py_DECREF(str);
2152-
return NULL;
2153-
}
2154-
assert(((const char*)buf.buf)[15] == '\0');
2155-
2156-
if (refcnt + 1 != Py_REFCNT(str)) {
2157-
PyErr_Format(TestError,
2158-
"Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
2159-
refcnt + 1, Py_REFCNT(str),
2160-
__FILE__, __LINE__);
2161-
// Do not DECREF here because refcnt is broken.
2162-
return NULL;
2163-
}
2164-
2165-
PyBuffer_Release(&buf);
2166-
2167-
if (refcnt != Py_REFCNT(str)) {
2168-
PyErr_Format(TestError,
2169-
"Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
2170-
refcnt, Py_REFCNT(str),
2171-
__FILE__, __LINE__);
2172-
// Do not DECREF here because refcnt is broken.
2173-
return NULL;
2174-
}
2175-
2176-
Py_DECREF(str);
2177-
Py_RETURN_NONE;
2178-
}
2179-
21801970
static PyObject *
21811971
unicode_findchar(PyObject *self, PyObject *args)
21821972
{
@@ -5602,8 +5392,6 @@ static PyMethodDef TestMethods[] = {
56025392
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
56035393
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
56045394
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
5605-
{"unicode_getutf8buffer", unicode_getutf8buffer, METH_VARARGS},
5606-
{"unicode_test_getutf8buffer", unicode_test_getutf8buffer, METH_NOARGS},
56075395
{"unicode_findchar", unicode_findchar, METH_VARARGS},
56085396
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
56095397
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},

Objects/unicodeobject.c

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3991,41 +3991,6 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
39913991
}
39923992

39933993

3994-
int
3995-
_PyUnicode_GetUTF8Buffer(PyObject *unicode, const char *errors,
3996-
Py_buffer *view)
3997-
{
3998-
if (!PyUnicode_Check(unicode)) {
3999-
PyErr_BadArgument();
4000-
return -1;
4001-
}
4002-
if (PyUnicode_READY(unicode) == -1) {
4003-
return -1;
4004-
}
4005-
4006-
if (PyUnicode_UTF8(unicode) != NULL
4007-
&& Py_TYPE(unicode)->tp_as_buffer == NULL) {
4008-
return PyBuffer_FillInfo(view, unicode,
4009-
PyUnicode_UTF8(unicode),
4010-
PyUnicode_UTF8_LENGTH(unicode),
4011-
/* readonly */ 1, PyBUF_SIMPLE);
4012-
}
4013-
4014-
// Unlike PyUnicode_AsUTF8AndSize(), this function doesn't
4015-
// create a UTF-8 cache for speed and efficiency.
4016-
PyObject *bytes = _PyUnicode_AsUTF8String(unicode, errors);
4017-
if (bytes == NULL) {
4018-
return -1;
4019-
}
4020-
assert(PyBytes_CheckExact(bytes));
4021-
if (PyObject_GetBuffer(bytes, view, PyBUF_SIMPLE) < 0) {
4022-
Py_DECREF(bytes);
4023-
return -1;
4024-
}
4025-
return 0;
4026-
}
4027-
4028-
40293994
static int unicode_fill_utf8(PyObject *unicode);
40303995

40313996
const char *

0 commit comments

Comments
 (0)