Skip to content

bpo-40130: _PyUnicode_AsKind() should not be exported. #19265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
Py_ssize_t start,
Py_ssize_t end);

/* --- wchar_t support for platforms which support it --------------------- */

#ifdef HAVE_WCHAR_H
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
#endif

/* --- Manage the default encoding ---------------------------------------- */

/* Returns a pointer to the default encoding (UTF-8) of the
Expand Down
95 changes: 46 additions & 49 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
void *data = PyUnicode_DATA(unicode);
const char *end = str + len;

assert(index + len <= PyUnicode_GET_LENGTH(unicode));
switch (kind) {
case PyUnicode_1BYTE_KIND: {
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
#ifdef Py_DEBUG
if (PyUnicode_IS_ASCII(unicode)) {
Py_UCS4 maxchar = ucs1lib_find_max_char(
Expand All @@ -2060,25 +2060,25 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
case PyUnicode_2BYTE_KIND: {
Py_UCS2 *start = (Py_UCS2 *)data + index;
Py_UCS2 *ucs2 = start;
assert(index <= PyUnicode_GET_LENGTH(unicode));

for (; str < end; ++ucs2, ++str)
*ucs2 = (Py_UCS2)*str;

assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
default: {
case PyUnicode_4BYTE_KIND: {
Py_UCS4 *start = (Py_UCS4 *)data + index;
Py_UCS4 *ucs4 = start;
assert(kind == PyUnicode_4BYTE_KIND);
assert(index <= PyUnicode_GET_LENGTH(unicode));

for (; str < end; ++ucs4, ++str)
*ucs4 = (Py_UCS4)*str;

assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
default:
Py_UNREACHABLE();
}
}

Expand Down Expand Up @@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
if (max_char >= 256)
return;
}
else {
else if (kind == PyUnicode_4BYTE_KIND) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a switch/case would be more appropriate.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe. I left the chain of ifs because I am sure that in this case the compiler generates optimal code for PyUnicode_1BYTE_KIND, and I am not so sure about that with a switch/case. That may be a matter for a separate issue; I do not want to introduce a regression.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was just a remark. You can leave the code as it is, I already approved your PR.

const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
assert(kind == PyUnicode_4BYTE_KIND);
max_char = ucs4lib_find_max_char(u, u + len);
if (max_char >= 0x10000)
return;
}
else
Py_UNREACHABLE();

copy = PyUnicode_New(len, max_char);
if (copy != NULL)
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
Expand Down Expand Up @@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
/* Widen Unicode objects to larger buffers. Don't write terminating null
character. Return NULL on error. */

void*
_PyUnicode_AsKind(PyObject *s, unsigned int kind)
static void*
unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
{
Py_ssize_t len;
void *result;
unsigned int skind;

if (PyUnicode_READY(s) == -1)
return NULL;

len = PyUnicode_GET_LENGTH(s);
skind = PyUnicode_KIND(s);
if (skind >= kind) {
PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
return NULL;
}
assert(skind < kind);
switch (kind) {
case PyUnicode_2BYTE_KIND:
result = PyMem_New(Py_UCS2, len);
Expand All @@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2,
PyUnicode_1BYTE_DATA(s),
PyUnicode_1BYTE_DATA(s) + len,
(const Py_UCS1 *)data,
((const Py_UCS1 *)data) + len,
result);
return result;
case PyUnicode_4BYTE_KIND:
Expand All @@ -2536,24 +2528,23 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
if (skind == PyUnicode_2BYTE_KIND) {
_PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS4,
PyUnicode_2BYTE_DATA(s),
PyUnicode_2BYTE_DATA(s) + len,
(const Py_UCS2 *)data,
((const Py_UCS2 *)data) + len,
result);
}
else {
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS4,
PyUnicode_1BYTE_DATA(s),
PyUnicode_1BYTE_DATA(s) + len,
(const Py_UCS1 *)data,
((const Py_UCS1 *)data) + len,
result);
}
return result;
default:
break;
Py_UNREACHABLE();
return NULL;
}
PyErr_SetString(PyExc_SystemError, "invalid kind");
return NULL;
}

static Py_UCS4*
Expand Down Expand Up @@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
}

if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(s2, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -2;
}
Expand Down Expand Up @@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
goto onError;
}
Expand Down Expand Up @@ -10415,7 +10406,7 @@ split(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
Expand Down Expand Up @@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
Expand Down Expand Up @@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,

if (kind1 < rkind) {
/* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
Expand All @@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (rkind > kind2) {
/* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind);
buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (rkind < kind2) {
/* widen self and buf1 */
rkind = kind2;
if (release1) PyMem_Free(buf1);
release1 = 0;
sbuf = _PyUnicode_AsKind(self, rkind);
if (release1) {
PyMem_Free(buf1);
buf1 = PyUnicode_DATA(str1);
release1 = 0;
}
sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
Expand Down Expand Up @@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,

if (kind1 < rkind) {
/* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
Expand All @@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (kind2 < rkind) {
/* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind);
buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (kind2 > rkind) {
/* widen self and buf1 */
rkind = kind2;
sbuf = _PyUnicode_AsKind(self, rkind);
sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
if (release1) PyMem_Free(buf1);
release1 = 0;
buf1 = _PyUnicode_AsKind(str1, rkind);
if (release1) {
PyMem_Free(buf1);
buf1 = PyUnicode_DATA(str1);
release1 = 0;
}
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
Expand Down Expand Up @@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
return result;
}
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -1;
}
Expand Down Expand Up @@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
Expand Down Expand Up @@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
Expand Down Expand Up @@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
Expand Down