Skip to content

gh-128013: Convert unicodeobject.c macros to functions #128061

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 78 additions & 45 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,47 +112,80 @@ NOTE: In the interpreter's initialization phase, some globals are currently
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#endif

#define _PyUnicode_UTF8(op) \
(_PyCompactUnicodeObject_CAST(op)->utf8)
#define PyUnicode_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
_PyUnicode_UTF8(op))
#define _PyUnicode_UTF8_LENGTH(op) \
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
#define PyUnicode_UTF8_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
PyUnicode_IS_COMPACT_ASCII(op) ? \
_PyASCIIObject_CAST(op)->length : \
_PyUnicode_UTF8_LENGTH(op))
static inline char* _PyUnicode_UTF8(PyObject *op)
{
return (_PyCompactUnicodeObject_CAST(op)->utf8);
}

static inline char* PyUnicode_UTF8(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
if (PyUnicode_IS_COMPACT_ASCII(op)) {
return ((char*)(_PyASCIIObject_CAST(op) + 1));
}
else {
return _PyUnicode_UTF8(op);
}
}

static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
{
_PyCompactUnicodeObject_CAST(op)->utf8 = utf8;
}

static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
if (PyUnicode_IS_COMPACT_ASCII(op)) {
return _PyASCIIObject_CAST(op)->length;
}
else {
return _PyCompactUnicodeObject_CAST(op)->utf8_length;
}
}

static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
{
_PyCompactUnicodeObject_CAST(op)->utf8_length = length;
}

#define _PyUnicode_LENGTH(op) \
(_PyASCIIObject_CAST(op)->length)
#define _PyUnicode_STATE(op) \
(_PyASCIIObject_CAST(op)->state)
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
#define _PyUnicode_KIND(op) \
(assert(_PyUnicode_CHECK(op)), \
_PyASCIIObject_CAST(op)->state.kind)
#define _PyUnicode_GET_LENGTH(op) \
(assert(_PyUnicode_CHECK(op)), \
_PyASCIIObject_CAST(op)->length)

static inline Py_hash_t PyUnicode_HASH(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
}

static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
{
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash);
}

#define _PyUnicode_DATA_ANY(op) \
(_PyUnicodeObject_CAST(op)->data.any)

#define _PyUnicode_SHARE_UTF8(op) \
(assert(_PyUnicode_CHECK(op)), \
assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
(_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
static inline int _PyUnicode_SHARE_UTF8(PyObject *op)
{
assert(_PyUnicode_CHECK(op));
assert(!PyUnicode_IS_COMPACT_ASCII(op));
return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op));
}

/* true if the Unicode object has an allocated UTF-8 memory block
(not shared with other data) */
#define _PyUnicode_HAS_UTF8_MEMORY(op) \
((!PyUnicode_IS_COMPACT_ASCII(op) \
&& _PyUnicode_UTF8(op) \
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
{
return (!PyUnicode_IS_COMPACT_ASCII(op)
&& _PyUnicode_UTF8(op) != NULL
&& _PyUnicode_UTF8(op) != PyUnicode_DATA(op));
}


/* Generic helper macro to convert characters of different types.
from_type and to_type have to be valid type names, begin and end
Expand Down Expand Up @@ -1123,8 +1156,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)

if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyMem_Free(_PyUnicode_UTF8(unicode));
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
PyUnicode_SET_UTF8(unicode, NULL);
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
}
#ifdef Py_TRACE_REFS
_Py_ForgetReference(unicode);
Expand Down Expand Up @@ -1177,8 +1210,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
{
PyMem_Free(_PyUnicode_UTF8(unicode));
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
PyUnicode_SET_UTF8(unicode, NULL);
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
}

data = (PyObject *)PyObject_Realloc(data, new_size);
Expand All @@ -1188,8 +1221,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
}
_PyUnicode_DATA_ANY(unicode) = data;
if (share_utf8) {
_PyUnicode_UTF8(unicode) = data;
_PyUnicode_UTF8_LENGTH(unicode) = length;
PyUnicode_SET_UTF8(unicode, data);
PyUnicode_SET_UTF8_LENGTH(unicode, length);
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
Expand Down Expand Up @@ -1769,7 +1802,7 @@ unicode_modifiable(PyObject *unicode)
assert(_PyUnicode_CHECK(unicode));
if (Py_REFCNT(unicode) != 1)
return 0;
if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1)
if (PyUnicode_HASH(unicode) != -1)
return 0;
if (PyUnicode_CHECK_INTERNED(unicode))
return 0;
Expand Down Expand Up @@ -5862,8 +5895,8 @@ unicode_fill_utf8(PyObject *unicode)
PyErr_NoMemory();
return -1;
}
_PyUnicode_UTF8(unicode) = cache;
_PyUnicode_UTF8_LENGTH(unicode) = len;
PyUnicode_SET_UTF8(unicode, cache);
PyUnicode_SET_UTF8_LENGTH(unicode, len);
memcpy(cache, start, len);
cache[len] = '\0';
_PyBytesWriter_Dealloc(&writer);
Expand Down Expand Up @@ -11434,9 +11467,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
return 0;
}

Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni));
Py_hash_t right_hash = PyUnicode_HASH(right_uni);
assert(right_hash != -1);
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left));
Py_hash_t hash = PyUnicode_HASH(left);
if (hash != -1 && hash != right_hash) {
return 0;
}
Expand Down Expand Up @@ -11916,14 +11949,14 @@ unicode_hash(PyObject *self)
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self));
Py_hash_t hash = PyUnicode_HASH(self);
if (hash != -1) {
return hash;
}
x = Py_HashBuffer(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));

FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
PyUnicode_SET_HASH(self, x);
return x;
}

Expand Down Expand Up @@ -15427,8 +15460,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
_PyUnicode_STATE(self).statically_allocated = 0;
_PyUnicode_UTF8_LENGTH(self) = 0;
_PyUnicode_UTF8(self) = NULL;
PyUnicode_SET_UTF8_LENGTH(self, 0);
PyUnicode_SET_UTF8(self, NULL);
_PyUnicode_DATA_ANY(self) = NULL;

share_utf8 = 0;
Expand Down Expand Up @@ -15458,8 +15491,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)

_PyUnicode_DATA_ANY(self) = data;
if (share_utf8) {
_PyUnicode_UTF8_LENGTH(self) = length;
_PyUnicode_UTF8(self) = data;
PyUnicode_SET_UTF8_LENGTH(self, length);
PyUnicode_SET_UTF8(self, data);
}

memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));
Expand Down
Loading