Skip to content

Commit f3e7ea5

Browse files
authored
bpo-39500: Document PyUnicode_IsIdentifier() function (GH-18397)
PyUnicode_IsIdentifier() does not call Py_FatalError() anymore if the string is not ready.
1 parent 1ea45ae commit f3e7ea5

File tree

4 files changed

+47
-15
lines changed

4 files changed

+47
-15
lines changed

Doc/c-api/unicode.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,16 @@ access internal read-only data of Unicode objects:
240240
:c:func:`PyUnicode_nBYTE_DATA` family of macros.
241241
242242
243+
.. c:function:: int PyUnicode_IsIdentifier(PyObject *o)
244+
245+
Return ``1`` if the string is a valid identifier according to the language
246+
definition, section :ref:`identifiers`. Return ``0`` otherwise.
247+
248+
.. versionchanged:: 3.9
249+
The function does not call :c:func:`Py_FatalError` anymore if the string
250+
is not ready.
251+
252+
243253
Unicode Character Properties
244254
""""""""""""""""""""""""""""
245255
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:c:func:`PyUnicode_IsIdentifier` does not call :c:func:`Py_FatalError`
2+
anymore if the string is not ready.

Objects/unicodeobject.c

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12198,22 +12198,33 @@ unicode_isnumeric_impl(PyObject *self)
1219812198
int
1219912199
PyUnicode_IsIdentifier(PyObject *self)
1220012200
{
12201-
int kind;
12202-
void *data;
1220312201
Py_ssize_t i;
12204-
Py_UCS4 first;
12202+
int ready = PyUnicode_IS_READY(self);
1220512203

12206-
if (PyUnicode_READY(self) == -1) {
12207-
Py_FatalError("identifier not ready");
12204+
Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
12205+
if (len == 0) {
12206+
/* an empty string is not a valid identifier */
1220812207
return 0;
1220912208
}
1221012209

12211-
/* Special case for empty strings */
12212-
if (PyUnicode_GET_LENGTH(self) == 0)
12213-
return 0;
12214-
kind = PyUnicode_KIND(self);
12215-
data = PyUnicode_DATA(self);
12210+
int kind;
12211+
void *data;
12212+
wchar_t *wstr;
12213+
if (ready) {
12214+
kind = PyUnicode_KIND(self);
12215+
data = PyUnicode_DATA(self);
12216+
}
12217+
else {
12218+
wstr = _PyUnicode_WSTR(self);
12219+
}
1221612220

12221+
Py_UCS4 ch;
12222+
if (ready) {
12223+
ch = PyUnicode_READ(kind, data, 0);
12224+
}
12225+
else {
12226+
ch = wstr[0];
12227+
}
1221712228
/* PEP 3131 says that the first character must be in
1221812229
XID_Start and subsequent characters in XID_Continue,
1221912230
and for the ASCII range, the 2.x rules apply (i.e
@@ -12222,13 +12233,21 @@ PyUnicode_IsIdentifier(PyObject *self)
1222212233
definition of XID_Start and XID_Continue, it is sufficient
1222312234
to check just for these, except that _ must be allowed
1222412235
as starting an identifier. */
12225-
first = PyUnicode_READ(kind, data, 0);
12226-
if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
12236+
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
1222712237
return 0;
12238+
}
1222812239

12229-
for (i = 1; i < PyUnicode_GET_LENGTH(self); i++)
12230-
if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i)))
12240+
for (i = 1; i < len; i++) {
12241+
if (ready) {
12242+
ch = PyUnicode_READ(kind, data, i);
12243+
}
12244+
else {
12245+
ch = wstr[i];
12246+
}
12247+
if (!_PyUnicode_IsXidContinue(ch)) {
1223112248
return 0;
12249+
}
12250+
}
1223212251
return 1;
1223312252
}
1223412253

Parser/tokenizer.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1079,8 +1079,9 @@ verify_identifier(struct tok_state *tok)
10791079
}
10801080
result = PyUnicode_IsIdentifier(s);
10811081
Py_DECREF(s);
1082-
if (result == 0)
1082+
if (result == 0) {
10831083
tok->done = E_IDENTIFIER;
1084+
}
10841085
return result;
10851086
}
10861087

0 commit comments

Comments
 (0)