Skip to content

Commit 3a4f667

Browse files
gnprice authored and benjaminp committed
Cut disused recode_encoding logic in _PyBytes_DecodeEscape. (GH-16013)
All call sites pass NULL for `recode_encoding`, so this path is completely untested. That's been true since before Python 3.0. It adds significant complexity to this logic, so it's best to take it out. All call sites now have a literal NULL, and that's been true since commit 768921c eliminated a conditional (`foo ? bar : NULL`) at the call site in Python/ast.c where we're parsing a bytes literal. But even before then, that condition `foo` had been a constant since unadorned string literals started meaning Unicode, in commit 572dbf8 aka v3.0a1~1035. The `unicode` parameter is already unused, so mark it as unused too. The code that acted on it was also taken out before Python 3.0, in commit 8d30cc0 aka v3.0a1~1031. The function (PyBytes_DecodeEscape) is exposed in the API, but it's never been documented.
1 parent a44f3dc commit 3a4f667

File tree

4 files changed

+8
-63
lines changed

4 files changed

+8
-63
lines changed

Include/bytesobject.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,9 +77,7 @@ PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
7777
#ifndef Py_LIMITED_API
7878
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
7979
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
80-
const char *, Py_ssize_t,
81-
const char *,
82-
const char **);
80+
const char *, const char **);
8381
#endif
8482

8583
/* Macro, trading safety for speed */

Include/longobject.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);
7474
#endif
7575

7676
/* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
77-
_PyBytes_DecodeEscapeRecode(), etc. */
77+
_PyBytes_DecodeEscape(), etc. */
7878
#ifndef Py_LIMITED_API
7979
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
8080
#endif

Objects/bytesobject.c

Lines changed: 5 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1077,52 +1077,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
10771077
return NULL;
10781078
}
10791079

1080-
/* Unescape a backslash-escaped string. If unicode is non-zero,
1081-
the string is a u-literal. If recode_encoding is non-zero,
1082-
the string is UTF-8 encoded and should be re-encoded in the
1083-
specified encoding. */
1084-
1085-
static char *
1086-
_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1087-
const char *errors, const char *recode_encoding,
1088-
_PyBytesWriter *writer, char *p)
1089-
{
1090-
PyObject *u, *w;
1091-
const char* t;
1092-
1093-
t = *s;
1094-
/* Decode non-ASCII bytes as UTF-8. */
1095-
while (t < end && (*t & 0x80))
1096-
t++;
1097-
u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1098-
if (u == NULL)
1099-
return NULL;
1100-
1101-
/* Recode them in target encoding. */
1102-
w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1103-
Py_DECREF(u);
1104-
if (w == NULL)
1105-
return NULL;
1106-
assert(PyBytes_Check(w));
1107-
1108-
/* Append bytes to output buffer. */
1109-
writer->min_size--; /* subtract 1 preallocated byte */
1110-
p = _PyBytesWriter_WriteBytes(writer, p,
1111-
PyBytes_AS_STRING(w),
1112-
PyBytes_GET_SIZE(w));
1113-
Py_DECREF(w);
1114-
if (p == NULL)
1115-
return NULL;
1116-
1117-
*s = t;
1118-
return p;
1119-
}
1120-
1080+
/* Unescape a backslash-escaped string. */
11211081
PyObject *_PyBytes_DecodeEscape(const char *s,
11221082
Py_ssize_t len,
11231083
const char *errors,
1124-
Py_ssize_t unicode,
1125-
const char *recode_encoding,
11261084
const char **first_invalid_escape)
11271085
{
11281086
int c;
@@ -1142,17 +1100,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11421100
end = s + len;
11431101
while (s < end) {
11441102
if (*s != '\\') {
1145-
if (!(recode_encoding && (*s & 0x80))) {
1146-
*p++ = *s++;
1147-
}
1148-
else {
1149-
/* non-ASCII character and need to recode */
1150-
p = _PyBytes_DecodeEscapeRecode(&s, end,
1151-
errors, recode_encoding,
1152-
&writer, p);
1153-
if (p == NULL)
1154-
goto failed;
1155-
}
1103+
*p++ = *s++;
11561104
continue;
11571105
}
11581106

@@ -1241,12 +1189,11 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
12411189
PyObject *PyBytes_DecodeEscape(const char *s,
12421190
Py_ssize_t len,
12431191
const char *errors,
1244-
Py_ssize_t unicode,
1245-
const char *recode_encoding)
1192+
Py_ssize_t Py_UNUSED(unicode),
1193+
const char *Py_UNUSED(recode_encoding))
12461194
{
12471195
const char* first_invalid_escape;
1248-
PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249-
recode_encoding,
1196+
PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
12501197
&first_invalid_escape);
12511198
if (result == NULL)
12521199
return NULL;

Python/ast.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4766,7 +4766,7 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
47664766
size_t len)
47674767
{
47684768
const char *first_invalid_escape;
4769-
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4769+
PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
47704770
&first_invalid_escape);
47714771
if (result == NULL)
47724772
return NULL;

0 commit comments

Comments
 (0)