Skip to content

Commit 5150795

Browse files
authored
gh-119182: Optimize PyUnicode_FromFormat() (#120796)
Use strchr() and ucs1lib_find_max_char() to optimize the code path formatting sub-strings between '%' formats.
1 parent 85d90b5 commit 5150795

File tree

1 file changed

+26
-26
lines changed

1 file changed

+26
-26
lines changed

Objects/unicodeobject.c

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
28752875
static int
28762876
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
28772877
{
2878-
writer->min_length += strlen(format) + 100;
2878+
Py_ssize_t len = strlen(format);
2879+
writer->min_length += len + 100;
28792880
writer->overallocate = 1;
28802881

2881-
va_list vargs2;
2882-
const char *f;
2883-
28842882
// Copy varargs to be able to pass a reference to a subfunction.
2883+
va_list vargs2;
28852884
va_copy(vargs2, vargs);
28862885

2887-
for (f = format; *f; ) {
2886+
// _PyUnicodeWriter_WriteASCIIString() below requires the format string
2887+
// to be encoded to ASCII.
2888+
int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
2889+
if (!is_ascii) {
2890+
Py_ssize_t i;
2891+
for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
2892+
PyErr_Format(PyExc_ValueError,
2893+
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
2894+
"string, got a non-ASCII byte: 0x%02x",
2895+
(unsigned char)format[i]);
2896+
goto fail;
2897+
}
2898+
2899+
for (const char *f = format; *f; ) {
28882900
if (*f == '%') {
28892901
f = unicode_fromformat_arg(writer, f, &vargs2);
28902902
if (f == NULL)
28912903
goto fail;
28922904
}
28932905
else {
2894-
const char *p;
2895-
Py_ssize_t len;
2896-
2897-
p = f;
2898-
do
2899-
{
2900-
if ((unsigned char)*p > 127) {
2901-
PyErr_Format(PyExc_ValueError,
2902-
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
2903-
"string, got a non-ASCII byte: 0x%02x",
2904-
(unsigned char)*p);
2905-
goto fail;
2906-
}
2907-
p++;
2906+
const char *p = strchr(f, '%');
2907+
if (p != NULL) {
2908+
len = p - f;
29082909
}
2909-
while (*p != '\0' && *p != '%');
2910-
len = p - f;
2911-
2912-
if (*p == '\0')
2910+
else {
2911+
len = strlen(f);
29132912
writer->overallocate = 0;
2913+
}
29142914

2915-
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
2915+
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
29162916
goto fail;
2917-
2918-
f = p;
2917+
}
2918+
f += len;
29192919
}
29202920
}
29212921
va_end(vargs2);

0 commit comments

Comments
 (0)