Skip to content

Commit 7adcb0d

Browse files
committed
pythongh-121940: Update so write size is limited larger
- Unify capping code to always respect character boundaries. - Make a soft cap rather than a hard limit - Make the cap for what _should_ be non-interactive / filesystem files 5 times that of interactive files (so big writes are faster) - Make the write size for expected interactive files 1 MB (1024 * 1024), which feels relatively responsive on my machines.
1 parent 7059c6f commit 7adcb0d

File tree

3 files changed

+99
-3
lines changed

3 files changed

+99
-3
lines changed

Include/internal/pycore_fileutils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,11 @@ extern int _Py_GetTicksPerSecond(long *ticks_per_second);
329329
// Export for '_testcapi' shared extension
330330
PyAPI_FUNC(int) _Py_IsValidFD(int fd);
331331

332+
#ifdef MS_WINDOWS
333+
size_t _Py_LimitConsoleWriteSize(const void *buf, size_t requested_size,
334+
size_t cap_size);
335+
#endif
336+
332337
#ifdef __cplusplus
333338
}
334339
#endif

Modules/_io/winconsoleio.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@
4848
of less than one character */
4949
#define SMALLBUF 4
5050

51+
/* Limit write size to consoles so that interrupts feel
52+
responsive. */
53+
#define WRITE_LIMIT_CONSOLE (1024 * 1024)
54+
5155
char _get_console_type(HANDLE handle) {
5256
DWORD mode, peek_count;
5357

@@ -134,7 +138,6 @@ char _PyIO_get_console_type(PyObject *path_or_fd) {
134138
return m;
135139
}
136140

137-
138141
/*[clinic input]
139142
module _io
140143
class _io._WindowsConsoleIO "winconsoleio *" "clinic_state()->PyWindowsConsoleIO_Type"
@@ -999,11 +1002,21 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
9991002
if (!b->len) {
10001003
return PyLong_FromLong(0);
10011004
}
1002-
if (b->len > BUFMAX)
1003-
len = BUFMAX;
1005+
/* Ensure len fits in a DWORD. This cap is larger than the write
1006+
limit because it doesn't respect UTF-8 character boundaries.
1007+
Rely on _Py_LimitConsoleWriteSize to do a character split. */
1008+
if (b->len > WRITE_LIMIT_CONSOLE * 2)
1009+
len = WRITE_LIMIT_CONSOLE * 2;
10041010
else
10051011
len = (DWORD)b->len;
10061012

1013+
1014+
/* Limit console write size to keep interactivity.
1015+
1016+
This is a soft cap / wlen may be higher, but that is
1017+
okay because it isn't a hard OS limit in Windows 8+. */
1018+
len = (DWORD)_Py_LimitConsoleWriteSize(b->buf, len, WRITE_LIMIT_CONSOLE);
1019+
10071020
Py_BEGIN_ALLOW_THREADS
10081021
wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0);
10091022
Py_END_ALLOW_THREADS

Python/fileutils.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,17 @@ int _Py_open_cloexec_works = -1;
5252
// The value must be the same in unicodeobject.c.
5353
#define MAX_UNICODE 0x10ffff
5454

55+
/* Limit write size on terminals in Windows to keep the interpreter
56+
feeling responsive.
57+
58+
This is higher than WRITE_LIMIT_CONSOLE because `.write()`
59+
is targeted at non-console I/O (but may happen to touch a tty). Use
60+
WinConsoleIO for best console interactivity.
61+
62+
This should ideally be bigger than DEFAULT_BUFFER_SIZE so common
63+
case write to file on disk is quick. */
64+
#define WRITE_LIMIT_INTERACTIVE (5 * 1024 * 1024)
65+
5566
// mbstowcs() and mbrtowc() errors
5667
static const size_t DECODE_ERROR = ((size_t)-1);
5768
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
@@ -1922,6 +1933,24 @@ _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
19221933
int async_err = 0;
19231934

19241935
_Py_BEGIN_SUPPRESS_IPH
1936+
#ifdef MS_WINDOWS
1937+
/* isatty is guarded because we don't want it in the common case of
1938+
writing DEFAULT_BUFFER_SIZE to regular files (gh-121940). */
1939+
if (count > WRITE_LIMIT_INTERACTIVE) {
1940+
if (gil_held) {
1941+
Py_BEGIN_ALLOW_THREADS
1942+
if (isatty(fd)) {
1943+
count = _Py_LimitConsoleWriteSize(buf, count, WRITE_LIMIT_INTERACTIVE);
1944+
}
1945+
Py_END_ALLOW_THREADS
1946+
} else {
1947+
if (isatty(fd)) {
1948+
count = _Py_LimitConsoleWriteSize(buf, count, WRITE_LIMIT_INTERACTIVE);
1949+
}
1950+
}
1951+
}
1952+
1953+
#endif
19251954
if (count > _PY_WRITE_MAX) {
19261955
count = _PY_WRITE_MAX;
19271956
}
@@ -3081,3 +3110,52 @@ _Py_IsValidFD(int fd)
30813110
return (fstat(fd, &st) == 0);
30823111
#endif
30833112
}
3113+
3114+
#ifdef MS_WINDOWS
3115+
/* Return the length of the longest prefix of buf[0..len) that does not
   end in the middle of a UTF-8 multi-byte sequence.

   buf: bytes expected to hold UTF-8 text.
   len: number of bytes of buf to consider.

   Only a trailing sequence that is provably incomplete is trimmed: the
   lead byte announces more bytes than are present before `len`.  A
   complete trailing character, plain ASCII, or malformed data (stray
   continuation bytes, invalid lead bytes) leaves `len` unchanged so no
   valid character is dropped needlessly.

   For non-empty input this function never returns 0; when trimming would
   leave nothing, the original len is returned instead so the caller
   always makes progress. */
static size_t
_find_last_utf8_boundary(const char *buf, size_t len)
{
    /* Inspect bytes as unsigned so the range comparisons below do not
       depend on the signedness of plain char. */
    const unsigned char *bytes = (const unsigned char *)buf;

    /* A UTF-8 sequence is at most 4 bytes long, so the lead byte of the
       final character, if any, is among the last 4 bytes. */
    for (size_t count = 1; count <= 4 && count <= len; count++) {
        unsigned char c = bytes[len - count];

        if (c < 0x80) {
            /* ASCII byte: either it is the last byte, or the bytes after
               it are stray continuation bytes.  Nothing to trim. */
            return len;
        }
        if (c >= 0xc0) {
            /* Lead byte: derive the total length it announces
               (0 for bytes that can never start a sequence). */
            size_t need = c < 0xe0 ? 2 :
                          c < 0xf0 ? 3 :
                          c < 0xf8 ? 4 : 0;

            /* Trim only an incomplete sequence, and only when that
               leaves a non-empty prefix. */
            if (count < need && count < len) {
                return len - count;
            }
            return len;
        }
        /* Otherwise c is a continuation byte (0x80..0xbf): keep
           scanning backwards for the lead byte. */
    }

    /* No lead byte within reach: the tail is malformed or the buffer
       is empty; leave the length untouched. */
    return len;
}
3132+
3133+
/* Put a soft limit on the number of bytes to be written.
3134+
3135+
In older versions of Windows a hard limit was necessary because
3136+
there was a hard limit to the number of bytes (bpo-11395), but that
3137+
is not the case in Windows 8+.
3138+
3139+
For Windows 8+ the console host synchronizes I/O operations which
3140+
means a Ctrl-C doesn't generate an interrupt until after the write
3141+
is completed. That means large writes which take multiple seconds
3142+
will reduce responsiveness to interrupts.
3143+
3144+
This does a "soft cap" (not exact number of utf-16 bytes, but close
3145+
enough) to maintain responsiveness of consoles on
3146+
Windows (gh-121940). */
3147+
/* Apply a soft cap to a pending write.

   buf:            bytes about to be written.
   requested_size: number of bytes the caller wants to write.
   cap_size:       soft upper bound on the number of bytes to write.

   Returns requested_size unchanged when it is within the cap; otherwise
   returns the length chosen by _find_last_utf8_boundary() for the first
   cap_size bytes of buf. */
size_t
_Py_LimitConsoleWriteSize(const void *buf, size_t requested_size,
                          size_t cap_size)
{
    if (requested_size > cap_size) {
        /* Fix for github issues gh-110913 and gh-82052.

           UTF-8 must not be cut at an arbitrary byte offset, otherwise
           the user is shown broken byte sequences; let the boundary
           helper pick the cut point at or below the cap. */
        const char *bytes = buf;
        return _find_last_utf8_boundary(bytes, cap_size);
    }

    /* Already within the cap: nothing to limit. */
    return requested_size;
}
3160+
3161+
#endif

0 commit comments

Comments
 (0)