Skip to content

Commit eefc9db

Browse files
nascheme authored and GadgetSteve committed
bpo-17852: Maintain a list of BufferedWriter objects. Flush them on exit. (python#1908)
* Maintain a list of BufferedWriter objects. Flush them on exit. In Python 3, the buffer and the underlying file object are separate and so the order in which objects are finalized matters. This is unlike Python 2 where the file and buffer were a single object and finalization was done for both at the same time. In Python 3, if the file is finalized and closed before the buffer then the data in the buffer is lost. This change adds a doubly linked list of open file buffers. An atexit hook ensures they are flushed before proceeding with interpreter shutdown. This addition does not remove the need to properly close files as there are other reasons why buffered data could get lost during finalization. Initial patch by Armin Rigo. * Use weakref.WeakSet instead of WeakKeyDictionary. * Simplify buffered double-linked list types. * In _flush_all_writers(), suppress errors from flush(). * Remove NEWS entry, use blurb.
1 parent e6091aa commit eefc9db

File tree

5 files changed

+75
-1
lines changed

5 files changed

+75
-1
lines changed

Lib/_pyio.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,7 @@ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
11851185
self.buffer_size = buffer_size
11861186
self._write_buf = bytearray()
11871187
self._write_lock = Lock()
1188+
_register_writer(self)
11881189

11891190
def writable(self):
11901191
return self.raw.writable()
@@ -2574,3 +2575,26 @@ def encoding(self):
25742575
def detach(self):
25752576
# This doesn't make sense on StringIO.
25762577
self._unsupported("detach")
2578+
2579+
2580+
# ____________________________________________________________
2581+
2582+
import atexit, weakref
2583+
2584+
_all_writers = weakref.WeakSet()
2585+
2586+
def _register_writer(w):
    """Remember buffered writer *w* so it can be flushed at exit.

    Only a weak reference is kept, so registration does not by itself
    keep the writer alive.
    """
    _all_writers.add(w)
2589+
2590+
def _flush_all_writers():
    """Flush every registered buffered writer.

    Ensures all buffered writers are flushed before proceeding with
    normal shutdown.  Otherwise, if the underlying file object gets
    finalized before the buffered writer wrapping it, any buffered data
    will be lost.
    """
    # Work on a snapshot: if a flush() call ends up registering another
    # writer, the set must not change size underneath the iteration.
    for w in list(_all_writers):
        try:
            w.flush()
        except Exception:
            # Best effort: one failing writer must not prevent the
            # remaining writers from being flushed.  Only Exception is
            # suppressed so KeyboardInterrupt/SystemExit still propagate.
            pass
2600+
atexit.register(_flush_all_writers)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Maintain a list of open buffered files, flush them before exiting the
2+
interpreter. Based on a patch from Armin Rigo.

Modules/_io/_iomodule.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,8 @@ PyInit__io(void)
766766
!(_PyIO_empty_bytes = PyBytes_FromStringAndSize(NULL, 0)))
767767
goto fail;
768768

769+
_Py_PyAtExit(_PyIO_atexit_flush);
770+
769771
state->initialized = 1;
770772

771773
return m;

Modules/_io/_iomodule.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,5 @@ extern PyObject *_PyIO_empty_str;
183183
extern PyObject *_PyIO_empty_bytes;
184184

185185
extern PyTypeObject _PyBytesIOBuffer_Type;
186+
187+
extern void _PyIO_atexit_flush(void);

Modules/_io/bufferedio.c

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ bufferediobase_write(PyObject *self, PyObject *args)
196196
}
197197

198198

199-
typedef struct {
199+
typedef struct _buffered {
200200
PyObject_HEAD
201201

202202
PyObject *raw;
@@ -240,8 +240,18 @@ typedef struct {
240240

241241
PyObject *dict;
242242
PyObject *weakreflist;
243+
244+
/* a doubly-linked chained list of "buffered" objects that need to
245+
be flushed when the process exits */
246+
struct _buffered *next, *prev;
243247
} buffered;
244248

249+
/* the actual list of buffered objects */
250+
static buffered buffer_list_end = {
251+
.next = &buffer_list_end,
252+
.prev = &buffer_list_end
253+
};
254+
245255
/*
246256
Implementation notes:
247257
@@ -386,6 +396,15 @@ _enter_buffered_busy(buffered *self)
386396
(self->buffer_size * (size / self->buffer_size)))
387397

388398

399+
static void
400+
remove_from_linked_list(buffered *self)
401+
{
402+
self->next->prev = self->prev;
403+
self->prev->next = self->next;
404+
self->prev = NULL;
405+
self->next = NULL;
406+
}
407+
389408
static void
390409
buffered_dealloc(buffered *self)
391410
{
@@ -394,6 +413,8 @@ buffered_dealloc(buffered *self)
394413
return;
395414
_PyObject_GC_UNTRACK(self);
396415
self->ok = 0;
416+
if (self->next != NULL)
417+
remove_from_linked_list(self);
397418
if (self->weakreflist != NULL)
398419
PyObject_ClearWeakRefs((PyObject *)self);
399420
Py_CLEAR(self->raw);
@@ -1817,10 +1838,33 @@ _io_BufferedWriter___init___impl(buffered *self, PyObject *raw,
18171838
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type &&
18181839
Py_TYPE(raw) == &PyFileIO_Type);
18191840

1841+
if (self->next == NULL) {
1842+
self->prev = &buffer_list_end;
1843+
self->next = buffer_list_end.next;
1844+
buffer_list_end.next->prev = self;
1845+
buffer_list_end.next = self;
1846+
}
1847+
18201848
self->ok = 1;
18211849
return 0;
18221850
}
18231851

1852+
/*
1853+
* Ensure all buffered writers are flushed before proceeding with
1854+
* normal shutdown. Otherwise, if the underlying file objects get
1855+
* finalized before the buffered writer wrapping it then any buffered
1856+
* data will be lost.
1857+
*/
1858+
void _PyIO_atexit_flush(void)
1859+
{
1860+
while (buffer_list_end.next != &buffer_list_end) {
1861+
buffered *buf = buffer_list_end.next;
1862+
remove_from_linked_list(buf);
1863+
buffered_flush(buf, NULL);
1864+
PyErr_Clear();
1865+
}
1866+
}
1867+
18241868
static Py_ssize_t
18251869
_bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len)
18261870
{

0 commit comments

Comments
 (0)