@@ -609,7 +609,12 @@ err_iterbuffered(void)
609
609
return NULL ;
610
610
}
611
611
612
- static void drop_readahead (PyFileObject * );
612
+ static void
613
+ drop_file_readahead (PyFileObject * f )
614
+ {
615
+ PyMem_FREE (f -> f_buf );
616
+ f -> f_buf = NULL ;
617
+ }
613
618
614
619
/* Methods */
615
620
@@ -632,7 +637,7 @@ file_dealloc(PyFileObject *f)
632
637
Py_XDECREF (f -> f_mode );
633
638
Py_XDECREF (f -> f_encoding );
634
639
Py_XDECREF (f -> f_errors );
635
- drop_readahead (f );
640
+ drop_file_readahead (f );
636
641
Py_TYPE (f )-> tp_free ((PyObject * )f );
637
642
}
638
643
@@ -767,7 +772,7 @@ file_seek(PyFileObject *f, PyObject *args)
767
772
768
773
if (f -> f_fp == NULL )
769
774
return err_closed ();
770
- drop_readahead (f );
775
+ drop_file_readahead (f );
771
776
whence = 0 ;
772
777
if (!PyArg_ParseTuple (args , "O|i:seek" , & offobj , & whence ))
773
778
return NULL ;
@@ -2236,48 +2241,51 @@ static PyGetSetDef file_getsetlist[] = {
2236
2241
{0 },
2237
2242
};
2238
2243
2244
+ typedef struct {
2245
+ char * buf , * bufptr , * bufend ;
2246
+ } readaheadbuffer ;
2247
+
2239
2248
static void
2240
- drop_readahead ( PyFileObject * f )
2249
+ drop_readaheadbuffer ( readaheadbuffer * rab )
2241
2250
{
2242
- if (f -> f_buf != NULL ) {
2243
- PyMem_Free ( f -> f_buf );
2244
- f -> f_buf = NULL ;
2251
+ if (rab -> buf != NULL ) {
2252
+ PyMem_FREE ( rab -> buf );
2253
+ rab -> buf = NULL ;
2245
2254
}
2246
2255
}
2247
2256
2248
2257
/* Make sure that file has a readahead buffer with at least one byte
2249
2258
(unless at EOF) and no more than bufsize. Returns negative value on
2250
2259
error, will set MemoryError if bufsize bytes cannot be allocated. */
2251
2260
static int
2252
- readahead (PyFileObject * f , Py_ssize_t bufsize )
2261
+ readahead (PyFileObject * f , readaheadbuffer * rab , Py_ssize_t bufsize )
2253
2262
{
2254
2263
Py_ssize_t chunksize ;
2255
2264
2256
- if (f -> f_buf != NULL ) {
2257
- if ( ( f -> f_bufend - f -> f_bufptr ) >= 1 )
2265
+ if (rab -> buf != NULL ) {
2266
+ if (( rab -> bufend - rab -> bufptr ) >= 1 )
2258
2267
return 0 ;
2259
2268
else
2260
- drop_readahead ( f );
2269
+ drop_readaheadbuffer ( rab );
2261
2270
}
2262
- if ((f -> f_buf = ( char * ) PyMem_Malloc (bufsize )) == NULL ) {
2271
+ if ((rab -> buf = PyMem_MALLOC (bufsize )) == NULL ) {
2263
2272
PyErr_NoMemory ();
2264
2273
return -1 ;
2265
2274
}
2266
2275
FILE_BEGIN_ALLOW_THREADS (f )
2267
2276
errno = 0 ;
2268
- chunksize = Py_UniversalNewlineFread (
2269
- f -> f_buf , bufsize , f -> f_fp , (PyObject * )f );
2277
+ chunksize = Py_UniversalNewlineFread (rab -> buf , bufsize , f -> f_fp , (PyObject * )f );
2270
2278
FILE_END_ALLOW_THREADS (f )
2271
2279
if (chunksize == 0 ) {
2272
2280
if (ferror (f -> f_fp )) {
2273
2281
PyErr_SetFromErrno (PyExc_IOError );
2274
2282
clearerr (f -> f_fp );
2275
- drop_readahead ( f );
2283
+ drop_readaheadbuffer ( rab );
2276
2284
return -1 ;
2277
2285
}
2278
2286
}
2279
- f -> f_bufptr = f -> f_buf ;
2280
- f -> f_bufend = f -> f_buf + chunksize ;
2287
+ rab -> bufptr = rab -> buf ;
2288
+ rab -> bufend = rab -> buf + chunksize ;
2281
2289
return 0 ;
2282
2290
}
2283
2291
@@ -2287,45 +2295,43 @@ readahead(PyFileObject *f, Py_ssize_t bufsize)
2287
2295
logarithmic buffer growth to about 50 even when reading a 1gb line. */
2288
2296
2289
2297
static PyStringObject *
2290
- readahead_get_line_skip (PyFileObject * f , Py_ssize_t skip , Py_ssize_t bufsize )
2298
+ readahead_get_line_skip (PyFileObject * f , readaheadbuffer * rab , Py_ssize_t skip , Py_ssize_t bufsize )
2291
2299
{
2292
2300
PyStringObject * s ;
2293
2301
char * bufptr ;
2294
2302
char * buf ;
2295
2303
Py_ssize_t len ;
2296
2304
2297
- if (f -> f_buf == NULL )
2298
- if (readahead (f , bufsize ) < 0 )
2305
+ if (rab -> buf == NULL )
2306
+ if (readahead (f , rab , bufsize ) < 0 )
2299
2307
return NULL ;
2300
2308
2301
- len = f -> f_bufend - f -> f_bufptr ;
2309
+ len = rab -> bufend - rab -> bufptr ;
2302
2310
if (len == 0 )
2303
- return (PyStringObject * )
2304
- PyString_FromStringAndSize (NULL , skip );
2305
- bufptr = (char * )memchr (f -> f_bufptr , '\n' , len );
2311
+ return (PyStringObject * )PyString_FromStringAndSize (NULL , skip );
2312
+ bufptr = (char * )memchr (rab -> bufptr , '\n' , len );
2306
2313
if (bufptr != NULL ) {
2307
2314
bufptr ++ ; /* Count the '\n' */
2308
- len = bufptr - f -> f_bufptr ;
2309
- s = (PyStringObject * )
2310
- PyString_FromStringAndSize (NULL , skip + len );
2315
+ len = bufptr - rab -> bufptr ;
2316
+ s = (PyStringObject * )PyString_FromStringAndSize (NULL , skip + len );
2311
2317
if (s == NULL )
2312
2318
return NULL ;
2313
- memcpy (PyString_AS_STRING (s ) + skip , f -> f_bufptr , len );
2314
- f -> f_bufptr = bufptr ;
2315
- if (bufptr == f -> f_bufend )
2316
- drop_readahead ( f );
2319
+ memcpy (PyString_AS_STRING (s ) + skip , rab -> bufptr , len );
2320
+ rab -> bufptr = bufptr ;
2321
+ if (bufptr == rab -> bufend )
2322
+ drop_readaheadbuffer ( rab );
2317
2323
} else {
2318
- bufptr = f -> f_bufptr ;
2319
- buf = f -> f_buf ;
2320
- f -> f_buf = NULL ; /* Force new readahead buffer */
2324
+ bufptr = rab -> bufptr ;
2325
+ buf = rab -> buf ;
2326
+ rab -> buf = NULL ; /* Force new readahead buffer */
2321
2327
assert (len <= PY_SSIZE_T_MAX - skip );
2322
- s = readahead_get_line_skip (f , skip + len , bufsize + (bufsize >>2 ));
2328
+ s = readahead_get_line_skip (f , rab , skip + len , bufsize + (bufsize >>2 ));
2323
2329
if (s == NULL ) {
2324
- PyMem_Free (buf );
2330
+ PyMem_FREE (buf );
2325
2331
return NULL ;
2326
2332
}
2327
2333
memcpy (PyString_AS_STRING (s ) + skip , bufptr , len );
2328
- PyMem_Free (buf );
2334
+ PyMem_FREE (buf );
2329
2335
}
2330
2336
return s ;
2331
2337
}
@@ -2343,7 +2349,30 @@ file_iternext(PyFileObject *f)
2343
2349
if (!f -> readable )
2344
2350
return err_mode ("reading" );
2345
2351
2346
- l = readahead_get_line_skip (f , 0 , READAHEAD_BUFSIZE );
2352
+ {
2353
+ /*
2354
+ Multiple threads can enter this method while the GIL is released
2355
+ during file read and wreak havoc on the file object's readahead
2356
+ buffer. To avoid dealing with cross-thread coordination issues, we
2357
+ cache the file buffer state locally and only set it back on the file
2358
+ object when we're done.
2359
+ */
2360
+ readaheadbuffer rab = {f -> f_buf , f -> f_bufptr , f -> f_bufend };
2361
+ f -> f_buf = NULL ;
2362
+ l = readahead_get_line_skip (f , & rab , 0 , READAHEAD_BUFSIZE );
2363
+ /*
2364
+ Make sure the file's internal read buffer is cleared out. This will
2365
+ only do anything if some other thread interleaved with us during
2366
+ readahead. We want to drop any changeling buffer, so we don't leak
2367
+ memory. We may lose data, but that's what you get for reading the same
2368
+ file object in multiple threads.
2369
+ */
2370
+ drop_file_readahead (f );
2371
+ f -> f_buf = rab .buf ;
2372
+ f -> f_bufptr = rab .bufptr ;
2373
+ f -> f_bufend = rab .bufend ;
2374
+ }
2375
+
2347
2376
if (l == NULL || PyString_GET_SIZE (l ) == 0 ) {
2348
2377
Py_XDECREF (l );
2349
2378
return NULL ;
0 commit comments