@@ -609,7 +609,12 @@ err_iterbuffered(void)
609
609
return NULL ;
610
610
}
611
611
612
- static void drop_readahead (PyFileObject * );
612
+ static void
613
+ drop_file_readahead (PyFileObject * f )
614
+ {
615
+ PyMem_FREE (f -> f_buf );
616
+ f -> f_buf = NULL ;
617
+ }
613
618
614
619
/* Methods */
615
620
@@ -632,7 +637,7 @@ file_dealloc(PyFileObject *f)
632
637
Py_XDECREF (f -> f_mode );
633
638
Py_XDECREF (f -> f_encoding );
634
639
Py_XDECREF (f -> f_errors );
635
- drop_readahead (f );
640
+ drop_file_readahead (f );
636
641
Py_TYPE (f )-> tp_free ((PyObject * )f );
637
642
}
638
643
@@ -767,13 +772,7 @@ file_seek(PyFileObject *f, PyObject *args)
767
772
768
773
if (f -> f_fp == NULL )
769
774
return err_closed ();
770
- if (f -> unlocked_count > 0 ) {
771
- PyErr_SetString (PyExc_IOError ,
772
- "seek() called during concurrent "
773
- "operation on the same file object" );
774
- return NULL ;
775
- }
776
- drop_readahead (f );
775
+ drop_file_readahead (f );
777
776
whence = 0 ;
778
777
if (!PyArg_ParseTuple (args , "O|i:seek" , & offobj , & whence ))
779
778
return NULL ;
@@ -2242,49 +2241,51 @@ static PyGetSetDef file_getsetlist[] = {
2242
2241
{0 },
2243
2242
};
2244
2243
2244
+ typedef struct {
2245
+ char * buf , * bufptr , * bufend ;
2246
+ } readaheadbuffer ;
2247
+
2245
2248
static void
2246
- drop_readahead ( PyFileObject * f )
2249
+ drop_readaheadbuffer ( readaheadbuffer * rab )
2247
2250
{
2248
- if (f -> f_buf != NULL ) {
2249
- PyMem_Free ( f -> f_buf );
2250
- f -> f_buf = NULL ;
2251
+ if (rab -> buf != NULL ) {
2252
+ PyMem_FREE ( rab -> buf );
2253
+ rab -> buf = NULL ;
2251
2254
}
2252
2255
}
2253
2256
2254
2257
/* Make sure that file has a readahead buffer with at least one byte
2255
2258
(unless at EOF) and no more than bufsize. Returns negative value on
2256
2259
error, will set MemoryError if bufsize bytes cannot be allocated. */
2257
2260
static int
2258
- readahead (PyFileObject * f , Py_ssize_t bufsize )
2261
+ readahead (PyFileObject * f , readaheadbuffer * rab , Py_ssize_t bufsize )
2259
2262
{
2260
2263
Py_ssize_t chunksize ;
2261
2264
2262
- assert (f -> unlocked_count == 0 );
2263
- if (f -> f_buf != NULL ) {
2264
- if ( (f -> f_bufend - f -> f_bufptr ) >= 1 )
2265
+ if (rab -> buf != NULL ) {
2266
+ if ((rab -> bufend - rab -> bufptr ) >= 1 )
2265
2267
return 0 ;
2266
2268
else
2267
- drop_readahead ( f );
2269
+ drop_readaheadbuffer ( rab );
2268
2270
}
2269
- if ((f -> f_buf = ( char * ) PyMem_Malloc (bufsize )) == NULL ) {
2271
+ if ((rab -> buf = PyMem_MALLOC (bufsize )) == NULL ) {
2270
2272
PyErr_NoMemory ();
2271
2273
return -1 ;
2272
2274
}
2273
2275
FILE_BEGIN_ALLOW_THREADS (f )
2274
2276
errno = 0 ;
2275
- chunksize = Py_UniversalNewlineFread (
2276
- f -> f_buf , bufsize , f -> f_fp , (PyObject * )f );
2277
+ chunksize = Py_UniversalNewlineFread (rab -> buf , bufsize , f -> f_fp , (PyObject * )f );
2277
2278
FILE_END_ALLOW_THREADS (f )
2278
2279
if (chunksize == 0 ) {
2279
2280
if (ferror (f -> f_fp )) {
2280
2281
PyErr_SetFromErrno (PyExc_IOError );
2281
2282
clearerr (f -> f_fp );
2282
- drop_readahead ( f );
2283
+ drop_readaheadbuffer ( rab );
2283
2284
return -1 ;
2284
2285
}
2285
2286
}
2286
- f -> f_bufptr = f -> f_buf ;
2287
- f -> f_bufend = f -> f_buf + chunksize ;
2287
+ rab -> bufptr = rab -> buf ;
2288
+ rab -> bufend = rab -> buf + chunksize ;
2288
2289
return 0 ;
2289
2290
}
2290
2291
@@ -2294,51 +2295,43 @@ readahead(PyFileObject *f, Py_ssize_t bufsize)
2294
2295
logarithmic buffer growth to about 50 even when reading a 1gb line. */
2295
2296
2296
2297
static PyStringObject *
2297
- readahead_get_line_skip (PyFileObject * f , Py_ssize_t skip , Py_ssize_t bufsize )
2298
+ readahead_get_line_skip (PyFileObject * f , readaheadbuffer * rab , Py_ssize_t skip , Py_ssize_t bufsize )
2298
2299
{
2299
2300
PyStringObject * s ;
2300
2301
char * bufptr ;
2301
2302
char * buf ;
2302
2303
Py_ssize_t len ;
2303
2304
2304
- if (f -> unlocked_count > 0 ) {
2305
- PyErr_SetString (PyExc_IOError ,
2306
- "next() called during concurrent "
2307
- "operation on the same file object" );
2308
- return NULL ;
2309
- }
2310
- if (f -> f_buf == NULL )
2311
- if (readahead (f , bufsize ) < 0 )
2305
+ if (rab -> buf == NULL )
2306
+ if (readahead (f , rab , bufsize ) < 0 )
2312
2307
return NULL ;
2313
2308
2314
- len = f -> f_bufend - f -> f_bufptr ;
2309
+ len = rab -> bufend - rab -> bufptr ;
2315
2310
if (len == 0 )
2316
- return (PyStringObject * )
2317
- PyString_FromStringAndSize (NULL , skip );
2318
- bufptr = (char * )memchr (f -> f_bufptr , '\n' , len );
2311
+ return (PyStringObject * )PyString_FromStringAndSize (NULL , skip );
2312
+ bufptr = (char * )memchr (rab -> bufptr , '\n' , len );
2319
2313
if (bufptr != NULL ) {
2320
2314
bufptr ++ ; /* Count the '\n' */
2321
- len = bufptr - f -> f_bufptr ;
2322
- s = (PyStringObject * )
2323
- PyString_FromStringAndSize (NULL , skip + len );
2315
+ len = bufptr - rab -> bufptr ;
2316
+ s = (PyStringObject * )PyString_FromStringAndSize (NULL , skip + len );
2324
2317
if (s == NULL )
2325
2318
return NULL ;
2326
- memcpy (PyString_AS_STRING (s ) + skip , f -> f_bufptr , len );
2327
- f -> f_bufptr = bufptr ;
2328
- if (bufptr == f -> f_bufend )
2329
- drop_readahead ( f );
2319
+ memcpy (PyString_AS_STRING (s ) + skip , rab -> bufptr , len );
2320
+ rab -> bufptr = bufptr ;
2321
+ if (bufptr == rab -> bufend )
2322
+ drop_readaheadbuffer ( rab );
2330
2323
} else {
2331
- bufptr = f -> f_bufptr ;
2332
- buf = f -> f_buf ;
2333
- f -> f_buf = NULL ; /* Force new readahead buffer */
2324
+ bufptr = rab -> bufptr ;
2325
+ buf = rab -> buf ;
2326
+ rab -> buf = NULL ; /* Force new readahead buffer */
2334
2327
assert (len <= PY_SSIZE_T_MAX - skip );
2335
- s = readahead_get_line_skip (f , skip + len , bufsize + (bufsize >>2 ));
2328
+ s = readahead_get_line_skip (f , rab , skip + len , bufsize + (bufsize >>2 ));
2336
2329
if (s == NULL ) {
2337
- PyMem_Free (buf );
2330
+ PyMem_FREE (buf );
2338
2331
return NULL ;
2339
2332
}
2340
2333
memcpy (PyString_AS_STRING (s ) + skip , bufptr , len );
2341
- PyMem_Free (buf );
2334
+ PyMem_FREE (buf );
2342
2335
}
2343
2336
return s ;
2344
2337
}
@@ -2356,7 +2349,30 @@ file_iternext(PyFileObject *f)
2356
2349
if (!f -> readable )
2357
2350
return err_mode ("reading" );
2358
2351
2359
- l = readahead_get_line_skip (f , 0 , READAHEAD_BUFSIZE );
2352
+ {
2353
+ /*
2354
+ Multiple threads can enter this method while the GIL is released
2355
+ during file read and wreak havoc on the file object's readahead
2356
+ buffer. To avoid dealing with cross-thread coordination issues, we
2357
+ cache the file buffer state locally and only set it back on the file
2358
+ object when we're done.
2359
+ */
2360
+ readaheadbuffer rab = {f -> f_buf , f -> f_bufptr , f -> f_bufend };
2361
+ f -> f_buf = NULL ;
2362
+ l = readahead_get_line_skip (f , & rab , 0 , READAHEAD_BUFSIZE );
2363
+ /*
2364
+ Make sure the file's internal read buffer is cleared out. This will
2365
+ only do anything if some other thread interleaved with us during
2366
+ readahead. We want to drop any changeling buffer, so we don't leak
2367
+ memory. We may lose data, but that's what you get for reading the same
2368
+ file object in multiple threads.
2369
+ */
2370
+ drop_file_readahead (f );
2371
+ f -> f_buf = rab .buf ;
2372
+ f -> f_bufptr = rab .bufptr ;
2373
+ f -> f_bufend = rab .bufend ;
2374
+ }
2375
+
2360
2376
if (l == NULL || PyString_GET_SIZE (l ) == 0 ) {
2361
2377
Py_XDECREF (l );
2362
2378
return NULL ;
0 commit comments