@@ -348,38 +348,44 @@ def _validate_nrows(nrows):
348
348
return nrows
349
349
350
350
351
+ _compression_to_extension = {
352
+ 'gzip' : '.gz' ,
353
+ 'bz2' : '.bz2' ,
354
+ 'zip' : '.zip' ,
355
+ 'xz' : '.xz' ,
356
+ }
357
+
358
def _infer_compression(filepath_or_buffer):
    """
    Infer the compression of a filepath or buffer.

    Inference is performed using the extension of the filename or URL.
    Anything that is not a string (e.g. a buffer) has no name to inspect,
    so its compression is None.

    Parameters
    ----------
    filepath_or_buffer : str or object
        A path or URL given as a string, or any other object.

    Returns
    -------
    str or None
        The key of ``_compression_to_extension`` whose extension the string
        ends with (the match is case-sensitive), or None when the argument
        is not a string or no known extension matches.
    """
    if not isinstance(filepath_or_buffer, compat.string_types):
        return None
    # The value is already a string here, so match on it directly. Coercing
    # it with str() is redundant, and on Python 2 it raises
    # UnicodeEncodeError for unicode paths containing non-ASCII characters.
    for compression, extension in _compression_to_extension.items():
        if filepath_or_buffer.endswith(extension):
            return compression
    return None
371
+
351
372
def _read (filepath_or_buffer , kwds ):
352
- "Generic reader of line files."
373
+ """Generic reader of line files."""
353
374
encoding = kwds .get ('encoding' , None )
354
375
if encoding is not None :
355
376
encoding = re .sub ('_' , '-' , encoding ).lower ()
356
377
kwds ['encoding' ] = encoding
357
378
358
- # If the input could be a filename, check for a recognizable compression
359
- # extension. If we're reading from a URL, the `get_filepath_or_buffer`
360
- # will use header info to determine compression, so use what it finds in
361
- # that case.
362
- inferred_compression = kwds .get ('compression' )
363
- if inferred_compression == 'infer' :
364
- if isinstance (filepath_or_buffer , compat .string_types ):
365
- if filepath_or_buffer .endswith ('.gz' ):
366
- inferred_compression = 'gzip'
367
- elif filepath_or_buffer .endswith ('.bz2' ):
368
- inferred_compression = 'bz2'
369
- elif filepath_or_buffer .endswith ('.zip' ):
370
- inferred_compression = 'zip'
371
- elif filepath_or_buffer .endswith ('.xz' ):
372
- inferred_compression = 'xz'
373
- else :
374
- inferred_compression = None
375
- else :
376
- inferred_compression = None
379
+ compression = kwds .get ('compression' )
380
+ if compression not in set (_compression_to_extension ) | {None , 'infer' }:
381
+ raise ValueError ('"{}" is not a valid compression' .format (compression ))
382
+
383
+ if compression == 'infer' :
384
+ compression = _infer_compression (filepath_or_buffer )
377
385
378
386
filepath_or_buffer , _ , compression = get_filepath_or_buffer (
379
- filepath_or_buffer , encoding ,
380
- compression = kwds .get ('compression' , None ))
381
- kwds ['compression' ] = (inferred_compression if compression == 'infer'
382
- else compression )
387
+ filepath_or_buffer , encoding , compression )
388
+ kwds ['compression' ] = compression
383
389
384
390
if kwds .get ('date_parser' , None ) is not None :
385
391
if isinstance (kwds ['parse_dates' ], bool ):
0 commit comments