2
2
Tests for the pandas.io.common functionalities
3
3
"""
4
4
import mmap
5
- import pytest
6
5
import os
7
- from os . path import isabs
6
+ import pytest
8
7
9
8
import pandas as pd
10
- import pandas .util . testing as tm
9
+ import pandas .io . common as cmn
11
10
import pandas .util ._test_decorators as td
12
-
13
- from pandas .io import common
14
- from pandas .compat import is_platform_windows , StringIO , FileNotFoundError
15
-
16
- from pandas import read_csv , concat
11
+ import pandas .util .testing as tm
12
+ from pandas .compat import (
13
+ is_platform_windows ,
14
+ StringIO ,
15
+ FileNotFoundError ,
16
+ )
17
17
18
18
19
19
class CustomFSPath (object ):
@@ -55,36 +55,36 @@ class TestCommonIOCapabilities(object):
55
55
56
56
def test_expand_user(self):
    """Check ``_expand_user`` on a ``~``-prefixed path.

    The result must differ from the input, be absolute, and match what
    ``os.path.expanduser`` returns for the same input.
    """
    raw = '~/sometest'
    resolved = cmn._expand_user(raw)

    assert resolved != raw
    assert os.path.isabs(resolved)
    assert resolved == os.path.expanduser(raw)
63
63
64
64
def test_expand_user_normal_path(self):
    """Check that ``_expand_user`` returns a path without ``~`` unchanged."""
    raw = '/somefolder/sometest'
    resolved = cmn._expand_user(raw)

    assert resolved == raw
    assert resolved == os.path.expanduser(raw)
70
70
71
71
@td.skip_if_no('pathlib')
def test_stringify_path_pathlib(self):
    """Check the strings ``_stringify_path`` produces for ``Path`` objects."""
    current_dir = cmn._stringify_path(Path('.'))
    assert current_dir == '.'

    # A doubled separator in the input is expected to come back as a
    # single, platform-joined path.
    collapsed = cmn._stringify_path(Path('foo//bar'))
    assert collapsed == os.path.join('foo', 'bar')
77
77
78
78
@td.skip_if_no('py.path')
def test_stringify_path_localpath(self):
    """Check that a ``LocalPath`` is stringified to its absolute path."""
    relative = os.path.join('foo', 'bar')
    expected = os.path.abspath(relative)
    local = LocalPath(relative)
    assert cmn._stringify_path(local) == expected
84
84
85
85
def test_stringify_path_fspath(self):
    """Check ``_stringify_path`` on a ``CustomFSPath`` wrapping a string."""
    fs_obj = CustomFSPath('foo/bar.csv')
    assert cmn._stringify_path(fs_obj) == 'foo/bar.csv'
89
89
90
90
@pytest .mark .parametrize ('extension,expected' , [
@@ -97,36 +97,36 @@ def test_stringify_path_fspath(self):
97
97
@pytest .mark .parametrize ('path_type' , path_types )
98
98
def test_infer_compression_from_path (self , extension , expected , path_type ):
99
99
path = path_type ('foo/bar.csv' + extension )
100
- compression = common ._infer_compression (path , compression = 'infer' )
100
+ compression = cmn ._infer_compression (path , compression = 'infer' )
101
101
assert compression == expected
102
102
103
103
def test_get_filepath_or_buffer_with_path(self):
    """Check ``get_filepath_or_buffer`` on a ``~``-prefixed path string.

    The returned path must be the expanded, absolute form of the input,
    and the ``should_close`` flag must be falsy.
    """
    raw = '~/sometest'
    resolved, _, _, should_close = cmn.get_filepath_or_buffer(raw)

    assert resolved != raw
    assert os.path.isabs(resolved)
    assert resolved == os.path.expanduser(raw)
    assert not should_close
111
111
112
112
def test_get_filepath_or_buffer_with_buffer(self):
    """Check that a buffer is handed back and not flagged for closing."""
    buf = StringIO()
    returned, _, _, should_close = cmn.get_filepath_or_buffer(buf)

    assert returned == buf
    assert not should_close
118
118
119
119
def test_iterator(self):
    """Check that chunked ``read_csv`` output matches a one-shot read."""
    chunks = pd.read_csv(StringIO(self.data1), chunksize=1)
    combined = pd.concat(chunks, ignore_index=True)
    whole = pd.read_csv(StringIO(self.data1))
    tm.assert_frame_equal(combined, whole)

    # GH12153
    # After pulling the first chunk by hand, concatenating the rest of the
    # reader must give the remaining rows.
    chunk_iter = pd.read_csv(StringIO(self.data1), chunksize=1)
    head = next(chunk_iter)
    tm.assert_frame_equal(head, whole.iloc[[0]])
    tail = pd.concat(chunk_iter)
    tm.assert_frame_equal(tail, whole.iloc[1:])
130
130
131
131
@pytest .mark .parametrize ('reader, module, error_class, fn_ext' , [
132
132
(pd .read_csv , 'os' , FileNotFoundError , 'csv' ),
@@ -246,18 +246,18 @@ def test_constructor_bad_file(self, mmap_file):
246
246
msg = "[Errno 22]"
247
247
err = mmap .error
248
248
249
- tm .assert_raises_regex (err , msg , common .MMapWrapper , non_file )
249
+ tm .assert_raises_regex (err , msg , cmn .MMapWrapper , non_file )
250
250
251
251
target = open (mmap_file , 'r' )
252
252
target .close ()
253
253
254
254
msg = "I/O operation on closed file"
255
255
tm .assert_raises_regex (
256
- ValueError , msg , common .MMapWrapper , target )
256
+ ValueError , msg , cmn .MMapWrapper , target )
257
257
258
258
def test_get_attr (self , mmap_file ):
259
259
with open (mmap_file , 'r' ) as target :
260
- wrapper = common .MMapWrapper (target )
260
+ wrapper = cmn .MMapWrapper (target )
261
261
262
262
attrs = dir (wrapper .mmap )
263
263
attrs = [attr for attr in attrs
@@ -271,7 +271,7 @@ def test_get_attr(self, mmap_file):
271
271
272
272
def test_next (self , mmap_file ):
273
273
with open (mmap_file , 'r' ) as target :
274
- wrapper = common .MMapWrapper (target )
274
+ wrapper = cmn .MMapWrapper (target )
275
275
lines = target .readlines ()
276
276
277
277
for line in lines :
@@ -285,4 +285,100 @@ def test_unknown_engine(self):
285
285
df = tm .makeDataFrame ()
286
286
df .to_csv (path )
287
287
with tm .assert_raises_regex (ValueError , 'Unknown engine' ):
288
- read_csv (path , engine = 'pyt' )
288
+ pd .read_csv (path , engine = 'pyt' )
289
+
290
+
291
@pytest.mark.parametrize('obj', [
    pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                 columns=['X', 'Y', 'Z']),
    pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
def test_compression_size(obj, method, compression_only):
    """Check that writing with compression produces a smaller file.

    ``obj`` is written twice to the same temporary path through ``method``:
    first with ``compression=compression_only``, then with
    ``compression=None``. The sizes on disk are compared.
    """
    # NOTE(review): ``compression_only`` is presumably a fixture defined
    # outside the visible part of this file - confirm.
    with tm.ensure_clean() as path:
        write = getattr(obj, method)

        write(path, compression=compression_only)
        compressed = os.path.getsize(path)

        write(path, compression=None)
        uncompressed = os.path.getsize(path)

        assert uncompressed > compressed
305
+
306
+
307
@pytest.mark.parametrize('obj', [
    pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                 columns=['X', 'Y', 'Z']),
    pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
def test_compression_size_fh(obj, method, compression_only):
    """Check compressed output is smaller when writing to an open handle.

    ``obj`` is written through ``method`` to a handle obtained from
    ``cmn._get_handle``, once with ``compression_only`` and once with no
    compression. The resulting file sizes are compared.
    """

    def written_size(compression):
        # Write ``obj`` to a fresh temporary file via a handle opened with
        # ``compression`` and report the size left on disk.
        with tm.ensure_clean() as path:
            f, _ = cmn._get_handle(path, 'w', compression=compression)
            with f:
                getattr(obj, method)(f)
                # The writer must leave a caller-supplied handle open ...
                assert not f.closed
            # ... and the ``with`` block is what closes it.
            assert f.closed
            return os.path.getsize(path)

    compressed = written_size(compression_only)
    uncompressed = written_size(None)
    assert uncompressed > compressed
330
+
331
+
332
@pytest.mark.parametrize('write_method, write_kwargs, read_method', [
    ('to_csv', {'index': False}, pd.read_csv),
    ('to_json', {}, pd.read_json),
    ('to_pickle', {}, pd.read_pickle),
])
def test_dataframe_compression_defaults_to_infer(
        write_method, write_kwargs, read_method, compression_only):
    # Test that DataFrame.to_* methods default to inferring compression from
    # paths. GH 22004
    #
    # The frame is bound to ``expected`` (not ``input``) so the builtin
    # ``input`` is not shadowed inside the test.
    expected = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]],
                            columns=['X', 'Y', 'Z'])
    extension = cmn._compression_to_extension[compression_only]
    with tm.ensure_clean('compressed' + extension) as path:
        # No ``compression`` argument is given on write; only the file
        # extension carries it. The read names the compression explicitly.
        getattr(expected, write_method)(path, **write_kwargs)
        output = read_method(path, compression=compression_only)
    tm.assert_frame_equal(output, expected)
347
+
348
+
349
@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [
    ('to_csv', {'index': False, 'header': True},
     pd.read_csv, {'squeeze': True}),
    ('to_json', {}, pd.read_json, {'typ': 'series'}),
    ('to_pickle', {}, pd.read_pickle, {}),
])
def test_series_compression_defaults_to_infer(
        write_method, write_kwargs, read_method, read_kwargs,
        compression_only):
    # Test that Series.to_* methods default to inferring compression from
    # paths. GH 22004
    #
    # The series is bound to ``expected`` (not ``input``) so the builtin
    # ``input`` is not shadowed inside the test.
    expected = pd.Series([0, 5, -2, 10], name='X')
    extension = cmn._compression_to_extension[compression_only]
    with tm.ensure_clean('compressed' + extension) as path:
        # No ``compression`` argument is given on write; only the file
        # extension carries it. The read names the compression explicitly.
        getattr(expected, write_method)(path, **write_kwargs)
        output = read_method(path, compression=compression_only,
                             **read_kwargs)
    # The series name is deliberately left out of the comparison.
    tm.assert_series_equal(output, expected, check_names=False)
366
+
367
+
368
def test_compression_warning(compression_only):
    # Passing an already-open file object to to_csv together with an explicit
    # compression protocol must raise a RuntimeWarning (GH 21227).
    #
    # Caveat: a pytest issue makes assert_produces_warning fail on Python 2
    # when the same warning was already emitted by an earlier test
    # (see https://git.io/fNEBm & https://git.io/fNEBC). If this test fails
    # only on Python 2 builds, the likely cause is another test emitting
    # RuntimeWarnings and so tripping that pytest bug.
    frame = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                                [12.32112, 123123.2, 321321.2]],
                         columns=['X', 'Y', 'Z'])
    with tm.ensure_clean() as path:
        f, _ = cmn._get_handle(path, 'w', compression=compression_only)
        expect_warning = tm.assert_produces_warning(RuntimeWarning,
                                                    check_stacklevel=False)
        with expect_warning:
            with f:
                frame.to_csv(f, compression=compression_only)
0 commit comments