1
1
from .pandas_vb_common import *
2
2
import os
3
- from pandas import read_csv , read_table
3
+ from pandas import read_csv
4
4
try :
5
5
from cStringIO import StringIO
6
6
except ImportError :
7
7
from io import StringIO
8
8
9
9
10
class read_csv1(object):
    """Benchmarks for reading '|'-separated CSV files from disk.

    ``setup`` writes two files into the working directory: ``test.csv``
    holds the raw random frame and ``test2.csv`` holds the same values
    rendered with ',' as the thousands separator.
    """
    goal_time = 0.2

    def setup(self):
        """Create both input files that the timing methods read."""
        self.N = 10000
        self.K = 8
        self.df = DataFrame(np.random.randn(self.N, self.K) *
                            np.random.randint(100, 10000, (self.N, self.K)))
        self.df.to_csv('test.csv', sep='|')

        # Render every value with ',' thousands separators for time_thousands.
        self.format = (lambda x: '{:,}'.format(x))
        self.df2 = self.df.applymap(self.format)
        self.df2.to_csv('test2.csv', sep='|')

    def time_sep(self):
        """Read the plain file using a non-default separator."""
        read_csv('test.csv', sep='|')

    def time_thousands(self):
        """Read the thousands-formatted file with ``thousands=','``."""
        # Must target test2.csv: test.csv contains no thousands separators,
        # so reading it would not exercise the thousands parsing at all.
        read_csv('test2.csv', sep='|', thousands=',')

    def teardown(self):
        """Delete both files written by ``setup``."""
        os.remove('test.csv')
        os.remove('test2.csv')
51
32
52
33
53
class read_csv2(object):
    """Benchmark parsing CSV text in which every data row ends in a comment."""
    goal_time = 0.2

    def setup(self):
        """Build the in-memory CSV text: one header row plus 100000 data rows."""
        rows = ['A,B,C'] + ['1,2,3 # comment'] * 100000
        # Join with a bare newline so no row gains a leading space.
        self.data = '\n'.join(rows)

    def time_comment(self):
        """Parse the text, discarding everything after '#' on each row."""
        # The comment marker is a single character.
        read_csv(StringIO(self.data), comment='#')
66
44
67
45
68
- class read_csv_roundtrip_converter (object ):
46
+ class read_csv3 (object ):
69
47
goal_time = 0.2
70
48
71
49
def setup (self ):
@@ -74,44 +52,33 @@ def setup(self):
74
52
0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
75
53
0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
76
54
0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n """
55
+ self .data2 = self .data .replace (',' , ';' ).replace ('.' , ',' )
77
56
self .data = (self .data * 200 )
57
+ self .data2 = (self .data2 * 200 )
78
58
79
- def time_read_csv_roundtrip_converter (self ):
80
- read_csv (StringIO (self .data ), sep = ',' , header = None , float_precision = 'round_trip' )
81
-
82
-
83
- class read_csv_thou_vb (object ):
84
- goal_time = 0.2
85
-
86
- def setup (self ):
87
- self .N = 10000
88
- self .K = 8
89
- self .format = (lambda x : '{:,}' .format (x ))
90
- self .df = DataFrame ((np .random .randn (self .N , self .K ) * np .random .randint (100 , 10000 , (self .N , self .K ))))
91
- self .df = self .df .applymap (self .format )
92
- self .df .to_csv ('test.csv' , sep = '|' )
93
-
94
- def time_read_csv_thou_vb (self ):
95
- read_csv ('test.csv' , sep = '|' , thousands = ',' )
96
-
97
- def teardown (self ):
98
- os .remove ('test.csv' )
59
def time_default_converter(self):
    """Parse ``self.data`` as header-less ','-separated text with float_precision=None."""
    source = StringIO(self.data)
    read_csv(source, header=None, sep=',', float_precision=None)
99
62
63
def time_default_converter_with_decimal(self):
    """Parse ``self.data2`` (';'-separated, ',' as decimal mark) with float_precision=None."""
    source = StringIO(self.data2)
    read_csv(source, header=None, sep=';', decimal=',',
             float_precision=None)
100
66
101
- class read_csv_vb (object ):
102
- goal_time = 0.2
67
def time_default_converter_python_engine(self):
    """Parse ``self.data`` as header-less ','-separated text using engine='python'."""
    source = StringIO(self.data)
    read_csv(source, header=None, sep=',', engine='python',
             float_precision=None)
103
70
104
- def setup (self ):
105
- self .N = 10000
106
- self .K = 8
107
- self .df = DataFrame ((np .random .randn (self .N , self .K ) * np .random .randint (100 , 10000 , (self .N , self .K ))))
108
- self .df .to_csv ('test.csv' , sep = '|' )
71
def time_default_converter_with_decimal_python_engine(self):
    """Parse ``self.data2`` (';'-separated, ',' as decimal mark) using engine='python'."""
    source = StringIO(self.data2)
    read_csv(source, header=None, sep=';', decimal=',',
             engine='python', float_precision=None)
109
74
110
- def time_read_csv_vb (self ):
111
- read_csv ('test.csv' , sep = '|' )
75
def time_precise_converter(self):
    """Parse ``self.data`` as header-less ','-separated text with float_precision='high'."""
    source = StringIO(self.data)
    read_csv(source, header=None, sep=',', float_precision='high')
112
78
113
- def teardown (self ):
114
- os .remove ('test.csv' )
79
def time_roundtrip_converter(self):
    """Parse ``self.data`` as header-less ','-separated text with float_precision='round_trip'."""
    source = StringIO(self.data)
    read_csv(source, header=None, sep=',', float_precision='round_trip')
115
82
116
83
117
84
class read_csv_categorical (object ):
@@ -125,61 +92,30 @@ def setup(self):
125
92
'c' : np .random .choice (group1 , N ).astype ('object' )})
126
93
df .to_csv ('strings.csv' , index = False )
127
94
128
def time_convert_post(self):
    """Read 'strings.csv', then convert each column with ``pd.Categorical``."""
    frame = read_csv('strings.csv')
    frame.apply(pd.Categorical)
130
97
131
def time_convert_direct(self):
    """Read 'strings.csv', requesting the 'category' dtype from the parser."""
    path = 'strings.csv'
    read_csv(path, dtype='category')
133
100
134
101
def teardown (self ):
135
102
os .remove ('strings.csv' )
136
103
137
104
138
class read_csv_dateparsing(object):
    """Benchmarks for ``read_csv`` with ``parse_dates`` on in-memory text.

    ``self.data`` keeps the date and the first time in separate columns;
    ``self.data2`` holds the same rows with date and first time already
    sharing one column.
    """
    goal_time = 0.2

    def setup(self):
        """Build both text fixtures, each five sample rows repeated 200 times."""
        self.N = 10000
        self.K = 8

        split_rows = (
            'KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n '
            'KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n '
            'KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n '
            'KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n '
            'KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n '
        )
        joined_rows = (
            'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n '
            'KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n '
            'KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n '
            'KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n '
            'KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n '
        )
        self.data = split_rows * 200
        self.data2 = joined_rows * 200

    def time_multiple_date(self):
        """Parse ``self.data``, combining columns (1, 2) and (1, 3) into dates."""
        column_groups = [[1, 2], [1, 3]]
        read_csv(StringIO(self.data), sep=',', header=None,
                 parse_dates=column_groups)

    def time_baseline(self):
        """Parse ``self.data2``, treating column 1 alone as a date."""
        source = StringIO(self.data2)
        read_csv(source, sep=',', header=None, parse_dates=[1])
0 commit comments