Skip to content

Commit 1138c71

Browse files
Clean-up parser_vb benchmarks
1 parent fac58ce commit 1138c71

File tree

1 file changed

+52
-116
lines changed

1 file changed

+52
-116
lines changed

asv_bench/benchmarks/parser_vb.py

Lines changed: 52 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -1,71 +1,49 @@
11
from .pandas_vb_common import *
22
import os
3-
from pandas import read_csv, read_table
3+
from pandas import read_csv
44
try:
55
from cStringIO import StringIO
66
except ImportError:
77
from io import StringIO
88

99

10-
class read_csv_comment2(object):
10+
class read_csv1(object):
1111
goal_time = 0.2
1212

1313
def setup(self):
14-
self.data = ['A,B,C']
15-
self.data = (self.data + (['1,2,3 # comment'] * 100000))
16-
self.data = '\n'.join(self.data)
17-
18-
def time_read_csv_comment2(self):
19-
read_csv(StringIO(self.data), comment='#')
20-
21-
22-
class read_csv_default_converter(object):
23-
goal_time = 0.2
24-
25-
def setup(self):
26-
self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n
27-
0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n
28-
0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
29-
0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
30-
0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
31-
self.data = (self.data * 200)
32-
33-
def time_read_csv_default_converter(self):
34-
read_csv(StringIO(self.data), sep=',', header=None, float_precision=None)
14+
self.N = 10000
15+
self.K = 8
16+
self.df = DataFrame((np.random.randn(self.N, self.K) * np.random.randint(100, 10000, (self.N, self.K))))
17+
self.df.to_csv('test.csv', sep='|')
3518

19+
self.format = (lambda x: '{:,}'.format(x))
20+
self.df2 = self.df.applymap(self.format)
21+
self.df2.to_csv('test2.csv', sep='|')
3622

37-
class read_csv_default_converter_with_decimal(object):
38-
goal_time = 0.2
23+
def time_sep(self):
24+
read_csv('test.csv', sep='|')
3925

40-
def setup(self):
41-
self.data = """0,1213700904466425978256438611;0,0525708283766902484401839501;0,4174092731488769913994474336\n
42-
0,4096341697147408700274695547;0,1587830198973579909349496119;0,1292545832485494372576795285\n
43-
0,8323255650024565799327547210;0,9694902427379478160318626578;0,6295047811546814475747169126\n
44-
0,4679375305798131323697930383;0,2963942381834381301075609371;0,5268936082160610157032465394\n
45-
0,6685382761849776311890991564;0,6721207066140679753374342908;0,6519975277021627935170045020\n"""
46-
self.data = (self.data * 200)
26+
def time_thousands(self):
    """Time read_csv parsing of numbers that contain ',' thousands separators.

    Reads 'test2.csv', the file setup() writes from the frame whose values
    were formatted with '{:,}'. 'test.csv' holds the unformatted floats, so
    reading it with thousands=',' would never meet a separator and would
    not measure the thousands-handling path.
    """
    read_csv('test2.csv', sep='|', thousands=',')
4728

48-
def time_read_csv_default_converter_with_decimal(self):
49-
read_csv(StringIO(self.data), sep=';', header=None,
50-
float_precision=None, decimal=',')
29+
def teardown(self):
30+
os.remove('test.csv')
31+
os.remove('test2.csv')
5132

5233

53-
class read_csv_precise_converter(object):
34+
class read_csv2(object):
5435
goal_time = 0.2
5536

5637
def setup(self):
57-
self.data = """0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n
58-
0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n
59-
0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
60-
0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
61-
0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
62-
self.data = (self.data * 200)
38+
self.data = ['A,B,C']
39+
self.data = (self.data + (['1,2,3 # comment'] * 100000))
40+
self.data = '\n'.join(self.data)
6341

64-
def time_read_csv_precise_converter(self):
65-
read_csv(StringIO(self.data), sep=',', header=None, float_precision='high')
42+
def time_comment(self):
43+
read_csv(StringIO(self.data), comment='#')
6644

6745

68-
class read_csv_roundtrip_converter(object):
46+
class read_csv3(object):
6947
goal_time = 0.2
7048

7149
def setup(self):
@@ -74,44 +52,33 @@ def setup(self):
7452
0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n
7553
0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n
7654
0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n"""
55+
self.data2 = self.data.replace(',', ';').replace('.', ',')
7756
self.data = (self.data * 200)
57+
self.data2 = (self.data2 * 200)
7858

79-
def time_read_csv_roundtrip_converter(self):
80-
read_csv(StringIO(self.data), sep=',', header=None, float_precision='round_trip')
81-
82-
83-
class read_csv_thou_vb(object):
84-
goal_time = 0.2
85-
86-
def setup(self):
87-
self.N = 10000
88-
self.K = 8
89-
self.format = (lambda x: '{:,}'.format(x))
90-
self.df = DataFrame((np.random.randn(self.N, self.K) * np.random.randint(100, 10000, (self.N, self.K))))
91-
self.df = self.df.applymap(self.format)
92-
self.df.to_csv('test.csv', sep='|')
93-
94-
def time_read_csv_thou_vb(self):
95-
read_csv('test.csv', sep='|', thousands=',')
96-
97-
def teardown(self):
98-
os.remove('test.csv')
59+
def time_default_converter(self):
60+
read_csv(StringIO(self.data), sep=',', header=None,
61+
float_precision=None)
9962

63+
def time_default_converter_with_decimal(self):
64+
read_csv(StringIO(self.data2), sep=';', header=None,
65+
float_precision=None, decimal=',')
10066

101-
class read_csv_vb(object):
102-
goal_time = 0.2
67+
def time_default_converter_python_engine(self):
68+
read_csv(StringIO(self.data), sep=',', header=None,
69+
float_precision=None, engine='python')
10370

104-
def setup(self):
105-
self.N = 10000
106-
self.K = 8
107-
self.df = DataFrame((np.random.randn(self.N, self.K) * np.random.randint(100, 10000, (self.N, self.K))))
108-
self.df.to_csv('test.csv', sep='|')
71+
def time_default_converter_with_decimal_python_engine(self):
72+
read_csv(StringIO(self.data2), sep=';', header=None,
73+
float_precision=None, decimal=',', engine='python')
10974

110-
def time_read_csv_vb(self):
111-
read_csv('test.csv', sep='|')
75+
def time_precise_converter(self):
76+
read_csv(StringIO(self.data), sep=',', header=None,
77+
float_precision='high')
11278

113-
def teardown(self):
114-
os.remove('test.csv')
79+
def time_roundtrip_converter(self):
80+
read_csv(StringIO(self.data), sep=',', header=None,
81+
float_precision='round_trip')
11582

11683

11784
class read_csv_categorical(object):
@@ -125,61 +92,30 @@ def setup(self):
12592
'c': np.random.choice(group1, N).astype('object')})
12693
df.to_csv('strings.csv', index=False)
12794

128-
def time_read_csv_categorical_post(self):
95+
def time_convert_post(self):
12996
read_csv('strings.csv').apply(pd.Categorical)
13097

131-
def time_read_csv_categorical_direct(self):
98+
def time_convert_direct(self):
13299
read_csv('strings.csv', dtype='category')
133100

134101
def teardown(self):
135102
os.remove('strings.csv')
136103

137104

138-
class read_table_multiple_date(object):
105+
class read_csv_dateparsing(object):
139106
goal_time = 0.2
140107

141108
def setup(self):
142109
self.N = 10000
143110
self.K = 8
144111
self.data = 'KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n '
145112
self.data = (self.data * 200)
113+
self.data2 = 'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n '
114+
self.data2 = (self.data2 * 200)
146115

147-
def time_read_table_multiple_date(self):
148-
read_table(StringIO(self.data), sep=',', header=None, parse_dates=[[1, 2], [1, 3]])
149-
150-
151-
class read_table_multiple_date_baseline(object):
152-
goal_time = 0.2
153-
154-
def setup(self):
155-
self.N = 10000
156-
self.K = 8
157-
self.data = 'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n '
158-
self.data = (self.data * 200)
159-
160-
def time_read_table_multiple_date_baseline(self):
161-
read_table(StringIO(self.data), sep=',', header=None, parse_dates=[1])
162-
163-
164-
class read_csv_default_converter_python_engine(object):
165-
goal_time = 0.2
166-
167-
def setup(self):
168-
self.data = '0.1213700904466425978256438611,0.0525708283766902484401839501,0.4174092731488769913994474336\n 0.4096341697147408700274695547,0.1587830198973579909349496119,0.1292545832485494372576795285\n 0.8323255650024565799327547210,0.9694902427379478160318626578,0.6295047811546814475747169126\n 0.4679375305798131323697930383,0.2963942381834381301075609371,0.5268936082160610157032465394\n 0.6685382761849776311890991564,0.6721207066140679753374342908,0.6519975277021627935170045020\n '
169-
self.data = (self.data * 200)
170-
171-
def time_read_csv_default_converter(self):
116+
def time_multiple_date(self):
172117
read_csv(StringIO(self.data), sep=',', header=None,
173-
float_precision=None, engine='python')
174-
118+
parse_dates=[[1, 2], [1, 3]])
175119

176-
class read_csv_default_converter_with_decimal_python_engine(object):
177-
goal_time = 0.2
178-
179-
def setup(self):
180-
self.data = '0,1213700904466425978256438611;0,0525708283766902484401839501;0,4174092731488769913994474336\n 0,4096341697147408700274695547;0,1587830198973579909349496119;0,1292545832485494372576795285\n 0,8323255650024565799327547210;0,9694902427379478160318626578;0,6295047811546814475747169126\n 0,4679375305798131323697930383;0,2963942381834381301075609371;0,5268936082160610157032465394\n 0,6685382761849776311890991564;0,6721207066140679753374342908;0,6519975277021627935170045020\n '
181-
self.data = (self.data * 200)
182-
183-
def time_read_csv_default_converter_with_decimal(self):
184-
read_csv(StringIO(self.data), sep=';', header=None,
185-
float_precision=None, decimal=',', engine='python')
120+
def time_baseline(self):
121+
read_csv(StringIO(self.data2), sep=',', header=None, parse_dates=[1])

0 commit comments

Comments
 (0)