forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathio_bench.py
100 lines (79 loc) · 3.34 KB
/
io_bench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from vbench.api import Benchmark
from datetime import datetime
# Import line that each setup snippet below is prefixed with.
common_setup = "from pandas_vb_common import *\n"
#----------------------------------------------------------------------
# read_csv

# Setup snippet: builds a 10000-row frame with float, string, bool and int
# columns and writes it to '__test__.csv'; the timed statement only reads
# that file back.
setup1 = common_setup + (
    "\n"
    "index = [rands(10) for _ in xrange(10000)]\n"
    "df = DataFrame({'float1' : randn(10000),\n"
    "'float2' : randn(10000),\n"
    "'string1' : ['foo'] * 10000,\n"
    "'bool1' : [True] * 10000,\n"
    "'int1' : np.random.randint(0, 100000, size=10000)},\n"
    "index=index)\n"
    "df.to_csv('__test__.csv')\n"
)

read_csv_standard = Benchmark(
    "read_csv('__test__.csv')",
    setup1,
    start_date=datetime(2011, 9, 15))
#----------------------------------------------------------------------
# write_csv

# Setup snippet: builds the same 10000-row mixed-column frame as the
# read_csv benchmark but does not write it; the write is what gets timed.
setup2 = common_setup + (
    "\n"
    "index = [rands(10) for _ in xrange(10000)]\n"
    "df = DataFrame({'float1' : randn(10000),\n"
    "'float2' : randn(10000),\n"
    "'string1' : ['foo'] * 10000,\n"
    "'bool1' : [True] * 10000,\n"
    "'int1' : np.random.randint(0, 100000, size=10000)},\n"
    "index=index)\n"
)

write_csv_standard = Benchmark(
    "df.to_csv('__test__.csv')",
    setup2,
    start_date=datetime(2011, 9, 15))
#----------------------------------
# to_csv of a 3000 x 30 frame filled from np.random.randn.
setup = common_setup + "\ndf = DataFrame(np.random.randn(3000, 30))\n"

frame_to_csv = Benchmark(
    "df.to_csv('__test__.csv')",
    setup,
    start_date=datetime(2011, 1, 1))
#----------------------------------
# to_csv of a 50000-row frame: column A from range(50000) plus three
# columns derived from A by adding a float offset.
setup = common_setup + (
    "\n"
    "df=DataFrame({'A':range(50000)})\n"
    "df['B'] = df.A + 1.0\n"
    "df['C'] = df.A + 2.0\n"
    "df['D'] = df.A + 3.0\n"
)

frame_to_csv2 = Benchmark(
    "df.to_csv('__test__.csv')",
    setup,
    start_date=datetime(2011, 1, 1))
#----------------------------------
# to_csv of a mixed-dtype frame: five-column float, int, bool, object and
# Timestamp frames (all sharing df_float's index) concatenated along axis=1,
# with a patch of NaNs written into the float part.
#
# NOTE: the setup text is run as Python source by the benchmark harness, so
# the body of create_cols has to be indented *inside the string*; a
# column-0 `return` makes the whole snippet fail with IndentationError.
setup = common_setup + """
from pandas import concat, Timestamp
def create_cols(name):
    return [ "%s%03d" % (name,i) for i in xrange(5) ]
df_float = DataFrame(np.random.randn(5000, 5),dtype='float64',columns=create_cols('float'))
df_int = DataFrame(np.random.randn(5000, 5),dtype='int64',columns=create_cols('int'))
df_bool = DataFrame(True,index=df_float.index,columns=create_cols('bool'))
df_object = DataFrame('foo',index=df_float.index,columns=create_cols('object'))
df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=create_cols('date'))
# add in some nans
df_float.ix[30:500,1:3] = np.nan
df = concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
"""

frame_to_csv_mixed = Benchmark(
    "df.to_csv('__test__.csv')",
    setup,
    start_date=datetime(2012, 6, 1))
#----------------------------------------------------------------------
# parse dates, ISO8601 format

# Setup snippet: renders 1000 consecutive dates as
# "%Y-%m-%d %H:%M:%S" text, one per line, into `data`.  The doubled
# backslash keeps a literal backslash-n escape in the generated source.
setup = common_setup + """
rng = date_range('1/1/2000', periods=1000)
data = '\\n'.join(rng.map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S")))
"""

# Timed statement: parse the single column 'foo' as dates.  The two
# spaces before parse_dates are carried over verbatim from the original
# statement text.
stmt = (
    "read_csv(StringIO(data), header=None, "
    "names=['foo'],  parse_dates=['foo'])"
)

read_parse_dates_iso8601 = Benchmark(
    stmt,
    setup,
    start_date=datetime(2012, 3, 1))
# to_csv with an explicit date_format on a 1000-row frame whose index and
# single column are both the same date range.
setup = common_setup + (
    "\n"
    "rng = date_range('1/1/2000', periods=1000)\n"
    "data = DataFrame(rng, index=rng)\n"
)

stmt = "data.to_csv('__test__.csv', date_format='%Y%m%d')"

frame_to_csv_date_formatting = Benchmark(
    stmt,
    setup,
    start_date=datetime(2013, 9, 1))