Skip to content

Commit 0f59d14

Browse files
committed
BUG: fix failing vbenchmarks pandas-dev#2404
1 parent 6abbbc1 commit 0f59d14

File tree

7 files changed

+27
-34
lines changed

7 files changed

+27
-34
lines changed

Diff for: vb_suite/index_object.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,10 @@
1212

1313
setup = common_setup + """
1414
rng = DateRange('1/1/2000', periods=10000, offset=datetools.Minute())
15-
rng = rng.view(Index)
15+
if rng.dtype == object:
16+
rng = rng.view(Index)
17+
else:
18+
rng = rng.asobject
1619
rng2 = rng[:-1]
1720
"""
1821

Diff for: vb_suite/join_merge.py

+11 -1
Original file line number | Diff line number | Diff line change
@@ -68,9 +68,19 @@
6868

6969
#----------------------------------------------------------------------
7070
# Joins on integer keys
71+
setup = common_setup + """
72+
df = DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2),
73+
'key2': np.tile(np.arange(250).repeat(10), 4),
74+
'value': np.random.randn(10000)})
75+
df2 = DataFrame({'key1': np.arange(500), 'value2': randn(500)})
76+
df3 = df[:5000]
77+
"""
78+
7179

72-
join_dataframe_integer_key = Benchmark("merge(df, df2, on='key')", setup,
80+
join_dataframe_integer_key = Benchmark("merge(df, df2, on='key1')", setup,
7381
start_date=datetime(2011, 10, 20))
82+
join_dataframe_integer_2key = Benchmark("merge(df, df3)", setup,
83+
start_date=datetime(2011, 10, 20))
7484

7585
#----------------------------------------------------------------------
7686
# DataFrame joins on index

Diff for: vb_suite/pandas_vb_common.py

+5
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,11 @@
66
import random
77
import numpy as np
88

9+
try:
10+
import pandas._tseries as lib
11+
except:
12+
import pandas.lib as lib
13+
914
try:
1015
Panel = WidePanel
1116
except Exception:

Diff for: vb_suite/parser.py

-16
Original file line number | Diff line number | Diff line change
@@ -33,22 +33,6 @@
3333
cleanup="os.remove('test.csv')",
3434
start_date=datetime(2012, 5, 7))
3535

36-
setup = common_setup + """
37-
import os
38-
N = 10000
39-
K = 8
40-
format = lambda x: '%f' % x
41-
df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)))
42-
df = df.applymap(format)
43-
df.ix[:5, 0] = '#'
44-
df.to_csv('test.csv', sep='|')
45-
"""
46-
47-
read_csv_comment = Benchmark("read_csv('test.csv', sep='|', comment='#')",
48-
setup,
49-
cleanup="os.remove('test.csv')",
50-
start_date=datetime(2012, 5, 7))
51-
5236
setup = common_setup + """
5337
data = ['A,B,C']
5438
data = data + ['1,2,3 # comment'] * 100000

Diff for: vb_suite/reindex.py

+2 -11
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,7 @@
1313
"""
1414
statement = "df.reindex(columns=df.columns[1:5])"
1515

16-
reindex_frame_columns = Benchmark(statement, setup,
17-
name='dataframe_reindex_columns')
16+
frame_reindex_columns = Benchmark(statement, setup)
1817

1918
#----------------------------------------------------------------------
2019

@@ -26,8 +25,7 @@
2625
rng2 = Index(rng[::2])
2726
"""
2827
statement = "df.reindex(rng2)"
29-
reindex_frame_daterange = Benchmark(statement, setup,
30-
name='dataframe_reindex_daterange')
28+
dataframe_reindex = Benchmark(statement, setup)
3129

3230
#----------------------------------------------------------------------
3331
# multiindex reindexing
@@ -116,7 +114,6 @@ def backfill():
116114

117115
# pathological, but realistic
118116
setup = common_setup + """
119-
import pandas._tseries as lib
120117
N = 10000
121118
K = 10
122119
@@ -128,19 +125,16 @@ def backfill():
128125
"""
129126
statement = "df.sort_index(by=['key1', 'key2'])"
130127
frame_sort_index_by_columns = Benchmark(statement, setup,
131-
name='frame_sort_index_by_columns',
132128
start_date=datetime(2011, 11, 1))
133129

134130
# drop_duplicates
135131

136132
statement = "df.drop_duplicates(['key1', 'key2'])"
137133
frame_drop_duplicates = Benchmark(statement, setup,
138-
name='frame_drop_duplicates',
139134
start_date=datetime(2011, 11, 15))
140135

141136
statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)"
142137
frame_drop_dup_inplace = Benchmark(statement, setup,
143-
name='frame_drop_dup_inplace',
144138
start_date=datetime(2012, 5, 16))
145139

146140
lib_fast_zip = Benchmark('lib.fast_zip(df.values.T)', setup,
@@ -152,16 +146,13 @@ def backfill():
152146
"""
153147
statement2 = "df.drop_duplicates(['key1', 'key2'])"
154148
frame_drop_duplicates_na = Benchmark(statement2, setup,
155-
name='frame_drop_duplicates_na',
156149
start_date=datetime(2012, 5, 15))
157150

158151
lib_fast_zip_fillna = Benchmark('lib.fast_zip_fillna(df.values.T)', setup,
159-
name='lib_fast_zip_fillna',
160152
start_date=datetime(2012, 5, 15))
161153

162154
statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)"
163155
frame_drop_dup_na_inplace = Benchmark(statement2, setup,
164-
name='frame_drop_dup_na_inplace',
165156
start_date=datetime(2012, 5, 16))
166157

167158
setup = common_setup + """

Diff for: vb_suite/replace.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
common_setup = """from pandas_vb_common import *
55
from datetime import timedelta
6-
import pandas._tseries as lib
6+
77
N = 1000000
88
99
try:

Diff for: vb_suite/reshape.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -37,10 +37,10 @@ def unpivot(frame):
3737

3838
setup = common_setup + """
3939
NUM_ROWS = 1000
40-
df = DataFrame({'A' : np.random.randint(25, size=NUM_ROWS),
41-
'B' : np.random.randint(25, size=NUM_ROWS),
42-
'C' : np.random.randint(0,10, size=NUM_ROWS),
43-
'D' : np.random.randint(0,10, size=NUM_ROWS),
40+
df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS),
41+
'B' : np.random.randint(50, size=NUM_ROWS),
42+
'C' : np.random.randint(-10,10, size=NUM_ROWS),
43+
'D' : np.random.randint(-10,10, size=NUM_ROWS),
4444
'E' : np.random.randint(10, size=NUM_ROWS),
4545
'F' : np.random.randn(NUM_ROWS)})
4646
idf = df.set_index(['A', 'B', 'C', 'D', 'E'])

0 commit comments

Comments (0)