BUG: fix failing vbenchmarks pandas-dev#2404

wesm · wesm · commit 0f59d14de76f · 2012-12-01T21:36:14.000-05:00
diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py
@@ -12,7 +12,10 @@
 
 setup = common_setup + """
 rng = DateRange('1/1/2000', periods=10000, offset=datetools.Minute())
-rng = rng.view(Index)
+if rng.dtype == object:
+    rng = rng.view(Index)
+else:
+    rng = rng.asobject
 rng2 = rng[:-1]
 """
 
diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py
@@ -68,9 +68,19 @@
 
 #----------------------------------------------------------------------
 # Joins on integer keys
+setup = common_setup + """
+df = DataFrame({'key1': np.tile(np.arange(500).repeat(10), 2),
+                'key2': np.tile(np.arange(250).repeat(10), 4),
+                'value': np.random.randn(10000)})
+df2 = DataFrame({'key1': np.arange(500), 'value2': randn(500)})
+df3 = df[:5000]
+"""
+
 
-join_dataframe_integer_key = Benchmark("merge(df, df2, on='key')", setup,
+join_dataframe_integer_key = Benchmark("merge(df, df2, on='key1')", setup,
                                        start_date=datetime(2011, 10, 20))
+join_dataframe_integer_2key = Benchmark("merge(df, df3)", setup,
+                                        start_date=datetime(2011, 10, 20))
 
 #----------------------------------------------------------------------
 # DataFrame joins on index
diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py
@@ -6,6 +6,11 @@
 import random
 import numpy as np
 
+try:
+    import pandas._tseries as lib
+except ImportError:  # no pandas._tseries in this pandas build; use pandas.lib
+    import pandas.lib as lib
+
 try:
     Panel = WidePanel
 except Exception:
diff --git a/vb_suite/parser.py b/vb_suite/parser.py
@@ -33,22 +33,6 @@
                              cleanup="os.remove('test.csv')",
                              start_date=datetime(2012, 5, 7))
 
-setup = common_setup + """
-import os
-N = 10000
-K = 8
-format = lambda x: '%f' % x
-df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)))
-df = df.applymap(format)
-df.ix[:5, 0] = '#'
-df.to_csv('test.csv', sep='|')
-"""
-
-read_csv_comment = Benchmark("read_csv('test.csv', sep='|', comment='#')",
-                             setup,
-                             cleanup="os.remove('test.csv')",
-                             start_date=datetime(2012, 5, 7))
-
 setup = common_setup + """
 data = ['A,B,C']
 data = data + ['1,2,3 # comment'] * 100000
diff --git a/vb_suite/reindex.py b/vb_suite/reindex.py
@@ -13,8 +13,7 @@
 """
 statement = "df.reindex(columns=df.columns[1:5])"
 
-reindex_frame_columns = Benchmark(statement, setup,
-                                  name='dataframe_reindex_columns')
+frame_reindex_columns = Benchmark(statement, setup)
 
 #----------------------------------------------------------------------
 
@@ -26,8 +25,7 @@
 rng2 = Index(rng[::2])
 """
 statement = "df.reindex(rng2)"
-reindex_frame_daterange = Benchmark(statement, setup,
-                                    name='dataframe_reindex_daterange')
+dataframe_reindex = Benchmark(statement, setup)
 
 #----------------------------------------------------------------------
 # multiindex reindexing
@@ -116,7 +114,6 @@ def backfill():
 
 # pathological, but realistic
 setup = common_setup + """
-import pandas._tseries as lib
 N = 10000
 K = 10
 
@@ -128,19 +125,16 @@ def backfill():
 """
 statement = "df.sort_index(by=['key1', 'key2'])"
 frame_sort_index_by_columns = Benchmark(statement, setup,
-                                        name='frame_sort_index_by_columns',
                                         start_date=datetime(2011, 11, 1))
 
 # drop_duplicates
 
 statement = "df.drop_duplicates(['key1', 'key2'])"
 frame_drop_duplicates = Benchmark(statement, setup,
-                                  name='frame_drop_duplicates',
                                   start_date=datetime(2011, 11, 15))
 
 statement = "df.drop_duplicates(['key1', 'key2'], inplace=True)"
 frame_drop_dup_inplace = Benchmark(statement, setup,
-                                  name='frame_drop_dup_inplace',
                                   start_date=datetime(2012, 5, 16))
 
 lib_fast_zip = Benchmark('lib.fast_zip(df.values.T)', setup,
@@ -152,16 +146,13 @@ def backfill():
 """
 statement2 = "df.drop_duplicates(['key1', 'key2'])"
 frame_drop_duplicates_na = Benchmark(statement2, setup,
-                                     name='frame_drop_duplicates_na',
                                      start_date=datetime(2012, 5, 15))
 
 lib_fast_zip_fillna = Benchmark('lib.fast_zip_fillna(df.values.T)', setup,
-                                name='lib_fast_zip_fillna',
                                 start_date=datetime(2012, 5, 15))
 
 statement2 = "df.drop_duplicates(['key1', 'key2'], inplace=True)"
 frame_drop_dup_na_inplace = Benchmark(statement2, setup,
-                                  name='frame_drop_dup_na_inplace',
                                   start_date=datetime(2012, 5, 16))
 
 setup = common_setup + """
diff --git a/vb_suite/replace.py b/vb_suite/replace.py
@@ -3,7 +3,7 @@
 
 common_setup = """from pandas_vb_common import *
 from datetime import timedelta
-import pandas._tseries as lib
+
 N = 1000000
 
 try:
diff --git a/vb_suite/reshape.py b/vb_suite/reshape.py
@@ -37,10 +37,10 @@ def unpivot(frame):
 
 setup = common_setup + """
 NUM_ROWS = 1000
-df = DataFrame({'A' : np.random.randint(25, size=NUM_ROWS),
-                'B' : np.random.randint(25, size=NUM_ROWS),
-                'C' : np.random.randint(0,10, size=NUM_ROWS),
-                'D' : np.random.randint(0,10, size=NUM_ROWS),
+df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS),
+                'B' : np.random.randint(50, size=NUM_ROWS),
+                'C' : np.random.randint(-10,10, size=NUM_ROWS),
+                'D' : np.random.randint(-10,10, size=NUM_ROWS),
                 'E' : np.random.randint(10, size=NUM_ROWS),
                 'F' : np.random.randn(NUM_ROWS)})
 idf = df.set_index(['A', 'B', 'C', 'D', 'E'])