# vb_suite/eval.py

from vbench.benchmark import Benchmark
from datetime import datetime

# Setup source shared by every pd.eval benchmark below: imports plus four
# random 20000 x 100 frames named df, df2, df3 and df4.
common_setup = (
    "from pandas_vb_common import *\n"
    "import pandas as pd\n"
    "df  = DataFrame(np.random.randn(20000, 100))\n"
    "df2 = DataFrame(np.random.randn(20000, 100))\n"
    "df3 = DataFrame(np.random.randn(20000, 100))\n"
    "df4 = DataFrame(np.random.randn(20000, 100))\n"
)

# Same as ``common_setup`` followed by a call to
# expr.set_numexpr_threads(1); used by the "*_one_thread" benchmarks.
setup = common_setup + (
    "\n"
    "import pandas.computation.expressions as expr\n"
    "expr.set_numexpr_threads(1)\n"
)

SECTION = 'Eval'

#----------------------------------------------------------------------
# binary ops

#----------------------------------------------------------------------
# add
# Four-frame addition through pd.eval.  Each statement is benchmarked twice:
# once with ``common_setup`` and once with ``setup`` (which additionally
# calls expr.set_numexpr_threads(1)).

# default engine, common_setup
eval_frame_add_all_threads = Benchmark(
    "pd.eval('df + df2 + df3 + df4')",
    common_setup,
    name='eval_frame_add_all_threads',
    start_date=datetime(2013, 7, 21))

# default engine, single-thread setup
eval_frame_add_one_thread = Benchmark(
    "pd.eval('df + df2 + df3 + df4')",
    setup,
    name='eval_frame_add_one_thread',
    start_date=datetime(2013, 7, 26))

# engine='python', common_setup
eval_frame_add_python = Benchmark(
    "pd.eval('df + df2 + df3 + df4', engine='python')",
    common_setup,
    name='eval_frame_add_python',
    start_date=datetime(2013, 7, 21))

# engine='python', single-thread setup
eval_frame_add_python_one_thread = Benchmark(
    "pd.eval('df + df2 + df3 + df4', engine='python')",
    setup,
    name='eval_frame_add_python_one_thread',
    start_date=datetime(2013, 7, 26))
#----------------------------------------------------------------------
# mult

# Four-frame multiplication through pd.eval, in the same four variants as
# the addition benchmarks: default engine vs. engine='python', each with
# ``common_setup`` and with the single-thread ``setup``.

# default engine, common_setup
eval_frame_mult_all_threads = Benchmark(
    "pd.eval('df * df2 * df3 * df4')",
    common_setup,
    name='eval_frame_mult_all_threads',
    start_date=datetime(2013, 7, 21))

# default engine, single-thread setup
eval_frame_mult_one_thread = Benchmark(
    "pd.eval('df * df2 * df3 * df4')",
    setup,
    name='eval_frame_mult_one_thread',
    start_date=datetime(2013, 7, 26))

# engine='python', common_setup
eval_frame_mult_python = Benchmark(
    "pd.eval('df * df2 * df3 * df4', engine='python')",
    common_setup,
    name='eval_frame_mult_python',
    start_date=datetime(2013, 7, 21))

# engine='python', single-thread setup
eval_frame_mult_python_one_thread = Benchmark(
    "pd.eval('df * df2 * df3 * df4', engine='python')",
    setup,
    name='eval_frame_mult_python_one_thread',
    start_date=datetime(2013, 7, 26))

#----------------------------------------------------------------------
# multi and

# Element-wise AND of four boolean frames (each ``dfN > 0``) through pd.eval,
# in four variants: default engine vs. engine='python', each with
# ``common_setup`` and with the single-thread ``setup``.

eval_frame_and_all_threads = \
    Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')",
              common_setup,
              name='eval_frame_and_all_threads',
              start_date=datetime(2013, 7, 21))

eval_frame_and_one_thread = \
    Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')", setup,
              name='eval_frame_and_one_thread',
              start_date=datetime(2013, 7, 26))

eval_frame_and_python = \
    Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')",
              common_setup, name='eval_frame_and_python',
              start_date=datetime(2013, 7, 21))

# The variable name matches the benchmark's ``name``.  It must not reuse
# ``eval_frame_and_one_thread``: rebinding that module attribute would leave
# only one of the two one-thread benchmarks reachable from the module.
eval_frame_and_python_one_thread = \
    Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')",
              setup,
              name='eval_frame_and_python_one_thread',
              start_date=datetime(2013, 7, 26))

#--------------------------------------------------------------------
# chained comp
# Chained comparison ``df < df2 < df3 < df4`` through pd.eval, in four
# variants: default engine vs. engine='python', each with ``common_setup``
# and with the single-thread ``setup``.

eval_frame_chained_cmp_all_threads = \
    Benchmark("pd.eval('df < df2 < df3 < df4')", common_setup,
              name='eval_frame_chained_cmp_all_threads',
              start_date=datetime(2013, 7, 21))

eval_frame_chained_cmp_one_thread = \
    Benchmark("pd.eval('df < df2 < df3 < df4')", setup,
              name='eval_frame_chained_cmp_one_thread',
              start_date=datetime(2013, 7, 26))

eval_frame_chained_cmp_python = \
    Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')",
              common_setup, name='eval_frame_chained_cmp_python',
              start_date=datetime(2013, 7, 26))

# The variable name matches the benchmark's ``name``.  It must not reuse
# ``eval_frame_chained_cmp_one_thread``: rebinding that module attribute
# would leave only one of the two one-thread benchmarks reachable from the
# module.
eval_frame_chained_cmp_python_one_thread = \
    Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", setup,
              name='eval_frame_chained_cmp_python_one_thread',
              start_date=datetime(2013, 7, 26))


#----------------------------------------------------------------------
# DataFrame.query benchmarks
#
# ``common_setup`` and ``setup`` are rebound from here on.  The pd.eval
# benchmarks above were already constructed with the earlier strings, so
# they are not affected by the rebinding.

common_setup = """from pandas_vb_common import *
"""

# Builds an N-element date_range ``index``, a Series ``s`` holding the same
# values, and ``ts``, the element of ``s`` at position N // 2 - 1, which the
# datetime queries compare against.
setup = common_setup + """
N = 1000000
halfway = N // 2 - 1
index = date_range('20010101', periods=N, freq='T')
s = Series(index)
ts = s.iloc[halfway]
"""

# Datetime values stored in a regular column named 'dates'.
series_setup = setup + """
df = DataFrame({'dates': s.values})
"""

# ``name`` is passed explicitly, as for every other benchmark in this file,
# and is identical to the variable name.
query_datetime_series = Benchmark("df.query('dates < @ts')",
                                  series_setup,
                                  name='query_datetime_series',
                                  start_date=datetime(2013, 9, 27))

# Datetime values used as the frame's index.
index_setup = setup + """
df = DataFrame({'a': np.random.randn(N)}, index=index)
"""

query_datetime_index = Benchmark("df.query('index < @ts')",
                                 index_setup,
                                 name='query_datetime_index',
                                 start_date=datetime(2013, 9, 27))

# NOTE: this extends the datetime ``setup`` above, so the resulting string
# also contains the index/s/ts statements and assigns N a second time.  The
# text is deliberately left unchanged so the benchmark's setup code stays
# exactly as it was.
setup = setup + """
N = 1000000
df = DataFrame({'a': np.random.randn(N)})
min_val = df['a'].min()
max_val = df['a'].max()
"""

# The bounds are the column's own min and max, so the condition holds for
# every row of ``df``.
query_with_boolean_selection = Benchmark("df.query('(a >= @min_val) & (a <= @max_val)')",
                                         setup,
                                         name='query_with_boolean_selection',
                                         start_date=datetime(2013, 9, 27))