from vbench.api import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# lookup

setup = common_setup + """
df = DataFrame(np.random.randn(10000, 8), columns=list('abcdefgh'))
df['foo'] = 'bar'

row_labels = list(df.index[::10])[:900]
col_labels = list(df.columns) * 100
row_labels_all = np.array(list(df.index) * len(df.columns), dtype='object')
col_labels_all = np.array(list(df.columns) * len(df.index), dtype='object')
"""

frame_fancy_lookup = Benchmark('df.lookup(row_labels, col_labels)', setup,
                               start_date=datetime(2012, 1, 12))

frame_fancy_lookup_all = Benchmark('df.lookup(row_labels_all, col_labels_all)',
                                   setup,
                                   start_date=datetime(2012, 1, 12))

#----------------------------------------------------------------------
# fillna in place

setup = common_setup + """
df = DataFrame(randn(10000, 100))
df.values[::2] = np.nan
"""

frame_fillna_inplace = Benchmark('df.fillna(0, inplace=True)', setup,
                                 start_date=datetime(2012, 4, 4))


#----------------------------------------------------------------------
# reindex both axes

setup = common_setup + """
df = DataFrame(randn(10000, 10000))
idx = np.arange(4000, 7000)
"""

frame_reindex_axis0 = Benchmark('df.reindex(idx)', setup)

frame_reindex_axis1 = Benchmark('df.reindex(columns=idx)', setup)

frame_reindex_both_axes = Benchmark('df.reindex(index=idx, columns=idx)',
                                    setup, start_date=datetime(2011, 1, 1))

frame_reindex_both_axes_ix = Benchmark('df.ix[idx, idx]', setup,
                                       start_date=datetime(2011, 1, 1))

#----------------------------------------------------------------------
# reindex with upcasts
setup = common_setup + """
df=DataFrame(dict([(c, {
        0: randint(0, 2, 1000).astype(np.bool_),
        1: randint(0, 1000, 1000).astype(np.int16),
        2: randint(0, 1000, 1000).astype(np.int32),
        3: randint(0, 1000, 1000).astype(np.int64)
    }[randint(0, 4)]) for c in range(1000)]))
"""

frame_reindex_upcast = Benchmark('df.reindex(permutation(range(1200)))', setup)

#----------------------------------------------------------------------
# boolean indexing

setup = common_setup + """
df = DataFrame(randn(10000, 100))
bool_arr = np.zeros(10000, dtype=bool)
bool_arr[:1000] = True
"""

frame_boolean_row_select = Benchmark('df[bool_arr]', setup,
                                     start_date=datetime(2011, 1, 1))

#----------------------------------------------------------------------
# iteritems (monitor no-copying behaviour)

setup = common_setup + """
df = DataFrame(randn(10000, 1000))
df2 = DataFrame(randn(3000,1),columns=['A'])
df3 = DataFrame(randn(3000,1))

def f():
    if hasattr(df, '_item_cache'):
        df._item_cache.clear()
    for name, col in df.iteritems():
        pass

def g():
    for name, col in df.iteritems():
        pass

def h():
    for i in xrange(10000):
        df2['A']

def j():
    for i in xrange(10000):
        df3[0]

"""

# as far back as the earliest test currently in the suite
frame_iteritems = Benchmark('f()', setup,
                            start_date=datetime(2010, 6, 1))

frame_iteritems_cached = Benchmark('g()', setup,
                                   start_date=datetime(2010, 6, 1))

frame_getitem_single_column = Benchmark('h()', setup,
                                        start_date=datetime(2010, 6, 1))

frame_getitem_single_column2 = Benchmark('j()', setup,
                                         start_date=datetime(2010, 6, 1))

#----------------------------------------------------------------------
# assignment

setup = common_setup + """
idx = date_range('1/1/2000', periods=100000, freq='D')
df = DataFrame(randn(100000, 1),columns=['A'],index=idx)
def f(x):
    x = x.copy()
    x['date'] = x.index
"""

frame_assign_timeseries_index = Benchmark('f(df)', setup,
                                          start_date=datetime(2013, 10, 1))


#----------------------------------------------------------------------
# to_string

setup = common_setup + """
df = DataFrame(randn(100, 10))
"""

frame_to_string_floats = Benchmark('df.to_string()', setup,
                                   start_date=datetime(2010, 6, 1))

#----------------------------------------------------------------------
# to_html

setup = common_setup + """
nrows=500
df = DataFrame(randn(nrows, 10))
df[0]=period_range("2000","2010",nrows)
df[1]=range(nrows)

"""

frame_to_html_mixed = Benchmark('df.to_html()', setup,
                                   start_date=datetime(2011, 11, 18))


# truncated repr_html, single index

setup = common_setup + """
nrows=10000
data=randn(nrows,10)
idx=MultiIndex.from_arrays(np.tile(randn(3,nrows/100),100))
df=DataFrame(data,index=idx)

"""

frame_html_repr_trunc_mi = Benchmark('df._repr_html_()', setup,
                                   start_date=datetime(2013, 11, 25))

# truncated repr_html, MultiIndex

setup = common_setup + """
nrows=10000
data=randn(nrows,10)
idx=randn(nrows)
df=DataFrame(data,index=idx)

"""

frame_html_repr_trunc_si = Benchmark('df._repr_html_()', setup,
                                   start_date=datetime(2013, 11, 25))


# insert many columns

setup = common_setup + """
N = 1000

def f(K=500):
    df = DataFrame(index=range(N))
    new_col = np.random.randn(N)
    for i in range(K):
        df[i] = new_col
"""

frame_insert_500_columns_end = Benchmark('f()', setup, start_date=datetime(2011, 1, 1))

setup = common_setup + """
N = 1000

def f(K=100):
    df = DataFrame(index=range(N))
    new_col = np.random.randn(N)
    for i in range(K):
        df.insert(0,i,new_col)
"""

frame_insert_100_columns_begin = Benchmark('f()', setup, start_date=datetime(2011, 1, 1))

#----------------------------------------------------------------------
# strings methods, #2602

setup = common_setup + """
s = Series(['abcdefg', np.nan]*500000)
"""

series_string_vector_slice = Benchmark('s.str[:5]', setup,
                                       start_date=datetime(2012, 8, 1))

#----------------------------------------------------------------------
# df.info() and get_dtype_counts() # 2807

setup = common_setup + """
df = pandas.DataFrame(np.random.randn(10,10000))
"""

frame_get_dtype_counts = Benchmark('df.get_dtype_counts()', setup,
                                       start_date=datetime(2012, 8, 1))

##
setup = common_setup + """
df = pandas.DataFrame(np.random.randn(10,10000))
"""

frame_repr_wide = Benchmark('repr(df)', setup,
                            start_date=datetime(2012, 8, 1))

##
setup = common_setup + """
df = pandas.DataFrame(np.random.randn(10000, 10))
"""

frame_repr_tall = Benchmark('repr(df)', setup,
                            start_date=datetime(2012, 8, 1))

##
setup = common_setup + """
df = DataFrame(randn(100000, 1))
"""

frame_xs_row = Benchmark('df.xs(50000)', setup)

##
setup = common_setup + """
df = DataFrame(randn(1,100000))
"""

frame_xs_col = Benchmark('df.xs(50000,axis = 1)', setup)

#----------------------------------------------------------------------
# nulls/masking

## masking
setup = common_setup + """
data = np.random.randn(1000, 500)
df = DataFrame(data)
df = df.where(df > 0) # create nans
bools = df > 0
mask = isnull(df)
"""

frame_mask_bools = Benchmark('bools.mask(mask)', setup,
                             start_date=datetime(2013,1,1))

frame_mask_floats  = Benchmark('bools.astype(float).mask(mask)', setup,
                             start_date=datetime(2013,1,1))

## isnull
setup = common_setup + """
data = np.random.randn(1000, 1000)
df = DataFrame(data)
"""
frame_isnull  = Benchmark('isnull(df)', setup,
                           start_date=datetime(2012,1,1))

## dropna
dropna_setup = common_setup + """
data = np.random.randn(10000, 1000)
df = DataFrame(data)
df.ix[50:1000,20:50] = np.nan
df.ix[2000:3000] = np.nan
df.ix[:,60:70] = np.nan
"""
frame_dropna_axis0_any  = Benchmark('df.dropna(how="any",axis=0)', dropna_setup,
                                     start_date=datetime(2012,1,1))
frame_dropna_axis0_all  = Benchmark('df.dropna(how="all",axis=0)', dropna_setup,
                                     start_date=datetime(2012,1,1))

frame_dropna_axis1_any  = Benchmark('df.dropna(how="any",axis=1)', dropna_setup,
                                    start_date=datetime(2012,1,1))

frame_dropna_axis1_all  = Benchmark('df.dropna(how="all",axis=1)', dropna_setup,
                                    start_date=datetime(2012,1,1))

# dropna on mixed dtypes
dropna_mixed_setup = common_setup + """
data = np.random.randn(10000, 1000)
df = DataFrame(data)
df.ix[50:1000,20:50] = np.nan
df.ix[2000:3000] = np.nan
df.ix[:,60:70] = np.nan
df['foo'] = 'bar'
"""
frame_dropna_axis0_any_mixed_dtypes  = Benchmark('df.dropna(how="any",axis=0)', dropna_mixed_setup,
                                                 start_date=datetime(2012,1,1))
frame_dropna_axis0_all_mixed_dtypes  = Benchmark('df.dropna(how="all",axis=0)', dropna_mixed_setup,
                                                 start_date=datetime(2012,1,1))

frame_dropna_axis1_any_mixed_dtypes  = Benchmark('df.dropna(how="any",axis=1)', dropna_mixed_setup,
                                                 start_date=datetime(2012,1,1))

frame_dropna_axis1_all_mixed_dtypes  = Benchmark('df.dropna(how="all",axis=1)', dropna_mixed_setup,
                                                 start_date=datetime(2012,1,1))

## dropna multi
dropna_setup = common_setup + """
data = np.random.randn(10000, 1000)
df = DataFrame(data)
df.ix[50:1000,20:50] = np.nan
df.ix[2000:3000] = np.nan
df.ix[:,60:70] = np.nan
df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x)))
df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x)))
"""
frame_count_level_axis0_multi = Benchmark('df.count(axis=0, level=1)', dropna_setup,
                                          start_date=datetime(2012,1,1))

frame_count_level_axis1_multi = Benchmark('df.count(axis=1, level=1)', dropna_setup,
                                          start_date=datetime(2012,1,1))

# dropna on mixed dtypes
dropna_mixed_setup = common_setup + """
data = np.random.randn(10000, 1000)
df = DataFrame(data)
df.ix[50:1000,20:50] = np.nan
df.ix[2000:3000] = np.nan
df.ix[:,60:70] = np.nan
df['foo'] = 'bar'
df.index = MultiIndex.from_tuples(df.index.map(lambda x: (x, x)))
df.columns = MultiIndex.from_tuples(df.columns.map(lambda x: (x, x)))
"""
frame_count_level_axis0_mixed_dtypes_multi  = Benchmark('df.count(axis=0, level=1)', dropna_mixed_setup,
                                                        start_date=datetime(2012,1,1))

frame_count_level_axis1_mixed_dtypes_multi  = Benchmark('df.count(axis=1, level=1)', dropna_mixed_setup,
                                                        start_date=datetime(2012,1,1))

#----------------------------------------------------------------------
# apply

setup = common_setup + """
s = Series(np.arange(1028.))
df = DataFrame({ i:s for i in range(1028) })
"""
frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup,
                           name = 'frame_apply_user_func',
                           start_date=datetime(2012,1,1))

setup = common_setup + """
df = DataFrame(np.random.randn(1000,100))
"""
frame_apply_lambda_mean = Benchmark('df.apply(lambda x: x.sum())', setup,
                                    name = 'frame_apply_lambda_mean',
                                    start_date=datetime(2012,1,1))
setup = common_setup + """
df = DataFrame(np.random.randn(1000,100))
"""
frame_apply_np_mean = Benchmark('df.apply(np.mean)', setup,
                               name = 'frame_apply_np_mean',
                               start_date=datetime(2012,1,1))

setup = common_setup + """
df = DataFrame(np.random.randn(1000,100))
"""
frame_apply_pass_thru = Benchmark('df.apply(lambda x: x)', setup,
                                  name = 'frame_apply_pass_thru',
                                  start_date=datetime(2012,1,1))

setup = common_setup + """
df = DataFrame(np.random.randn(1000,100))
"""
frame_apply_axis_1 = Benchmark('df.apply(lambda x: x+1,axis=1)', setup,
                               name = 'frame_apply_axis_1',
                               start_date=datetime(2012,1,1))

setup = common_setup + """
df = DataFrame(np.random.randn(1000,3),columns=list('ABC'))
"""
frame_apply_ref_by_name = Benchmark('df.apply(lambda x: x["A"] + x["B"],axis=1)', setup,
                                     name = 'frame_apply_ref_by_name',
                                     start_date=datetime(2012,1,1))

#----------------------------------------------------------------------
# dtypes

setup = common_setup + """
df = DataFrame(np.random.randn(1000,1000))
"""
frame_dtypes = Benchmark('df.dtypes', setup,
                         start_date=datetime(2012,1,1))

#----------------------------------------------------------------------
# equals
setup = common_setup + """
def make_pair(name):
    df = globals()[name]
    df2 = df.copy()
    df2.ix[-1,-1] = np.nan
    return df, df2

def test_equal(name):
    df, df2 = pairs[name]
    return df.equals(df)

def test_unequal(name):
    df, df2 = pairs[name]
    return df.equals(df2)

float_df = DataFrame(np.random.randn(1000, 1000))
object_df = DataFrame([['foo']*1000]*1000)
nonunique_cols = object_df.copy()
nonunique_cols.columns = ['A']*len(nonunique_cols.columns)

pairs = dict([(name,make_pair(name))
         for name in ('float_df', 'object_df', 'nonunique_cols')])
"""
frame_float_equal = Benchmark('test_equal("float_df")', setup)
frame_object_equal = Benchmark('test_equal("object_df")', setup)
frame_nonunique_equal = Benchmark('test_equal("nonunique_cols")', setup)

frame_float_unequal = Benchmark('test_unequal("float_df")', setup)
frame_object_unequal = Benchmark('test_unequal("object_df")', setup)
frame_nonunique_unequal = Benchmark('test_unequal("nonunique_cols")', setup)

#-----------------------------------------------------------------------------
# interpolate
# this is the worst case, where every column has NaNs.
setup = common_setup + """
df = DataFrame(randn(10000, 100))
df.values[::2] = np.nan
"""

frame_interpolate = Benchmark('df.interpolate()', setup,
                               start_date=datetime(2014, 2, 7))

setup = common_setup + """
df = DataFrame({'A': np.arange(0, 10000),
                'B': np.random.randint(0, 100, 10000),
                'C': randn(10000),
                'D': randn(10000)})
df.loc[1::5, 'A'] = np.nan
df.loc[1::5, 'C'] = np.nan
"""

frame_interpolate_some_good = Benchmark('df.interpolate()', setup,
                                        start_date=datetime(2014, 2, 7))
frame_interpolate_some_good_infer = Benchmark('df.interpolate(downcast="infer")',
                                              setup,
                                              start_date=datetime(2014, 2, 7))


#-------------------------------------------------------------------------
# frame shift speedup issue-5609

setup = common_setup + """
df = DataFrame(np.random.rand(10000,500))
# note: df._data.blocks are f_contigous
"""
frame_shift_axis0 = Benchmark('df.shift(1,axis=0)', setup,
                    start_date=datetime(2014,1,1))
frame_shift_axis1 = Benchmark('df.shift(1,axis=1)', setup,
                    name = 'frame_shift_axis_1',
                    start_date=datetime(2014,1,1))


#-----------------------------------------------------------------------------
# from_records issue-6700

setup = common_setup + """
def get_data(n=100000):
    return ((x, x*20, x*100) for x in xrange(n))
"""

frame_from_records_generator = Benchmark('df = DataFrame.from_records(get_data())',
                                setup,
                                name='frame_from_records_generator',
                                start_date=datetime(2013,10,04))  # issue-4911

frame_from_records_generator_nrows = Benchmark('df = DataFrame.from_records(get_data(), nrows=1000)',
                                setup,
                                name='frame_from_records_generator_nrows',
                                start_date=datetime(2013,10,04))  # issue-4911