# reshape.py -- forked from pandas-dev/pandas (65 lines, 53 loc, 2.28 KB)
# vbench benchmarks for pandas reshaping operations: unstack, stack, pivot, melt.
from vbench.api import Benchmark
from datetime import datetime
# Source text shared by every benchmark setup in this file.  It is only a
# string here (nothing in it runs at import time); each benchmark passes it,
# alone or with extra lines appended, to Benchmark as its setup argument.
common_setup = (
    "from pandas_vb_common import *\n"
    "index = MultiIndex.from_arrays([np.arange(100).repeat(100),\n"
    "np.roll(np.tile(np.arange(100), 100), 25)])\n"
    "df = DataFrame(np.random.randn(10000, 4), index=index)\n"
)
# Benchmark statement: unstack level 1 of the MultiIndexed frame `df` that
# common_setup builds.  The assignment is kept on the same line as the
# Benchmark( call -- NOTE(review): vbench may infer the benchmark name from
# this source line; confirm before re-wrapping.
reshape_unstack_simple = Benchmark("df.unstack(1)", common_setup,
                                   start_date=datetime(2011, 10, 1))
# Setup for the stack benchmark: the shared setup followed by one extra line
# that pre-computes the unstacked frame `udf`, so the benchmarked statement
# is only udf.stack().
setup = common_setup + "\nudf = df.unstack(1)\n"
reshape_stack_simple = Benchmark("udf.stack()", setup,
                                 start_date=datetime(2011, 10, 1))
# Setup for the pivot benchmark.  The text below is Python source passed to
# Benchmark as setup code, so the body of unpivot() has to be indented inside
# the string; with the body at column 0 the setup is not valid Python.
#
# What the setup source does:
#   * unpivot(frame) flattens an N x K frame into N*K rows with the columns
#     'date' (index values tiled K times), 'variable' (column labels, each
#     repeated N times) and 'value' (frame values in column-major order).
#   * A 10000 x 50 frame indexed by date_range(..., freq='h') is unpivoted
#     into `pdf`, and `f` pivots `pdf` back on ('date', 'variable', 'value').
setup = common_setup + """
def unpivot(frame):
    N, K = frame.shape
    data = {'value' : frame.values.ravel('F'),
            'variable' : np.asarray(frame.columns).repeat(N),
            'date' : np.tile(np.asarray(frame.index), K)}
    return DataFrame(data, columns=['date', 'variable', 'value'])
index = date_range('1/1/2000', periods=10000, freq='h')
df = DataFrame(randn(10000, 50), index=index, columns=range(50))
pdf = unpivot(df)
f = lambda: pdf.pivot('date', 'variable', 'value')
"""

# Benchmark statement: call the pivot closure defined by the setup source.
reshape_pivot_time_series = Benchmark('f()', setup,
                                      start_date=datetime(2012, 5, 1))
# Sparse key space, re: #2278
#
# The text below is Python source passed to Benchmark as setup code, so the
# for / if / break nesting has to be expressed with indentation inside the
# string; with every line at column 0 the setup is not valid Python (and
# `break` would sit outside any loop).
#
# What the setup source does: up to 10 times, build a NUM_ROWS-row frame of
# random integer key columns A-E plus a random float column F, index it by
# the five key columns as `idf`, and stop as soon as that index has NUM_ROWS
# unique entries (i.e. no duplicated key tuple).
setup = common_setup + """
NUM_ROWS = 1000
for iter in range(10):
    df = DataFrame({'A' : np.random.randint(50, size=NUM_ROWS),
                    'B' : np.random.randint(50, size=NUM_ROWS),
                    'C' : np.random.randint(-10,10, size=NUM_ROWS),
                    'D' : np.random.randint(-10,10, size=NUM_ROWS),
                    'E' : np.random.randint(10, size=NUM_ROWS),
                    'F' : np.random.randn(NUM_ROWS)})
    idf = df.set_index(['A', 'B', 'C', 'D', 'E'])
    if len(idf.index.unique()) == NUM_ROWS:
        break
"""

# Benchmark statement: unstack the innermost level of the five-level index.
unstack_sparse_keyspace = Benchmark('idf.unstack()', setup,
                                    start_date=datetime(2011, 10, 1))
# Melt
#
# Setup source: the shared setup, an import of melt, then a 10000 x 3 random
# frame with columns A, B, C and two random integer id columns (id1, id2).
setup = common_setup + (
    "\n"
    "from pandas.core.reshape import melt\n"
    "df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C'])\n"
    "df['id1'] = np.random.randint(0, 10, 10000)\n"
    "df['id2'] = np.random.randint(100, 1000, 10000)\n"
)
# Benchmark statement: melt the frame, keeping id1 and id2 as id columns.
melt_dataframe = Benchmark("melt(df, id_vars=['id1', 'id2'])", setup,
                           start_date=datetime(2012, 8, 1))