-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
/
Copy pathdev_attempts.py
39 lines (31 loc) · 1.25 KB
/
dev_attempts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
import numpy as np
import timeit
# Seed NumPy's global RNG so every run builds the same benchmark data.
np.random.seed(43)

# also tested for n = 1000, 10_000, 100_000
n = 1_000_000

# One integer column per letter, filled with random values drawn from [0, 10).
cols = list('abcdef')
df = pd.DataFrame(
    data=np.random.randint(0, 10, size=(n, len(cols))),
    columns=cols,
)

# Extra column holding, for each row, one randomly chosen column name.
df['col'] = np.random.choice(cols, n)

# Plain NumPy arrays handed to the lookup benchmark below.
# The 'col' Series shares the frame's index, so df.index is the same labels.
idx = df.index.to_numpy()
# NOTE: `cols` is rebound here from the list of column names to the per-row
# array of chosen column names.
cols = df['col'].to_numpy()
def og_lookup(idx, cols):
    """Call ``df.lookup`` on the module-level frame using the ``'og'`` variant.

    Parameters
    ----------
    idx
        Passed through as the first argument of ``df.lookup``.
    cols
        Passed through as the second argument of ``df.lookup``.

    Returns
    -------
    Whatever ``df.lookup(idx, cols, 'og')`` returns.

    NOTE(review): released pandas ``DataFrame.lookup`` accepts only
    ``(row_labels, col_labels)``; the third positional ``'og'`` presumably
    selects an implementation in a development build -- confirm against the
    pandas version this script targets.
    """
    variant = 'og'
    looked_up = df.lookup(idx, cols, variant)
    return looked_up
# def melt_lookup():
# melt = df.melt('col')
# melt = melt.loc[lambda x: x['col']==x['variable'], 'value']
# melt = melt.reset_index(drop=True)
# return melt
# def quan_lookup(idx,cols):
# return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
# def quan_lookup2(idx,cols):
# return df.reindex(cols,axis=1).to_numpy()[np.arange(df.shape[0]), idx]
# def marco_lookup():
# return df.melt('col', ignore_index=False).query('col==variable')['value'].reindex(df.index).to_numpy()
# Time 10 consecutive calls of og_lookup on the prepared label arrays.
# NOTE(review): the elapsed seconds returned by timeit are neither stored nor
# printed, so the figure is only visible when run in an interactive session.
timeit.Timer(lambda: og_lookup(idx, cols)).timeit(number=10)
# timeit.timeit(lambda: melt_lookup(),number=10)
# timeit.timeit(lambda: quan_lookup(idx,cols),number=10)
# timeit.timeit(lambda: quan_lookup2(idx,cols),number=10)
# timeit.timeit(lambda: marco_lookup(),number=10)
# idx, cols = pd.factorize(df['col'])
# df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]