Python Pandas Hands-On
Python Pandas Handson
https://t.me/fresco_milestone (@fresco_milestone)
import pandas as pd
import numpy as np
# Assemble class A's table from the height and weight data, then report its
# shape.
# NOTE(review): heights_A and weights_A are not defined in this part of the
# file; presumably they are created earlier -- confirm.
df_A = pd.DataFrame().assign(
    Student_height=heights_A,
    Student_weight=weights_A,
)
print(df_A.shape)
# Simulate class B: five students (s1..s5) whose heights and weights are drawn
# from normal distributions, combined into a two-column table.

# Height distribution parameters.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)  # fixed seed so the random draws are reproducible
heights_B = pd.Series(
    np.random.normal(loc=my_mean, scale=my_std, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Weight distribution parameters.
my_mean1 = 75.0
my_std1 = 12.0
# Fix: the weights must be drawn with the weight parameters. The height
# mean/std were previously passed here, which left my_mean1/my_std1 unused
# and produced weights centred on 170.
weights_B = pd.Series(
    np.random.normal(loc=my_mean1, scale=my_std1, size=5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

print(heights_B.mean())

# Build the table column by column; both Series share the s1..s5 labels.
df_B = pd.DataFrame()
df_B['Student_height'] = heights_B
df_B['Student_weight'] = weights_B
print(df_B.columns.tolist())
2. Accessing Pandas Data Structures
# Start class A's table with the height column.
# NOTE(review): heights_A is not defined in this part of the file -- confirm
# where it comes from.
df_A = pd.DataFrame().assign(Student_height=heights_A)
Join our channel if you haven’t joined yet: https://t.me/fresco_milestone (@fresco_milestone)
# Add the weight column, then demonstrate column and row selection on df_A.
# NOTE(review): weights_A is not defined in this part of the file -- confirm.
df_A['Student_weight'] = weights_A

# Pull out a single column by label and show what type of object comes back.
height = df_A['Student_height']
print(type(height))

# Rows whose index label is s1 or s2.
df_s1s2 = df_A.loc[df_A.index.isin(['s1', 's2'])]
print(df_s1s2)

# Rows s1, s2 and s5, then reordered to s2, s5, s1.
df_s2s5s1 = df_A.loc[df_A.index.isin(['s1', 's2', 's5'])].reindex(['s2', 's5', 's1'])
print(df_s2s5s1)

# Rows whose index label is s1 or s4.
df_s1s4 = df_A.loc[df_A.index.isin(['s1', 's4'])]
print(df_s1s4)
# Round-trip class A's table through a CSV file in the working directory.
# NOTE(review): heights_A and weights_A are not defined in this part of the
# file -- confirm where they come from.
df_A = pd.DataFrame().assign(
    Student_height=heights_A,
    Student_weight=weights_A,
)

# Write with default options, read the file straight back and show the result.
df_A.to_csv('classA.csv')
df_A2 = pd.read_csv('classA.csv')
print(df_A2)
# Class B heights: five draws from a normal distribution with the mean and
# standard deviation below, labelled s1..s5.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)  # fixed seed so the draws are repeatable
heights_B = pd.Series(
    np.random.normal(my_mean, my_std, 5),
    index=['s1', 's2', 's3', 's4', 's5'],
)
Join our channel if you haven’t joined yet: https://t.me/fresco_milestone (@fresco_milestone)
# Class B weights: five draws from a normal distribution with the mean and
# standard deviation below, labelled s1..s5. The generator is re-seeded with
# the same value immediately before drawing.
my_mean1 = 75.0
my_std1 = 12.0
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(my_mean1, my_std1, 5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Combine heights and weights into class B's table.
df_B = pd.DataFrame().assign(
    Student_height=heights_B,
    Student_weight=weights_B,
)

# Save without the index labels, then read the file back three ways.
df_B.to_csv('classB.csv', index=False)

# 1) Default read.
df_B2 = pd.read_csv('classB.csv')
print(df_B2)

# 2) header=None: no line of the file is used as column names.
df_B3 = pd.read_csv('classB.csv', header=None)
print(df_B3)

# 3) As above, additionally skipping the first two lines of the file.
df_B4 = pd.read_csv('classB.csv', header=None, skiprows=2)
print(df_B4)
4. Indexing Dataframes
# Show the dates being looked up, then which of them occur in DatetimeIndex.
# NOTE(review): neither dates_to_be_searched nor DatetimeIndex is defined in
# this part of the file; DatetimeIndex is presumably a variable created
# earlier (it is not imported from pandas here) -- confirm.
print(dates_to_be_searched)
membership = dates_to_be_searched.isin(DatetimeIndex)
print(membership)
5. Data Cleaning
Join our channel if you haven’t joined yet: https://t.me/fresco_milestone (@fresco_milestone)
# Build class A's table and blank out some entries to produce missing data.
# NOTE(review): heights_A and weights_A are not defined in this part of the
# file -- confirm where they come from.
df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Every column of row s3 becomes NaN.
df_A.loc['s3'] = np.nan

# Only the second column (Student_weight) of row s5 becomes NaN.
# Fix: this was a chained assignment, df_A.loc['s5'][1] = ..., which writes to
# a temporary row object and is not guaranteed to reach df_A (with
# Copy-on-Write it never does). It also relied on positional integer access
# into a label-indexed Series. One .loc[row, column] call updates df_A itself.
df_A.loc['s5', 'Student_weight'] = np.nan
6. Data Aggregation
# Rebuild class A's table and keep only the row labelled s5.
# NOTE(review): heights_A and weights_A are not defined in this part of the
# file -- confirm where they come from.
df_A = pd.DataFrame().assign(
    Student_height=heights_A,
    Student_weight=weights_A,
)
df_A_filter2 = df_A.loc[df_A.index.isin(['s5'])]
print(df_A_filter2)
Join our channel if you haven’t joined yet: https://t.me/fresco_milestone (@fresco_milestone)
7. Data Merge – 1
# Build class A's table and add the extra record(s) held in `s` as new rows.
# NOTE(review): heights_A, weights_A and s are not defined in this part of the
# file; s is presumably a named Series holding one student's height and
# weight -- confirm.
df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Fix: DataFrame.append was removed in pandas 2.0, so use pd.concat instead.
# A Series is first turned into a one-row frame (its name becomes the row
# label, its index the column labels) so it is added as a row, as append did.
new_rows = s.to_frame().T if isinstance(s, pd.Series) else s
df_AA = pd.concat([df_A, new_rows])
print(df_AA)
# Rebuild class B's table: heights and weights for students s1..s5, each drawn
# from its own normal distribution. The generator is re-seeded with the same
# value before each draw.

# Heights.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(
    np.random.normal(my_mean, my_std, 5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Weights.
my_mean1 = 75.0
my_std1 = 12.0
np.random.seed(100)
weights_B = pd.Series(
    np.random.normal(my_mean1, my_std1, 5),
    index=['s1', 's2', 's3', 's4', 's5'],
)

# Two-column table, one row per student.
df_B = pd.DataFrame().assign(
    Student_height=heights_B,
    Student_weight=weights_B,
)
Join our channel if you haven’t joined yet: https://t.me/fresco_milestone (@fresco_milestone)
# Stack the extended class A table on top of class B's table and show it.
df = pd.concat(objs=[df_AA, df_B], axis=0)
print(df)
8. Data Merge – 2
# Join the two tables on their shared 'nameid' column and show the result.
# NOTE(review): master and transaction are not defined in this part of the
# file -- confirm where they come from.
mdf = pd.merge(left=master, right=transaction, how='inner', on='nameid')
print(mdf)