# Multi Index
# Multi Index
# Build a MultiIndex from a list of tuples (one tuple per index entry).
# NOTE(review): `index_val` is not defined in the visible part of this file;
# presumably a list of (branch, year) tuples -- confirm.
multi = pd.MultiIndex.from_tuples(index_val)
# `levels` lists the unique labels of each level (bare expression: result is not stored).
multi.levels
# Slicing
# NOTE(review): `sr` is not defined in the visible part of this file;
# presumably a Series indexed by `multi` -- confirm.
# Single key: if `sr` has a MultiIndex, this selects every entry under 'cse'.
sr['cse']
# Tuple key: one label per index level.
sr[('cse',2021)]
# unstack
# With the default arguments, unstack() pivots the innermost index level into columns.
unstack = sr.unstack()
# stack() is the reverse move: the innermost column level goes back into the index.
stack = unstack.stack()
# MultiIndex DataFrame
# Eight rows of two integers each.
list_data = [[1,2],[3,4],[5,6],[7,8],[9,10],[11,12],[13,14],[15,16]]
# NOTE(review): `branchdf` is not defined in the visible part of this file;
# presumably built from `list_data` with a (branch, year) MultiIndex -- confirm.
# Partial key: rows whose outer label is 'cse'.
branchdf.loc['cse']
# Shorthand for a two-part row key; `.loc[('cse', 2019), :]` spells the same
# lookup without the row/column ambiguity -- assuming the rows carry a MultiIndex.
branchdf.loc['cse',2019]
# Demo frame with two-level labels on both axes:
#   rows    -> (branch, year): 'cse'/'ece' x 2019..2022
#   columns -> (city, metric): 'delhi'/'mumbai' x 'avg_package'/'students'
# Row i holds [2*i + 1, 2*i + 2, 0, 0], so only the two 'delhi' columns are non-zero.
branch_df3 = pd.DataFrame(
    data=[[2 * i + 1, 2 * i + 2, 0, 0] for i in range(8)],
    index=pd.MultiIndex.from_product(
        [['cse', 'ece'], [2019, 2020, 2021, 2022]]
    ),
    columns=pd.MultiIndex.from_product(
        [['delhi', 'mumbai'], ['avg_package', 'students']]
    ),
)
# Slicing
# (The bare expressions below compute a selection but do not store it.)
# Outer column label first, then the rows under outer row label 'cse'.
branch_df3['delhi'].loc['cse']
# Full column key picks one column; the full row key then picks one value from it.
branch_df3['delhi','avg_package'].loc['cse',2020]
# General pattern for a single cell: .loc[(row_outer, row_inner), (col_outer, col_inner)], e.g.
# .loc[("CA", "Dustinmouth"), ("Services", "Schools")]
# Two equivalent ways to reach one column: chained lookups vs. a single two-part key.
branch_df3['delhi']['avg_package']
branch_df3['delhi','avg_package']
# Positional selection: rows 0 and 4, columns 0, 2, 1 (returned in that order).
branch_df3.iloc[[0,4],[0,2,1]]
# A list of full column keys selects several columns at once.
branch_df3[[('delhi','avg_package'),('mumbai','avg_package')]]
# With the default arguments, unstack() moves the innermost row level into the columns.
unstack1 = branch_df3.unstack()
# NOTE(review): `stacked_3` is not defined in the visible part of this file;
# presumably derived from branch_df3 via stack() -- confirm.
# Four successive unstack() calls, each pivoting the innermost index level into the columns.
unstaked_3 = stacked_3.unstack().unstack().unstack().unstack()
## Basic features
# (Bare expressions: the values are computed but not stored.)
branch_df3.shape
stacked_3.shape
unstaked_3.shape
branch_df3.info()
branch_df3.describe()
# GETTING LEVEL
# get_level_values(n) returns the label at level n for every entry of the index.
branch_df3.index.get_level_values(0)
branch_df3.index.get_level_values(1)
# Level 3 only exists if the index of `unstaked_3` has at least four levels.
unstaked_3.index.get_level_values(3)
unstaked_3.index.get_level_values(0)
# Name the levels on both axes (mutates branch_df3 in place).
# 'Catagory' (sic) is reused by name in the sort_index(level=...) call further
# down, so the spelling must stay consistent across the file.
branch_df3.columns.names = ['Catagory','sub-Catagory']
branch_df3.index.names = ['Catagory','year']
# SORTING
# None of the calls below assigns its result or passes inplace=True, so
# branch_df3 itself is left unchanged.
# Sort the row index in descending order.
branch_df3.sort_index(ascending=False)
# One sort direction per row level: first level descending, second ascending.
branch_df3.sort_index(ascending=[False,True])
# Same idea, selecting the levels by the names assigned to branch_df3.index.names.
branch_df3.sort_index(level = ['Catagory','year'] , ascending=[True,False])
# Sort on the second row level (position 1), descending.
branch_df3.sort_index(level = 1 , ascending = False)
# Swap rows and columns.
branch_df3.transpose()
# Swap the two row-index levels (default axis).
branch_df3.swaplevel()
# Swap the two column levels.
branch_df3.swaplevel(axis = 1)
# melt demo: a small wide table (one column per year) reshaped to long form.
# 'branch' is kept as the identifier column; the result is not stored.
pd.DataFrame({'branch':['cse','ece','mech'],
'2020':[100,150,60],'2021':[120,130,80],'2022':[150,140,70] } ).melt(id_vars = ['branch'])
# melt (wide -> long): every column not listed in id_vars is unpivoted -- its
# label goes into a 'variable' column and its values into a 'value' column, so
# column labels become row values (they do not become the index).
# e.g. in the covid data the dates were column labels and were turned into rows.
# Author's note: this reduced the number of columns from 311253 to 6.
# NOTE(review): 311253 looks more like a row count after melting -- confirm.
# Locations of the two covid time-series CSV files (raw strings keep the
# backslashes literal).
# NOTE(review): absolute, machine-specific Windows paths -- they only resolve
# where this exact directory layout exists.
path = r'C:\Users\rkcas\Desktop\datasets\datasets-session-21\time_series_covid19_confirmed_global.csv'
path_death = r'C:\Users\rkcas\Desktop\datasets\datasets-session-21\time_series_covid19_deaths_global.csv'
# Load each CSV into its own DataFrame.
confirmed = pd.read_csv(path)
# NOTE(review): `dealth` looks like a typo for "death"; left unchanged because
# code outside this view may refer to the name.
dealth = pd.read_csv(path_death)
# NOTE(review): `a` is not defined in the visible part of this file. If
# a['date'] is a pandas Series, between() needs `left` and `right` bounds;
# called with no arguments as written, it raises TypeError.
a['date'].between()
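# NOTE(review): the four lines below are truncated tails of longer lines
# (export residue, not valid Python on their own); kept here as comments.
# ('ece',2021),('ece',2022)]
# confirmed_global.csv'
# vid19_deaths_global.csv'
# ],how = 'left')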