Python Pandas-DataFrames Complete - Jupyter Notebook
Python Pandas-DataFrames Complete - Jupyter Notebook
Out[1]: 0
G1 Cricket
G2 Volleyball
G3 Judo
G4 Hockey
0 Riya 19 12
1 Rishab 23 11
2 Isha 20 12
3 Rahul 18 12
# In [7]: create a DataFrame from a dictionary with a custom index.
# Each dictionary key becomes a column; the `index` argument labels the rows
# P1..P4 instead of the default 0..3.
import pandas as pd

dict1 = {
    "Name": ["Riya", "Rishab", "Isha", "Rahul"],
    "Age": [19, 23, 20, 18],
}
df = pd.DataFrame(dict1, index=["P1", "P2", "P3", "P4"])
df
P1 Riya 19
P2 Rishab 23
P3 Isha 20
P4 Rahul 18
P1 1 Prerna Singh 24 24 20 22
P2 2 Manish Arora 18 17 19 22
P3 3 Tanish Goel 20 22 18 24
P4 4 Falguni Jain 22 20 24 20
P5 5 Kanika Bhatnagar 15 20 18 22
P6 6 Ramandeep Kaur 20 15 22 24
In [32]: 1 df.index
# In [31]: DataFrame.info is a method. Without the call parentheses the cell
# only echoes the bound-method repr; calling it prints the index, column
# dtypes and non-null counts.
df.info()
Out[30]: Rollno 6
Name 6
UT1 6
UT2 6
UT3 6
UT4 6
dtype: int64
In [27]: 1 df.ndim
Out[27]: 2
In [28]: 1 df.shape
Out[28]: (6, 6)
In [29]: 1 df.columns
P1 Riya 19
P2 Rishab 23
P3 Isha 20
P4 Rahul 18
Out[14]: P1 19
P2 23
P3 20
P4 18
Name: Age, dtype: int64
In [15]: 1 df['Name']
Out[15]: P1 Riya
P2 Rishab
P3 Isha
P4 Rahul
Name: Name, dtype: object
In [16]: 1 df.Age
Out[16]: P1 19
P2 23
P3 20
P4 18
Name: Age, dtype: int64
In [17]: 1 df['Age']
Out[17]: P1 19
P2 23
P3 20
P4 18
Name: Age, dtype: int64
In [18]: 1 df.index
# In [19]: call info() (note the parentheses) to print the index, column
# dtypes and non-null counts; the bare attribute `df.info` only shows the
# bound-method repr.
df.info()
In [20]: 1 df.shape
Out[20]: (4, 2)
In [21]: 1 df.columns
In [22]: 1 df.ndim
Out[22]: 2
In [23]: 1 df
P1 Riya 19
P2 Rishab 23
P3 Isha 20
P4 Rahul 18
In [24]: 1 print(df)
Name Age
P1 Riya 19
P2 Rishab 23
P3 Isha 20
P4 Rahul 18
In [25]: 1 df.count()
Out[25]: Name 4
Age 4
dtype: int64
0 Anita 14
1 Sajal 32
2 Ayaan 3
3 Abhey 6
In [9]: 1 df.tail(-1)
1 Sajal 32
2 Ayaan 3
3 Abhey 6
4 Rahul 10
5 Isha 13
0 aparna BCA 90
1 pankaj BCA 40
2 sudhir M.Tech 80
3 Geeku BCA 98
In [19]: 1 df[df['Score']<=40]
1 pankaj BCA 40
# In [15]: print the Score, Degree and Name columns, one Series after another.
print(*(df[label] for label in ("Score", "Degree", "Name")), sep="\n")
0 90
1 40
2 80
3 98
Name: Score, dtype: int64
0 BCA
1 BCA
2 M.Tech
3 BCA
Name: Degree, dtype: object
0 aparna
1 pankaj
2 sudhir
3 Geeku
Name: Name, dtype: object
In [4]: 1 df[df['CGPA']>9]
Out[5]: 0 23000
1 34000
2 12000
3 45000
4 34000
Name: SALARY, dtype: int64
In [6]: 1 df['SALARY']
Out[6]: 0 23000
1 34000
2 12000
3 45000
4 34000
Name: SALARY, dtype: int64
In [7]: 1 df['SALARY']>16000
Out[7]: 0 True
1 True
2 False
3 True
4 True
Name: SALARY, dtype: bool
In [10]: 1 df[df['SALARY']>16000]
TNAME
In [8]: 1 df=df.set_index('TNAME')
In [9]: 1 df
TNAME
In [25]: 1 df[df['rollno']==4]
3 4 Falguni Jain 22 20 24 20
In [28]: 1 print(df.count())
rollno 6
name 6
UT1 6
UT2 5
UT3 6
UT4 6
dtype: int64
In [29]: 1 print(df.columns)
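loc is label-based and iloc is integer-position-based; both are used to retrieve rows (and columns) from a DataFrame. A loc slice includes its end label, whereas an iloc slice excludes its end position.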
In [10]: 1 df.iloc[1:4]
In [10]: 1 df.loc[2:4]
Adding a column
# In [17]: add a 'Grade' column. The list supplies one value per existing row,
# in row order.
df["Grade"] = ["A", "B", "A", "A", "B"]
df
In [2]: 1 df=df.set_index('TNAME')
In [3]: 1 df
TNAME
# In [4]: add an 'Allowance' column; the last row has no allowance on record.
# The gap is written as None so pandas stores it as a real missing value (NaN)
# in a numeric column. The empty string '' would make the whole column object
# dtype, so sum(), mean() and numeric comparisons on it would fail or misbehave.
df["Allowance"] = [4000, 6000, 8000, 10000, None]
df
TNAME
TNAME
In [7]: 1 df
TNAME
In [8]: 1 df.loc['AMIT']
In [9]: 1 df.loc[['AMIT','BINNY']]
TNAME
TNAME
In [11]: 1 df.loc[['AMIT','BINNY'],'SALARY']
Out[11]: TNAME
AMIT 23000
BINNY 12000
Name: SALARY, dtype: int64
In [12]: 1 df.loc['AMIT':'BINNY','SALARY']
Out[12]: TNAME
AMIT 23000
RAJESH 34000
BINNY 12000
Name: SALARY, dtype: int64
In [14]: 1 df
TNAME
AMIT T01 123 PASCHIM VIHAR 23000 4000 Manager 500 3000
RAJESH TO2 6/11 RAMESH NAGAR 34000 6000 Clerk 100 4000
BINNY T03 5 WEST PUNJABHI BAG H 12000 8000 Manager 300 5000
Out[15]: TNAME
AMIT 3000
RAJESH 4000
BINNY 5000
CHARU 3000
MEENAKSHI 6000
Name: HRA, dtype: int64
In [16]: 1 df.HRA
Out[16]: TNAME
AMIT 3000
RAJESH 4000
BINNY 5000
CHARU 3000
MEENAKSHI 6000
Name: HRA, dtype: int64
In [17]: 1 df["HRA"]
Out[17]: TNAME
AMIT 3000
RAJESH 4000
BINNY 5000
CHARU 3000
MEENAKSHI 6000
Name: HRA, dtype: int64
In [18]: 1 df.loc[["AMIT","CHARU"],"HRA"]
Out[18]: TNAME
AMIT 3000
CHARU 3000
Name: HRA, dtype: int64
In [19]: 1 df
TNAME
AMIT T01 123 PASCHIM VIHAR 23000 4000 Manager 500 3000
RAJESH TO2 6/11 RAMESH NAGAR 34000 6000 Clerk 100 4000
BINNY T03 5 WEST PUNJABHI BAG H 12000 8000 Manager 300 5000
In [21]: 1 df
TNAME
123 PASCHIM
AMIT T01 23000 4000 Manager 500 3000 25500
VIHAR
6/11 RAMESH
RAJESH TO2 34000 6000 Clerk 100 4000 37900
NAGAR
5 WEST
BINNY T03 12000 8000 Manager 300 5000 16700
PUNJABHI BAG H
23 MALVIYA
CHARU T04 45000 10000 HR 200 3000 47800
NAGAR
TNAME
5 WEST
BINNY T03 12000 8000 Manager 300 5000 16700
PUNJABHI BAG H
123 PASCHIM
AMIT T01 23000 4000 Manager 500 3000 25500
VIHAR
6/11 RAMESH
RAJESH TO2 34000 6000 Clerk 100 4000 37900
NAGAR
23 MALVIYA
CHARU T04 45000 10000 HR 200 3000 47800
NAGAR
# In [23]: order the rows from highest to lowest Total_Salary. sort_values
# returns a new frame, so df itself keeps its current row order.
dfsort = df.sort_values(by="Total_Salary", ascending=False)
dfsort
TNAME
23 MALVIYA
CHARU T04 45000 10000 HR 200 3000 47800
NAGAR
6/11 RAMESH
RAJESH TO2 34000 6000 Clerk 100 4000 37900
NAGAR
123 PASCHIM
AMIT T01 23000 4000 Manager 500 3000 25500
VIHAR
5 WEST
BINNY T03 12000 8000 Manager 300 5000 16700
PUNJABHI BAG H
# In [24]: raise every SALARY by 10 % (the column becomes float).
# This cell only rewrites SALARY; other columns are left as they were.
# Re-running the cell applies the raise again on top of the previous one.
df["SALARY"] = df["SALARY"].add(df["SALARY"].mul(10).div(100))
df
TNAME
123 PASCHIM
AMIT T01 25300.0 4000 Manager 500 3000 25500
VIHAR
6/11 RAMESH
RAJESH TO2 37400.0 6000 Clerk 100 4000 37900
NAGAR
5 WEST
BINNY T03 13200.0 8000 Manager 300 5000 16700
PUNJABHI BAG H
23 MALVIYA
CHARU T04 49500.0 10000 HR 200 3000 47800
NAGAR
In [26]: 1 df
TNAME
AMIT T01 123 PASCHIM VIHAR 25300.0 4000 Manager 500 3000
RAJESH TO2 6/11 RAMESH NAGAR 37400.0 6000 Clerk 100 4000
BINNY T03 5 WEST PUNJABHI BAG H 13200.0 8000 Manager 300 5000
Out[27]: TNAME
AMIT Manager
RAJESH Clerk
BINNY Manager
CHARU HR
MEENAKSHI Manager
Name: Desig, dtype: object
In [28]: 1 df
TNAME
In [29]: 1 df.drop(labels='Allowance',axis=1)
TNAME
In [30]: 1 df.drop(labels='Tax',axis=1,inplace=True)
In [31]: 1 df
TNAME
In [32]: 1 df
TNAME
# In [33]: remove the Allowance and HRA columns from df itself (in place).
df.drop(columns=["Allowance", "HRA"], inplace=True)
In [34]: 1 df
TNAME
In [2]: 1 df=df.set_index('TNAME')
2 df
TNAME
# In [3]: insert a new row labelled "ISHA" using loc.
# Assigning through .loc with a label that is not yet in the index appends the
# row; the values are matched to the columns in order.
df.loc["ISHA"] = ["T06", "23 MODEL TOWN", 35000]
df
TNAME
TNAME
TNAME
DELETING ROW
In [6]: 1 df.drop("RAJESH",axis=0)
TNAME
TNAME
In [8]: 1 df.drop("RAJESH",axis=0,inplace=True)
In [9]: 1 df
TNAME
In [10]: 1 df
TNAME
# In [12]: delete the rows labelled "ISHA" and "CHARU" from df in place.
# The axis is named through the `index=` keyword (equivalent to
# labels=[...], axis=0). Passing the axis positionally, as in
# df.drop(labels, 0, ...), was deprecated in pandas 1.3 and raises TypeError
# in pandas 2.x.
df.drop(index=["ISHA", "CHARU"], inplace=True)
In [13]: 1 df
TNAME
In [14]: 1 df.drop(df.index[1])
TNAME
In [15]: 1 df
TNAME
In [16]: 1 df.drop(df.index[1],inplace=True)
In [17]: 1 df
TNAME
In [18]: 1 df.drop(df.index[[0,1]],inplace=True)
In [19]: 1 df
TNAME
BOOLEAN INDEXING
# In [1]: sample marks table used by the boolean-indexing examples.
# One row per student, labelled 100..104; Phy/Chem/Eng hold the marks and
# Proj_rem holds the project remark.
import pandas as pd

dict1 = {
    "Names": ["Sush", "Adarsh", "Ravi", "Manu", "Sushma"],
    "Clas": [11, 12, 11, 12, 12],
    "Sec": ["A", "A", "C", "A", "B"],
    "Phy": [34, 40, 56, 67, 50],
    "Chem": [78, 90, 50, 65, 90],
    "Eng": [50, 55, 67, 68, 69],
    "Proj_rem": ["Avg", "Good", "Good", "Fair", "Avg"],
}
student = pd.DataFrame(dict1, index=[100, 101, 102, 103, 104])
student
In [8]: 1 student.loc[student.Clas==11]
In [10]: 1 student["Proj_rem"]
In [11]: 1 student.Proj_rem
In [13]: 1 student
101 40 90 55
103 67 65 68
104 50 90 69
In [2]: 1 student.loc[student.Clas==12,["Phy","Chem","Eng"]]
101 40 90 55
103 67 65 68
104 50 90 69
WAC to view the Project remark for those who have got more than 80 in chemistry.
In [3]: 1 student.loc[student.Chem>80,"Proj_rem"]
Display the details of students who have got Good in their Project remarks.
In [4]: 1 student.loc[student.Proj_rem=="Good"]
In [5]: 1 student[student.Proj_rem=="Good"]
In [8]: 1 student
In [9]: 1 student.loc[student.Names=="Sushma","Names"]="Sushmita"
In [10]: 1 student
# In [13]: rename the student called "Sush" to "Sushmita".
# The column label "Names" is required. With only a row mask, .loc assigns the
# string to every column of the matching row, which overwrites that student's
# class, section and marks and turns the numeric columns into object dtype.
student.loc[student.Names == "Sush", "Names"] = "Sushmita"
In [15]: 1 student
WAC to change the Project remark to “Excellent” for those who have got more than 80 in
chemistry.
In [3]: 1 student.loc[student.Chem>80,"Proj_rem"]="Excellent"
In [5]: 1 student
# In [18]: remove the row labelled 100 from student in place.
# The axis is named through the `index=` keyword; passing it positionally
# (student.drop(100, 0, ...)) was deprecated in pandas 1.3 and raises
# TypeError in pandas 2.x.
student.drop(index=100, inplace=True)
In [4]: 1 student
In [20]: 1 student.loc[student.Chem>80,"Proj_rem"]="Excellent"
In [21]: 1 student
Out[1]: True
# In [4]: build a Series from a NumPy array with letter labels A..E and print
# its number of dimensions (a Series is one-dimensional, so this prints 1).
import numpy as np
import pandas as pd

a1 = np.array([2, 3, 4, 5, 6])
s1 = pd.Series(a1, index=list("ABCDE"))
print(s1.ndim)
P1 Riya 19
P2 Rishab 23
P3 Isha 20
P4 Rahul 18
In [4]: 1 df.shape
Out[4]: (4, 2)
In [5]: 1 df.count()
Out[5]: Name 4
Age 4
dtype: int64
# In [1]: unit-test marks for six students, rows labelled P1..P6.
# The Name list was cut off in the exported cell; the last two names are
# restored from the table this notebook displays for the same frame.
import pandas as pd

dic = {
    "Rollno": [1, 2, 3, 4, 5, 6],
    "Name": [
        "Prerna Singh",
        "Manish Arora",
        "Tanish Goel",
        "Falguni Jain",
        "Kanika Bhatnagar",
        "Ramandeep Kaur",
    ],
    "UT1": [24, 18, 20, 22, 15, 20],
    "UT2": [24, 17, 22, 20, 20, 15],
    "UT3": [20, 19, 18, 24, 18, 22],
    "UT4": [22, 22, 24, 20, 22, 24],
}
df = pd.DataFrame(dic, index=["P1", "P2", "P3", "P4", "P5", "P6"])
print(df.index)
# info() prints its own summary and returns None, so it is called directly;
# print(df.info) would only print the bound-method repr.
df.info()
print(df.columns)
print(df)
In [4]: 1 df1=df[["Product","Price","Quantity"]]
# In [9]: keep only the rows whose Price is strictly greater than 100 and, for
# those rows, only the Product, Price and Quantity columns.
df2 = df.loc[df["Price"] > 100, ["Product", "Price", "Quantity"]]
df2
6 Tea 120 1
10 Bucket 200 1
15 Perfume 150 1
In [5]: 1 df1
0 Chips 10 15
1 Milk 60 5
2 Maggi 20 5
3 Juice 100 4
4 Bread 20 2
5 Biscuit 20 2
6 Tea 120 1
7 Bourn-Vita 70 1
8 Bottle 80 2
9 Tiffin Box 75 2
10 Bucket 200 1
11 Detergent 80 1
12 Tissues 30 5
13 Soap 40 4
14 Brush 30 2
15 Perfume 150 1
16 Hair-Oil 100 1
17 Pen 5 10
18 Pencil 2 10
In [ ]: 1