Practical File Python
Practical File Python
import pandas as pd  # the alias must be `pd`: the code below calls pd.Series

# Collect n integers typed by the user.
lst = []
n = int(input("Enter number of elements : "))
for i in range(0, n):
    t = int(input("Enter "+str(i+1)+" Number "))
    lst.append(t)  # adding the element

# Wrap the values in a Series and print its summary statistics.
my_series = pd.Series(lst)
print("Sum of all the elements =",my_series.sum())
print("Largest Value =", my_series.max())
print("Smallest Value =",my_series.min())
print("Mean Value =",my_series.mean())
print("Median =",my_series.median())
print("Standard Deviation =",my_series.std())
print("Describe DataSeriese =",my_series.describe())
Output:
Enter number of elements : 5
Enter 1 Number 1
Enter 2 Number 2
Enter 3 Number 3
Enter 4 Number 4
Enter 5 Number 5
Sum of all the elements = 15
Largest Value = 5
Smallest Value = 1
Mean Value = 3.0
Median = 3.0
Standard Deviation = 1.5811388300841898
Describe DataSeriese = count 5.000000
mean 3.000000
std 1.581139
min 1.000000
25% 2.000000
50% 3.000000
75% 4.000000
max 5.000000
dtype: float64
Source Code:
import pandas as pd
import numpy as np

# Ask how many values to read, then prompt for each one in turn
# (prompts are numbered from 1).
n = int(input("Enter number of elements : "))
lst = [int(input(f"Enter {position} Number ")) for position in range(1, n + 1)]

# Single-column frame holding the entered values.
df = pd.DataFrame(lst, columns=['Values'])

# display() is not imported in this snippet; presumably it is supplied by a
# Jupyter/IPython session -- confirm before running as a plain script.
display(df)
Output:
Enter number of elements : 5
Enter 1 Number 10
Enter 2 Number 11
Enter 3 Number 12
Enter 4 Number 13
Enter 5 Number 14
Values
0 10
1 11
2 12
3 13
4 14
Source Code:
import pandas as pd
import numpy as np

# Six (name, age) records; 'tom' appears twice on purpose in the data.
data = [
    ['tom', 10],
    ['nick', 15],
    ['juli', 14],
    ['Suzan', 28],
    ['Sam', 30],
    ['tom', 15],
]
df = pd.DataFrame(data, columns=['Name', 'Age'])

# head(3) limits the output to the first three rows.
first_three = df.head(3)
print("First three Elements :\n", first_three)

# Columns are selected by label here; attribute access (df.Name) or
# positional access (df.iloc[:, 0]) would select the same data.
name_column = df['Name']
age_column = df['Age']
print("\nExtract Name Column\n", name_column)
print("\nExtract Age Column\n", age_column)
Output:
First three Elements :
Name Age
0 tom 10
1 nick 15
2 juli 14
Source Code:
import pandas as pd
import numpy as np

# One list per column; every list has five entries.
data = {
    'Name': ['Tom', 'Alex', 'Suzain', 'Rayan', 'Steve'],
    'Age': [28, 34, 29, 28, 25],
    'English': [87, 67, 54, 89, 73],
    'Hindi': [54, 65, 34, 65, 76],
    'Maths': [65, 54, 67, 54, 75],
    'IP': [90, 84, 94, 75, 43],
}

# Row labels 1..5 act as roll numbers.
df = pd.DataFrame(data, index=[1, 2, 3, 4, 5])

# display() is not imported in this snippet; presumably it is supplied by a
# Jupyter/IPython session -- confirm before running as a plain script.
display(df)

i = int(input("Enter rollno to see the marks : "))

# Roll numbers start at 1 while iloc positions start at 0, so roll number i
# is the one-row positional slice [i-1, i).
start = i - 1
print(df.iloc[start:i])
Output:
1 Tom 28 87 54 65 90
2 Alex 34 67 65 54 84
3 Suzain 29 54 34 67 94
4 Rayan 28 89 65 54 75
5 Steve 25 73 76 75 43
Source Code
Output:
English Hindi
Suzan 90 95
Sam 50 40
Juli 55 70
IP Maths PEd
Na
Sam 92.0 NaN
N
Na
Juli NaN 70.0
N
Source Code
import pandas as pd

# Two Series with different indexes: 'one' has no entry for 'd', so that
# cell becomes NaN once both are aligned in the DataFrame.
d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
     'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}
df = pd.DataFrame(d)  # df DataFrame object
print(df)

print("Adding a new column by passing as Series:")
df['three'] = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(df)

# The recorded output for this practical also shows the two deletion steps
# below; they were missing from the listing.
print("Deleting the first column using DEL function:")
del df['one']
print(df)

print("Deleting another column using POP function:")
df.pop('two')  # pop() removes the column and returns it; the value is not needed here
print(df)
Output:
one two
a 1.0 1
b 2.0 2
c 3.0 3
d NaN 4
Adding a new column by passing as Series:
one two three
a 1.0 1 10.0
b 2.0 2 20.0
c 3.0 3 30.0
d NaN 4 NaN
Deleting the first column using DEL function:
two three
a 1 10.0
b 2 20.0
c 3 30.0
d 4 NaN
Deleting another column using POP function:
three
a 10.0
b 20.0
c 30.0
d NaN
Source Code
import pandas as pd
import numpy as np

# Student details and marks, both keyed by RollNo.
data1 = [[1,'tom', 10],[2,'nick', 15], [3,'juli', 14]]
df1 = pd.DataFrame(data1, columns = ['RollNo','Name', 'Age'])
data2 = [[1,98, 100],[2,98, 15], [3,75, 50]]
df2 = pd.DataFrame(data2, columns = ['RollNo','Eng', 'Hin'])

# The listing used `merged` and `t` without defining them. They are
# reconstructed here from the recorded output: a join on RollNo followed by
# a prompt for the subject column.
merged = pd.merge(df1, df2, on='RollNo')
print(merged)

t = input("Enter the Subject Code For Example 'Eng' for English :")
print(t, "column data\n", merged[t])

# Sum divided by the number of non-null entries, i.e. the column mean.
avg=merged[t].sum()/merged[t].count()
print("\nPercentage of ",t," column \n",avg)
Output:
0 1 tom 10 98 100
1 2 nick 15 98 15
2 3 juli 14 75 50
Enter the Subject Code For Example 'Eng' for English :Eng
Eng column data
0 98
1 98
2 75
Name: Eng, dtype: int64
output:
Columns in DataFrame :
Index(['EMPLOYEE_ID', 'FIRST_NAME', 'LAST_NAME', 'EMAIL', 'PHONE_NUMBER',
'HIRE_DATE', 'JOB_ID', 'SALARY', 'COMMISSION_PCT', 'MANAGER_ID',
'DEPARTMENT_ID'],
dtype='object')
Range Index :
RangeIndex(start=0, stop=107, step=1)
DataType of columns :
EMPLOYEE_ID int64
FIRST_NAME object
LAST_NAME object
...
MANAGER_ID float64
DEPARTMENT_ID float64
dtype: object
Source code:
import pandas as pd
import numpy as np

# Raw string: "\e" is not a valid escape sequence, so the plain literal
# triggers an invalid-escape warning on current Python versions.
data = pd.read_csv(r"c:\emp.csv")
print(data.axes)

# Column selection by name and by position.
print("Extracting Columns by Column Names :\n", data[['EMPLOYEE_ID','FIRST_NAME','SALARY']])
print("\nExtracting Columns by Column Numbers :\n", data[data.columns[1:6]])

# loc slices are label based and include the end label, so 1:3 gives rows 1, 2 and 3.
print("\nExtracting Rows (1-3) :\n", data.loc[1:3])
print("\nExtracting 3 Rows and Columns by Column Names Using loc() :\n",
      data.loc[1:3, ['FIRST_NAME','SALARY','DEPARTMENT_ID']])
print("\nExtracting 3 Rows and Columns numbers Using loc() :\n", data.loc[1:3, data.columns[1:4]])
print("\nExtracting 3 Rows and Columns Range Using loc() :\n", data.loc[1:3, 'FIRST_NAME':'SALARY'])
print("\nExtracting 3 Rows and Columns Range Using loc() :\n", data.loc[1:3, 'JOB_ID':])

# iloc slices are position based and exclude the end, so 1:4 is needed to get
# the same three rows (1, 2, 3) the labels promise; 1:3 returned only two.
print("\nExtracting 3 Rows and Columns Range Using iloc() :\n", data.iloc[1:4, 0:2])
print("\nExtracting 3 Rows and Columns Range Using iloc() :\n", data.iloc[1:4, 1:5])
output:
Practical 10: Importing a CSV file, modifying data and saving to a CSV file
Date:
Aim : To modify data in a CSV file and write it back to disk
Source code:
import pandas as pd
import numpy as np

# Raw string: "\e" is not a valid escape sequence in a normal string literal.
data = pd.read_csv(r"e:\emp.csv")
print("\nExtracting 3 Rows and Columns Range Using loc() :\n",
      data.loc[1:3, 'FIRST_NAME':'SALARY'])

# Modifying dataframe values.
# A single .loc[row, column] assignment replaces the chained form
# data.FIRST_NAME[1] = ..., which raises SettingWithCopyWarning and is not
# guaranteed to write through to the frame.
data.loc[1, 'FIRST_NAME'] = 'Amit'
data.loc[1, 'LAST_NAME'] = 'Singh'
data.loc[1, 'EMAIL'] = "s.amit18"
data.loc[1, 'SALARY'] = 20000
data['HIRE_DATE'] = '27-12-1975'  # scalar assignment overwrites every row of this column
data.loc[1, 'PHONE_NUMBER'] = '955.95.83030'
print("\nExtracting 3 Rows and Columns Range Using loc() :\n",
      data.loc[1:3, 'FIRST_NAME':'SALARY'])

# Overwrite the row labelled 2 with a full set of values, one per column.
# .at accepts only a single row/column pair, so the whole-row assignment
# goes through .loc instead.
data.loc[2] = [102, 'Punita', 'Singh', 'P.amit18', '201.92.0102', '21-10-89',
               'AD_VP', 30000, .5, 100, 20]
print("\nExtracting 3 Rows and Columns Range Using loc() :\n",
      data.loc[1:3, 'EMPLOYEE_ID':])

# NOTE(review): the practical's aim mentions writing the data back to disk,
# but the listing contains no to_csv() call -- confirm the intended output path.
output:
Extracting 3 Rows and Columns Range Using loc() :
FIRST_NAME LAST_NAME EMAIL PHONE_NUMBER HIRE_DATE JOB_ID
SALARY
1 Neena Kochhar NKOCHHAR 515.123.4568 21-SEP-89 AD_VP 17000
2 Lex De Haan LDEHAAN 515.123.4569 13-JAN-93 AD_VP 17000
3 Alexander Hunold AHUNOLD 590.423.4567 03-JAN-90 IT_PROG 9000
Practical 11: Iterating over rows and columns using iterrows() and iteritems()
Date:
Aim : To iterate over rows and columns using iterrows() and iteritems()
Source code:
import pandas as pd
import numpy as np

data1 = [[1,'tom', 10],[2,'nick', 15], [3,'juli', 14]]
df1 = pd.DataFrame(data1, columns = ['RollNo','Name', 'Age'])

# Row-wise: iterrows() yields (index label, row as a Series).
for label, contents in df1.iterrows():
    print ("\nLabel ", label)
    print ("contents:", contents, sep='\n')

# Column-wise: items() yields (column name, column as a Series). The recorded
# output shows this second pass; items() is used because iteritems() was
# removed in pandas 2.0.
for label, contents in df1.items():
    print ("\nLabel ", label)
    print ("contents:", contents, sep='\n')
output:
Label 0
contents:
RollNo 1
Name tom
Age 10
Name: 0, dtype: object
Label 1
contents:
RollNo 2
Name nick
Age 15
Name: 1, dtype: object
Label 2
contents:
RollNo 3
Name juli
Age 14
Name: 2, dtype: object
Label RollNo
contents:
0 1
1 2
2 3
Name: RollNo, dtype: int64
Label Name
contents:
0 tom
1 nick
2 juli
Name: Name, dtype: object
Label Age
contents:
0 10
1 15
2 14
Name: Age, dtype: int64
Source code:
import pandas as pd
import numpy as np

# Outer keys become columns (years) and inner keys become the row index
# (quarters). 2019 only has Qtr1, so its other quarters are NaN.
diSales = {
    2016: {'Qtr1': 34500, 'Qtr2': 56000, 'Qtr3': 47000, 'Qtr4': 49000},
    2017: {'Qtr1': 44900, 'Qtr2': 46100, 'Qtr3': 57000, 'Qtr4': 59000},
    2018: {'Qtr1': 54500, 'Qtr2': 51000, 'Qtr3': 57000, 'Qtr4': 58000},
    2019: {'Qtr1': 61000},
}
sal_df = pd.DataFrame(diSales)
print("Data Frame :\n", sal_df)

# Per-year minimum and maximum over all quarters.
yearly_min = sal_df.min()
yearly_max = sal_df.max()
print(" min() : \n", yearly_min)
print(" max() : \n", yearly_max)

# multiple rows: the same aggregates restricted to Qtr1 through Qtr3
# (label slices include the end label).
first_three_quarters = sal_df.loc['Qtr1':'Qtr3']
print(" Sum of Qtr1 to Qtr3 : \n", first_three_quarters.sum())
print(" Min of Qtr1 to Qtr3: \n", first_three_quarters.min())
print(" Max Qtr1 to Qtr3: \n", first_three_quarters.max())
output:
Data Frame :
2016 2017 2018 2019
Qtr1 34500 44900 54500 61000.0
Qtr2 56000 46100 51000 NaN
Qtr3 47000 57000 57000 NaN
Qtr4 49000 59000 58000 NaN
min() :
2016 34500.0
. . .
2019 61000.0
dtype: float64
max() :
2016 56000.0
. . .
2019 61000.0
dtype: float64
min() axis 1 :
Qtr1 34500.0
. . .
Qtr4 49000.0
dtype: float64
max() axis 1 :
Qtr1 61000.0
. . .
Qtr4 59000.0
dtype: float64
mode() :
2016 2017 2018 2019
0 34500 44900 51000 61000.0
. . .
3 56000 59000 58000 NaN
mean() :
2016 46625.0
. . .
2019 61000.0
dtype: float64
median() :
2016 48000.0
. . .
2019 61000.0
dtype: float64
Count() :
2016 4
. . .
2019 1
dtype: int64
Sum() axis=0:
2016 186500.0
. . .
2019 61000.0
dtype: float64
Quantile() axis=0:
2016 2017 2018 2019
0.25 43875.0 45800.0 53625.0 61000.0
. . .
1.00 56000.0 59000.0 58000.0 61000.0
Var() axis=0:
2016 8.022917e+07
. . .
2019 NaN
dtype: float64
Sum of Qtr1 :
194900.0
Min of Qtr1:
34500.0
Max Qtr1:
61000.0
import pandas as pd
import numpy as np

# One record per tutor: number of classes taken and the tutor's country.
d = {'Tutor': ['Tahira', 'Gurjyot', 'Anusha', 'Jacob', 'Vankat'],
     'Classes': [28, 36, 41, 32, 48],
     'Country': ['USA', 'UK', 'Japan', 'USA', 'Brazil']}
df = pd.DataFrame(d)
print(df)

# Reshape: one row per country, one column per tutor, cells hold Classes.
# Country/tutor pairs absent from the data become NaN. The pivot is computed
# once; the listing ran it a second time and discarded that result.
test = df.pivot(index='Country', columns='Tutor', values='Classes')
print(test)
#pivot_table
import pandas as pd
import numpy as np

# The same five tutors are listed once per quarter, for quarters 1 to 4.
tutors = ['Tahira', 'Gurjyot', 'Anusha', 'Jacob', 'Vankat']
d = {
    'Tutor': tutors * 4,
    'Classes': [28, 36, 41, 32, 40,
                36, 40, 36, 40, 46,
                24, 30, 44, 40, 32,
                36, 32, 36, 24, 38],
    'Quarter': [quarter for quarter in range(1, 5) for _ in tutors],
    'Country': ['USA', 'UK', 'Japan', 'USA', 'Brazil',
                'USA', 'USA', 'Japan', 'Brazil', 'USA',
                'Brazil', 'USA', 'UK', 'Brazil', 'USA',
                'Japan', 'Japan', 'Brazil', 'UK', 'USA'],
}
df = pd.DataFrame(d)
print(df)

# A (Tutor, Country) pair can occur more than once here, so pivot_table is
# used: it aggregates duplicates (no aggfunc is passed, so the default applies).
test = df.pivot_table(index='Tutor', columns='Country', values='Classes')
print(test)

#sorting
# sort_values returns a new frame each time; the results are not stored or
# printed here, so df itself stays in its original order.
df.sort_values('Country')
df.sort_values('Tutor')
df.sort_values(['Country', 'Tutor'])
df.sort_values(['Tutor', 'Country'])
df.sort_values(['Tutor', 'Country'], ascending=False)
output:
Tutor Classes Country
0 Tahira 28 USA
1 Gurjyot 36 UK
2 Anusha 41 Japan
3 Jacob 32 USA
4 Vankat 48 Brazil
import pandas as pd
import numpy as np

# Ages to be binned.
ages = [37, 28, 38, 44, 53, 69, 74, 53, 35, 38,
        66, 46, 24, 45, 92, 48, 51, 62, 57]
d = {'Age': ages}
hage = pd.DataFrame(d)

# First call: histogram with no arguments.
hage.hist()

# Second call: the 'Age' column explicitly, grid on, 20 bins.
hage.hist(column='Age', grid=True, bins=20)
Output:
array([[<matplotlib.axes._subplots.AxesSubplot object at
0x0000019A7508D888>]],
dtype=object)
Source code:
# user-defined function
def addnum():  # function definition
    """Prompt for two integers on standard input and return their sum.

    Raises:
        ValueError: if either entry cannot be converted by int().
    """
    first = int(input("Please enter a number"))
    second = int(input("Please enter a number"))
    total = first + second
    return total
Output:
Source Code:
import pandas as pd
import numpy as np
import math


# User defined function
def adder(adder1, adder2):
    """Return adder1 + adder2.

    When used through DataFrame.pipe, adder1 is the frame and adder2 the
    value added to every cell.
    """
    return adder1 + adder2


# Create a Dictionary of series
d = {'Score_Math': pd.Series([66, 57, 75, 44, 31, 67]),
     'Score_Science': pd.Series([89, 87, 67, 55, 47, 72])}
df = pd.DataFrame(d)
print("DataFrame\n", df)

# pipe() passes the whole frame as the first argument: adder(df, 2).
print("PIPE() \n", df.pipe(adder, 2))

# The recorded output for this practical also shows the three steps below;
# they were missing from the listing, which left `import math` unused.
print("On Rows apply(np.mean,axis=1)\n", df.apply(np.mean, axis=1))
print("On Columns apply(np.mean,axis=0)\n", df.apply(np.mean, axis=0))

# Element-wise square root, applied one column at a time.
print("LAMBDA\n", df.apply(lambda column: column.map(math.sqrt)))
Output:
DataFrame
Score_Math Score_Science
0 66 89
1 57 87
. . . .
5 67 72
PIPE()
Score_Math Score_Science
0 68 91
1 59 89
2 77 69
3 46 57
4 33 49
5 69 74
On Rows apply(np.mean,axis=1)
0 77.5
1 72.0
2 71.0
3 49.5
4 39.0
5 69.5
dtype: float64
On Columns apply(np.mean,axis=0)
Score_Math 56.666667
Score_Science 69.500000
dtype: float64
LAMBDA
Score_Math Score_Science
0 8.124038 9.433981
1 7.549834 9.327379
2 8.660254 8.185353
3 6.633250 7.416198
4 5.567764 6.855655
5 8.185353 8.485281
Source Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl

# Marks of five students in three subjects, indexed by roll number.
RollNo = [1, 2, 3, 4, 5]
Maths = [20, 22, 26, 28, 30]
IP = [21, 24, 29, 16, 25]
Science = [26, 23, 20, 26, 22]

pl.title("Grade 12 Preboard Exams")

# A list, not a set: a set has no defined order, so the tick labels could be
# attached to the wrong roll numbers.
names = ['Rayan', 'Unnati', 'Khushi', 'Aryan', 'Yakshesh']
pl.xlabel('Names')
pl.ylabel('Marks')
pl.xticks(RollNo, names)  # label x position i with the i-th name

# One line per subject, told apart by colour and marker.
pl.plot(RollNo, Maths, 'r', marker='o', label='Maths')
pl.plot(RollNo, IP, 'k', marker='s', label='IP')
pl.plot(RollNo, Science, 'b', marker='*', label='Science')
pl.legend()
pl.grid(color='y')
Output:
Source Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Item codes 1..5 as a NumPy array so the bar positions can be shifted
# arithmetically.
ItemCode = np.arange(1, 6)
SalesJan = [50, 60, 25, 80, 60]
SalesFeb = [30, 40, 35, 70, 80]
SalesMar = [40, 50, 45, 40, 92]

# Three bars per item, each 0.2 wide: January to the left of the item code,
# February centred on it, March to the right.
plt.bar(ItemCode - 0.2, SalesJan, width=0.2, color='red', label="Jan")
plt.bar(ItemCode, SalesFeb, width=0.2, color='blue', label="Feb")
plt.bar(ItemCode + 0.2, SalesMar, width=0.2, color='green', label="Mar")  # was mislabelled "Feb"

plt.title('Total Sales March')
plt.xlabel('Items')
plt.ylabel('Quantity')

# Ticks sit on the item codes. The listing used RollNo here, which is not
# defined in this program.
plt.xticks(ItemCode, ["Mouse", "Printer", "Scanner", "WebCam", "PenTab"], rotation=90)

# pyplot is imported as `plt` in this program; the listing called pl.legend()
# and pl.grid(), and `pl` is undefined here.
plt.legend()
plt.grid(True)
output:
Source Code:
import numpy as np
import pandas as pd  # needed for pd.Series below; the listing did not import it
import matplotlib.pyplot as pl

# Histogram of 20 marks in 5 bins.
marks = [22, 25, 18, 19, 11, 21, 28, 30, 24, 24, 23, 15, 20, 27, 21, 21, 13, 30, 18, 25]
pl.hist(marks, edgecolor='r', bins=5, color='blue')
pl.ylabel('Frequency')
pl.xlabel('Bins/Ranges')
pl.title('My Chart')

# Stacked histogram of two random samples.
# NOTE(review): no new figure is created between the charts, so when run as
# one script they share the same axes -- confirm whether that is intended.
x = np.random.randn(1000)
y = np.random.randn(1000)
pl.hist([x, y], bins=10, edgecolor='k', histtype='barstacked')

# Histogram with a line drawn over the bars.
x = np.random.randn(1000)
y = pl.hist(x, bins=10, edgecolor='b', color='yellow')
a = pd.Series(y[1])  # y[1]: bin edges (11 values for 10 bins)
b = pd.Series(y[0])  # y[0]: counts per bin (10 values)
a.pop(10)            # drop the last edge so a and b have the same length
a = a + .25          # shift each left edge right by 0.25 before plotting
pl.plot(a, b, 'k')
output:
Source Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl

# Two samples of eleven values each, drawn as side-by-side boxes.
x = [2, 3, 1, 4, 4, 6, 8, 10, 10, 3, 4]
y = [5, 3, 1, 7, 6, 8, 9, 10, 8, 3, 4]

samples = [x, y]
box_names = ['LG', 'OGeneral']

# patch_artist=True makes the boxes filled patches; the return value is kept in z.
z = pl.boxplot(samples, patch_artist=True, labels=box_names)
pl.title('Air-conditioner')
output:
Practical 21: Data Visualization Piechart
Date:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # the listing used plt without importing it

# Slice sizes with their matching labels and colours (same order in all three lists).
x = [10, 30, 27, 13, 8, 12]
fr = ['Peach', 'Banana', 'Grapes', 'Oranges', 'Pineapple', 'Apple']
co = ['pink', 'yellow', 'lightgreen', 'orange', 'brown', 'red']

# autopct '%1.0f%%' prints each share as a whole number followed by a literal
# '%'; the listing had this literal split across two lines, which is a syntax
# error. explode offsets the 2nd and 3rd slices (Banana, Grapes) from the centre.
plt.pie(x, labels=fr, colors=co, autopct='%1.0f%%', shadow=True,
        explode=(0, .1, .1, 0, 0, 0))
plt.show()
output: