FDS All Practicals
FDS All Practicals
Assignment 1
SET A
1,2).
import numpy as np
import pandas as pd
# Assignment 1, Set A (1, 2): build a small student DataFrame and report its
# basic properties: shape, column/row counts, dtypes, feature names and size.
# age and Percentage are stored as strings, exactly as in the source data.
dict1 = {
    "name": ["Rita", "Sita", "Amey", "Ajay", "Yash",
             "Archana", "Prerna", "Shruti", "Paurnima", "Rutika"],
    "age": ["21", "22", "20", "23", "24", "19", "18", "23", "21", "17"],
    "Percentage": ["80", "87", "90", "92", "86", "94", "76", "79", "89", "80"],
}
df = pd.DataFrame(dict1)
print(df)

s = df.shape
print("Shape=", s, "\n")

c = len(df.columns)
print("No. of Columns=", c, "\n")

r = len(df)
print("No.of rows=", r, "\n")

d = df.dtypes
print("Datatypes is: ", d, "\n")

# Feature names are the column labels. The earlier pd.get_dummies(df) call
# one-hot encoded the frame and printed an indicator table instead of names.
featurenm = list(df.columns)
print("feature names:", featurenm, "\n")

# size is the total number of cells (rows * columns).
a = df.size
print("size=", a, "\n")
3,4,5,6).
import pandas as pd
import numpy as np
# Assignment 1, Set A (3-6): describe a roll-number/mark table, inject rows
# with missing and duplicate values, inspect them, then drop the bad rows.
dict2 = {
    "rno": ["5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"],
    "mark": ["89", "80", "87", "90", "92", "86", "94", "76", "89", "80", "81"],
}
df2 = pd.DataFrame(dict2)
print(df2)
print(df2.dtypes)
# Values are stored as strings, so cast to float to get numeric statistics.
print(df2.astype(float).describe())

# The frame starts with index labels 0..10, so labels 5..10 below overwrite
# existing rows while labels 11..15 append new ones.
df2.loc[5] = [6, None]
df2.loc[6] = [None, None]
df2.loc[7] = [None, 85]
df2.loc[8] = [9, 21]
df2.loc[9] = [9, 21]      # same values as row 8 -> a duplicate row
df2.loc[10] = [7, None]
df2.loc[11] = [None, 45]
df2.loc[12] = [34, 2]
df2.loc[13] = [None, None]
df2.loc[14] = [11, None]
df2.loc[15] = [8, 90]
print(df2)

# Add an all-missing column to demonstrate the null checks.
df2["remarks"] = None
print(df2)

# info() writes its report itself and returns None; wrapping it in print()
# emitted an extra "None" line.
df2.info()
print(df2.isnull())
print(df2.duplicated())

# `columns=` already selects the axis, so the extra axis=1 was redundant.
df2 = df2.drop(columns="remarks")
print(df2)

# Keep only rows where both rno and mark are present.
df2 = df2.dropna(subset=["rno", "mark"])
print(df2)
7,8).
import matplotlib.pyplot as plt
# Assignment 1, Set A (7, 8): plot the same five students as a line chart
# and then as a scatter plot.
student_names = ["Rita", "Sita", "Amey", "Ajay", "Yash"]
student_percentages = [85, 90, 75, 60, 95]

# Line chart.
plt.plot(student_names, student_percentages)
plt.xlabel("name")
plt.ylabel("percentage")
plt.show()

# Scatter plot.
plt.scatter(student_names, student_percentages)
plt.xlabel("Name")
plt.ylabel("percentage")
plt.title("Scatter plot")
plt.show()
Assignment 2
SET A
Q.1
import numpy as np
# Assignment 2, Set A, Q.1: flatten a 2x2 array and report its extremes.
a = np.arange(4).reshape((2, 2))
# flatten() returns a 1-D copy. The 2-D array itself used to be printed
# under the "Flattened Array" label.
flat = a.flatten()
print("Flattened Array: ", flat)
print("Maximum Value: ", np.max(a))
print("Minimum Value: ", np.min(a))
Q.2
import numpy as np
# Q.2: Euclidean distance between two 3-D points, computed as the norm of
# their difference vector.
point1 = np.array([1, 2, 3])
point2 = np.array([1, 1, 1])
difference = point1 - point2
dist = np.linalg.norm(difference)
print("Euclidian Distance: ", dist)
Q.3
import numpy as np
import pandas as pd
# Q.3 sample data: one row per player. The 'points' value 20 had been split
# across two lines ("2" / "0"), which made the literal a syntax error.
df = pd.DataFrame({
    'rating': [90, 85, 82, 88, 94, 90, 76, 75, 87, 86],
    'points': [25, 20, 14, 16, 27, 20, 12, 15, 14, 19],
    'assists': [5, 7, 7, 8, 5, 7, 6, 9, 9, 5],
    'rebound': [11, 8, 10, 6, 6, 9, 6, 10, 10, 7],
})
def find_iqr(x):
    """Return the interquartile range of ``x``.

    The result is the 75th percentile minus the 25th percentile, both
    computed with ``np.percentile``.
    """
    q75, q25 = np.percentile(x, [75, 25])
    return q75 - q25
# IQR of the two selected columns only.
selected_iqr = df[['rating', 'points']].apply(find_iqr)
print(selected_iqr)

# IQR of every column in the frame.
all_iqr = df.apply(find_iqr)
print(all_iqr)
Q.4
import numpy as np
import pandas as pd
def distancesum(x, y, n):
    """Return the total Manhattan distance over all pairs of the first n points.

    Point ``i`` has coordinates ``(x[i], y[i])``. Every unordered pair
    ``(i, j)`` with ``i < j < n`` contributes
    ``abs(x[i] - x[j]) + abs(y[i] - y[j])``. With fewer than two points the
    result is 0.
    """
    total = 0  # named `total` so the builtin sum() is not shadowed
    for i in range(n):
        for j in range(i + 1, n):
            total += abs(x[i] - x[j]) + abs(y[i] - y[j])
    return total
# Sample points: the i-th point is (x[i], y[i]).
x = [-1, 1, 3, 2]
y = [5, 6, 5, 3]
n = len(x)

manhattan_total = distancesum(x, y, n)
print("Manhattan Distance: ", manhattan_total)
Q.5
import numpy as np
import matplotlib.pyplot as plt
# Q.5: count how many values fall into each bin.
nums = np.array([0.5, 0.7, 1.0, 1.2, 1.3, 2.1])
bins = np.array([0, 1, 2, 3])

# Echo the inputs, then the result tuple returned by np.histogram.
for label, values in (("nums:", nums), ("bins:", bins)):
    print(label, values)
print("Result:", np.histogram(nums, bins=bins))
# Draw the histogram for the same values and bins.
plt.hist(x=nums, bins=bins)
plt.show()
Q.6
import pandas as pd
import numpy as np
import statistics as s
# Q.6: load student.csv and report the mean of its 'age' and 'gper' columns,
# followed by the describe() summary.
df = pd.read_csv("student.csv")
print(df)

age_column = df['age']
ageAvg = age_column.mean()
print(ageAvg)

percentage_column = df['gper']
perAvg = percentage_column.mean()
print(perAvg)

summary = df.describe()
print(summary)
Assignment 3
SET A
import pandas as pd
# Assignment 3, Set A: load the dataset used by the preprocessing steps below.
data = pd.read_csv(filepath_or_buffer="Data.csv")
print(data)
a , b , c).
2).
# Replace missing Salary and Age values with the mean of their column.
# The imputed columns are written back to `data`. Previously the fillna()
# results were only printed, so `data` still contained the missing values.
print("\na2")
data['Salary'] = data['Salary'].fillna(data['Salary'].mean())
print(data['Salary'])

print("\nage")
data['Age'] = data['Age'].fillna(data['Age'].mean())
print(data['Age'])
3). i)
print("\na3")
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the Purchased column and overwrite that column with
# the values returned by fit_transform.
labelencoder = LabelEncoder()
purchased_codes = labelencoder.fit_transform(data['Purchased'])
data['Purchased'] = purchased_codes
print(data)
ii).
print("\na4")
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the Country column; the encoder output is converted with
# .toarray() and wrapped in a new DataFrame.
enc = OneHotEncoder(handle_unknown='ignore')
country_encoded = enc.fit_transform(data[['Country']])
enc_df = pd.DataFrame(country_encoded.toarray())
print(enc_df)
Assignment 4
SET A
(1)
1)HISTOGRAM
import numpy as np
import matplotlib.pyplot as plt
# Histogram of 1000 values drawn with np.random.randn, split into 30 bins.
data = np.random.randn(1000)
hist_style = dict(bins=30, facecolor='y', edgecolor='k', linewidth=2, alpha=0.6)
plt.hist(data, **hist_style)
plt.show()
2)linechart
import numpy as np
import matplotlib.pyplot as plt
# Line chart: y is x scaled element-wise by a second ramp from 100 to 150.
x = np.linspace(0, 50, 100)
scale = np.linspace(100, 150, 100)
y = x * scale

plt.plot(x, y, color="r", marker="v", markersize=3, label="normal")
plt.legend()
plt.grid(True)
plt.show()
3)piechart
import numpy as np
import matplotlib.pyplot as plt
# Pie chart: x holds the wedge values, y the matching labels.
x = [10, 20, 8, 35, 30, 25]
y = ["raj", "ramesh", "Krishna", "arun", "virag", "mahesh"]

plt.pie(x=x, labels=y)
plt.show()
piechart 2 (exploded format)
import numpy as np
import matplotlib.pyplot as plt
# Exploded pie chart: wedges with a non-zero `explode` entry are offset
# from the centre, and each wedge is labelled with its percentage.
x = [10, 20, 8, 35, 30, 25]
y = ["raj", "ramesh", "krishna", "Arun", "virag", "mahesh"]
explode = [0.2, 0.1, 0, 0.1, 0, 0]

pie_options = dict(labels=y, explode=explode, autopct='%1.1f%%')
plt.pie(x, **pie_options)
plt.show()
4)scatter(dot)
import numpy as np
import matplotlib.pyplot as plt
# Scatter plot (default dot markers) of two independent random samples.
x = np.random.randn(1000)
y = np.random.randn(1000)

plt.scatter(x=x, y=y)
plt.show()
scatter(triangle)
import numpy as np
import matplotlib.pyplot as plt
# Scatter plot with triangle markers whose size and colour vary per point.
x = np.random.randn(1000)
y = np.random.randn(1000)

# `s` is a marker area and must not be negative. The raw randn() values were
# negative for about half of the points, so take the absolute value to keep
# the same spread of magnitudes while giving every point a valid size.
size = 150 * np.abs(np.random.randn(1000))
colors = 100 * np.random.randn(1000)

plt.scatter(x, y, s=size, c=colors, marker="v", alpha=0.7)
plt.show()
5)boxplot
import numpy as np
import matplotlib.pyplot as plt
# Horizontal box plot of a small sample.
data = [1, 1, 2, 2, 4, 6, 6.8, 7.2, 8, 8.3, 9, 10, 10, 11.5]

plt.boxplot(x=data, vert=False)
plt.show()
(2)
import numpy as np
import matplotlib.pyplot as plt
# Horizontal box plot of the same sample extended with the values 50 and 60.
data = [1, 1, 2, 2, 4, 6, 6.8, 7.2, 8, 8.3, 9, 10, 10, 11.5, 50, 60]

plt.boxplot(x=data, vert=False)
plt.show()
(3)
i)bar
import numpy as np
import matplotlib.pyplot as plt
# Bar chart: one bar per subject (x) with its mark (y) as the bar height.
x = ["MAths", "SCience", "history", "Computer",
     "geography", "english", "Marathi", "Hind"]
y = [56, 67, 54, 65, 87, 78, 65, 88]

plt.bar(x, height=y)
plt.show()
ii) Pie chart (exploded format)
import numpy as np
import matplotlib.pyplot as plt
# Exploded pie chart of the subject marks: y supplies the wedge values and
# x the labels; each wedge shows its percentage share.
x = ["MAths", "SCience", "history", "Computer",
     "geography", "english", "Marathi", "Hind"]
y = [56, 67, 54, 65, 87, 78, 65, 88]
explode = [0.2, 0.1, 0, 0.1, 0, 0, 0.1, 0]

pie_options = dict(labels=x, explode=explode, autopct="%1.1f%%")
plt.pie(y, **pie_options)
plt.show()
(4) Iris.csv
(5)iris
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Pie chart of how often each Species value occurs in Iris.csv.
iris = pd.read_csv("Iris.csv")
species_counts = iris["Species"].value_counts()

# `explode` needs one offset per wedge, so size it from the data instead of
# hard-coding three entries. The `shadow` keyword had been split across two
# lines ("sha" / "dow"), which made the call a syntax error.
species_counts.plot.pie(
    explode=[0.1] * len(species_counts),
    autopct="%1.1f%%",
    shadow=True,
    figsize=(10, 8),
)
plt.title("Species %")
plt.show()
(6)
import pandas as pd
import matplotlib.pyplot as plt
iris=pd.read_csv("Iris.csv")