0% found this document useful (0 votes)
15 views10 pages

FDS All Practicals

The document contains code to perform data analysis and visualization on various datasets using Python libraries like Pandas, NumPy, and Matplotlib. Some key steps include: 1) Loading and exploring datasets including student data, Iris data, and other csv files. Features like shape, describe, head are used. 2) Data preprocessing - handling missing values, label encoding, one hot encoding. 3) Various plots generated - histograms, line plots, scatter plots, boxplots, bar plots, pie charts on different datasets. 4) Statistical metrics like mean, median calculated and datasets explored.

Uploaded by

salimsayyad817
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views10 pages

FDS All Practicals

The document contains code to perform data analysis and visualization on various datasets using Python libraries like Pandas, NumPy, and Matplotlib. Some key steps include: 1) Loading and exploring datasets including student data, Iris data, and other csv files. Features like shape, describe, head are used. 2) Data preprocessing - handling missing values, label encoding, one hot encoding. 3) Various plots generated - histograms, line plots, scatter plots, boxplots, bar plots, pie charts on different datasets. 4) Statistical metrics like mean, median calculated and datasets explored.

Uploaded by

salimsayyad817
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 10

FDS ALL PRACTICALS

Assignment 1
SET A
1,2).
import numpy as np
import pandas as pd
# Sample student records: one list per column, every value kept as a string.
dict1 = {
    "name": ["Rita", "Sita", "Amey", "Ajay", "Yash", "Archana", "Prerna",
             "Shruti", "Paurnima", "Rutika"],
    "age": ["21", "22", "20", "23", "24", "19", "18", "23", "21", "17"],
    "Percentage": ["80", "87", "90", "92", "86", "94", "76", "79", "89", "80"],
}
df = pd.DataFrame(dict1)

print(df)

# (rows, columns) tuple.
s = df.shape
print("Shape=", s, "\n")

c = len(df.columns)
print("No. of Columns=", c, "\n")

r = len(df)
print("No.of rows=", r, "\n")

d = df.dtypes
print("Datatypes is: ", d, "\n")

# Feature names are the column labels. The earlier pd.get_dummies(df) call
# printed a one-hot encoded copy of the whole frame instead of the names.
featurenm = list(df.columns)
print("feature names:", featurenm, "\n")

# Total number of cells (rows * columns).
a = df.size
print("size=", a, "\n")
3,4,5,6).
import pandas as pd

import numpy as np

# Roll numbers and marks, entered as strings.
dict2 = {
    "rno": ["5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"],
    "mark": ["89", "80", "87", "90", "92", "86", "94", "76", "89", "80", "81"],
}

# Explicit object dtype so both columns can hold the mixed str / int / None
# values assigned below, whatever string dtype pandas would infer by default.
df2 = pd.DataFrame(dict2, dtype=object)
print(df2)
print(df2.dtypes)

# The values are numeric strings, so convert before computing statistics.
print(df2.astype(float).describe())

# Rows carrying missing and duplicated values. df2 starts with labels 0-10,
# so labels 5-10 overwrite existing rows and labels 11-15 append new ones.
extra_rows = {
    5: [6, None],
    6: [None, None],
    7: [None, 85],
    8: [9, 21],
    9: [9, 21],
    10: [7, None],
    11: [None, 45],
    12: [34, 2],
    13: [None, None],
    14: [11, None],
    15: [8, 90],
}
for label, values in extra_rows.items():
    df2.loc[label] = values
print(df2)

# Add an empty "remarks" column.
df2["remarks"] = None
print(df2)

# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() only appended a stray "None" line.
df2.info()

print(df2.isnull())
print(df2.duplicated())

# `columns=` already selects the axis; the extra axis=1 was redundant.
df2.drop(columns="remarks", inplace=True)
print(df2)

# Keep only the rows where both rno and mark are present.
df2.dropna(subset=["rno", "mark"], inplace=True)
print(df2)
7,8).
import matplotlib.pyplot as plt

# The same five students and percentages are drawn twice:
# first as a line plot, then as a scatter plot.
names = ["Rita", "Sita", "Amey", "Ajay", "Yash"]
percentages = [85, 90, 75, 60, 95]

# Line plot.
plt.plot(names, percentages)
plt.title("Line Plot: ")
plt.xlabel("name")
plt.ylabel("percentage")
plt.show()

# Scatter plot.
plt.scatter(names, percentages)
plt.title("Scatter plot")
plt.xlabel("Name")
plt.ylabel("percentage")
plt.show()
Assignment 2
SET A
Q.1
import numpy as np
# Build a 2x2 array holding 0..3, flatten it, and report its extremes.
a = np.arange(4).reshape((2, 2))

# The label promises a flattened array, so print the 1-D copy rather than
# the 2x2 array itself (which is what was printed before).
flat = a.flatten()
print("Flattened Array: ", flat)
print("Maximum Value: ", np.max(a))
print("Minimum Value: ", np.min(a))

Q.2
import numpy as np
# Euclidean distance between two 3-D points: the norm of their difference.
point1 = np.array([1, 2, 3])
point2 = np.array([1, 1, 1])
offset = point1 - point2
dist = np.linalg.norm(offset)
print("Euclidian Distance: ", dist)

Q.3
import numpy as np
import pandas as pd
# Ten observations per column. The sixth 'points' value is 20; it had been
# split across a line wrap into "2" / "0", which is a syntax error.
df = pd.DataFrame({
    'rating': [90, 85, 82, 88, 94, 90, 76, 75, 87, 86],
    'points': [25, 20, 14, 16, 27, 20, 12, 15, 14, 19],
    'assists': [5, 7, 7, 8, 5, 7, 6, 9, 9, 5],
    'rebound': [11, 8, 10, 6, 6, 9, 6, 10, 10, 7],
})


def find_iqr(x):
    """Return the interquartile range of x.

    The result is the 75th percentile minus the 25th percentile, both
    computed with numpy.percentile.
    """
    q75, q25 = np.percentile(x, [75, 25])
    return q75 - q25


# IQR of two selected columns, then of every column.
print(df[['rating', 'points']].apply(find_iqr))
print(df.apply(find_iqr))

Q.4
import numpy as np
import pandas as pd
def distancesum(x, y, n):
    """Return the sum of Manhattan distances over all pairs of points.

    x and y hold the coordinates of the points (point i is (x[i], y[i]))
    and n is the number of leading points to include. Each unordered pair
    (i, j) with i < j < n is counted once; the result is 0 when n < 2.
    """
    # Uses the builtin sum(); the original accumulated into a local that was
    # itself named `sum`, shadowing the builtin.
    return sum(
        abs(x[i] - x[j]) + abs(y[i] - y[j])
        for i in range(n)
        for j in range(i + 1, n)
    )


x = [-1, 1, 3, 2]
y = [5, 6, 5, 3]
n = len(x)
print("Manhattan Distance: ", distancesum(x, y, n))

Q.5
import numpy as np
import matplotlib.pyplot as plt
# Values to bucket and the bin edges (three bins between 0 and 3).
nums = np.array([0.5, 0.7, 1.0, 1.2, 1.3, 2.1])
bins = np.array([0, 1, 2, 3])
print("nums:", nums)
print("bins:", bins)

# np.histogram returns a (counts, bin_edges) pair; print it as-is.
counts_and_edges = np.histogram(nums, bins)
print("Result:", counts_and_edges)

# Draw the same histogram.
plt.hist(nums, bins=bins)
plt.show()

Q.6
import pandas as pd
import numpy as np
import statistics as s
# Load the student records and show them.
df = pd.read_csv("student.csv")
print(df)

# Mean of the 'age' column.
age_column = df['age']
ageAvg = age_column.mean()
print(ageAvg)

# Mean of the 'gper' column.
gper_column = df['gper']
perAvg = gper_column.mean()
print(perAvg)

# Summary statistics for the frame.
summary = df.describe()
print(summary)

Assignment 3
SET A
import pandas as pd
# Read the dataset used by the rest of this assignment and display it.
data = pd.read_csv(filepath_or_buffer="Data.csv")
print(data)

a , b , c).

# Print each heading followed by the matching view of `data`:
# its shape, its summary statistics, and its first three rows.
overview = (
    ("Shape of the data: ", data.shape),
    ("\nDescribe data: ", data.describe()),
    ("\nFirst 3 row: ", data.head(3)),
)
for heading, view in overview:
    print(heading)
    print(view)

2).

# Replace missing Salary and Age values with the column mean.
# fillna() returns a new Series, so the result is assigned back to `data`;
# previously it was only printed and `data` kept its missing values.
print("\na2")
data['Salary'] = data['Salary'].fillna(data['Salary'].mean())
print(data['Salary'])
print("\nage")
data['Age'] = data['Age'].fillna(data['Age'].mean())
print(data['Age'])

3). i)

print("\na3")
from sklearn.preprocessing import LabelEncoder

# Replace the 'Purchased' column with the integer codes produced by
# LabelEncoder, then show the updated frame.
labelencoder = LabelEncoder()
purchased_codes = labelencoder.fit_transform(data['Purchased'])
data['Purchased'] = purchased_codes
print(data)

ii).

print("\na4")
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'Country' column and show the result as a DataFrame.
enc = OneHotEncoder(handle_unknown='ignore')
country_matrix = enc.fit_transform(data[['Country']])
enc_df = pd.DataFrame(country_matrix.toarray())
print(enc_df)

Assignment 4
SET A

(1)
1)HISTOGRAM
import numpy as np
import matplotlib.pyplot as plt
# Histogram of 1000 draws from np.random.randn.
data = np.random.randn(1000)
hist_style = dict(bins=30, facecolor='y', edgecolor='k', linewidth=2, alpha=0.6)
plt.hist(data, **hist_style)
plt.show()

2)linechart
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced x values, each multiplied by a factor that itself rises
# evenly from 100 to 150.
x = np.linspace(0, 50, 100)
factor = np.linspace(100, 150, 100)
y = x * factor

# Red line with small triangle markers, a grid and a legend.
plt.plot(x, y, c="r", marker="v", markersize=3, label="normal")
plt.grid(True)
plt.legend()
plt.show()

3)piechart
import numpy as np
import matplotlib.pyplot as plt
# Slice value for each person, in drawing order.
shares = {
    "raj": 10,
    "ramesh": 20,
    "Krishna": 8,
    "arun": 35,
    "virag": 30,
    "mahesh": 25,
}
x = list(shares.values())
y = list(shares)
plt.pie(x, labels=y)
plt.show()

piechart2(breakable format)
import numpy as np
import matplotlib.pyplot as plt
# One (value, label, explode offset) triple per slice, in drawing order.
slices = [
    (10, "raj", 0.2),
    (20, "ramesh", 0.1),
    (8, "krishna", 0),
    (35, "Arun", 0.1),
    (30, "virag", 0),
    (25, "mahesh", 0),
]
x = [value for value, _, _ in slices]
y = [label for _, label, _ in slices]
explode = [offset for _, _, offset in slices]

# Pulled-out slices with each percentage printed to one decimal place.
plt.pie(x, labels=y, explode=explode, autopct='%1.1f%%')
plt.show()

4)scatter(dot)
import numpy as np
import matplotlib.pyplot as plt
# Scatter plot of 1000 points whose coordinates come from np.random.randn.
point_count = 1000
x = np.random.randn(point_count)
y = np.random.randn(point_count)
plt.scatter(x, y)
plt.show()

scatter(triangle)
import numpy as np
import matplotlib.pyplot as plt
# Scatter plot with triangle markers of random size and colour.
x = np.random.randn(1000)
y = np.random.randn(1000)

# Marker areas must be non-negative. randn() returns negative values about
# half the time, so take the absolute value to give every point a valid size.
size = np.abs(150 * np.random.randn(1000))

# Colour values are mapped through the colormap, so any float is acceptable.
colors = 100 * np.random.randn(1000)

plt.scatter(x, y, s=size, c=colors, marker="v", alpha=0.7)
plt.show()

5)boxplot
import numpy as np
import matplotlib.pyplot as plt
# Horizontal box plot of a small sample.
data = [
    1, 1, 2, 2, 4, 6, 6.8,
    7.2, 8, 8.3, 9, 10, 10, 11.5,
]
plt.boxplot(x=data, vert=False)
plt.show()

(2)

import numpy as np
import matplotlib.pyplot as plt
# Horizontal box plot of the same kind of sample with two much larger
# values (50 and 60) appended at the end.
main_values = [1, 1, 2, 2, 4, 6, 6.8, 7.2, 8, 8.3, 9, 10, 10, 11.5]
large_values = [50, 60]
data = main_values + large_values
plt.boxplot(x=data, vert=False)
plt.show()

(3)

i)bar
import numpy as np
import matplotlib.pyplot as plt
# Marks per subject, in bar order.
marks = {
    "MAths": 56,
    "SCience": 67,
    "history": 54,
    "Computer": 65,
    "geography": 87,
    "english": 78,
    "Marathi": 65,
    "Hind": 88,
}
x = list(marks)
y = list(marks.values())
plt.bar(x, y)
plt.show()

ii)Piechart(breakable format)
import numpy as np
import matplotlib.pyplot as plt
# One (subject, marks, explode offset) triple per slice, in drawing order.
subject_slices = [
    ("MAths", 56, 0.2),
    ("SCience", 67, 0.1),
    ("history", 54, 0),
    ("Computer", 65, 0.1),
    ("geography", 87, 0),
    ("english", 78, 0),
    ("Marathi", 65, 0.1),
    ("Hind", 88, 0),
]
x = [subject for subject, _, _ in subject_slices]
y = [score for _, score, _ in subject_slices]
explode = [offset for _, _, offset in subject_slices]

# Slice sizes come from the marks; subjects are used as labels.
plt.pie(y, labels=x, explode=explode, autopct="%1.1f%%")
plt.show()

(4) Iris.csv

import matplotlib.pyplot as plt


import pandas as pd
import seaborn as sns
# Load the Iris dataset and show it.
iris = pd.read_csv("Iris.csv")
print(iris)

# Bar chart of how many rows belong to each value of the 'Species' column.
sns.countplot(data=iris, x="Species")
plt.title("Iris species COunt")
plt.show()

(5)iris
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Pie chart of the share of each species in the Iris dataset.
iris = pd.read_csv("Iris.csv")
species_counts = iris["Species"].value_counts()

# The `shadow` keyword had been split across a line wrap ("sha" / "dow"),
# which is a syntax error; it is restored here.
# NOTE(review): explode has three entries, so this assumes the 'Species'
# column holds exactly three distinct values - confirm against Iris.csv.
species_counts.plot.pie(
    explode=[0.1, 0.1, 0.1],
    autopct="%1.1f%%",
    shadow=True,
    figsize=(10, 8),
)
plt.title("Species %")
plt.show()

(6)

import pandas as pd
import matplotlib.pyplot as plt
iris = pd.read_csv("Iris.csv")

# One histogram per measurement column, drawn in this order:
#   1: sepal length, 2: sepal width, 3: petal length, 4: petal width.
# Each entry is (column, bar colour, title, x-axis label).
# The first title previously read "Sepal Length in am"; the unit is cm.
histograms = [
    ("SepalLengthCm", "green", "Sepal Length in cm", "Sepal_Length_cm"),
    ("SepalWidthCm", "Red", "Sepal_width_cm", "Sepal_Width_cm"),
    ("PetalLengthCm", "blue", "Petal_Length_Cm", "Petallength cm"),
    ("PetalWidthCm", "yellow", "Petal Width Cm", "Petal_Width_CM"),
]

for column, color, title, xlabel in histograms:
    # A fresh 10x7 figure per histogram so the plots do not overlap.
    plt.figure(figsize=(10, 7))
    x = iris[column]
    plt.hist(x, bins=20, color=color)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel("Count")
    plt.show()

You might also like