0% found this document useful (0 votes)
5 views5 pages

Fds Assigns

The document contains multiple assignments related to data analysis and visualization using Python libraries such as NumPy, Pandas, and Matplotlib. It includes tasks like generating random data, creating various plots (line, scatter, histogram, box plot, pie chart, and bar chart), handling missing values, and performing data preprocessing techniques like normalization and one-hot encoding. Additionally, it covers basic DataFrame operations, including reading CSV files, data description, and handling duplicates and null values.

Uploaded by

psb18039
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views5 pages

Fds Assigns

The document contains multiple assignments related to data analysis and visualization using Python libraries such as NumPy, Pandas, and Matplotlib. It includes tasks like generating random data, creating various plots (line, scatter, histogram, box plot, pie chart, and bar chart), handling missing values, and performing data preprocessing techniques like normalization and one-hot encoding. Additionally, it covers basic DataFrame operations, including reading CSV files, data description, and handling duplicates and null values.

Uploaded by

psb18039
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 5

Assign 4

Set A

1]import numpy as np
# Sample of 50 random integers; the lower bound 1 is inclusive and the
# upper bound 100 is exclusive.
random_data = np.random.randint(low=1, high=100, size=50)
print(random_data)

import matplotlib.pyplot as plt


# Line chart of the sample: value plotted against its position in the array.
line_fig, line_ax = plt.subplots(figsize=(10, 6))
line_ax.plot(random_data, color='blue')
line_ax.set_title('Line Chart of Random Integers')
line_ax.set_xlabel('Index')
line_ax.set_ylabel('Value')
line_ax.grid()
plt.show()

# Scatter plot of the sample against its index.
# The x positions are derived from the data itself rather than a hard-coded
# 50, so the plot keeps working if the sample size is changed.
plt.figure(figsize=(10, 6))
plt.scatter(range(len(random_data)), random_data, color='red')
plt.title('Scatter plot of random integers')
plt.xlabel('index')
plt.ylabel('value')
plt.grid()
plt.show()

# Histogram and box plot of the sample, side by side in a dedicated figure.
# A 1x2 grid is used because only two panels are drawn; the figure is shown
# explicitly so that later plotting calls start from a clean figure instead
# of drawing into these axes.
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.hist(random_data, bins=10, color='g', edgecolor='blue')
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')

plt.subplot(1, 2, 2)
plt.boxplot(random_data, patch_artist=True, boxprops=dict(facecolor='purple'))
plt.title("Box Plot")
plt.ylabel('Value')

plt.tight_layout()  # keep titles and axis labels from overlapping
plt.show()

# 2] Horizontal box plot of the random sample.
x = random_data
current_axes = plt.gca()
current_axes.boxplot(x, vert=False)
plt.show()

3]import matplotlib.pyplot as plt


# Pie chart: one wedge per subject, sized by the marks scored in it.
marks = [10, 20, 30, 40, 50, 60]
subject = ['english', 'hindi', 'science', 'maths', 'sst', 'geography']
plt.pie(x=marks, labels=subject)
plt.show()

import matplotlib.pyplot as plt


# Bar chart: one bar per subject, with the marks as the bar height.
marks = [10, 20, 30, 40, 50, 60]
subject = ['english', 'hindi', 'science', 'maths', 'sst', 'geography']
# plt.bar takes (x, height): the categories go first and the values second.
plt.bar(subject, marks)
plt.show()

SET B

2]import pandas as pd
import matplotlib.pyplot as plt
# Scatter plot of petal length against sepal length from the iris CSV.
df = pd.read_csv('/home/ty138/IRIS.csv')
sepal_values, petal_values = df['sepal_length'], df['petal_length']
plt.scatter(sepal_values, petal_values, color='red')
plt.gca().set(title='Relationship', xlabel='Sepal length', ylabel='Petal length')
plt.show()

ASSIGN 3

SET A

1]a)import pandas as pd
# Load the dataset, print it, and compute summary statistics.
data = pd.read_csv('data1.csv')
print(data)
data.describe()

# b) Dimensions of the dataset as a (rows, columns) tuple.
data.shape

# c) First three rows.
data.head(3)

# 2] Replace missing Age and Salary values with the mean of their column.
meanAge = data['Age'].mean()
meanSal = data['Salary'].mean()
# Assign the filled column back to the frame. Calling fillna(inplace=True)
# on a column selection is chained assignment: pandas warns about it and,
# with Copy-on-Write enabled, it leaves `data` unchanged.
data['Age'] = data['Age'].fillna(meanAge)
data['Salary'] = data['Salary'].fillna(meanSal)
data

3]a)from sklearn.preprocessing import OneHotEncoder


# Fit the encoder on the Country column and expand the result into a dense
# frame of indicator columns.
enc = OneHotEncoder(handle_unknown='ignore')
country_encoded = enc.fit_transform(data[['Country']])
enc_df = pd.DataFrame(country_encoded.toarray())
enc_df

# join() aligns the indicator columns with `data` on the row index.
data = data.join(enc_df)
data

b)from sklearn.preprocessing import LabelEncoder


# Replace the Purchased labels with integer codes.
labelEncoder = LabelEncoder()
purchased_codes = labelEncoder.fit_transform(data['Purchased'])
data['Purchased'] = purchased_codes
data

SET B
1]import pandas as pd
# Load the red-wine quality dataset; this rebinds the name `data`.
data = pd.read_csv('winequality-red.csv')
data

2]from sklearn import preprocessing


# Min-max scaling: map every column onto the range 0..100.
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
data_scaled = data_scaler.fit_transform(data)
data_scaled.round(2)

import numpy as np
import scipy.stats as s
# Report the rounded scaled array and its overall mean.
X_train = data_scaled.round(2)
print("Initial array : ", X_train)
initial_mean = s.tmean(X_train).round(2)
print("Initial Mean :", initial_mean)

# L1 normalisation of the unscaled data.
dn = preprocessing.normalize(data, norm='l1')
dn.round(4)

# Binarise the unscaled data around the threshold 5.
binarizer = preprocessing.Binarizer(threshold=5)
data_binarized = binarizer.transform(data)
print(data_binarized)
ASSIGN 2

SET A

1]import numpy as np
# Build a 2x2 array, flatten it, and report the extremes of the flat array.
data = np.array([[0, 1], [2, 3]])
# The messages below describe a flattened array, so flatten explicitly
# instead of printing the 2-D array.
flattened = data.flatten()
print("original flattened array is:\n", flattened)
print("maximum value of the above flattened array:\n", np.max(flattened))
print("minimum vale of the above flattemed array:\n", np.min(flattened))

# 4] Manhattan distance between two points.
def manhattan(point1, point2):
    """Return the Manhattan (taxicab) distance between two points.

    Args:
        point1: Iterable of numeric coordinates, e.g. ``(x, y)``.
        point2: Iterable of numeric coordinates, paired positionally with
            ``point1``.

    Returns:
        The sum of the absolute coordinate differences. Points of any
        dimension are supported; if the two points differ in length, the
        extra trailing coordinates of the longer one are ignored.
    """
    return sum(abs(a - b) for a, b in zip(point1, point2))
def sum_of_man(points):
    """Return the total Manhattan distance over every unordered pair of points.

    Args:
        points: Iterable of points accepted by ``manhattan``.

    Returns:
        The sum of ``manhattan(p, q)`` over all pairs with ``p`` before ``q``
        in ``points``; ``0`` when there are fewer than two points.
    """
    from itertools import combinations

    # combinations(points, 2) yields each unordered pair exactly once.
    return sum(manhattan(p, q) for p, q in combinations(points, 2))

# Example run on four sample points.
points = [(1, 2), (3, 4), (5, 6), (7, 8)]
total_distance = sum_of_man(points)
print("Sum : ", total_distance)

5]import numpy as np
import matplotlib.pyplot as plt
# Histogram of `nums` over the given bin edges, drawn with plt.bar.
nums = np.array([0.5, 0.7, 1, 1.2, 1.3, 2.1])
bins = np.array([0, 1, 2, 3])
hist, _ = np.histogram(nums, bins=bins)
# Each bar starts at its bin's left edge and spans the full bin width, so
# the bars line up with the bin edges shown as x ticks.
plt.bar(bins[:-1], hist, width=np.diff(bins), align='edge', edgecolor='black')
plt.xticks(bins)
plt.ylabel('Frequency')
plt.title('Histogram of nums against bins')
plt.show()

SET B

1]import pandas as pd
import numpy as np
# Load the iris CSV and report the per-column maximum and minimum.
df = pd.read_csv('/home/ty138/IRIS.csv')
df

# Use the DataFrame reductions directly instead of np.max(df) / np.min(df):
# what NumPy's wrappers return for a DataFrame depends on the pandas version.
# numeric_only=True skips non-numeric columns.
# NOTE(review): presumably the CSV has a text species column - confirm.
print("Maximun of above data: ", df.max(numeric_only=True))


print("\n")
print("Minimun of above data: ", df.min(numeric_only=True))

ASSIGN 1

SET A

1]import pandas as pd
# Build a ten-row student table (Name, Age, Percentage) by starting from an
# empty frame and assigning one row per integer label 0..9 through .loc.
df=pd.DataFrame(columns=['Name','Age','Percentage'])
df.loc[0]=['Ankit',18,67.8]
df.loc[1]=['Maya',16,68.05]
df.loc[2]=['Seema',21,84.21]
df.loc[3]=['Raj',19,76.66]
df.loc[4]=['Sayali',17,78.91]
df.loc[5]=['Veer',18,55.86]
df.loc[6]=['Om',19,67]
df.loc[7]=['Kaushik',19,78]
df.loc[8]=['Prachiti',20,77]
df.loc[9]=['Sonal',21,88]
# Bare name: displays the frame when run as the last line of a notebook cell.
df

# 2] Basic structure of the DataFrame.
df.shape    # (rows, columns)
df.size     # total number of cells
df.columns  # column labels

3]import pandas as pd
# Rebuild the ten-row student table (Name, Age, Percentage) row by row via
# .loc, then inspect its column dtypes and summary statistics.
df=pd.DataFrame(columns=['Name','Age','Percentage'])
df.loc[0]=['Ankit',18,67.8]
df.loc[1]=['Maya',16,68.05]
df.loc[2]=['Seema',21,84.21]
df.loc[3]=['Raj',19,76.66]
df.loc[4]=['Sayali',17,78.91]
df.loc[5]=['Veer',18,55.86]
df.loc[6]=['Om',19,67]
df.loc[7]=['Kaushik',19,78]
df.loc[8]=['Prachiti',20,77]
df.loc[9]=['Sonal',21,88]
df

# NOTE(review): the dtypes reported here depend on how pandas infers types
# for rows inserted one at a time - confirm they are numeric before relying
# on describe() for numeric statistics.
df.dtypes
df.describe()

4]import pandas as pd
# Build a five-row table row by row via .loc. The third column is named
# 'Persentage' (sic); later snippets use the same key, so it must stay in sync.
df=pd.DataFrame(columns=['Name','Age','Persentage'])
df.loc[0]=['Ankit',18,67.8]
# Same name as row 0 but different Age/Persentage, so not a duplicate row.
df.loc[1]=['Ankit',16,68.05]
df.loc[2]=['Seema',20,84.21]
df.loc[3]=['Raj',19,76.66]
df.loc[4]=['Sayali',17,78.91]
# Add a Remarks column with None in every row.
df["Remarks"]=None
df

# 6] Add an empty Remarks column, then look at row removal, missing values
# and duplicate rows.
df["Remarks"] = None
df

# drop() returns a new frame without the row labelled 4; `df` is unchanged.
df.drop(4)

# Boolean mask of missing cells.
df.isnull()
# Boolean Series marking rows that repeat an earlier row. The method is
# `duplicated`; DataFrame has no `duplicate` attribute.
df.duplicated()

7]import pandas as pd
# Build a ten-row table row by row via .loc, including missing ages and
# repeated rows, then plot 'Persentage' (sic) against Name.
df=pd.DataFrame(columns=['Name','Age','Persentage'])
df.loc[0]=['Ankit',18,67.8]
# Rows 1 and 2 are identical and have no Age (None).
df.loc[1]=['Maya',None,68.05]
df.loc[2]=['Maya',None,68.05]
df.loc[3]=['Raj',19,76.66]
df.loc[4]=['Sayali',17,78.91]
# Rows 5 and 6 are identical.
df.loc[5]=['Veer',18,55.86]
df.loc[6]=['Veer',18,55.86]
df.loc[7]=['Kaushik',19,78]
df.loc[8]=['Prachiti',20,77]
df.loc[9]=['Sonal',21,88]
df

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# No `kind` is given, so pandas uses its default plot kind.
df.plot(x="Name", y="Persentage")
plt.show()

# 8] Scatter plot of 'Persentage' (sic) for each Name.
df.plot.scatter(x='Name', y='Persentage')
plt.show()

You might also like