ML Lab Manual
ML Lab Manual
A Laboratory Record
Submitted by
Register No.:
Name:
BACHELOR OF TECHNOLOGY
IN
OCTOBER 2024
BONAFIDE CERTIFICATE
Register No: Date:
Index
2 Logistic Regression
3 K-Fold
5 K-Nearest Neighbor
6 K-Means Clustering
7 Hierarchical Clustering
EX NO: 1
# EX 1: Simple linear regression on synthetic data (notebook-style listing).
# NOTE(review): this fragment assumes earlier cells defined `plt`
# (matplotlib.pyplot), `LinearRegression` (sklearn.linear_model), and `mse`
# (mean squared error of Prediction vs y) — those lines were lost in
# extraction; confirm against the original notebook.
import numpy as np
np.random.seed(0)
m = 50 # creating 50 samples
X = np.linspace(0,10,m).reshape(m,1)  # 50 evenly spaced inputs as a column vector
y = X + np.random.randn(m,1)          # target = X plus standard Gaussian noise
print(X)
len(X)
len(y)
plt.scatter(X,y)
model = LinearRegression()
model.fit(X,y)
model.score(X,y)  # R^2 on the training data
Prediction = model.predict(X)
plt.scatter(X,y)
plt.plot(X,Prediction,'r')  # fitted line in red over the scatter
rmse = np.sqrt(mse)  # presumably mse = mean_squared_error(y, Prediction) — TODO confirm
# EX 1b: Linear regression on the Salary_Data.csv dataset.
# NOTE(review): this fragment assumes lost lines performed the
# train_test_split (defining x_train/x_test/y_train/y_test), fitted `model`,
# and computed `predict` = model.predict(x_test); also the imports for
# LinearRegression, plt, mean_absolute_error, mean_squared_error and
# r2_score — confirm against the original notebook.
import numpy as np
import pandas as pd
data=pd.read_csv("Salary_Data.csv")
print(data)
print(data)
len(data)
x=data[["YearsExperience"]]  # feature: years of experience
y=data[["Salary"]]           # target: salary
len(x_train)
len(x_test)
model = LinearRegression()
plt.scatter(x_test,y_test)
mae=mean_absolute_error(y_test, predict)
mse=mean_squared_error(y_test, predict)
rmse=np.sqrt(mse)  # RMSE = sqrt(MSE), in the same units as the target
r2=r2_score(y_test, predict)
print("MAE:",mae)
print("MSE:",mse)
print("RMSE:",rmse)
print("R2:",r2)
Result: Thus linear regression on the sample dataset was evaluated successfully
Output:
MAE: 5950.500000000003
MSE: 47645779.63765114
RMSE: 6902.592240430485
R2: 0.9124538165906179
DATE:
Aim: To perform Logistic Regression and predict digit from digits dataset
Program:
# EX 2: Logistic regression classification of the sklearn digits dataset.
# NOTE(review): assumes earlier cells ran the imports for load_digits,
# train_test_split, LogisticRegression, confusion_matrix (sklearn) and
# matplotlib.pyplot as plt — those lines were lost in extraction.
digits=load_digits()
dir(digits)
len(digits.data[1])  # each digit image is a flat vector of 64 pixel values
plt.gray()
plt.matshow(digits.images[3])
digits.target[3]
# 80/20 train/test split
x_train,x_test,y_train,y_test=train_test_split(digits.data,digits.target,test_size=0.2)
LGR=LogisticRegression(max_iter=30)  # low max_iter: solver may warn about non-convergence
LGR.fit(x_train,y_train)
prediction=LGR.predict(x_test)
cm=confusion_matrix(y_test,prediction)
import seaborn as sn
plt.figure(figsize=(5,5))
sn.heatmap(cm,annot=True)
# FIX: the original used typographic quotes (‘prediction’ / ‘Actual’),
# which is a Python SyntaxError; replaced with straight quotes.
plt.xlabel('prediction')
plt.ylabel('Actual')
import numpy as np
# Accuracy = correct predictions (diagonal of the confusion matrix) / total
correct_pred=np.trace(cm)
total_pred=np.sum(cm)
accuracy=correct_pred/total_pred
print(accuracy)
Result: Thus logistic regression to predict digits from the digits dataset was
implemented successfully
Output:
accuracy=0.9611111111111111
DATE:
Aim: To perform K-Fold on Sample Dataset
Program:
# EX 3a: Show K-Fold train/test index splits on a toy dataset of 20 integers.
# NOTE(review): assumes an earlier cell imported KFold
# (sklearn.model_selection) — lost in extraction.
import numpy as np
data=np.arange(1,21)
data
k=5
kf=KFold(n_splits=5,shuffle=True,random_state=42)
# FIX: the loop header was missing in the transcript, leaving fold/
# train_index/test_index undefined (NameError). kf.split(data) yields one
# (train_index, test_index) pair per fold; enumerate(..., start=1) numbers
# the folds 1..5 as in the recorded output.
# FIX: the original reassigned train_index/test_index to the data VALUES
# before printing them as "index"; bind the values to separate names so the
# printed labels stay truthful.
for fold,(train_index,test_index) in enumerate(kf.split(data),start=1):
    train_data,test_data=data[train_index],data[test_index]
    print(f"fold:{fold}")
    print(f"Train index:{train_index}")
    print(f"Test index:{test_index}")
Output:
Fold:1
Fold:2
Fold:3
Fold:4
Fold:5
# EX 3b: 5-fold cross-validation of linear regression on synthetic data.
# NOTE(review): assumes earlier cells imported LinearRegression, KFold and
# mean_squared_error (sklearn) — lost in extraction.
import numpy as np
np.random.seed(42)
x=2*np.random.randn(100,1)
y=4+3*x+np.random.randn(100,1)  # true model: y = 4 + 3x + Gaussian noise
model=LinearRegression()
kf=KFold(n_splits=5,shuffle=True,random_state=42)
mse_scores=[]
fold_index=1
# FIX: the for-loop header was lost in the transcript; without it
# train_index/test_index are undefined and mse_scores stays empty (the
# recorded output "MSE SCORE:[]" shows exactly that failure).
for train_index,test_index in kf.split(x):
    print(f"fold:{fold_index}")
    print(f"Train index:{train_index}")
    print(f"Test index:{test_index}")
    X_train,X_test=x[train_index],x[test_index]
    Y_train,Y_test=y[train_index],y[test_index]
    model.fit(X_train,Y_train)
    y_pred=model.predict(X_test)
    mse=mean_squared_error(Y_test,y_pred)
    mse_scores.append(mse)
    fold_index +=1
mean_mse=np.mean(mse_scores)
std_mse=np.std(mse_scores)
print(f"MSE SCORE:{mse_scores}")
print(f"MEAN MSE:{mean_mse}")  # FIX: typo "MEAN MASE" -> "MEAN MSE"
print(f"Standard deviation:{std_mse}")
Output:
MSE SCORE:[]
MEAN MASE:0.7479020958894542
Standard Deviation:0.0
# EX 3c: K-Fold cross-validation of logistic regression on the digits data.
# NOTE(review): the KFold definition, the fold-loop header, and the
# accuracy computation (presumably accuracy = accuracy_score(Y_test, Y_pred))
# were lost in extraction — X_train/Y_train/X_test/Y_test and `accuracy` are
# undefined as shown; confirm against the original notebook. Also assumes
# load_digits and LogisticRegression were imported earlier.
import numpy as np
digits = load_digits()
X = digits.data
Y = digits.target
model = LogisticRegression(max_iter=10000)  # high max_iter so the solver converges
accuracy_scores = []
model.fit(X_train,Y_train)
Y_pred=model.predict(X_test)
# Calculate accuracy
accuracy_scores.append(accuracy)
mean_accuracy=np.mean(accuracy_scores)
std_accuracy=np.std(accuracy_scores)
Result: Thus K-Fold cross-validation on linear and logistic regression has been
evaluated successfully
Output:
Accuracy Scores for each fold: [0.9526462395543176]
Program:
# EX 4: Support Vector Machine classification on the digits dataset
# (parallel to the logistic-regression listing, with SVC as the model).
# NOTE(review): assumes earlier cells imported load_digits, train_test_split,
# SVC and confusion_matrix (sklearn) and matplotlib.pyplot as plt —
# lost in extraction.
digits=load_digits()
dir(digits)
len(digits.data[1])  # 64 pixel values per digit image
plt.gray()
plt.matshow(digits.images[3])
digits.target[3]
# 80/20 train/test split
x_train,x_test,y_train,y_test=train_test_split(digits.data,digits.target,test_size=0.2)
svm=SVC()  # default RBF kernel
svm.fit(x_train,y_train)
prediction=svm.predict(x_test)
cm=confusion_matrix(y_test,prediction)
import seaborn as sn
plt.figure(figsize=(5,5))
sn.heatmap(cm,annot=True)
plt.xlabel('prediction')
plt.ylabel('Actual')
import numpy as np
# Accuracy = correct predictions (diagonal of the confusion matrix) / total
correct_pred=np.trace(cm)
total_pred=np.sum(cm)
accuracy=correct_pred/total_pred
print(accuracy)
Result: Thus SVM classification on the digits dataset was implemented successfully
Output:
Accuracy=0.9805555555555555
Program:
# EX 5: K-Nearest-Neighbors survival prediction on the Titanic dataset.
# NOTE(review): assumes earlier cells imported seaborn as sns and
# KNeighborsClassifier (sklearn.neighbors) — lost in extraction.
import pandas as pd
import numpy as np
ship = sns.load_dataset('titanic')
ship.shape
ship = ship[['survived','pclass','sex','age']]  # keep target + 3 features
ship.dropna(axis=0,inplace=True)                # drop rows with missing values
ship['sex'].replace(['male','female'],[0,1],inplace=True)  # encode sex numerically
ship.head()
knn = KNeighborsClassifier()
y = ship['survived']
X = ship.drop('survived',axis=1)
knn.fit(X,y)
knn.score(X,y)  # training-set accuracy (optimistic estimate)

def survivedPerson(knn,pclass=3,sex=1,age=30):
    """Print the fitted model's survival prediction (0/1) for one passenger."""
    # FIX: the function body's indentation was lost in the transcript,
    # which is a SyntaxError; restored here.
    x = np.array([pclass,sex,age]).reshape(1,3)
    print(knn.predict(x))

survivedPerson(knn)
Output:
Accuracy = 0.8305322128851541
SurvivedPerson = [0]
Program:
# EX 6: K-Means clustering into 4 clusters.
# NOTE(review): `X` (the feature matrix), the KMeans import
# (sklearn.cluster), plt, and the scatter call that plots points/centroids
# were lost in extraction — confirm against the original notebook.
import numpy as np
kmeans = KMeans(n_clusters=4)
kmeans.fit(X)
labels = kmeans.labels_              # cluster id assigned to each sample
centroids = kmeans.cluster_centers_  # one centroid per cluster
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
Output:
EX NO: 7 Implementation of Hierarchical Clustering
DATE:
Program:
# EX 7: Agglomerative (hierarchical) clustering and dendrogram.
# NOTE(review): `X`, the imports for AgglomerativeClustering
# (sklearn.cluster) and linkage/dendrogram (scipy.cluster.hierarchy), plt,
# and the scatter plot of `labels` were lost in extraction — confirm
# against the original notebook.
import numpy as np
import pandas as pd
model = AgglomerativeClustering(n_clusters=4,linkage='ward')  # Ward: merge to minimize within-cluster variance
labels = model.fit_predict(X)
plt.title('Hierarchical Clustering')
plt.show()
# Dendrogram built from the same Ward linkage
Z = linkage(X, method='ward')
plt.figure(figsize=(10, 7))
dendrogram(Z)
plt.show()
Program:
# EX 8: Principal Component Analysis on a customer dataset.
# NOTE(review): assumes lost lines defined `file_path`, `features` (the
# numeric feature columns of `data`), the PCA import (sklearn.decomposition),
# `pca_df` (DataFrame of the 2 components) and plt — confirm against the
# original notebook.
import pandas as pd
# FIX: this import line was garbled in the transcript
# ("fromsklearn.preprocessingimportStandardScale") — spaces stripped and
# the class name truncated; restored to the real sklearn import.
from sklearn.preprocessing import StandardScaler
data = pd.read_csv(file_path)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)  # standardize to zero mean, unit variance
pca = PCA(n_components=2)                         # keep the first 2 principal components
pca_transformed = pca.fit_transform(scaled_features)
explained_variance = pca.explained_variance_ratio_  # variance fraction per component
print(pca_df.head())
plt.show()
Result: Thus Principal Component Analysis on the customer dataset has been implemented successfully
Output:
PCA Results:
0 0.193291 -0.305100 2
1 0.434420 -0.328413 2
2 0.811143 0.815096 2
3 -0.778648 0.652754 1
4 0.166287 1.271434 2
Explained Variance: