Machine Learning LAB MANUAL
Machine Learning LAB MANUAL
1. https://youtu.be/_sNNxXSfyY0
2. https://youtu.be/rKxRUnT1f8Y
3. https://youtu.be/wqMFutUxBMM, https://youtu.be/9PcbUh_4PJE
4. https://youtu.be/tY4qKXwmTe4, https://youtu.be/B4Cblyc_7W4
5. https://youtu.be/F7u9e9JtuTc
6. https://youtu.be/F7u9e9JtuTc , https://youtu.be/uSHqXNccpKA
7. https://www.youtube.com/watch?v=Jk4-POycbR8
8. https://youtu.be/RPErP1lDXDk
9. https://youtu.be/JtTohICZNCw
10. https://youtu.be/HYKILaI1J5E
DATASET
https://drive.google.com/drive/folders/1TZAoKYN5LML-cnUt2tXYAZizksUuiUtp?usp=share_link
PROGRAM 1: Find-S Algorithm
import random
import csv
# Possible values of each of the six attributes, one inner list per attribute.
attributes = [['Sunny','Rainy'],
['Warm','Cold'],
['Normal','High'],
['Strong','Weak'],
['Warm','Cool'],
['Same','Change']]
# Number of attributes describing one training example.
num_attributes = len(attributes)
# Rows of the training data are collected in this list.
a = []
# NOTE(review): `csvFile`, `row` and `hypothesis` are used below but are not
# defined in the lines shown here. The statements that open the CSV file,
# iterate over `reader` and initialise `hypothesis` appear to be missing from
# this listing, and the original indentation has been lost.
reader = csv.reader(csvFile)
a.append (row)
print(row)
print(hypothesis)
# Find-S: derive the maximally specific hypothesis that is consistent with
# the positive ('Yes') training examples.
# Expects `a` (training rows whose class label sits in column
# `num_attributes`) and `hypothesis` (a list with `num_attributes` entries)
# to have been created earlier in the program.
seeded = False
for example in a:
    if example[num_attributes] != 'Yes':
        # Find-S ignores negative examples (and any header row).
        continue
    if not seeded:
        # Seed the hypothesis from the first *positive* example. Seeding
        # from a[0] unconditionally is wrong when that row is negative.
        hypothesis[:num_attributes] = example[:num_attributes]
        seeded = True
        continue
    # Generalise every attribute that disagrees with this positive example.
    for j in range(num_attributes):
        if example[j] != hypothesis[j]:
            hypothesis[j] = '?'
print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
print(hypothesis)
OUTPUT:
The most general hypothesis : ['?','?','?','?','?','?']
The most specific hypothesis : ['0','0','0','0','0','0']
# Candidate Elimination: read the training data and update the specific and
# general hypotheses row by row.
# NOTE(review): the original indentation has been lost in this listing, and
# `general` is used below without being initialised in the lines shown.
with open("Program1dataset.csv") as f:
csv_file = csv.reader(f)
# Materialise every CSV row as a list of strings.
data = list(csv_file)
# Seed the specific hypothesis with the attribute values of row 1, dropping
# the last column (row 0 is presumably a header - TODO confirm).
specific = data[1][:-1]
for i in data:
# Rows whose last column is "Yes": generalise every attribute that disagrees.
if i[-1] == "Yes":
for j in range(len(specific)):
if i[j] != specific[j]:
specific[j] = "?"
general[j][j] = "?"
# NOTE(review): this second loop presumably belongs to a branch for negative
# ("No") examples whose condition line is missing from this listing - confirm
# against the original program.
for j in range(len(specific)):
if i[j] != specific[j]:
general[j][j] = specific[j]
else:
general[j][j] = "?"
print(specific)
print(general)
# gh = general hypothesis: keep only those rows of `general` that constrain
# at least one attribute, i.e. rows that are not made up entirely of '?'.
gh = [row for row in general if any(value != '?' for value in row)]
OUTPUT:
Step 1 of Candidate Elimination Algorithm
['sunny', 'warm', '?', 'strong', 'warm', 'same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '
?', '?'], ['?', '?', '?', '?', '?', '?']]
import pandas as pd
from pprint import pprint
from sklearn.feature_selection import mutual_info_classif
from collections import Counter
else:
gainz = mutual_info_classif(df[attribute_names],df[target_attribute],discrete_features=True)
index_of_max=gainz.tolist().index(max(gainz))
best_attr=attribute_names[index_of_max]
tree={best_attr:{}}
remaining_attribute_names=[i for i in attribute_names if i!=best_attr]
return tree
# Load the training table and collect its column names.
df=pd.read_csv("Playtennis.csv")
attribute_names=df.columns.tolist()
# NOTE(review): only this heading is printed here; the attribute list itself
# is never printed in the lines shown.
print("List of attribut name")
# Drop the class column so that only the predictor names remain.
attribute_names.remove("Target")
print(df)
OUTPUT:
List of attribut name
Outlook Temperature Humidity Wind Target
0 0 0 0 0 0
1 0 0 0 1 0
2 1 0 0 0 1
3 2 1 0 0 1
4 2 2 1 0 1
5 2 2 1 1 0
6 1 2 1 1 1
7 0 1 0 0 0
8 0 2 1 0 1
9 2 1 1 0 1
10 0 1 1 1 1
11 1 1 0 1 1
12 1 0 1 0 1
13 2 1 0 1 0
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float) # Features ( Hrs Slept, Hrs Studied)
y = np.array(([92], [86], [89]), dtype=float) # Labels(Marks obtained)
X = X/np.amax(X,axis=0) # Normalize
y = y/100
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x) of x.

    Works element-wise when x is a NumPy array.
    """
    return 1 / (1 + np.exp(-x))
def sigmoid_grad(x):
    """Return the derivative of the sigmoid, expressed via its output.

    x must already be a sigmoid *output* (a value s = sigmoid(z)); the
    derivative with respect to z is then s * (1 - s). Passing the raw
    pre-activation z here gives a wrong result.
    """
    return x * (1 - x)
# Variable initialization
epoch=1000 #Setting training iterations
eta =0.2 #Setting learning rate (eta)
input_neurons = 2 #number of features in data set
hidden_neurons = 3 #number of hidden layers neurons
output_neurons = 1 #number of neurons at output layer
# Training loop: one forward pass and the output-layer error term per epoch.
# NOTE(review): `wh`, `bh`, `wout` and `bout` are not initialised in the
# lines shown, and no hidden-layer error or weight update is visible here -
# those statements appear to be missing from this listing.
for i in range(epoch):
    # Forward propagation
    h_ip = np.dot(X, wh) + bh  # hidden-layer input: dot product + bias
    h_act = sigmoid(h_ip)  # hidden-layer activation
    o_ip = np.dot(h_act, wout) + bout
    output = sigmoid(o_ip)
    # Backpropagation
    # Error at the output layer
    Eo = y - output
    outgrad = sigmoid_grad(output)
    d_output = Eo * outgrad  # Errj = Oj(1-Oj)(Tj-Oj)
OUTPUT:
Normalized Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89541292]
[0.8790127 ]
[0.89494954]]
PROGRAM 5: Naïve Bayesian Classifier
# import necessary libraries
import pandas as pd
data = pd.read_csv('Playtennis.csv')
# Split the table into features (all but the last column) and the target
# (the last column). The explicit copy makes X an independent frame, so the
# encoded columns assigned below are not written into a slice of `data`
# (which can raise pandas' SettingWithCopyWarning).
X = data.iloc[:, :-1].copy()
y = data.iloc[:, -1]

# Replace each categorical feature with integer codes, one encoder per column.
le_outlook = LabelEncoder()
X['Outlook'] = le_outlook.fit_transform(X['Outlook'])
le_Temperature = LabelEncoder()
X['Temperature'] = le_Temperature.fit_transform(X['Temperature'])
le_Humidity = LabelEncoder()
X['Humidity'] = le_Humidity.fit_transform(X['Humidity'])
le_Wind = LabelEncoder()
X['Wind'] = le_Wind.fit_transform(X['Wind'])
print("\nNow the Train output is\n", X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
classifier = GaussianNB()
classifier.fit(X_train, y_train)
OUTPUT:
The first 5 Values of data is :
Outlook Temperature Humidity Wind Target
0 Sunny Hot High Weak No
1 Sunny Hot High Strong No
2 Overcast Hot High Weak Yes
3 Rainy Mild High Weak Yes
4 Rainy Cool Normal Weak Yes
# Map the textual sentiment label to a numeric class: 'pos' -> 1, 'neg' -> 0.
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
Y = msg.labelnum
print('\nThe message and its label of first 5 instances are listed below')
# Show the first five messages next to their original textual labels.
X5, Y5 = X[0:5], msg.label[0:5]
for x, y in zip(X5, Y5):
    print(x, ',', y)
OUTPUT:
Total instances in the dataset: 8
The message and its label of first 5 instances are listed below
I love this sandwich , pos
This is an amazing place , pos
I feel very good about these beers , pos
This is my best work , pos
What a great holiday , pos
[5 rows x 27 columns]
Accuracy metrics
Accuracy of the classifer is 0.5
Recall : 1.0
Precison : 0.5
Confusion matrix
[[0 1]
[0 1]]
PROGRAM 7: Bayesian Network Model to demonstrate Diagnosis of Heart disease
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# Load the heart-disease table and list its column names, one per line.
heart_disease = pd.read_csv('data7.csv')
print('columns in datasets')
for col in heart_disease.columns:
    print(col)
print(model.get_cpds('sex'))
OUTPUT:
columns in datasets
age
sex
cp
trestbps
chol
fbs
restecg
thalach
exang
oldpeak
slope
ca
thal
heartdisease
+--------+----------+
| sex(0) | 0.320132 |
+--------+----------+
| sex(1) | 0.679868 |
+--------+----------+
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.3587 |
+-----------------+---------------------+
| heartdisease(1) | 0.1220 |
+-----------------+---------------------+
| heartdisease(2) | 0.2020 |
+-----------------+---------------------+
| heartdisease(3) | 0.2053 |
+-----------------+---------------------+
| heartdisease(4) | 0.1120 |
+-----------------+---------------------+
PROGRAM 8: K-means Clustering
import matplotlib.pyplot as plt
import numpy as np
# Name the four feature columns of the iris measurements.
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
# Keep the true class labels in a one-column frame.
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
# Cluster the samples into three groups.
model = KMeans(n_clusters=3)
model.fit(X)  # model.labels_ gives the cluster number each sample belongs to
plt.ylabel('Petal Width')
# Plot the Models Classifications
plt.subplot(2, 2, 2)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
gmm_y = gmm.predict(xs)
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[gmm_y], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
OUTPUT:
Observation: The GMM clustering based on the EM algorithm matched the true labels more closely than K-means.
PROGRAM 9: K nearest neighbour
# import the required packages
# Load dataset
iris=datasets.load_iris()
print("Iris Data set loaded...")
# Split the data into train and test samples
# List every class index next to its name.
for i, label_name in enumerate(iris.target_names):
    print("Label", i, "-", str(label_name))
# Perform Training
classifier.fit(x_train, y_train)
# Perform testing
y_pred=classifier.predict(x_test)
import pandas as pd
import numpy as np
weights[j,j] = np.exp(diff*diff.T/(-2.0*k**2))
return weights
def localWeight(point, xmat, ymat, k):
    """Return the locally weighted least-squares coefficients around `point`.

    Computes (X^T W X)^-1 (X^T W y^T), where W is the weight matrix returned
    by kernel(point, xmat, k). The use of `*` as a matrix product and of `.I`
    means xmat and ymat are expected to be numpy matrix objects.
    """
    wei = kernel(point, xmat, k)
    # Use the xmat argument rather than the module-level X, so the function
    # works for whatever design matrix the caller passes in.
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W
def localWeightRegression(xmat, ymat, k):
    """Return the locally weighted regression prediction for every row of xmat.

    For each row, a separate set of coefficients is fitted with localWeight
    (bandwidth k) and applied to that same row. The result is a 1-D array
    holding one prediction per row.
    """
    m = np.shape(xmat)[0]
    ypred = np.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; extract the scalar explicitly rather
        # than relying on NumPy's implicit array-to-scalar conversion.
        ypred[i] = (xmat[i] * localWeight(xmat[i], xmat, ymat, k)).item()
    return ypred
def graphPlot(X, ypred):
    """Create a figure and scatter-plot the raw data.

    Reads the module-level `bill` and `tip` for the scatter points.
    NOTE(review): `xsort` and `ypred` are not used in the lines visible
    here; the statements that draw the fitted curve and show the figure
    appear to be missing from this listing.
    """
    # Row order that sorts the samples by column 1 of X.
    sortindex = X[:, 1].argsort(0)
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
tip = np.array(data.tip)
one = np.mat(np.ones(m))
X = np.hstack((one.T,mbill.T)) # 244 rows, 2 cols
graphPlot(X,ypred)
OUTPUT: