Machine Learning Lab Record: Dr. Sarika Hegde
RECORD
Submitted To: Dr. Sarika Hegde
Submitted By: Sachin Singh (4NM18CS142)
VI Sem, Sec 'C'
Program 1:
Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data
from a .CSV file.
DATASET:
CODE:
import csv
a = []
with open('./enjoysport.csv', 'r') as csvfile:
    for row in csv.reader(csvfile):
        a.append(row)
print(a)
OUTPUT:
[['sky ', 'airtemp', 'humidity', 'wind', 'water', 'forecast', 'enjysport'], ['sunny', 'warm', 'normal',
'strong', 'warm', 'same', 'yes'], ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'], ['rainy',
'cold', 'high', 'strong', 'warm', 'change', 'no'], ['sunny', 'warm', 'high', 'strong', 'cold', 'change',
'yes']]
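The listing above only reads the CSV; the FIND-S step itself is not in the record. Below is a minimal sketch of that step, run on the rows read into a, assuming the layout shown in the output (attribute names in the first row, target in the last column):
# FIND-S: start from the first positive example and generalise every
# attribute that disagrees with a later positive example to '?'
num_attributes = len(a[0]) - 1
hypothesis = None
for row in a[1:]:                  # skip the header row
    if row[-1].strip() == 'yes':   # FIND-S ignores negative examples
        if hypothesis is None:
            hypothesis = row[:num_attributes]
        else:
            for j in range(num_attributes):
                if hypothesis[j] != row[j]:
                    hypothesis[j] = '?'
print("Most specific hypothesis:", hypothesis)
On the four enjoysport rows shown above, this yields ['sunny', 'warm', '?', 'strong', '?', '?'].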
Program 2:
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of
the set of all hypotheses consistent with the training examples.
DATASET:
CODE:
import numpy as np
import pandas as pd
data = pd.DataFrame(data=pd.read_csv('enjoysport.csv'))
concepts = np.array(data.iloc[:, 0:-1])
print("\nInstances are\n", concepts)
target = np.array(data.iloc[:, -1])
print("\nTarget values:", target)
def learn(concepts, target):
    specific_h = concepts[0].copy()  # S: seeded with the first instance
    general_h = [['?'] * len(specific_h) for _ in range(len(specific_h))]  # G: most general
    for i, h in enumerate(concepts):
        if target[i] == "yes":  # positive example: generalise S where it disagrees
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":   # negative example: specialise G using S
            print("Instance is negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
    # drop the rows of G that stayed fully general
    general_h = [h for h in general_h if h != ['?'] * len(specific_h)]
    return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("\nFinal Specific hypothesis:", s_final)
print("\nFinal General hypotheses:", g_final)
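For the four enjoysport rows shown in Program 1's output, this reconstruction converges to the standard textbook result: S = ['sunny', 'warm', '?', 'strong', '?', '?'] and G = [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']].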
Program 3:
Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
DATASET:
CODE:
import pandas as pd
import numpy as np

dataset = pd.read_csv('./playtennis.csv')
print(dataset)

def entropy(target_col):
    # Shannon entropy of a column of class labels
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                      for i in range(len(elements))])
    return entropy

def InfoGain(data, split_attribute_name, target_name="PlayTennis"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    # entropy of each split value, weighted by how often that value occurs
    Weighted_Entropy = np.sum([(counts[i] / np.sum(counts)) *
        entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
        for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain

def ID3(data, originaldata, features, target_attribute_name="PlayTennis", parent_node_class=None):
    # if all remaining samples have the same class, return that class
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # if the split left no samples, return the majority class of the original data
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # if no features remain to split on, return the parent node's majority class
    elif len(features) == 0:
        return parent_node_class
    else:
        # majority class of the current node, passed down as parent_node_class
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # information gain of every candidate feature
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature = features[np.argmax(item_values)]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        # grow a subtree for every value of the best feature
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

tree = ID3(dataset, dataset, dataset.columns[:-1])
print('\nDisplay Tree\n', tree)
OUTPUT:
Display Tree {'outlook': {'overcast': 'yes', 'rainy': {'wind': {'strong': 'no', 'weak': 'yes'}}, 'sunny':
{'temperature': {'cool': 'yes', 'hot': 'no', 'mild': {'wind': {'strong': 'yes', 'weak': 'no'}}}}}}
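The program statement also asks that the tree be used to classify a new sample, but the record stops at printing the tree. A minimal sketch of that step follows; the classify helper and the sample query are assumptions, with the query keys matching the CSV's column names:
# walk the nested-dictionary tree until a leaf label is reached
def classify(query, tree, default='yes'):
    for attribute in query:
        if attribute in tree:
            result = tree[attribute].get(query[attribute], default)
            if isinstance(result, dict):
                return classify(query, result, default)
            return result
    return default

query = {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'weak'}
print(classify(query, tree))
For the tree shown above, this query walks outlook = sunny, then temperature = hot, and returns 'no'.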
Program 4:
Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
CODE:
import numpy as np
# Assumed sample data (hours slept, hours studied -> test score); a stand-in,
# since the record's dataset is not shown
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise inputs to [0, 1]
y = y / 100                 # normalise outputs to [0, 1]
# Sigmoid function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def derivatives_sigmoid(x):
    return x * (1 - x)
# Variable initialization
epoch = 5  # setting training iterations
lr = 0.1   # setting learning rate
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))
for i in range(epoch):
    # Forward propagation
    hlayer_act = sigmoid(np.dot(X, wh) + bh)
    output = sigmoid(np.dot(hlayer_act, wout) + bout)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much hidden layer weights contributed to error
    d_hiddenlayer = EH * hiddengrad
    # Weight updates
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
print("Predicted Output:\n", output)
OUTPUT:
Program 5:
Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
DATASET:
CODE:
import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [x.replace(' ', '') for x in dataset[i]]
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def separatebyclass(dataset):
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # (mean, stdev) for every attribute column; the class column is dropped
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, stdev) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def main():
    filename = 'naivedata.csv'  # assumed name; the record's DATASET section is blank
    dataset = loadcsv(filename)
    print('Loaded %d rows' % len(dataset))
    summaries = summarizebyclass(dataset)
    print('Per-class (mean, stdev) summaries:', summaries)

main()
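The record stops at the per-class summaries; the Gaussian naïve Bayes classification and accuracy steps are missing. A minimal sketch of those steps follows (calculateprobability, predict, and getaccuracy are assumed names, not in the record):
def calculateprobability(x, mean, stdev):
    # Gaussian probability density of attribute value x under N(mean, stdev^2)
    exponent = math.exp(-(pow(x - mean, 2) / (2 * pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def predict(summaries, inputvector):
    # pick the class whose product of attribute likelihoods is largest
    bestlabel, bestprob = None, -1
    for classvalue, classsummaries in summaries.items():
        probability = 1
        for i, (m, s) in enumerate(classsummaries):
            probability *= calculateprobability(inputvector[i], m, s)
        if bestlabel is None or probability > bestprob:
            bestprob, bestlabel = probability, classvalue
    return bestlabel

def getaccuracy(testset, summaries):
    correct = sum(1 for row in testset if predict(summaries, row) == row[-1])
    return correct / float(len(testset)) * 100.0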
OUTPUT:
Program 6:
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same
data set for clustering using k-Means algorithm. Compare the results of these
two algorithms and comment on the quality of clustering. You can add
Java/Python ML library classes/API in the program.
DATASET:
CODE:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn import preprocessing
import sklearn.metrics as sm
import pandas as pd
import numpy as np

iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

model = KMeans(n_clusters=3)
model.fit(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])
# Plot the real classes
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
# Plot the K-Means clusters
plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Classification')
# EM clustering: fit a Gaussian mixture on standardised features
xs = pd.DataFrame(preprocessing.StandardScaler().fit_transform(X), columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
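The program statement asks for a comparison of the two clusterings. One way to do that is sketched below, using the sm module already imported above; since cluster numbering is arbitrary, the confusion matrices are read up to a permutation of rows:
# how each clustering lines up with the true species labels
print('K-Means confusion matrix:\n', sm.confusion_matrix(y.Targets, model.labels_))
print('GMM confusion matrix:\n', sm.confusion_matrix(y.Targets, y_gmm))
Typically the GMM grouping tracks the true species somewhat better than K-Means on this data, but the exact counts depend on the random initialisation.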
OUTPUT:
Program 7:
Implement the k-Nearest Neighbour algorithm to classify the Iris data set.
DATASET:
CODE:
# k-nearest neighbors on the Iris Flowers Dataset
# columns: sepal_length, sepal_width, petal_length, petal_width, species
from random import seed
from random import randrange
from csv import reader
from math import sqrt

# kNN algorithm: classify every test row from its nearest training rows
# (predict_classification and the other helpers are sketched below)
def k_nearest_neighbors(train, test, num_neighbors):
    predictions = list()
    for row in test:
        output = predict_classification(train, row, num_neighbors)
        predictions.append(output)
    return predictions

seed(1)
dataset = load_csv('iris.csv')  # assumed filename; the record's DATASET section is blank
# convert attribute columns from strings to floats
for i in range(len(dataset[0]) - 1):
    str_column_to_float(dataset, i)
# convert class column to integers
str_column_to_int(dataset, len(dataset[0]) - 1)
# evaluate algorithm with 5-fold cross-validation
n_folds = 5
num_neighbors = 5
scores = evaluate_algorithm(dataset, k_nearest_neighbors, n_folds, num_neighbors)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))), end="\n\n")
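The evaluation above calls several helpers that the record omits (load_csv, str_column_to_float, str_column_to_int, predict_classification, evaluate_algorithm). The following is a minimal from-scratch sketch of those helpers; in a runnable script these definitions would precede the code above:
from random import randrange
from csv import reader
from math import sqrt

def load_csv(filename):
    # read the CSV into a list of string rows, skipping blank lines
    dataset = list()
    with open(filename, 'r') as file:
        for row in reader(file):
            if row:
                dataset.append(row)
    return dataset

def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

def str_column_to_int(dataset, column):
    # map each class name to an integer code
    class_values = sorted(set(row[column] for row in dataset))
    lookup = {value: i for i, value in enumerate(class_values)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

def euclidean_distance(row1, row2):
    # distance over attribute columns only (the last column holds the class)
    return sqrt(sum((row1[i] - row2[i]) ** 2 for i in range(len(row1) - 1)))

def get_neighbors(train, test_row, num_neighbors):
    distances = sorted(train, key=lambda row: euclidean_distance(test_row, row))
    return distances[:num_neighbors]

def predict_classification(train, test_row, num_neighbors):
    # majority vote over the nearest neighbours' class labels
    output_values = [row[-1] for row in get_neighbors(train, test_row, num_neighbors)]
    return max(set(output_values), key=output_values.count)

def cross_validation_split(dataset, n_folds):
    dataset_split, copy = list(), list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for _ in range(n_folds):
        fold = [copy.pop(randrange(len(copy))) for _ in range(fold_size)]
        dataset_split.append(fold)
    return dataset_split

def accuracy_metric(actual, predicted):
    correct = sum(1 for a, p in zip(actual, predicted) if a == p)
    return correct / float(len(actual)) * 100.0

def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    # k-fold cross-validation: train on k-1 folds, score on the held-out fold
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        train_set = [row for f in folds if f is not fold for row in f]
        predicted = algorithm(train_set, fold, *args)
        actual = [row[-1] for row in fold]
        scores.append(accuracy_metric(actual, predicted))
    return scores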
OUTPUT:
Program 8:
Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select an appropriate data set for your experiment and draw graphs.
DATASET:
CODE:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
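# NOTE: the record omits the data loading and the LWR helper functions used
# below; the following is a minimal sketch. 'tips.csv' with total_bill and tip
# columns is an assumption, since the DATASET section is blank.
def kernel(point, xmat, k):
    # Gaussian kernel: nearby points get weight ~1, distant points ~0
    m = np.shape(xmat)[0]
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    # weighted least squares solved at a single query point
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    # fit a separate local model for every data point
    m = np.shape(xmat)[0]
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

data = pd.read_csv('tips.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
mbill = np.mat(bill)
mtip = np.mat(tip)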
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))  # prepend a bias column of ones
# set the kernel bandwidth k here
ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
OUTPUT: