Machine Learning practical file

1. Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data
from a .CSV file.
Program:
import csv

num_attributes = 6
a = []

print("\n The Given Training Data Set \n")
with open('enjoysport.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        a.append(row)
        print(row)

print("\n The initial value of hypothesis: ")
hypothesis = ['0'] * num_attributes
print(hypothesis)

for j in range(0, num_attributes):
    hypothesis[j] = a[0][j]

print("\n Find S: Finding a Maximally Specific Hypothesis\n")
for i in range(0, len(a)):
    if a[i][num_attributes] == 'yes':
        for j in range(0, num_attributes):
            if a[i][j] != hypothesis[j]:
                hypothesis[j] = '?'
            else:
                hypothesis[j] = a[i][j]
    print(" For Training instance No:{0} the hypothesis is ".format(i), hypothesis)

print("\n The Maximally Specific Hypothesis for a given Training Examples :\n")
print(hypothesis)
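The program assumes a header-less enjoysport.csv with six attribute columns followed by the class label, compared in lowercase against 'yes'. A minimal sample, based on the classic EnjoySport data from Mitchell (the exact file used may differ), would be:

sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes

With this input the final maximally specific hypothesis comes out as ['sunny', 'warm', '?', 'strong', '?', '?'].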
2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.

import numpy as np
import pandas as pd

data = pd.DataFrame(data=pd.read_csv('trainingdata.csv'))
print(data)

concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    '''
    learn() function implements the learning method of the Candidate Elimination
    algorithm.
    Arguments:
        concepts - a data frame with all the features
        target - a data frame with corresponding output values
    '''
    # Initialise S0 with the first instance from concepts
    # .copy() makes sure a new list is created instead of just pointing to the same memory location
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print(specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # Checking if the hypothesis has a positive target
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                # Change values in S & G only if values change
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # Checking if the hypothesis has a negative target
        if target[i] == "No":
            for x in range(len(specific_h)):
                # For a negative hypothesis change values only in G
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)

    # Find indices of fully general rows, meaning those that were never specialised
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        # Remove those rows from general_h
        general_h.remove(['?', '?', '?', '?', '?', '?'])

    # Return final values
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("\nFinal Specific_h:", s_final, sep="\n")
print("\nFinal General_h:", g_final, sep="\n")
3. Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply
the knowledge to classify a new sample.

#Import libraries and read data using the read_csv() function. Remove the target from the data and store the attributes in the features variable.

import pandas as pd
import math
import numpy as np

data = pd.read_csv("Dataset/4-dataset.csv")
features = [feat for feat in data]
features.remove("answer")

#Create a class named Node with four members: children, value, isLeaf and pred.

class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""

#Define a function called entropy to find the entropy of the dataset.

def entropy(examples):
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["answer"] == "yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))
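As a quick sanity check of the entropy function: assuming the classic 14-row play-tennis data with 9 "yes" and 5 "no" examples, the entropy of the full dataset works out to about 0.940 bits.

# E = -(9/14)*log2(9/14) - (5/14)*log2(5/14), approximately 0.940
print(round(-(9/14) * math.log(9/14, 2) - (5/14) * math.log(5/14, 2), 3))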

#Define a function named info_gain to find the gain of the attribute

def info_gain(examples, attr):
    uniq = np.unique(examples[attr])
    #print ("\n",uniq)
    gain = entropy(examples)
    #print ("\n",gain)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        #print ("\n",subdata)
        sub_e = entropy(subdata)
        gain -= (float(len(subdata)) / float(len(examples))) * sub_e
        #print ("\n",gain)
    return gain
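For the same assumed play-tennis data, the gain of the outlook attribute is roughly 0.940 - (5/14)*0.971 - (4/14)*0 - (5/14)*0.971, about 0.247, which is why ID3 selects outlook as the root. A line such as the following (the column name is an assumption about the dataset) reproduces that figure:

print(info_gain(data, "outlook"))   # expected to be about 0.247 on the standard play-tennis data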

#Define a function named ID3 to get the decision tree for the given dataset

def ID3(examples, attrs):
    root = Node()

    max_gain = 0
    max_feat = ""
    for feature in attrs:
        #print ("\n",examples)
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat
    #print ("\nMax feature attr",max_feat)
    uniq = np.unique(examples[max_feat])
    #print ("\n",uniq)
    for u in uniq:
        #print ("\n",u)
        subdata = examples[examples[max_feat] == u]
        #print ("\n",subdata)
        if entropy(subdata) == 0.0:
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["answer"])
            root.children.append(newNode)
        else:
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(max_feat)
            child = ID3(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)

    return root

#Define a function named printTree to draw the decision tree

def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTree(child, depth + 1)

#Define a function named classify to classify the new example

def classify(root: Node, new):
    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print("Predicted Label for new example", new, " is:", child.pred)
                return
            else:
                classify(child.children[0], new)

#Finally, call the ID3, printTree and classify functions

root = ID3(data, features)

print("Decision Tree is:")
printTree(root)
print("------------------")

new = {"outlook": "sunny", "temperature": "hot", "humidity": "normal", "wind": "strong"}
classify(root, new)
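The program assumes Dataset/4-dataset.csv is the play-tennis data with lowercase values and a class column named answer, along the lines of the following first rows (shown only as an illustration; the actual file may differ):

outlook,temperature,humidity,wind,answer
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes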
4. Build an ANN by implementing the Backpropagation algorithm and test the same
using an appropriate data set.

import random
from math import exp
from random import seed

# Initialize a network

def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_inputs + 1)]}
                    for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_hidden + 1)]}
                    for i in range(n_outputs)]
    network.append(output_layer)
    i = 1
    print("\n The initialised Neural Network:\n")
    for layer in network:
        j = 1
        for sub in layer:
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
            j = j + 1
        i = i + 1
    return network

# Calculate neuron activation (net) for an input

def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation through the sigmoid function

def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output

def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron output

def transfer_derivative(output):
    return output * (1.0 - output)
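A quick check of the transfer functions: the sigmoid of a zero activation is 0.5, and since transfer_derivative works on the already-computed output, transfer_derivative(0.5) gives the sigmoid's maximum slope of 0.25.

print(transfer(0.0))              # 0.5
print(transfer_derivative(0.5))   # 0.25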

# Backpropagate error and store in neurons

def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()

        if i != len(network) - 1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])

        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error

def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += l_rate * neuron['delta']

# Train a network for a fixed number of epochs

def train_network(network, train, l_rate, n_epoch, n_outputs):
    print("\n Network Training Begins:\n")
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

    print("\n Network Training Ends:\n")

#Test training backprop algorithm

seed(2)
dataset = [[2.7810836, 2.550537003, 0],
           [1.465489372, 2.362125076, 0],
           [3.396561688, 4.400293529, 0],
           [1.38807019, 1.850220317, 0],
           [3.06407232, 3.005305973, 0],
           [7.627531214, 2.759262235, 1],
           [5.332441248, 2.088626775, 1],
           [6.922596716, 1.77106367, 1],
           [8.675418651, -0.242068655, 1],
           [7.673756466, 3.508563011, 1]]

print("\n The input Data Set :\n", dataset)

n_inputs = len(dataset[0]) - 1
print("\n Number of Inputs :\n", n_inputs)
n_outputs = len(set([row[-1] for row in dataset]))
print("\n Number of Outputs :\n", n_outputs)

#Network Initialization
network = initialize_network(n_inputs, 2, n_outputs)

# Training the Network
train_network(network, dataset, 0.5, 20, n_outputs)

print("\n Final Neural Network :")

i = 1
for layer in network:
    j = 1
    for sub in layer:
        print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
        j = j + 1
    i = i + 1
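Once training has finished, the trained network can classify a point by taking the index of the largest output. A minimal sketch (not part of the original listing):

# Predict the class of each row with the trained network
for row in dataset:
    outputs = forward_propagate(network, row)
    prediction = outputs.index(max(outputs))
    print('Expected=%d, Predicted=%d' % (row[-1], prediction))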
5. Write a program to implement the naïve Bayesian classifier for a sample training
data set stored as a .CSV file. Compute the accuracy of the classifier using a few
test data sets.

import csv
import random
import math

# 1. Data Handling
# 1.1 Loading the data from the csv file of the Pima Indians diabetes dataset.
def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting the attributes from string to floating point numbers
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

# 1.2 Splitting the data set into a training set

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))  # random index
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

# 2. Summarize Data
# The naive bayes model is comprised of a summary of the data in the training dataset.
# This summary is then used when making predictions.
# It involves the mean and the standard deviation for each attribute, by class value.

# 2.1 : Separate Data By Class

# Function to categorize the dataset in terms of classes.
# The function assumes that the last attribute (-1) is the class value.
# The function returns a map of class values to lists of data instances.
def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

# The mean is the middle or central tendency of the data,
# and we will use it as the middle of our gaussian distribution
# when calculating probabilities.

# 2.2 : Calculate Mean

def mean(numbers):
    return sum(numbers) / float(len(numbers))

# The standard deviation describes the variation or spread of the data,
# and we will use it to characterize the expected spread of each attribute
# in our Gaussian distribution when calculating probabilities.

# 2.3 : Calculate Standard Deviation

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# 2.4 : Summarize Dataset

# Summarize the data set for a list of instances (for a class value).
# The zip function groups the values for each attribute across our data instances
# into their own lists so that we can compute the mean and standard deviation values
# for the attribute.

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

# 2.5 : Summarize Attributes By Class

# We can pull it all together by first separating our training dataset into
# instances grouped by class. Then calculate the summaries for each attribute.

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

# 3. Make Prediction
# 3.1 Calculate the Probability Density Function
def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
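A quick hand-check of the density function: with x = 71.5, mean = 73 and stdev = 6.2 (values chosen purely for illustration), the Gaussian density evaluates to roughly 0.0625.

print(calculateProbability(71.5, 73, 6.2))   # approximately 0.0625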
# 3.2 Calculate Class Probabilities
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

# 3.3 Prediction : look for the largest probability and return the associated class
def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

# 4. Make Predictions
# Function which returns a prediction for each instance in the test set

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

# 5. Computing Accuracy

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

# Main Function
def main():
    filename = 'C:\\Users\\Dr.Thyagaraju\\Desktop\\Data\\pima-indians-diabetes.csv'
    splitRatio = 0.67
    dataset = loadcsv(filename)

    # print("\n The Data Set :\n",dataset)
    print("\n The length of the Data Set : ", len(dataset))

    print("\n The Data Set Splitting into Training and Testing \n")
    trainingSet, testSet = splitDataset(dataset, splitRatio)

    print('\n Number of Rows in Training Set:{0} rows'.format(len(trainingSet)))
    print('\n Number of Rows in Testing Set:{0} rows'.format(len(testSet)))

    print("\n First Five Rows of Training Set:\n")
    for i in range(0, 5):
        print(trainingSet[i], "\n")

    print("\n First Five Rows of Testing Set:\n")
    for i in range(0, 5):
        print(testSet[i], "\n")

    # prepare model
    summaries = summarizeByClass(trainingSet)
    print("\n Model Summaries:\n", summaries)

    # test model
    predictions = getPredictions(summaries, testSet)
    print("\nPredictions:\n", predictions)

    accuracy = getAccuracy(testSet, predictions)
    print('\n Accuracy: {0}%'.format(accuracy))

main()
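The Pima Indians diabetes file is assumed to be a header-less CSV with eight numeric attributes followed by the 0/1 class label, one record per line, for example:

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0

With the usual 768 records and a split ratio of 0.67, the program trains on 514 rows and tests on 254.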
6. Assuming a set of documents that need to be classified, use the naïve
Bayesian classifier model to perform this task. Built-in Java classes/API can
be used to write the program. Calculate the accuracy, precision and recall for
your data set.

import pandas as pd

msg = pd.read_csv('document.csv', names=['message', 'label'])
print("Total Instances of Dataset: ", msg.shape[0])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})

X = msg.message
y = msg.labelnum

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)

from sklearn.feature_extraction.text import CountVectorizer

count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)

from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)

# Print each test document with its predicted label
for doc, p in zip(Xtest, pred):
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))

from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
print('Accuracy Metrics: \n')
print('Accuracy: ', accuracy_score(ytest, pred))
print('Recall: ', recall_score(ytest, pred))
print('Precision: ', precision_score(ytest, pred))
print('Confusion Matrix: \n', confusion_matrix(ytest, pred))
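document.csv is assumed to be a header-less, two-column file of short text messages and pos/neg labels, along the lines of:

I love this sandwich,pos
This is an amazing place,pos
I feel very good about these beers,pos
I do not like this restaurant,neg
I am tired of this stuff,neg

Because train_test_split is called without a fixed random_state, the reported accuracy, precision and recall will vary from run to run.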
7. Write a program to construct a Bayesian network considering medical data.
Use this model to demonstrate the diagnosis of heart patients using a standard
heart disease data set. You can use a Python ML library API.

import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

data = pd.read_csv("ds4.csv")
heart_disease = pd.DataFrame(data)
print(heart_disease)

model = BayesianModel([
    ('age', 'Lifestyle'),
    ('Gender', 'Lifestyle'),
    ('Family', 'heartdisease'),
    ('diet', 'cholestrol'),
    ('Lifestyle', 'diet'),
    ('cholestrol', 'heartdisease')
])

model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

HeartDisease_infer = VariableElimination(model)

print('For Age enter SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4')
print('For Gender enter Male:0, Female:1')
print('For Family History enter Yes:1, No:0')
print('For Diet enter High:0, Medium:1')
print('for LifeStyle enter Athlete:0, Active:1, Moderate:2, Sedentary:3')
print('for Cholesterol enter High:0, BorderLine:1, Normal:2')

q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age': int(input('Enter Age: ')),
    'Gender': int(input('Enter Gender: ')),
    'Family': int(input('Enter Family History: ')),
    'diet': int(input('Enter Diet: ')),
    'Lifestyle': int(input('Enter Lifestyle: ')),
    'cholestrol': int(input('Enter Cholestrol: '))
})

print(q)
"""
Output:

age Gender Family diet Lifestyle cholestrol heartdisease
0 0 0 1 1 3 0 1
1 0 1 1 1 3 0 1
2 1 0 0 0 2 1 1
3 4 0 1 1 3 2 0
4 3 1 1 0 0 2 0
5 2 0 1 1 1 0 1
6 4 0 1 0 2 0 1
7 0 0 1 1 3 0 1
8 3 1 1 0 0 2 0
9 1 1 0 0 0 2 1
10 4 1 0 1 2 0 1
11 4 0 1 1 3 2 0
12 2 1 0 0 0 0 0
13 2 0 1 1 1 0 1
14 3 1 1 0 0 1 0
15 0 0 1 0 0 2 1
16 1 1 0 1 2 1 1
17 3 1 1 1 0 1 0
18 4 0 1 1 3 2 0
For Age enter SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4
For Gender enter Male:0, Female:1
For Family History enter Yes:1, No:0
For Diet enter High:0, Medium:1
for LifeStyle enter Athlete:0, Active:1, Moderate:2, Sedentary:3
for Cholesterol enter High:0, BorderLine:1, Normal:2
Enter Age: 0
Enter Gender: 0
Enter Family History: 0
Enter Diet: 0
Enter Lifestyle: 3
Enter Cholestrol: 0
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.5000 |
+-----------------+---------------------+
| heartdisease(1) | 0.5000 |
+-----------------+---------------------+
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
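Note that recent pgmpy releases have renamed BayesianModel to BayesianNetwork. If the import above fails on a newer installation, a small compatibility shim like the following (an assumption about the installed version, not part of the original listing) keeps the rest of the program unchanged:

try:
    from pgmpy.models import BayesianNetwork as BayesianModel
except ImportError:
    from pgmpy.models import BayesianModel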
8. Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the
same data set for clustering using the k-Means algorithm. Compare the results of
these two algorithms and comment on the quality of clustering. You can add Python
ML library classes/API in the program.

from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = load_iris()
# print(dataset)

X = pd.DataFrame(dataset.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(dataset.target)
y.columns = ['Targets']
# print(X)

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3)
model.fit(X)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
plt.title('KMeans')

# GMM PLOT
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')

# Output: a figure with three scatter plots of petal length vs petal width,
# titled 'Real', 'KMeans' and 'GMM Classification'.
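The problem also asks for a comment on clustering quality. One way to quantify the comparison, sketched here as an addition (not in the original notebook), is the adjusted Rand index between each clustering and the true iris labels; GMM on the standardised features typically scores higher than k-Means on this data:

from sklearn.metrics import adjusted_rand_score
print('ARI for KMeans :', adjusted_rand_score(y.Targets, model.labels_))
print('ARI for GMM    :', adjusted_rand_score(y.Targets, y_cluster_gmm))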
9. Write a program to implement the k-Nearest Neighbour algorithm to classify the
iris data set. Print both correct and wrong predictions. Python ML library
classes can be used for this problem.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np

dataset = load_iris()
# print(dataset)

X_train, X_test, y_train, y_test = train_test_split(dataset["data"], dataset["target"], random_state=0)

kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)

for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test, y_test))

Output:

TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [2] ['virginica']
0.9736842105263158
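Since the exercise asks to print correct and wrong predictions explicitly, the loop above can be extended with a comparison, for example (a small addition, not in the original notebook):

correct, wrong = 0, 0
for i in range(len(X_test)):
    prediction = kn.predict(np.array([X_test[i]]))[0]
    status = "CORRECT" if prediction == y_test[i] else "WRONG"
    if status == "CORRECT":
        correct += 1
    else:
        wrong += 1
    print(status, "TARGET=", dataset["target_names"][y_test[i]],
          "PREDICTED=", dataset["target_names"][prediction])
print("Correct predictions:", correct, " Wrong predictions:", wrong)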
10. Implement the non-parametric Locally Weighted Regression algorithm in
order to fit data points. Select an appropriate data set for your experiment and
draw graphs.
from math import ceil
import numpy as np
from scipy import linalg

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

import math
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

import matplotlib.pyplot as plt
plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")

# Output: a plot of the noisy sine data (red dots) with the locally weighted
# regression fit drawn through it (blue line).
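The lowess() routine above weights neighbours with the tricube kernel and re-weights them across iterations to reduce the influence of outliers. Locally weighted regression can also be written with a Gaussian kernel, where the bandwidth tau controls how local the fit is; a minimal sketch of that alternative formulation (not part of the original notebook):

def local_weighted_fit(x0, X, y, tau):
    # Weight every training point by its distance from the query point x0
    w = np.exp(-(X - x0) ** 2 / (2 * tau ** 2))
    # Solve the weighted least-squares problem for a local line and evaluate it at x0
    Xmat = np.column_stack([np.ones_like(X), X])
    W = np.diag(w)
    beta = np.linalg.pinv(Xmat.T @ W @ Xmat) @ Xmat.T @ W @ y
    return beta[0] + beta[1] * x0

y_gauss = np.array([local_weighted_fit(x0, x, y, tau=0.5) for x0 in x])
plt.plot(x, y_gauss, "g-")
plt.show()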
