ML Lab

The document contains Python code for machine learning lab tasks: Candidate Elimination and Find-S concept learning, an ID3-style decision tree built with entropy and gain ratio, a small backpropagation neural network, Naive Bayes classifiers for numeric and text data, Bayesian-network inference, K-Means and GMM clustering, k-nearest-neighbour classification, and locally weighted (LOWESS) regression.


import numpy as np
import pandas as pd

data = pd.read_csv('/content/sample_data/2.csv')
concepts = np.array(data.iloc[:, 0:-1])
target = np.array(data.iloc[:, -1])

def learn(concepts, target):
    # Candidate Elimination: maintain the specific and general boundaries of the version space
    specific_h = concepts[0].copy()
    print("initialization of specific_h \n", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("initialization of general_h \n", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            print("If instance is Positive ")
            # generalize specific_h wherever it disagrees with the positive example
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'

        if target[i] == "no":
            print("If instance is Negative ")
            # specialize general_h only on attributes where specific_h still commits
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print(" step {}".format(i + 1))
        print(specific_h)
        print(general_h)
        print("\n")

    # drop unconstrained (all-'?') rows from the general boundary
    unconstrained = ['?'] * len(specific_h)
    general_h = [row for row in general_h if row != unconstrained]
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")

import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    # split the data into one sub-table per distinct value of the given column
    subtable = {}
    items = np.unique(data[:, col])
    count = np.zeros(items.shape[0], dtype=np.int32)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        subtable[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                subtable[items[x]][pos] = data[y]
                pos += 1
        if delete:
            subtable[items[x]] = np.delete(subtable[items[x]], col, 1)

    return items, subtable

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0

    counts = np.zeros(items.shape[0])
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = np.sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, subtable = subtables(data, col, delete=False)

    total_size = data.shape[0]
    entropies = np.zeros(items.shape[0])
    intrinsic = np.zeros(items.shape[0])

    for x in range(items.shape[0]):
        ratio = subtable[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(subtable[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)

    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]

    return total_entropy / iv

def create_node(data, metadata):
    # pure node: every remaining row has the same label, so answer directly
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    gains = np.zeros(data.shape[1] - 1)
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)

    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)

    items, subtable = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(subtable[items[x]], metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("/content/sample_data/tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)

 Outlook
  Overcast
   b'Yes'
  Rainy
   Windy
    b'False'
     b'Yes'
    b'True'
     b'No'
  Sunny
   Humidity
    b'High'
     b'No'
    b'Normal'
     b'Yes'
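As a quick sanity check on the entropy helper above (a sketch added here, not part of the recorded run): the tennis data has 9 Yes and 5 No labels, so the class entropy should come out near 0.940 bits.

import numpy as np
# 9 positive and 5 negative labels, as in tennisdata.csv above
labels = np.array(['Yes'] * 9 + ['No'] * 5)
print(entropy(labels))  # expected: ~0.940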

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise each feature column by its maximum
y = y / 100

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid, expressed in terms of the activation s = sigmoid(x)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5  # number of training iterations
lr = 0.1   # learning rate

inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons at the output layer

# Weight and bias initialization: uniform random values of dimension x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much the hidden layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad

    # weight updates (biases are left fixed in this simplified version)
    wout += hlayer_act.T.dot(d_output) * lr  # dot product of next-layer error and current-layer output
    wh += X.T.dot(d_hiddenlayer) * lr

    print("-----------Epoch-", i + 1, "Starts----------")
    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n", output)
    print("-----------Epoch-", i + 1, "Ends----------\n")

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83892115]
[0.81725673]
[0.83411988]]
-----------Epoch- 3 Ends----------

-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.8401741 ]
[0.81850337]
[0.83537874]]
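One detail worth flagging in the cell above: derivatives_sigmoid receives the layer activation, not the pre-activation, since for s = sigmoid(x) the derivative is s * (1 - s). A minimal numeric check (an added sketch, assuming only NumPy):

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = 0.5
s = sigmoid(x)
analytic = s * (1 - s)  # what derivatives_sigmoid(s) computes
numeric = (sigmoid(x + 1e-6) - sigmoid(x - 1e-6)) / 2e-6  # central difference
print(analytic, numeric)  # both ~0.2350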

import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitdataset(dataset, splitratio):
    # 67% training size
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # pick random indices into the dataset list to build the training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def separatebyclass(dataset):
    # creates a dictionary of classes 1 and 0 where the values are
    # the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # one (mean, stdev) pair per attribute
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # excluding the labels
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, std) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    # univariate Gaussian density
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    # probabilities holds the probability of every class for the test row
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            mean, stdev = classsummaries[i]  # mean and sd of each attribute, per class
            x = inputvector[i]
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)  # normal distribution
    return probabilities

def predict(summaries, inputvector):
    # assigns the class with the highest probability
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    filename = '/content/sample_data/5-dataset.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)

    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    # prepare the model
    summaries = summarizebyclass(trainingset)
    # test the model: predict the test data from the training summaries
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()

Split 767 rows into train=513 and test=254 rows


Accuracy of the classifier is : 74.40944881889764%
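calculateprobability above is the univariate Gaussian density used for each attribute. A one-line check (an added sketch, reusing the function defined above): the standard normal density at its mean is 1/sqrt(2*pi) ≈ 0.3989.

print(calculateprobability(0.0, 0.0, 1.0))  # expected: ~0.3989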

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

msg = pd.read_csv('/content/sample_data/docu.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)

msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum

# splitting the dataset into train and test data
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print('\n the total number of Training Data :', ytrain.shape)
print('\n the total number of Test Data :', ytest.shape)

# output the words or tokens in the text documents
cv = CountVectorizer()
xtrain_dtm = cv.fit_transform(xtrain)
xtest_dtm = cv.transform(xtest)
print('\n The words or Tokens in the text documents \n')
# get_feature_names was replaced by get_feature_names_out in recent scikit-learn
print(cv.get_feature_names_out())
df = pd.DataFrame(xtrain_dtm.toarray(), columns=cv.get_feature_names_out())

# Training the Naive Bayes (NB) classifier on the training data
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

# printing accuracy, confusion matrix, precision and recall
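# The comment above promises these metrics; a minimal sketch completing that step
# (an addition not reflected in the recorded output below), using the
# sklearn.metrics module already imported:
print('Accuracy:', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix:\n', metrics.confusion_matrix(ytest, predicted))
print('Precision:', metrics.precision_score(ytest, predicted))
print('Recall:', metrics.recall_score(ytest, predicted))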

The dimensions of the dataset (19, 2)

the total number of Training Data : (14,)

the total number of Test Data : (5,)

The words or Tokens in the text documents

['about' 'am' 'an' 'and' 'awesome' 'bad' 'beers' 'boss' 'can' 'dance'
'deal' 'do' 'enemy' 'feel' 'fun' 'good' 'have' 'he' 'horrible' 'house'
'is' 'like' 'locality' 'love' 'my' 'not' 'of' 'place' 'restaurant'
'sandwich' 'sick' 'stay' 'stuff' 'sworn' 'that' 'these' 'this' 'tired'
'to' 'today' 'tomorrow' 'very' 'view' 'we' 'went' 'what' 'will' 'with']


from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination
from pgmpy.factors.discrete import TabularCPD

# Define the structure of the Bayesian Network
model = BayesianNetwork([
    ('Corona', 'Fever'),
    ('Corona', 'Cough'),
    ('Corona', 'Breathlessness'),
    ('Corona', 'Fatigue'),
    ('Corona', 'TestResult')
])

# Define the CPDs (Conditional Probability Distributions)
cpd_corona = TabularCPD(variable='Corona', variable_card=2, values=[[0.99], [0.01]])

cpd_fever = TabularCPD(variable='Fever', variable_card=2,
                       values=[[0.90, 0.30], [0.10, 0.70]],
                       evidence=['Corona'], evidence_card=[2])

cpd_cough = TabularCPD(variable='Cough', variable_card=2,
                       values=[[0.85, 0.20], [0.15, 0.80]],
                       evidence=['Corona'], evidence_card=[2])

cpd_breathlessness = TabularCPD(variable='Breathlessness', variable_card=2,
                                values=[[0.80, 0.10], [0.20, 0.90]],
                                evidence=['Corona'], evidence_card=[2])

cpd_fatigue = TabularCPD(variable='Fatigue', variable_card=2,
                         values=[[0.75, 0.15], [0.25, 0.85]],
                         evidence=['Corona'], evidence_card=[2])

cpd_test_result = TabularCPD(variable='TestResult', variable_card=2,
                             values=[[0.95, 0.05], [0.05, 0.95]],
                             evidence=['Corona'], evidence_card=[2])

# Add CPDs to the model
model.add_cpds(cpd_corona, cpd_fever, cpd_cough, cpd_breathlessness, cpd_fatigue, cpd_test_result)

# Check that the model is valid
assert model.check_model(), "Model structure or CPDs are inconsistent!"

# Perform inference
inference = VariableElimination(model)

# Query: probability of having Corona given Fever and Cough
result = inference.query(variables=['Corona'], evidence={'Fever': 1, 'Cough': 1})
print(result)

+-----------+---------------+
| Corona | phi(Corona) |
+===========+===============+
| Corona(0) | 0.7262 |
+-----------+---------------+
| Corona(1) | 0.2738 |
+-----------+---------------+
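The same VariableElimination object supports further queries; for instance (an added sketch using the model above, output not recorded in the notebook), conditioning on a positive test result instead of symptoms:

result = inference.query(variables=['Corona'], evidence={'TestResult': 1})
print(result)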

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np

# import some data to play with
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# Build the K-Means model
model = KMeans(n_clusters=3)
model.fit(X)  # model.labels_ gives the cluster number each sample belongs to

# Visualise the clustering results
plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# Plot the original classifications using the petal features
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Plot the model's classifications
plt.subplot(1, 3, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# General EM for GMM
from sklearn import preprocessing

# transform the data so each feature has mean 0 and standard deviation 1
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)  # three components to match the three iris species
gmm.fit(xs)
gmm_y = gmm.predict(xs)  # colour each point by its GMM cluster
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[gmm_y], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.')

Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.
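Because cluster indices are arbitrary, a label-free way to back the printed observation is the adjusted Rand index (an added sketch, assuming the fitted model and gmm objects above):

from sklearn.metrics import adjusted_rand_score
print('K-Means ARI:', adjusted_rand_score(y.Targets, model.labels_))
print('GMM ARI    :', adjusted_rand_score(y.Targets, gmm.predict(xs)))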

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np

dataset = load_iris()
X_train, X_test, y_train, y_test = train_test_split(dataset["data"], dataset["target"], random_state=0)

kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)

for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]], "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test, y_test))

TARGET= 2 virginica PREDICTED= [2] ['virginica']


TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [2] ['virginica']
0.9736842105263158
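The reported score is 37/38 ≈ 0.9737: of the 38 test rows printed above, only one (a versicolor predicted as virginica) is wrong, which matches kn.score(X_test, y_test).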

from math import ceil
import numpy as np
from scipy import linalg

def lowess(x, y, f, iterations):
    # locally weighted regression with tricube weights and bisquare robustness
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]  # bandwidth: distance to the r-th nearest neighbour
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube kernel
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)  # weighted least-squares line through the neighbourhood
            yest[i] = beta[0] + beta[1] * x[i]

        # down-weight points with large residuals before the next pass
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

import math
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

import matplotlib.pyplot as plt
plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")

[<matplotlib.lines.Line2D at 0x7bcdf96277d0>]
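For reference, the weight matrix built at the top of lowess implements the tricube kernel: each point $x_j$ influences the fit at $x_i$ with weight

$$w_{ij} = \left(1 - \left|\frac{x_i - x_j}{h_i}\right|^3\right)^3 \text{ for } |x_i - x_j| \le h_i, \qquad w_{ij} = 0 \text{ otherwise},$$

where $h_i$ is the distance from $x_i$ to its $r$-th nearest neighbour and $r = \lceil f \cdot n \rceil$; the np.clip call is what zeroes weights beyond $h_i$. The delta factor applies the analogous bisquare down-weighting to large residuals between iterations.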

import csv
hypo = ['%', '%', '%', '%', '%', '%']

with open('/content/sample_data/tennisdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print(readcsv)

    data = []
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        if row[len(row) - 1].upper() == "YES":
            data.append(row)

print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

TotalExamples = len(data)
print("The steps of the Find-s algorithm are :\n", hypo)

# initialise the hypothesis from the first positive example
d = len(data[0]) - 1
hypo = [data[0][j] for j in range(d)]

# generalise attribute-by-attribute over all positive examples
for i in range(TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

print("\nThe maximally specific Find-s hypothesis for the given training examples is :")
print([hypo[i] for i in range(d)])

<_csv.reader object at 0x7bcdfc357ed0>

The given training examples are:
['Outlook', 'Temperature', 'Humidity', 'Windy', 'PlayTennis']
['Sunny', 'Hot', 'High', 'False', 'No']
['Sunny', 'Hot', 'High', 'True', 'No']
['Overcast', 'Hot', 'High', 'False', 'Yes']
['Rainy', 'Mild', 'High', 'False', 'Yes']
['Rainy', 'Cool', 'Normal', 'False', 'Yes']
['Rainy', 'Cool', 'Normal', 'True', 'No']
['Overcast', 'Cool', 'Normal', 'True', 'Yes']
['Sunny', 'Mild', 'High', 'False', 'No']
['Sunny', 'Cool', 'Normal', 'False', 'Yes']
['Rainy', 'Mild', 'Normal', 'False', 'Yes']
['Sunny', 'Mild', 'Normal', 'True', 'Yes']
['Overcast', 'Mild', 'High', 'True', 'Yes']
['Overcast', 'Hot', 'Normal', 'False', 'Yes']
['Rainy', 'Mild', 'High', 'True', 'No']

The positive examples are:


['Overcast', 'Hot', 'High', 'False', 'Yes']
['Rainy', 'Mild', 'High', 'False', 'Yes']
['Rainy', 'Cool', 'Normal', 'False', 'Yes']
['Overcast', 'Cool', 'Normal', 'True', 'Yes']
['Sunny', 'Cool', 'Normal', 'False', 'Yes']
['Rainy', 'Mild', 'Normal', 'False', 'Yes']
['Sunny', 'Mild', 'Normal', 'True', 'Yes']
['Overcast', 'Mild', 'High', 'True', 'Yes']
['Overcast', 'Hot', 'Normal', 'False', 'Yes']

The steps of the Find-s algorithm are :


['%', '%', '%', '%', '%', '%']
['Overcast', 'Hot', 'High', 'False']
['?', '?', 'High', 'False']
['?', '?', '?', 'False']
['?', '?', '?', '?']
['?', '?', '?', '?']
['?', '?', '?', '?']
['?', '?', '?', '?']
['?', '?', '?', '?']
['?', '?', '?', '?']

The maximally specific Find-s hypothesis for the given training examples is :
['?', '?', '?', '?']

