ML Lab
ML Lab
ML.ipynb - Colab
import numpy as np
import pandas as pd
# Load the training examples. Every column except the last holds an attribute
# value; the last column holds the label that learn() compares to "yes"/"no".
data = pd.read_csv('/content/sample_data/2.csv')
concepts = np.array(data.iloc[:, :-1])
target = np.array(data.iloc[:, -1])
def learn(concepts, target):
    """Compute the specific and general hypotheses from labelled examples.

    Args:
        concepts: 2-D array-like of attribute values, one row per example.
            The first row seeds the specific hypothesis.
        target: sequence of labels aligned with ``concepts``; rows labelled
            ``"yes"`` are treated as positive and ``"no"`` as negative. Any
            other label is ignored.

    Returns:
        A ``(specific_h, general_h)`` pair. ``specific_h`` is a copy of the
        first example with every attribute that disagreed with a positive
        example replaced by ``'?'``. ``general_h`` is a list of hypotheses
        (one list per constrained attribute); rows that are entirely ``'?'``
        are dropped.

    Raises:
        IndexError: if ``concepts`` is empty.
    """
    specific_h = concepts[0].copy()
    print("initialization of specific_h \n", specific_h)
    n_attributes = len(specific_h)
    general_h = [["?"] * n_attributes for _ in range(n_attributes)]
    print("initialization of general_h \n", general_h)

    for i, h in enumerate(concepts):
        if target[i] == "yes":
            print("If instance is Positive ")
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    # Generalize the attribute the positive example disagrees on.
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == "no":
            print("If instance is Negative ")
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

    # Drop hypotheses that constrain nothing. The check is derived from the
    # row contents rather than a fixed-width literal, so it works for any
    # number of attributes (the previous literal only matched 6 columns).
    general_h = [
        row for row in general_h if any(value != '?' for value in row)
    ]
    return specific_h, general_h
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
'?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?', '?']]
import numpy as np
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 1/11
2/4/25, 4:18 PM ML.ipynb - Colab
import numpy as np
import math
import csv
def read_data(filename):
    """Read a comma-separated file into its header row and its data rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A ``(metadata, traindata)`` tuple: ``metadata`` is the list of column
        names from the first row, ``traindata`` is the list of remaining rows
        (each a list of strings). An empty file yields ``([], [])``.
    """
    # newline='' is what the csv module asks for when handing it a file object.
    with open(filename, 'r', newline='') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        # Default to "no columns" instead of raising StopIteration on an
        # empty file.
        metadata = list(next(datareader, []))
        traindata = list(datareader)
    # Both lists were previously built and then discarded; hand them back.
    return (metadata, traindata)
class Node:
    """A single node of the decision tree.

    Attributes are plain fields:
      * ``attribute`` - the label passed to the constructor.
      * ``children``  - list that callers append ``(value, child)`` pairs to.
      * ``answer``    - starts as an empty string; set by callers on leaves.
    """

    def __init__(self, attribute):
        # Each instance gets its own fresh children list.
        self.attribute, self.children, self.answer = attribute, [], ""

    def __str__(self):
        # A node prints as its attribute label.
        return self.attribute
def subtables(data, col, delete):
    """Split ``data`` into one sub-table per distinct value in column ``col``.

    Args:
        data: 2-D NumPy array of string (or bytes) cells.
        col: index of the column whose distinct values define the split.
        delete: when true, column ``col`` is removed from every sub-table.

    Returns:
        ``(items, tables)`` where ``items`` is the sorted array of distinct
        values in the column (``np.unique``) and ``tables`` maps each value to
        the matching rows as a ``"|S32"`` array, in their original order.
    """
    items = np.unique(data[:, col])
    tables = {}
    for value in items:
        # A boolean mask selects the matching rows in one pass. This replaces
        # the manual count-then-copy loops and the deprecated int() conversion
        # of a 1-element array that they needed.
        rows = data[data[:, col] == value].astype("|S32")
        if delete:
            rows = np.delete(rows, col, 1)
        tables[value] = rows
    # The rest of the notebook reads both `items` and the per-value tables,
    # so return them; the visible text built them and then dropped them.
    return items, tables
if items.size == 1:
return 0
for x in range(items.shape[0]):
counts[x] = sum(S == items[x]) / (S.size * 1.0)
total_size = data.shape[0]
entropies = np.zeros((items.shape[0], 1))
intrinsic = np.zeros((items.shape[0], 1))
for x in range(items.shape[0]):
ratio = dict[items[x]].shape[0]/(total_size * 1.0)
entropies[x] = ratio * entropy(dict[items[x]][:, -1])
intrinsic[x] = ratio * math.log(ratio, 2)
for x in range(entropies.shape[0]):
total_entropy -= entropies[x]
return total_entropy / iv
# Recursively build a tree of Node objects for `data`, labelling internal
# nodes from `metadata`; returns the root Node of the (sub)tree.
# NOTE(review): this definition is damaged in this export (indentation lost,
# lines missing in the middle); the notes below mark what cannot be confirmed.
def create_node(data, metadata):
if (np.unique(data[:, -1])).shape[0] == 1:
# NOTE(review): the next three lines look like page-break residue (a garbled
# copy of `node = Node("")`, a URL footer and a page header), not code.
d N d ("")
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 2/11
2/4/25, 4:18 PM ML.ipynb - Colab
# Leaf case: the last column holds a single distinct value, so it is stored
# as the node's answer.
node = Node("")
node.answer = np.unique(data[:, -1])[0]
return node
# NOTE(review): `gains` is not assigned anywhere in the visible text;
# presumably the lines scoring each column were lost. Confirm against the
# notebook.
split = np.argmax(gains)
node = Node(metadata[split])
# The chosen column's name is removed before recursing.
metadata = np.delete(metadata, split, 0)
# NOTE(review): `items` and `dict` are also unassigned here; they look like the
# results of a subtables(...) call that is missing. TODO confirm.
for x in range(items.shape[0]):
child = create_node(dict[items[x]], metadata)
node.children.append((items[x], child))
return node
def empty(size):
    """Return a string made of ``size`` space characters.

    A ``size`` of zero or less yields the empty string.
    """
    # String repetition replaces the character-by-character concatenation loop.
    return " " * size
Outlook
Overcast
b'Yes'
Rainy
Windy
b'False'
b'Yes'
b'True'
b'No'
Sunny
Humidity
b'High'
b'No'
b'Normal'
b'Yes'
<ipython-input-8-efc07287c0d1>:37: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will erro
dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
<ipython-input-8-efc07287c0d1>:60: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will erro
sums += -1 * count * math.log(count, 2)
import numpy as np
# Sigmoid function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x), elementwise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
#Variable initialization
# NOTE(review): inputlayer_neurons, hiddenlayer_neurons and output_neurons are
# not assigned in the visible text; presumably they are defined on lines
# missing from this export. Confirm.
epoch=5 #Setting training iterations
lr=0.1 #Setting learning rate
# Weights and biases start as uniform random samples; with no low/high given,
# np.random.uniform draws from its default interval.
# wh/bh: input -> hidden weights and hidden bias row.
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
# wout/bout: hidden -> output weights and output bias row.
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
#Backpropagation
# NOTE(review): y, output, hlayer_act and derivatives_sigmoid are not defined in
# the visible text; these statements look like the body of a training loop
# whose header and forward pass were lost. Confirm.
# Output error: difference between the targets and the current output.
EO = y-output
outgrad = derivatives_sigmoid(output)
# Output delta: error scaled elementwise by the output-layer gradient.
d_output = EO * outgrad
# Propagate the output delta back through the output weights.
EH = d_output.dot(wout.T)
hiddengrad = derivatives_sigmoid(hlayer_act)#how much hidden layer wts contributed to error
d_hiddenlayer = EH * hiddengrad
-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.83892115]
[0.81725673]
[0.83411988]]
-----------Epoch- 3 Ends----------
-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 4/11
2/4/25, 4:18 PM ML.ipynb - Colab
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.8401741 ]
[0.81850337]
[0.83537874]]
import csv
import random
import math
def loadcsv(filename):
    """Load a headerless, all-numeric CSV file as a list of float rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list with one ``list[float]`` per non-blank line of the file.

    Raises:
        ValueError: if any cell cannot be converted with ``float``.
    """
    # The context manager closes the file; it was previously left open.
    with open(filename, "r", newline="") as csvfile:
        # csv.reader reports a blank line as an empty row. Such a row has no
        # class column for the later steps to read, so it is skipped here.
        return [[float(x) for x in row] for row in csv.reader(csvfile) if row]
def separatebyclass(dataset):
    """Group the rows of ``dataset`` by their last element.

    Returns a dict whose keys are the distinct last-column values and whose
    values are lists of the full rows carrying that value, in input order.
    """
    separated = {}
    for vector in dataset:
        # The last entry of each row is its class value.
        separated.setdefault(vector[-1], []).append(vector)
    return separated
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises ZeroDivisionError when ``numbers`` is empty.
    """
    total = sum(numbers)
    return total / float(len(numbers))
def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The squared deviations from the mean are divided by ``len(numbers) - 1``,
    so a single-element input raises ZeroDivisionError.
    """
    center = mean(numbers)
    squared_deviations = [(value - center) ** 2 for value in numbers]
    sample_variance = sum(squared_deviations) / float(len(numbers) - 1)
    return math.sqrt(sample_variance)
def summarizebyclass(dataset):
    """Summarize the instances of each class separately.

    Groups ``dataset`` with ``separatebyclass`` and returns a dict mapping
    every class value to ``summarize(instances)`` for that class's instances.
    """
    grouped = separatebyclass(dataset)
    return {label: summarize(rows) for label, rows in grouped.items()}
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 5/11
2/4/25, 4:18 PM ML.ipynb - Colab
bestLabel, bestProb = None, -1
for classvalue, probability in probabilities.items():#assigns that class which has he highest prob
if bestLabel is None or probability > bestProb:
bestProb = probability
bestLabel = classvalue
return bestLabel
# Entry point for the cell that loads 5-dataset.csv through loadcsv().
# NOTE(review): only three statements of the body are visible in this export.
# `splitratio` and `dataset` are assigned but never used here, so the rest of
# the body was presumably lost. Confirm against the notebook.
def main():
filename = '/content/sample_data/5-dataset.csv'
splitratio = 0.67
dataset = loadcsv(filename);
# Run the entry point as soon as the cell executes.
main()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Read the two-column file (the file has no header row, so names are supplied)
# and add a numeric copy of the label: 'pos' -> 1, 'neg' -> 0.
msg = pd.read_csv('/content/sample_data/docu.csv', names=['message', 'label'])
msg['labelnum'] = msg['label'].map({'pos': 1, 'neg': 0})
X = msg['message']
y = msg['labelnum']
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 6/11
2/4/25, 4:18 PM ML.ipynb - Colab
the total number of Test Data : (5,)
['about' 'am' 'an' 'and' 'awesome' 'bad' 'beers' 'boss' 'can' 'dance'
'deal' 'do' 'enemy' 'feel' 'fun' 'good' 'have' 'he' 'horrible' 'house'
'is' 'like' 'locality' 'love' 'my' 'not' 'of' 'place' 'restaurant'
'sandwich' 'sick' 'stay' 'stuff' 'sworn' 'that' 'these' 'this' 'tired'
'to' 'today' 'tomorrow' 'very' 'view' 'we' 'went' 'what' 'will' 'with']
# Perform inference
# NOTE(review): `model` and `VariableElimination` are not defined or imported in
# the visible text; presumably they come from lines missing from this export.
# Confirm.
inference = VariableElimination(model)
+-----------+---------------+
| Corona | phi(Corona) |
+===========+===============+
| Corona(0) | 0.7262 |
+-----------+---------------+
| Corona(1) | 0.2738 |
+-----------+---------------+
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 7/11
2/4/25, 4:18 PM ML.ipynb - Colab
# Standardize the features: fit a StandardScaler on X, then transform X so its
# distribution has mean 0 and standard deviation 1.
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
# Wrap the scaled array back into a DataFrame carrying X's column names.
xs = pd.DataFrame(xsa, columns = X.columns)
from sklearn.mixture import GaussianMixture
# NOTE(review): n_components=40 requests forty mixture components. If the goal
# is to compare clusters against a small set of true labels (as the plot and
# the "Observation" message further down suggest), this looks like a typo for
# a much smaller number. Confirm.
gmm = GaussianMixture(n_components=40)
gmm.fit(xs)
# Third panel of a 1x3 subplot grid: petal length against petal width.
plt.subplot(1, 3, 3)
# NOTE(review): c=colormap[0] passes a single colormap entry for every point.
# A per-point index (e.g. the fitted GMM's predicted labels) was presumably
# intended. `colormap` is defined outside the visible text. Confirm.
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[0], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.')
Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 8/11
2/4/25, 4:18 PM ML.ipynb - Colab
# Fit a 1-nearest-neighbour classifier on a train/test split of the iris data,
# then print the true and predicted class for every test sample.
dataset = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    dataset["data"], dataset["target"], random_state=0
)
kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)
for i, x in enumerate(X_test):
    # predict() expects a 2-D array, so wrap the single sample in a batch of one.
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print(
        "TARGET=", y_test[i], dataset["target_names"][y_test[i]],
        "PREDICTED=", prediction, dataset["target_names"][prediction],
    )
# Score of the fitted classifier on the held-out split.
print(kn.score(X_test, y_test))
residuals = y - yest
s = np.median(np.abs(residuals))
delta = np.clip(residuals / (6.0 * s), -1, 1)
delta = (1 - delta ** 2) ** 2
return yest
import math
# Number of sample points.
n = 100
# n evenly spaced x values from 0 to 2*pi.
x = np.linspace(0, 2 * math.pi, n)
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 9/11
2/4/25, 4:18 PM ML.ipynb - Colab
# Noisy sine wave: sin(x) plus standard-normal noise scaled by 0.3.
y = np.sin(x) + 0.3 * np.random.randn(n)
# NOTE(review): f and iterations are passed straight to lowess(), whose
# definition is only partly visible in this export. Presumably they are the
# smoothing fraction and the number of robustifying passes. Confirm.
f =0.25
iterations=3
yest = lowess(x, y, f, iterations)
[<matplotlib.lines.Line2D at 0x7bcdf96277d0>]
import csv
# Find-S: start from the first positive example and replace every attribute
# that a later positive example disagrees on with '?'.
hypo = ['%', '%', '%', '%', '%', '%']
data = []
print("\nThe given training examples are:")
# NOTE(review): `readcsv` is not created in the visible text; presumably it is
# a csv.reader opened on lines missing from this export. Confirm.
for row in readcsv:
    print(row)
    # Only positive examples (last column "yes", in any letter case) are kept.
    if row[-1].upper() == "YES":
        data.append(row)

TotalExamples = len(data)
print("The steps of the Find-s algorithm are :\n", hypo)

if data:
    # Every column except the last (the label) is an attribute.
    d = len(data[0]) - 1
    # Seed the hypothesis with a copy of the first positive example. The
    # temporary is no longer called `list`, which shadowed the builtin for
    # the rest of the session.
    hypo = data[0][:d]
    for example in data:
        for k in range(d):
            if hypo[k] != example[k]:
                hypo[k] = '?'
        print(hypo)
else:
    # No positive examples: keep the initial most-specific hypothesis instead
    # of failing with an IndexError on data[0].
    d = len(hypo)

print("\nThe maximally specific Find-s hypothesis for the given training examples is :")
print(hypo[:d])
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 10/11
2/4/25, 4:18 PM ML.ipynb - Colab
The given training examples are:
['Outlook', 'Temperature', 'Humidity', 'Windy', 'PlayTennis']
['Sunny', 'Hot', 'High', 'False', 'No']
['Sunny', 'Hot', 'High', 'True', 'No']
['Overcast', 'Hot', 'High', 'False', 'Yes']
['Rainy', 'Mild', 'High', 'False', 'Yes']
['Rainy', 'Cool', 'Normal', 'False', 'Yes']
['Rainy', 'Cool', 'Normal', 'True', 'No']
['Overcast', 'Cool', 'Normal', 'True', 'Yes']
['Sunny', 'Mild', 'High', 'False', 'No']
['Sunny', 'Cool', 'Normal', 'False', 'Yes']
['Rainy', 'Mild', 'Normal', 'False', 'Yes']
['Sunny', 'Mild', 'Normal', 'True', 'Yes']
['Overcast', 'Mild', 'High', 'True', 'Yes']
['Overcast', 'Hot', 'Normal', 'False', 'Yes']
['Rainy', 'Mild', 'High', 'True', 'No']
The maximally specific Find-s hypothesis for the given training examples is :
['?', '?', '?', '?']
https://colab.research.google.com/drive/1Om5HReZCA1FrEFt4OsWxjhUyMR1oHzHm#scrollTo=DP0FRxI7kdC_&printMode=true 11/11