ML Record

1. Implement and demonstrate the FIND-S algorithm.

Source code:
import csv

# Initialize the hypothesis to the most specific one
hypo = ['%', '%', '%', '%', '%', '%']

# Read the training examples and keep the positive ones
data = []
with open('trainingdata.csv') as csv_file:
    readcsv = csv.reader(csv_file, delimiter=',')
    print("\nThe given training examples are:")
    for row in readcsv:
        print(row)
        if row[-1].upper() == "YES":
            data.append(row)

print("\nThe positive examples are:")
for x in data:
    print(x)
print("\n")

TotalExamples = len(data)
d = len(data[0]) - 1  # number of attributes (excluding the target column)

print("The steps of the Find-S algorithm are:\n", hypo)

# Take the first positive example as the initial hypothesis
hypo = data[0][:d]
print(hypo)

# Generalise the hypothesis against each remaining positive example
for i in range(1, TotalExamples):
    for k in range(d):
        if hypo[k] != data[i][k]:
            hypo[k] = '?'
    print(hypo)

print("\nThe maximally specific Find-S hypothesis for the given training examples is:")
print(hypo)

Output:

The given training examples are:


['sky', 'airTemp', 'humidity', 'wind', 'water', 'forecast', 'enjoySport']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The positive examples are:
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes']

The steps of the Find-S algorithm are:
['%', '%', '%', '%', '%', '%']
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
['Sunny', 'Warm', '?', 'Strong', '?', '?']

The maximally specific Find-S hypothesis for the given training examples is:
['Sunny', 'Warm', '?', 'Strong', '?', '?']
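
For reference, the trainingdata.csv file read by this program (and again by experiment 6) can be reconstructed exactly from the rows printed above:

sky,airTemp,humidity,wind,water,forecast,enjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes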
2. Implement Linear Regression.

Source code:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)

    # mean of x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)

    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y * x) - n * m_y * m_x
    SS_xx = np.sum(x * x) - n * m_x * m_x

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(b[0], b[1]))

    # plotting the regression line
    plot_regression_line(x, y, b)

# calling the main function
if __name__ == "__main__":
    main()

Output:

Estimated coefficients:
b_0 = 1.2363636363636363
b_1 = 1.1696969696969697
(The program also displays a scatter plot of the data points with the fitted regression line.)
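
As a quick cross-check (not part of the original record), NumPy's polyfit recovers the same coefficients; note that it returns them highest degree first:

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# For a degree-1 fit, np.polyfit returns [b_1, b_0]
b_1, b_0 = np.polyfit(x, y, 1)
print(b_0, b_1)  # approximately 1.2364 and 1.1697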
3. Implement RANDOM FOREST classification.

Source code:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Load the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic_data = pd.read_csv(url)

# Drop rows with missing target values
titanic_data = titanic_data.dropna(subset=['Survived'])

# Select relevant features and the target variable
# (.copy() avoids pandas' SettingWithCopyWarning on the assignments below)
X = titanic_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = titanic_data['Survived']

# Convert the categorical variable 'Sex' to numerical
X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})

# Handle missing values in the 'Age' column
X['Age'] = X['Age'].fillna(X['Age'].median())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", classification_rep)

Output:
Accuracy: 0.80

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.85      0.83       105
           1       0.77      0.73      0.75        74

    accuracy                           0.80       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179
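
A natural follow-up (not in the original record) is to ask which features drive the predictions; scikit-learn's RandomForestClassifier exposes impurity-based importances:

# Rank the six features by the fitted model's importances
import pandas as pd
importances = pd.Series(rf_classifier.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False))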
4. Build an Artificial Neural Network by implementing the BACKPROPAGATION algorithm and test it.

Source code:
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # X = (hours sleeping, hours studying)
y = np.array(([92], [86], [89]), dtype=float)  # y = score on test

# scale units
X = X / np.amax(X, axis=0)  # column-wise maximum of X
y = y / 100  # max test score is 100

class Neural_Network(object):
    def __init__(self):
        # Parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        # Weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)  # (2x3) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)  # (3x1) weight matrix from hidden to output layer

    def forward(self, X):
        # forward propagation through the network
        self.z = np.dot(X, self.W1)  # dot product of X (input) and the first set of weights
        self.z2 = self.sigmoid(self.z)  # activation function
        self.z3 = np.dot(self.z2, self.W2)  # dot product of the hidden layer (z2) and the second set of weights
        o = self.sigmoid(self.z3)  # final activation function
        return o

    def sigmoid(self, s):
        return 1 / (1 + np.exp(-s))  # activation function

    def sigmoidPrime(self, s):
        return s * (1 - s)  # derivative of sigmoid (s is already a sigmoid output)

    def backward(self, X, y, o):
        # backward propagation through the network
        self.o_error = y - o  # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)  # derivative of sigmoid applied to the output error
        self.z2_error = self.o_delta.dot(self.W2.T)  # how much the hidden layer contributed to the output error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # derivative of sigmoid applied to the z2 error
        self.W1 += X.T.dot(self.z2_delta)  # adjust the first set (input --> hidden) of weights
        self.W2 += self.z2.T.dot(self.o_delta)  # adjust the second set (hidden --> output) of weights

    def train(self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)

# Instantiate the neural network
NN = Neural_Network()

# Print the inputs, the targets and the untrained predictions
print("\nInput: \n" + str(X))
print("\nActual Output: \n" + str(y))
print("\nPredicted Output before training: \n" + str(NN.forward(X)))
print("\nLoss: \n" + str(np.mean(np.square(y - NN.forward(X)))))  # mean squared loss

# One training step (forward pass + backpropagation)
NN.train(X, y)

Output:
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]

Actual Output:
[[0.92]
[0.86]
[0.89]]

Predicted Output before training:
[[0.59715299]
[0.58580314]
[0.58832216]]

Loss:
0.09014121131592957
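
The record performs only a single training step, so the loss shown above is still the untrained loss. In practice the forward and backward passes are repeated; a minimal sketch using the class above (the epoch count here is an arbitrary choice):

# Repeat the forward/backward passes until the loss settles
for epoch in range(1000):
    NN.train(X, y)

print("Predicted Output after training:\n", NN.forward(X))
print("Loss after training:", np.mean(np.square(y - NN.forward(X))))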
5. Implement LOCALLY WEIGHTED REGRESSION.

Source code:
from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))  # number of neighbours used for each local fit
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]  # local bandwidths
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube weights

    yest = np.zeros(n)
    delta = np.ones(n)  # robustness weights, updated each iteration

    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)  # weighted least-squares line at x[i]
            yest[i] = beta[0] + beta[1] * x[i]

        # Down-weight points with large residuals (robustifying step)
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

n = 100
x = np.linspace(0, 2 * np.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()

Output:
(The program displays the noisy sine data as red dots together with the smoothed LOWESS curve in blue.)
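
For reference, the neighbourhood weights computed in lowess above are the tricube kernel: with the normalized distance d = |x_i - x_j| / h_i clipped to [0, 1], each point receives weight w = (1 - d^3)^3, so observations beyond the local bandwidth h_i contribute nothing to the fit at x_i. The robustness weights delta apply the analogous bisquare form (1 - (r / 6s)^2)^2 to the scaled residuals, down-weighting outliers on each iteration.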
6. For a given set of training examples stored in a CSV file, implement and demonstrate the CANDIDATE-ELIMINATION algorithm.

Source code:
import numpy as np
import pandas as pd

# Loading data from a CSV file
data = pd.read_csv('trainingdata.csv')
print(data)

# Separating the concept features from the target
concepts = np.array(data.iloc[:, 0:-1])
print("Concepts:\n", concepts)

# Isolating the target into a separate array
target = np.array(data.iloc[:, -1])
print("Target:\n", target)

def learn(concepts, target):
    """
    learn() implements the learning step of the Candidate Elimination algorithm.
    Arguments:
        concepts - an array with all the feature values
        target - an array with the corresponding output values
    """
    # Initialize specific_h with the first instance from concepts
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("specific_h:", specific_h)

    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("general_h:", general_h)

    # The learning iterations
    for i, h in enumerate(concepts):
        # Positive example: generalise specific_h where it disagrees
        if target[i] == "Yes":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        # Negative example: specialise general_h only
        if target[i] == "No":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("\nSteps of Candidate Elimination Algorithm", i + 1)
        print("specific_h:", specific_h)
        print("general_h:", general_h)

    # Remove fully general (all-'?') rows from general_h
    indices = [i for i, val in enumerate(general_h) if val == ['?' for _ in range(len(specific_h))]]
    for i in indices:
        general_h.remove(['?' for _ in range(len(specific_h))])

    # Return the final boundary sets
    return specific_h, general_h

s_final, g_final = learn(concepts, target)

print("\nFinal Specific_h:", s_final)
print("\nFinal General_h:", g_final)

Output:
sky airTemp humidity wind water forecast enjoySport
0 Sunny Warm Normal Strong Warm Same Yes
1 Sunny Warm High Strong Warm Same Yes
2 Rainy Cold High Strong Warm Change No
3 Sunny Warm High Strong Cool Change Yes
Concepts:
[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
Target:
['Yes' 'Yes' 'No' 'Yes']

Initialization of specific_h and general_h
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 1
specific_h: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 2
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Steps of Candidate Elimination Algorithm 3
specific_h: ['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]

Steps of Candidate Elimination Algorithm 4
specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']
general_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h: ['Sunny' 'Warm' '?' 'Strong' '?' '?']

Final General_h: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
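
As a small usage sketch (an assumption, not part of the record), a learned hypothesis can classify a new instance: an attribute matches when the hypothesis holds '?' or the identical value.

# Hypothetical helper: test an instance against a learned hypothesis
def matches(hypothesis, instance):
    return all(h == '?' or h == v for h, v in zip(hypothesis, instance))

# s_final generalises the positive examples, so this instance is accepted
print(matches(s_final, ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']))  # True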
7. Write a program to demonstrate the working of the DECISION TREE based ID3 algorithm.

Source code:
import numpy as np
import math
import csv

def read_data(filename):
    # Read the header row (metadata) and the remaining rows (training data)
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        metadata = next(datareader)
        traindata = []
        for row in datareader:
            traindata.append(row)
    return metadata, traindata

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    # Split the data into one subtable per distinct value of the given column
    subtable = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0],), dtype=int)

    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1

    for x in range(items.shape[0]):
        subtable[items[x]] = np.empty((count[x], data.shape[1]), dtype=data.dtype)
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                subtable[items[x]][pos] = data[y]
                pos += 1
        if delete:
            subtable[items[x]] = np.delete(subtable[items[x]], col, 1)

    return items, subtable

def entropy(S):
    items, counts = np.unique(S, return_counts=True)
    entropy_value = 0
    for i in range(len(items)):
        p_i = counts[i] / len(S)
        entropy_value -= p_i * math.log2(p_i)
    return entropy_value
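
# For reference: entropy above computes Entropy(S) = -sum_i p_i * log2(p_i),
# where p_i is the fraction of examples in S carrying the i-th class label.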

def gain_ratio(data, col):
    items, subtable = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0],))
    intrinsic = np.zeros((items.shape[0],))

    for x in range(items.shape[0]):
        ratio = subtable[items[x]].shape[0] / total_size
        entropies[x] = ratio * entropy(subtable[items[x]][:, -1])
        intrinsic[x] = ratio * math.log2(ratio) if ratio != 0 else 0

    total_entropy = entropy(data[:, -1])
    iv = -sum(intrinsic)
    total_gain = total_entropy - sum(entropies)

    return total_gain / iv if iv != 0 else 0
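
# For reference: this is the gain ratio of C4.5 (a refinement of ID3's plain
# information gain): GainRatio(S, A) = Gain(S, A) / IV(A), where
# Gain(S, A) = Entropy(S) - sum_v (|S_v|/|S|) * Entropy(S_v) and
# IV(A) = -sum_v (|S_v|/|S|) * log2(|S_v|/|S|).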

def create_node(data, metadata):
    # If every example carries the same label, return a leaf node
    if len(np.unique(data[:, -1])) == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node

    # Choose the attribute with the highest gain ratio
    gains = np.zeros((data.shape[1] - 1,))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)

    split = np.argmax(gains)
    node = Node(metadata[split])
    new_metadata = np.delete(metadata, split)

    items, subtable = subtables(data, split, delete=True)

    # Recurse into each branch of the chosen attribute
    for x in range(items.shape[0]):
        child = create_node(subtable[items[x]], new_metadata)
        node.children.append((items[x], child))

    return node

def empty(size):
    return " " * size

def print_tree(node, level=0):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, np.array(metadata))
print_tree(node)

Output:
 Outlook
  Overcast
   Yes
  Rainy
   Windy
    False
     Yes
    True
     No
  Sunny
   Humidity
    High
     No
    Normal
     Yes
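
Both this experiment and the next read a tennisdata.csv file that the record does not reproduce. Judging from the rows and encoded labels printed in experiment 8, it is consistent with the classic PlayTennis dataset, so the file presumably looks like this:

Outlook,Temperature,Humidity,Windy,PlayTennis
Sunny,Hot,High,False,No
Sunny,Hot,High,True,No
Overcast,Hot,High,False,Yes
Rainy,Mild,High,False,Yes
Rainy,Cool,Normal,False,Yes
Rainy,Cool,Normal,True,No
Overcast,Cool,Normal,True,Yes
Sunny,Mild,High,False,No
Sunny,Cool,Normal,False,Yes
Rainy,Mild,Normal,False,Yes
Sunny,Mild,Normal,True,Yes
Overcast,Mild,High,True,Yes
Overcast,Hot,Normal,False,Yes
Rainy,Mild,High,True,No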
8. Write a program implementing the NAIVE BAYESIAN CLASSIFIER for a sample training data set stored in a CSV file.

Source code:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data from the CSV file
data = pd.read_csv('tennisdata.csv')
print("The first 5 values of the data are:\n", data.head())

# Obtain the training features and the target output
X = data.iloc[:, :-1]
print("\nThe first 5 values of the train data are:\n", X.head())

y = data.iloc[:, -1]
print("\nThe first 5 values of the train output are:\n", y.head())

# Convert categorical data to numerical data
le_outlook = LabelEncoder()
X['Outlook'] = le_outlook.fit_transform(X['Outlook'])

le_temperature = LabelEncoder()
X['Temperature'] = le_temperature.fit_transform(X['Temperature'])

le_humidity = LabelEncoder()
X['Humidity'] = le_humidity.fit_transform(X['Humidity'])

le_windy = LabelEncoder()
X['Windy'] = le_windy.fit_transform(X['Windy'])

print("\nNow the train data is:\n", X.head())

le_play_tennis = LabelEncoder()
y = le_play_tennis.fit_transform(y)
print("\nNow the train output is:\n", y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the Gaussian Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = classifier.predict(X_test)
print("Accuracy is:", accuracy_score(y_test, y_pred))

Output:
The first 5 values of the data are:
Outlook Temperature Humidity Windy PlayTennis
0 Sunny Hot High False No
1 Sunny Hot High True No
2 Overcast Hot High False Yes
3 Rainy Mild High False Yes
4 Rainy Cool Normal False Yes

The first 5 values of the train data are:
Outlook Temperature Humidity Windy
0 Sunny Hot High False
1 Sunny Hot High True
2 Overcast Hot High False
3 Rainy Mild High False
4 Rainy Cool Normal False

The first 5 values of the train output are:
0 No
1 No
2 Yes
3 Yes
4 Yes
Name: PlayTennis, dtype: object

Now the train data is:
Outlook Temperature Humidity Windy
0 2 1 0 0
1 2 1 0 1
2 0 1 0 0
3 1 2 0 0
4 1 0 1 0

Now the train output is:
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 1.0
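
As a small usage sketch (an assumption, not part of the record), the trained model can classify a new day encoded with the same label encoders fitted above (note the Windy column is parsed by pandas as booleans):

# Hypothetical new day: Sunny, Cool, High humidity, windy
import pandas as pd
new_day = pd.DataFrame([{
    'Outlook': le_outlook.transform(['Sunny'])[0],
    'Temperature': le_temperature.transform(['Cool'])[0],
    'Humidity': le_humidity.transform(['High'])[0],
    'Windy': le_windy.transform([True])[0],
}])
print(le_play_tennis.inverse_transform(classifier.predict(new_day)))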
