Lab Programs
5. Input attributes are (from left to right) income, recreation, job, status, age-group, home-owner. Find the unconditional probability of 'golf' and the conditional probability of 'single' given 'medRisk' in the dataset.
6. Implement linear regression using Python.
7. Implement Naïve Bayes theorem to classify English text.
8. Implement an algorithm to demonstrate the significance of the genetic algorithm.
9. Implement the finite words classification system using the back-propagation algorithm.
PROGRAM 1
The probability that it is Friday and that a student is absent is 3 %. Since there are 5 school
days in a week, the probability that it is Friday is 20 %. What is the probability that a student
is absent given that today is Friday? Apply Bayes' rule in Python to get the result.
# The probability that it is Friday and that a student is absent is 3%
pAB = 0.03
print("The probability that it is Friday and that a student is absent :", pAB)
# The probability that it is Friday is 20% (1 school day out of 5)
pB = 0.2
print("The probability that it is Friday :", pB)
# Bayes' rule: P(Absent | Friday) = P(Friday and Absent) / P(Friday)
# = 0.03 / 0.2 = 0.15
pResult = pAB / pB
# Display the result
print("The probability that a student is absent given that today is Friday :", pResult * 100, "%")
PROGRAM 2
Extract the data from a database using Python
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
# Load the data
data = pd.read_csv('/content/sample_data/iris_csv.csv')
# Split the data into features and labels
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Scale the features
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Train the k-nearest neighbors classifier
k = 5 # Choose the number of neighbors to consider
classifier = KNeighborsClassifier(n_neighbors=k)
classifier.fit(X_train, y_train)
# Make predictions on the test set
y_pred = classifier.predict(X_test)
# Evaluate the performance of the classifier
print("Output")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
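The program above reads the Iris data from a CSV file. Since the exercise title mentions a database, the following minimal sketch shows how the same table could be pulled from a SQLite database instead; the file name iris.db and table name iris are illustrative assumptions, not part of the original exercise.
import sqlite3
import pandas as pd
# Connect to an assumed SQLite database file (illustrative name)
conn = sqlite3.connect('iris.db')
# Read the assumed 'iris' table into a DataFrame, then close the connection
data = pd.read_sql_query('SELECT * FROM iris', conn)
conn.close()
# 'data' can now replace the DataFrame returned by pd.read_csv above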
PROGRAM 4 Implementation of K-Means clustering using Python
Given the following data, which specify classifications for nine combinations of VAR1 and VAR2, predict a classification for a case where VAR1 = 0.906 and VAR2 = 0.606, using the result of k-means clustering with 3 means (i.e., 3 centroids).
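No code accompanies this exercise in the manual, and the nine (VAR1, VAR2) rows themselves are not reproduced above, so the array below uses illustrative placeholder values; substitute the actual data from the question sheet. A minimal sketch using scikit-learn's KMeans clusters the nine points into 3 groups and assigns the query point to its nearest centroid; the predicted cluster can then be mapped to the majority class label of its members.
import numpy as np
from sklearn.cluster import KMeans
# Placeholder (VAR1, VAR2) rows standing in for the nine cases of the exercise
X = np.array([[1.7, 1.6], [0.2, 1.8], [0.4, 1.2],
              [0.9, 1.6], [1.5, 0.8], [1.3, 1.1],
              [1.5, 0.4], [0.5, 1.8], [0.8, 0.2]])
# Cluster into 3 means (3 centroids)
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
print("Cluster centres:\n", kmeans.cluster_centers_)
# Assign the query case VAR1=0.906, VAR2=0.606 to its nearest centroid
query = np.array([[0.906, 0.606]])
print("Predicted cluster:", kmeans.predict(query)[0])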
PROGRAM 5 Find the unconditional probability of 'golf' and the conditional probability of 'single' given 'medRisk' in the dataset
import pandas as pd
# Define the dataset
data = {
    'income': ['medium', 'high', 'low', 'medium', 'high', 'low', 'medium', 'medium', 'high', 'low'],
    'recreation': ['skiing', 'golf', 'speedway', 'football', 'flying', 'football', 'golf', 'golf', 'skiing', 'golf'],
    'job': ['design', 'trading', 'transport', 'banking', 'media', 'security', 'media', 'transport', 'banking', 'unemployed'],
    'status': ['single', 'married', 'married', 'single', 'married', 'single', 'single', 'married', 'single', 'married'],
    'age-group': ['twenties', 'forties', 'thirties', 'thirties', 'fifties', 'twenties', 'thirties', 'forties', 'thirties', 'forties'],
    'home-owner': ['no', 'yes', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes'],
    'credit-worthiness': ['highRisk', 'lowRisk', 'medRisk', 'lowRisk', 'highRisk', 'medRisk', 'medRisk', 'lowRisk', 'highRisk', 'highRisk']
}
# Convert the dictionary to a Pandas dataframe
df = pd.DataFrame(data)
# Calculate the unconditional probability of 'golf'
total_count = df.shape[0] # total number of examples
golf_count = df[df['recreation'] == 'golf'].shape[0]  # number of examples with 'golf' as recreation
golf_prob = golf_count / total_count
print("OUTPUT")
print("The unconditional probability of 'golf' is:", golf_prob)
# Calculate the conditional probability of 'single' given 'medRisk'
medRisk_count = df[df['credit-worthiness'] == 'medRisk'].shape[0]  # number of examples with 'medRisk' as credit-worthiness
single_count = df[(df['credit-worthiness'] == 'medRisk') & (df['status'] == 'single')].shape[0]  # 'medRisk' and 'single'
single_given_medRisk_prob = single_count / medRisk_count
print("The conditional probability of 'single' given 'medRisk' is:", single_given_medRisk_prob)
PROGRAM 6 Implement linear regression using Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# To read data from Age_Income.csv file
dataFrame = pd.read_csv('/content/sample_data/ageincome.csv')
# To place data into age and income vectors
age = dataFrame['Age']
income = dataFrame['Income']
# number of points
num = np.size(age)
# To find the mean of age and income vector
mean_age = np.mean(age)
mean_income = np.mean(income)
# calculating cross-deviation and deviation about age
CD_ageincome = np.sum(income*age) - num*mean_income*mean_age
CD_ageage = np.sum(age*age) - num*mean_age*mean_age
# calculating regression coefficients
b1 = CD_ageincome / CD_ageage
b0 = mean_income - b1*mean_age
# to display coefficients
print("Estimated Coefficients :")
print("b0 = ",b0,"\nb1 = ",b1)
# To plot the actual points as scatter plot
plt.scatter(age, income, color = "b",marker = "o")
# To predict the response vector
response_Vec = b0 + b1*age
# To plot the regression line
plt.plot(age, response_Vec, color = "r")
# Placing labels
plt.xlabel('Age')
plt.ylabel('Income')
# To display plot
plt.show()
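For reference, the code implements the standard least-squares estimates: b1 = (Σ xy − n·x̄·ȳ) / (Σ x² − n·x̄²) and b0 = ȳ − b1·x̄, where x is age, y is income, n is the number of points, and x̄, ȳ are their means; CD_ageincome and CD_ageage are exactly the numerator and denominator sums of b1.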
PROGRAM 7 Implement Naïve Bayes theorem to classify English text
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
msglbl_data = pd.read_csv('/content/sample_data/StatementsData.csv', names=['Message', 'Label'])
print("The Total instances in the Dataset: ", msglbl_data.shape[0])
msglbl_data['labelnum'] = msglbl_data.Label.map({'pos': 1, 'neg': 0})
# place the data in X and Y Vectors
X = msglbl_data["Message"]
Y = msglbl_data.labelnum
# to split the data into train set and test set
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y)
count_vect = CountVectorizer()
Xtrain_dims = count_vect.fit_transform(Xtrain)
Xtest_dims = count_vect.transform(Xtest)
df = pd.DataFrame(Xtrain_dims.toarray(), columns=count_vect.get_feature_names_out())
clf = MultinomialNB()
# to fit the train data into model
clf.fit(Xtrain_dims, Ytrain)
# to predict the test data
prediction = clf.predict(Xtest_dims)
print('******** Accuracy Metrics *********')
print('Accuracy : ', accuracy_score(Ytest, prediction))
print('Recall : ', recall_score(Ytest, prediction))
print('Precision : ',precision_score(Ytest, prediction))
print('Confusion Matrix : \n', confusion_matrix(Ytest, prediction))
print(10*"-")
# to predict the input statement
test_stmt = [input("Enter any statement to predict :")]
test_dims = count_vect.transform(test_stmt)
pred = clf.predict(test_dims)
for stmt, lbl in zip(test_stmt, pred):
    if lbl == 1:
        print("Statement is Positive")
    else:
        print("Statement is Negative")
PROGRAM 8 Implement an algorithm to demonstrate the significance of the genetic algorithm
# Python3 program to create target string, starting from
# random string using Genetic Algorithm
import random
# Number of individuals in each generation
POPULATION_SIZE = 100
# Valid genes
GENES = '''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890, .-;:_!"#%&/()=?@${[]}'''
# Target string to be generated
TARGET = "SWCET"
class Individual(object):
'''
Class representing individual in population
'''
def __init__(self, chromosome):
self.chromosome = chromosome
self.fitness = self.cal_fitness()
@classmethod
def mutated_genes(self):
'''
create random genes for mutation
'''
global GENES
gene = random.choice(GENES)
return gene
@classmethod
def create_gnome(self):
'''
create chromosome or string of genes
'''
global TARGET
gnome_len = len(TARGET)
        return [self.mutated_genes() for _ in range(gnome_len)]
def mate(self, par2):
'''
Perform mating and produce new offspring
'''
# chromosome for offspring
child_chromosome = []
for gp1, gp2 in zip(self.chromosome, par2.chromosome):
# random probability
prob = random.random()
# if prob is less than 0.45, insert gene
# from parent 1
if prob < 0.45:
child_chromosome.append(gp1)
# if prob is between 0.45 and 0.90, insert
# gene from parent 2
elif prob < 0.90:
child_chromosome.append(gp2)
# otherwise insert random gene(mutate),
# for maintaining diversity
else:
child_chromosome.append(self.mutated_genes())
# create new Individual(offspring) using
# generated chromosome for offspring
return Individual(child_chromosome)
def cal_fitness(self):
'''
Calculate fitness score, it is the number of
characters in string which differ from target
string.
'''
global TARGET
fitness = 0
for gs, gt in zip(self.chromosome, TARGET):
if gs != gt: fitness+= 1
return fitness
# Driver code
def main():
global POPULATION_SIZE
#current generation
generation = 1
found = False
population = []
# create initial population
for _ in range(POPULATION_SIZE):
gnome = Individual.create_gnome()
population.append(Individual(gnome))
while not found:
        # sort the population in increasing order of fitness score
        population = sorted(population, key=lambda x: x.fitness)
        # if the individual with the lowest fitness score is 0,
        # we know we have reached the target and can break the loop
if population[0].fitness <= 0:
found = True
break
# Otherwise generate new offsprings for new generation
new_generation = []
        # Perform elitism: 10% of the fittest population
        # goes directly to the next generation
s = int((10*POPULATION_SIZE)/100)
new_generation.extend(population[:s])
        # From the 50% fittest population, individuals
        # will mate to produce offspring
s = int((90*POPULATION_SIZE)/100)
for _ in range(s):
parent1 = random.choice(population[:50])
parent2 = random.choice(population[:50])
child = parent1.mate(parent2)
new_generation.append(child)
population = new_generation
print("Generation: {}\tString: {}\tFitness: {}".\
format(generation,
"".join(population[0].chromosome),
population[0].fitness))
generation += 1
print("Generation: {}\tString: {}\tFitness: {}".\
format(generation,
"".join(population[0].chromosome),
population[0].fitness))
if __name__ == '__main__':
main()
PROGRAM 9 Implement the finite words classification system using the back-propagation algorithm
import numpy as np
class FiniteWordsClassifier:
    def __init__(self, n_features, n_hidden, n_classes, learning_rate=0.01):
self.n_features = n_features
self.n_hidden = n_hidden
self.n_classes = n_classes
self.learning_rate = learning_rate
        self.W_ih = np.random.normal(0, 1, (n_features, n_hidden))
self.b_h = np.zeros((1, n_hidden))
        self.W_ho = np.random.normal(0, 1, (n_hidden, n_classes))
self.b_o = np.zeros((1, n_classes))
def sigmoid(self, x):
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(self, x):
return x * (1 - x)
def forward(self, X):
self.h = self.sigmoid(np.dot(X, self.W_ih) + self.b_h)
        self.y = self.sigmoid(np.dot(self.h, self.W_ho) + self.b_o)
return self.y
def backward(self, X, y_true):
        delta_o = (self.y - y_true) * self.sigmoid_derivative(self.y)
dW_ho = np.dot(self.h.T, delta_o)
db_o = np.sum(delta_o, axis=0, keepdims=True)
        delta_h = np.dot(delta_o, self.W_ho.T) * self.sigmoid_derivative(self.h)
dW_ih = np.dot(X.T, delta_h)
db_h = np.sum(delta_h, axis=0)
self.W_ho -= self.learning_rate * dW_ho
self.b_o -= self.learning_rate * db_o
self.W_ih -= self.learning_rate * dW_ih
self.b_h -= self.learning_rate * db_h
def train(self, X, y, epochs=100):
for i in range(epochs):
y_pred = self.forward(X)
self.backward(X, y)
if i % 10 == 0:
loss = self.cross_entropy_loss(y, y_pred)
print(f"Epoch {i}: Loss = {loss}")
def predict(self, X):
y_pred = self.forward(X)
return np.argmax(y_pred, axis=1)
    def cross_entropy_loss(self, y_true, y_pred):
        N = y_true.shape[0]
        # a small epsilon guards against log(0) when predictions saturate
        loss = -np.sum(y_true * np.log(y_pred + 1e-9)) / N
        return loss
X_train = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
y_train = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
model = FiniteWordsClassifier(n_features=3, n_hidden=5, n_classes=2)
model.train(X_train, y_train, epochs=1000)
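The script trains the network but never calls predict. As a quick sanity check, an illustrative addition (not in the original; it reuses the training inputs since no separate test set is given):
# Illustrative check: predict class indices for the training inputs;
# the output should approach [0 1 1 0] once training has converged
print("Predictions:", model.predict(X_train))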