DM ML Practical

Naïve Bayes Classifier Program

import numpy as np
from collections import defaultdict

# Step 1: Prepare a simple dataset
data = [
    ('spam', 'buy cheap amazon products now'),
    ('ham', 'how are you doing today'),
    ('spam', 'cheap watches on sale'),
    ('ham', 'let us meet up tomorrow'),
    ('spam', 'win a million dollars now'),
    ('ham', 'can you call me back later'),
]

# Step 2: Preprocess and count word frequencies
class NaiveBayesClassifier:
    def __init__(self):
        self.word_probs = defaultdict(lambda: defaultdict(float))  # Word probabilities
        self.class_probs = defaultdict(float)  # Class probabilities
        self.vocab = set()  # Vocabulary

    def train(self, dataset):
        # Count occurrences
        class_word_counts = defaultdict(lambda: defaultdict(int))  # Count of words per class
        class_counts = defaultdict(int)  # Count of each class

        for label, text in dataset:
            words = text.split()
            class_counts[label] += 1
            for word in words:
                self.vocab.add(word)
                class_word_counts[label][word] += 1

        # Calculate class probabilities P(C)
        total_samples = sum(class_counts.values())
        for label, count in class_counts.items():
            self.class_probs[label] = count / total_samples

        # Calculate word probabilities P(w|C)
        for label, words in class_word_counts.items():
            total_words = sum(words.values())
            for word in self.vocab:
                # Additive smoothing (Laplace smoothing)
                self.word_probs[label][word] = ((words[word] + 1) /
                                                (total_words + len(self.vocab)))

    def predict(self, text):
        words = text.split()
        # Calculate P(C|w1, w2, ..., wn) for each class
        class_scores = {}
        for label in self.class_probs:
            class_scores[label] = np.log(self.class_probs[label])  # Log of prior P(C)
            for word in words:
                if word in self.vocab:
                    class_scores[label] += np.log(
                        self.word_probs[label].get(word, 1 / len(self.vocab)))  # Log of P(w|C)

        # Return the class with the highest score
        return max(class_scores, key=class_scores.get)

# Step 3: Train the classifier
classifier = NaiveBayesClassifier()
classifier.train(data)

# Step 4: Classify new examples
test_texts = [
    'cheap watches available',
    'how are you',
    'call me now to win',
]

for text in test_texts:
    prediction = classifier.predict(text)
    print(f'Text: "{text}" => Predicted class: {prediction}')
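
For reference, roughly the same classifier can be built with scikit-learn's CountVectorizer and MultinomialNB (which applies Laplace smoothing with alpha=1 by default). This is a minimal sketch reusing the data and test_texts lists defined above, not part of the original program.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

labels = [label for label, text in data]
texts = [text for label, text in data]

vectorizer = CountVectorizer()
X_counts = vectorizer.fit_transform(texts)   # bag-of-words counts

nb = MultinomialNB()                         # additive smoothing, alpha=1 by default
nb.fit(X_counts, labels)

print(nb.predict(vectorizer.transform(test_texts)))
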
KNN
import matplotlib.pyplot as plt

x = [4, 5, 10, 4, 3, 11, 14, 8, 10, 12]
y = [21, 19, 24, 17, 16, 25, 24, 22, 21, 21]
classes = [0, 0, 1, 0, 0, 1, 1, 0, 1, 1]

plt.scatter(x, y, c=classes)
plt.show()

from sklearn.neighbors import KNeighborsClassifier

data = list(zip(x, y))

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(data, classes)

new_x = 8
new_y = 21
new_point = [(new_x, new_y)]

prediction = knn.predict(new_point)

plt.scatter(x + [new_x], y + [new_y], c=classes + [prediction[0]])
plt.text(x=new_x-1.7, y=new_y-0.7, s=f"new point, class: {prediction[0]}")
plt.show()

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(data, classes)

prediction = knn.predict(new_point)

plt.scatter(x + [new_x], y + [new_y], c=classes + [prediction[0]])
plt.text(x=new_x-1.7, y=new_y-0.7, s=f"new point, class: {prediction[0]}")
plt.show()
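
The prediction for a borderline point like this one can flip as k changes. One hedged way to compare values of k on this toy data is cross-validation; the candidate k values and the 2-fold split below are arbitrary choices for such a small dataset, not part of the original program.

from sklearn.model_selection import cross_val_score

for n in (1, 3, 5):
    model = KNeighborsClassifier(n_neighbors=n)
    scores = cross_val_score(model, data, classes, cv=2)  # only 10 samples, so 2 folds
    print(f"k={n}: mean accuracy {scores.mean():.2f}")
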

DBSCAN
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load data in X
X, y_true = make_blobs(n_samples=300, centers=4,
                       cluster_std=0.50, random_state=0)
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

# Plot result

# Black is not in the colour list; it is used for noise instead.
unique_labels = set(labels)
colors = ['y', 'b', 'g', 'r']
print(colors)
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    # Core samples
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)

    # Non-core (border) samples
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)

plt.title('number of clusters: %d' % n_clusters_)
plt.show()
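
Since metrics is already imported, the amount of noise and the cluster quality can also be reported. A brief sketch (the silhouette score is only defined when there is more than one cluster):

n_noise_ = list(labels).count(-1)
print("Estimated number of noise points:", n_noise_)
if n_clusters_ > 1:
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
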

Support Vector Machine


# importing scikit learn with make_blobs
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# creating dataset X containing n_samples
# Y containing two classes
X, Y = make_blobs(n_samples=500, centers=2,
                  random_state=0, cluster_std=0.40)

# plotting scatters
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
plt.show()

# creating linspace between -1 to 3.5
xfit = np.linspace(-1, 3.5)

# plotting scatter
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')

# plot candidate separating lines (with margins) between the two classes
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none',
                     color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5)
plt.show()
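
As a follow-up to the candidate lines above, an SVC can be fitted on the same blobs to find the maximum-margin separator. This is a minimal sketch; the large C value (to approximate a hard margin) is an assumption, not part of the original program.

from sklearn.svm import SVC

model = SVC(kernel='linear', C=1e10)   # large C approximates a hard margin
model.fit(X, Y)
print("support vectors:\n", model.support_vectors_)
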
# importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# reading csv file and extracting class column to y.
x = pd.read_csv(r"C:\...\cancer.csv")
a = np.array(x)
y = a[:, 30]  # classes having 0 and 1

# extracting two features
x = np.column_stack((x.malignant, x.benign))

# 569 samples and 2 features
print(x.shape)
print(x, y)

# import support vector classifier
# "Support Vector Classifier"
from sklearn.svm import SVC
clf = SVC(kernel='linear')

# fitting x samples and y classes
clf.fit(x, y)

print(clf.predict([[120, 990]]))
print(clf.predict([[85, 550]]))
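
The example above depends on a local cancer.csv with "malignant" and "benign" columns. A self-contained variant, sketched below under the assumption that any two numeric features will do for illustration, uses scikit-learn's built-in breast cancer dataset instead (where target 0 is malignant and 1 is benign):

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC

cancer = load_breast_cancer()
X2 = cancer.data[:, :2]   # first two features (mean radius, mean texture)
y2 = cancer.target        # 0 = malignant, 1 = benign

clf2 = SVC(kernel='linear')
clf2.fit(X2, y2)
print(clf2.predict([[14.0, 20.0]]))  # prediction for a hypothetical sample
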

CART
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Define the features and target variable
features = [
    ["red", "large"],
    ["green", "small"],
    ["red", "small"],
    ["yellow", "large"],
    ["green", "large"],
    ["orange", "large"],
]
target_variable = ["apple", "lime", "strawberry", "banana",
                   "grape", "orange"]

# Flatten the features list for encoding
flattened_features = [item for sublist in features for item in sublist]

# Use a single LabelEncoder for all features and target variable
le = LabelEncoder()
le.fit(flattened_features + target_variable)

# Encode features and target variable
encoded_features = [le.transform(item) for item in features]
encoded_target = le.transform(target_variable)

# Create a CART classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training set
clf.fit(encoded_features, encoded_target)

# Predict the fruit type for a new instance
new_instance = ["red", "large"]
encoded_new_instance = le.transform(new_instance)
predicted_fruit_type = clf.predict([encoded_new_instance])
decoded_predicted_fruit_type = le.inverse_transform(predicted_fruit_type)
print("Predicted fruit type:", decoded_predicted_fruit_type[0])

KMEANS
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=500, n_features=2, centers=3, random_state=23)

fig = plt.figure(0)
plt.grid(True)
plt.scatter(X[:, 0], X[:, 1])
plt.show()

k = 3

clusters = {}
np.random.seed(23)

# Initialise k cluster centers at random positions
for idx in range(k):
    center = 2 * (2 * np.random.random((X.shape[1],)) - 1)
    cluster = {
        'center': center,
        'points': []
    }
    clusters[idx] = cluster

print(clusters)

# Plot the data with the initial centers
plt.scatter(X[:, 0], X[:, 1])
plt.grid(True)
for i in clusters:
    center = clusters[i]['center']
    plt.scatter(center[0], center[1], marker='*', c='red')
plt.show()

def distance(p1, p2):
    return np.sqrt(np.sum((p1 - p2) ** 2))

# Implementing the E-Step: assign each point to its nearest center
def assign_clusters(X, clusters):
    for idx in range(X.shape[0]):
        dist = []
        curr_x = X[idx]
        for i in range(k):
            dis = distance(curr_x, clusters[i]['center'])
            dist.append(dis)
        curr_cluster = np.argmin(dist)
        clusters[curr_cluster]['points'].append(curr_x)
    return clusters

# Implementing the M-Step: move each center to the mean of its points
def update_clusters(X, clusters):
    for i in range(k):
        points = np.array(clusters[i]['points'])
        if points.shape[0] > 0:
            new_center = points.mean(axis=0)
            clusters[i]['center'] = new_center
        clusters[i]['points'] = []
    return clusters

def pred_cluster(X, clusters):
    pred = []
    for i in range(X.shape[0]):
        dist = []
        for j in range(k):
            dist.append(distance(X[i], clusters[j]['center']))
        pred.append(np.argmin(dist))
    return pred

# Alternate the E-step and M-step so the centers can converge
for _ in range(10):
    clusters = assign_clusters(X, clusters)
    clusters = update_clusters(X, clusters)

pred = pred_cluster(X, clusters)

plt.scatter(X[:, 0], X[:, 1], c=pred)
for i in clusters:
    center = clusters[i]['center']
    plt.scatter(center[0], center[1], marker='^', c='red')
plt.show()
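
For comparison, the same clustering can be obtained with scikit-learn's KMeans. This is a sketch only; n_clusters and random_state mirror the values used above, and n_init=10 is an assumed default choice.

from sklearn.cluster import KMeans

km = KMeans(n_clusters=3, n_init=10, random_state=23)
labels = km.fit_predict(X)

plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
            marker='^', c='red')
plt.show()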
