0% found this document useful (0 votes)

35 views58 pages

Naive Bayes

The document discusses several machine learning algorithms including naive bayes, linear regression, polynomial regression, k-nearest neighbors (KNN), and elastic net regression. Naive bayes, linear regression, and KNN are introduced and example code is provided to demonstrate implementation and use of each algorithm. Polynomial regression expands on linear regression by adding higher degree terms. Multiple linear regression and elastic net regression techniques are also overviewed.

Uploaded by

Dhaleshwar Prasad

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

35 views58 pages

Naive Bayes

Uploaded by

Dhaleshwar Prasad

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 58

NAIVE BAYES

# load the iris dataset

from sklearn.datasets import load_iris
iris = load_iris()

# store the feature matrix (X) and response vector (y)

X = iris.data
y = iris.target

# splitting X and y into training and testing sets

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
random_state=1)

# training the model on training set

from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# making predictions on the testing set

y_pred = gnb.predict(X_test)

# comparing actual response values (y_test) with predicted response values (y_pred)
from sklearn import metrics
print("Gaussian Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_test,
y_pred)*100)

--------------------------------------------------------------------
linear regression

import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):

# number of observations/points
n = np.size(x)

# mean of x and y vector

m_x = np.mean(x)
m_y = np.mean(y)

# calculating cross-deviation and deviation about x

SS_xy = np.sum(y*x) - n*m_y*m_x
SS_xx = np.sum(x*x) - n*m_x*m_x

# calculating regression coefficients

b_1 = SS_xy / SS_xx
b_0 = m_y - b_1*m_x

return (b_0, b_1)

def plot_regression_line(x, y, b):

# plotting the actual points as scatter plot
plt.scatter(x, y, color = "m",
marker = "o", s = 30)

# predicted response vector

y_pred = b[0] + b[1]*x

# plotting the regression line

plt.plot(x, y_pred, color = "g")

# putting labels
plt.xlabel('x')
plt.ylabel('y')

def main():
# observations / data
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# estimating coefficients
b = estimate_coef(x, y)
print("Estimated coefficients:\nb_0 = {}\
\nb_1 = {}".format(b

-----------------------------------

multiple linear regression

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, metrics

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+",
skiprows=22, header=None)

X = np.hstack([raw_df.values[::2, :],
raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

X_train, X_test,\
y_train, y_test = train_test_split(X, y,
test_size=0.4,
random_state=1)

reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)

# regression coefficients
print('Coefficients: ', reg.coef_)

# variance score: 1 means perfect prediction

print('Variance score: {}'.format(reg.score(X_test, y_test)))
# plot for residual error

# setting plot style

plt.style.use('fivethirtyeight')

# plotting residual errors in training data

plt.scatter(reg.predict(X_train),
reg.predict(X_train) - y_train,
color="green", s=10,
label='Train data')

# plotting residual errors in test data

plt.scatter(reg.predict(X_test),
reg.predict(X_test) - y_test,
color="blue", s=10,
label='Test data')

# plotting line for zero residual error

plt.hlines(y=0, xmin=0, xmax=50, linewidth=2)

# plotting legend
plt.legend(loc='upper right')

# plot title
plt.title("Residual errors")

# method call for showing the plot

plt.show()

---------------------------
polynomial

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import warnings

from sklearn.preprocessing import LabelEncoder

from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
warnings.filterwarnings('ignore')

df = pd.read_csv('Position_Salaries.csv')

X = df.iloc[:,1:2].values
y = df.iloc[:,2].values
x

from sklearn.linear_model import LinearRegression

lin_reg=LinearRegression()
lin_reg.fit(X,y)

from sklearn.preprocessing import PolynomialFeatures

poly_reg2=PolynomialFeatures(degree=2)
X_poly=poly_reg2.fit_transform(X)
lin_reg_2=LinearRegression()
lin_reg_2.fit(X_poly,y)

poly_reg3=PolynomialFeatures(degree=3)
X_poly3=poly_reg3.fit_transform(X)
lin_reg_3=LinearRegression()
lin_reg_3.fit(X_poly3,y)

plt.scatter(X,y,color='red')
plt.plot(X,lin_reg.predict(X),color='green')
plt.title('Simple Linear Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

plt.style.use('fivethirtyeight')
plt.scatter(X,y,color='red')
plt.plot(X,lin_reg_2.predict(poly_reg2.fit_transform(X)),color='green')
plt.plot(X,lin_reg_3.predict(poly_reg3.fit_transform(X)),color='yellow')
plt.title('Polynomial Linear Regression Degree 2')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()
plt.style.use('fivethirtyeight')
X_grid=np.arange(min(X),max(X),0.1) # This will give us a vector.We will have to
convert this into a matrix
X_grid=X_grid.reshape((len(X_grid),1))
plt.scatter(X,y,color='red')
plt.plot(X_grid,lin_reg_3.predict(poly_reg3.fit_transform(X_grid)),color='lightgree
n')

#plt.plot(X,lin_reg_3.predict(poly_reg3.fit_transform(X)),color='green')
plt.title('Polynomial Linear Regression Degree 3')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.show()

--------------------------------------------------

KNN

import math

def classifyAPoint(points,p,k=3):
'''
This function finds the classification of p using
k nearest neighbor algorithm. It assumes only two
groups and returns 0 if p belongs to group 0, else
1 (belongs to group 1).

Parameters -
points: Dictionary of training points having two keys - 0 and 1
Each key have a list of training data points belong to that

p : A tuple, test data point of the form (x,y)

k : number of nearest neighbour to consider, default is 3

'''

distance=[]
for group in points:
for feature in points[group]:

#calculate the euclidean distance of p from training points

euclidean_distance = math.sqrt((feature[0]-p[0])**2 +(feature[1]-
p[1])**2)

# Add a tuple of form (distance,group) in the distance list

distance.append((euclidean_distance,group))

# sort the distance list in ascending order

# and select first k distances
distance = sorted(distance)[:k]

freq1 = 0 #frequency of group 0

freq2 = 0 #frequency og group 1

for d in distance:
if d[1] == 0:
freq1 += 1
elif d[1] == 1:
freq2 += 1

return 0 if freq1>freq2 else 1

# driver function
def main():

# Dictionary of training points having two keys - 0 and 1

# key 0 have points belong to class 0
# key 1 have points belong to class 1

points = {0:[(1,12),(2,5),(3,6),(3,10),(3.5,8),(2,11),(2,9),(1,7)],
1:[(5,3),(3,2),(1.5,9),(7,2),(6,1),(3.8,1),(5.6,4),(4,2),(2,5)]}

# testing point p(x,y)

p = (2.5,7)

# Number of neighbours
k = 3

print("The value classified to unknown point is: {}".\

format(classifyAPoint(points,p,k)))

if __name__ == '__main__':
main()

--------------------------------------------------------------

ELASTIC NET REGRESSION

# Importing libraries

import numpy as np

import pandas as pd
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

# Elastic Net Regression

class ElasticRegression() :

def init( self, learning_rate, iterations, l1_penality, l2_penality ) :

self.learning_rate = learning_rate

self.iterations = iterations

self.l1_penality = l1_penality

self.l2_penality = l2_penality

# Function for model training

def fit( self, X, Y ) :

# no_of_training_examples, no_of_features

self.m, self.n = X.shape

# weight initialization

self.W = np.zeros( self.n )

self.b = 0

self.X = X

self.Y = Y

# gradient descent learning

for i in range( self.iterations ) :

self.update_weights()

return self

# Helper function to update weights in gradient descent

def update_weights( self ) :

Y_pred = self.predict( self.X )

# calculate gradients

dW = np.zeros( self.n )

for j in range( self.n ) :

if self.W[j] > 0 :

dW[j] = ( - ( 2 * ( self.X[:,j] ).dot( self.Y - Y_pred ) )

self.l1_penality + 2 * self.l2_penality *
self.W[j] ) / self.m

else :

dW[j] = ( - ( 2 * ( self.X[:,j] ).dot( self.Y - Y_pred ) )

- self.l1_penality + 2 * self.l2_penality *
self.W[j] ) / self.m

db = - 2 * np.sum( self.Y - Y_pred ) / self.m

# update weights

self.W = self.W - self.learning_rate * dW

self.b = self.b - self.learning_rate * db

return self

# Hypothetical function h( x )

def predict( self, X ) :

return X.dot( self.W ) + self.b

# Driver Code

def main() :

# Importing dataset

df = pd.read_csv( "salary_data.csv" )

X = df.iloc[:,:-1].values

Y = df.iloc[:,1].values

# Splitting dataset into train and test set

X_train, X_test, Y_train, Y_test = train_test_split( X, Y,

test_size = 1/3,
random_state = 0 )

# Model training

model = ElasticRegression( iterations = 1000,

learning_rate = 0.01, l1_penality = 500, l2_penality

= 1 )

model.fit( X_train, Y_train )

# Prediction on test set

Y_pred = model.predict( X_test )

print( "Predicted values ", np.round( Y_pred[:3], 2 ) )

print( "Real values ", Y_test[:3] )

print( "Trained W ", round( model.W[0], 2 ) )

print( "Trained b ", round( model.b, 2 ) )

# Visualization on test set

plt.scatter( X_test, Y_test, color = 'blue' )

plt.plot( X_test, Y_pred, color = 'orange' )

plt.title( 'Salary vs Experience' )

plt.xlabel( 'Years of Experience' )

plt.ylabel( 'Salary' )

plt.show()

if __name__ == "__main__" :

main()

----------------------------------------------------------------------------------
SVM

# Load the important packages

from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import SVC

# Load the datasets

cancer = load_breast_cancer()
X = cancer.data[:, :2]
y = cancer.target

#Build the model

svm = SVC(kernel="rbf", gamma=0.5, C=1.0)
# Trained the model
svm.fit(X, y)

# Plot Decision Boundary

DecisionBoundaryDisplay.from_estimator(
svm,
X,
response_method="predict",
cmap=plt.cm.Spectral,
alpha=0.8,
xlabel=cancer.feature_names[0],
ylabel=cancer.feature_names[1],
)

# Scatter plot
plt.scatter(X[:, 0], X[:, 1],
c=y,
s=20, edgecolors="k")
plt.show()

---------------------------------------------------------------------------

SVM------2

# importing scikit learn with make_blobs

from sklearn.datasets import make_blobs

# creating datasets X containing n_samples

# Y containing two classes
X, Y = make_blobs(n_samples=500, centers=2,
random_state=0, cluster_std=0.40)
import matplotlib.pyplot as plt
# plotting scatters
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring');
plt.show()

# creating linspace between -1 to 3.5

xfit = np.linspace(-1, 3.5)

# plotting scatter
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')

# plot a line between the different sets of data

for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
yfit = m * xfit + b
plt.plot(xfit, yfit, '-k')
plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none',
color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5);
plt.show()
# importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# reading csv file and extracting class column to y.

x = pd.read_csv("C:\...\cancer.csv")
a = np.array(x)
y = a[:,30] # classes having 0 and 1

# extracting two features

x = np.column_stack((x.malignant,x.benign))

# 569 samples and 2 features

x.shape

print (x),(y)

# import support vector classifier

# "Support Vector Classifier"
from sklearn.svm import SVC
clf = SVC(kernel='linear')

# fitting x samples and y classes

clf.fit(x, y)

clf.predict([[120, 990]])

clf.predict([[85, 550]])

--------------------------------------------------------------------------

DECISION TREE

# Importing the required packages

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

# Function to import the dataset

def importdata():
balance_data = pd.read_csv(
'https://archive.ics.uci.edu/ml/machine-learning-' +
'databases/balance-scale/balance-scale.data',
sep=',', header=None)

# Displaying dataset information

print("Dataset Length: ", len(balance_data))
print("Dataset Shape: ", balance_data.shape)
print("Dataset: ", balance_data.head())

return balance_data

# Function to split the dataset into features and target variables

def splitdataset(balance_data):

# Separating the target variable

X = balance_data.values[:, 1:5]
Y = balance_data.values[:, 0]

# Splitting the dataset into train and test

X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size=0.3, random_state=100)

return X, Y, X_train, X_test, y_train, y_test

def train_using_gini(X_train, X_test, y_train):

# Creating the classifier object

clf_gini = DecisionTreeClassifier(criterion="gini",
random_state=100,
max_depth=3, min_samples_leaf=5)

# Performing training
clf_gini.fit(X_train, y_train)
return clf_gini

def tarin_using_entropy(X_train, X_test, y_train):

# Decision tree with entropy

clf_entropy = DecisionTreeClassifier(
criterion="entropy", random_state=100,
max_depth=3, min_samples_leaf=5)

# Performing training
clf_entropy.fit(X_train, y_train)
return clf_entropy

# Function to make predictions

def prediction(X_test, clf_object):
y_pred = clf_object.predict(X_test)
print("Predicted values:")
print(y_pred)
return y_pred

# Placeholder function for cal_accuracy

def cal_accuracy(y_test, y_pred):
print("Confusion Matrix: ",
confusion_matrix(y_test, y_pred))
print("Accuracy : ",
accuracy_score(y_test, y_pred)*100)
print("Report : ",
classification_report(y_test, y_pred))

# Function to plot the decision tree

def plot_decision_tree(clf_object, feature_names, class_names):
plt.figure(figsize=(15, 10))
plot_tree(clf_object, filled=True, feature_names=feature_names,
class_names=class_names, rounded=True)
plt.show()

if __name__ == "__main__":
data = importdata()
X, Y, X_train, X_test, y_train, y_test = splitdataset(data)

clf_gini = train_using_gini(X_train, X_test, y_train)

clf_entropy = train_using_entropy(X_train, X_test, y_train)

# Visualizing the Decision Trees

plot_decision_tree(clf_gini, ['X1', 'X2', 'X3', 'X4'], ['L', 'B', 'R'])
plot_decision_tree(clf_entropy, ['X1', 'X2', 'X3', 'X4'], ['L', 'B', 'R'])

# Operational Phase
print("Results Using Gini Index:")
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)
print("Results Using Entropy:")
y_pred_entropy = prediction(X_test, clf_entropy)
cal_accuracy(y_test, y_pred_entropy)

-----------------------------------------------------------------------------

CART ( CLASSIFICATION AND REGRESSION )

REGRESSION DECISION TREE

from sklearn.tree import DecisionTreeClassifier

from sklearn.preprocessing import LabelEncoder

# Define the features and target variable

features = [
["red", "large"],
["green", "small"],
["red", "small"],
["yellow", "large"],
["green", "large"],
["orange", "large"],
]
target_variable = ["apple", "lime", "strawberry", "banana", "grape", "orange"]

# Flatten the features list for encoding

flattened_features = [item for sublist in features for item in sublist]

# Use a single LabelEncoder for all features and target variable

le = LabelEncoder()
le.fit(flattened_features + target_variable)

# Encode features and target variable

encoded_features = [le.transform(item) for item in features]
encoded_target = le.transform(target_variable)

# Create a CART classifier

clf = DecisionTreeClassifier()
# Train the classifier on the training set
clf.fit(encoded_features, encoded_target)

# Predict the fruit type for a new instance

new_instance = ["red", "large"]
encoded_new_instance = le.transform(new_instance)
predicted_fruit_type = clf.predict([encoded_new_instance])
decoded_predicted_fruit_type = le.inverse_transform(predicted_fruit_type)
print("Predicted fruit type:", decoded_predicted_fruit_type[0])

---------------------------------
Decision Tree Regression USING SKLEARN

# import numpy package for arrays and stuff

import numpy as np

# import matplotlib.pyplot for plotting our result

import matplotlib.pyplot as plt

# import pandas for importing csv files

import pandas as pd

# import dataset
# dataset = pd.read_csv('Data.csv')
# alternatively open up .csv file to read data

dataset = np.array(
[['Asset Flip', 100, 1000],
['Text Based', 500, 3000],
['Visual Novel', 1500, 5000],
['2D Pixel Art', 3500, 8000],
['2D Vector Art', 5000, 6500],
['Strategy', 6000, 7000],
['First Person Shooter', 8000, 15000],
['Simulator', 9500, 20000],
['Racing', 12000, 21000],
['RPG', 14000, 25000],
['Sandbox', 15500, 27000],
['Open-World', 16500, 30000],
['MMOFPS', 25000, 52000],
['MMORPG', 30000, 80000]
])

# print the dataset

print(dataset)
# select all rows by : and column 1
# by 1:2 representing features
X = dataset[:, 1:2].astype(int)

# print X
print(X)

# select all rows by : and column 2

# by 2 to Y representing labels
y = dataset[:, 2].astype(int)

# print y
print(y)

# import the regressor

from sklearn.tree import DecisionTreeRegressor

# create a regressor object

regressor = DecisionTreeRegressor(random_state = 0)

# fit the regressor with X and Y data

regressor.fit(X, y)

# predicting a new value

# test the output by changing values, like 3750

y_pred = regressor.predict([[3750]])

# print the predicted price

print("Predicted price: % d\n"% y_pred)

# arange for creating a range of values

# from min value of X to max value of X
# with a difference of 0.01 between two
# consecutive values
X_grid = np.arange(min(X), max(X), 0.01)
# reshape for reshaping the data into
# a len(X_grid)*1 array, i.e. to make
# a column out of the X_grid values
X_grid = X_grid.reshape((len(X_grid), 1))

# scatter plot for original data

plt.scatter(X, y, color = 'red')

# plot predicted data

plt.plot(X_grid, regressor.predict(X_grid), color = 'blue')

# specify title
plt.title('Profit to Production Cost (Decision Tree Regression)')

# specify X axis label

plt.xlabel('Production Cost')

# specify Y axis label

plt.ylabel('Profit')

# show the plot

plt.show()

# import export_graphviz
from sklearn.tree import export_graphviz

# export the decision tree to a tree.dot file

# for visualizing the plot easily anywhere
export_graphviz(regressor, out_file ='tree.dot',
feature_names =['Production Cost'])

------------------------------------------------------------------------------

K-MEAN CLUSTERING

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
X,y = make_blobs(n_samples = 500,n_features = 2,centers = 3,random_state = 23)

fig = plt.figure(0)
plt.grid(True)
plt.scatter(X[:,0],X[:,1])
plt.show()

k = 3

clusters = {}
np.random.seed(23)

for idx in range(k):

center = 2*(2*np.random.random((X.shape[1],))-1)
points = []
cluster = {
'center' : center,
'points' : []
}

clusters[idx] = cluster

clusters

plt.scatter(X[:,0],X[:,1])
plt.grid(True)
for i in clusters:
center = clusters[i]['center']
plt.scatter(center[0],center[1],marker = '*',c = 'red')
plt.show()

def distance(p1,p2):
return np.sqrt(np.sum((p1-p2)**2))
#Implementing E step
def assign_clusters(X, clusters):
for idx in range(X.shape[0]):
dist = []

curr_x = X[idx]

for i in range(k):
dis = distance(curr_x,clusters[i]['center'])
dist.append(dis)
curr_cluster = np.argmin(dist)
clusters[curr_cluster]['points'].append(curr_x)
return clusters

#Implementing the M-Step

def update_clusters(X, clusters):
for i in range(k):
points = np.array(clusters[i]['points'])
if points.shape[0] > 0:
new_center = points.mean(axis =0)
clusters[i]['center'] = new_center

clusters[i]['points'] = []
return clusters

def pred_cluster(X, clusters):

pred = []
for i in range(X.shape[0]):
dist = []
for j in range(k):
dist.append(distance(X[i],clusters[j]['center']))
pred.append(np.argmin(dist))
return pred

clusters = assign_clusters(X,clusters)
clusters = update_clusters(X,clusters)
pred = pred_cluster(X,clusters)

plt.scatter(X[:,0],X[:,1],c = pred)
for i in clusters:
center = clusters[i]['center']
plt.scatter(center[0],center[1],marker = '^',c = 'red')
plt.show()

-----------------------------------

EXAMPLE TWO K-MEAN CLUSTERING

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

X, y = load_iris(return_X_y=True)

#Find optimum number of cluster

sse = [] #SUM OF SQUARED ERROR
for k in range(1,11):
km = KMeans(n_clusters=k, random_state=2)
km.fit(X)
sse.append(km.inertia_)

sns.set_style("whitegrid")
g=sns.lineplot(x=range(1,11), y=sse)

g.set(xlabel ="Number of cluster (k)",

ylabel = "Sum Squared Error",
title ='Elbow Method')

plt.show()

kmeans = KMeans(n_clusters = 3, random_state = 2)

kmeans.fit(X)
kmeans.cluster_centers_

pred = kmeans.fit_predict(X)
pred

# PLOT the cluster center with data points

plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.scatter(X[:,0],X[:,1],c = pred, cmap=cm.Accent)
plt.grid(True)
for center in kmeans.cluster_centers_:
center = center[:2]
plt.scatter(center[0],center[1],marker = '^',c = 'red')
plt.xlabel("petal length (cm)")
plt.ylabel("petal width (cm)")

plt.subplot(1,2,2)
plt.scatter(X[:,2],X[:,3],c = pred, cmap=cm.Accent)
plt.grid(True)
for center in kmeans.cluster_centers_:
center = center[2:4]
plt.scatter(center[0],center[1],marker = '^',c = 'red')
plt.xlabel("sepal length (cm)")
plt.ylabel("sepal width (cm)")
plt.show()

-------------------------------------------------------------------
Customer Segmentation using Unsupervised Machine Learning in Python
by k mean clustering

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv('new.csv')
df.head()

df.shape

df.info()

df.describe().T

df['Accepted'] = df['Accepted'].str.replace('Accepted', '')

for col in df.columns:

temp = df[col].isnull().sum()
if temp > 0:
print(f'Column {col} contains {temp} null values.')

df = df.dropna()
print("Total missing values are:", len(df))
df.nunique()

parts = df["Dt_Customer"].str.split("-", n=3, expand=True)

df["day"] = parts[0].astype('int')
df["month"] = parts[1].astype('int')
df["year"] = parts[2].astype('int')

df.drop(['Z_CostContact', 'Z_Revenue', 'Dt_Customer'],

axis=1,
inplace=True)

floats, objects = [], []

for col in df.columns:
if df[col].dtype == object:
objects.append(col)
elif df[col].dtype == float:
floats.append(col)

print(objects)
print(floats)

plt.subplots(figsize=(15, 10))
for i, col in enumerate(objects):
plt.subplot(2, 2, i + 1)
sb.countplot(df[col])
plt.show()
df['Marital_Status'].value_counts()

for col in df.columns:

if df[col].dtype == object:
le = LabelEncoder()
df[col] = le.fit_transform(df[col])

scaler = StandardScaler()
data = scaler.fit_transform(df)

from sklearn.manifold import TSNE

model = TSNE(n_components=2, random_state=0)
tsne_data = model.fit_transform(df)
plt.figure(figsize=(7, 7))
plt.scatter(tsne_data[:, 0], tsne_data[:, 1])
plt.show()

error = []
for n_clusters in range(1, 21):
model = KMeans(init='k-means++',
n_clusters=n_clusters,
max_iter=500,
random_state=22)
model.fit(df)
error.append(model.inertia_)
plt.figure(figsize=(10, 5))
sb.lineplot(x=range(1, 21), y=error)
sb.scatterplot(x=range(1, 21), y=error)
plt.show()

# create clustering model with optimal k=5

model = KMeans(init='k-means++',
n_clusters=5,
max_iter=500,
random_state=22)
segments = model.fit_predict(df)

plt.figure(figsize=(7, 7))
sb.scatterplot(tsne_data[:, 0], tsne_data[:, 1], hue=segments)
plt.show()

-------------------------------------------------------------

Mini-Batch Gradient Descent:

# importing dependencies
import numpy as np
import matplotlib.pyplot as plt

# creating data
mean = np.array([5.0, 6.0])
cov = np.array([[1.0, 0.95], [0.95, 1.2]])
data = np.random.multivariate_normal(mean, cov, 8000)

# visualising data
plt.scatter(data[:500, 0], data[:500, 1], marker='.')
plt.show()

# train-test-split
data = np.hstack((np.ones((data.shape[0], 1)), data))

split_factor = 0.90
split = int(split_factor * data.shape[0])

X_train = data[:split, :-1]

y_train = data[:split, -1].reshape((-1, 1))
X_test = data[split:, :-1]
y_test = data[split:, -1].reshape((-1, 1))

print(& quot
Number of examples in training set= % d & quot
% (X_train.shape[0]))
print(& quot
Number of examples in testing set= % d & quot
% (X_test.shape[0]))

# linear regression using "mini-batch" gradient descent

# function to compute hypothesis / predictions

def hypothesis(X, theta):

return np.dot(X, theta)

# function to compute gradient of error function w.r.t. theta

def gradient(X, y, theta):

h = hypothesis(X, theta)
grad = np.dot(X.transpose(), (h - y))
return grad

# function to compute the error for current values of theta

def cost(X, y, theta):

h = hypothesis(X, theta)
J = np.dot((h - y).transpose(), (h - y))
J /= 2
return J[0]

# function to create a list containing mini-batches

def create_mini_batches(X, y, batch_size):

mini_batches = []
data = np.hstack((X, y))
np.random.shuffle(data)
n_minibatches = data.shape[0] // batch_size
i = 0

for i in range(n_minibatches + 1):

mini_batch = data[i * batch_size:(i + 1)*batch_size, :]
X_mini = mini_batch[:, :-1]
Y_mini = mini_batch[:, -1].reshape((-1, 1))
mini_batches.append((X_mini, Y_mini))
if data.shape[0] % batch_size != 0:
mini_batch = data[i * batch_size:data.shape[0]]
X_mini = mini_batch[:, :-1]
Y_mini = mini_batch[:, -1].reshape((-1, 1))
mini_batches.append((X_mini, Y_mini))
return mini_batches

# function to perform mini-batch gradient descent

def gradientDescent(X, y, learning_rate=0.001, batch_size=32):

theta = np.zeros((X.shape[1], 1))
error_list = []
max_iters = 3
for itr in range(max_iters):
mini_batches = create_mini_batches(X, y, batch_size)
for mini_batch in mini_batches:
X_mini, y_mini = mini_batch
theta = theta - learning_rate * gradient(X_mini, y_mini, theta)
error_list.append(cost(X_mini, y_mini, theta))

return theta, error_list

theta, error_list = gradientDescent(X_train, y_train)

print("Bias = ", theta[0])
print("Coefficients = ", theta[1:])

# visualising gradient descent

plt.plot(error_list)
plt.xlabel("Number of iterations")
plt.ylabel("Cost")
plt.show()
# predicting output for X_test
y_pred = hypothesis(X_test, theta)
plt.scatter(X_test[:, 1], y_test[:, ], marker='.')
plt.plot(X_test[:, 1], y_pred, color='orange')
plt.show()

# calculating error in predictions

error = np.sum(np.abs(y_test - y_pred) / y_test.shape[0])
print(& quot
Mean absolute error = & quot
, error)

------------------------------------------

# -- coding: utf-8 --

"""Untitled15.ipynb

Automatically generated by Colaboratory.

Original file is located at

https://colab.research.google.com/drive/1j5YYWDWSSosYhtqstL5P8DuC7RrkpHwx
"""

import pandas as pd
import numpy as np

# Generating synthetic data of 50000 points between value1 and 100

value1 = 1 # lower bound
data = value1 + np.random.rand(50000, 2) * (100 - value1) # assuming 2 features,
modify as needed

# Creating a DataFrame
df = pd.DataFrame(data, columns=['Feature1', 'Feature2'])

# Save to CSV
df.to_csv('synthetic_data.csv', index=False)

# Read CSV file

df = pd.read_csv('synthetic_data.csv')

# Display the first few rows of the dataset

print(df.head())
# You can explore the data further using df.describe(), df.info(), etc.

import numpy as np

def kmeans_algorithm(data, k, max_iters=100, tolerance=1e-4):

# Randomly initialize centroids
centroids = data[np.random.choice(data.shape[0], k, replace=False)]

for _ in range(max_iters):
# Assign each data point to the closest centroid
distances = np.linalg.norm(data - centroids[:, np.newaxis], axis=2)
labels = np.argmin(distances, axis=0)

# Update centroids
new_centroids = np.array([data[labels == j].mean(axis=0) for j in
range(k)])

# Check for convergence

if np.linalg.norm(new_centroids - centroids) < tolerance:
break

centroids = new_centroids

return labels, centroids

import numpy as np
import matplotlib.pyplot as plt

def find_optimal_k(data, max_k=20):

inertia = []

for k in range(1, max_k + 1):

labels, centroids = kmeans_algorithm(data, k)
inertia.append(np.sum(np.min(np.linalg.norm(data - centroids[:,
np.newaxis], axis=2), axis=0)))

# Plotting the Elbow Method graph

plt.plot(range(1, max_k + 1), inertia, marker='o')
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.show()

# Analyzing the Elbow Method graph to find the optimal K

diff = np.diff(inertia, 2)
k_optimal = np.argmax(diff) + 1

return k_optimal

# Assuming df is a DataFrame with columns 'Feature1' and 'Feature2'

data = df[['Feature1', 'Feature2']].values

# Find the optimal K

optimal_k = find_optimal_k(data)

print(f'The optimal value of K is: {optimal_k}')

# Apply K-Means clustering

labels, centroids = kmeans_algorithm(data, optimal_k)

# Plotting the clustered data

for cluster in range(optimal_k):
cluster_points = data[labels == cluster]
plt.scatter(cluster_points[:, 0], cluster_points[:, 1])

plt.title('K-Means Clustering')
plt.xlabel('Feature1')
plt.ylabel('Feature2')
plt.show()

1. Manual Function
def load_csv(filepath):
data = []
col = []
checkcol = False
with open(filepath) as f:
for val in f.readlines():
val = val.replace("\n","")
val = val.split(',')
if checkcol is False:
col = val
checkcol = True
else:
data.append(val)
df = pd.DataFrame(data=data, columns=col)
return df
2. Numpy.loadtxt function
df = np.loadtxt('convertcsv.csv', delimeter = ',')
print(df[:5,:])
3. Numpy.genfromtxt()
data = np.genfromtxt('100 Sales Records.csv', delimiter=',')
>>> pd.DataFrame(data)
4. Pandas.read_csv()
>>> pdDf = pd.read_csv('100 Sales Record.csv')
>>> pdDf.head()
5. Pickle
with open('test.pkl','wb') as f:
pickle.dump(pdDf, f)
-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
--
WEEK-2: Data preprocessing

1. Handling missing values

 isnull()
 notnull()
 dropna()
 fillna()
 replace()
 interpolate()
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np
# dictionary of lists
dict = {'First Score':[100, 90, np.nan, 95],
'Second Score': [30, 45, 56, np.nan],
'Third Score':[np.nan, 40, 80, 98]}
# creating a dataframe from list
df = pd.DataFrame(dict)
# using isnull() function
df.isnull()
# importing pandas package
import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")

# creating bool series True for NaN values

bool_series = pd.isnull(data["Gender"])
# filtering data
# displaying data only with Gender = NaN
data[bool_series]
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np
# dictionary of lists
dict = {'First Score':[100, 90, np.nan, 95],
'Second Score': [30, 45, 56, np.nan],
'Third Score':[np.nan, 40, 80, 98]}
# creating a dataframe using dictionary
df = pd.DataFrame(dict)
# using notnull() function
df.notnull()
# importing pandas package
import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")
# creating bool series True for NaN values
bool_series = pd.notnull(data["Gender"])

# filtering data
# displaying data only with Gender = Not NaN
data[bool_series]
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np
# dictionary of lists
dict = {'First Score':[100, 90, np.nan, 95],
'Second Score': [30, 45, 56, np.nan],
'Third Score':[np.nan, 40, 80, 98]}
# creating a dataframe from dictionary
df = pd.DataFrame(dict)
# filling missing value using fillna()
df.fillna(0)
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np
# dictionary of lists
dict = {'First Score':[100, 90, np.nan, 95],
'Second Score': [30, 45, 56, np.nan],
'Third Score':[np.nan, 40, 80, 98]}

# creating a dataframe from dictionary

df = pd.DataFrame(dict)
# filling a missing value with
# previous ones
df.fillna(method ='pad')
# importing pandas as pd
import pandas as pd
# importing numpy as np
import numpy as np
# dictionary of lists
dict = {'First Score':[100, 90, np.nan, 95],
'Second Score': [30, 45, 56, np.nan],
'Third Score':[np.nan, 40, 80, 98]}
# creating a dataframe from dictionary
df = pd.DataFrame(dict)
# filling null value using fillna() function
df.fillna(method ='bfill')
---------------------------------------------------------------------------------
---------------------------------------------------------------------------------

WEEK-3: Dimensionality Reduction

1. Implementation of PCA

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
#import the breast _cancer dataset
from sklearn.datasets import load_breast_cancer
data=load_breast_cancer()
data.keys()
# Check the output classes
print(data['target_names'])
# Check the input attributes
print(data['feature_names'])
# construct a dataframe using pandas
df1=pd.DataFrame(data['data'],columns=data['feature_names'])
# Scale data before applying PCA
scaling=StandardScaler()
# Use fit and transform method
scaling.fit(df1)
Scaled_data=scaling.transform(df1)
# Set the n_components=3
principal=PCA(n_components=3)
principal.fit(Scaled_data)
x=principal.transform(Scaled_data)

# Check the dimensions of data after PCA

print(x.shape)
# Check the values of eigen vectors
# prodeced by principal components
principal.components_
plt.figure(figsize=(10,10))
plt.scatter(x[:,0],x[:,1],c=data['target'],cmap='plasma')
plt.xlabel('pc1')
plt.ylabel('pc2')
# import relevant libraries for 3d graph
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10,10))
# choose projection 3d for creating a 3d graph
axis = fig.add_subplot(111, projection='3d')
# x[:,0]is pc1,x[:,1] is pc2 while x[:,2] is pc3
axis.scatter(x[:,0],x[:,1],x[:,2], c=data['target'],cmap='plasma')
axis.set_xlabel("PC1", fontsize=10)
axis.set_ylabel("PC2", fontsize=10)
axis.set_zlabel("PC3", fontsize=10)

-----------------------------------------------------------------
week -4 Write a python program to demonstrate various data visualisation

# importing pandas package

import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")
# Printing the first 10 to 24 rows of
# the data frame for visualization
data[10:25]
# importing pandas package
import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")
# Printing the first 10 to 24 rows of
# the data frame for visualization
data[10:25]
# importing pandas package
import pandas as pd
# making data frame from csv file
data = pd.read_csv("employees.csv")
# will replace Nan value in dataframe with value -99
data.replace(to_replace = np.nan, value = -99)
# importing pandas as pd
import pandas as pd
# Creating the dataframe

df = pd.DataFrame({"A":[12, 4, 5, None, 1],

"B":[None, 2, 54, 3, None],
"C":[20, 16, None, 3, 8],
"D":[14, 3, None, None, 6]})
# Print the dataframe
Df
# importing the required module
import matplotlib.pyplot as plt
# x axis values
x = [1,2,3]
# corresponding y axis values
y = [2,4,1]
# plotting the points
plt.plot(x, y)
# naming the x axis
plt.xlabel('x - axis')
# naming the y axis
plt.ylabel('y - axis')
# giving a title to my graph
plt.title('My first graph!')
# function to show the plot
plt.show()

return probabilities
def predict(info, test):
probabilities = calculateClassProbabilities(info, test)
bestLabel, bestProb = None, -1
for classValue, probability in probabilities.items():
if bestLabel is None or probability > bestProb:
bestProb = probability
bestLabel = classValue
return bestLabel
def getPredictions(info, test):
predictions = []
for i in range(len(test)):
result = predict(info, test[i])
predictions.append(result)
return predictions
def accuracy_rate(test, predictions):
correct = 0
for i in range(len(test)):
if test[i][-1] == predictions[i]:
correct += 1
return (correct / float(len(test))) * 100.0
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\
filedata.csv'
mydata = csv.reader(open(filename, "rt"))
mydata = list(mydata)
mydata = encode_class(mydata)
for i in range(len(mydata)):
mydata[i] = [float(x) for x in mydata[i]]
ratio = 0.7

train_data, test_data = splitting(mydata, ratio)

print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))
info = MeanAndStdDevForClass(train_data)
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)
1. Implementation of SVM Classification
# importing scikit learn with make_blobs
from sklearn.datasets.samples_generator import make_blobs
# creating datasets X containing n_samples
# Y containing two classes
X, Y = make_blobs(n_samples=500, centers=2,random_state=0, cluster_std=0.40)
import matplotlib.pyplot as plt
# plotting scatters
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring');
plt.show()
# creating linspace between -1 to 3.5
xfit = np.linspace(-1, 3.5)
# plotting scatter
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
# plot a line between the different sets of data
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
yfit = m * xfit + b
plt.plot(xfit, yfit, '-k')
plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none',
color='#AAAAAA', alpha=0.4)
plt.xlim(-1, 3.5);
plt.show()

# importing required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
x = pd.read_csv("C:\...\cancer.csv")
a = np.array(x)
y = a[:,30] # classes having 0 and 1
x = np.column_stack((x.malignant,x.benign))
x.shape
print (x),(y)

----------------------------------------------------------------------------------

WEEK-5: Supervised Learning

1. Implementation of Linear Regression

import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
n = np.size(x)
m_x = np.mean(x)
m_y = np.mean(y)
SS_xy = np.sum(y*x) - n*m_y*m_x
SS_xx = np.sum(x*x) - n*m_x*m_x
b_1 = SS_xy / SS_xx
b_0 = m_y - b_1*m_x
return (b_0, b_1)
def plot_regression_line(x, y, b):
# plotting the actual points as scatter plot
plt.scatter(x, y, color = "m",marker = "o", s = 30)
y_pred = b[0] + b[1]*x
plt.plot(x, y_pred, color = "g")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
def main():
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
b = estimate_coef(x, y)
print("Estimated coefficients:\nb_0 = {}\\nb_1 = {}".format(b[0], b[1]))
plot_regression_line(x, y, b)
if __name__ == "__main__":

-----------------------------------------------------------------------------------
--------------------------------------------------------------------------

WEEK-6 : Implementation of Logistic regression

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings( "ignore" )
class LogitRegression() :
def __init ( self, learning_rate, iterations ) :
self.learning_rate = learning_rate
self.iterations = iterations
def fit( self, X, Y ) :
self.m, self.n = X.shape
self.W = np.zeros( self.n )
self.b = 0
self.X = X
self.Y = Y
for i in range( self.iterations ) :
self.update_weights()
return self
def update_weights( self ) :
A = 1 / ( 1 + np.exp( - ( self.X.dot( self.W ) + self.b ) ) )
tmp = ( A - self.Y.T )
tmp = np.reshape( tmp, self.m )
dW = np.dot( self.X.T, tmp ) / self.m
db = np.sum( tmp ) / self.m
self.W = self.W - self.learning_rate * dW
self.b = self.b - self.learning_rate * db
return self
def predict( self, X ) :
Z = 1 / ( 1 + np.exp( - ( X.dot( self.W ) + self.b ) ) )
Y = np.where( Z > 0.5, 1, 0 )
return Y
def main() :
df = pd.read_csv( "diabetes.csv" )
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1:].values
X_train, X_test, Y_train, Y_test = train_test_split(
X, Y, test_size = 1/3, random_state = 0 )
model = LogitRegression( learning_rate = 0.01, iterations = 1000 )
model.fit( X_train, Y_train )
model1 = LogisticRegression()
model1.fit( X_train, Y_train)
Y_pred = model.predict( X_test )
Y_pred1 = model1.predict( X_test )
correctly_classified = 0
correctly_classified1 = 0
count = 0
for count in range( np.size( Y_pred ) ) :
if Y_test[count] == Y_pred[count] :
correctly_classified = correctly_classified + 1
if Y_test[count] == Y_pred1[count] :
correctly_classified1 = correctly_classified1 + 1
count = count + 1
print( "Accuracy on test set by our model : ", (
correctly_classified / count ) * 100 )
print( "Accuracy on test set by sklearn model : ", (
correctly_classified1 / count ) * 100 )
if __name__ == "__main__" :
main()
# importing pandas package
import pandas as pd

# making data frame from csv file

data = pd.read_csv("employees.csv")
# Printing the first 10 to 24 rows of
# the data frame for visualization
data[10:25]

-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
--

WEEK-7: Supervised Learning

1. Implementation of Decision tree classification

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
def importdata():
balance_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-
'+'databases/balance-scale/balance-scale.data',sep= ',', header = None)
print ("Dataset Length: ", len(balance_data))
print ("Dataset Shape: ", balance_data.shape)
print ("Dataset: ",balance_data.head())
return balance_data
def splitdataset(balance_data):
X = balance_data.values[:, 1:5]
Y = balance_data.values[:, 0]
X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size = 0.3, random_state = 100)
return X, Y, X_train, X_test, y_train, y_test
def train_using_gini(X_train, X_test, y_train):
clf_gini = DecisionTreeClassifier(criterion = "gini",random_state =
100,max_depth=3,
min_samples_leaf=5)
clf_gini.fit(X_train, y_train)
return clf_gini
def tarin_using_entropy(X_train, X_test, y_train):
clf_entropy = DecisionTreeClassifier(criterion = "entropy", random_state =
100,max_depth = 3,
min_samples_leaf = 5)
clf_entropy.fit(X_train, y_train)

return clf_entropy
def prediction(X_test, clf_object):
y_pred = clf_object.predict(X_test)
print("Predicted values:")
print(y_pred)
return y_pred
def cal_accuracy(y_test, y_pred):
print("Confusion Matrix: ",confusion_matrix(y_test, y_pred))print ("Accuracy :
",accuracy_score(y_test,y_pred)*100)
print("Report : ",
classification_report(y_test, y_pred))
def main():
data = importdata()
X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
clf_gini = train_using_gini(X_train, X_test, y_train)
clf_entropy = tarin_using_entropy(X_train, X_test, y_train)
print("Results Using Gini Index:")
y_pred_gini = prediction(X_test, clf_gini)
cal_accuracy(y_test, y_pred_gini)
print("Results Using Entropy:")
y_pred_entropy = prediction(X_test, clf_entropy)
cal_accuracy(y_test, y_pred_entropy)
if __name__=="__main__":
main()
1. Implementation of K-nearest Neighbor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
y = irisData.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2,

random_state=42)neighbors
= np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))
for i, k in enumerate(neighbors):
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
train_accuracy[i] = knn.score(X_train, y_train)
test_accuracy[i] = knn.score(X_test, y_test)
plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label = 'Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()

-----------------------------------------------------------------------------------
------------------------------------------

WEEK-8
Implementation of Naïve Bayes classifier algorithm

import math
import random
import csv
def encode_class(mydata):
classes = []
for i in range(len(mydata)):
if mydata[i][-1] not in classes:
classes.append(mydata[i][-1])
for i in range(len(classes)):
for j in range(len(mydata)):
if mydata[j][-1] == classes[i]:
mydata[j][-1] = i
return mydata
def splitting(mydata, ratio):
train_num = int(len(mydata) * ratio)
train = []
test = list(mydata)
while len(train) < train_num:
index = random.randrange(len(test))
train.append(test.pop(index))
return train, test
def groupUnderClass(mydata):
dict = {}
for i in range(len(mydata)):
if (mydata[i][-1] not in dict):
dict[mydata[i][-1]] = []
dict[mydata[i][-1]].append(mydata[i])
return dict

return sum(numbers) / float(len(numbers))

def std_dev(numbers):
avg = mean(numbers)
variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
return math.sqrt(variance)
def MeanAndStdDev(mydata):
info = [(mean(attribute), std_dev(attribute)) for attribute in zip(*mydata)]
del info[-1]
return info
def MeanAndStdDevForClass(mydata):
info = {}
dict = groupUnderClass(mydata)
for classValue, instances in dict.items():
info[classValue] = MeanAndStdDev(instances)
return info
def calculateGaussianProbability(x, mean, stdev):
expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo
def calculateClassProbabilities(info, test):
probabilities = {}
for classValue, classSummaries in info.items():
probabilities[classValue] = 1
for i in range(len(classSummaries)):
mean, std_dev = classSummaries[i]
x = test[i]
probabilities[classValue] *= calculateGaussianProbability(x, mean, std_dev)

-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
--
Week-9: Implementation of K-nearest Neighbor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
y = irisData.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2,
random_state=42)
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))
for i, k in enumerate(neighbors):
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
train_accuracy[i] = knn.score(X_train, y_train)
test_accuracy[i] = knn.score(X_test, y_test)
plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')
plt.plot(neighbors, train_accuracy, label = 'Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()

-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
-------

WEEK-10: Build Artificial Neural Network model with back propagation

Let’s first understand the term neural networks. In a neural network, where neurons
are
fed inputs which then neurons consider the weighted sum over them and pass it by an
activation function and passes out the output to next neuron.

Python: To run our script

Pip: Necessary to install Python packages
pip install tensorflow
pip install keras
# Importing libraries
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence# Our dictionary will contain
only of the top 7000 words appearing most frequently
top_words = 7000# Now we split our data-set into training and test data
(X_train, y_train), (X_test, y_test) =
imdb.load_data(num_words=top_words)# Looking at the nature of training
data
print(X_train[0])
print(y_train[0])print('Shape of training data: ')
print(X_train.shape)
print(y_train.shape)print('Shape of test data: ')
print(X_test.shape)
print(y_test.shape)

# Padding the data samples to a maximum review length in words

max_words = 450X_train = sequence.pad_sequences(X_train,
maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)# Building the
CNN Model
model = Sequential() # initilaizing the Sequential nature for CNN
model# Adding the embedding layer which will take in maximum of 450
words as input and provide a 32 dimensional output of those words which
belong in the top_words dictionary

model.add(Embedding(top_words, 32, input_length=max_words))

model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
metrics=['accuracy'])
model.summary()

-----------------------------------------------------------------------------------
----------------------------------------------------------------------------
WEEK-11
Implementing Random Forest
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
data = pd.read_csv('Salaries.csv')
print(data)
# Fitting Random Forest Regression to the dataset
# import the regressor
from sklearn.ensemble import RandomForestRegressor
# create regressor object
regressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
# fit the regressor with x and y data
regressor.fit(x, y)
Y_pred = regressor.predict(np.array([6.5]).reshape(1, 1)) # test the output by
changing values
# Visualising the Random Forest Regression results
# arrange for creating a range of values
# from min value of x to max
# value of x with a difference of 0.01
# between two consecutive values
X_grid = np.arrange(min(x), max(x), 0.01)
# reshape for reshaping the data into a len(X_grid)*1 array,
# i.e. to make a column out of the X_grid value
X_grid = X_grid.reshape((len(X_grid), 1))
# Scatter plot for original data
plt.scatter(x, y, color = 'blue')
# plot predicted data
plt.plot(X_grid, regressor.predict(X_grid),color = 'green')
plt.title('Random Forest Regression')
plt.xlabel('Position level') plt.ylabel('Salary')

------------------------------------------------------

WEEK-11(B) : Model Selection, Bagging and Boosting

1. Cross Validation
# This code may not be run on GFG IDE
# as required packages are not found.
# importing cross-validation from sklearn package.from sklearn import
cross_validation
# value of K is 10.
data = cross_validation.KFold(len(train_set), n_folds=10, indices=False)
2. Implementing AdaBoost
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
import warnings
warnings.filterwarnings("ignore")
# Reading the dataset from the csv file
# separator is a vertical line, as seen in the dataset
data = pd.read_csv("Iris.csv")
# Printing the shape of the dataset
print(data.shape)
data = data.drop('Id',axis=1)
X = data.iloc[:,:-1]
y = data.iloc[:,-1]
print("Shape of X is %s and shape of y is %s"%(X.shape,y.shape))
total_classes = y.nunique()
print("Number of unique species in dataset are: ",total_classes)
distribution = y.value_counts()
print(distribution)
X_train,X_val,Y_train,Y_val = train_test_split(X,y,test_size=0.25,random_state=28)
print("The accuracy of the model on validation set is",
adb_model.score(X_val,Y_val))

def ReadData(fileName):
# Read the file, splitting by lines
f = open(fileName, 'r');
lines = f.read().splitlines();
f.close();
items = [];
for i in range(1, len(lines)):
line = lines[i].split(',');
itemFeatures = [];
for j in range(len(line)-1):
# Convert feature value to float
v = float(line[j]);
# Add feature value to dict
itemFeatures.append(v);
items.append(itemFeatures);
shuffle(items);
return items;
def FindColMinMax(items):n
= len(items[0]);
minima = [sys.maxint for i in range(n)];
maxima = [-sys.maxint -1 for i in range(n)];
for item in items:
for f in range(len(item)):
if (item[f] < minima[f]):
minima[f] = item[f];
if (item[f] > maxima[f]):
maxima[f] = item[f];
return minima,maxima;

def InitializeMeans(items, k, cMin, cMax):

# Initialize means to random numbers between
# the min and max of each column/feature
f = len(items[0]); # number of features
means = [[0 for i in range(f)] for j in range(k)];
for mean in means:
for i in range(len(mean)):
# Set value to a random float
# (adding +-1 to avoid a wide placement of a mean)
mean[i] = uniform(cMin[i]+1, cMax[i]-1);
return means;
def EuclideanDistance(x, y):
S = 0; # The sum of the squared differences of the elements
for i in range(len(x)):
S += math.pow(x[i]-y[i], 2)
#The square root of the sum
return math.sqrt(S)
def UpdateMean(n,mean,item):
for i in range(len(mean)):
m = mean[i];
m = (m*(n-1)+item[i])/float(n);
mean[i] = round(m, 3);
return mean;
def Classify(means,item):
# Classify item to the mean with minimum distance
minimum = sys.maxint;
index = -1;
for i in range(len(means)):
# Find distance from item to mean

dis = EuclideanDistance(item, means[i]);

if (dis < minimum):
minimum = dis;
index = i;
return index;
def CalculateMeans(k,items,maxIterations=100000):
# Find the minima and maxima for columns
cMin, cMax = FindColMinMax(items);
# Initialize means at random points
means = InitializeMeans(items,k,cMin,cMax);
# Initialize clusters, the array to hold
# the number of items in a class
clusterSizes= [0 for i in range(len(means))];
# An array to hold the cluster an item is in
belongsTo = [0 for i in range(len(items))];
# Calculate means
for e in range(maxIterations):
# If no change of cluster occurs, halt
noChange = True;
for i in range(len(items)):
item = items[i];
# Classify item into a cluster and update the
# corresponding means.
index = Classify(means,item);
clusterSizes[index] += 1;
cSize = clusterSizes[index];
means[index] = UpdateMean(cSize,means[index],item);
# Item changed cluster
if(index != belongsTo[i]):
noChange = False;
belongsTo[i] = index;

# Nothing changed, return

if (noChange):
break;
return means;
def FindClusters(means,items):
clusters = [[] for i in range(len(means))]; # Init clusters
for item in items:
# Classify item into a cluster
index = Classify(means,item);
# Add item to cluster
clusters[index].append(item);
return clusters;

-----------------------------------------------------------------------------------
------------------

hypothesis

import math
import csv
def load_csv(filename):
lines=csv.reader(open(filename,"r"));
dataset = list(lines)
headers = dataset.pop(0)
return dataset,headers
class Node:
def init (self,attribute):
self.attribute=attribute
self.children=[]
self.answer=""
def subtables(data,col,delete):
dic={}
coldata=[row[col] for row in data]
attr=list(set(coldata))
counts=[0]*len(attr)
r=len(data)
c=len(data[0])
for x in range(len(attr)):
for y in range(r):
if data[y][col]==attr[x]:
counts[x]+=1
for x in range(len(attr)):
dic[attr[x]]=[[0 for i in range(c)] for j in
range(counts[x])]
pos=0
for y in range(r):
if data[y][col]==attr[x]:
if delete:
del data[y][col]
dic[attr[x]][pos]=data[y]
pos+=1
return attr,dic
def entropy(S):
attr=list(set(S))
if len(attr)==1:
return 0
counts=[0,0]
for i in range(2):
counts[i]=sum([1 for x in S if attr[i]==x])/(len(S)*1.0)
sums=0
for cnt in counts:
sums+=-1*cnt*math.log(cnt,2)
return sums
def compute_gain(data,col):
attr,dic = subtables(data,col,delete=False)
total_size=len(data)
entropies=[0]*len(attr)
ratio=[0]*len(attr)
total_entropy=entropy([row[-1] for row in data])
for x in range(len(attr)):
ratio[x]=len(dic[attr[x]])/(total_size*1.0)
entropies[x]=entropy([row[-1] for row in
dic[attr[x]]])
total_entropy-=ratio[x]*entropies[x]
return total_entropy
def build_tree(data,features):
lastcol=[row[-1] for row in data]
if(len(set(lastcol)))==1:
node=Node("")
node.answer=lastcol[0]
return node
n=len(data[0])-1
gains=[0]*n
for col in range(n):
gains[col]=compute_gain(data,col)
split=gains.index(max(gains))
node=Node(features[split])
fea = features[:split]+features[split+1:]
attr,dic=subtables(data,split,delete=True)
for x in range(len(attr)):
child=build_tree(dic[attr[x]],fea)
node.children.append((attr[x],child))
return node
def print_tree(node,level):
if node.answer!="":
print(" "*level,node.answer)
return
print(" "*level,node.attribute)
for value,n in node.children:
print(" "*(level+1),value)
print_tree(n,level+2)
def classify(node,x_test,features):
if node.answer!="":
print(node.answer)
return
pos=features.index(node.attribute)
for value, n in node.children:
if x_test[pos]==value:
classify(n,x_test,features)
'''Main program'''
dataset,features=load_csv("data3.csv")
node1=build_tree(dataset,features)
print("The decision tree for the dataset using ID3 algorithm
is")
print_tree(node1,0)
testdata,features=load_csv("data3_test.csv")
for xtest in testdata:
print("The test instance:",xtest)
print("The label for test instance:",end=" ")
classify(node1,xtest,features)

-----------------------------------------------------------------------

import pandas as pd
msg=pd.read_csv('naivetext.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)
#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print ('\n The total number of Training Data :',ytrain.shape)
print ('\n The total number of Test Data :',ytest.shape)
#output of count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print('\n The words or Tokens in the text documents \n')
print(count_vect.get_feature_names())
df=pd.DataFrame(xtrain_dtm.toarray(),columns=count_vect.get_fe
ature_names())
# Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)
#printing accuracy, Confusion matrix, Precision and Recall
from sklearn import metrics
print('\n Accuracy of the classifer is’,
metrics.accuracy_score(ytest,predicted))
print('\n Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('\n The value of Precision' ,
metrics.precision_score(ytest,predicted))
print('\n The value of Recall' ,
metrics.recall_score(ytest,predicted))

-----------------------------------------------------------------------------------
---------------

10. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same
data setfor
clustering using k-Means algorithm. Compare the results of these two algorithms and
comment on the quality of clustering. You can add Python ML library classes/APIin
the
program.
SOURCE CODE:
import matplotlib.pyplot as plt from sklearn
import datasets
from sklearn.cluster import KMeans import
sklearn.metrics as sm
import pandas as pd import numpy as np
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns =
['Sepal_Length','Sepal_Width','Petal_Length',
'Petal_Width']
y = pd.DataFrame(iris.target) y.columns =
['Targets']
model = KMeans(n_clusters=3) model.fit(X)
plt.figure(figsize=(14,7))
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width,
c=colormap[y.Targets], s=40) plt.title('Real
Classification')
plt.xlabel('Petal Length') plt.ylabel('Petal
Width')
# Plot the Models Classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width,
c=colormap[model.labels_], s=40) plt.title('K
Mean Classification')
plt.xlabel('Petal Length') plt.ylabel('Petal
Width')
print('The accuracy score of K-Mean:
',sm.accuracy_score(y, model.labels_))
print('The Confusion matrixof K-Mean:
',sm.confusion_matrix(y, model.labels_))
from sklearn import preprocessing scaler =
preprocessing.StandardScaler() scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
#xs.sample(5)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs) #y_cluster_gmm
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width,
c=colormap[y_gmm], s=40) plt.title('GMM
Classification')
plt.xlabel('Petal Length') plt.ylabel('Petal
Width')
print('The accuracy score of EM:
',sm.accuracy_score(y, y_gmm)) print('The
Confusion matrix of EM:
',sm.confusion_matrix(y, y_gmm))

-----------------------------------------------------------------------------------
----------
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
iris=datasets.load_iris()
x = iris.data
y = iris.target
print ('sepal-length', 'sepal-width', 'petal-length', 'petal-width')
print(x)
print('class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica')
print(y)
x_train, x_test, y_train, y_test =
train_test_split(x,y,test_size=0.3)
#To Training the model and Nearest nighbors K=5
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
#to make predictions on our test data
y_pred=classifier.predict(x_test)
print('Confusion Matrix')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))

--------------------------------------------------------------

obtain the line of regression of the datasets

import numpy as np
import pandas as pd
import numpy as genfromtxt
from sklearn import linear_model
from sklearn.model_selection import train_test_split,cross_val_score
from matplotlib import pyplot as plt
from matplotlib import colors
df=pd.read_csv("data_for_regression_coefficient.csv")

df.head(10)

x=df['x'].values.reshape(df['x'].count(),1)
y=df['y'].values.reshape(df['y'].count(),1)
print(x.shape)
print(y.shape)

X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.33,random_state=9892
)

simp_reg=linear_model.LinearRegression(fit_intercept=True) #create a scikit linear

regression model object
simp_reg.fit(x,y) #train the model on the training dataset

linear_model

print('coefficient : \n'+ str(simp_reg.coef_))

print('Intercept:\n' +str(simp_reg.intercept_))
#the mean squared error
print('Mean Squared Error: '+str(np.mean((simp_reg.predict(X_test)-y_test)**2)))
print('Variance: '+str(simp_reg.score(X_test,y_test)))

plt.plot(x, y, 'ro') # scatter plot showing actual data

plt.plot(x, y) # regression line
plt.title('Actual vs Predicted')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

-------------------------
question to find the line of regression and estimate the height of the son when the
height of the father is 164.

data=pd.read_csv('heights_of_father_and_son.csv')

x=data['Height of fathers'].values.reshape(data['Height of fathers'].count(),1)

y=data['Height of sons'].values.reshape(data['Height of sons'].count(),1)
print(x.shape)
print(y.shape)

X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.33,random_state=9892
)

simp_reg=linear_model.LinearRegression(fit_intercept=True) #create a scikit linear

regression model object
simp_reg.fit(x,y) #train the model on the training dataset

print('coefficient : \n'+ str(simp_reg.coef_))

y=0.61023622*164+66.11417323

------------------------------------------------------------------------

pca

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Generate some example data

np.random.seed(42)
data = np.random.rand(100, 2) # 100 samples, 2 features

# Apply PCA
pca = PCA(n_components=2)
pca_result = pca.fit_transform(data)

# Explained variance ratio

explained_variance_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratio:", explained_variance_ratio)

# Plot the original data

plt.scatter(data[:, 0], data[:, 1], alpha=0.7, label='Original Data')

# Plot the transformed data after PCA

plt.scatter(pca_result[:, 0], pca_result[:, 1], alpha=0.7, label='PCA Result')
# Set labels and title
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('PCA Example')
# Add legend
plt.legend()

# Show the plot

plt.show()

import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Assuming you have a dataset X, replace the following line with your data
# For example, X = np.random.rand(100, 3) # 100 samples, 3 features

# Create a sample dataset

np.random.seed(42)
X = np.random.rand(100, 2) # 100 samples, 2 features

# Step 1: Standardize the data

scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Step 2: Compute the covariance matrix

cov_matrix = np.cov(X_std, rowvar=False)

# Step 3: Compute eigenvectors and eigenvalues

eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 4: Sort eigenvalues and corresponding eigenvectors

sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# Step 5: Choose the number of components

# For example, let's keep the first two principal components
num_components = 2
selected_components = eigenvectors[:, :num_components]

# Step 6: Project the data onto the selected components

X_pca = np.dot(X_std, selected_components)

# Plot the original data and the data after PCA

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.scatter(X_std[:, 0], X_std[:, 1])
plt.title('Original Data')
plt.xlabel('Standardized Feature 1')
plt.ylabel('Standardized Feature 2')

#plt.subplot(1, 2, 2)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_openml
import pandas as pd

# Load the MNIST dataset (you can replace this with your own image dataset)
mnist = fetch_openml('mnist_784')
images = mnist.data.astype(float)
labels = mnist.target.astype(int)

# Standardize the pixel values

images /= 255.0

# Reshape images to 28x28 (assuming MNIST-like images)

images = images.Value.reshape(-1, 28, 28)

# Flatten each image into a 1D array

flattened_images = images.reshape(images.shape[0], -1)

# Convert to DataFrame
df = pd.DataFrame(flattened_images)

# Apply PCA
n_components = 50 # You can adjust this number based on your requirements
pca = PCA(n_components=n_components)
flattened_images_pca = pca.fit_transform(df.values)
# Visualize original and PCA-transformed images
fig, axes = plt.subplots(2, 10, figsize=(10, 2))

for i in range(10):
axes[0, i].imshow(flattened_images[i].reshape(28, 28), cmap='gray')
axes[0, i].axis('off')
axes[1, i].imshow(pca.inverse_transform(flattened_images_pca[i]).reshape(28,
28), cmap='gray')
axes[1, i].axis('off')

axes[0, 0].set_title('Original Images')

axes[1, 0].set_title('PCA Transformed Images')

plt.show()

Data Structure & Algorithms Lab Manual V1.2-1
No ratings yet
Data Structure & Algorithms Lab Manual V1.2-1
97 pages
Datascience With Python
No ratings yet
Datascience With Python
178 pages
Data Analytics Essentials Online Course
No ratings yet
Data Analytics Essentials Online Course
15 pages
Cybersecurity Internship Report
No ratings yet
Cybersecurity Internship Report
55 pages
Python Notes by Prof T
No ratings yet
Python Notes by Prof T
10 pages
Chapter 4 - Linear Regression
100% (2)
Chapter 4 - Linear Regression
25 pages
Artificial Intelligence - (Unit - 1)
No ratings yet
Artificial Intelligence - (Unit - 1)
47 pages
Record Ip Mithun
No ratings yet
Record Ip Mithun
25 pages
Final 1
No ratings yet
Final 1
38 pages
Intro To Scientific Python (2018-01-23) PDF
No ratings yet
Intro To Scientific Python (2018-01-23) PDF
16 pages
PPT
No ratings yet
PPT
21 pages
ML Lab Manual AIML Final
No ratings yet
ML Lab Manual AIML Final
61 pages
Computer Programming Language: Introduction To PYTHON
No ratings yet
Computer Programming Language: Introduction To PYTHON
11 pages
Installing GSAS-II - Overview - GSAS-II Web Documentation 1.0 Documentation
No ratings yet
Installing GSAS-II - Overview - GSAS-II Web Documentation 1.0 Documentation
5 pages
Python Programs All Manual
No ratings yet
Python Programs All Manual
17 pages
Guide Step by Step
No ratings yet
Guide Step by Step
61 pages
Exercise2 Solution
No ratings yet
Exercise2 Solution
15 pages
Khwaja Moinuddin Chishti Language University
No ratings yet
Khwaja Moinuddin Chishti Language University
30 pages
Dictionaries: A Mapping Type, Arrays and Modular Programming
No ratings yet
Dictionaries: A Mapping Type, Arrays and Modular Programming
47 pages
MST Lab Manual (R20)
No ratings yet
MST Lab Manual (R20)
75 pages
FDS Aim Algorithm
No ratings yet
FDS Aim Algorithm
18 pages
Documentation MINI
No ratings yet
Documentation MINI
48 pages
Is5312 Week10-V2
No ratings yet
Is5312 Week10-V2
51 pages
Python 101: Understanding The Nuts and Bolts of Python
No ratings yet
Python 101: Understanding The Nuts and Bolts of Python
46 pages
Python Programming Language
No ratings yet
Python Programming Language
17 pages
Rasterio: Presenters: Sushma Ghimire (13) Ashmin Sharma Pokharel (19) Asim Shrestha
No ratings yet
Rasterio: Presenters: Sushma Ghimire (13) Ashmin Sharma Pokharel (19) Asim Shrestha
22 pages
I C 152 Lab Assignment 8
No ratings yet
I C 152 Lab Assignment 8
10 pages
Toxic Comments Classification
No ratings yet
Toxic Comments Classification
10 pages
ICT Assignment 2
No ratings yet
ICT Assignment 2
7 pages
7 Data Transformation - Jupyter Notebook
No ratings yet
7 Data Transformation - Jupyter Notebook
3 pages
Machine Learnin
100% (2)
Machine Learnin
23 pages
Data Scientist Resume Template
No ratings yet
Data Scientist Resume Template
1 page
Lab 2 SVM
No ratings yet
Lab 2 SVM
23 pages
Practicalpgm ML
No ratings yet
Practicalpgm ML
33 pages
ML All Projectpdf Removed
No ratings yet
ML All Projectpdf Removed
41 pages
Udacity Machine Learning Analysis Supervised Learning
100% (1)
Udacity Machine Learning Analysis Supervised Learning
504 pages
Import Pandas As PD
No ratings yet
Import Pandas As PD
3 pages
ML Journal External
No ratings yet
ML Journal External
14 pages
Python For Data Science IA 1 Programs
No ratings yet
Python For Data Science IA 1 Programs
14 pages
1
No ratings yet
1
13 pages
CP4252 Lab Manual
No ratings yet
CP4252 Lab Manual
13 pages
Assignment 2
No ratings yet
Assignment 2
3 pages
Atul MLT Exp 4-11
No ratings yet
Atul MLT Exp 4-11
17 pages
Python For Data Science IA 1 Programs
No ratings yet
Python For Data Science IA 1 Programs
14 pages
Assignment 1
No ratings yet
Assignment 1
5 pages
Machine Learning LAB
No ratings yet
Machine Learning LAB
20 pages
Regression Model
No ratings yet
Regression Model
6 pages
Ai Lab
No ratings yet
Ai Lab
19 pages
Aiml Practicals
No ratings yet
Aiml Practicals
22 pages
Ai Practicle
No ratings yet
Ai Practicle
8 pages
Mlda - Lab
No ratings yet
Mlda - Lab
35 pages
Btech1007022 Lab5
No ratings yet
Btech1007022 Lab5
14 pages
Machine Learning
No ratings yet
Machine Learning
10 pages
Programs Lab Bca
No ratings yet
Programs Lab Bca
16 pages
Classification Review
No ratings yet
Classification Review
8 pages
Data Analytics
No ratings yet
Data Analytics
10 pages
Ai Int-1
No ratings yet
Ai Int-1
6 pages
Linear Reg 33
No ratings yet
Linear Reg 33
3 pages
1st PGM
No ratings yet
1st PGM
10 pages
Ex No 11
No ratings yet
Ex No 11
4 pages
Btech1007022 Lab5.1
No ratings yet
Btech1007022 Lab5.1
9 pages
Assignment 1: Q1. Task Description
No ratings yet
Assignment 1: Q1. Task Description
12 pages
Linear Regression Program
No ratings yet
Linear Regression Program
2 pages
ML Lab Manual
No ratings yet
ML Lab Manual
12 pages
Aiml Lab
No ratings yet
Aiml Lab
14 pages
LAB-4 Report
No ratings yet
LAB-4 Report
21 pages
Data Mining Practicals
No ratings yet
Data Mining Practicals
22 pages
ANN PR Code and Output
No ratings yet
ANN PR Code and Output
25 pages
ML Lab Prgms Split
No ratings yet
ML Lab Prgms Split
3 pages
ML Record Print
No ratings yet
ML Record Print
20 pages
Expt 1
No ratings yet
Expt 1
6 pages
Machine File
No ratings yet
Machine File
27 pages
Supervised Learning For Data Science...
No ratings yet
Supervised Learning For Data Science...
14 pages
Logistic Regression
No ratings yet
Logistic Regression
3 pages
Unit2 ML Programs
No ratings yet
Unit2 ML Programs
7 pages
Ai Last 5
No ratings yet
Ai Last 5
4 pages
Aiml Ex 4-7
No ratings yet
Aiml Ex 4-7
8 pages
Soft Sensor Code
No ratings yet
Soft Sensor Code
4 pages
Soft Sensor Code
No ratings yet
Soft Sensor Code
4 pages
Linearregression SVM
No ratings yet
Linearregression SVM
3 pages
Báo Cáo Java 4
No ratings yet
Báo Cáo Java 4
3 pages
16BCB0126 VL2018195002535 Pe003
No ratings yet
16BCB0126 VL2018195002535 Pe003
40 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
23 pages
Ridge - Lasso - Regression (1) .Ipynb - Colaboratory
No ratings yet
Ridge - Lasso - Regression (1) .Ipynb - Colaboratory
4 pages
ML Lab
No ratings yet
ML Lab
7 pages
EE 559 HW2Code PDF
No ratings yet
EE 559 HW2Code PDF
7 pages
Linear Regression Program Python
No ratings yet
Linear Regression Program Python
2 pages
Assignment #1: K Nearest Neighbor Classifier: Name: Srikanth Mujjiga (Roll No: 2015-50-831
No ratings yet
Assignment #1: K Nearest Neighbor Classifier: Name: Srikanth Mujjiga (Roll No: 2015-50-831
8 pages
Machine Learning Lab: Raheel Aslam (74-FET/BSEE/F16)
No ratings yet
Machine Learning Lab: Raheel Aslam (74-FET/BSEE/F16)
4 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet

Naive Bayes

Uploaded by

Naive Bayes

Uploaded by

NAIVE BAYES

# load the iris dataset

# store the feature matrix (X) and response vector (y)

# splitting X and y into training and testing sets

# training the model on training set

# making predictions on the testing set

def estimate_coef(x, y):

# mean of x and y vector

# calculating cross-deviation and deviation about x

# calculating regression coefficients

return (b_0, b_1)

def plot_regression_line(x, y, b):

# predicted response vector

# plotting the regression line

multiple linear regression

# variance score: 1 means perfect prediction

# setting plot style

# plotting residual errors in training data

# plotting residual errors in test data

# plotting line for zero residual error

# method call for showing the plot

from sklearn.preprocessing import LabelEncoder

from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import PolynomialFeatures

p : A tuple, test data point of the form (x,y)

k : number of nearest neighbour to consider, default is 3

#calculate the euclidean distance of p from training points

# Add a tuple of form (distance,group) in the distance list

# sort the distance list in ascending order

freq1 = 0 #frequency of group 0

return 0 if freq1>freq2 else 1

# Dictionary of training points having two keys - 0 and 1

# testing point p(x,y)

print("The value classified to unknown point is: {}".\

ELASTIC NET REGRESSION

import matplotlib.pyplot as plt

# Elastic Net Regression

def __init__( self, learning_rate, iterations, l1_penality, l2_penality ) :

# Function for model training

def fit( self, X, Y ) :

self.m, self.n = X.shape

self.W = np.zeros( self.n )

# gradient descent learning

for i in range( self.iterations ) :

# Helper function to update weights in gradient descent

def update_weights( self ) :

Y_pred = self.predict( self.X )

for j in range( self.n ) :

dW[j] = ( - ( 2 * ( self.X[:,j] ).dot( self.Y - Y_pred ) )

dW[j] = ( - ( 2 * ( self.X[:,j] ).dot( self.Y - Y_pred ) )

db = - 2 * np.sum( self.Y - Y_pred ) / self.m

self.W = self.W - self.learning_rate * dW

self.b = self.b - self.learning_rate * db

def predict( self, X ) :

return X.dot( self.W ) + self.b

# Splitting dataset into train and test set

X_train, X_test, Y_train, Y_test = train_test_split( X, Y,

model = ElasticRegression( iterations = 1000,

learning_rate = 0.01, l1_penality = 500, l2_penality

model.fit( X_train, Y_train )

# Prediction on test set

print( "Predicted values ", np.round( Y_pred[:3], 2 ) )

print( "Real values ", Y_test[:3] )

print( "Trained W ", round( model.W[0], 2 ) )

print( "Trained b ", round( model.b, 2 ) )

# Visualization on test set

plt.scatter( X_test, Y_test, color = 'blue' )

plt.plot( X_test, Y_pred, color = 'orange' )

plt.title( 'Salary vs Experience' )

plt.xlabel( 'Years of Experience' )

# Load the important packages

# Load the datasets

#Build the model

# Plot Decision Boundary

# importing scikit learn with make_blobs

# creating datasets X containing n_samples

def init( self, learning_rate, iterations, l1_penality, l2_penality ) :