
Index

S.no. Title Date Sign

1 Extract the data from the database using Python
2 Write a program for result prediction
3 Write a program for a heart disease predictor
4 Write a program to compute the accuracy of a classifier
5 Write a program for a diabetes prediction model
6 Build an Artificial Neural Network (ANN) by implementing the backpropagation algorithm and test the same using appropriate data sets
7 Apply the k-Means algorithm to cluster a set of data stored in a .CSV file. Use the same data set for hierarchical clustering. Compare the results of these two algorithms and comment on the quality of clustering. You can add Python ML library classes in the program
8 Write a program to implement a Self-Organizing Map (SOM)
9 Write a program for empirical comparison of different supervised learning algorithms
10 Write a program for empirical comparison of different unsupervised learning algorithms
Program – 1

Q1. Extract the data from the database using Python.

Code:

# Python code to extract data from a MySQL database.


import mysql.connector
mycon = mysql.connector.connect(host="localhost", user="root", passwd="1234", database="12a")
cursor = mycon.cursor()
cursor.execute("SELECT * FROM student")
data = cursor.fetchall()
for i in data:
    print(i)
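
Note: in practice it is good hygiene to close the cursor and connection when done, and to pass values through parameterized queries rather than string formatting. A minimal sketch (the marks column and the WHERE clause are hypothetical):

# Optional sketch: parameterized query and cleanup (column name 'marks' is hypothetical)
cursor.execute("SELECT * FROM student WHERE marks > %s", (50,))
for row in cursor.fetchall():
    print(row)
cursor.close()
mycon.close()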

Output:
Program – 2

Q2. Write a program for result prediction.

Code:

# Result Prediction Model using Logistic Regression


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
data = pd.read_csv("marks.csv")
print(data.head())

data['RESULT'] = data['RESULT'].fillna(0)  # Filling NaN with 0 for simplicity


X = data.drop(columns='RESULT', axis=1)
Y = data['RESULT']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
model = LogisticRegression()
model.fit(X_train, Y_train)

X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data:", training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)
input_data = np.array([90, 50]).reshape(1, -1)

prediction = model.predict(input_data)
print("Prediction for example input data:", prediction[0])

input_data = np.array([70, 25]).reshape(1, -1)


prediction = model.predict(input_data)
print("Prediction for additional input data:", prediction[0])
if prediction[0] == 0:
    print("FAIL")
else:
    print("PASS")
Output:
Program – 3

Q3. Write a program for a heart disease predictor.

Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
data = pd.read_csv("heart_data.csv")
print(data.head())

print(data)
print(data.isnull().sum())
print(data['HeartDisease'].value_counts())

X = data.drop(columns='HeartDisease', axis=1)
Y = data['HeartDisease']
print(X)
print(Y)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
print(X.shape, X_train.shape, X_test.shape)

model = LogisticRegression()
model.fit(X_train, Y_train)

X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data:", training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)

input_data = (74, 0, 2, 20, 264, 0, 2, 121, 1, 0.2, 1, 1, 3)
input_data_as_numpy_array = np.asarray(input_data)

input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
prediction = model.predict(input_data_reshaped)
print(prediction)
if prediction[0] == 0:
    print("The person does not have Heart Disease")
else:
    print("The person has Heart Disease")
Output:
Program – 4

Q4. Write a program to compute the accuracy of a classifier.

Code:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

# Load the Iris dataset


iris = load_iris()

# Create a DataFrame using Pandas


iris_df = pd.DataFrame(data=np.c_[iris['data'], iris['target']], columns=iris['feature_names'] + ['target'])

# Save the DataFrame to a CSV file


iris_df.to_csv('iris_dataset.csv', index=False)

# Load the Iris dataset from the saved file


loaded_df = pd.read_csv('iris_dataset.csv')
X_loaded = loaded_df.drop('target', axis=1).values
y_loaded = loaded_df['target'].values

# Split the loaded dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X_loaded, y_loaded, test_size=0.2, random_state=42)

# Create a Random Forest Classifier


clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the training set


clf.fit(X_train, y_train)

# Make predictions on the test set


y_pred = clf.predict(X_test)

# Evaluate the classifier


accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))
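
Note: a single train/test split can over- or underestimate accuracy by chance; cross-validation averages over several splits. An optional check:

# Optional: 5-fold cross-validated accuracy for a more stable estimate
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(clf, X_loaded, y_loaded, cv=5)
print(f"5-fold CV accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})")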

Output:
Program – 5

Q5. Write a program for a diabetes prediction model.

Code:

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

data = pd.read_csv('diabetes2.csv')
print(data)

X = data.drop(columns='Report', axis=1)
Y = data['Report']

scaler = StandardScaler()
standardized_data = scaler.fit_transform(X)
print(standardized_data)

X = standardized_data

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Train the SVM classifier (these two lines were missing from the listing; a linear-kernel SVC is assumed)
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of the test data:', test_data_accuracy)

input_data = (85, 60, 23, 8, 30.1, 0.163, 57)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)
if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")

Output:
Program – 6

Q6. Build an Artificial Neural Network (ANN) by implementing the backpropagation algorithm and test the same using appropriate data sets.

Code:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        # Initialize weights and biases
        self.weights_input_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.weights_hidden_output = np.random.rand(self.hidden_size, self.output_size)
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def forward(self, X):
        # Forward pass through the network
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_input)
        return self.predicted_output

    def backward(self, X, y, output):
        # Backward pass through the network
        error = y - output
        output_delta = error * self.sigmoid_derivative(output)
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)
        # Update weights and biases
        self.weights_hidden_output += self.hidden_output.T.dot(output_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.weights_input_hidden += X.T.dot(hidden_delta) * self.learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            # Backward pass and optimization
            self.backward(X, y, output)
            # Print the mean squared error every 1000 epochs
            mse = np.mean((y - output) ** 2)
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}, Mean Squared Error: {mse:.4f}")

# Sample dataset (XOR problem)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize and train the neural network
input_size = 2
hidden_size = 4
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X, y, epochs=10000)

# Test the trained network
predictions = nn.forward(X)
print("\nPredictions:")
print(predictions)
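
Note: the raw outputs are sigmoid activations in (0, 1); thresholding at 0.5 turns them into hard XOR labels. An optional addition:

# Optional: convert sigmoid outputs to hard 0/1 labels
print("Rounded predictions:", (predictions > 0.5).astype(int).ravel())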

Output:
Program – 7

Q7. Apply the k-Means algorithm to cluster a set of data stored in a .CSV file. Use the same data set for hierarchical clustering. Compare the results of these two algorithms and comment on the quality of clustering. You can add Python ML library classes in the program.

Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import load_iris

# Load the Iris dataset (you can replace this with your CSV file)
iris = load_iris()
X = iris.data
y_true = iris.target

# Standardize the data
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Perform k-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X_std)

# Perform hierarchical clustering (Agglomerative Clustering)
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(X_std)

# Visualize the results
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_std[:, 0], X_std[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('k-Means Clustering')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], marker='X', s=200, c='red')
plt.subplot(1, 2, 2)
plt.scatter(X_std[:, 0], X_std[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')
plt.show()

# Evaluate clustering results using the adjusted Rand index
ari_kmeans = adjusted_rand_score(y_true, kmeans_labels)
ari_hierarchical = adjusted_rand_score(y_true, hierarchical_labels)
print(f"Adjusted Rand Index (k-Means): {ari_kmeans:.4f}")
print(f"Adjusted Rand Index (Hierarchical): {ari_hierarchical:.4f}")
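
Note: the adjusted Rand index requires the true labels; the silhouette score judges clustering quality from the data alone, which helps when no ground truth exists. An optional addition:

# Optional: compare cluster cohesion/separation without using the true labels
from sklearn.metrics import silhouette_score
print(f"Silhouette (k-Means): {silhouette_score(X_std, kmeans_labels):.4f}")
print(f"Silhouette (Hierarchical): {silhouette_score(X_std, hierarchical_labels):.4f}")
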
Output:
Program – 8

Q8. Write a program to implement Self-Organizing Map (SOM).

Code:

import numpy as np
import matplotlib.pyplot as plt

class SOM:
    def __init__(self, input_size, map_size, learning_rate=0.1):
        self.input_size = input_size
        self.map_size = map_size
        self.learning_rate = learning_rate
        # Initialize the SOM weights
        self.weights = np.random.rand(map_size[0], map_size[1], input_size)

    def find_winner(self, input_vector):
        # Find the winning neuron (the one with the closest weight vector)
        distances = np.linalg.norm(self.weights - input_vector, axis=-1)
        winner = np.unravel_index(np.argmin(distances), distances.shape)
        return winner

    def update_weights(self, input_vector, winner):
        # Update the weights of the neurons
        for i in range(self.map_size[0]):
            for j in range(self.map_size[1]):
                # Update each weight vector
                distance = np.linalg.norm(np.array([i, j]) - np.array(winner))
                influence = np.exp(-distance / (2 * 0.5**2))  # Influence function (e.g., Gaussian)
                self.weights[i, j] += self.learning_rate * influence * (input_vector - self.weights[i, j])

    def train(self, data, epochs):
        for epoch in range(epochs):
            for input_vector in data:
                # Find the winner and update weights for each input vector
                winner = self.find_winner(input_vector)
                self.update_weights(input_vector, winner)

    def predict(self, data):
        # Find the winning neuron for each input vector in the dataset
        winners = [self.find_winner(input_vector) for input_vector in data]
        return np.array(winners)

# Example usage with a 2D dataset
data = np.array([[1, 2],
                 [5, 6],
                 [8, 7],
                 [2, 1]])

# Normalize the data to be in the range [0, 1]
data_normalized = data / np.max(data)

# Initialize SOM
input_size = data.shape[1]
map_size = (5, 5)  # Adjust the map size based on your dataset
som = SOM(input_size, map_size, learning_rate=0.1)

# Train the SOM
epochs = 1000
som.train(data_normalized, epochs)

# Get the predicted winning neurons for each input vector
predicted_winners = som.predict(data_normalized)

# Visualize the SOM and the input vectors
plt.figure(figsize=(8, 8))
plt.scatter(data_normalized[:, 0], data_normalized[:, 1], label='Input Vectors')
plt.scatter(predicted_winners[:, 0] / (map_size[0] - 1), predicted_winners[:, 1] / (map_size[1] - 1),
            marker='X', c='red', label='SOM Winners')
plt.legend()
plt.title('Self-Organizing Map (SOM) Results')
plt.show()
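
Note: classic SOM training decays the learning rate (and neighbourhood radius) over time so the map stabilises; the class above keeps both fixed. A sketch of one simple variant, decaying the learning rate each epoch:

# Optional sketch: exponentially decay the learning rate, training one epoch at a time
som2 = SOM(input_size, map_size, learning_rate=0.1)
for epoch in range(epochs):
    som2.learning_rate = 0.1 * np.exp(-epoch / epochs)
    som2.train(data_normalized, 1)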

Output:
Program – 9

Q9. Write a program for empirical comparison of different supervised learning algorithms.

Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Support Vector Machines (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)

# Decision Trees
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)

# Random Forests
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

# Print the results
print("Support Vector Machines Accuracy:", svm_accuracy)
print("Decision Trees Accuracy:", dt_accuracy)
print("Random Forests Accuracy:", rf_accuracy)

# Additional information: Classification Report
print("\nSupport Vector Machines Classification Report:")
print(classification_report(y_test, svm_predictions))
print("\nDecision Trees Classification Report:")
print(classification_report(y_test, dt_predictions))
print("\nRandom Forests Classification Report:")
print(classification_report(y_test, rf_predictions))
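
Note: with only 150 samples, a single 80/20 split is noisy, so small accuracy differences between the models may not be meaningful. An optional cross-validated comparison:

# Optional: compare the three models with 5-fold cross-validation
from sklearn.model_selection import cross_val_score
for name, model in [("SVM", SVC()), ("Decision Tree", DecisionTreeClassifier()),
                    ("Random Forest", RandomForestClassifier())]:
    scores = cross_val_score(model, X, y, cv=5)
    print(f"{name}: mean CV accuracy {scores.mean():.3f} (+/- {scores.std():.3f})")
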
Output:
Program – 10

Q10. Write a program for empirical comparison of different unsupervised learning algorithms.

Code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reduce dimensionality for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Initialize clustering algorithms
kmeans = KMeans(n_clusters=3, random_state=42)
hierarchical = AgglomerativeClustering(n_clusters=3)
dbscan = DBSCAN(eps=0.5, min_samples=5)

# Perform clustering
kmeans_labels = kmeans.fit_predict(X_scaled)
hierarchical_labels = hierarchical.fit_predict(X_scaled)
dbscan_labels = dbscan.fit_predict(X_scaled)

# Visualize the results
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis', marker='o', edgecolors='k')
plt.title('K-Means Clustering')
plt.subplot(1, 3, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=hierarchical_labels, cmap='viridis', marker='o', edgecolors='k')
plt.title('Hierarchical Clustering')
plt.subplot(1, 3, 3)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=dbscan_labels, cmap='viridis', marker='o', edgecolors='k')
plt.title('DBSCAN Clustering')
plt.tight_layout()
plt.show()

# Evaluate the quality of clustering using the silhouette score
silhouette_kmeans = silhouette_score(X_scaled, kmeans_labels)
silhouette_hierarchical = silhouette_score(X_scaled, hierarchical_labels)
silhouette_dbscan = silhouette_score(X_scaled, dbscan_labels)
print("Silhouette Score (K-Means):", silhouette_kmeans)
print("Silhouette Score (Hierarchical):", silhouette_hierarchical)
print("Silhouette Score (DBSCAN):", silhouette_dbscan)
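
Note: DBSCAN labels noise points as -1, and silhouette_score raises an error if the labelling ends up with fewer than two distinct groups, so it is safer to guard that call. An optional defensive variant:

# Optional: count DBSCAN clusters (excluding noise, labelled -1) before scoring
n_clusters_dbscan = len(set(dbscan_labels)) - (1 if -1 in dbscan_labels else 0)
print("DBSCAN found", n_clusters_dbscan, "clusters")
if n_clusters_dbscan >= 2:
    print("Silhouette Score (DBSCAN):", silhouette_score(X_scaled, dbscan_labels))
else:
    print("Silhouette score is undefined for fewer than 2 clusters")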


Output:
