Program – 1
Code:
Output:
Program – 2
Code:
# Evaluate the trained model (accuracy_score expects y_true first)
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data:", training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)

# Predict for a single two-feature example
input_data = np.array([90, 50]).reshape(1, -1)
prediction = model.predict(input_data)
print("Prediction for example input data:", prediction[0])
Program – 3
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load and inspect the heart-disease dataset
data = pd.read_csv("heart_data.csv")
data.head()
print(data)
data.isnull().sum()
data['HeartDisease'].value_counts()

# Separate features and target
X = data.drop(columns='HeartDisease', axis=1)
Y = data['HeartDisease']
print(X)
print(Y)

# Split into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
print(X.shape, X_train.shape, X_test.shape)

# Train a logistic-regression model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Evaluate on training data (accuracy_score expects y_true first)
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data:", training_data_accuracy)

# Evaluate on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)

# Predict for a single patient record
input_data = (74, 0, 2, 20, 264, 0, 2, 121, 1, 0.2, 1, 1, 3)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
prediction = model.predict(input_data_reshaped)
print(prediction)

if prediction[0] == 0:
    print("The person does not have Heart Disease")
else:
    print("The person has Heart Disease")
Output:
Program – 4
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris
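Only the imports of this program survive in the listing; judging by them, it trains a Random Forest classifier on the Iris dataset. A minimal sketch of the presumable body, using only the imported names (split ratio and random_state assumed):

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split into training and testing sets (parameters assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the test set
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))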
Output:
Program – 5
Code:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# Load the diabetes dataset
data = pd.read_csv('diabetes2.csv')
print(data)

# Separate features and target
X = data.drop(columns='Report', axis=1)
Y = data['Report']

# Standardize the features
scaler = StandardScaler()
scaler.fit(X)
standardized_data = scaler.transform(X)
print(standardized_data)
X = standardized_data

# Split into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Train an SVM classifier (kernel choice assumed)
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Evaluate on test data (accuracy_score expects y_true first)
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of the test data:', test_data_accuracy)

# Predict for a single record, standardized with the same scaler
input_data = (85, 60, 23, 8, 30.1, 0.163, 57)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
std_data = scaler.transform(input_data_reshaped)
print(std_data)
prediction = classifier.predict(std_data)
print(prediction)

if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")
Output:
Program – 6
Code:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        # Randomly initialize weights and zero the biases
        self.weights_input_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.weights_hidden_output = np.random.rand(self.hidden_size, self.output_size)
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def forward(self, X):
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_input)
        return self.predicted_output

    def backward(self, X, y, output):
        # Propagate the error backwards through the network
        output_error = y - output
        output_delta = output_error * self.sigmoid_derivative(output)
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)
        # Update weights and biases by gradient descent
        self.weights_hidden_output += self.hidden_output.T.dot(output_delta) * self.learning_rate
        self.weights_input_hidden += X.T.dot(hidden_delta) * self.learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            # Backward pass and weight update
            self.backward(X, y, output)
            loss = np.mean((y - output) ** 2)
            if epoch % 1000 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Training data (the XOR problem, assumed from the network sizes below)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Create and train the network
input_size = 2
hidden_size = 4
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate=0.1)
nn.train(X, y, epochs=10000)

predictions = nn.forward(X)
print("\nPredictions:")
print(predictions)
Output:
Program – 7
Q7. Apply the k-Means algorithm to cluster a set of data stored in a .CSV file.
Use the same data set for clustering with hierarchical (agglomerative) clustering.
Compare the results of these two algorithms and comment on the quality of
clustering. You can add Python ML library classes in the program.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import load_iris

# Load the Iris dataset (you can replace this with your CSV file)
iris = load_iris()
X = iris.data
y_true = iris.target

# Standardize the data
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Perform k-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X_std)

# Perform hierarchical clustering (Agglomerative Clustering)
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(X_std)

# Visualize the results
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_std[:, 0], X_std[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('k-Means Clustering')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], marker='X', s=200, c='red')
plt.subplot(1, 2, 2)
plt.scatter(X_std[:, 0], X_std[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')
plt.show()

# Evaluate clustering results using the adjusted Rand index
ari_kmeans = adjusted_rand_score(y_true, kmeans_labels)
ari_hierarchical = adjusted_rand_score(y_true, hierarchical_labels)
print(f"Adjusted Rand Index (k-Means): {ari_kmeans:.4f}")
print(f"Adjusted Rand Index (Hierarchical): {ari_hierarchical:.4f}")
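The question asks for data stored in a .CSV file, while the listing demonstrates on the built-in Iris dataset. A sketch of the substitution, assuming a hypothetical clusters.csv whose label column holds the true classes:

import pandas as pd

# Hypothetical CSV layout: feature columns plus a 'label' column
data = pd.read_csv("clusters.csv")       # file name assumed
X = data.drop(columns='label').values    # label column name assumed
y_true = data['label'].values

# The rest of the program (standardization, clustering, evaluation) is unchanged.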
Output:
Program – 8
Code:
import numpy as np
import matplotlib.pyplot as plt

class SOM:
    def __init__(self, input_size, map_size, learning_rate=0.1):
        self.input_size = input_size
        self.map_size = map_size
        self.learning_rate = learning_rate
        # One randomly initialized weight vector per map neuron
        self.weights = np.random.rand(map_size[0], map_size[1], input_size)

    def find_winner(self, input_vector):
        # Find the winning neuron (the one with the closest weight vector)
        distances = np.linalg.norm(self.weights - input_vector, axis=2)
        return np.unravel_index(np.argmin(distances), self.map_size)

    def update_weights(self, input_vector, winner):
        # Update each weight vector, pulled toward the input with an
        # influence that decays with distance from the winner
        for i in range(self.map_size[0]):
            for j in range(self.map_size[1]):
                dist = np.linalg.norm(np.array([i, j]) - np.array(winner))
                influence = np.exp(-dist ** 2 / 2.0)
                self.weights[i, j] += self.learning_rate * influence * (input_vector - self.weights[i, j])

    def train(self, data, epochs):
        for epoch in range(epochs):
            for input_vector in data:
                winner = self.find_winner(input_vector)
                self.update_weights(input_vector, winner)

    def predict(self, data):
        # Find the winning neuron for each input vector in the dataset
        return np.array([self.find_winner(v) for v in data])

# Sample 2-D data; only the last three rows survive in the original listing,
# so the first two rows here are placeholders
data = np.array([[1, 2], [3, 4], [5, 6], [8, 7], [2, 1]])
data_normalized = data / data.max()  # normalize to the unit range

# Initialize SOM (map size assumed; not preserved in the listing)
input_size = data.shape[1]
som = SOM(input_size, map_size=(4, 4), learning_rate=0.1)

# Train the SOM
epochs = 1000
som.train(data_normalized, epochs)
predicted_winners = som.predict(data_normalized)

# Plot each point labelled with its winning neuron
plt.figure(figsize=(8, 8))
for point, winner in zip(data_normalized, predicted_winners):
    plt.scatter(point[0], point[1], label=f"Neuron ({winner[0]}, {winner[1]})")
plt.legend()
plt.show()
Output:
Program – 9
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Support Vector Machines (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)

# Decision Trees
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)

# Random Forests
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

# Print the results
print("Support Vector Machines Accuracy:", svm_accuracy)
print("Decision Trees Accuracy:", dt_accuracy)
print("Random Forests Accuracy:", rf_accuracy)

# Additional information: Classification Report
print("\nSupport Vector Machines Classification Report:")
print(classification_report(y_test, svm_predictions))
print("\nDecision Trees Classification Report:")
print(classification_report(y_test, dt_predictions))
print("\nRandom Forests Classification Report:")
print(classification_report(y_test, rf_predictions))
Output:
Program – 10
Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project to two dimensions for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Initialize clustering algorithms
kmeans = KMeans(n_clusters=3, random_state=42)
hierarchical = AgglomerativeClustering(n_clusters=3)
dbscan = DBSCAN(eps=0.5, min_samples=5)

# Perform clustering
kmeans_labels = kmeans.fit_predict(X_scaled)
hierarchical_labels = hierarchical.fit_predict(X_scaled)
dbscan_labels = dbscan.fit_predict(X_scaled)

# Visualize the three clusterings on the PCA projection
# (scatter calls assumed; only the subplot titles survive in the listing)
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('K-Means Clustering')
plt.subplot(1, 3, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')
plt.subplot(1, 3, 3)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=dbscan_labels, cmap='viridis')
plt.title('DBSCAN Clustering')
plt.tight_layout()
plt.show()

# Evaluate clustering quality with the silhouette score
silhouette_kmeans = silhouette_score(X_scaled, kmeans_labels)
silhouette_hierarchical = silhouette_score(X_scaled, hierarchical_labels)
silhouette_dbscan = silhouette_score(X_scaled, dbscan_labels)
print("Silhouette Score (K-Means):", silhouette_kmeans)
print("Silhouette Score (Hierarchical):", silhouette_hierarchical)
print("Silhouette Score (DBSCAN):", silhouette_dbscan)