ML Lab Works

The document contains multiple Python programs implementing various machine learning algorithms including FIND-S, Candidate Elimination, Decision Trees, Linear Regression, Logistic Regression, K-Nearest Neighbors, and more. Each program is designed to perform specific tasks such as classification, regression, clustering, and exploratory data analysis using libraries like scikit-learn and pandas. Additionally, the document addresses error handling for file operations and provides visualizations for certain algorithms.


# --- Program 1: FIND-S Algorithm ---

# Filename: find_s.py
"""
Implements the FIND-S algorithm to find the most specific hypothesis
that fits the given training data.
"""
import csv
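
# Note: sedData.csv is not shown in this document. A minimal example of the
# assumed layout (attribute columns followed by a 'yes'/'no' target in the last
# column, no header row, in the spirit of the EnjoySport data) would be:
#
#   sunny,warm,normal,strong,warm,same,yes
#   sunny,warm,high,strong,warm,same,yes
#   rainy,cold,high,strong,warm,change,no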

print("Data in csv file is:")


data = []
with open("sedData.csv", "r") as fp: # Corrected file extension
    read = csv.reader(fp)
    for r in read:
        data.append(r)
        print(r)

print("\n")
n = len(data[0]) - 1
print(" No. of attributes are:", n)
print("Initial Hypothesis: ")
hypothesis = ['0'] * n
print(hypothesis)

for i in range(0, len(data)):
    if (data[i][n] == 'yes'):
        for j in range(0, n):
            hypothesis[j] = data[i][j]
        break

print(hypothesis)
print("\n")
print("After every iteration: ")
for i in range(0, len(data)):
    if (data[i][n] == 'yes'):
        for j in range(0, n):
            if (hypothesis[j] != data[i][j]):
                hypothesis[j] = '?'
    print(hypothesis)

print("\n")
print("Final Hypothesis:")
print(hypothesis)

# --- Program 2: Candidate Elimination Algorithm ---


# Filename: candidate_elimination.py
"""
Implements the Candidate Elimination Algorithm to find the most general
and most specific hypotheses that are consistent with the training data.
"""
import numpy as np
import pandas as pd
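
# Note: ML2.CSV is not included here. Based on the code below (the general
# boundary check uses five '?' entries), the assumed layout is five attribute
# columns followed by a 'yes'/'no' target, with a header row for pandas, e.g.:
#
#   sky,airtemp,humidity,wind,water,target
#   sunny,warm,normal,strong,warm,yes
#   rainy,cold,high,strong,warm,no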

data = pd.read_csv('ML2.CSV') # Corrected CSV name


concepts = np.array(data)[:, :-1]
print("Instances are:\n", concepts)
target = np.array(data)[:, -1]

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h & general_h")
    print("Specific boundary:\n", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("General boundary:\n", general_h)

    for i, h in enumerate(concepts):
        print("\nInstance", i+1, "is", h)
        if target[i] == "yes":
            print("Instance is positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("Instance is negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("Specific boundary after", i+1, "instance:\n", specific_h)
        print("General boundary after", i+1, "instance:\n", general_h)
        print("\n")

    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?']]  # Adjusted to 5 question marks
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?'])  # Adjusted to 5 question marks
    return specific_h, general_h

s_final, g_final = learn(concepts, target)


print("Final specific-h:\n", s_final)
print("Final general_h:\n", g_final)

# --- Program 3: Decision Tree ---


# Filename: decision_tree.py
"""
Implements a simple decision tree using scikit-learn.
"""
from sklearn.tree import DecisionTreeClassifier
import numpy as np

X = np.array([[1, 1, 1],
[1, 0, 1],
[0, 1, 0],
[0, 0, 1],
[1, 1, 0]])

y = np.array([1, 1, 0, 0, 1])

tree = DecisionTreeClassifier(criterion='entropy') # Using criterion entropy

tree.fit(X, y)

new_sample = np.array([[1, 0, 1]])


predicted_class = tree.predict(new_sample)
print("Predicted class:", predicted_class[0])

# --- Program 4: Linear Regression ---


# Filename: linear_regression.py
"""
Performs linear regression on a given dataset using scikit-learn.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

data = pd.read_csv("lab3.csv") # Changed filename to lab3.csv


print(data)

diameter = data['diameter'].values.reshape(-1, 1)
price = data['price'].values

model = LinearRegression()
model.fit(diameter, price)

intercept = model.intercept_
slope = model.coef_[0]

print("Intercept:", intercept)
print("Slope:", slope)
predictions = model.predict(diameter)

mse = np.mean((predictions - price) ** 2)


print("Mean squared Error:", mse)

plt.scatter(diameter, price, color='blue')


plt.plot(diameter, predictions, color='red')
plt.xlabel('Diameter')
plt.ylabel('Price')
plt.title("Linear Regression")
plt.show()

new_diameter = np.array([20]).reshape(-1, 1) # Corrected: Reshape the input


future_price = model.predict(new_diameter)
print("Predicted price for a diameter of 20:", future_price[0])

# --- Program 5: Logistic Regression ---


# Filename: logistic_regression.py
"""
Performs logistic regression on a given dataset using scikit-learn.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data = pd.read_csv('labs.csv')
print(data)

X = data[['studyhours']]
y = data['examresult']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})


new_predictions = model.predict(new_data)

print("New predictions:")
for i, prediction in enumerate(new_predictions):
print("Instance {}: predicted Result: {}".format(i+1, prediction))

# --- Program 6: Binary Classifier (another Logistic Regression example, slightly different) ---

# Filename: binary_classifier.py
"""
Performs binary classification (using Logistic Regression) on a dataset.
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data = pd.read_csv('lab3.csv')
print(data)

X = data[['studyhours']]
Y = data['examresult'] # Corrected target variable name

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)  # Corrected test_size

model = LogisticRegression()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: ", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})


new_predictions = model.predict(new_data)

print("New predictions:")
for i, prediction in enumerate(new_predictions):
print("Instance {}: Predicted Result {}".format(i+1, prediction))

# --- Program 7: Bias, Variance, Cross-Validation ---


# Filename: bias_variance_cv.py
"""
Demonstrates how to calculate bias and variance using cross-validation
for a linear regression model.
"""

import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, LogisticRegression  # Added LogisticRegression import
from statistics import mean, stdev

data = pd.read_csv("wineQT.csv")
X_set = data.drop('quality', axis=1) # Corrected to X_set
y_set = data['quality']

# Linear Regression
model = LinearRegression()
scores = cross_val_score(model, X_set, y_set, cv=10)
print("Linear Regression Scores:", scores)
print("Linear Regression Bias (Mean):", mean(scores))
print("Linear Regression Variance (StDev):", stdev(scores))

# K-fold Cross Validation (K-list wasn't used correctly in the images)


# Here's a correct way to show how bias and variance change with different folds
k_values = [2, 5, 10, 20] # Example values for K
bias_scores = []
variance_scores = []

for k in k_values:
    model = LinearRegression()  # Create a new model for each K
    scores = cross_val_score(model, X_set, y_set, cv=k)
    bias_scores.append(mean(scores))
    variance_scores.append(stdev(scores))

print("\nLinear Regression K-Fold Validation Results:")


for i in range(len(k_values)):
print(f"K={k_values[i]}: Bias={bias_scores[i]:.4f}, Variance={variance_scores[i]:.4f}")

# --- Program 8: K-Nearest Neighbors (KNN) ---


# Filename: knn.py
"""
Implements the K-Nearest Neighbors algorithm on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


print("Accuracy:", accuracy)

print("Correct predictions:")
for i in range(len(y_test)):
    if y_test[i] == y_pred[i]:
        print("True label:", iris.target_names[y_test[i]], "- Predicted label:", iris.target_names[y_pred[i]])

print("\nWrong Predictions:")
for i in range(len(y_test)):
    if y_test[i] != y_pred[i]:
        print("True label:", iris.target_names[y_test[i]], "- Predicted label:", iris.target_names[y_pred[i]])
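
# Optional sketch: the default KNeighborsClassifier uses n_neighbors=5; a short
# loop like this shows how accuracy varies with k on the same split.
for k in [1, 3, 5, 7, 9]:
    knn_k = KNeighborsClassifier(n_neighbors=k)
    knn_k.fit(X_train, y_train)
    print("k =", k, "accuracy =", accuracy_score(y_test, knn_k.predict(X_test)))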

# --- Program 9: Locally Weighted Regression ---


# Filename: locally_weighted_regression.py
"""
Implements Locally Weighted Regression.
"""
import numpy as np
import matplotlib.pyplot as plt

def lwr(x_train, y_train, x_test, tau):
    y_pred = np.zeros(len(x_test))
    for i, test_point in enumerate(x_test):
        # Gaussian kernel weights: training points near the query point get weight close to 1
        weights = np.exp(-((x_train - test_point) ** 2) / (2 * tau ** 2))
        X = np.vstack([np.ones_like(x_train), x_train]).T
        W = np.diag(weights)
        try:
            # Weighted normal equations: theta = (X^T W X)^-1 X^T W y
            theta = np.linalg.inv(X.T @ W @ X) @ (X.T @ (W @ y_train))
            y_pred[i] = np.array([1, test_point]) @ theta
        except np.linalg.LinAlgError:
            print("Singular matrix encountered. Adjusting tau or data may be needed.")
            y_pred[i] = 0  # or some other default value
    return y_pred

np.random.seed(42)
x_train = np.linspace(0, 10, 100)
y_train = 2 * np.sin(x_train) + np.random.normal(0, 0.2, 100)
x_test = np.linspace(0, 10, 50)
tau = 0.1 # Corrected tau value (0.01 was likely too small)

y_pred = lwr(x_train, y_train, x_test, tau)


plt.scatter(x_train, y_train, color='blue')
plt.plot(x_test, y_pred, color='red')
plt.title("Locally Weighted Regression")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
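
# Optional sketch (not part of the original program): the bandwidth tau controls
# smoothing; smaller values follow the noise, larger values smooth it out.
plt.scatter(x_train, y_train, color='blue', alpha=0.5)
for t in [0.1, 0.5, 1.0]:  # example bandwidths, chosen for illustration
    plt.plot(x_test, lwr(x_train, y_train, x_test, t), label=f"tau={t}")
plt.legend()
plt.title("Effect of tau on Locally Weighted Regression")
plt.show()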

# --- Program 10: Naive Bayes ---


# Filename: naive_bayes.py
"""
Implements Naive Bayes classification on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score

iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


precision = precision_score(y_test, y_pred, average='weighted')

print('Accuracy:', accuracy)
print("Precision:", precision)

# predict for new values


new_data = [[5.1, 3.5, 1.4, 0.2], [6.9, 3.2, 5.7, 2.3]] # Example Data
new_prediction = nb_classifier.predict(new_data)

print("New predictions:", [target_names[prediction] for prediction in new_prediction])

# --- Program 11: EM and K-Means Clustering ---


# Filename: em_kmeans.py
"""
Compares EM (Gaussian Mixture Model) and K-Means clustering on
a heart disease dataset.
"""
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

# Load the data, handling potential errors


try:
    data = pd.read_csv("heart.csv")  # Make sure 'heart.csv' is in the same directory
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed. Check the format.")
    exit()

features = ['trestbps', 'chol'] # Features for clustering


X = data[features]

# Scale the data


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means Clustering
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(X_scaled)
kmeans_labels = kmeans.labels_

# EM Clustering (Gaussian Mixture Model)


em = GaussianMixture(n_components=2, random_state=42)
em.fit(X_scaled)
em_labels = em.predict(X_scaled)

# Create a colormap
colormap = np.array(['red', 'green'])

# Create the plot


plt.figure(figsize=(14, 5))

# Original Data
plt.subplot(1, 3, 1)
plt.scatter(X['trestbps'], X['chol'], c=colormap[data['target']], s=40)
plt.title('Original Data')
plt.xlabel('trestbps')
plt.ylabel('chol')

# K-Means Clustering
plt.subplot(1, 3, 2)
plt.scatter(X['trestbps'], X['chol'], c=colormap[kmeans_labels], s=40)
plt.title('K-Means Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

# EM Clustering
plt.subplot(1, 3, 3)
plt.scatter(X['trestbps'], X['chol'], c=colormap[em_labels], s=40)
plt.title('EM Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

plt.show()
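
# Optional sketch: silhouette scores give a rough quantitative comparison of the
# two clusterings (higher means better-separated clusters).
from sklearn.metrics import silhouette_score
print("K-Means silhouette:", silhouette_score(X_scaled, kmeans_labels))
print("EM (GMM) silhouette:", silhouette_score(X_scaled, em_labels))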

# --- Program 12: Exploratory Data Analysis (EDA) ---


# Filename: exploratory_data_analysis.py
"""
Performs Exploratory Data Analysis on the Iris dataset using pandas and matplotlib.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data, with error handling


try:
    df = pd.read_csv('IRIS.csv')  # Change filename to IRIS.csv
except FileNotFoundError:
    print("Error: The file 'IRIS.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'IRIS.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'IRIS.csv' could not be parsed.")
    exit()

print("Exploratory data analysis of iris dataset:\n")

print("First few rows:\n", df.head())

print("\nData information:\n", df.info())

print("\nMissing values:\n", df.isnull().sum())


print("\nColumn names:\n", df.columns)

print("\nValue counts for 'species':\n", df['species'].value_counts())

print("\nData types of each column:\n", df.dtypes)

print("\nCorrelation matrix:\n", df.corr(numeric_only=True)) # numeric_only added

# --- Program 13: Bayesian Network ---


# Filename: bayesian_network.py
"""
Constructs a Bayesian network for diagnosing heart disease using pgmpy.
"""
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the data, handling possible errors


try:
    heartDisease = pd.read_csv('heart.csv')  # Replace with correct path
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed.")
    exit()

heartDisease = heartDisease.replace('?', np.nan)

print("Few examples from dataset are:")


print(heartDisease.head())

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

print("\nLearning CPD using Maximum Likelihood Estimators")


model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print("\nInferencing with Bayesian Network!")


HeartDisease_infer = VariableElimination(model)
print("\n1. Probability of Heart Disease given age = 30:")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30})
print(q['heartdisease'])

print("\n2. Probability of Heart Disease given cholesterol = 100:")


q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])

# --- Program 14: Support Vector Machine (SVM) ---


# Filename: svm_classification.py
"""
Implements Support Vector Machine (SVM) classification on the Iris dataset.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load data
iris = load_iris()
X = iris.data[:, :2] # Use only the first two features for visualization
y = iris.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Scale data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train SVM classifier


svm_classifier = SVC(kernel='linear', C=1.0, random_state=0)
svm_classifier.fit(X_train, y_train)

# Make predictions
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Training accuracy:", accuracy_train)
print("Testing accuracy:", accuracy_test)

# Plot decision boundary (adapted from example)


def plot_decision_boundary(classifier, X, y):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k')
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(svm_classifier, X_train, y_train)

# --- Program 15: Principal Component Analysis (PCA) ---


# Filename: pca_analysis.py
"""
Demonstrates Principal Component Analysis (PCA) using scikit-learn.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Example 1 (Simple array)


X = np.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])

pca = PCA(n_components=2)
pca.fit(X)
X_transformed = pca.transform(X)

print("Original data:\n", X)
print("\nTransformed data:\n", X_transformed)

# Example 2 (Iris dataset)


iris = load_iris()
X = iris.data
y = iris.target
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

plt.figure(figsize=(8, 6))
for i in range(len(iris.target_names)):
    plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], label=iris.target_names[i])

plt.xlabel('Principal Component 1')


plt.ylabel('Principal Component 2')
plt.title('PCA of Iris Dataset')
plt.legend()
plt.show()
