ML Manual
Lab 1:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv(r'housing.csv')
df.head()
df.shape
df.info()
df.nunique()
df.isnull().sum()
df.duplicated().sum()
df['total_bedrooms'].median()
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].median())
for i in df.iloc[:, 2:7]:
    df[i] = df[i].astype('int')
df.describe().T
Numerical = df.select_dtypes(include=[np.number]).columns
print(Numerical)
for col in Numerical:
    plt.figure(figsize=(10, 6))
    df[col].plot(kind='hist', title=col, bins=60, edgecolor='black')
    plt.ylabel('Frequency')
    plt.show()
for col in Numerical:
    plt.figure(figsize=(6, 6))
    sns.boxplot(df[col], color='blue')
    plt.title(col)
    plt.ylabel(col)
    plt.show()
Lab 2:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv(r'housing.csv')
variable_meaning = {
    "MedInc": "Median income in block group",
    "HouseAge": "Median house age in block group",
    "AveRooms": "Average number of rooms per household",
    "AveBedrms": "Average number of bedrooms per household",
    "Population": "Population of block group",
    "AveOccup": "Average number of household members",
    "Latitude": "Latitude of block group",
    "Longitude": "Longitude of block group",
    "Target": "Median house value (in $100,000s)"
}
variable_df = pd.DataFrame(list(variable_meaning.items()), columns=["Feature", "Description"])
print("\nVariable Meaning Table:")
print(variable_df)
df.hist(figsize=(12, 8), bins=30, edgecolor='black')  # df.hist creates its own figure
plt.suptitle("Feature Distributions", fontsize=16)
plt.show()
plt.figure(figsize=(12, 6))
sns.boxplot(data=df.select_dtypes(include=[np.number]))
plt.xticks(rotation=45)
plt.title("Boxplots of Features to Identify Outliers")
plt.show()
correlation_matrix = df.select_dtypes(include=[np.number]).corr()
print(correlation_matrix)
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', square=True)
plt.title("Correlation Heatmap of Features")
plt.show()
sns.pairplot(df.select_dtypes(include=[np.number]).sample(300), corner=True)
plt.suptitle("Pair Plot of Sampled Numerical Features", y=1.02)
plt.show()
Lab 3:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
iris = load_iris()
features = iris.data
target = iris.target
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
pca = PCA(n_components=2)
features_pca = pca.fit_transform(features_standardized)
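The lab stops after computing the two components; a minimal visualization sketch (plotting choices such as colouring by species are assumptions, not part of the original manual):
import numpy as np
import matplotlib.pyplot as plt
print("Explained variance ratio:", pca.explained_variance_ratio_)
plt.figure(figsize=(8, 6))
for label in np.unique(target):
    mask = target == label
    plt.scatter(features_pca[mask, 0], features_pca[mask, 1], label=iris.target_names[label])
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA of the Iris Dataset (2 Components)')
plt.legend()
plt.show()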
Lab 4:
import pandas as pd
data = pd.read_csv(r"training_data.csv")
def find_s_algorithm(data):
    attributes = data.iloc[:, :-1].values
    target = data.iloc[:, -1].values
    # initialize the hypothesis with the first positive example
    for i in range(len(target)):
        if target[i] == "Yes":
            hypothesis = attributes[i].copy()
            break
    # generalize: replace any attribute that disagrees with a positive example by '?'
    for i in range(len(target)):
        if target[i] == "Yes":
            for j in range(len(hypothesis)):
                if hypothesis[j] != attributes[i][j]:
                    hypothesis[j] = '?'
    return hypothesis
final_hypothesis = find_s_algorithm(data)
print("Most Specific Hypothesis:", final_hypothesis)
Lab 5:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)
values = np.random.rand(100)
labels = []
for i in values[:50]:
    if i <= 0.5:
        labels.append('Class1')
    else:
        labels.append('Class2')
labels += [None] * 50
data = {
"Point": [f"x{i+1}" for i in range(100)],
"Value": values,
"Label": labels
}
print(data)
print(type(data))  # the raw data is a plain dict
df = pd.DataFrame(data)
df.nunique()
df.shape
df.describe().T
df.isnull().sum()
num_col = df.select_dtypes(include=['int', 'float']).columns
for col in num_col:
    df[col].hist(bins=10, alpha=0.5, edgecolor='black', grid=False)
    plt.title(f'Histogram for {col}')
    plt.xlabel(col)
    plt.ylabel('Frequency')
    plt.show()
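KNeighborsClassifier and accuracy_score are imported above but never used; a minimal sketch of the implied task, classifying the 50 unlabeled points from the 50 labeled ones (the k values and the ground-truth rule Value <= 0.5 -> Class1 are assumptions taken from the labeling step above):
labeled = df[df['Label'].notna()]
unlabeled = df[df['Label'].isna()]
X_lab, y_lab = labeled[['Value']].values, labeled['Label'].values
X_unlab = unlabeled[['Value']].values
true_labels = np.where(unlabeled['Value'] <= 0.5, 'Class1', 'Class2')  # assumed ground truth
for k in [1, 3, 5, 20, 30]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_lab, y_lab)
    preds = knn.predict(X_unlab)
    print(f"k={k}: accuracy on the unlabeled half = {accuracy_score(true_labels, preds):.2f}")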
Lab 6:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
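df_lwr is referenced below but never created in the manual; as a stand-in, synthesize a noisy sine curve whose column names match the code that follows:
np.random.seed(42)
x_vals = np.linspace(0, 2 * np.pi, 100)
df_lwr = pd.DataFrame({'X': x_vals,
                       'Y': np.sin(x_vals) + 0.2 * np.random.randn(100)})  # assumed data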
X_train = df_lwr[['X']].values
y_train = df_lwr['Y'].values
X_train = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
tau = 0.5
X_range = np.linspace(X_train[:, 1].min(), X_train[:, 1].max(), 100)
y_pred = []
for x in X_range:
    x_vec = np.array([1, x])  # query point with intercept term
    # Gaussian kernel weights centred on the query point
    weights = np.exp(-cdist([[x]], X_train[:, 1:], 'sqeuclidean') / (2 * tau**2)).flatten()
    W = np.diag(weights)
    # weighted least-squares solution for the local parameters
    theta = np.linalg.pinv(X_train.T @ W @ X_train) @ (X_train.T @ W @ y_train)
    y_pred.append(x_vec @ theta)
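The loop fills y_pred but the manual never plots the result; a short sketch of the usual visualization:
plt.figure(figsize=(8, 5))
plt.scatter(X_train[:, 1], y_train, color='blue', alpha=0.5, label='Training data')
plt.plot(X_range, y_pred, color='red', linewidth=2, label=f'LWR fit (tau={tau})')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Locally Weighted Regression')
plt.legend()
plt.show()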
Lab 7:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
def linear_regression_california():
    housing = fetch_california_housing(as_frame=True)
    X = housing.data[["AveRooms"]]
    y = housing.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.plot(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Average number of rooms (AveRooms)")
    plt.ylabel("Median value of homes ($100,000)")
    plt.title("Linear Regression - California Housing Dataset")
    plt.legend()
    plt.show()
    print("Linear Regression - California Housing Dataset")
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))
def polynomial_regression_auto_mpg():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
    column_names = ["mpg", "cylinders", "displacement", "horsepower", "weight",
                    "acceleration", "model_year", "origin"]
    # the trailing quoted car-name field is tab-separated; treating the tab as a
    # comment marker drops it so the eight names above line up with the data
    data = pd.read_csv(url, sep=r'\s+', comment='\t', names=column_names, na_values="?")
    data = data.dropna()
    X = data["displacement"].values.reshape(-1, 1)
    y = data["mpg"].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    poly_model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), LinearRegression())
    poly_model.fit(X_train, y_train)
    y_pred = poly_model.predict(X_test)
    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.scatter(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Displacement")
    plt.ylabel("Miles per gallon (mpg)")
    plt.title("Polynomial Regression - Auto MPG Dataset")
    plt.legend()
    plt.show()
    print("Polynomial Regression - Auto MPG Dataset")
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))
if __name__ == "__main__":
print("Demonstra ng Linear Regression and Polynomial Regression\n")
linear_regression_california()
polynomial_regression_auto_mpg()
Lab 8:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
data = load_breast_cancer()
X = data.data
y = data.target
# the manual jumps straight to plotting clf; the classifier must be trained first
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
plt.figure(figsize=(12, 8))
tree.plot_tree(clf, filled=True, feature_names=data.feature_names,
               class_names=data.target_names)
plt.title("Decision Tree - Breast Cancer Dataset")
plt.show()
Lab 9:
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
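Only the imports survive for this lab; a minimal sketch of the workflow they imply, Gaussian Naive Bayes on the Olivetti faces (the 80/20 split and 5-fold cross-validation are assumptions):
faces = fetch_olivetti_faces(shuffle=True, random_state=42)
X, y = faces.data, faces.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, zero_division=0))
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print("5-fold CV accuracy:", scores.mean())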
Lab 10:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
data = load_breast_cancer()
X = data.data
y = data.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# the plots below reference kmeans and df, which the manual never defines; fit and build them here
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
clusters = kmeans.fit_predict(X_scaled)
df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = clusters
df['True Label'] = y
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black', alpha=0.7)
plt.title('K-Means Clustering of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm', s=100, edgecolor='black', alpha=0.7)
plt.title('True Labels of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="True Label")
plt.show()
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black', alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
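confusion_matrix and classification_report are imported for this lab but never used; a hedged evaluation sketch (K-Means cluster ids are arbitrary, so flip them when the flipped assignment agrees better with the true labels):
clusters = df['Cluster'].values
if (clusters == y).mean() < 0.5:
    clusters = 1 - clusters  # align arbitrary cluster ids with the true labels
print(confusion_matrix(y, clusters))
print(classification_report(y, clusters, target_names=data.target_names))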