MLLab Manual

The document outlines various machine learning tasks including data visualization, dimensionality reduction, classification algorithms, and regression techniques using different datasets. It includes implementations for creating histograms, box plots, PCA, k-NN, decision trees, and Naive Bayes classifiers. Each section provides code examples and explanations for the respective algorithms and datasets used.


MYSORE COLLEGE OF ENGINEERING AND MANAGEMENT

DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING


Machine Learning Lab (BCSL606)

1. Develop a program to create histograms for all numerical features and analyze the distribution of
each feature. Generate box plots for all numerical features and identify any outliers. Use California
Housing dataset.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Step 1: Load the California Housing dataset
data = fetch_california_housing(as_frame=True)
housing_df = data.frame

# Step 2: Create histograms for numerical features
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Plot histograms
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Step 3: Generate box plots for numerical features
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box Plot of {feature}')
plt.tight_layout()
plt.show()

# Step 4: Identify outliers using the IQR method
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) |
                          (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

# Optional: Print a summary of the dataset
print("\nDataset Summary:")
print(housing_df.describe())

OUTPUT:
2. Develop a program to compute the correlation matrix to understand the relationships between pairs
of features. Visualize the correlation matrix using a heatmap to know which variables have strong
positive/negative correlations. Create a pair plot to visualize pairwise relationships between features.
Use California Housing dataset.

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Step 1: Load the California Housing Dataset
california_data = fetch_california_housing(as_frame=True)
data = california_data.frame

# Step 2: Compute the correlation matrix
correlation_matrix = data.corr()

# Step 3: Visualize the correlation matrix using a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()

# Step 4: Create a pair plot to visualize pairwise relationships
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
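The pair plot above renders all 20,640 rows, which can be slow. A lighter variant, a sketch that plots a random subsample of the same frame, is:

# Faster pair plot on a random subsample of the 20,640 rows
sns.pairplot(data.sample(500, random_state=42), diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features (sample)', y=1.02)
plt.show()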
OUTPUT:
3. Develop a program to implement Principal Component Analysis (PCA) for reducing the
dimensionality of the Iris dataset from 4 features to 2.

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Convert to a DataFrame for better visualization
iris_df = pd.DataFrame(data, columns=iris.feature_names)

# Perform PCA to reduce dimensionality to 2
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

# Create a DataFrame for the reduced data
reduced_df = pd.DataFrame(data_reduced, columns=['Principal Component 1', 'Principal Component 2'])
reduced_df['Label'] = labels

# Plot the reduced data
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    plt.scatter(
        reduced_df[reduced_df['Label'] == label]['Principal Component 1'],
        reduced_df[reduced_df['Label'] == label]['Principal Component 2'],
        label=label_names[label],
        color=colors[i]
    )

plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
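A useful sanity check, assuming the fitted pca object above, is to print how much of the original variance the two components retain:

# Proportion of variance captured by each principal component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())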
OUTPUT:
4. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Find-S
algorithm to output a description of the set of all hypotheses consistent with the training examples.

import pandas as pd

def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)

    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # Start with no hypothesis; Find-S initialises from the first positive example.
    # (Using '?' as the initial value would conflate "not yet set" with
    # "generalised" and let later positive examples re-specialise an attribute.)
    hypothesis = None

    for _, row in data.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                # Most specific hypothesis: the first positive example itself
                hypothesis = list(row[attributes])
            else:
                # Generalise any attribute that disagrees with this positive example
                for i, value in enumerate(row[attributes]):
                    if hypothesis[i] != value:
                        hypothesis[i] = '?'

    return hypothesis

file_path = 'training_data.csv'
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)
OUTPUT:
5. Develop a program to implement k-Nearest Neighbour algorithm to classify the randomly generated
100 values of x in the range of [0,1]. Perform the following based on the dataset generated.

a) Label the first 50 points {x1,……,x50} as follows: if (xi ≤ 0.5), then xi ∊ Class1, else xi ∊ Class2
b) Classify the remaining points, x51,……,x100 using KNN. Perform this for k=1,2,3,4,5,20,30

import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

data = np.random.rand(100)

labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]

def euclidean_distance(x1, x2):
    return abs(x1 - x2)

def knn_classifier(train_data, train_labels, test_point, k):
    distances = [(euclidean_distance(test_point, train_data[i]), train_labels[i])
                 for i in range(len(train_data))]
    distances.sort(key=lambda x: x[0])
    k_nearest_neighbors = distances[:k]
    k_nearest_labels = [label for _, label in k_nearest_neighbors]
    return Counter(k_nearest_labels).most_common(1)[0][0]

train_data = data[:50]
train_labels = labels
test_data = data[50:]

k_values = [1, 2, 3, 4, 5, 20, 30]

print("--- k-Nearest Neighbors Classification ---")
print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 -> Class2)")
print("Testing dataset: Remaining 50 points to be classified\n")

results = {}

for k in k_values:
    print(f"Results for k = {k}:")
    classified_labels = [knn_classifier(train_data, train_labels, test_point, k)
                         for test_point in test_data]
    results[k] = classified_labels

    for i, label in enumerate(classified_labels, start=51):
        print(f"Point x{i} (value: {test_data[i - 51]:.4f}) is classified as {label}")
    print("\n")

print("Classification complete.\n")

for k in k_values:
    classified_labels = results[k]
    class1_points = [test_data[i] for i in range(len(test_data))
                     if classified_labels[i] == "Class1"]
    class2_points = [test_data[i] for i in range(len(test_data))
                     if classified_labels[i] == "Class2"]

    plt.figure(figsize=(10, 6))
    plt.scatter(train_data, [0] * len(train_data),
                c=["blue" if label == "Class1" else "red" for label in train_labels],
                label="Training Data", marker="o")
    plt.scatter(class1_points, [1] * len(class1_points), c="blue",
                label="Class1 (Test)", marker="x")
    plt.scatter(class2_points, [1] * len(class2_points), c="red",
                label="Class2 (Test)", marker="x")

    plt.title(f"k-NN Classification Results for k = {k}")
    plt.xlabel("Data Points")
    plt.ylabel("Classification Level")
    plt.legend()
    plt.grid(True)
    plt.show()
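Since the true class of every test point follows the same x <= 0.5 rule used to label the training data, the accuracy for each k can be checked directly. A small sketch, assuming results and test_data from above:

# Ground-truth labels for the test points follow the same labeling rule
true_test_labels = ["Class1" if x <= 0.5 else "Class2" for x in test_data]
for k in k_values:
    correct = sum(p == t for p, t in zip(results[k], true_test_labels))
    print(f"k = {k}: accuracy = {correct / len(true_test_labels):.2f}")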

OUTPUT:
6. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points.
Select appropriate data set for your experiment and draw graphs.
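For reference, at each query point x the algorithm solves a weighted least-squares problem with Gaussian weights, which is exactly what the code below computes:

$$w_i = \exp\!\left(-\frac{(x - x_i)^2}{2\tau^2}\right), \qquad \hat{\theta}(x) = (X^\top W X)^{-1} X^\top W y, \qquad \hat{y}(x) = x^\top \hat{\theta}(x)$$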

import numpy as np
import matplotlib.pyplot as plt

def gaussian_kernel(x, xi, tau):
    return np.exp(-np.sum((x - xi) ** 2) / (2 * tau ** 2))

def locally_weighted_regression(x, X, y, tau):
    m = X.shape[0]
    weights = np.array([gaussian_kernel(x, X[i], tau) for i in range(m)])
    W = np.diag(weights)
    X_transpose_W = X.T @ W
    theta = np.linalg.inv(X_transpose_W @ X) @ X_transpose_W @ y
    return x @ theta

np.random.seed(42)
X = np.linspace(0, 2 * np.pi, 100)
y = np.sin(X) + 0.1 * np.random.randn(100)
X_bias = np.c_[np.ones(X.shape), X]

x_test = np.linspace(0, 2 * np.pi, 200)
x_test_bias = np.c_[np.ones(x_test.shape), x_test]
tau = 0.5

y_pred = np.array([locally_weighted_regression(xi, X_bias, y, tau)
                   for xi in x_test_bias])

plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='red', label='Training Data', alpha=0.7)
plt.plot(x_test, y_pred, color='blue', label=f'LWR Fit (tau={tau})', linewidth=2)
plt.xlabel('X', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.title('Locally Weighted Regression', fontsize=14)
plt.legend(fontsize=10)
plt.grid(alpha=0.3)
plt.show()
OUTPUT:
7. Develop a program to demonstrate the working of Linear Regression and Polynomial Regression.
Use Boston Housing Dataset for Linear Regression and Auto MPG Dataset (for vehicle fuel
efficiency prediction) for Polynomial Regression.
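Note: the Boston Housing loader was removed from scikit-learn (version 1.2 onwards), so the implementation below substitutes the California Housing dataset for the linear-regression part.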

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

def linear_regression_california():
    housing = fetch_california_housing(as_frame=True)
    X = housing.data[["AveRooms"]]
    y = housing.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.plot(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Average number of rooms (AveRooms)")
    plt.ylabel("Median value of homes ($100,000)")
    plt.title("Linear Regression - California Housing Dataset")
    plt.legend()
    plt.show()

    print("Linear Regression - California Housing Dataset")
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))

def polynomial_regression_auto_mpg():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
    column_names = ["mpg", "cylinders", "displacement", "horsepower",
                    "weight", "acceleration", "model_year", "origin"]
    # The trailing car-name field is tab-separated and contains spaces; treating
    # tabs as comment markers drops it so the eight numeric columns parse cleanly
    data = pd.read_csv(url, names=column_names, na_values="?",
                       comment="\t", sep=" ", skipinitialspace=True)
    data = data.dropna()

    X = data["displacement"].values.reshape(-1, 1)
    y = data["mpg"].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42)

    poly_model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(),
                               LinearRegression())
    poly_model.fit(X_train, y_train)

    y_pred = poly_model.predict(X_test)

    plt.scatter(X_test, y_test, color="blue", label="Actual")
    plt.scatter(X_test, y_pred, color="red", label="Predicted")
    plt.xlabel("Displacement")
    plt.ylabel("Miles per gallon (mpg)")
    plt.title("Polynomial Regression - Auto MPG Dataset")
    plt.legend()
    plt.show()

    print("Polynomial Regression - Auto MPG Dataset")
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R^2 Score:", r2_score(y_test, y_pred))

if __name__ == "__main__":
    print("Demonstrating Linear Regression and Polynomial Regression\n")
    linear_regression_california()
    polynomial_regression_auto_mpg()

DATA SET (training_data.csv for Program 4, the Find-S algorithm):

Outlook,Temperature,Humidity,Windy,PlayTennis
Sunny,Hot,High,FALSE,No
Sunny,Hot,High,TRUE,No
Overcast,Hot,High,FALSE,Yes
Rain,Cold,High,FALSE,Yes
Rain,Cold,High,TRUE,No
Overcast,Hot,High,TRUE,Yes
Sunny,Hot,High,FALSE,No
OUTPUT:
8. Develop a program to demonstrate the working of the decision tree algorithm. Use Breast Cancer
Data set for building the decision tree and apply this knowledge to classify a new sample.

# Importing necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

data = load_breast_cancer()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

new_sample = np.array([X_test[0]])
prediction = clf.predict(new_sample)

prediction_class = "Benign" if prediction[0] == 1 else "Malignant"
print(f"Predicted Class for the new sample: {prediction_class}")

plt.figure(figsize=(12, 8))
tree.plot_tree(clf, filled=True, feature_names=data.feature_names,
               class_names=data.target_names)
plt.title("Decision Tree - Breast Cancer Dataset")
plt.show()
OUTPUT:
9. Develop a program to implement the Naive Bayesian classifier considering Olivetti Face Data set for
training. Compute the accuracy of the classifier, considering a few test data sets.

import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

data = fetch_olivetti_faces(shuffle=True, random_state=42)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=1))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, label, prediction in zip(axes.ravel(), X_test, y_test, y_pred):
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {label}, Pred: {prediction}")
    ax.axis('off')

plt.show()
OUTPUT:
10. Develop a program to implement k-means clustering using Wisconsin Breast Cancer data set and
visualize the clustering result.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

data = load_breast_cancer()
X = data.data
y = data.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=2, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)

print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y

plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster',
                palette='Set1', s=100, edgecolor='black', alpha=0.7)
plt.title('K-Means Clustering of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()

plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label',
                palette='coolwarm', s=100, edgecolor='black', alpha=0.7)
plt.title('True Labels of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="True Label")
plt.show()

plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster',
                palette='Set1', s=100, edgecolor='black', alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red',
            marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
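K-means assigns arbitrary cluster IDs, so the confusion matrix and classification report above can look inverted when cluster 0 happens to align with the benign class. A minimal sketch to align the IDs before scoring, assuming y and y_kmeans from above:

# Flip cluster IDs if the flipped assignment agrees better with the true labels
y_aligned = y_kmeans if (y_kmeans == y).mean() >= 0.5 else 1 - y_kmeans
print("Aligned Confusion Matrix:")
print(confusion_matrix(y, y_aligned))
print(classification_report(y, y_aligned))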

OUTPUT:
