
20

pip install pandas numpy scikit-learn matplotlib seaborn


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the Boston housing dataset
# (load_boston was removed in scikit-learn 1.2; see the note after this experiment)
boston = load_boston()
data = pd.DataFrame(boston.data, columns=boston.feature_names)
data['PRICE'] = boston.target

# Display the first few rows of the dataset


print(data.head())

# Exploratory Data Analysis (EDA)


plt.figure(figsize=(10, 6))
sns.heatmap(data.corr(), annot=True, fmt=".2f", cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Prepare the data for linear regression


X = data.drop('PRICE', axis=1) # Features
y = data['PRICE'] # Target variable
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a linear regression model


model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")


print(f"R² Score: {r2:.2f}")

# Visualize the results


plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--')
plt.xlabel('Actual Prices')
plt.ylabel('Predicted Prices')
plt.title('Actual vs Predicted Prices')
plt.show()
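Note: load_boston() was removed in scikit-learn 1.2. If the import above fails, a minimal sketch of an alternative is to pull the same data from OpenML (this assumes internet access and that the "boston" dataset is still hosted there); the rest of the experiment is unchanged.

# Alternative loader (sketch): fetch the Boston data from OpenML
from sklearn.datasets import fetch_openml

boston = fetch_openml(name="boston", version=1, as_frame=True)
data = boston.frame.astype(float)                # some columns arrive as categorical
data = data.rename(columns={"MEDV": "PRICE"})    # the OpenML target column is MEDV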

21 A
import pandas as pd
import numpy as np
from scipy import stats

# Load the datasets


uci_diabetes = pd.read_csv('uci_diabetes.csv') # replace with the correct file path
pima_diabetes = pd.read_csv('pima_diabetes.csv') # replace with the correct file path

def univariate_analysis(dataset):
    univariate_stats = {}
    for column in dataset.columns:
        univariate_stats[column] = {
            'Mean': dataset[column].mean(),
            'Median': dataset[column].median(),
            'Mode': dataset[column].mode()[0],
            'Variance': dataset[column].var(),
            'Standard Deviation': dataset[column].std(),
            'Skewness': dataset[column].skew(),
            'Kurtosis': dataset[column].kurt()
        }
    return pd.DataFrame(univariate_stats)

# Univariate analysis for both datasets


uci_univariate = univariate_analysis(uci_diabetes)
pima_univariate = univariate_analysis(pima_diabetes)

print("UCI Diabetes Univariate Analysis:\n", uci_univariate)


print("\nPima Diabetes Univariate Analysis:\n", pima_univariate)

21 B
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Let's assume "Outcome" is the target variable for logistic regression


# Define features and target
X_uci = uci_diabetes.drop(columns='Outcome')
y_uci = uci_diabetes['Outcome']

X_pima = pima_diabetes.drop(columns='Outcome')
y_pima = pima_diabetes['Outcome']

# Bivariate: Linear Regression


def linear_regression(X, y):
    X = sm.add_constant(X)
    model = sm.OLS(y, X).fit()
    return model.summary()

# Bivariate: Logistic Regression


def logistic_regression(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
    log_model = LogisticRegression(max_iter=1000)
    log_model.fit(X_train, y_train)
    y_pred = log_model.predict(X_test)

    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Accuracy Score: ", accuracy_score(y_test, y_pred))

# Linear Regression for UCI and Pima datasets


print("UCI Diabetes Linear Regression:\n", linear_regression(X_uci, y_uci))
print("Pima Diabetes Linear Regression:\n", linear_regression(X_pima, y_pima))

# Logistic Regression for UCI and Pima datasets


print("UCI Diabetes Logistic Regression:")
logistic_regression(X_uci, y_uci)

print("\nPima Diabetes Logistic Regression:")


logistic_regression(X_pima, y_pima)

21 C
# Multiple Regression
def multiple_regression(X, y):
    X = sm.add_constant(X)
    model = sm.OLS(y, X).fit()
    return model.summary()

# Perform multiple regression analysis for both datasets


print("UCI Diabetes Multiple Regression:\n", multiple_regression(X_uci, y_uci))
print("Pima Diabetes Multiple Regression:\n", multiple_regression(X_pima, y_pima))

21 D
# Compare Univariate results
print("Comparing Univariate Results:\n")
print("UCI Diabetes Stats:\n", uci_univariate)
print("\nPima Diabetes Stats:\n", pima_univariate)

# Compare Bivariate and Multiple Regression outputs by analyzing the summaries printed earlier
EXPERIMENT 5
27
import numpy as np
from collections import Counter

# Define the Euclidean distance function


def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((np.array(point1) - np.array(point2)) ** 2))

# KNN function
def knn_classify(data, labels, query_point, k=3):
    # Calculate distances between the query point and all points in the dataset
    distances = []
    for i, point in enumerate(data):
        distance = euclidean_distance(point, query_point)
        distances.append((distance, labels[i]))

    # Sort distances and select the k nearest neighbors
    distances.sort(key=lambda x: x[0])
    k_nearest_labels = [label for _, label in distances[:k]]

    # Get the most common label among the nearest neighbors
    most_common = Counter(k_nearest_labels).most_common(1)
    return most_common[0][0]

# Example usage
if __name__ == "__main__":
    # Example dataset (points and their labels)
    data = [
        [2, 3],
        [1, 1],
        [4, 4],
        [6, 6],
        [8, 8]
    ]
    labels = ["A", "A", "B", "B", "A"]

    # Query point
    query_point = [3, 3]

    # Number of neighbors
    k = 3

    # Classify the query point
    predicted_label = knn_classify(data, labels, query_point, k)
    print(f"The predicted label for the query point {query_point} is: {predicted_label}")

28
pip install scikit-learn
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load a sample dataset (e.g., Iris dataset)


data = datasets.load_iris()
X = data.data # Features
y = data.target # Target labels

# Split the dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train the Decision Tree Classifier


clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict the test set results


y_pred = clf.predict(X_test)

# Calculate and print the accuracy


accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Decision Tree Classifier: {accuracy * 100:.2f}%")

# Predict a new query point (example point from the Iris dataset)
query_point = [5.1, 3.5, 1.4, 0.2] # You can change this to any sample
predicted_class = clf.predict([query_point])
print(f"The predicted class for the query point {query_point} is:
{data.target_names[predicted_class][0]}")
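As a small optional extension (a sketch, assuming matplotlib is installed alongside scikit-learn), the fitted tree can be drawn with sklearn's plot_tree, using the feature and class names from the loaded Iris dataset.

# Sketch: visualize the trained decision tree
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()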

29
import numpy as np

# Calculate the Euclidean distance between two points


def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((np.array(point1) - np.array(point2)) ** 2))

# Function to find the closest clusters


def find_closest_clusters(clusters):
    min_distance = float('inf')
    closest_pair = (0, 1)
    for i in range(len(clusters)):
        for j in range(i + 1, len(clusters)):
            # Calculate the minimum distance between clusters[i] and clusters[j]
            for point1 in clusters[i]:
                for point2 in clusters[j]:
                    distance = euclidean_distance(point1, point2)
                    if distance < min_distance:
                        min_distance = distance
                        closest_pair = (i, j)
    return closest_pair

# Hierarchical clustering function


def hierarchical_clustering(data, num_clusters=2):
    # Start with each point as its own cluster
    clusters = [[point] for point in data]

    # Loop until we reach the desired number of clusters
    while len(clusters) > num_clusters:
        # Find the closest clusters
        i, j = find_closest_clusters(clusters)

        # Merge clusters i and j
        clusters[i].extend(clusters[j])
        del clusters[j]

    return clusters

# Sample data points


data = [
[1, 2],
[2, 3],
[3, 4],
[8, 7],
[8, 8],
[25, 80]
]

# Set the desired number of clusters


num_clusters = 2
# Perform hierarchical clustering
clusters = hierarchical_clustering(data, num_clusters)

# Output the clusters


for idx, cluster in enumerate(clusters):
    print(f"Cluster {idx + 1}: {cluster}")

30
import numpy as np

# Calculate Euclidean distance between two points


def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((np.array(point1) - np.array(point2)) ** 2))

# Find all neighbors within epsilon distance of a point


def find_neighbors(data, point_idx, epsilon):
    neighbors = []
    for idx, point in enumerate(data):
        if euclidean_distance(data[point_idx], point) < epsilon:
            neighbors.append(idx)
    return neighbors

# DBSCAN function
def dbscan(data, epsilon, min_points):
    # Initialize all points as unvisited
    visited = [False] * len(data)
    clusters = [-1] * len(data)  # -1 means noise
    cluster_id = 0

    # Iterate through each point
    for point_idx in range(len(data)):
        if visited[point_idx]:
            continue

        # Mark the point as visited
        visited[point_idx] = True

        # Find neighbors
        neighbors = find_neighbors(data, point_idx, epsilon)

        # Mark as noise if it has fewer neighbors than min_points
        if len(neighbors) < min_points:
            clusters[point_idx] = -1
        else:
            # Otherwise, create a new cluster and expand it
            cluster_id += 1
            clusters[point_idx] = cluster_id
            i = 0
            while i < len(neighbors):
                neighbor_idx = neighbors[i]

                if not visited[neighbor_idx]:
                    visited[neighbor_idx] = True
                    new_neighbors = find_neighbors(data, neighbor_idx, epsilon)
                    if len(new_neighbors) >= min_points:
                        neighbors.extend(new_neighbors)

                # Add the point to the cluster if it's not already in one
                if clusters[neighbor_idx] == -1:
                    clusters[neighbor_idx] = cluster_id
                i += 1

    return clusters

# Example data points


data = [
[1, 2],
[2, 2],
[2, 3],
[8, 7],
[8, 8],
[25, 80]
]

# Parameters
epsilon = 2
min_points = 2

# Perform DBSCAN clustering


clusters = dbscan(data, epsilon, min_points)

# Output the clusters


for idx, cluster_id in enumerate(clusters):
    print(f"Point {data[idx]} -> Cluster {cluster_id}")

31
import numpy as np

# Function to perform PCA


def pca(data, num_components):
    # Step 1: Standardize the dataset by centering it around the mean
    mean = np.mean(data, axis=0)
    centered_data = data - mean

    # Step 2: Calculate the covariance matrix
    covariance_matrix = np.cov(centered_data, rowvar=False)

    # Step 3: Compute eigenvalues and eigenvectors
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Step 4: Sort eigenvalues and eigenvectors in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvalues = eigenvalues[sorted_indices]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Step 5: Select the top 'num_components' eigenvectors
    selected_eigenvectors = sorted_eigenvectors[:, :num_components]

    # Step 6: Transform the data to the new subspace
    reduced_data = np.dot(centered_data, selected_eigenvectors)

    return reduced_data, sorted_eigenvalues[:num_components], selected_eigenvectors

# Example dataset (5 samples, 3 features)


data = np.array([
[4.9, 3.0, 1.4],
[4.7, 3.2, 1.3],
[4.6, 3.1, 1.5],
[5.0, 3.6, 1.4],
[5.4, 3.9, 1.7]
])

# Set the number of components to reduce to (e.g., 2)


num_components = 2

# Perform PCA
reduced_data, eigenvalues, eigenvectors = pca(data, num_components)

# Output the results


print("Reduced Data:")
print(reduced_data)
print("\nEigenvalues:")
print(eigenvalues)
print("\nEigenvectors (Principal Components):")
print(eigenvectors)
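An optional sanity check (a sketch, assuming scikit-learn is installed): sklearn's PCA on the same array should report the same eigenvalues as explained_variance_, and the projected coordinates should match up to the sign of each component, which is an expected ambiguity in PCA.

# Sketch: compare against scikit-learn's PCA
from sklearn.decomposition import PCA

sk_pca = PCA(n_components=num_components)
print(sk_pca.fit_transform(data))   # may differ from reduced_data by a sign per column
print(sk_pca.explained_variance_)   # should match the eigenvalues above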
