Data Mining Lab Manual

M.Tech Data Mining Lab Manual


import numpy as np

# Transpose interchanges the rows and columns of a 2-D array
arr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [23, 33, 45]])
print(f'Original Array:\n{arr1}')
arr1_transpose = arr1.transpose()
print(f'Transposed Array:\n{arr1_transpose}')

arr2 = np.array([[10, 20, 30], [45, 78, 90], [1, 2, 3], [34, 67, 89]])
print(f'Original Array:\n{arr2}')
arr2_transpose = arr2.transpose()
print(f'Transposed Array:\n{arr2_transpose}')
Original Array:
[[ 1 2 3]
[ 4 5 6]
[ 7 8 9]
[23 33 45]]
Transposed Array:
[[ 1 4 7 23]
[ 2 5 8 33]
[ 3 6 9 45]]
Original Array:
[[10 20 30]
[45 78 90]
[ 1 2 3]
[34 67 89]]
Transposed Array:
[[10 45 1 34]
[20 78 2 67]
[30 90 3 89]]
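As a quick check (not part of the original cell), the transpose swaps an array's shape and is its own inverse; the sketch below verifies both properties for arr1.

In [ ]:
import numpy as np

arr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [23, 33, 45]])

# Shape (4, 3) becomes (3, 4) after transposing; .T is shorthand
print(arr1.shape, arr1.T.shape)

# Transposing twice returns the original array
print(np.array_equal(arr1.T.T, arr1))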
In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate synthetic data
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60,
                  random_state=0)

# Plot the data points
plt.scatter(X[:, 0], X[:, 1], s=50)
plt.title('Original Data Points')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

# Apply K-means clustering (fixed seed so the clusters are reproducible)
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)

# Plot the clustered data points and centroids
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75)
plt.title('Clustered Data with Centroids')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
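Here n_clusters=4 matches how the blobs were generated. When the true count is unknown, a common heuristic is the elbow method; the cell below is a minimal sketch of it (not in the original manual), reusing the same synthetic data.

In [ ]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60,
                  random_state=0)

# Fit K-means for k = 1..9 and record the inertia (within-cluster
# sum of squared distances); the bend ("elbow") suggests a good k.
inertias = []
ks = range(1, 10)
for k in ks:
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
    inertias.append(km.inertia_)

plt.plot(ks, inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.show()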

In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Generate synthetic data: y = 3 + 4x + Gaussian noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 3 + 4 * X + np.random.randn(100, 1)

# Plot the data points
plt.scatter(X, y, color='blue')
plt.title('Linear Regression Example')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Fit the linear regression model
model = LinearRegression()
model.fit(X, y)

# Predictions at the two ends of the x-range
X_new = np.array([[0], [2]])
y_pred = model.predict(X_new)

# Plot the linear regression line
plt.scatter(X, y, color='blue')
plt.plot(X_new, y_pred, color='red', linewidth=3)
plt.title('Linear Regression Fit')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
Coefficients: [[3.96846751]]
Intercept: [3.22215108]
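The recovered coefficient (~3.97) and intercept (~3.22) are close to the true values 4 and 3 used to generate the data. As a hedged follow-up (not in the original manual), model.score returns the coefficient of determination R², which summarizes how much variance the fitted line explains; the sketch below reuses model, X, and y from the cell above.

In [ ]:
# R^2 close to 1 means the line explains most of the variance
print(f'R^2: {model.score(X, y):.3f}')

# The residual spread should be near the noise scale (1.0) used above
residuals = y - model.predict(X)
print(f'Residual std: {residuals.std():.3f}')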
In [ ]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Transactions in long format: one row per (transaction, item) pair
data = {
    'Transaction_ID': [1, 1, 1, 2, 2, 3, 3, 3, 4, 4],
    'Item': ['A', 'B', 'C', 'A', 'B', 'B', 'C', 'D', 'A', 'C']
}
df = pd.DataFrame(data)

# Pivot to a basket matrix: one row per transaction, one column per item
basket = (df.groupby(['Transaction_ID', 'Item'])['Item']
            .count().unstack().reset_index().fillna(0)
            .set_index('Transaction_ID'))

# One-hot encode: any positive count becomes 1
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

# (pandas >= 2.1 prefers DataFrame.map over the deprecated applymap)
basket_sets = basket.applymap(encode_units)

# Mine frequent itemsets with a minimum support of 0.2
frequent_itemsets = apriori(basket_sets, min_support=0.2, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# Print the frequent itemsets
print("Frequent Itemsets:")
print(frequent_itemsets)

# Print the association rules
print("\nAssociation Rules:")
print(rules)
Output:
Frequent Itemsets:
    support   itemsets
0      0.75        (A)
1      0.75        (B)
2      0.75        (C)
3      0.25        (D)
4      0.50     (A, B)
5      0.50     (A, C)
6      0.50     (C, B)
7      0.25     (D, B)
8      0.25     (D, C)
9      0.25  (A, B, C)
10     0.25  (D, C, B)

Association Rules:
  antecedents consequents  antecedent support  consequent support  support  confidence      lift  leverage  conviction  zhangs_metric
0         (D)         (B)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
1         (B)         (D)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
2         (D)         (C)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
3         (C)         (D)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
4      (D, C)         (B)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
5      (D, B)         (C)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
6      (C, B)         (D)                0.50                0.25     0.25    0.500000  2.000000    0.1250       1.500       1.000000
7         (D)      (C, B)                0.25                0.50     0.25    1.000000  2.000000    0.1250         inf       0.666667
8         (C)      (D, B)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
9         (B)      (D, C)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
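Since rules is an ordinary DataFrame, it can be narrowed with boolean indexing. The sketch below (not in the original manual; the thresholds are illustrative) keeps only high-confidence, high-lift rules from the result above.

In [ ]:
# Keep only rules with confidence >= 0.8 and lift >= 1.5
strong = rules[(rules['confidence'] >= 0.8) & (rules['lift'] >= 1.5)]
print(strong[['antecedents', 'consequents', 'support', 'confidence', 'lift']])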

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Load the heart disease dataset (assuming it's in CSV format)
url = ("https://archive.ics.uci.edu/ml/machine-learning-databases/"
       "heart-disease/processed.cleveland.data")
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
         'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
data = pd.read_csv(url, names=names, na_values='?')

# Drop rows with missing values
data = data.dropna()

# Separate features and target variable
X = data.drop('target', axis=1)
y = data['target']

# Convert categorical variables to dummy variables if needed
# (Not necessary here as the dataset is already preprocessed)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on the test data
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}\n")

print("Classification Report:")
print(classification_report(y_test, y_pred))

Output:
Accuracy: 0.48

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.67      0.76        36
           1       0.13      0.22      0.17         9
           2       0.12      0.20      0.15         5
           3       0.25      0.29      0.27         7
           4       0.00      0.00      0.00         3

    accuracy                           0.48        60
   macro avg       0.28      0.27      0.27        60
weighted avg       0.59      0.48      0.53        60
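The low accuracy (0.48) partly reflects that an unpruned tree overfits and that the 5-class Cleveland target is imbalanced. One way to probe this, reusing X_train, X_test, y_train, and y_test from the cell above, is to limit tree depth and cross-validate; the sketch below is illustrative, not a tuned model.

In [ ]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# A shallower tree is less prone to overfitting on ~240 training rows
pruned = DecisionTreeClassifier(max_depth=4, random_state=42)

# 5-fold cross-validation on the training set gives a steadier estimate
scores = cross_val_score(pruned, X_train, y_train, cv=5)
print(f'CV accuracy: {scores.mean():.2f} +/- {scores.std():.2f}')

pruned.fit(X_train, y_train)
print(f'Test accuracy: {pruned.score(X_test, y_test):.2f}')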
