Data Mining Lab Manual

M.Tech Data Mining Lab Manual


import numpy as np

# Transpose interchanges the rows and columns of a 2-D array
arr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [23, 33, 45]])
print(f'Original Array:\n{arr1}')
arr1_transpose = arr1.transpose()
print(f'Transposed Array:\n{arr1_transpose}')

arr2 = np.array([[10, 20, 30], [45, 78, 90], [1, 2, 3], [34, 67, 89]])
print(f'Original Array:\n{arr2}')
arr2_transpose = arr2.transpose()
print(f'Transposed Array:\n{arr2_transpose}')
Original Array:
[[ 1 2 3]
[ 4 5 6]
[ 7 8 9]
[23 33 45]]
Transposed Array:
[[ 1 4 7 23]
[ 2 5 8 33]
[ 3 6 9 45]]
Original Array:
[[10 20 30]
[45 78 90]
[ 1 2 3]
[34 67 89]]
Transposed Array:
[[10 45 1 34]
[20 78 2 67]
[30 90 3 89]]
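As a quick check (not part of the original cell), the transpose swaps an array's shape and is its own inverse; the sketch below verifies both properties for arr1.

In [ ]:
import numpy as np

arr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [23, 33, 45]])

# Shape (4, 3) becomes (3, 4) after transposing; .T is shorthand
print(arr1.shape, arr1.T.shape)

# Transposing twice returns the original array
print(np.array_equal(arr1.T.T, arr1))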
In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate synthetic data
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60,
                  random_state=0)

# Plot the data points
plt.scatter(X[:, 0], X[:, 1], s=50)
plt.title('Original Data Points')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

# Apply K-means clustering (fixed seed so the clusters are reproducible)
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)

# Plot the clustered data points and centroids
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75)
plt.title('Clustered Data with Centroids')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
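Here n_clusters=4 matches how the blobs were generated. When the true count is unknown, a common heuristic is the elbow method; the cell below is a minimal sketch of it (not in the original manual), reusing the same synthetic data.

In [ ]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60,
                  random_state=0)

# Fit K-means for k = 1..9 and record the inertia (within-cluster
# sum of squared distances); the bend ("elbow") suggests a good k.
inertias = []
ks = range(1, 10)
for k in ks:
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
    inertias.append(km.inertia_)

plt.plot(ks, inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.show()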

In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Generate synthetic data: y = 3 + 4x + Gaussian noise
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 3 + 4 * X + np.random.randn(100, 1)

# Plot the data points
plt.scatter(X, y, color='blue')
plt.title('Linear Regression Example')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Fit the linear regression model
model = LinearRegression()
model.fit(X, y)

# Predictions at the two ends of the x-range
X_new = np.array([[0], [2]])
y_pred = model.predict(X_new)

# Plot the linear regression line
plt.scatter(X, y, color='blue')
plt.plot(X_new, y_pred, color='red', linewidth=3)
plt.title('Linear Regression Fit')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
Coefficients: [[3.96846751]]
Intercept: [3.22215108]
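The recovered coefficient (~3.97) and intercept (~3.22) are close to the true values 4 and 3 used to generate the data. As a hedged follow-up (not in the original manual), model.score returns the coefficient of determination R², which summarizes how much variance the fitted line explains; the sketch below reuses model, X, and y from the cell above.

In [ ]:
# R^2 close to 1 means the line explains most of the variance
print(f'R^2: {model.score(X, y):.3f}')

# The residual spread should be near the noise scale (1.0) used above
residuals = y - model.predict(X)
print(f'Residual std: {residuals.std():.3f}')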
In [ ]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Transactions in long format: one row per (transaction, item) pair
data = {
    'Transaction_ID': [1, 1, 1, 2, 2, 3, 3, 3, 4, 4],
    'Item': ['A', 'B', 'C', 'A', 'B', 'B', 'C', 'D', 'A', 'C']
}
df = pd.DataFrame(data)

# Pivot to a basket matrix: one row per transaction, one column per item
basket = (df.groupby(['Transaction_ID', 'Item'])['Item']
            .count().unstack().reset_index().fillna(0)
            .set_index('Transaction_ID'))

# One-hot encode: any positive count becomes 1
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

# (pandas >= 2.1 prefers DataFrame.map over the deprecated applymap)
basket_sets = basket.applymap(encode_units)

# Mine frequent itemsets with a minimum support of 0.2
frequent_itemsets = apriori(basket_sets, min_support=0.2, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# Print the frequent itemsets
print("Frequent Itemsets:")
print(frequent_itemsets)

# Print the association rules
print("\nAssociation Rules:")
print(rules)
Output:
Frequent Itemsets:
    support   itemsets
0      0.75        (A)
1      0.75        (B)
2      0.75        (C)
3      0.25        (D)
4      0.50     (A, B)
5      0.50     (A, C)
6      0.50     (C, B)
7      0.25     (D, B)
8      0.25     (D, C)
9      0.25  (A, B, C)
10     0.25  (D, C, B)

Association Rules:
  antecedents consequents  antecedent support  consequent support  support  confidence      lift  leverage  conviction  zhangs_metric
0         (D)         (B)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
1         (B)         (D)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
2         (D)         (C)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
3         (C)         (D)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
4      (D, C)         (B)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
5      (D, B)         (C)                0.25                0.75     0.25    1.000000  1.333333    0.0625         inf       0.333333
6      (C, B)         (D)                0.50                0.25     0.25    0.500000  2.000000    0.1250       1.500       1.000000
7         (D)      (C, B)                0.25                0.50     0.25    1.000000  2.000000    0.1250         inf       0.666667
8         (C)      (D, B)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
9         (B)      (D, C)                0.75                0.25     0.25    0.333333  1.333333    0.0625       1.125       1.000000
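Since rules is an ordinary DataFrame, it can be narrowed with boolean indexing. The sketch below (not in the original manual; the thresholds are illustrative) keeps only high-confidence, high-lift rules from the result above.

In [ ]:
# Keep only rules with confidence >= 0.8 and lift >= 1.5
strong = rules[(rules['confidence'] >= 0.8) & (rules['lift'] >= 1.5)]
print(strong[['antecedents', 'consequents', 'support', 'confidence', 'lift']])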

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Load the heart disease dataset (assuming it's in CSV format)
url = ("https://archive.ics.uci.edu/ml/machine-learning-databases/"
       "heart-disease/processed.cleveland.data")
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
         'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
data = pd.read_csv(url, names=names, na_values='?')

# Drop rows with missing values
data = data.dropna()

# Separate features and target variable
X = data.drop('target', axis=1)
y = data['target']

# Convert categorical variables to dummy variables if needed
# (Not necessary here as the dataset is already preprocessed)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the decision tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on the test data
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}\n")

print("Classification Report:")
print(classification_report(y_test, y_pred))

Output:
Accuracy: 0.48

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.67      0.76        36
           1       0.13      0.22      0.17         9
           2       0.12      0.20      0.15         5
           3       0.25      0.29      0.27         7
           4       0.00      0.00      0.00         3

    accuracy                           0.48        60
   macro avg       0.28      0.27      0.27        60
weighted avg       0.59      0.48      0.53        60
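The low accuracy (0.48) partly reflects that an unpruned tree overfits and that the 5-class Cleveland target is imbalanced. One way to probe this, reusing X_train, X_test, y_train, and y_test from the cell above, is to limit tree depth and cross-validate; the sketch below is illustrative, not a tuned model.

In [ ]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# A shallower tree is less prone to overfitting on ~240 training rows
pruned = DecisionTreeClassifier(max_depth=4, random_state=42)

# 5-fold cross-validation on the training set gives a steadier estimate
scores = cross_val_score(pruned, X_train, y_train, cv=5)
print(f'CV accuracy: {scores.mean():.2f} +/- {scores.std():.2f}')

pruned.fit(X_train, y_train)
print(f'Test accuracy: {pruned.score(X_test, y_test):.2f}')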
