0% found this document useful (0 votes)
46 views

Lab Assignment 3 Ai

Uploaded by

yashutank46
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
46 views

Lab Assignment 3 Ai

Uploaded by

yashutank46
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 1

In [1]: #Write python script to implement KMeans Algorithm over a inputted dataset (Any data take of your own).

import pandas as pd
from sklearn.cluster import KMeans

data = pd.DataFrame({
"age": [25, 32, 40, 28, 35, 48, 38, 22, 27, 30],
"income": [50000, 70000, 85000, 62000, 78000, 95000, 82000, 45000, 52000, 65000]
})

k = 3

kmeans = KMeans(n_clusters=k, random_state=42)

kmeans.fit(data)

cluster_labels = kmeans.labels_

print("Cluster labels:", cluster_labels)

centroids = kmeans.cluster_centers_
print("Centroids:", centroids)

data["cluster"] = cluster_labels

print(data)

import matplotlib.pyplot as plt

plt.scatter(data["age"], data["income"], c=cluster_labels)


plt.xlabel("Age")
plt.ylabel("Income")
plt.title("Customer Clusters")
plt.show()

C:\tools\Anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set th
e value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\tools\Anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are l
ess chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
warnings.warn(
Cluster labels: [1 2 0 2 0 0 0 1 1 2]
Centroids: [[4.02500000e+01 8.50000000e+04]
[2.46666667e+01 4.90000000e+04]
[3.00000000e+01 6.56666667e+04]]
age income cluster
0 25 50000 1
1 32 70000 2
2 40 85000 0
3 28 62000 2
4 35 78000 0
5 48 95000 0
6 38 82000 0
7 22 45000 1
8 27 52000 1
9 30 65000 2

In [2]: #Write python script to implement Hierarchical clustering Algorithm over a inputted dataset (Any data take of your own).

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

np.random.seed(42)
data, _ = make_blobs(n_samples=300, centers=4, random_state=42)

k = int(input("Enter the number of clusters (K): "))

hc_model = AgglomerativeClustering(n_clusters=k, affinity='euclidean', linkage='ward')


hc_labels = hc_model.fit_predict(data)

plt.scatter(data[:, 0], data[:, 1], c=hc_labels, cmap='viridis', edgecolors='k', s=50)


plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

linked = linkage(data, 'ward')


dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Cluster Distance')
plt.show()

Enter the number of clusters (K): 3


C:\tools\Anaconda3\lib\site-packages\sklearn\cluster\_agglomerative.py:983: FutureWarning: Attribute `affinity` was deprecated in version 1.2 and will be remo
ved in 1.4. Use `metric` instead
warnings.warn(

In [3]: #Write python script to implement decision tree over a inputted dataset (Any data take of your own).

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

np.random.seed(42)
data = pd.DataFrame({
'Feature1': np.random.rand(100),
'Feature2': np.random.rand(100),
'Label': np.random.choice([0, 1], size=100)
})

X = data[['Feature1', 'Feature2']]
y = data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dt_classifier = DecisionTreeClassifier(random_state=42)

dt_classifier.fit(X_train, y_train)

y_pred = dt_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)

Accuracy: 0.65
Confusion Matrix:
[[4 2]
[5 9]]
Classification Report:
precision recall f1-score support

0 0.44 0.67 0.53 6


1 0.82 0.64 0.72 14

accuracy 0.65 20
macro avg 0.63 0.65 0.63 20
weighted avg 0.71 0.65 0.66 20

In [ ]:

You might also like