-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathplot_clara_digits.py
120 lines (103 loc) · 3.15 KB
/
plot_clara_digits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
======================================================================
A demo of K-Medoids vs CLARA clustering on the handwritten digits data
======================================================================
In this example we compare the computation times of K-Medoids and CLARA on
the handwritten digits data.
"""
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn_extra.cluster import KMedoids, CLARA
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
print(__doc__)
# Authors: Timo Erkkilä <[email protected]>
# Antti Lehmussola <[email protected]>
# Kornel Kiełczewski <[email protected]>
# License: BSD 3 clause
# Seed NumPy's global RNG so every run produces the same clustering/plots.
np.random.seed(42)

# Load the digits, standardize each feature, and project the standardized
# data onto its first two principal components for 2-D visualization.
digits = load_digits()
data = scale(digits.data)
n_digits = len(np.unique(digits.target))
reduced_data = PCA(n_components=2).fit_transform(data)

# Step size of the mesh. Decrease to increase the quality of the VQ.
h = 0.02  # point in the mesh [x_min, m_max]x[y_min, y_max].

# Build a mesh covering the reduced data with a 1-unit margin on every side;
# each mesh point will later be colored by the cluster it is assigned to.
lower = reduced_data.min(axis=0) - 1
upper = reduced_data.max(axis=0) + 1
x_min, x_max = lower[0], upper[0]
y_min, y_max = lower[1], upper[1]
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Fresh, empty figure shared by all four subplots.
plt.figure()
plt.clf()
plt.suptitle(
    "Comparing KMedoids and CLARA",
    fontsize=14,
)

# Estimators to compare, each paired with the label used as its subplot
# title. Both algorithms are run with the cosine and manhattan metrics.
selected_models = [
    (KMedoids(metric="cosine", n_clusters=n_digits), "KMedoids (cosine)"),
    (KMedoids(metric="manhattan", n_clusters=n_digits), "KMedoids (manhattan)"),
    (
        CLARA(metric="cosine", n_clusters=n_digits, init="heuristic", n_sampling=50),
        "CLARA (cosine)",
    ),
    (
        CLARA(metric="manhattan", n_clusters=n_digits, init="heuristic", n_sampling=50),
        "CLARA (manhattan)",
    ),
]
# Grid layout: two columns, as many rows as needed for all models.
plot_rows = int(np.ceil(len(selected_models) / 2.0))
plot_cols = 2
for i, (model, description) in enumerate(selected_models):
    # Time the fit on the 2-D data plus the prediction over the whole mesh;
    # this combined wall-clock time is what the subplot title reports.
    init_time = time.time()
    model.fit(reduced_data)
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    computation_time = time.time() - init_time
    # Put the result into a color plot.
    Z = Z.reshape(xx.shape)
    # BUG FIX: plt.subplot takes (nrows, ncols, index). The original passed
    # (plot_cols, plot_rows, ...), which only worked because both happen to
    # be 2 for the four models above; any other model count would lay the
    # grid out wrong.
    plt.subplot(plot_rows, plot_cols, i + 1)
    plt.imshow(
        Z,
        interpolation="nearest",
        extent=(xx.min(), xx.max(), yy.min(), yy.max()),
        cmap=plt.cm.Paired,
        aspect="auto",
        origin="lower",
    )
    # Overlay the actual data points on the colored cluster regions.
    plt.plot(
        reduced_data[:, 0], reduced_data[:, 1], "k.", markersize=2, alpha=0.3
    )
    # Plot the medoids as a white X.
    centroids = model.cluster_centers_
    plt.scatter(
        centroids[:, 0],
        centroids[:, 1],
        marker="x",
        s=169,
        linewidths=3,
        color="w",
        zorder=10,
    )
    plt.title(description + ": %.2Fs" % (computation_time))
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
plt.show()