"""
=========================================
Comparison of Manifold Learning methods
=========================================
An illustration of dimensionality reduction on the S-curve dataset
with various manifold learning methods.
For a discussion and comparison of these algorithms, see the
:ref:`manifold module page <manifold>`
For a similar example, where the methods are applied to a
sphere dataset, see :ref:`sphx_glr_auto_examples_manifold_plot_manifold_sphere.py`
Note that the purpose of the MDS is to find a low-dimensional
representation of the data (here 2D) in which the distances respect well
the distances in the original high-dimensional space, unlike other
manifold-learning algorithms, it does not seeks an isotropic
representation of the data in the low-dimensional space.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

# %%
# Dataset preparation
# -------------------
#
# We start by generating the S-curve dataset.
import matplotlib.pyplot as plt

# unused but required import for doing 3d projections with matplotlib < 3.2
import mpl_toolkits.mplot3d  # noqa: F401
from matplotlib import ticker

from sklearn import datasets, manifold
n_samples = 1500
S_points, S_color = datasets.make_s_curve(n_samples, random_state=0)
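# %%
# ``S_points`` holds one 3D coordinate per sample, while ``S_color`` stores
# each sample's position along the curve; we reuse it to color every
# embedding consistently, so corresponding regions are easy to match across
# plots. A quick shape check:
print(S_points.shape)  # (1500, 3)
print(S_color.shape)  # (1500,)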
# %%
# Let's look at the original data. We also define some helper
# functions, which we will use further on.
def plot_3d(points, points_color, title):
    x, y, z = points.T

    fig, ax = plt.subplots(
        figsize=(6, 6),
        facecolor="white",
        tight_layout=True,
        subplot_kw={"projection": "3d"},
    )
    fig.suptitle(title, size=16)
    col = ax.scatter(x, y, z, c=points_color, s=50, alpha=0.8)
    ax.view_init(azim=-60, elev=9)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.zaxis.set_major_locator(ticker.MultipleLocator(1))
    fig.colorbar(col, ax=ax, orientation="horizontal", shrink=0.6, aspect=60, pad=0.01)
    plt.show()


def plot_2d(points, points_color, title):
    fig, ax = plt.subplots(figsize=(3, 3), facecolor="white", constrained_layout=True)
    fig.suptitle(title, size=16)
    add_2d_scatter(ax, points, points_color)
    plt.show()


def add_2d_scatter(ax, points, points_color, title=None):
    x, y = points.T
    ax.scatter(x, y, c=points_color, s=50, alpha=0.8)
    ax.set_title(title)
    ax.xaxis.set_major_formatter(ticker.NullFormatter())
    ax.yaxis.set_major_formatter(ticker.NullFormatter())


plot_3d(S_points, S_color, "Original S-curve samples")
# %%
# Define algorithms for the manifold learning
# -------------------------------------------
#
# Manifold learning is an approach to non-linear dimensionality reduction.
# Algorithms for this task are based on the idea that the dimensionality of
# many data sets is only artificially high.
#
# Read more in the :ref:`User Guide <manifold>`.
n_neighbors = 12  # neighborhood which is used to recover the locally linear structure
n_components = 2  # number of coordinates for the manifold
# %%
# Locally Linear Embeddings
# ^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Locally linear embedding (LLE) can be thought of as a series of local
# Principal Component Analyses which are globally compared to find the
# best non-linear embedding.
# Read more in the :ref:`User Guide <locally_linear_embedding>`.
params = {
    "n_neighbors": n_neighbors,
    "n_components": n_components,
    "eigen_solver": "auto",
    "random_state": 0,
}

lle_standard = manifold.LocallyLinearEmbedding(method="standard", **params)
S_standard = lle_standard.fit_transform(S_points)

lle_ltsa = manifold.LocallyLinearEmbedding(method="ltsa", **params)
S_ltsa = lle_ltsa.fit_transform(S_points)

lle_hessian = manifold.LocallyLinearEmbedding(method="hessian", **params)
S_hessian = lle_hessian.fit_transform(S_points)

lle_mod = manifold.LocallyLinearEmbedding(method="modified", **params)
S_mod = lle_mod.fit_transform(S_points)
# %%
fig, axs = plt.subplots(
    nrows=2, ncols=2, figsize=(7, 7), facecolor="white", constrained_layout=True
)
fig.suptitle("Locally Linear Embeddings", size=16)

lle_methods = [
    ("Standard locally linear embedding", S_standard),
    ("Local tangent space alignment", S_ltsa),
    ("Hessian eigenmap", S_hessian),
    ("Modified locally linear embedding", S_mod),
]

for ax, method in zip(axs.flat, lle_methods):
    name, points = method
    add_2d_scatter(ax, points, S_color, name)

plt.show()
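# %%
# Beyond the visual comparison, each fitted
# :class:`~sklearn.manifold.LocallyLinearEmbedding` exposes a
# ``reconstruction_error_`` attribute, a rough numeric summary of how well
# the embedding preserves the local structure. Note that the values of the
# different variants are not directly comparable, since each one minimizes
# a different cost function:
for name, model in [
    ("standard", lle_standard),
    ("ltsa", lle_ltsa),
    ("hessian", lle_hessian),
    ("modified", lle_mod),
]:
    print(f"{name}: reconstruction error = {model.reconstruction_error_:.2e}")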
# %%
# Isomap Embedding
# ^^^^^^^^^^^^^^^^
#
# Non-linear dimensionality reduction through Isometric Mapping.
# Isomap seeks a lower-dimensional embedding which maintains geodesic
# distances between all points. Read more in the :ref:`User Guide <isomap>`.
isomap = manifold.Isomap(n_neighbors=n_neighbors, n_components=n_components, p=1)
S_isomap = isomap.fit_transform(S_points)
plot_2d(S_isomap, S_color, "Isomap Embedding")
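# %%
# Here ``p=1`` makes Isomap measure input-space distances with the Manhattan
# metric instead of the default Euclidean one (``p=2``). The fitted estimator
# also provides a ``reconstruction_error()`` method as a rough quality check
# (lower is better):
print(f"Isomap reconstruction error: {isomap.reconstruction_error():.2e}")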
# %%
# Multidimensional scaling
# ^^^^^^^^^^^^^^^^^^^^^^^^
#
# Multidimensional scaling (MDS) seeks a low-dimensional representation
# of the data in which the distances respect well the distances in the
# original high-dimensional space.
# Read more in the :ref:`User Guide <multidimensional_scaling>`.
md_scaling = manifold.MDS(
    n_components=n_components,
    max_iter=50,
    n_init=4,
    random_state=0,
    normalized_stress=False,
)
S_scaling = md_scaling.fit_transform(S_points)
plot_2d(S_scaling, S_color, "Multidimensional scaling")
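# %%
# The quality of a metric MDS fit is summarized by its final ``stress_``
# value: the sum of squared differences between the input pairwise distances
# and the distances in the embedding (lower is better):
print(f"MDS stress: {md_scaling.stress_:.2f}")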
# %%
# Spectral embedding for non-linear dimensionality reduction
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# This implementation uses Laplacian Eigenmaps, which finds a low dimensional
# representation of the data using a spectral decomposition of the graph Laplacian.
# Read more in the :ref:`User Guide <spectral_embedding>`.
spectral = manifold.SpectralEmbedding(
    n_components=n_components, n_neighbors=n_neighbors, random_state=42
)
S_spectral = spectral.fit_transform(S_points)
plot_2d(S_spectral, S_color, "Spectral Embedding")
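# %%
# With the default ``affinity="nearest_neighbors"``, the graph Laplacian is
# built from a sparse k-nearest-neighbors affinity matrix, which the fitted
# estimator stores as ``affinity_matrix_``:
affinity = spectral.affinity_matrix_
print(f"affinity matrix: {affinity.shape}, {affinity.nnz} stored entries")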
# %%
# T-distributed Stochastic Neighbor Embedding
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# It converts similarities between data points to joint probabilities and
# tries to minimize the Kullback-Leibler divergence between the joint probabilities
# of the low-dimensional embedding and the high-dimensional data. t-SNE has a cost
# function that is not convex, i.e. with different initializations we can get
# different results. Read more in the :ref:`User Guide <t_sne>`.
t_sne = manifold.TSNE(
    n_components=n_components,
    perplexity=30,
    init="random",
    max_iter=250,
    random_state=0,
)
S_t_sne = t_sne.fit_transform(S_points)
plot_2d(S_t_sne, S_color, "T-distributed Stochastic \n Neighbor Embedding")
# %%
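# Because the t-SNE objective is non-convex, the reached optimum depends on
# the initialization. The fitted estimator records the final Kullback-Leibler
# divergence, which can be used to compare runs (lower is better):
print(f"t-SNE KL divergence: {t_sne.kl_divergence_:.2f}")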