# -*- coding: utf-8 -*-
"""
================================================================
A demo of robust classification on a simulated corrupted dataset
================================================================
In this example, we compare the RobustWeightedClassifier (using
SGDClassifier as its base estimator) with a vanilla SGDClassifier
under several losses.
"""
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import make_blobs
from sklearn.linear_model import SGDClassifier
from sklearn.utils import shuffle

from sklearn_extra.robust import RobustWeightedClassifier

rng = np.random.RandomState(42)

# Sample two Gaussian blobs
X, y = make_blobs(
    n_samples=100, centers=np.array([[-1, -1], [1, 1]]), random_state=rng
)

# Replace the first 3 samples with outliers
for f in range(3):
    X[f] = [20, 3] + rng.normal(size=2) * 0.1
    y[f] = 0

# Shuffle the data so that we don't know where the outliers are.
X, y = shuffle(X, y, random_state=rng)

estimators = [
    (
        "SGDClassifier, hinge loss",
        SGDClassifier(loss="hinge", random_state=rng),
    ),
    (
        "SGDClassifier, log loss",
        SGDClassifier(loss="log_loss", random_state=rng),
    ),
    (
        "SGDClassifier, modified_huber loss",
        SGDClassifier(loss="modified_huber", random_state=rng),
    ),
    (
        "RobustWeightedClassifier",
        RobustWeightedClassifier(
            max_iter=100,
            weighting="mom",
            k=8,
            random_state=rng,
        ),
        # The parameter k is set larger than the number of outliers
        # because here we know it. max_iter is set to 100. One may want
        # to play with the number of iterations or the optimization
        # scheme of the base_estimator to get good results (see the
        # median-of-means sketch right after this list).
    ),
]
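
# ---------------------------------------------------------------------
# A minimal median-of-means sketch (illustrative only, NOT
# sklearn-extra's internal implementation): weighting="mom" builds on
# the fact that the median of per-block means resists a few outliers,
# whereas the plain mean does not. The block count 2 * k + 1 mirrors
# the k=8 used above and is an assumption made for illustration.
sketch_rng = np.random.RandomState(0)  # separate seed, keeps the demo reproducible


def median_of_means(values, n_blocks, rng):
    # Shuffle so the outliers cannot all sit in one block by construction
    values = rng.permutation(values)
    blocks = np.array_split(values, n_blocks)
    return np.median([block.mean() for block in blocks])


print("Plain mean of X[:, 0]:      %.3f" % X[:, 0].mean())
print(
    "Median-of-means of X[:, 0]: %.3f"
    % median_of_means(X[:, 0], 2 * 8 + 1, sketch_rng)
)
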
# Helper function to plot an estimator's decision surface
def plot_classif(clf, X, y, ax):
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    h = 0.02  # step size in the mesh
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)
    )
    # Predict on the mesh and put the result into a color plot
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
    ax.scatter(X[:, 0], X[:, 1], c=y)


fig, axes = plt.subplots(2, 2)
for i, (name, estimator) in enumerate(estimators):
    ax = axes.flat[i]
    estimator.fit(X, y)
    plot_classif(estimator, X, y, ax)
    ax.set_title(name)
    ax.axis("off")

fig.suptitle(
    "Scatter plot of the training set and decision"
    " boundaries of each estimator"
)
plt.show()
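
# ---------------------------------------------------------------------
# Inspecting the sample weights: per sklearn-extra's documented API, the
# fitted RobustWeightedClassifier exposes a ``weights_`` attribute with
# the final per-sample weights (treat this as an assumption if your
# version differs). The injected outliers should be strongly downweighted.
robust_clf = estimators[-1][1]  # the RobustWeightedClassifier fitted above
lowest = np.argsort(robust_clf.weights_)[:5]
print("Indices of the 5 most downweighted samples:", lowest)
print("Their weights:", robust_clf.weights_[lowest])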