forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_qda.py
94 lines (71 loc) · 2.77 KB
/
test_qda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import numpy as np
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_greater
from sklearn import qda
# Data is just 6 separable points in the plane
X = np.array([[0, 0], [-2, -2], [-2, -1], [-1, -1], [-1, -2],
[1, 3], [1, 2], [2, 1], [2, 2]])
y = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2])
y3 = np.array([1, 2, 3, 2, 3, 1, 2, 3, 1])
# Degenerate data with 1 feature (still should be separable)
X1 = np.array([[-3, ], [-2, ], [-1, ], [-1, ], [0, ], [1, ], [1, ],
[2, ], [3, ]])
# Data that has zero variance in one dimension and needs regularization
X2 = np.array([[-3, 0], [-2, 0], [-1, 0], [-1, 0], [0, 0], [1, 0], [1, 0],
[2, 0], [3, 0]])
def test_qda():
"""
QDA classification.
This checks that QDA implements fit and predict and returns
correct values for a simple toy dataset.
"""
clf = qda.QDA()
y_pred = clf.fit(X, y).predict(X)
assert_array_equal(y_pred, y)
# Assure that it works with 1D data
y_pred1 = clf.fit(X1, y).predict(X1)
assert_array_equal(y_pred1, y)
# Test probas estimates
y_proba_pred1 = clf.predict_proba(X1)
assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y)
y_log_proba_pred1 = clf.predict_log_proba(X1)
assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8)
y_pred3 = clf.fit(X, y3).predict(X)
# QDA shouldn't be able to separate those
assert_true(np.any(y_pred3 != y3))
def test_qda_priors():
clf = qda.QDA()
y_pred = clf.fit(X, y).predict(X)
n_pos = np.sum(y_pred == 2)
neg = 1e-10
clf = qda.QDA(priors=np.array([neg, 1 - neg]))
y_pred = clf.fit(X, y).predict(X)
n_pos2 = np.sum(y_pred == 2)
assert_greater(n_pos2, n_pos)
def test_qda_store_covariances():
# The default is to not set the covariances_ attribute
clf = qda.QDA().fit(X, y)
assert_true(not hasattr(clf, 'covariances_'))
# Test the actual attribute:
clf = qda.QDA().fit(X, y, store_covariances=True)
assert_true(hasattr(clf, 'covariances_'))
assert_array_almost_equal(
clf.covariances_[0],
np.array([[0.7, 0.45], [0.45, 0.7]])
)
assert_array_almost_equal(
clf.covariances_[1],
np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]])
)
def test_qda_regularization():
# the default is reg_param=0. and will cause issues
# when there is a constant variable
clf = qda.QDA()
y_pred = clf.fit(X2, y).predict(X2)
assert_true(np.any(y_pred != y))
# adding a little regularization fixes the problem
clf = qda.QDA(reg_param=0.01)
y_pred = clf.fit(X2, y).predict(X2)
assert_array_equal(y_pred, y)