import Orange
import Orange.core
_inf = 100000  # vote weight used for a classifier with zero training error
@Orange.utils.deprecated_members(
{"examples": "instances",
"classVar": "class_var",
"weightId": "weigth_id",
"origWeight": "orig_weight"})
class BoostedLearner(Orange.core.Learner):
"""
Instead of drawing a series of bootstrap samples from the training set,
boosting maintains a weight for each instance. Each time a classifier is
trained from the training set, the weights of misclassified instances
are increased. Just like in a bagged learner, the class is decided by
voting of the classifiers, but in boosting the votes are weighted by the
accuracy obtained on the training set.

BoostedLearner is an implementation of AdaBoost.M1 (Freund and Schapire,
1996). From the user's viewpoint, the use of BoostedLearner is similar to
that of BaggedLearner. The learner passed as an argument needs to deal
with instance weights.
:param learner: learner to be boosted.
:type learner: :class:`Orange.core.Learner`
:param t: number of boosted classifiers created from the instance set.
:type t: int
:param name: name of the resulting learner.
:type name: str
:rtype: :class:`Orange.ensemble.boosting.BoostedClassifier` or
:class:`Orange.ensemble.boosting.BoostedLearner`
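
A minimal usage sketch (the tree learner and the "voting" data set used
below are assumptions, not part of this module; any learner that can
handle instance weights will do)::

    import Orange

    data = Orange.data.Table("voting")
    tree = Orange.classification.tree.TreeLearner()
    boosted = Orange.ensemble.boosting.BoostedLearner(tree, t=10)
    classifier = boosted(data)
    print classifier(data[0])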
"""
@Orange.utils.deprecated_keywords(
{"examples": "instances", "weightId": "weigth_id"})
def __new__(cls, learner, instances=None, weight_id=None, **kwargs):
self = Orange.core.Learner.__new__(cls, **kwargs)
if instances is not None:
self.__init__(learner, **kwargs)
return self.__call__(instances, weight_id)
else:
return self
def __init__(self, learner, t=10, name='AdaBoost.M1'):
self.t = t
self.name = name
self.learner = learner
@Orange.utils.deprecated_keywords(
{"examples": "instances", "origWeight": "orig_weight"})
def __call__(self, instances, orig_weight = 0):
"""
Learn from the given table of data instances.
:param instances: data instances to learn from.
:type instances: Orange.data.Table
:param orig_weight: id of the meta attribute storing the original
instance weights (0 if the instances are unweighted).
:type orig_weight: int
:rtype: :class:`Orange.ensemble.boosting.BoostedClassifier`
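
For example (``tree`` is a weight-aware learner and ``data`` an
:class:`Orange.data.Table`; both are assumed to exist)::

    booster = BoostedLearner(tree)
    boosted_classifier = booster(data)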
"""
import math
weight = Orange.feature.Descriptor.new_meta_id()
if orig_weight:
for i in instances:
i.setweight(weight, i.getweight(orig_weight))
else:
instances.addMetaAttribute(weight, 1.0)
n = len(instances)
classifiers = []
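# AdaBoost.M1: in each of the t rounds, train a classifier on the current
# instance weights, measure its weighted error and reweight the instances.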
for i in range(self.t):
epsilon = 0.0
classifier = self.learner(instances, weight)
corr = []
for ex in instances:
if classifier(ex) != ex.getclass():
epsilon += ex.getweight(weight)
corr.append(0)
else:
corr.append(1)
epsilon = epsilon / float(reduce(lambda x,y:x+y.getweight(weight),
instances, 0))
classifiers.append((classifier, epsilon and math.log(
(1-epsilon)/epsilon) or _inf))
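# Stop early if the classifier is perfect (epsilon == 0) or no better than
# chance (epsilon >= 0.499); in the latter case the just-added classifier
# is dropped, unless it is the only one.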
if epsilon==0 or epsilon >= 0.499:
if epsilon >= 0.499 and len(classifiers)>1:
del classifiers[-1]
instances.removeMetaAttribute(weight)
return BoostedClassifier(classifiers = classifiers,
name=self.name, class_var=instances.domain.class_var)
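# Down-weight correctly classified instances by beta = epsilon/(1-epsilon),
# then renormalize so the instance weights again sum to 1.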
beta = epsilon/(1-epsilon)
for e in range(n):
if corr[e]:
instances[e].setweight(weight, instances[e].getweight(weight)*beta)
f = 1/float(sum([e.getweight(weight) for e in instances]))
for e in range(n):
instances[e].setweight(weight, instances[e].getweight(weight)*f)
instances.removeMetaAttribute(weight)
return BoostedClassifier(classifiers = classifiers, name=self.name,
class_var=instances.domain.class_var)
def __reduce__(self):
return type(self), (self.learner,), dict(self.__dict__)
@Orange.utils.deprecated_members({"classVar": "class_var"})
class BoostedClassifier(Orange.core.Classifier):
"""
A classifier that uses a boosting technique. Usually the learner
(:class:`Orange.ensemble.boosting.BoostedLearner`) is used to construct the
classifier.
When constructing the classifier manually, the following parameters can
be passed:
:param classifiers: a list of (classifier, vote weight) pairs, as
constructed by :class:`BoostedLearner`.
:type classifiers: list
:param name: name of the resulting classifier.
:type name: str
:param class_var: the class feature.
:type class_var: :class:`Orange.feature.Descriptor`
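
A construction sketch (``clf1``, ``clf2``, ``data`` and the vote weights
are placeholders for classifiers and data assumed to exist already)::

    bc = BoostedClassifier(classifiers=[(clf1, 1.5), (clf2, 0.7)],
        name="manual boost", class_var=data.domain.class_var)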
"""
def __init__(self, classifiers, name, class_var, **kwds):
self.classifiers = classifiers
self.name = name
self.class_var = class_var
self.__dict__.update(kwds)
@Orange.utils.deprecated_keywords({"resultType": "result_type"})
def __call__(self, instance, result_type = Orange.classification.Classifier.GetValue):
"""
:param instance: instance to be classified.
:type instance: :class:`Orange.data.Instance`
:param result_type: :class:`Orange.classification.Classifier.GetValue`,
:class:`Orange.classification.Classifier.GetProbabilities` or
:class:`Orange.classification.Classifier.GetBoth`
:rtype: :class:`Orange.data.Value`,
:class:`Orange.statistics.Distribution` or a tuple with both
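
For example, to obtain both the predicted value and the class
probabilities (``classifier`` and ``instance`` are assumed to exist)::

    value, probabilities = classifier(instance,
        Orange.classification.Classifier.GetBoth)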
"""
votes = Orange.statistics.distribution.Discrete(self.class_var)
for c, e in self.classifiers:
votes[int(c(instance))] += e
index = Orange.utils.selection.select_best_index(votes)
# TODO
value = Orange.data.Value(self.class_var, index)
if result_type == Orange.classification.Classifier.GetValue:
return value
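# Normalize the accumulated vote weights into a class probability distribution.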
sv = sum(votes)
for i in range(len(votes)):
votes[i] = votes[i]/sv
if result_type == Orange.classification.Classifier.GetProbabilities:
return votes
elif result_type == Orange.classification.Classifier.GetBoth:
return (value, votes)
else:
return value
def __reduce__(self):
return type(self), (self.classifiers, self.name, self.class_var), dict(self.__dict__)