-
Notifications
You must be signed in to change notification settings - Fork 142
/
Copy pathbagging.py
170 lines (141 loc) · 6.58 KB
/
bagging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import math
import random
import Orange
import Orange.core
import Orange.classification
@Orange.utils.deprecated_members(
    {"weightId": "weight_id", "examples": "instances"})
class BaggedLearner(Orange.core.Learner):
    """
    BaggedLearner takes a learner and returns a bagged learner, which is
    essentially a wrapper around the learner passed as an argument. If
    instances are passed in arguments, BaggedLearner returns a bagged
    classifier. Both learner and classifier then behave just like any
    other learner and classifier in Orange.

    Bagging, in essence, takes training data and a learner, and builds *t*
    classifiers, each time presenting a learner a bootstrap sample from the
    training data. When given a test instance, classifiers vote on class,
    and a bagged classifier returns a class with the highest number of votes.
    As implemented in Orange, when class probabilities are requested, these
    are proportional to the number of votes for a particular class.

    :param learner: learner to be bagged.
    :type learner: :class:`Orange.core.Learner`

    :param t: number of bagged classifiers, that is, classifiers created
        when instances are passed to bagged learner.
    :type t: int

    :param name: name of the resulting learner.
    :type name: str

    :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier` or
        :class:`Orange.ensemble.bagging.BaggedLearner`
    """

    def __new__(cls, learner, instances=None, weight_id=None, **kwargs):
        """
        Create the learner, or train immediately when data is supplied.

        :param learner: learner to be bagged.
        :param instances: optional training data; when given, the learner
            is initialised and called on it right away and the resulting
            classifier is returned instead of the learner.
        :param weight_id: forwarded unchanged to :meth:`__call__` as its
            ``weight`` argument.
        :param kwargs: forwarded to ``Orange.core.Learner.__new__`` and,
            on the train-immediately path, to :meth:`__init__`.
        """
        self = Orange.core.Learner.__new__(cls, **kwargs)
        if instances is None:
            return self
        # What we return below is the result of __call__, not this learner,
        # so initialise explicitly here. ``self.__init__`` is already bound:
        # passing ``self`` again (as the code used to) shifted every
        # argument by one, storing the learner object in ``t``.
        self.__init__(learner, **kwargs)
        return self.__call__(instances, weight_id)

    def __init__(self, learner, t=10, name='Bagging'):
        """
        Store the bagging configuration.

        :param learner: learner to be bagged.
        :param t: number of classifiers to build.
        :param name: name given to the learner and its classifiers.
        """
        self.t = t
        self.name = name
        self.learner = learner

    def __call__(self, instances, weight=0):
        """
        Learn from the given table of data instances.

        :param instances: data instances to learn from.
        :type instances: Orange.data.Table

        :param weight: ID of meta feature with weights of instances
        :type weight: int

        :rtype: :class:`Orange.ensemble.bagging.BaggedClassifier`
        """
        # The generator is always seeded with 0, so the bootstrap index
        # sequences are the same on every call.
        rng = random.Random()
        rng.seed(0)

        n = len(instances)
        # Wrap the input once; previously this was repeated on every round
        # although the data does not change between rounds.
        instances = Orange.data.Table(instances)

        classifiers = []
        for _round in range(self.t):
            # Bootstrap sample: n row indices drawn with replacement.
            selection = [rng.randrange(n) for _ in range(n)]
            data = instances.getitems(selection)
            classifiers.append(self.learner(data, weight))

        return BaggedClassifier(classifiers=classifiers, name=self.name,
                                class_var=instances.domain.class_var)

    def __reduce__(self):
        """Pickle support: rebuild from the learner, then restore state."""
        return type(self), (self.learner,), dict(self.__dict__)
@Orange.utils.deprecated_members(
    {"example": "instance",
     "classVar": "class_var",
     "resultType": "result_type"})
class BaggedClassifier(Orange.core.Classifier):
    """
    A classifier that uses a bagging technique. Usually the learner
    (:class:`Orange.ensemble.bagging.BaggedLearner`) is used to construct the
    classifier.

    When constructing the classifier manually, the following parameters can
    be passed:

    :param classifiers: a list of bagged classifiers.
    :type classifiers: list

    :param name: name of the resulting classifier.
    :type name: str

    :param class_var: the class feature.
    :type class_var: :class:`Orange.feature.Descriptor`
    """

    def __init__(self, classifiers, name, class_var, **kwds):
        """Store the ensemble members, name and class feature; any extra
        keyword arguments become instance attributes."""
        self.classifiers = classifiers
        self.name = name
        self.class_var = class_var
        self.__dict__.update(kwds)

    def __call__(self, instance,
                 result_type=Orange.classification.Classifier.GetValue):
        """
        :param instance: instance to be classified.
        :type instance: :class:`Orange.data.Instance`

        :param result_type: :class:`Orange.classification.Classifier.GetValue`
            or :class:`Orange.classification.Classifier.GetProbabilities` or
            :class:`Orange.classification.Classifier.GetBoth`

        :rtype: :class:`Orange.data.Value`,
            :class:`Orange.statistics.Distribution` or a tuple with both
        """
        Classifier = Orange.classification.Classifier

        if self.class_var.var_type == Orange.feature.Type.Discrete:
            # One vote per ensemble member; the first value with the
            # highest count wins.
            tally = [0.] * len(self.class_var.values)
            for member in self.classifiers:
                tally[int(member(instance))] += 1
            winner = Orange.data.Value(self.class_var,
                                       tally.index(max(tally)))
            if result_type == Classifier.GetValue:
                return winner

            # Probabilities are the vote shares.
            ensemble_size = len(self.classifiers)
            shares = [count / ensemble_size for count in tally]
            distribution = Orange.statistics.distribution.Discrete(
                shares, variable=self.class_var)

            if result_type == Classifier.GetProbabilities:
                return distribution
            if result_type == Classifier.GetBoth:
                return (winner, distribution)
            return winner

        if self.class_var.var_type == Orange.feature.Type.Continuous:
            # The averaged distribution also needs each member's predicted
            # value, so a probabilities-only request is served by asking
            # the members for both.
            if result_type == Classifier.GetProbabilities:
                member_request = Classifier.GetBoth
            else:
                member_request = result_type
            outputs = [member(instance, member_request)
                       for member in self.classifiers]
            total = float(len(self.classifiers))

            if result_type in (Classifier.GetBoth,
                               Classifier.GetProbabilities):
                mean = sum(float(val) for val, _dist in outputs) / total

                from collections import defaultdict
                # Average the members' distributions point by point.
                averaged = defaultdict(float)
                for _val, dist in outputs:
                    for point, mass in dist.items():
                        averaged[float(point)] += mass / total
                averaged = Orange.statistics.distribution.Continuous(
                    averaged, variable=self.class_var)

                if result_type == Classifier.GetBoth:
                    return (self.class_var(mean), averaged)
                return averaged

            if result_type == Classifier.GetValue:
                mean = sum(float(val) for val in outputs) / total
                return self.class_var(mean)

        # Any other class variable type, or an unrecognised result type for
        # a continuous class, yields no prediction.
        return None

    def __reduce__(self):
        """Pickle support: rebuild from the constructor arguments, then
        restore the instance dictionary."""
        ctor_args = (self.classifiers, self.name, self.class_var)
        return (type(self), ctor_args, dict(self.__dict__))