forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: ensemble.py
121 lines (87 loc) · 2.92 KB
/
ensemble.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from sklearn.ensemble import (
GradientBoostingClassifier,
HistGradientBoostingClassifier,
RandomForestClassifier,
)
from .common import Benchmark, Estimator, Predictor
from .datasets import (
_20newsgroups_highdim_dataset,
_20newsgroups_lowdim_dataset,
_synth_classification_dataset,
)
from .utils import make_gen_classif_scorers
class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
    """
    Benchmarks for RandomForestClassifier.
    """

    # asv parameter grid: input representation x parallelism level.
    param_names = ["representation", "n_jobs"]
    params = (["dense", "sparse"], Benchmark.n_jobs_vals)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        """Load the 20newsgroups dataset matching the requested representation."""
        representation, n_jobs = params
        # Sparse runs use the high-dimensional variant; dense runs the low-dim one.
        loader = (
            _20newsgroups_highdim_dataset
            if representation == "sparse"
            else _20newsgroups_lowdim_dataset
        )
        return loader()

    def make_estimator(self, params):
        """Build the forest; more trees when benchmarking at large data size."""
        representation, n_jobs = params
        if Benchmark.data_size == "large":
            n_estimators = 500
        else:
            n_estimators = 100
        return RandomForestClassifier(
            n_estimators=n_estimators,
            min_samples_split=10,
            max_features="log2",
            n_jobs=n_jobs,
            random_state=0,
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)
class GradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
    """
    Benchmarks for GradientBoostingClassifier.
    """

    # asv parameter grid: input representation only.
    param_names = ["representation"]
    params = (["dense", "sparse"],)

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        """Load the 20newsgroups dataset matching the requested representation."""
        (representation,) = params
        if representation == "sparse":
            data = _20newsgroups_highdim_dataset()
        else:
            data = _20newsgroups_lowdim_dataset()
        return data

    def make_estimator(self, params):
        """Build the booster; more stages when benchmarking at large data size.

        NOTE: the estimator configuration does not depend on the
        representation parameter, so ``params`` is intentionally unused here
        (the original unpacked it into a dead local).
        """
        n_estimators = 100 if Benchmark.data_size == "large" else 10
        estimator = GradientBoostingClassifier(
            n_estimators=n_estimators,
            max_features="log2",
            subsample=0.5,
            random_state=0,
        )
        return estimator

    def make_scorers(self):
        make_gen_classif_scorers(self)
class HistGradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
    """
    Benchmarks for HistGradientBoostingClassifier.
    """

    # No asv parameters: a single fixed configuration is benchmarked.
    param_names = []
    params = ()

    def setup_cache(self):
        super().setup_cache()

    def make_data(self, params):
        """Generate a fixed synthetic multiclass classification dataset."""
        return _synth_classification_dataset(
            n_samples=10000, n_features=100, n_classes=5
        )

    def make_estimator(self, params):
        """Build the histogram-based booster with a fixed configuration."""
        return HistGradientBoostingClassifier(
            max_iter=100,
            max_leaf_nodes=15,
            early_stopping=False,
            random_state=0,
        )

    def make_scorers(self):
        make_gen_classif_scorers(self)