
Commit a725102

Pushing the docs to 1.1/ for branch: 1.1.X, commit 6cb2c52375a812ff509c00f4eed1da232e7a8932
1 parent 61834c9 commit a725102

1,509 files changed: +27,523 / -24,759 lines


Diff for: 1.1/.buildinfo

+1 -1

@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 0eaf86448214f705bb4019609544b7b3
+config: 8c6ff21e847d280e934fd16d253894de
 tags: 645f666f9bcd5a90fca523b33c5a78b7

Diff for: 1.1/_downloads/02a1306a494b46cc56c930ceec6e8c4a/plot_species_kde.py

+1 -1

@@ -19,7 +19,7 @@
 The two species are:

  - `"Bradypus variegatus"
-   <http://www.iucnredlist.org/apps/redlist/details/3038/0>`_ ,
+   <https://www.iucnredlist.org/species/3038/47437046>`_ ,
    the Brown-throated Sloth.

  - `"Microryzomys minutus"

Diff for: 1.1/_downloads/06cfc926acb27652fb2aa5bfc583e7cb/plot_hashing_vs_dict_vectorizer.ipynb

+290 -2
Large diffs are not rendered by default.

Diff for: 1.1/_downloads/08fc4f471ae40388eb535678346dc9d1/plot_gpc_xor.py

+1 -1

@@ -29,7 +29,7 @@

 # fit the model
 plt.figure(figsize=(10, 5))
-kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0) ** 2]
+kernels = [1.0 * RBF(length_scale=1.15), 1.0 * DotProduct(sigma_0=1.0) ** 2]
 for i, kernel in enumerate(kernels):
     clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y)

Diff for: 1.1/_downloads/14f620cd922ca2c9a39ae5784034dd0d/plot_lda.py

+3

@@ -71,20 +71,23 @@ def generate_data(n_samples, n_features):
     linewidth=2,
     label="Linear Discriminant Analysis with Ledoit Wolf",
     color="navy",
+    linestyle="dashed",
 )
 plt.plot(
     features_samples_ratio,
     acc_clf2,
     linewidth=2,
     label="Linear Discriminant Analysis",
     color="gold",
+    linestyle="solid",
 )
 plt.plot(
     features_samples_ratio,
     acc_clf3,
     linewidth=2,
     label="Linear Discriminant Analysis with OAS",
     color="red",
+    linestyle="dotted",
 )

 plt.xlabel("n_features / n_samples")
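
The three labelled curves above compare plain LDA with its Ledoit-Wolf and OAS shrinkage variants. As a hedged, minimal sketch of how such classifiers are typically built (synthetic data and settings, not the example's exact code):

# Minimal sketch, assuming the standard scikit-learn shrinkage options;
# the example's own data generation and plotting loop are not reproduced here.
import numpy as np
from sklearn.covariance import OAS
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
X = rng.randn(60, 20)
y = np.array([0] * 30 + [1] * 30)

clf_lw = LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto")             # Ledoit-Wolf
clf_plain = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=None)            # no shrinkage
clf_oas = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=OAS())  # OAS estimate
for clf in (clf_lw, clf_plain, clf_oas):
    print(clf.fit(X, y).score(X, y))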

Diff for: 1.1/_downloads/1bcb2039afa126da41f1cea42b4a5866/plot_gpr_prior_posterior.py

+6 -6

@@ -158,7 +158,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 )

 # %%
-# Periodic kernel
+# Exp-Sine-Squared kernel
 # ...............
 from sklearn.gaussian_process.kernels import ExpSineSquared

@@ -183,7 +183,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 axs[1].legend(bbox_to_anchor=(1.05, 1.5), loc="upper left")
 axs[1].set_title("Samples from posterior distribution")

-fig.suptitle("Periodic kernel", fontsize=18)
+fig.suptitle("Exp-Sine-Squared kernel", fontsize=18)
 plt.tight_layout()

 # %%
@@ -194,7 +194,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 )

 # %%
-# Dot product kernel
+# Dot-product kernel
 # ..................
 from sklearn.gaussian_process.kernels import ConstantKernel, DotProduct

@@ -216,7 +216,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 axs[1].legend(bbox_to_anchor=(1.05, 1.5), loc="upper left")
 axs[1].set_title("Samples from posterior distribution")

-fig.suptitle("Dot product kernel", fontsize=18)
+fig.suptitle("Dot-product kernel", fontsize=18)
 plt.tight_layout()

 # %%
@@ -227,7 +227,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 )

 # %%
-# Mattern kernel
+# Matérn kernel
 # ..............
 from sklearn.gaussian_process.kernels import Matern

@@ -247,7 +247,7 @@ def plot_gpr_samples(gpr_model, n_samples, ax):
 axs[1].legend(bbox_to_anchor=(1.05, 1.5), loc="upper left")
 axs[1].set_title("Samples from posterior distribution")

-fig.suptitle("Mattern kernel", fontsize=18)
+fig.suptitle("Matérn kernel", fontsize=18)
 plt.tight_layout()

 # %%
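
The renamed section titles above refer to specific kernel classes. A minimal, hedged sketch of how those kernels are typically constructed for a Gaussian process (hyperparameter values here are illustrative, not the example's settings):

# Illustrative kernel construction only; hyperparameter values are assumptions.
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    ConstantKernel,
    DotProduct,
    ExpSineSquared,
    Matern,
)

kernels = {
    "Exp-Sine-Squared kernel": ExpSineSquared(length_scale=1.0, periodicity=3.0),
    "Dot-product kernel": ConstantKernel(0.1) + DotProduct(sigma_0=1.0) ** 2,
    "Matérn kernel": Matern(length_scale=1.0, nu=1.5),
}
for name, kernel in kernels.items():
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=0)
    print(name, "->", gpr.kernel)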

Diff for: 1.1/_downloads/1c4a422dfa5bd721501d19a2b7e2499b/plot_species_kde.ipynb

+1 -1

@@ -15,7 +15,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n# Kernel Density Estimate of Species Distributions\nThis shows an example of a neighbors-based query (in particular a kernel\ndensity estimate) on geospatial data, using a Ball Tree built upon the\nHaversine distance metric -- i.e. distances over points in latitude/longitude.\nThe dataset is provided by Phillips et. al. (2006).\nIf available, the example uses\n`basemap <https://matplotlib.org/basemap/>`_\nto plot the coast lines and national boundaries of South America.\n\nThis example does not perform any learning over the data\n(see `sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` for\nan example of classification based on the attributes in this dataset). It\nsimply shows the kernel density estimate of observed data points in\ngeospatial coordinates.\n\nThe two species are:\n\n - `\"Bradypus variegatus\"\n <http://www.iucnredlist.org/apps/redlist/details/3038/0>`_ ,\n the Brown-throated Sloth.\n\n - `\"Microryzomys minutus\"\n <http://www.iucnredlist.org/details/13408/0>`_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n## References\n\n * `\"Maximum entropy modeling of species geographic distributions\"\n <http://rob.schapire.net/papers/ecolmod.pdf>`_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n"
+    "\n# Kernel Density Estimate of Species Distributions\nThis shows an example of a neighbors-based query (in particular a kernel\ndensity estimate) on geospatial data, using a Ball Tree built upon the\nHaversine distance metric -- i.e. distances over points in latitude/longitude.\nThe dataset is provided by Phillips et. al. (2006).\nIf available, the example uses\n`basemap <https://matplotlib.org/basemap/>`_\nto plot the coast lines and national boundaries of South America.\n\nThis example does not perform any learning over the data\n(see `sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` for\nan example of classification based on the attributes in this dataset). It\nsimply shows the kernel density estimate of observed data points in\ngeospatial coordinates.\n\nThe two species are:\n\n - `\"Bradypus variegatus\"\n <https://www.iucnredlist.org/species/3038/47437046>`_ ,\n the Brown-throated Sloth.\n\n - `\"Microryzomys minutus\"\n <http://www.iucnredlist.org/details/13408/0>`_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n## References\n\n * `\"Maximum entropy modeling of species geographic distributions\"\n <http://rob.schapire.net/papers/ecolmod.pdf>`_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n"
    ]
   },
   {

Diff for: 1.1/_downloads/24475810034a0d0d190a9de0f87d72b5/plot_all_scaling.py

+1 -1

@@ -324,7 +324,7 @@ def make_plot(item_idx):
 #
 # Unlike the previous scalers, the centering and scaling statistics of
 # :class:`~sklearn.preprocessing.RobustScaler`
-# is based on percentiles and are therefore not influenced by a few
+# are based on percentiles and are therefore not influenced by a small
 # number of very large marginal outliers. Consequently, the resulting range of
 # the transformed feature values is larger than for the previous scalers and,
 # more importantly, are approximately similar: for both features most of the
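
The corrected sentence above explains that RobustScaler derives its centering and scaling statistics from percentiles (median and IQR by default), so a handful of extreme values barely moves them. A minimal sketch of that behavior on synthetic data (not the example's dataset):

# Minimal sketch: one extreme outlier barely affects RobustScaler's output,
# while StandardScaler's mean/std are dragged by it.
import numpy as np
from sklearn.preprocessing import RobustScaler, StandardScaler

X = np.array([[1.0], [2.0], [3.0], [4.0], [1000.0]])  # one extreme outlier

print(RobustScaler().fit_transform(X).ravel())
print(StandardScaler().fit_transform(X).ravel())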

Diff for: 1.1/_downloads/2f3ef774a6d7e52e1e6b7ccbb75d25f0/plot_gradient_boosting_quantile.py

+5

@@ -72,6 +72,11 @@ def f(x):
     all_models["q %1.2f" % alpha] = gbr.fit(X_train, y_train)

 # %%
+# Notice that :class:`~sklearn.ensemble.HistGradientBoostingRegressor` is much
+# faster than :class:`~sklearn.ensemble.GradientBoostingRegressor` starting with
+# intermediate datasets (`n_samples >= 10_000`), which is not the case of the
+# present example.
+#
 # For the sake of comparison, we also fit a baseline model trained with the
 # usual (mean) squared error (MSE).
 gbr_ls = GradientBoostingRegressor(loss="squared_error", **common_params)
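
The note added above recommends HistGradientBoostingRegressor once datasets reach roughly 10,000 samples. A hedged sketch of that drop-in on a larger synthetic dataset (it assumes quantile loss is available for this estimator in the installed scikit-learn version; settings are illustrative, not the example's):

# Hedged sketch: histogram-based quantile regressors on a larger dataset.
# The `loss="quantile"` / `quantile=` parameters are an assumption about the
# installed version; values are illustrative.
import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(42)
X = rng.uniform(0, 10, size=(50_000, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.5, size=X.shape[0])

hist_models = {}
for alpha in (0.05, 0.5, 0.95):
    hgbr = HistGradientBoostingRegressor(loss="quantile", quantile=alpha, random_state=0)
    hist_models["q %1.2f" % alpha] = hgbr.fit(X, y)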

Diff for: 1.1/_downloads/3a10dcfbc1a4bf1349c7101a429aa47b/plot_feature_transformation.py

+6 -1

@@ -39,7 +39,7 @@
 from sklearn.datasets import make_classification
 from sklearn.model_selection import train_test_split

-X, y = make_classification(n_samples=80000, random_state=10)
+X, y = make_classification(n_samples=80_000, random_state=10)

 X_full_train, X_test, y_full_train, y_test = train_test_split(
     X, y, test_size=0.5, random_state=10
@@ -72,6 +72,11 @@
 _ = gradient_boosting.fit(X_train_ensemble, y_train_ensemble)

 # %%
+# Notice that :class:`~sklearn.ensemble.HistGradientBoostingClassifier` is much
+# faster than :class:`~sklearn.ensemble.GradientBoostingClassifier` starting
+# with intermediate datasets (`n_samples >= 10_000`), which is not the case of
+# the present example.
+#
 # The :class:`~sklearn.ensemble.RandomTreesEmbedding` is an unsupervised method
 # and thus does not required to be trained independently.
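
The same caveat is added here for classification. A hedged sketch of swapping in the histogram-based classifier on data of the size created by the updated make_classification call (illustrative settings, not the example's ensemble pipeline):

# Hedged sketch: HistGradientBoostingClassifier as the faster alternative on
# ~80k samples; hyperparameters are illustrative.
from sklearn.datasets import make_classification
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=80_000, random_state=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=10)

hgbc = HistGradientBoostingClassifier(random_state=10)
print(hgbc.fit(X_train, y_train).score(X_test, y_test))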

Diff for: 1.1/_downloads/3c3c738275484acc54821615bf72894a/plot_permutation_importance.py

+1 -1

@@ -58,7 +58,7 @@
 # We define a predictive model based on a random forest. Therefore, we will make
 # the following preprocessing steps:
 #
-# - use :class:`~sklearn.preprocessing.OrdinaleEcnoder` to encode the
+# - use :class:`~sklearn.preprocessing.OrdinalEncoder` to encode the
 #   categorical features;
 # - use :class:`~sklearn.impute.SimpleImputer` to fill missing values for
 #   numerical features using a mean strategy.
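
The corrected bullet names OrdinalEncoder. A minimal, hedged sketch of the two preprocessing steps the comment describes, wired into a random forest (column names and hyperparameters are assumptions, not the example's exact pipeline):

# Hedged sketch of the described preprocessing; the column lists below are
# assumed names for illustration only.
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder

categorical_columns = ["pclass", "sex", "embarked"]     # assumed
numerical_columns = ["age", "sibsp", "parch", "fare"]   # assumed

preprocessing = ColumnTransformer(
    [
        (
            "cat",
            OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1),
            categorical_columns,
        ),
        ("num", SimpleImputer(strategy="mean"), numerical_columns),
    ]
)
model = make_pipeline(preprocessing, RandomForestClassifier(random_state=42))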

Diff for: 1.1/_downloads/3ed102fa8211c8d36f2331f0c5e1dcef/plot_model_complexity_influence.ipynb

+1 -1

@@ -15,7 +15,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\n# Model Complexity Influence\n\nDemonstrate how model complexity influences both prediction accuracy and\ncomputational performance.\n\nWe will be using two datasets:\n - `diabetes_dataset` for regression.\n This dataset consists of 10 measurements taken from diabetes patients.\n The task is to predict disease progression;\n - `20newsgroups_dataset` for classification. This dataset consists of\n newsgroup posts. The task is to predict on which topic (out of 20 topics)\n the post is written about.\n\nWe will model the complexity influence on three different estimators:\n - :class:`~sklearn.linear_model.SGDClassifier` (for classification data)\n which implements stochastic gradient descent learning;\n\n - :class:`~sklearn.svm.NuSVR` (for regression data) which implements\n Nu support vector regression;\n\n - :class:`~sklearn.ensemble.GradientBoostingRegressor` (for regression\n data) which builds an additive model in a forward stage-wise fashion.\n\n\nWe make the model complexity vary through the choice of relevant model\nparameters in each of our selected models. Next, we will measure the influence\non both computational performance (latency) and predictive power (MSE or\nHamming Loss).\n"
+    "\n# Model Complexity Influence\n\nDemonstrate how model complexity influences both prediction accuracy and\ncomputational performance.\n\nWe will be using two datasets:\n - `diabetes_dataset` for regression.\n This dataset consists of 10 measurements taken from diabetes patients.\n The task is to predict disease progression;\n - `20newsgroups_dataset` for classification. This dataset consists of\n newsgroup posts. The task is to predict on which topic (out of 20 topics)\n the post is written about.\n\nWe will model the complexity influence on three different estimators:\n - :class:`~sklearn.linear_model.SGDClassifier` (for classification data)\n which implements stochastic gradient descent learning;\n\n - :class:`~sklearn.svm.NuSVR` (for regression data) which implements\n Nu support vector regression;\n\n - :class:`~sklearn.ensemble.GradientBoostingRegressor` builds an additive\n model in a forward stage-wise fashion. Notice that\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor` is much faster\n than :class:`~sklearn.ensemble.GradientBoostingRegressor` starting with\n intermediate datasets (`n_samples >= 10_000`), which is not the case for\n this example.\n\n\nWe make the model complexity vary through the choice of relevant model\nparameters in each of our selected models. Next, we will measure the influence\non both computational performance (latency) and predictive power (MSE or\nHamming Loss).\n"
    ]
   },
   {

Diff for: 1.1/_downloads/473e94775f7181f54536fbb1f45b9e42/plot_agglomerative_clustering.py

+1 -1

@@ -6,7 +6,7 @@
 local structure in the data. The graph is simply the graph of 20 nearest
 neighbors.

-Two consequences of imposing a connectivity can be seen. First, clustering
+There are two advantages of imposing a connectivity. First, clustering
 without a connectivity matrix is much faster.

 Second, when using a connectivity matrix, single, average and complete
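
The reworded sentence concerns imposing a connectivity graph on agglomerative clustering. A minimal, hedged sketch of the construction it alludes to, a k-nearest-neighbors graph passed as the connectivity constraint (synthetic data, not the example's):

# Hedged sketch: structured vs. unstructured agglomerative clustering
# (synthetic data; 20-neighbors graph as in the surrounding description).
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import kneighbors_graph

rng = np.random.RandomState(0)
X = rng.randn(1500, 2)

connectivity = kneighbors_graph(X, n_neighbors=20, include_self=False)

unstructured = AgglomerativeClustering(n_clusters=3, linkage="average").fit(X)
structured = AgglomerativeClustering(
    n_clusters=3, linkage="average", connectivity=connectivity
).fit(X)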

Diff for: 1.1/_downloads/4825fc8223d1af0f3b61080c3dea3a62/plot_faces_decomposition.py

+1 -1

@@ -79,7 +79,7 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):


 # %%
-# Lets take a look at our data. Gray color indicates negative values,
+# Let's take a look at our data. Gray color indicates negative values,
 # white indicates positive values.

 plot_gallery("Faces from dataset", faces_centered[:n_components])

Diff for: 1.1/_downloads/57163227aeb4c19ca4c69b87a8d1949c/plot_learning_curve.py

+16 -1

@@ -35,6 +35,7 @@ def plot_learning_curve(
     ylim=None,
     cv=None,
     n_jobs=None,
+    scoring=None,
     train_sizes=np.linspace(0.1, 1.0, 5),
 ):
     """
@@ -86,6 +87,11 @@ def plot_learning_curve(
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.

+    scoring : str or callable, default=None
+        A str (see model evaluation documentation) or
+        a scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+
     train_sizes : array-like of shape (n_ticks,)
         Relative or absolute numbers of training examples that will be used to
         generate the learning curve. If the ``dtype`` is float, it is regarded
@@ -109,6 +115,7 @@ def plot_learning_curve(
         estimator,
         X,
         y,
+        scoring=scoring,
         cv=cv,
         n_jobs=n_jobs,
         train_sizes=train_sizes,
@@ -189,7 +196,15 @@ def plot_learning_curve(

 estimator = GaussianNB()
 plot_learning_curve(
-    estimator, title, X, y, axes=axes[:, 0], ylim=(0.7, 1.01), cv=cv, n_jobs=4
+    estimator,
+    title,
+    X,
+    y,
+    axes=axes[:, 0],
+    ylim=(0.7, 1.01),
+    cv=cv,
+    n_jobs=4,
+    scoring="accuracy",
 )

 title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)"
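
The scoring argument added to the helper above is forwarded to sklearn.model_selection.learning_curve. A minimal, hedged sketch of that underlying call with an explicit scorer (data and CV settings are illustrative):

# Minimal sketch: learning_curve with an explicit `scoring` string, the
# parameter the updated helper now forwards. Data and CV are illustrative.
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import ShuffleSplit, learning_curve
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(return_X_y=True)
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)

train_sizes, train_scores, test_scores = learning_curve(
    GaussianNB(),
    X,
    y,
    cv=cv,
    n_jobs=4,
    scoring="accuracy",
    train_sizes=np.linspace(0.1, 1.0, 5),
)
print(test_scores.mean(axis=1))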
