
Commit f3fa236

Pushing the docs to dev/ for branch: main, commit 70a185ae59b4362633d18b0d0083abb1b6f7370c
1 parent 6a501db commit f3fa236

2,042 files changed: +29,983 −22,988 lines changed

Diff for: dev/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb

+2 −2

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version('0.14'):\n rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
+"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
 ]
 },
 {
@@ -44,7 +44,7 @@
 },
 "outputs": [],
 "source": [
-"for assign_labels in ('kmeans', 'discretize'):\n    t0 = time.time()\n    labels = spectral_clustering(graph, n_clusters=N_REGIONS,\n                                 assign_labels=assign_labels, random_state=42)\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l,\n                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
+"for assign_labels in (\"kmeans\", \"discretize\"):\n    t0 = time.time()\n    labels = spectral_clustering(\n        graph, n_clusters=N_REGIONS, assign_labels=assign_labels, random_state=42\n    )\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l, colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
 ]
 }
 ],
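
The reformatted cell above only changes quoting and line wrapping; the technique itself is untouched. For readers skimming the diff, a minimal self-contained sketch of that graph-based step, with a small random image standing in for the rescaled coins (the 20x20 size and n_clusters=4 are illustrative assumptions, not values from the example):

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.feature_extraction import image

rng = np.random.RandomState(0)
img = rng.rand(20, 20)  # stand-in for rescaled_coins (assumption)

# Build a pixel-adjacency graph whose edge weights carry the image gradient
graph = image.img_to_graph(img)

# Decreasing exponential of the gradient, exactly as in the example cell
beta, eps = 10, 1e-6
graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps

# Cluster the graph; labels come back flat and are reshaped to the image
labels = spectral_clustering(graph, n_clusters=4, random_state=42)
print(labels.reshape(img.shape).shape)  # (20, 20)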

Diff for: dev/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py

+14 −7

@@ -30,8 +30,7 @@
 # Load data
 iris = load_iris()
 
-for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
-                                [1, 2], [1, 3], [2, 3]]):
+for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
     # We only take the two corresponding features
     X = iris.data[:, pair]
     y = iris.target
@@ -44,8 +43,9 @@
 
     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
-                         np.arange(y_min, y_max, plot_step))
+    xx, yy = np.meshgrid(
+        np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
+    )
     plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
 
     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
@@ -58,11 +58,18 @@
     # Plot the training points
     for i, color in zip(range(n_classes), plot_colors):
         idx = np.where(y == i)
-        plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
-                    cmap=plt.cm.RdYlBu, edgecolor='black', s=15)
+        plt.scatter(
+            X[idx, 0],
+            X[idx, 1],
+            c=color,
+            label=iris.target_names[i],
+            cmap=plt.cm.RdYlBu,
+            edgecolor="black",
+            s=15,
+        )
 
 plt.suptitle("Decision surface of a decision tree using paired features")
-plt.legend(loc='lower right', borderpad=0, handletextpad=0)
+plt.legend(loc="lower right", borderpad=0, handletextpad=0)
 plt.axis("tight")
 
 plt.figure()
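
The wrapped meshgrid and scatter calls above are pure formatting changes. For reference, a compact sketch of the decision-surface computation they belong to, run on a single feature pair (the pair choice and plot_step follow the example; the missing context comes from the same file):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
X, y = iris.data[:, [0, 1]], iris.target  # one feature pair

clf = DecisionTreeClassifier().fit(X, y)

# Evaluate the fitted tree on a dense grid to draw the decision surface
plot_step = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
print(Z.shape)  # one predicted class per grid point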

Diff for: dev/_downloads/01fdc7c95204e4a420de7cd297711693/plot_feature_union.py

+5 −3

@@ -50,9 +50,11 @@
 
 pipeline = Pipeline([("features", combined_features), ("svm", svm)])
 
-param_grid = dict(features__pca__n_components=[1, 2, 3],
-                  features__univ_select__k=[1, 2],
-                  svm__C=[0.1, 1, 10])
+param_grid = dict(
+    features__pca__n_components=[1, 2, 3],
+    features__univ_select__k=[1, 2],
+    svm__C=[0.1, 1, 10],
+)
 
 grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
 grid_search.fit(X, y)
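
The trailing-comma dict layout above does not change the search space. As a self-contained sketch of how those double-underscore keys route to the nested estimators (mirroring the example's own pipeline; the SVC and SelectKBest settings are assumptions based on the surrounding file, which is not fully shown in this diff):

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

combined_features = FeatureUnion(
    [("pca", PCA(n_components=2)), ("univ_select", SelectKBest(k=1))]
)
pipeline = Pipeline([("features", combined_features), ("svm", SVC(kernel="linear"))])

# "features__pca__n_components" reaches the PCA step inside the union;
# "svm__C" reaches the final classifier.
param_grid = dict(
    features__pca__n_components=[1, 2, 3],
    features__univ_select__k=[1, 2],
    svm__C=[0.1, 1, 10],
)
grid_search = GridSearchCV(pipeline, param_grid=param_grid)
grid_search.fit(X, y)
print(grid_search.best_params_)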

Diff for: dev/_downloads/023324c27491610e7c0ccff87c59abf9/plot_kernel_pca.py

+14 −22

@@ -20,7 +20,7 @@
 
 np.random.seed(0)
 
-X, y = make_circles(n_samples=400, factor=.3, noise=.05)
+X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)
 
 kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
 X_kpca = kpca.fit_transform(X)
@@ -31,47 +31,39 @@
 # Plot results
 
 plt.figure()
-plt.subplot(2, 2, 1, aspect='equal')
+plt.subplot(2, 2, 1, aspect="equal")
 plt.title("Original space")
 reds = y == 0
 blues = y == 1
 
-plt.scatter(X[reds, 0], X[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X[blues, 0], X[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor="k")
 plt.xlabel("$x_1$")
 plt.ylabel("$x_2$")
 
 X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))
 X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
 # projection on the first principal component (in the phi space)
 Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
-plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')
+plt.contour(X1, X2, Z_grid, colors="grey", linewidths=1, origin="lower")
 
-plt.subplot(2, 2, 2, aspect='equal')
-plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 2, aspect="equal")
+plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Projection by PCA")
 plt.xlabel("1st principal component")
 plt.ylabel("2nd component")
 
-plt.subplot(2, 2, 3, aspect='equal')
-plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 3, aspect="equal")
+plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Projection by KPCA")
 plt.xlabel(r"1st principal component in space induced by $\phi$")
 plt.ylabel("2nd component")
 
-plt.subplot(2, 2, 4, aspect='equal')
-plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 4, aspect="equal")
+plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Original space after inverse transform")
 plt.xlabel("$x_1$")
 plt.ylabel("$x_2$")
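
Everything above is quote and wrapping normalization. A minimal sketch of the KernelPCA round trip the four panels visualize (random_state=0 on make_circles is added here for reproducibility; it is not in the original):

from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA

X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)           # concentric circles become separable
X_back = kpca.inverse_transform(X_kpca)  # approximate pre-image in input space
X_pca = PCA(n_components=2).fit_transform(X)  # linear PCA for comparison
print(X_kpca.shape, X_back.shape, X_pca.shape)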

Diff for: dev/_downloads/02a1306a494b46cc56c930ceec6e8c4a/plot_species_kde.py

+24 −16

@@ -48,6 +48,7 @@
 # otherwise, we'll improvise later...
 try:
     from mpl_toolkits.basemap import Basemap
+
     basemap = True
 except ImportError:
     basemap = False
@@ -82,13 +83,14 @@ def construct_grids(batch):
 
 # Get matrices/arrays of species IDs and locations
 data = fetch_species_distributions()
-species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']
+species_names = ["Bradypus Variegatus", "Microryzomys Minutus"]
 
-Xtrain = np.vstack([data['train']['dd lat'],
-                    data['train']['dd long']]).T
-ytrain = np.array([d.decode('ascii').startswith('micro')
-                   for d in data['train']['species']], dtype='int')
-Xtrain *= np.pi / 180.  # Convert lat/long to radians
+Xtrain = np.vstack([data["train"]["dd lat"], data["train"]["dd long"]]).T
+ytrain = np.array(
+    [d.decode("ascii").startswith("micro") for d in data["train"]["species"]],
+    dtype="int",
+)
+Xtrain *= np.pi / 180.0  # Convert lat/long to radians
 
 # Set up the data grid for the contour plot
 xgrid, ygrid = construct_grids(data)
@@ -98,7 +100,7 @@ def construct_grids(batch):
 
 xy = np.vstack([Y.ravel(), X.ravel()]).T
 xy = xy[land_mask]
-xy *= np.pi / 180.
+xy *= np.pi / 180.0
 
 # Plot map of South America with distributions of each species
 fig = plt.figure()
@@ -109,12 +111,13 @@ def construct_grids(batch):
 
     # construct a kernel density estimate of the distribution
     print(" - computing KDE in spherical coordinates")
-    kde = KernelDensity(bandwidth=0.04, metric='haversine',
-                        kernel='gaussian', algorithm='ball_tree')
+    kde = KernelDensity(
+        bandwidth=0.04, metric="haversine", kernel="gaussian", algorithm="ball_tree"
+    )
     kde.fit(Xtrain[ytrain == i])
 
     # evaluate only on the land: -9999 indicates ocean
-    Z = np.full(land_mask.shape[0], -9999, dtype='int')
+    Z = np.full(land_mask.shape[0], -9999, dtype="int")
     Z[land_mask] = np.exp(kde.score_samples(xy))
     Z = Z.reshape(X.shape)
 
@@ -124,16 +127,21 @@ def construct_grids(batch):
 
     if basemap:
         print(" - plot coastlines using basemap")
-        m = Basemap(projection='cyl', llcrnrlat=Y.min(),
-                    urcrnrlat=Y.max(), llcrnrlon=X.min(),
-                    urcrnrlon=X.max(), resolution='c')
+        m = Basemap(
+            projection="cyl",
+            llcrnrlat=Y.min(),
+            urcrnrlat=Y.max(),
+            llcrnrlon=X.min(),
+            urcrnrlon=X.max(),
+            resolution="c",
+        )
         m.drawcoastlines()
         m.drawcountries()
     else:
         print(" - plot coastlines from coverage")
-        plt.contour(X, Y, land_reference,
-                    levels=[-9998], colors="k",
-                    linestyles="solid")
+        plt.contour(
+            X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
+        )
     plt.xticks([])
     plt.yticks([])
 
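
Behavior is again untouched; only the KernelDensity and Basemap calls were re-wrapped. As a tiny sketch of the haversine-metric KDE at the heart of this example, on made-up lat/long points (the three coordinates below are illustrative, not dataset values):

import numpy as np
from sklearn.neighbors import KernelDensity

latlon_deg = np.array([[-10.0, -60.0], [-12.0, -62.0], [-11.0, -61.0]])
Xtrain = latlon_deg * np.pi / 180.0  # haversine expects radians, as in the example

kde = KernelDensity(
    bandwidth=0.04, metric="haversine", kernel="gaussian", algorithm="ball_tree"
)
kde.fit(Xtrain)
print(np.exp(kde.score_samples(Xtrain)))  # density at the training points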

Diff for: dev/_downloads/02a7bbce3c39c70d62d80e875968e5c6/plot_digits_kde_sampling.py

+8 −6

@@ -26,7 +26,7 @@
 data = pca.fit_transform(digits.data)
 
 # use grid search cross-validation to optimize the bandwidth
-params = {'bandwidth': np.logspace(-1, 1, 20)}
+params = {"bandwidth": np.logspace(-1, 1, 20)}
 grid = GridSearchCV(KernelDensity(), params)
 grid.fit(data)
 
@@ -48,14 +48,16 @@
 for j in range(11):
     ax[4, j].set_visible(False)
     for i in range(4):
-        im = ax[i, j].imshow(real_data[i, j].reshape((8, 8)),
-                             cmap=plt.cm.binary, interpolation='nearest')
+        im = ax[i, j].imshow(
+            real_data[i, j].reshape((8, 8)), cmap=plt.cm.binary, interpolation="nearest"
+        )
         im.set_clim(0, 16)
-        im = ax[i + 5, j].imshow(new_data[i, j].reshape((8, 8)),
-                                 cmap=plt.cm.binary, interpolation='nearest')
+        im = ax[i + 5, j].imshow(
+            new_data[i, j].reshape((8, 8)), cmap=plt.cm.binary, interpolation="nearest"
+        )
         im.set_clim(0, 16)
 
-ax[0, 5].set_title('Selection from the input data')
+ax[0, 5].set_title("Selection from the input data")
 ax[5, 5].set_title('"New" digits drawn from the kernel density model')
 
 plt.show()
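
For context, a condensed sketch of what this example does around the reformatted lines: grid-search the KDE bandwidth in PCA space, then sample new "digits" and project them back to pixel space (the component count and sample count follow the original; random_state=0 is an added assumption):

import numpy as np
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

digits = load_digits()
pca = PCA(n_components=15, whiten=False)
data = pca.fit_transform(digits.data)

# use grid search cross-validation to optimize the bandwidth
params = {"bandwidth": np.logspace(-1, 1, 20)}
grid = GridSearchCV(KernelDensity(), params)
grid.fit(data)

# sample from the best-bandwidth KDE and map back to 8x8 pixel space
kde = grid.best_estimator_
new_data = pca.inverse_transform(kde.sample(44, random_state=0))
print(new_data.shape)  # (44, 64): 44 new "digits"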

Diff for: dev/_downloads/02d88d76c60b7397c8c6e221b31568dd/plot_grid_search_refit_callable.py

+36 −26

@@ -46,10 +46,12 @@ def lower_bound(cv_results):
     Lower bound within 1 standard deviation of the
     best `mean_test_score`.
     """
-    best_score_idx = np.argmax(cv_results['mean_test_score'])
+    best_score_idx = np.argmax(cv_results["mean_test_score"])
 
-    return (cv_results['mean_test_score'][best_score_idx]
-            - cv_results['std_test_score'][best_score_idx])
+    return (
+        cv_results["mean_test_score"][best_score_idx]
+        - cv_results["std_test_score"][best_score_idx]
+    )
 
 
 def best_low_complexity(cv_results):
@@ -69,48 +71,56 @@ def best_low_complexity(cv_results):
     `mean_test_score`.
     """
     threshold = lower_bound(cv_results)
-    candidate_idx = np.flatnonzero(cv_results['mean_test_score'] >= threshold)
-    best_idx = candidate_idx[cv_results['param_reduce_dim__n_components']
-                             [candidate_idx].argmin()]
+    candidate_idx = np.flatnonzero(cv_results["mean_test_score"] >= threshold)
+    best_idx = candidate_idx[
+        cv_results["param_reduce_dim__n_components"][candidate_idx].argmin()
+    ]
     return best_idx
 
 
-pipe = Pipeline([
-    ('reduce_dim', PCA(random_state=42)),
-    ('classify', LinearSVC(random_state=42, C=0.01)),
-])
+pipe = Pipeline(
+    [
+        ("reduce_dim", PCA(random_state=42)),
+        ("classify", LinearSVC(random_state=42, C=0.01)),
+    ]
+)
 
-param_grid = {
-    'reduce_dim__n_components': [6, 8, 10, 12, 14]
-}
+param_grid = {"reduce_dim__n_components": [6, 8, 10, 12, 14]}
 
-grid = GridSearchCV(pipe, cv=10, n_jobs=1, param_grid=param_grid,
-                    scoring='accuracy', refit=best_low_complexity)
+grid = GridSearchCV(
+    pipe,
+    cv=10,
+    n_jobs=1,
+    param_grid=param_grid,
+    scoring="accuracy",
+    refit=best_low_complexity,
+)
 X, y = load_digits(return_X_y=True)
 grid.fit(X, y)
 
-n_components = grid.cv_results_['param_reduce_dim__n_components']
-test_scores = grid.cv_results_['mean_test_score']
+n_components = grid.cv_results_["param_reduce_dim__n_components"]
+test_scores = grid.cv_results_["mean_test_score"]
 
 plt.figure()
-plt.bar(n_components, test_scores, width=1.3, color='b')
+plt.bar(n_components, test_scores, width=1.3, color="b")
 
 lower = lower_bound(grid.cv_results_)
-plt.axhline(np.max(test_scores), linestyle='--', color='y',
-            label='Best score')
-plt.axhline(lower, linestyle='--', color='.5', label='Best score - 1 std')
+plt.axhline(np.max(test_scores), linestyle="--", color="y", label="Best score")
+plt.axhline(lower, linestyle="--", color=".5", label="Best score - 1 std")
 
 plt.title("Balance model complexity and cross-validated score")
-plt.xlabel('Number of PCA components used')
-plt.ylabel('Digit classification accuracy')
+plt.xlabel("Number of PCA components used")
+plt.ylabel("Digit classification accuracy")
 plt.xticks(n_components.tolist())
 plt.ylim((0, 1.0))
-plt.legend(loc='upper left')
+plt.legend(loc="upper left")
 
 best_index_ = grid.best_index_
 
 print("The best_index_ is %d" % best_index_)
 print("The n_components selected is %d" % n_components[best_index_])
-print("The corresponding accuracy score is %.2f"
-      % grid.cv_results_['mean_test_score'][best_index_])
+print(
+    "The corresponding accuracy score is %.2f"
+    % grid.cv_results_["mean_test_score"][best_index_]
+)
 plt.show()
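
The key mechanism here, unchanged by the reformat, is that GridSearchCV accepts a callable for refit: the callable receives cv_results_ and returns the index of the candidate to refit. A self-contained sketch with a hand-built cv_results dict (the scores below are fabricated purely to show the selection logic):

import numpy as np

def best_low_complexity_sketch(cv_results):
    """Pick the smallest model within one std of the best mean test score."""
    scores = cv_results["mean_test_score"]
    best = np.argmax(scores)
    threshold = scores[best] - cv_results["std_test_score"][best]
    candidates = np.flatnonzero(scores >= threshold)
    sizes = cv_results["param_reduce_dim__n_components"][candidates]
    return candidates[sizes.argmin()]

# Toy cv_results: 10 components scores best, but 8 is within one std
toy = {
    "mean_test_score": np.array([0.90, 0.92, 0.93]),
    "std_test_score": np.array([0.02, 0.02, 0.02]),
    "param_reduce_dim__n_components": np.array([6, 8, 10]),
}
print(best_low_complexity_sketch(toy))  # 1 -> the 8-component model

Passing refit=best_low_complexity_sketch to GridSearchCV (as the diff does with its own best_low_complexity) would then refit the 8-component pipeline rather than the raw argmax.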

Diff for: dev/_downloads/02f111fb3dd79805b161e14c564184fc/plot_sgd_comparison.ipynb

+1 −1

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n    (\"SGD\", SGDClassifier(max_iter=100)),\n    (\"ASGD\", SGDClassifier(average=True)),\n    (\"Perceptron\", Perceptron()),\n    (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n                                                         C=1.0, tol=1e-4)),\n    (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n                                                          C=1.0, tol=1e-4)),\n    (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n    print(\"training %s\" % name)\n    rng = np.random.RandomState(42)\n    yy = []\n    for i in heldout:\n        yy_ = []\n        for r in range(rounds):\n            X_train, X_test, y_train, y_test = \\\n                train_test_split(X, y, test_size=i, random_state=rng)\n            clf.fit(X_train, y_train)\n            y_pred = clf.predict(X_test)\n            yy_.append(1 - np.mean(y_pred == y_test))\n        yy.append(np.mean(yy_))\n    plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n    (\"SGD\", SGDClassifier(max_iter=100)),\n    (\"ASGD\", SGDClassifier(average=True)),\n    (\"Perceptron\", Perceptron()),\n    (\n        \"Passive-Aggressive I\",\n        PassiveAggressiveClassifier(loss=\"hinge\", C=1.0, tol=1e-4),\n    ),\n    (\n        \"Passive-Aggressive II\",\n        PassiveAggressiveClassifier(loss=\"squared_hinge\", C=1.0, tol=1e-4),\n    ),\n    (\"SAG\", LogisticRegression(solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0])),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n    print(\"training %s\" % name)\n    rng = np.random.RandomState(42)\n    yy = []\n    for i in heldout:\n        yy_ = []\n        for r in range(rounds):\n            X_train, X_test, y_train, y_test = train_test_split(\n                X, y, test_size=i, random_state=rng\n            )\n            clf.fit(X_train, y_train)\n            y_pred = clf.predict(X_test)\n            yy_.append(1 - np.mean(y_pred == y_test))\n        yy.append(np.mean(yy_))\n    plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
 ]
 }
 ],
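
The notebook change above is again formatting only. A stripped-down sketch of its comparison loop for a single classifier (one SGDClassifier instead of the six models, one round per fraction; the held-out fractions match the original):

import numpy as np
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
X, y = datasets.load_digits(return_X_y=True)
clf = SGDClassifier(max_iter=100)

rng = np.random.RandomState(42)
for frac in heldout:
    # hold out `frac` of the data; train on the rest and measure test error
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=frac, random_state=rng
    )
    clf.fit(X_train, y_train)
    err = 1 - np.mean(clf.predict(X_test) == y_test)
    print("train fraction %.2f -> test error %.3f" % (1 - frac, err))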
