
Commit f3fa236

Pushing the docs to dev/ for branch: main, commit 70a185ae59b4362633d18b0d0083abb1b6f7370c
1 parent 6a501db commit f3fa236

2,042 files changed: +29,983 −22,988 lines changed

Diff for: dev/_downloads/006fc185672e58b056a5c134db26935c/plot_coin_segmentation.ipynb

+2 −2

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version('0.14'):\n rescale_params = {'anti_aliasing': False, 'multichannel': False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\",\n **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
+"print(__doc__)\n\n# Author: Gael Varoquaux <[email protected]>, Brian Cheung\n# License: BSD 3 clause\n\nimport time\n\nimport numpy as np\nfrom scipy.ndimage.filters import gaussian_filter\nimport matplotlib.pyplot as plt\nimport skimage\nfrom skimage.data import coins\nfrom skimage.transform import rescale\n\nfrom sklearn.feature_extraction import image\nfrom sklearn.cluster import spectral_clustering\nfrom sklearn.utils.fixes import parse_version\n\n# these were introduced in skimage-0.14\nif parse_version(skimage.__version__) >= parse_version(\"0.14\"):\n rescale_params = {\"anti_aliasing\": False, \"multichannel\": False}\nelse:\n rescale_params = {}\n\n# load the coins as a numpy array\norig_coins = coins()\n\n# Resize it to 20% of the original size to speed up the processing\n# Applying a Gaussian filter for smoothing prior to down-scaling\n# reduces aliasing artifacts.\nsmoothened_coins = gaussian_filter(orig_coins, sigma=2)\nrescaled_coins = rescale(smoothened_coins, 0.2, mode=\"reflect\", **rescale_params)\n\n# Convert the image into a graph with the value of the gradient on the\n# edges.\ngraph = image.img_to_graph(rescaled_coins)\n\n# Take a decreasing function of the gradient: an exponential\n# The smaller beta is, the more independent the segmentation is of the\n# actual image. For beta=1, the segmentation is close to a voronoi\nbeta = 10\neps = 1e-6\ngraph.data = np.exp(-beta * graph.data / graph.data.std()) + eps\n\n# Apply spectral clustering (this step goes much faster if you have pyamg\n# installed)\nN_REGIONS = 25"
 ]
 },
 {
@@ -44,7 +44,7 @@
 },
 "outputs": [],
 "source": [
-"for assign_labels in ('kmeans', 'discretize'):\n    t0 = time.time()\n    labels = spectral_clustering(graph, n_clusters=N_REGIONS,\n                                 assign_labels=assign_labels, random_state=42)\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l,\n                    colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
+"for assign_labels in (\"kmeans\", \"discretize\"):\n    t0 = time.time()\n    labels = spectral_clustering(\n        graph, n_clusters=N_REGIONS, assign_labels=assign_labels, random_state=42\n    )\n    t1 = time.time()\n    labels = labels.reshape(rescaled_coins.shape)\n\n    plt.figure(figsize=(5, 5))\n    plt.imshow(rescaled_coins, cmap=plt.cm.gray)\n    for l in range(N_REGIONS):\n        plt.contour(labels == l, colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])\n    plt.xticks(())\n    plt.yticks(())\n    title = \"Spectral clustering: %s, %.2fs\" % (assign_labels, (t1 - t0))\n    print(title)\n    plt.title(title)\nplt.show()"
 ]
 }
 ],
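
The reformatted cell above only changes quoting and line wrapping; the technique itself is untouched. For readers skimming the diff, a minimal self-contained sketch of that graph-based step, with a small random image standing in for the rescaled coins (the 20x20 size and n_clusters=4 are illustrative assumptions, not values from the example):

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.feature_extraction import image

rng = np.random.RandomState(0)
img = rng.rand(20, 20)  # stand-in for rescaled_coins (assumption)

# Build a pixel-adjacency graph whose edge weights carry the image gradient
graph = image.img_to_graph(img)

# Decreasing exponential of the gradient, exactly as in the example cell
beta, eps = 10, 1e-6
graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps

# Cluster the graph; labels come back flat and are reshaped to the image
labels = spectral_clustering(graph, n_clusters=4, random_state=42)
print(labels.reshape(img.shape).shape)  # (20, 20)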

Diff for: dev/_downloads/00ae629d652473137a3905a5e08ea815/plot_iris_dtc.py

+14 −7

@@ -30,8 +30,7 @@
 # Load data
 iris = load_iris()
 
-for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
-                                [1, 2], [1, 3], [2, 3]]):
+for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
     # We only take the two corresponding features
     X = iris.data[:, pair]
     y = iris.target
@@ -44,8 +43,9 @@
 
     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
-    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
-                         np.arange(y_min, y_max, plot_step))
+    xx, yy = np.meshgrid(
+        np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
+    )
     plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
 
     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
@@ -58,11 +58,18 @@
     # Plot the training points
     for i, color in zip(range(n_classes), plot_colors):
         idx = np.where(y == i)
-        plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
-                    cmap=plt.cm.RdYlBu, edgecolor='black', s=15)
+        plt.scatter(
+            X[idx, 0],
+            X[idx, 1],
+            c=color,
+            label=iris.target_names[i],
+            cmap=plt.cm.RdYlBu,
+            edgecolor="black",
+            s=15,
+        )
 
 plt.suptitle("Decision surface of a decision tree using paired features")
-plt.legend(loc='lower right', borderpad=0, handletextpad=0)
+plt.legend(loc="lower right", borderpad=0, handletextpad=0)
 plt.axis("tight")
 
 plt.figure()
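
The wrapped meshgrid and scatter calls above are pure formatting changes. For reference, a compact sketch of the decision-surface computation they belong to, run on a single feature pair (the pair choice and plot_step follow the example; the missing context comes from the same file):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
X, y = iris.data[:, [0, 1]], iris.target  # one feature pair

clf = DecisionTreeClassifier().fit(X, y)

# Evaluate the fitted tree on a dense grid to draw the decision surface
plot_step = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)
)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
print(Z.shape)  # one predicted class per grid point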

Diff for: dev/_downloads/01fdc7c95204e4a420de7cd297711693/plot_feature_union.py

+5 −3

@@ -50,9 +50,11 @@
 
 pipeline = Pipeline([("features", combined_features), ("svm", svm)])
 
-param_grid = dict(features__pca__n_components=[1, 2, 3],
-                  features__univ_select__k=[1, 2],
-                  svm__C=[0.1, 1, 10])
+param_grid = dict(
+    features__pca__n_components=[1, 2, 3],
+    features__univ_select__k=[1, 2],
+    svm__C=[0.1, 1, 10],
+)
 
 grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
 grid_search.fit(X, y)
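
The trailing-comma dict layout above does not change the search space. As a self-contained sketch of how those double-underscore keys route to the nested estimators (mirroring the example's own pipeline; the SVC and SelectKBest settings are assumptions based on the surrounding file, which is not fully shown in this diff):

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

combined_features = FeatureUnion(
    [("pca", PCA(n_components=2)), ("univ_select", SelectKBest(k=1))]
)
pipeline = Pipeline([("features", combined_features), ("svm", SVC(kernel="linear"))])

# "features__pca__n_components" reaches the PCA step inside the union;
# "svm__C" reaches the final classifier.
param_grid = dict(
    features__pca__n_components=[1, 2, 3],
    features__univ_select__k=[1, 2],
    svm__C=[0.1, 1, 10],
)
grid_search = GridSearchCV(pipeline, param_grid=param_grid)
grid_search.fit(X, y)
print(grid_search.best_params_)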

Diff for: dev/_downloads/023324c27491610e7c0ccff87c59abf9/plot_kernel_pca.py

+14 −22

@@ -20,7 +20,7 @@
 
 np.random.seed(0)
 
-X, y = make_circles(n_samples=400, factor=.3, noise=.05)
+X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)
 
 kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
 X_kpca = kpca.fit_transform(X)
@@ -31,47 +31,39 @@
 # Plot results
 
 plt.figure()
-plt.subplot(2, 2, 1, aspect='equal')
+plt.subplot(2, 2, 1, aspect="equal")
 plt.title("Original space")
 reds = y == 0
 blues = y == 1
 
-plt.scatter(X[reds, 0], X[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X[blues, 0], X[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor="k")
 plt.xlabel("$x_1$")
 plt.ylabel("$x_2$")
 
 X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))
 X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T
 # projection on the first principal component (in the phi space)
 Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)
-plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')
+plt.contour(X1, X2, Z_grid, colors="grey", linewidths=1, origin="lower")
 
-plt.subplot(2, 2, 2, aspect='equal')
-plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 2, aspect="equal")
+plt.scatter(X_pca[reds, 0], X_pca[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_pca[blues, 0], X_pca[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Projection by PCA")
 plt.xlabel("1st principal component")
 plt.ylabel("2nd component")
 
-plt.subplot(2, 2, 3, aspect='equal')
-plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 3, aspect="equal")
+plt.scatter(X_kpca[reds, 0], X_kpca[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_kpca[blues, 0], X_kpca[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Projection by KPCA")
 plt.xlabel(r"1st principal component in space induced by $\phi$")
 plt.ylabel("2nd component")
 
-plt.subplot(2, 2, 4, aspect='equal')
-plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red",
-            s=20, edgecolor='k')
-plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue",
-            s=20, edgecolor='k')
+plt.subplot(2, 2, 4, aspect="equal")
+plt.scatter(X_back[reds, 0], X_back[reds, 1], c="red", s=20, edgecolor="k")
+plt.scatter(X_back[blues, 0], X_back[blues, 1], c="blue", s=20, edgecolor="k")
 plt.title("Original space after inverse transform")
 plt.xlabel("$x_1$")
 plt.ylabel("$x_2$")
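
Everything above is quote and wrapping normalization. A minimal sketch of the KernelPCA round trip the four panels visualize (random_state=0 on make_circles is added here for reproducibility; it is not in the original):

from sklearn.datasets import make_circles
from sklearn.decomposition import PCA, KernelPCA

X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)           # concentric circles become separable
X_back = kpca.inverse_transform(X_kpca)  # approximate pre-image in input space
X_pca = PCA(n_components=2).fit_transform(X)  # linear PCA for comparison
print(X_kpca.shape, X_back.shape, X_pca.shape)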

Diff for: dev/_downloads/02a1306a494b46cc56c930ceec6e8c4a/plot_species_kde.py

+24 −16

@@ -48,6 +48,7 @@
 # otherwise, we'll improvise later...
 try:
     from mpl_toolkits.basemap import Basemap
+
     basemap = True
 except ImportError:
     basemap = False
@@ -82,13 +83,14 @@ def construct_grids(batch):
 
 # Get matrices/arrays of species IDs and locations
 data = fetch_species_distributions()
-species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']
+species_names = ["Bradypus Variegatus", "Microryzomys Minutus"]
 
-Xtrain = np.vstack([data['train']['dd lat'],
-                    data['train']['dd long']]).T
-ytrain = np.array([d.decode('ascii').startswith('micro')
-                   for d in data['train']['species']], dtype='int')
-Xtrain *= np.pi / 180.  # Convert lat/long to radians
+Xtrain = np.vstack([data["train"]["dd lat"], data["train"]["dd long"]]).T
+ytrain = np.array(
+    [d.decode("ascii").startswith("micro") for d in data["train"]["species"]],
+    dtype="int",
+)
+Xtrain *= np.pi / 180.0  # Convert lat/long to radians
 
 # Set up the data grid for the contour plot
 xgrid, ygrid = construct_grids(data)
@@ -98,7 +100,7 @@ def construct_grids(batch):
 
 xy = np.vstack([Y.ravel(), X.ravel()]).T
 xy = xy[land_mask]
-xy *= np.pi / 180.
+xy *= np.pi / 180.0
 
 # Plot map of South America with distributions of each species
 fig = plt.figure()
@@ -109,12 +111,13 @@ def construct_grids(batch):
 
     # construct a kernel density estimate of the distribution
     print(" - computing KDE in spherical coordinates")
-    kde = KernelDensity(bandwidth=0.04, metric='haversine',
-                        kernel='gaussian', algorithm='ball_tree')
+    kde = KernelDensity(
+        bandwidth=0.04, metric="haversine", kernel="gaussian", algorithm="ball_tree"
+    )
     kde.fit(Xtrain[ytrain == i])
 
     # evaluate only on the land: -9999 indicates ocean
-    Z = np.full(land_mask.shape[0], -9999, dtype='int')
+    Z = np.full(land_mask.shape[0], -9999, dtype="int")
     Z[land_mask] = np.exp(kde.score_samples(xy))
     Z = Z.reshape(X.shape)
 
@@ -124,16 +127,21 @@ def construct_grids(batch):
 
     if basemap:
         print(" - plot coastlines using basemap")
-        m = Basemap(projection='cyl', llcrnrlat=Y.min(),
-                    urcrnrlat=Y.max(), llcrnrlon=X.min(),
-                    urcrnrlon=X.max(), resolution='c')
+        m = Basemap(
+            projection="cyl",
+            llcrnrlat=Y.min(),
+            urcrnrlat=Y.max(),
+            llcrnrlon=X.min(),
+            urcrnrlon=X.max(),
+            resolution="c",
+        )
         m.drawcoastlines()
         m.drawcountries()
     else:
         print(" - plot coastlines from coverage")
-        plt.contour(X, Y, land_reference,
-                    levels=[-9998], colors="k",
-                    linestyles="solid")
+        plt.contour(
+            X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
+        )
     plt.xticks([])
     plt.yticks([])
 
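
Behavior is again untouched; only the KernelDensity and Basemap calls were re-wrapped. As a tiny sketch of the haversine-metric KDE at the heart of this example, on made-up lat/long points (the three coordinates below are illustrative, not dataset values):

import numpy as np
from sklearn.neighbors import KernelDensity

latlon_deg = np.array([[-10.0, -60.0], [-12.0, -62.0], [-11.0, -61.0]])
Xtrain = latlon_deg * np.pi / 180.0  # haversine expects radians, as in the example

kde = KernelDensity(
    bandwidth=0.04, metric="haversine", kernel="gaussian", algorithm="ball_tree"
)
kde.fit(Xtrain)
print(np.exp(kde.score_samples(Xtrain)))  # density at the training points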

Diff for: dev/_downloads/02a7bbce3c39c70d62d80e875968e5c6/plot_digits_kde_sampling.py

+8 −6

@@ -26,7 +26,7 @@
 data = pca.fit_transform(digits.data)
 
 # use grid search cross-validation to optimize the bandwidth
-params = {'bandwidth': np.logspace(-1, 1, 20)}
+params = {"bandwidth": np.logspace(-1, 1, 20)}
 grid = GridSearchCV(KernelDensity(), params)
 grid.fit(data)
 
@@ -48,14 +48,16 @@
 for j in range(11):
     ax[4, j].set_visible(False)
     for i in range(4):
-        im = ax[i, j].imshow(real_data[i, j].reshape((8, 8)),
-                             cmap=plt.cm.binary, interpolation='nearest')
+        im = ax[i, j].imshow(
+            real_data[i, j].reshape((8, 8)), cmap=plt.cm.binary, interpolation="nearest"
+        )
         im.set_clim(0, 16)
-        im = ax[i + 5, j].imshow(new_data[i, j].reshape((8, 8)),
-                                 cmap=plt.cm.binary, interpolation='nearest')
+        im = ax[i + 5, j].imshow(
+            new_data[i, j].reshape((8, 8)), cmap=plt.cm.binary, interpolation="nearest"
+        )
         im.set_clim(0, 16)
 
-ax[0, 5].set_title('Selection from the input data')
+ax[0, 5].set_title("Selection from the input data")
 ax[5, 5].set_title('"New" digits drawn from the kernel density model')
 
 plt.show()
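
For context, a condensed sketch of what this example does around the reformatted lines: grid-search the KDE bandwidth in PCA space, then sample new "digits" and project them back to pixel space (the component count and sample count follow the original; random_state=0 is an added assumption):

import numpy as np
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

digits = load_digits()
pca = PCA(n_components=15, whiten=False)
data = pca.fit_transform(digits.data)

# use grid search cross-validation to optimize the bandwidth
params = {"bandwidth": np.logspace(-1, 1, 20)}
grid = GridSearchCV(KernelDensity(), params)
grid.fit(data)

# sample from the best-bandwidth KDE and map back to 8x8 pixel space
kde = grid.best_estimator_
new_data = pca.inverse_transform(kde.sample(44, random_state=0))
print(new_data.shape)  # (44, 64): 44 new "digits"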

Diff for: dev/_downloads/02d88d76c60b7397c8c6e221b31568dd/plot_grid_search_refit_callable.py

+36 −26

@@ -46,10 +46,12 @@ def lower_bound(cv_results):
     Lower bound within 1 standard deviation of the
     best `mean_test_score`.
     """
-    best_score_idx = np.argmax(cv_results['mean_test_score'])
+    best_score_idx = np.argmax(cv_results["mean_test_score"])
 
-    return (cv_results['mean_test_score'][best_score_idx]
-            - cv_results['std_test_score'][best_score_idx])
+    return (
+        cv_results["mean_test_score"][best_score_idx]
+        - cv_results["std_test_score"][best_score_idx]
+    )
 
 
 def best_low_complexity(cv_results):
@@ -69,48 +71,56 @@ def best_low_complexity(cv_results):
     `mean_test_score`.
     """
     threshold = lower_bound(cv_results)
-    candidate_idx = np.flatnonzero(cv_results['mean_test_score'] >= threshold)
-    best_idx = candidate_idx[cv_results['param_reduce_dim__n_components']
-                             [candidate_idx].argmin()]
+    candidate_idx = np.flatnonzero(cv_results["mean_test_score"] >= threshold)
+    best_idx = candidate_idx[
+        cv_results["param_reduce_dim__n_components"][candidate_idx].argmin()
+    ]
     return best_idx
 
 
-pipe = Pipeline([
-    ('reduce_dim', PCA(random_state=42)),
-    ('classify', LinearSVC(random_state=42, C=0.01)),
-])
+pipe = Pipeline(
+    [
+        ("reduce_dim", PCA(random_state=42)),
+        ("classify", LinearSVC(random_state=42, C=0.01)),
+    ]
+)
 
-param_grid = {
-    'reduce_dim__n_components': [6, 8, 10, 12, 14]
-}
+param_grid = {"reduce_dim__n_components": [6, 8, 10, 12, 14]}
 
-grid = GridSearchCV(pipe, cv=10, n_jobs=1, param_grid=param_grid,
-                    scoring='accuracy', refit=best_low_complexity)
+grid = GridSearchCV(
+    pipe,
+    cv=10,
+    n_jobs=1,
+    param_grid=param_grid,
+    scoring="accuracy",
+    refit=best_low_complexity,
+)
 X, y = load_digits(return_X_y=True)
 grid.fit(X, y)
 
-n_components = grid.cv_results_['param_reduce_dim__n_components']
-test_scores = grid.cv_results_['mean_test_score']
+n_components = grid.cv_results_["param_reduce_dim__n_components"]
+test_scores = grid.cv_results_["mean_test_score"]
 
 plt.figure()
-plt.bar(n_components, test_scores, width=1.3, color='b')
+plt.bar(n_components, test_scores, width=1.3, color="b")
 
 lower = lower_bound(grid.cv_results_)
-plt.axhline(np.max(test_scores), linestyle='--', color='y',
-            label='Best score')
-plt.axhline(lower, linestyle='--', color='.5', label='Best score - 1 std')
+plt.axhline(np.max(test_scores), linestyle="--", color="y", label="Best score")
+plt.axhline(lower, linestyle="--", color=".5", label="Best score - 1 std")
 
 plt.title("Balance model complexity and cross-validated score")
-plt.xlabel('Number of PCA components used')
-plt.ylabel('Digit classification accuracy')
+plt.xlabel("Number of PCA components used")
+plt.ylabel("Digit classification accuracy")
 plt.xticks(n_components.tolist())
 plt.ylim((0, 1.0))
-plt.legend(loc='upper left')
+plt.legend(loc="upper left")
 
 best_index_ = grid.best_index_
 
 print("The best_index_ is %d" % best_index_)
 print("The n_components selected is %d" % n_components[best_index_])
-print("The corresponding accuracy score is %.2f"
-      % grid.cv_results_['mean_test_score'][best_index_])
+print(
+    "The corresponding accuracy score is %.2f"
+    % grid.cv_results_["mean_test_score"][best_index_]
+)
 plt.show()
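
The key mechanism here, unchanged by the reformat, is that GridSearchCV accepts a callable for refit: the callable receives cv_results_ and returns the index of the candidate to refit. A self-contained sketch with a hand-built cv_results dict (the scores below are fabricated purely to show the selection logic):

import numpy as np

def best_low_complexity_sketch(cv_results):
    """Pick the smallest model within one std of the best mean test score."""
    scores = cv_results["mean_test_score"]
    best = np.argmax(scores)
    threshold = scores[best] - cv_results["std_test_score"][best]
    candidates = np.flatnonzero(scores >= threshold)
    sizes = cv_results["param_reduce_dim__n_components"][candidates]
    return candidates[sizes.argmin()]

# Toy cv_results: 10 components scores best, but 8 is within one std
toy = {
    "mean_test_score": np.array([0.90, 0.92, 0.93]),
    "std_test_score": np.array([0.02, 0.02, 0.02]),
    "param_reduce_dim__n_components": np.array([6, 8, 10]),
}
print(best_low_complexity_sketch(toy))  # 1 -> the 8-component model

Passing refit=best_low_complexity_sketch to GridSearchCV (as the diff does with its own best_low_complexity) would then refit the 8-component pipeline rather than the raw argmax.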

Diff for: dev/_downloads/02f111fb3dd79805b161e14c564184fc/plot_sgd_comparison.ipynb

+1 −1

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n    (\"SGD\", SGDClassifier(max_iter=100)),\n    (\"ASGD\", SGDClassifier(average=True)),\n    (\"Perceptron\", Perceptron()),\n    (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n                                                         C=1.0, tol=1e-4)),\n    (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n                                                          C=1.0, tol=1e-4)),\n    (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n    print(\"training %s\" % name)\n    rng = np.random.RandomState(42)\n    yy = []\n    for i in heldout:\n        yy_ = []\n        for r in range(rounds):\n            X_train, X_test, y_train, y_test = \\\n                train_test_split(X, y, test_size=i, random_state=rng)\n            clf.fit(X_train, y_train)\n            y_pred = clf.predict(X_test)\n            yy_.append(1 - np.mean(y_pred == y_test))\n        yy.append(np.mean(yy_))\n    plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\nX, y = datasets.load_digits(return_X_y=True)\n\nclassifiers = [\n    (\"SGD\", SGDClassifier(max_iter=100)),\n    (\"ASGD\", SGDClassifier(average=True)),\n    (\"Perceptron\", Perceptron()),\n    (\n        \"Passive-Aggressive I\",\n        PassiveAggressiveClassifier(loss=\"hinge\", C=1.0, tol=1e-4),\n    ),\n    (\n        \"Passive-Aggressive II\",\n        PassiveAggressiveClassifier(loss=\"squared_hinge\", C=1.0, tol=1e-4),\n    ),\n    (\"SAG\", LogisticRegression(solver=\"sag\", tol=1e-1, C=1.0e4 / X.shape[0])),\n]\n\nxx = 1.0 - np.array(heldout)\n\nfor name, clf in classifiers:\n    print(\"training %s\" % name)\n    rng = np.random.RandomState(42)\n    yy = []\n    for i in heldout:\n        yy_ = []\n        for r in range(rounds):\n            X_train, X_test, y_train, y_test = train_test_split(\n                X, y, test_size=i, random_state=rng\n            )\n            clf.fit(X_train, y_train)\n            y_pred = clf.predict(X_test)\n            yy_.append(1 - np.mean(y_pred == y_test))\n        yy.append(np.mean(yy_))\n    plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
 ]
 }
 ],
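
The notebook change above is again formatting only. A stripped-down sketch of its comparison loop for a single classifier (one SGDClassifier instead of the six models, one round per fraction; the held-out fractions match the original):

import numpy as np
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
X, y = datasets.load_digits(return_X_y=True)
clf = SGDClassifier(max_iter=100)

rng = np.random.RandomState(42)
for frac in heldout:
    # hold out `frac` of the data; train on the rest and measure test error
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=frac, random_state=rng
    )
    clf.fit(X_train, y_train)
    err = 1 - np.mean(clf.predict(X_test) == y_test)
    print("train fraction %.2f -> test error %.3f" % (1 - frac, err))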
