Skip to content

Commit a14c627

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 798e4339b02214de745c3d46a02d878fa34b1575
1 parent 36e5adb commit a14c627

File tree

1,346 files changed

+7190
-7202
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,346 files changed

+7190
-7202
lines changed

Diff for: dev/.buildinfo

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: 39c816aaa582b3ac6dda7a6bace93052
3+
config: 30edc959a4dbded8e664e342536c9952
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

Diff for: dev/_downloads/18eb95af29bd5554020a8428b3ceac54/plot_cluster_iris.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
},
1616
"outputs": [],
1717
"source": [
18-
"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\n\n# Though the following import is not directly being used, it is required\n# for 3D projection to work with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.cluster import KMeans\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [\n (\"k_means_iris_8\", KMeans(n_clusters=8, n_init=\"auto\")),\n (\"k_means_iris_3\", KMeans(n_clusters=3, n_init=\"auto\")),\n (\"k_means_iris_bad_init\", KMeans(n_clusters=3, n_init=1, init=\"random\")),\n]\n\nfig = plt.figure(figsize=(10, 8))\ntitles = [\"8 clusters\", \"3 clusters\", \"3 clusters, bad initialization\"]\nfor idx, ((name, est), title) in enumerate(zip(estimators, titles)):\n ax = fig.add_subplot(2, 2, idx + 1, projection=\"3d\", elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor=\"k\")\n\n ax.xaxis.set_ticklabels([])\n ax.yaxis.set_ticklabels([])\n ax.zaxis.set_ticklabels([])\n ax.set_xlabel(\"Petal width\")\n ax.set_ylabel(\"Sepal length\")\n ax.set_zlabel(\"Petal length\")\n ax.set_title(title)\n\n# Plot the ground truth\nax = fig.add_subplot(2, 2, 4, projection=\"3d\", elev=48, azim=134)\n\nfor name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n ax.text3D(\n X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2,\n name,\n horizontalalignment=\"center\",\n bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n )\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor=\"k\")\n\nax.xaxis.set_ticklabels([])\nax.yaxis.set_ticklabels([])\nax.zaxis.set_ticklabels([])\nax.set_xlabel(\"Petal width\")\nax.set_ylabel(\"Sepal length\")\nax.set_zlabel(\"Petal length\")\nax.set_title(\"Ground Truth\")\n\nplt.subplots_adjust(wspace=0.25, hspace=0.25)\nplt.show()"
18+
"# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\n\n# Though the following import is not directly being used, it is required\n# for 3D projection to work with matplotlib < 3.2\nimport mpl_toolkits.mplot3d # noqa: F401\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.cluster import KMeans\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [\n (\"k_means_iris_8\", KMeans(n_clusters=8)),\n (\"k_means_iris_3\", KMeans(n_clusters=3)),\n (\"k_means_iris_bad_init\", KMeans(n_clusters=3, n_init=1, init=\"random\")),\n]\n\nfig = plt.figure(figsize=(10, 8))\ntitles = [\"8 clusters\", \"3 clusters\", \"3 clusters, bad initialization\"]\nfor idx, ((name, est), title) in enumerate(zip(estimators, titles)):\n ax = fig.add_subplot(2, 2, idx + 1, projection=\"3d\", elev=48, azim=134)\n est.fit(X)\n labels = est.labels_\n\n ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor=\"k\")\n\n ax.xaxis.set_ticklabels([])\n ax.yaxis.set_ticklabels([])\n ax.zaxis.set_ticklabels([])\n ax.set_xlabel(\"Petal width\")\n ax.set_ylabel(\"Sepal length\")\n ax.set_zlabel(\"Petal length\")\n ax.set_title(title)\n\n# Plot the ground truth\nax = fig.add_subplot(2, 2, 4, projection=\"3d\", elev=48, azim=134)\n\nfor name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n ax.text3D(\n X[y == label, 3].mean(),\n X[y == label, 0].mean(),\n X[y == label, 2].mean() + 2,\n name,\n horizontalalignment=\"center\",\n bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n )\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor=\"k\")\n\nax.xaxis.set_ticklabels([])\nax.yaxis.set_ticklabels([])\nax.zaxis.set_ticklabels([])\nax.set_xlabel(\"Petal width\")\nax.set_ylabel(\"Sepal length\")\nax.set_zlabel(\"Petal length\")\nax.set_title(\"Ground Truth\")\n\nplt.subplots_adjust(wspace=0.25, hspace=0.25)\nplt.show()"
1919
]
2020
}
2121
],

Diff for: dev/_downloads/2434f000f4405168e6285a3e410c709f/plot_kmeans_silhouette_analysis.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
},
1616
"outputs": [],
1717
"source": [
18-
"import matplotlib.cm as cm\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.cluster import KMeans\nfrom sklearn.datasets import make_blobs\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)*10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, n_init=\"auto\", random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
18+
"import matplotlib.cm as cm\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.cluster import KMeans\nfrom sklearn.datasets import make_blobs\nfrom sklearn.metrics import silhouette_samples, silhouette_score\n\n# Generating the sample data from make_blobs\n# This particular setting has one distinct cluster and 3 clusters placed close\n# together.\nX, y = make_blobs(\n n_samples=500,\n n_features=2,\n centers=4,\n cluster_std=1,\n center_box=(-10.0, 10.0),\n shuffle=True,\n random_state=1,\n) # For reproducibility\n\nrange_n_clusters = [2, 3, 4, 5, 6]\n\nfor n_clusters in range_n_clusters:\n # Create a subplot with 1 row and 2 columns\n fig, (ax1, ax2) = plt.subplots(1, 2)\n fig.set_size_inches(18, 7)\n\n # The 1st subplot is the silhouette plot\n # The silhouette coefficient can range from -1, 1 but in this example all\n # lie within [-0.1, 1]\n ax1.set_xlim([-0.1, 1])\n # The (n_clusters+1)*10 is for inserting blank space between silhouette\n # plots of individual clusters, to demarcate them clearly.\n ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])\n\n # Initialize the clusterer with n_clusters value and a random generator\n # seed of 10 for reproducibility.\n clusterer = KMeans(n_clusters=n_clusters, random_state=10)\n cluster_labels = clusterer.fit_predict(X)\n\n # The silhouette_score gives the average value for all the samples.\n # This gives a perspective into the density and separation of the formed\n # clusters\n silhouette_avg = silhouette_score(X, cluster_labels)\n print(\n \"For n_clusters =\",\n n_clusters,\n \"The average silhouette_score is :\",\n silhouette_avg,\n )\n\n # Compute the silhouette scores for each sample\n sample_silhouette_values = silhouette_samples(X, cluster_labels)\n\n y_lower = 10\n for i in range(n_clusters):\n # Aggregate the silhouette scores for samples belonging to\n # cluster i, and sort them\n ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]\n\n ith_cluster_silhouette_values.sort()\n\n size_cluster_i = ith_cluster_silhouette_values.shape[0]\n y_upper = y_lower + size_cluster_i\n\n color = cm.nipy_spectral(float(i) / n_clusters)\n ax1.fill_betweenx(\n np.arange(y_lower, y_upper),\n 0,\n ith_cluster_silhouette_values,\n facecolor=color,\n edgecolor=color,\n alpha=0.7,\n )\n\n # Label the silhouette plots with their cluster numbers at the middle\n ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n\n # Compute the new y_lower for next plot\n y_lower = y_upper + 10 # 10 for the 0 samples\n\n ax1.set_title(\"The silhouette plot for the various clusters.\")\n ax1.set_xlabel(\"The silhouette coefficient values\")\n ax1.set_ylabel(\"Cluster label\")\n\n # The vertical line for average silhouette score of all the values\n ax1.axvline(x=silhouette_avg, color=\"red\", linestyle=\"--\")\n\n ax1.set_yticks([]) # Clear the yaxis labels / ticks\n ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n\n # 2nd Plot showing the actual clusters formed\n colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)\n ax2.scatter(\n X[:, 0], X[:, 1], marker=\".\", s=30, lw=0, alpha=0.7, c=colors, edgecolor=\"k\"\n )\n\n # Labeling the clusters\n centers = clusterer.cluster_centers_\n # Draw white circles at cluster centers\n ax2.scatter(\n centers[:, 0],\n centers[:, 1],\n marker=\"o\",\n c=\"white\",\n alpha=1,\n s=200,\n edgecolor=\"k\",\n )\n\n for i, c in enumerate(centers):\n ax2.scatter(c[0], c[1], marker=\"$%d$\" % i, alpha=1, s=50, edgecolor=\"k\")\n\n ax2.set_title(\"The visualization of the clustered data.\")\n ax2.set_xlabel(\"Feature space for the 1st feature\")\n ax2.set_ylabel(\"Feature space for the 2nd feature\")\n\n plt.suptitle(\n \"Silhouette analysis for KMeans clustering on sample data with n_clusters = %d\"\n % n_clusters,\n fontsize=14,\n fontweight=\"bold\",\n )\n\nplt.show()"
1919
]
2020
}
2121
],

Diff for: dev/_downloads/4825fc8223d1af0f3b61080c3dea3a62/plot_faces_decomposition.py

-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
193193
batch_size=20,
194194
max_iter=50,
195195
random_state=rng,
196-
n_init="auto",
197196
)
198197
kmeans_estimator.fit(faces_centered)
199198
plot_gallery(

Diff for: dev/_downloads/586f6cb589cefcd68d55348630efbfa0/plot_kmeans_silhouette_analysis.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969

7070
# Initialize the clusterer with n_clusters value and a random generator
7171
# seed of 10 for reproducibility.
72-
clusterer = KMeans(n_clusters=n_clusters, n_init="auto", random_state=10)
72+
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
7373
cluster_labels = clusterer.fit_predict(X)
7474

7575
# The silhouette_score gives the average value for all the samples.
Binary file not shown.

0 commit comments

Comments
 (0)