{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# K-means Clustering\n",
    "\n",
    "The plots display firstly what a K-means algorithm would yield\n",
    "using eight clusters, and then using three clusters.\n",
    "It is then shown what the effect of a bad\n",
    "initialization is on the clustering process:\n",
    "By setting n_init to only 1 (instead of 10), the number of\n",
    "times that the algorithm will be run with different centroid\n",
    "seeds is reduced.\n",
    "The final plot displays the ground truth.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Code source: Ga\u00ebl Varoquaux\n",
    "# Modified for documentation by Jaques Grobler\n",
    "# License: BSD 3 clause\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Though the following import is not directly being used, it is required\n",
    "# for 3D projection to work on older matplotlib releases (before 3.2)\n",
    "from mpl_toolkits.mplot3d import Axes3D  # noqa: F401\n",
    "\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn import datasets\n",
    "\n",
    "# Seed the global NumPy generator so the centroid initializations (and\n",
    "# therefore the plots) are the same on every run.\n",
    "np.random.seed(5)\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "\n",
    "def plot_iris_3d(fignum, title, colors):\n",
    "    \"\"\"Draw a 3D scatter plot of the iris samples in its own figure.\n",
    "\n",
    "    Petal width, sepal length and petal length (columns 3, 0 and 2 of\n",
    "    ``X``) are used as the x, y and z coordinates.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fignum : int\n",
    "        Number of the matplotlib figure to create.\n",
    "    title : str\n",
    "        Title displayed above the axes.\n",
    "    colors : array-like of float, one entry per row of ``X``\n",
    "        Values mapped to the marker colors (cluster or class labels).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ax : the 3D axes that were created, so callers can add annotations.\n",
    "    \"\"\"\n",
    "    fig = plt.figure(fignum, figsize=(4, 3))\n",
    "    # Create the axes through the figure so that they are registered with\n",
    "    # it; constructing ``Axes3D(fig, ...)`` directly does not attach the\n",
    "    # axes to the figure on recent matplotlib releases.\n",
    "    ax = fig.add_subplot(111, projection=\"3d\", elev=48, azim=134)\n",
    "    ax.set_position([0, 0, 0.95, 1])\n",
    "\n",
    "    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=colors, edgecolor=\"k\")\n",
    "\n",
    "    # Hide the tick labels; ``ax.xaxis`` replaces the deprecated\n",
    "    # ``ax.w_xaxis`` alias (likewise for y and z).\n",
    "    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):\n",
    "        axis.set_ticklabels([])\n",
    "    ax.set_xlabel(\"Petal width\")\n",
    "    ax.set_ylabel(\"Sepal length\")\n",
    "    ax.set_zlabel(\"Petal length\")\n",
    "    ax.set_title(title)\n",
    "    # Zoom out slightly so the axis labels fit inside the figure; this\n",
    "    # replaces the deprecated ``ax.dist = 12``.\n",
    "    ax.set_box_aspect(None, zoom=0.85)\n",
    "    return ax\n",
    "\n",
    "\n",
    "# (plot title, estimator) pairs, in plotting order. ``n_init`` is given\n",
    "# explicitly for the well-initialized estimators because its default\n",
    "# differs between scikit-learn releases.\n",
    "estimators = [\n",
    "    (\"8 clusters\", KMeans(n_clusters=8, n_init=10)),\n",
    "    (\"3 clusters\", KMeans(n_clusters=3, n_init=10)),\n",
    "    (\n",
    "        \"3 clusters, bad initialization\",\n",
    "        KMeans(n_clusters=3, n_init=1, init=\"random\"),\n",
    "    ),\n",
    "]\n",
    "\n",
    "for fignum, (title, est) in enumerate(estimators, start=1):\n",
    "    est.fit(X)\n",
    "    plot_iris_3d(fignum, title, est.labels_.astype(float))\n",
    "\n",
    "# Plot the ground truth\n",
    "# Reorder the labels to have colors matching the cluster results.\n",
    "# The result is stored under a new name so ``y`` keeps the true targets.\n",
    "y_colors = np.choose(y, [1, 2, 0]).astype(float)\n",
    "ax = plot_iris_3d(len(estimators) + 1, \"Ground Truth\", y_colors)\n",
    "\n",
    "# Label each species at the mean position of its samples, raised by 2\n",
    "# along the petal-length axis.\n",
    "for name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n",
    "    ax.text3D(\n",
    "        X[y == label, 3].mean(),\n",
    "        X[y == label, 0].mean(),\n",
    "        X[y == label, 2].mean() + 2,\n",
    "        name,\n",
    "        horizontalalignment=\"center\",\n",
    "        bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n",
    "    )\n",
    "\n",
    "# ``plt.show()`` renders every open figure; ``fig.show()`` only warns\n",
    "# under the non-GUI inline backend selected by ``%matplotlib inline``.\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}