{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# K-means Clustering\n\nThe plots display firstly what a K-means algorithm would yield\nusing three clusters. It is then shown what the effect of a bad\ninitialization is on the classification process:\nBy setting n_init to only 1 (default is 10), the amount of\ntimes that the algorithm will be run with different centroid\nseeds is reduced.\nThe next plot displays what using eight clusters would deliver\nand finally the ground truth.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# Though the following import is not directly being used, it is required\n# for 3D projection to work\nfrom mpl_toolkits.mplot3d import Axes3D\n\nfrom sklearn.cluster import KMeans\nfrom sklearn import datasets\n\nnp.random.seed(5)\n\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\n\nestimators = [\n    (\"k_means_iris_8\", KMeans(n_clusters=8)),\n    (\"k_means_iris_3\", KMeans(n_clusters=3)),\n    (\"k_means_iris_bad_init\", KMeans(n_clusters=3, n_init=1, init=\"random\")),\n]\n\nfignum = 1\ntitles = [\"8 clusters\", \"3 clusters\", \"3 clusters, bad initialization\"]\nfor name, est in estimators:\n    fig = plt.figure(fignum, figsize=(4, 3))\n    ax = Axes3D(fig, rect=[0, 0, 0.95, 1], elev=48, azim=134)\n    est.fit(X)\n    labels = est.labels_\n\n    ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor=\"k\")\n\n    ax.w_xaxis.set_ticklabels([])\n    ax.w_yaxis.set_ticklabels([])\n    ax.w_zaxis.set_ticklabels([])\n    ax.set_xlabel(\"Petal width\")\n    ax.set_ylabel(\"Sepal length\")\n    ax.set_zlabel(\"Petal length\")\n    ax.set_title(titles[fignum - 1])\n    ax.dist = 12\n    fignum = fignum + 1\n\n# Plot the ground truth\nfig = plt.figure(fignum, figsize=(4, 3))\nax = Axes3D(fig, rect=[0, 0, 0.95, 1], elev=48, azim=134)\n\nfor name, label in [(\"Setosa\", 0), (\"Versicolour\", 1), (\"Virginica\", 2)]:\n    ax.text3D(\n        X[y == label, 3].mean(),\n        X[y == label, 0].mean(),\n        X[y == label, 2].mean() + 2,\n        name,\n        horizontalalignment=\"center\",\n        bbox=dict(alpha=0.2, edgecolor=\"w\", facecolor=\"w\"),\n    )\n# Reorder the labels to have colors matching the cluster results\ny = np.choose(y, [1, 2, 0]).astype(float)\nax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor=\"k\")\n\nax.w_xaxis.set_ticklabels([])\nax.w_yaxis.set_ticklabels([])\nax.w_zaxis.set_ticklabels([])\nax.set_xlabel(\"Petal width\")\nax.set_ylabel(\"Sepal length\")\nax.set_zlabel(\"Petal length\")\nax.set_title(\"Ground Truth\")\nax.dist = 12\n\nfig.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.7"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}