{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# A demo of the mean-shift clustering algorithm\n\nReference:\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 603-619.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\nfrom sklearn.cluster import MeanShift, estimate_bandwidth\nfrom sklearn.datasets import make_blobs"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Generate sample data\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "centers = [[1, 1], [-1, -1], [1, -1]]\nX, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Compute clustering with MeanShift\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# The following bandwidth can be automatically detected using\nbandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=500)\n\nms = MeanShift(bandwidth=bandwidth, bin_seeding=True)\nms.fit(X)\nlabels = ms.labels_\ncluster_centers = ms.cluster_centers_\n\nlabels_unique = np.unique(labels)\nn_clusters_ = len(labels_unique)\n\nprint(\"number of estimated clusters : %d\" % n_clusters_)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Plot result\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n\nplt.figure(1)\nplt.clf()\n\ncolors = [\"#dede00\", \"#377eb8\", \"#f781bf\"]\nmarkers = [\"x\", \"o\", \"^\"]\n\nfor k, col in zip(range(n_clusters_), colors):\n    my_members = labels == k\n    cluster_center = cluster_centers[k]\n    plt.plot(X[my_members, 0], X[my_members, 1], markers[k], color=col)\n    plt.plot(\n        cluster_center[0],\n        cluster_center[1],\n        markers[k],\n        markerfacecolor=col,\n        markeredgecolor=\"k\",\n        markersize=14,\n    )\nplt.title(\"Estimated number of clusters: %d\" % n_clusters_)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.13"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}