{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Linear and Quadratic Discriminant Analysis with covariance ellipsoid\n\nThis example plots the covariance ellipsoids of each class and\ndecision boundary learned by LDA and QDA. The ellipsoids display\nthe double standard deviation for each class. With LDA, the\nstandard deviation is the same for all the classes, while each\nclass has its own standard deviation with QDA.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Colormap\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import matplotlib as mpl\nimport matplotlib.pyplot as plt\nfrom matplotlib import colors\n\ncmap = colors.LinearSegmentedColormap(\n \"red_blue_classes\",\n {\n \"red\": [(0, 1, 1), (1, 0.7, 0.7)],\n \"green\": [(0, 0.7, 0.7), (1, 0.7, 0.7)],\n \"blue\": [(0, 0.7, 0.7), (1, 1, 1)],\n },\n)\n# plt.cm.register_cmap was deprecated in matplotlib 3.5 and removed in 3.9;\n# mpl.colormaps.register is the supported registry API.\nmpl.colormaps.register(cmap)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Datasets generation functions\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n\n\ndef dataset_fixed_cov():\n \"\"\"Generate 2 Gaussians samples with the same covariance matrix\"\"\"\n n, dim = 300, 2\n np.random.seed(0)\n C = np.array([[0.0, -0.23], [0.83, 0.23]])\n X = np.r_[\n np.dot(np.random.randn(n, dim), C),\n np.dot(np.random.randn(n, dim), C) + np.array([1, 1]),\n ]\n y = np.hstack((np.zeros(n), np.ones(n)))\n return X, y\n\n\ndef dataset_cov():\n \"\"\"Generate 2 Gaussians samples with different covariance matrices\"\"\"\n n, dim = 300, 2\n np.random.seed(0)\n C = np.array([[0.0, -1.0], [2.5, 0.7]]) * 2.0\n X = np.r_[\n np.dot(np.random.randn(n, dim), C),\n np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4]),\n ]\n y = np.hstack((np.zeros(n), np.ones(n)))\n return X, y" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot 
functions\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from scipy import linalg\n\n\ndef plot_data(lda, X, y, y_pred, fig_index):\n splot = plt.subplot(2, 2, fig_index)\n if fig_index == 1:\n plt.title(\"Linear Discriminant Analysis\")\n plt.ylabel(\"Data with\\n fixed covariance\")\n elif fig_index == 2:\n plt.title(\"Quadratic Discriminant Analysis\")\n elif fig_index == 3:\n plt.ylabel(\"Data with\\n varying covariances\")\n\n tp = y == y_pred # True Positive\n tp0, tp1 = tp[y == 0], tp[y == 1]\n X0, X1 = X[y == 0], X[y == 1]\n X0_tp, X0_fp = X0[tp0], X0[~tp0]\n X1_tp, X1_fp = X1[tp1], X1[~tp1]\n\n # class 0: dots\n plt.scatter(X0_tp[:, 0], X0_tp[:, 1], marker=\".\", color=\"red\")\n plt.scatter(X0_fp[:, 0], X0_fp[:, 1], marker=\"x\", s=20, color=\"#990000\") # dark red\n\n # class 1: dots\n plt.scatter(X1_tp[:, 0], X1_tp[:, 1], marker=\".\", color=\"blue\")\n plt.scatter(\n X1_fp[:, 0], X1_fp[:, 1], marker=\"x\", s=20, color=\"#000099\"\n ) # dark blue\n\n # class 0 and 1 : areas\n nx, ny = 200, 100\n x_min, x_max = plt.xlim()\n y_min, y_max = plt.ylim()\n xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx), np.linspace(y_min, y_max, ny))\n Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])\n Z = Z[:, 1].reshape(xx.shape)\n plt.pcolormesh(\n xx, yy, Z, cmap=\"red_blue_classes\", norm=colors.Normalize(0.0, 1.0), zorder=0\n )\n plt.contour(xx, yy, Z, [0.5], linewidths=2.0, colors=\"white\")\n\n # means\n plt.plot(\n lda.means_[0][0],\n lda.means_[0][1],\n \"*\",\n color=\"yellow\",\n markersize=15,\n markeredgecolor=\"grey\",\n )\n plt.plot(\n lda.means_[1][0],\n lda.means_[1][1],\n \"*\",\n color=\"yellow\",\n markersize=15,\n markeredgecolor=\"grey\",\n )\n\n return splot\n\n\ndef plot_ellipse(splot, mean, cov, color):\n v, w = linalg.eigh(cov)\n u = w[0] / linalg.norm(w[0])\n # arctan2 handles a vertical eigenvector (u[0] == 0) without a\n # division-by-zero warning; any extra 180 degrees is irrelevant for an ellipse\n angle = np.arctan2(u[1], u[0])\n angle = 180 * angle / np.pi # convert to degrees\n # filled Gaussian at 2 
standard deviation\n ell = mpl.patches.Ellipse(\n mean,\n 2 * v[0] ** 0.5,\n 2 * v[1] ** 0.5,\n angle=180 + angle,\n facecolor=color,\n edgecolor=\"black\",\n linewidth=2,\n )\n ell.set_clip_box(splot.bbox)\n ell.set_alpha(0.2)\n splot.add_artist(ell)\n splot.set_xticks(())\n splot.set_yticks(())\n\n\ndef plot_lda_cov(lda, splot):\n plot_ellipse(splot, lda.means_[0], lda.covariance_, \"red\")\n plot_ellipse(splot, lda.means_[1], lda.covariance_, \"blue\")\n\n\ndef plot_qda_cov(qda, splot):\n plot_ellipse(splot, qda.means_[0], qda.covariance_[0], \"red\")\n plot_ellipse(splot, qda.means_[1], qda.covariance_[1], \"blue\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.discriminant_analysis import (\n LinearDiscriminantAnalysis,\n QuadraticDiscriminantAnalysis,\n)\n\nplt.figure(figsize=(10, 8), facecolor=\"white\")\nplt.suptitle(\n \"Linear Discriminant Analysis vs Quadratic Discriminant Analysis\",\n y=0.98,\n fontsize=15,\n)\n\nfor i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):\n # Linear Discriminant Analysis\n lda = LinearDiscriminantAnalysis(solver=\"svd\", store_covariance=True)\n y_pred = lda.fit(X, y).predict(X)\n splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)\n plot_lda_cov(lda, splot)\n plt.axis(\"tight\")\n\n # Quadratic Discriminant Analysis\n qda = QuadraticDiscriminantAnalysis(store_covariance=True)\n y_pred = qda.fit(X, y).predict(X)\n splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)\n plot_qda_cov(qda, splot)\n plt.axis(\"tight\")\n\nplt.tight_layout()\nplt.subplots_adjust(top=0.92)\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 0 }