{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Principal components analysis (PCA)\n",
    "\n",
    "These figures aid in illustrating how a point cloud\n",
    "can be very flat in one direction--which is where PCA\n",
    "comes in to choose a direction that is not flat.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Authors: Gael Varoquaux\n",
    "# Jaques Grobler\n",
    "# Kevin Hughes\n",
    "# License: BSD 3 clause"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from scipy import stats\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# unused but required import for doing 3d projections with matplotlib < 3.2\n",
    "import mpl_toolkits.mplot3d  # noqa: F401"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create the data\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "e = np.exp(1)\n",
    "np.random.seed(4)\n",
    "\n",
    "\n",
    "def pdf(x):\n",
    "    \"\"\"Evaluate an equal-weight mixture of two zero-mean normal densities.\n",
    "\n",
    "    The two components have standard deviations ``0.25 / e`` and ``4 / e``.\n",
    "    ``x`` may be a scalar or an array; arrays are evaluated elementwise.\n",
    "    \"\"\"\n",
    "    narrow = stats.norm(scale=0.25 / e)\n",
    "    wide = stats.norm(scale=4 / e)\n",
    "    return 0.5 * (narrow.pdf(x) + wide.pdf(x))\n",
    "\n",
    "\n",
    "# NOTE: with a fixed seed, the draw order (y, x, z) determines the sample.\n",
    "y = np.random.normal(scale=0.5, size=(30000))\n",
    "x = np.random.normal(scale=0.5, size=(30000))\n",
    "z = np.random.normal(scale=0.1, size=len(x))\n",
    "\n",
    "# Per-point density, used only to colour the scatter plot.\n",
    "density = pdf(x) * pdf(y)\n",
    "pdf_z = pdf(5 * z)\n",
    "\n",
    "density *= pdf_z\n",
    "\n",
    "# Combine the draws into three correlated coordinates; the small-scale z\n",
    "# only perturbs c.\n",
    "a = x + y\n",
    "b = 2 * y\n",
    "c = a - b + z\n",
    "\n",
    "# Rescale a and b by a common factor (c was computed from the unscaled values).\n",
    "norm = np.sqrt(a.var() + b.var())\n",
    "a /= norm\n",
    "b /= norm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot the figures\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def pca_plane(points, scale=3):\n",
    "    \"\"\"Return the corners of the plane spanned by the first two principal axes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    points : array-like of shape (n_samples, 3)\n",
    "        Point cloud on which the PCA is fitted.\n",
    "    scale : float, default=3\n",
    "        Factor applied to the unit-length principal axes to size the plane.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of three ndarray of shape (2, 2)\n",
    "        X, Y and Z corner coordinates, laid out as ``plot_surface`` expects.\n",
    "    \"\"\"\n",
    "    # Using SciPy's SVD on the centred data, this would be:\n",
    "    # _, pca_score, Vt = scipy.linalg.svd(points, full_matrices=False)\n",
    "    pca = PCA(n_components=3)\n",
    "    pca.fit(points)\n",
    "    V = pca.components_.T\n",
    "\n",
    "    # Each row of V holds one coordinate (x, y or z) of every principal axis.\n",
    "    # The corners +pc1, +pc2, -pc2, -pc1 are arranged on a 2x2 grid.\n",
    "    # reshape() is used instead of assigning to .shape, which NumPy discourages.\n",
    "    return tuple(\n",
    "        np.r_[axis[:2], -axis[1::-1]].reshape(2, 2) for axis in scale * V\n",
    "    )\n",
    "\n",
    "\n",
    "def plot_figs(fig_num, elev, azim, plane=None):\n",
    "    \"\"\"Draw the point cloud and its main PCA plane from one viewpoint.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fig_num : int\n",
    "        Figure identifier passed to ``plt.figure``; the figure is cleared first.\n",
    "    elev, azim : float\n",
    "        Elevation and azimuth of the 3D view, in degrees.\n",
    "    plane : tuple of three (2, 2) arrays, optional\n",
    "        Precomputed result of ``pca_plane``. When omitted, the plane is\n",
    "        fitted on the global ``a``, ``b``, ``c`` coordinates.\n",
    "    \"\"\"\n",
    "    if plane is None:\n",
    "        plane = pca_plane(np.c_[a, b, c])\n",
    "\n",
    "    fig = plt.figure(fig_num, figsize=(4, 3))\n",
    "    plt.clf()\n",
    "    ax = fig.add_subplot(111, projection=\"3d\", elev=elev, azim=azim)\n",
    "    ax.set_position([0, 0, 0.95, 1])\n",
    "\n",
    "    # Plot every 10th point to keep the figure light.\n",
    "    ax.scatter(a[::10], b[::10], c[::10], c=density[::10], marker=\"+\", alpha=0.4)\n",
    "    ax.plot_surface(*plane)\n",
    "\n",
    "    # Hide the tick labels on all three axes.\n",
    "    ax.xaxis.set_ticklabels([])\n",
    "    ax.yaxis.set_ticklabels([])\n",
    "    ax.zaxis.set_ticklabels([])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Fit the PCA once; the plane does not depend on the viewpoint.\n",
    "plane = pca_plane(np.c_[a, b, c])\n",
    "\n",
    "plot_figs(1, elev=-40, azim=-80, plane=plane)\n",
    "plot_figs(2, elev=30, azim=20, plane=plane)\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}