- "# Authors: Vlad Niculae, Alexandre Gramfort\n# License: BSD 3 clause\n\nimport logging\nfrom time import time\n\nfrom numpy.random import RandomState\nimport matplotlib.pyplot as plt\n\nfrom sklearn.datasets import fetch_olivetti_faces\nfrom sklearn.cluster import MiniBatchKMeans\nfrom sklearn import decomposition\n\n# Display progress logs on stdout\nlogging.basicConfig(level=logging.INFO, format=\"%(asctime)s %(levelname)s %(message)s\")\nn_row, n_col = 2, 3\nn_components = n_row * n_col\nimage_shape = (64, 64)\nrng = RandomState(0)\n\n# #############################################################################\n# Load faces data\nfaces, _ = fetch_olivetti_faces(return_X_y=True, shuffle=True, random_state=rng)\nn_samples, n_features = faces.shape\n\n# global centering\nfaces_centered = faces - faces.mean(axis=0)\n\n# local centering\nfaces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)\n\nprint(\"Dataset consists of %d faces\" % n_samples)\n\n\ndef plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):\n plt.figure(figsize=(2.0 * n_col, 2.26 * n_row))\n plt.suptitle(title, size=16)\n for i, comp in enumerate(images):\n plt.subplot(n_row, n_col, i + 1)\n vmax = max(comp.max(), -comp.min())\n plt.imshow(\n comp.reshape(image_shape),\n cmap=cmap,\n interpolation=\"nearest\",\n vmin=-vmax,\n vmax=vmax,\n )\n plt.xticks(())\n plt.yticks(())\n plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.0)\n\n\n# #############################################################################\n# List of the different estimators, whether to center and transpose the\n# problem, and whether the transformer uses the clustering API.\nestimators = [\n (\n \"Eigenfaces - PCA using randomized SVD\",\n decomposition.PCA(\n n_components=n_components, svd_solver=\"randomized\", whiten=True\n ),\n True,\n ),\n (\n \"Non-negative components - NMF\",\n decomposition.NMF(n_components=n_components, init=\"nndsvda\", tol=5e-3),\n False,\n ),\n (\n \"Independent components - FastICA\",\n decomposition.FastICA(n_components=n_components, whiten=True),\n True,\n ),\n (\n \"Sparse comp. 
- MiniBatchSparsePCA\",\n decomposition.MiniBatchSparsePCA(\n n_components=n_components,\n alpha=0.8,\n n_iter=100,\n batch_size=3,\n random_state=rng,\n ),\n True,\n ),\n (\n \"MiniBatchDictionaryLearning\",\n decomposition.MiniBatchDictionaryLearning(\n n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng\n ),\n True,\n ),\n (\n \"Cluster centers - MiniBatchKMeans\",\n MiniBatchKMeans(\n n_clusters=n_components,\n tol=1e-3,\n batch_size=20,\n max_iter=50,\n random_state=rng,\n ),\n True,\n ),\n (\n \"Factor Analysis components - FA\",\n decomposition.FactorAnalysis(n_components=n_components, max_iter=20),\n True,\n ),\n]\n\n\n# #############################################################################\n# Plot a sample of the input data\n\nplot_gallery(\"First centered Olivetti faces\", faces_centered[:n_components])\n\n# #############################################################################\n# Do the estimation and plot it\n\nfor name, estimator, center in estimators:\n print(\"Extracting the top %d %s...\" % (n_components, name))\n t0 = time()\n data = faces\n if center:\n data = faces_centered\n estimator.fit(data)\n train_time = time() - t0\n print(\"done in %0.3fs\" % train_time)\n if hasattr(estimator, \"cluster_centers_\"):\n components_ = estimator.cluster_centers_\n else:\n components_ = estimator.components_\n\n # Plot an image representing the pixelwise variance provided by the\n # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,\n # via the PCA decomposition, also provides a scalar noise_variance_\n # (the mean of pixelwise variance) that cannot be displayed as an image\n # so we skip it.\n if (\n hasattr(estimator, \"noise_variance_\") and estimator.noise_variance_.ndim > 0\n ): # Skip the Eigenfaces case\n plot_gallery(\n \"Pixelwise variance\",\n estimator.noise_variance_.reshape(1, -1),\n n_col=1,\n n_row=1,\n )\n plot_gallery(\n \"%s - Train time %.1fs\" % (name, train_time), components_[:n_components]\n )\n\nplt.show()\n\n# #############################################################################\n# Various positivity constraints applied to dictionary learning.\nestimators = [\n (\n \"Dictionary learning\",\n decomposition.MiniBatchDictionaryLearning(\n n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng\n ),\n True,\n ),\n (\n \"Dictionary learning - positive dictionary\",\n decomposition.MiniBatchDictionaryLearning(\n n_components=15,\n alpha=0.1,\n n_iter=50,\n batch_size=3,\n random_state=rng,\n positive_dict=True,\n ),\n True,\n ),\n (\n \"Dictionary learning - positive code\",\n decomposition.MiniBatchDictionaryLearning(\n n_components=15,\n alpha=0.1,\n n_iter=50,\n batch_size=3,\n fit_algorithm=\"cd\",\n random_state=rng,\n positive_code=True,\n ),\n True,\n ),\n (\n \"Dictionary learning - positive dictionary & code\",\n decomposition.MiniBatchDictionaryLearning(\n n_components=15,\n alpha=0.1,\n n_iter=50,\n batch_size=3,\n fit_algorithm=\"cd\",\n random_state=rng,\n positive_dict=True,\n positive_code=True,\n ),\n True,\n ),\n]\n\n\n# #############################################################################\n# Plot a sample of the input data\n\nplot_gallery(\n \"First centered Olivetti faces\", faces_centered[:n_components], cmap=plt.cm.RdBu\n)\n\n# #############################################################################\n# Do the estimation and plot it\n\nfor name, estimator, center in estimators:\n print(\"Extracting the top %d %s...\" % (n_components, name))\n t0 = time()\n data 
= faces\n if center:\n data = faces_centered\n estimator.fit(data)\n train_time = time() - t0\n print(\"done in %0.3fs\" % train_time)\n components_ = estimator.components_\n plot_gallery(name, components_[:n_components], cmap=plt.cm.RdBu)\n\nplt.show()"
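
# #############################################################################
# Numerical check of the positivity constraints. This is a minimal sketch
# added for illustration, not part of the original example: it refits the
# "positive dictionary & code" variant from the list above and verifies that
# both the dictionary atoms and the sparse codes are non-negative.
# transform_algorithm="lasso_cd" is chosen because the default coding
# algorithm ("omp") does not support the positivity constraint on the code.
dico = decomposition.MiniBatchDictionaryLearning(
    n_components=15,
    alpha=0.1,
    n_iter=50,
    batch_size=3,
    fit_algorithm="cd",
    transform_algorithm="lasso_cd",
    random_state=rng,
    positive_dict=True,
    positive_code=True,
)
dico.fit(faces_centered)
code = dico.transform(faces_centered[:1])
print("smallest dictionary atom value: %.3e" % dico.components_.min())  # >= 0
print("smallest code coefficient:      %.3e" % code.min())  # >= 0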
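
# #############################################################################
# Reconstruction from the eigenfaces. Another illustrative sketch that is not
# part of the original example: it refits the eigenfaces PCA from the first
# estimator list, projects one centered face onto the n_components eigenfaces
# with transform, and maps the low-dimensional code back to pixel space with
# inverse_transform.
pca = decomposition.PCA(
    n_components=n_components, svd_solver="randomized", whiten=True, random_state=rng
)
pca.fit(faces_centered)
code = pca.transform(faces_centered[:1])  # shape (1, n_components)
reconstruction = pca.inverse_transform(code)  # back to shape (1, 64 * 64)
plot_gallery(
    "Original vs. reconstruction from %d eigenfaces" % n_components,
    [faces_centered[0], reconstruction[0]],
    n_col=2,
    n_row=1,
)
plt.show()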