{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Automatic Relevance Determination Regression (ARD)\n\nFit regression model with Bayesian Ridge Regression.\n\nSee `bayesian_ridge_regression` for more information on the regressor.\n\nCompared to the OLS (ordinary least squares) estimator, the coefficient\nweights are slightly shifted toward zeros, which stabilises them.\n\nThe histogram of the estimated weights is very peaked, as a sparsity-inducing\nprior is implied on the weights.\n\nThe estimation of the model is done by iteratively maximizing the\nmarginal log-likelihood of the observations.\n\nWe also plot predictions and uncertainties for ARD\nfor one dimensional regression using polynomial feature expansion.\nNote the uncertainty starts going up on the right side of the plot.\nThis is because these test samples are outside of the range of the training\nsamples.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn.linear_model import ARDRegression, LinearRegression\n\n# #############################################################################\n# Generating simulated data with Gaussian weights\n\n# Parameters of the example\nnp.random.seed(0)\nn_samples, n_features = 100, 100\n# Create Gaussian data\nX = np.random.randn(n_samples, n_features)\n# Create weights with a precision lambda_ of 4.\nlambda_ = 4.0\nw = np.zeros(n_features)\n# Only keep 10 weights of interest\nrelevant_features = np.random.randint(0, n_features, 10)\nfor i in relevant_features:\n    w[i] = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(lambda_))\n# Create noise with a precision alpha of 50.\nalpha_ = 50.0\nnoise = stats.norm.rvs(loc=0, scale=1.0 / np.sqrt(alpha_), size=n_samples)\n# Create the target\ny = np.dot(X, w) + noise\n\n# #############################################################################\n# Fit the ARD Regression\nclf = ARDRegression(compute_score=True)\nclf.fit(X, y)\n\nols = LinearRegression()\nols.fit(X, y)\n\n# #############################################################################\n# Plot the true weights, the estimated weights, the histogram of the\n# weights, and predictions with standard deviations\nplt.figure(figsize=(6, 5))\nplt.title(\"Weights of the model\")\nplt.plot(clf.coef_, color=\"darkblue\", linestyle=\"-\", linewidth=2, label=\"ARD estimate\")\nplt.plot(\n    ols.coef_, color=\"yellowgreen\", linestyle=\":\", linewidth=2, label=\"OLS estimate\"\n)\nplt.plot(w, color=\"orange\", linestyle=\"-\", linewidth=2, label=\"Ground truth\")\nplt.xlabel(\"Features\")\nplt.ylabel(\"Values of the weights\")\nplt.legend(loc=1)\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Histogram of the weights\")\nplt.hist(clf.coef_, bins=n_features, color=\"navy\", log=True)\nplt.scatter(\n    clf.coef_[relevant_features],\n    np.full(len(relevant_features), 5.0),\n    color=\"gold\",\n    marker=\"o\",\n    label=\"Relevant features\",\n)\nplt.ylabel(\"Features\")\nplt.xlabel(\"Values of the weights\")\nplt.legend(loc=1)\n\nplt.figure(figsize=(6, 5))\nplt.title(\"Marginal log-likelihood\")\nplt.plot(clf.scores_, color=\"navy\", linewidth=2)\nplt.ylabel(\"Score\")\nplt.xlabel(\"Iterations\")\n\n\n# Plotting some predictions for polynomial regression\ndef f(x, noise_amount):\n    y = np.sqrt(x) * np.sin(x)\n    noise = np.random.normal(0, 1, len(x))\n    return y + noise_amount * noise\n\n\ndegree = 10\nX = np.linspace(0, 10, 100)\ny = f(X, noise_amount=1)\nclf_poly = ARDRegression(threshold_lambda=1e5)\nclf_poly.fit(np.vander(X, degree), y)\n\nX_plot = np.linspace(0, 11, 25)\ny_plot = f(X_plot, noise_amount=0)\ny_mean, y_std = clf_poly.predict(np.vander(X_plot, degree), return_std=True)\nplt.figure(figsize=(6, 5))\nplt.errorbar(X_plot, y_mean, y_std, color=\"navy\", label=\"Polynomial ARD\", linewidth=2)\nplt.plot(X_plot, y_plot, color=\"gold\", linewidth=2, label=\"Ground Truth\")\nplt.ylabel(\"Output y\")\nplt.xlabel(\"Feature X\")\nplt.legend(loc=\"lower left\")\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.7"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}