{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Cross-validation on diabetes Dataset Exercise\n\nA tutorial exercise which uses cross-validation with linear models.\n\nThis exercise is used in the `cv_estimators_tut` part of the\n`model_selection_tut` section of the `stat_learn_tut_index`.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load dataset and apply GridSearchCV\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.linear_model import Lasso\nfrom sklearn.model_selection import GridSearchCV\n\nX, y = datasets.load_diabetes(return_X_y=True)\nX = X[:150]\ny = y[:150]\n\nlasso = Lasso(random_state=0, max_iter=10000)\nalphas = np.logspace(-4, -0.5, 30)\n\ntuned_parameters = [{\"alpha\": alphas}]\nn_folds = 5\n\nclf = GridSearchCV(lasso, tuned_parameters, cv=n_folds, refit=False)\nclf.fit(X, y)\nscores = clf.cv_results_[\"mean_test_score\"]\nscores_std = clf.cv_results_[\"std_test_score\"]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Plot error lines showing +/- std. errors of the scores\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "plt.figure().set_size_inches(8, 6)\nplt.semilogx(alphas, scores)\n\nstd_error = scores_std / np.sqrt(n_folds)\n\nplt.semilogx(alphas, scores + std_error, \"b--\")\nplt.semilogx(alphas, scores - std_error, \"b--\")\n\n# alpha=0.2 controls the translucency of the fill color\nplt.fill_between(alphas, scores + std_error, scores - std_error, alpha=0.2)\n\nplt.ylabel(\"CV score +/- std error\")\nplt.xlabel(\"alpha\")\nplt.axhline(np.max(scores), linestyle=\"--\", color=\".5\")\nplt.xlim([alphas[0], alphas[-1]])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Bonus: how much can you trust the selection of alpha?\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# To answer this question we use the LassoCV object that sets its alpha\n# parameter automatically from the data by internal cross-validation (i.e. it\n# performs cross-validation on the training data it receives).\n# We use external cross-validation to see how much the automatically obtained\n# alphas differ across different cross-validation folds.\n\nfrom sklearn.linear_model import LassoCV\nfrom sklearn.model_selection import KFold\n\nlasso_cv = LassoCV(alphas=alphas, random_state=0, max_iter=10000)\nk_fold = KFold(3)\n\nprint(\"Answer to the bonus question:\", \"how much can you trust the selection of alpha?\")\nprint()\nprint(\"Alpha parameters maximising the generalization score on different\")\nprint(\"subsets of the data:\")\nfor k, (train, test) in enumerate(k_fold.split(X, y)):\n    lasso_cv.fit(X[train], y[train])\n    print(\n        \"[fold {0}] alpha: {1:.5f}, score: {2:.5f}\".format(\n            k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])\n        )\n    )\nprint()\nprint(\"Answer: Not very much since we obtained different alphas for different\")\nprint(\"subsets of the data and moreover, the scores for these alphas differ\")\nprint(\"quite substantially.\")\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.18"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}