{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Plot class probabilities calculated by the VotingClassifier\n\n.. currentmodule:: sklearn\n\nPlot the class probabilities of the first sample in a toy dataset predicted by\nthree different classifiers and averaged by the\n:class:`~ensemble.VotingClassifier`.\n\nFirst, three examplary classifiers are initialized\n(:class:`~linear_model.LogisticRegression`, :class:`~naive_bayes.GaussianNB`,\nand :class:`~ensemble.RandomForestClassifier`) and used to initialize a\nsoft-voting :class:`~ensemble.VotingClassifier` with weights `[1, 1, 5]`, which\nmeans that the predicted probabilities of the\n:class:`~ensemble.RandomForestClassifier` count 5 times as much as the weights\nof the other classifiers when the averaged probability is calculated.\n\nTo visualize the probability weighting, we fit each classifier on the training\nset and plot the predicted class probabilities for the first sample in this\nexample dataset.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.ensemble import VotingClassifier\n\nclf1 = LogisticRegression(max_iter=1000, random_state=123)\nclf2 = RandomForestClassifier(n_estimators=100, random_state=123)\nclf3 = GaussianNB()\nX = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])\ny = np.array([1, 1, 2, 2])\n\neclf = VotingClassifier(\n estimators=[(\"lr\", clf1), (\"rf\", clf2), (\"gnb\", clf3)],\n voting=\"soft\",\n weights=[1, 1, 5],\n)\n\n# predict class probabilities for all classifiers\nprobas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]\n\n# get class probabilities for the first sample in the dataset\nclass1_1 = [pr[0, 0] for pr in probas]\nclass2_1 = [pr[0, 1] for pr in probas]\n\n\n# plotting\n\nN = 4 # number of groups\nind = np.arange(N) # group positions\nwidth = 0.35 # bar width\n\nfig, ax = plt.subplots()\n\n# bars for classifier 1-3\np1 = ax.bar(ind, np.hstack(([class1_1[:-1], [0]])), width, color=\"green\", edgecolor=\"k\")\np2 = ax.bar(\n ind + width,\n np.hstack(([class2_1[:-1], [0]])),\n width,\n color=\"lightgreen\",\n edgecolor=\"k\",\n)\n\n# bars for VotingClassifier\np3 = ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color=\"blue\", edgecolor=\"k\")\np4 = ax.bar(\n ind + width, [0, 0, 0, class2_1[-1]], width, color=\"steelblue\", edgecolor=\"k\"\n)\n\n# plot annotations\nplt.axvline(2.8, color=\"k\", linestyle=\"dashed\")\nax.set_xticks(ind + width)\nax.set_xticklabels(\n [\n \"LogisticRegression\\nweight 1\",\n \"GaussianNB\\nweight 1\",\n \"RandomForestClassifier\\nweight 5\",\n \"VotingClassifier\\n(average probabilities)\",\n ],\n rotation=40,\n ha=\"right\",\n)\nplt.ylim([0, 1])\nplt.title(\"Class probabilities for sample 1 by different classifiers\")\nplt.legend([p1[0], p2[0]], [\"class 1\", \"class 2\"], loc=\"upper left\")\nplt.tight_layout()\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 0 }