Skip to content

Commit 35df73f

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 3d16a21b6c9c6940509d98e5e0c030658f7c348c
1 parent 2dd9b40 commit 35df73f

File tree

1,266 files changed

+5078
-4728
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,266 files changed

+5078
-4728
lines changed
Binary file not shown.

dev/_downloads/215c560d29193ab9b0a495609bc74802/plot_monotonic_constraints.ipynb

+80-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,86 @@
2626
},
2727
"outputs": [],
2828
"source": [
29-
"from sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.inspection import PartialDependenceDisplay\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n\nrng = np.random.RandomState(0)\n\nn_samples = 5000\nf_0 = rng.rand(n_samples)\nf_1 = rng.rand(n_samples)\nX = np.c_[f_0, f_1]\nnoise = rng.normal(loc=0.0, scale=0.01, size=n_samples)\n\n# y is positively correlated with f_0, and negatively correlated with f_1\ny = 5 * f_0 + np.sin(10 * np.pi * f_0) - 5 * f_1 - np.cos(10 * np.pi * f_1) + noise\n\nfig, ax = plt.subplots()\n\n\n# Without any constraint\ngbdt = HistGradientBoostingRegressor()\ngbdt.fit(X, y)\ndisp = PartialDependenceDisplay.from_estimator(\n gbdt,\n X,\n features=[0, 1],\n feature_names=(\n \"First feature\",\n \"Second feature\",\n ),\n line_kw={\"linewidth\": 4, \"label\": \"unconstrained\", \"color\": \"tab:blue\"},\n ax=ax,\n)\n\n# With monotonic increase (1) and a monotonic decrease (-1) constraints, respectively.\ngbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1])\ngbdt.fit(X, y)\n\nPartialDependenceDisplay.from_estimator(\n gbdt,\n X,\n features=[0, 1],\n line_kw={\"linewidth\": 4, \"label\": \"constrained\", \"color\": \"tab:orange\"},\n ax=disp.axes_,\n)\n\nfor f_idx in (0, 1):\n disp.axes_[0, f_idx].plot(\n X[:, f_idx], y, \"o\", alpha=0.3, zorder=-1, color=\"tab:green\"\n )\n disp.axes_[0, f_idx].set_ylim(-6, 6)\n\nplt.legend()\nfig.suptitle(\"Monotonic constraints effect on partial dependences\")\n\nplt.show()"
29+
"from sklearn.ensemble import HistGradientBoostingRegressor\nfrom sklearn.inspection import PartialDependenceDisplay\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n\nrng = np.random.RandomState(0)\n\nn_samples = 1000\nf_0 = rng.rand(n_samples)\nf_1 = rng.rand(n_samples)\nX = np.c_[f_0, f_1]\nnoise = rng.normal(loc=0.0, scale=0.01, size=n_samples)\n\n# y is positively correlated with f_0, and negatively correlated with f_1\ny = 5 * f_0 + np.sin(10 * np.pi * f_0) - 5 * f_1 - np.cos(10 * np.pi * f_1) + noise"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Fit a first model on this dataset without any constraints.\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"gbdt_no_cst = HistGradientBoostingRegressor()\ngbdt_no_cst.fit(X, y)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Fit a second model on this dataset with a monotonic increase (1) constraint\non the first feature and a monotonic decrease (-1) constraint on the second\nfeature.\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"gbdt_with_monotonic_cst = HistGradientBoostingRegressor(monotonic_cst=[1, -1])\ngbdt_with_monotonic_cst.fit(X, y)"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Let's display the partial dependence of the predictions on the two features.\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"fig, ax = plt.subplots()\ndisp = PartialDependenceDisplay.from_estimator(\n gbdt_no_cst,\n X,\n features=[0, 1],\n feature_names=(\n \"First feature\",\n \"Second feature\",\n ),\n line_kw={\"linewidth\": 4, \"label\": \"unconstrained\", \"color\": \"tab:blue\"},\n ax=ax,\n)\nPartialDependenceDisplay.from_estimator(\n gbdt_with_monotonic_cst,\n X,\n features=[0, 1],\n line_kw={\"linewidth\": 4, \"label\": \"constrained\", \"color\": \"tab:orange\"},\n ax=disp.axes_,\n)\n\nfor f_idx in (0, 1):\n disp.axes_[0, f_idx].plot(\n X[:, f_idx], y, \"o\", alpha=0.3, zorder=-1, color=\"tab:green\"\n )\n disp.axes_[0, f_idx].set_ylim(-6, 6)\n\nplt.legend()\nfig.suptitle(\"Monotonic constraints effect on partial dependences\")\nplt.show()"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"We can see that the predictions of the unconstrained model capture the\noscillations of the data while the constrained model follows the general\ntrend and ignores the local variations.\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"metadata": {},
96+
"source": [
97+
"\n## Using feature names to specify monotonic constraints\n\nNote that if the training data has feature names, it's possible to specify the\nmonotonic constraints by passing a dictionary:\n\n"
98+
]
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": null,
103+
"metadata": {
104+
"collapsed": false
105+
},
106+
"outputs": [],
107+
"source": [
108+
"import pandas as pd\n\nX_df = pd.DataFrame(X, columns=[\"f_0\", \"f_1\"])\n\ngbdt_with_monotonic_cst_df = HistGradientBoostingRegressor(\n monotonic_cst={\"f_0\": 1, \"f_1\": -1}\n).fit(X_df, y)\n\nnp.allclose(\n gbdt_with_monotonic_cst_df.predict(X_df), gbdt_with_monotonic_cst.predict(X)\n)"
30109
]
31110
}
32111
],

0 commit comments

Comments
 (0)