diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 138dc4863..000000000
--- a/.coveragerc
+++ /dev/null
@@ -1,5 +0,0 @@
-[run]
-omit = mapie/_compatibility.py
-
-[report]
-omit = mapie/_compatibility.py
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index af866084c..6bf989626 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -6,6 +6,7 @@ on:
- dev
- main
- master
+ - v1
pull_request:
jobs:
@@ -14,18 +15,12 @@ jobs:
strategy:
matrix:
include:
- - os: ubuntu-latest
- python-version: "3.7"
- numpy-version: 1.21.4
- - os: ubuntu-latest
- python-version: "3.8"
- numpy-version: 1.21.4
- os: ubuntu-latest
python-version: "3.9"
- numpy-version: 1.21.4
+ numpy-version: "1.23"
- os: ubuntu-latest
python-version: "3.10"
- numpy-version: 1.22.4
+ numpy-version: "1.23"
- os: ubuntu-latest
python-version: "3.11"
numpy-version: 1.25.2
@@ -55,7 +50,11 @@ jobs:
run: make lint
- name: Check static typing
run: make type-check
- - name: Test with pytest
+ - name: Test and coverage with pytest
run: make coverage
- name: Code coverage
run: codecov
+ - name: v1 tests
+ run: make v1-tests
+ - name: v1 docstring tests
+ run: make v1-docstring-tests
diff --git a/.gitignore b/.gitignore
index 7c5f1ddae..ed7caa966 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,7 +49,7 @@ var/
pip-log.txt
pip-delete-this-directory.txt
-# Unit test / coverage reports
+# Tests / coverage reports
htmlcov/
.tox/
.coverage
@@ -59,6 +59,7 @@ nosetests.xml
coverage.xml
*,cover
.hypothesis/
+tests_v1/integration_tests/mapie_v0_package
# Translations
*.mo
diff --git a/CITATION.cff b/CITATION.cff
index 563ea7de5..f90d27484 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -5,7 +5,7 @@ authors:
given-names: "Thibault"
orcid: "https://orcid.org/0000-0000-0000-0000"
title: "MAPIE - Model Agnostic Prediction Interval Estimator"
-version: 0.9.2
+version: 1.0.0-alpha1
date-released: 2019-04-30
url: "https://github.com/scikit-learn-contrib/MAPIE"
preferred-citation:
diff --git a/Makefile b/Makefile
index c5e96d156..65b797b01 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,24 @@
+### Config ###
+
.PHONY: tests doc build
+mapie_v0_folder_name = mapie_v0_package
+
+
+### To run when working locally ###
+
+all-checks:
+ $(MAKE) lint
+ $(MAKE) type-check
+ $(MAKE) coverage
+
+v1-all-checks:
+ $(MAKE) v1-type-check
+ $(MAKE) v1-tests
+ $(MAKE) v1-docstring-tests
+ $(MAKE) lint
+
+
+### Checks that are run in GitHub CI ###
lint:
flake8 . --max-line-length=88 --exclude=doc
@@ -6,9 +26,6 @@ lint:
type-check:
mypy mapie
-tests:
- pytest -vs --doctest-modules mapie
-
coverage:
pytest -vsx \
--cov-branch \
@@ -19,12 +36,28 @@ coverage:
--cov-config=.coveragerc \
--no-cov-on-fail
+v1-tests:
+ python -m pytest -vs tests_v1
+
+v1-docstring-tests:
+ pytest -vs --doctest-modules mapie_v1
+
+
+### Checks that are run in ReadTheDocs CI ###
+
doc:
$(MAKE) html -C doc
doctest:
+ # Tests .. testcode:: blocks in documentation, among other things
$(MAKE) doctest -C doc
+
+### Local utilities ###
+
+tests:
+ pytest -vs --doctest-modules mapie
+
clean-doc:
$(MAKE) clean -C doc
@@ -39,3 +72,19 @@ clean:
rm -rf **__pycache__
$(MAKE) clean-build
$(MAKE) clean-doc
+
+
+### Local utilities (v1 specific) ###
+
+# Issues when trying to include it in CI, see related task on the project board
+v1-type-check:
+ mypy mapie_v1 --disallow-untyped-defs --exclude $(mapie_v0_folder_name)
+
+v1-coverage:
+ pytest -vsx \
+ --cov-branch \
+ --cov=mapie_v1 \
+ --cov-report term-missing \
+ --pyargs tests_v1 \
+ --cov-fail-under=100 \
+ --no-cov-on-fail
diff --git a/README.rst b/README.rst
index 815d12b46..d0f3a24b0 100644
--- a/README.rst
+++ b/README.rst
@@ -58,8 +58,8 @@ Here's a quick instantiation of MAPIE models for regression and classification p
.. code:: python
# Uncertainty quantification for regression problem
- from mapie.regression import MapieRegressor
- mapie_regressor = MapieRegressor(estimator=regressor, method='plus', cv=5)
+ from mapie_v1.regression import SplitConformalRegressor
+ mapie_regressor = SplitConformalRegressor(estimator=regressor)
.. code:: python
@@ -79,8 +79,8 @@ Implemented methods in **MAPIE** respect three fundamental pillars:
🔗 Requirements
===============
-- **MAPIE** runs on Python 3.7+.
-- **MAPIE** stands on the shoulders of giants. Its only internal dependencies are `scikit-learn `_ and `numpy=>1.21 `_.
+- **MAPIE** runs on Python 3.9+.
+- **MAPIE** stands on the shoulders of giants. Its only internal dependencies are `scikit-learn `_ and `numpy>=1.23 `_.
🛠Installation
@@ -112,19 +112,21 @@ As **MAPIE** is compatible with the standard scikit-learn API, you can see that
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
+ from mapie_v1.regression import SplitConformalRegressor
- from mapie.regression import MapieRegressor
-
-
- X, y = make_regression(n_samples=500, n_features=1)
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
+ X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
+ X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(X, y, test_size=0.5)
+ X_train, X_conformalize, y_train, y_conformalize = train_test_split(X_train_conformalize, y_train_conformalize, test_size=0.5)
regressor = LinearRegression()
+ regressor.fit(X_train, y_train)
+ mapie_regressor = SplitConformalRegressor(
+ regressor,
+ confidence_level=[0.95, 0.68],
+ )
+ mapie_regressor.conformalize(X_conformalize, y_conformalize)
- mapie_regressor = MapieRegressor(estimator=regressor, method='plus', cv=5)
-
- mapie_regressor = mapie_regressor.fit(X_train, y_train)
- y_pred, y_pis = mapie_regressor.predict(X_test, alpha=[0.05, 0.32])
+ y_pred, y_pred_intervals = mapie_regressor.predict_interval(X_test)
.. code:: python
@@ -239,7 +241,11 @@ MAPIE is free and open-source software licensed under the `license
+ index_mondrian
.. toctree::
:maxdepth: 2
diff --git a/doc/index_binary_classification.rst b/doc/index_binary_classification.rst
new file mode 100644
index 000000000..278d0c5db
--- /dev/null
+++ b/doc/index_binary_classification.rst
@@ -0,0 +1,8 @@
+The binary classification case
+================================
+
+.. toctree::
+ :maxdepth: 2
+
+ examples_classification/4-tutorials/plot_main-tutorial-binary-classification
+ theoretical_description_binary_classification
\ No newline at end of file
diff --git a/doc/index_classification.rst b/doc/index_classification.rst
new file mode 100644
index 000000000..2d8e76f5f
--- /dev/null
+++ b/doc/index_classification.rst
@@ -0,0 +1,13 @@
+Prediction sets (classification)
+================================
+
+.. toctree::
+ :maxdepth: 2
+
+ choosing_the_right_algorithm_classification
+ examples_classification/4-tutorials/plot_main-tutorial-classification
+ examples_classification/4-tutorials/plot_crossconformal
+ examples_classification/index
+ notebooks_classification
+ theoretical_description_classification
+ index_binary_classification
\ No newline at end of file
diff --git a/doc/index_mondrian.rst b/doc/index_mondrian.rst
new file mode 100644
index 000000000..e937e9eca
--- /dev/null
+++ b/doc/index_mondrian.rst
@@ -0,0 +1,8 @@
+Mondrian: how to use prior knowledge on groups when measuring uncertainty?
+==============================================================================
+
+.. toctree::
+ :maxdepth: 2
+
+ theoretical_description_mondrian
+ examples_mondrian/1-quickstart/plot_main-tutorial-mondrian-regression
\ No newline at end of file
diff --git a/doc/index_regression.rst b/doc/index_regression.rst
new file mode 100644
index 000000000..0511e562e
--- /dev/null
+++ b/doc/index_regression.rst
@@ -0,0 +1,12 @@
+Prediction intervals (regression)
+=================================
+
+.. toctree::
+ :maxdepth: 2
+
+ choosing_the_right_algorithm_regression
+ examples_regression/1-quickstart/plot_toy_model
+ examples_regression/1-quickstart/plot_prefit
+ examples_regression/index
+ theoretical_description_regression
+ theoretical_description_conformity_scores
\ No newline at end of file
diff --git a/doc/notebooks_regression.rst b/doc/notebooks_regression.rst
deleted file mode 100755
index 24b8ce12e..000000000
--- a/doc/notebooks_regression.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-Regression notebooks
-====================
-
-This section lists a series of Jupyter notebooks hosted on the MAPIE Github repository that can be run on Google Colab.
-
-
-1. Estimating the uncertainties of homoscedastic and heteroscedastic noisy data for tabular regression : `notebook `_
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-
-2. Estimating the uncertainties in the exoplanet masses : `exoplanet_notebook `_
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-
-3. Estimating prediction intervals for time series forecast with EnbPI and ACI : `ts_notebook `_
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-
diff --git a/doc/quick_start.rst b/doc/quick_start.rst
index d7f86b2da..6b1106a59 100644
--- a/doc/quick_start.rst
+++ b/doc/quick_start.rst
@@ -30,74 +30,73 @@ To install directly from the github repository :
pip install git+https://github.com/scikit-learn-contrib/MAPIE
-2. Run MapieRegressor
+2. Regression
=====================
-Let us start with a basic regression problem.
+Let us start with a basic regression problem.
Here, we generate one-dimensional noisy data that we fit with a linear model.
-.. code:: python
+..
+ Comment to developers: the following piece of code is heavily inspired by `examples/regression/1-quickstart/plot_toy_model.py`.
+ When updating it, please replicate the changes to this other file.
+
+.. testcode::
import numpy as np
- from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
- regressor = LinearRegression()
- X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
+ X, y = make_regression(n_samples=500, n_features=1, noise=20)
-Since MAPIE is compliant with the standard scikit-learn API, we follow the standard
-sequential ``fit`` and ``predict`` process like any scikit-learn regressor.
-We set two values for alpha to estimate prediction intervals at approximately one
-and two standard deviations from the mean.
+ X_train, X_temp, y_train, y_temp = train_test_split(X, y)
+ X_test, X_conformalize, y_test, y_conformalize = train_test_split(X_temp, y_temp)
-.. code:: python
+ # We follow a sequential ``fit``, ``conformalize``, and ``predict`` process.
+ # We set the confidence level to estimate prediction intervals at approximately one and two
+ # standard deviation from the mean.
- from mapie.regression import MapieRegressor
+ from mapie_v1.regression import SplitConformalRegressor
- mapie_regressor = MapieRegressor(regressor)
+ mapie_regressor = SplitConformalRegressor(confidence_level=[0.95, 0.68], prefit=False)
mapie_regressor.fit(X_train, y_train)
+ mapie_regressor.conformalize(X_conformalize, y_conformalize)
- alpha = [0.05, 0.32]
- y_pred, y_pred_intervals = mapie_regressor.predict(X_test, alpha=alpha)
-
-MAPIE returns a tuple, the first element is a ``np.ndarray`` of shape ``(n_samples)`` giving the
-predictions, and the second element a ``np.ndarray`` of shape ``(n_samples, 2, len(alpha))`` giving
-the lower and upper bounds of the prediction intervals for the target quantile for each desired alpha value.
+ y_pred, y_pred_intervals = mapie_regressor.predict_interval(X_test)
-You can compute the coverage of your prediction intervals.
+ # MAPIE's ``predict`` method returns point predictions as a ``np.ndarray`` of shape ``(n_samples)``.
+    # The ``predict_interval`` method additionally returns prediction intervals, as a ``np.ndarray`` of shape ``(n_samples, 2, 2)``
+ # giving the lower and upper bounds of the intervals for each confidence level.
-.. code:: python
-
- from mapie.metrics import regression_coverage_score_v2
+ # You can compute the coverage of your prediction intervals.
- coverage_scores = regression_coverage_score_v2(y_test, y_pis)
+ from mapie.metrics.regression import regression_coverage_score
-The estimated prediction intervals can then be plotted as follows.
+ coverage_scores = regression_coverage_score(y_test, y_pred_intervals)
-.. code:: python
+ # The estimated prediction intervals can then be plotted as follows.
from matplotlib import pyplot as plt
+ confidence_level = [0.95, 0.68]
+
plt.xlabel("x")
plt.ylabel("y")
plt.scatter(X, y, alpha=0.3)
plt.plot(X_test, y_pred, color="C1")
order = np.argsort(X_test[:, 0])
- plt.plot(X_test[order], y_pis[order][:, 0, 1], color="C1", ls="--")
- plt.plot(X_test[order], y_pis[order][:, 1, 1], color="C1", ls="--")
+ plt.plot(X_test[order], y_pred_intervals[order, 0], color="C1", ls="--")
+ plt.plot(X_test[order], y_pred_intervals[order, 1], color="C1", ls="--")
plt.fill_between(
X_test[order].ravel(),
- y_pis[order][:, 0, 0].ravel(),
- y_pis[order][:, 1, 0].ravel(),
+ y_pred_intervals[order][:, 0, 0].ravel(),
+ y_pred_intervals[order][:, 1, 0].ravel(),
alpha=0.2
)
plt.title(
- f"Target and effective coverages for "
- f"alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
- f"Target and effective coverages for "
- f"alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
+ f"Effective coverage for "
+ f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
+ f"Effective coverage for "
+ f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
)
plt.show()
@@ -106,10 +105,9 @@ The estimated prediction intervals can then be plotted as follows.
:align: center
The title of the plot compares the target coverages with the effective coverages.
-The target coverage, or the confidence interval, is the fraction of true labels lying in the
+The target coverage, or the confidence level, is the fraction of true labels lying in the
prediction intervals that we aim to obtain for a given dataset.
-It is given by the alpha parameter defined in ``MapieRegressor``, here equal to ``0.05`` and ``0.32``,
-thus giving target coverages of ``0.95`` and ``0.68``.
+It is given by the ``confidence_level`` parameter defined in ``SplitConformalRegressor``, here equal to ``0.95`` and ``0.68``.
The effective coverage is the actual fraction of true labels lying in the prediction intervals.
3. Run MapieClassifier
@@ -173,4 +171,4 @@ Similarly, it's possible to do the same for a basic classification problem.
.. image:: images/quickstart_2.png
:width: 400
- :align: center
+ :align: center
\ No newline at end of file
diff --git a/doc/split_cross_conformal.rst b/doc/split_cross_conformal.rst
index 936a1162d..a8ff8ab2f 100644
--- a/doc/split_cross_conformal.rst
+++ b/doc/split_cross_conformal.rst
@@ -1,37 +1,39 @@
-################################
-Split/Cross-Conformal Prediction
-################################
+################################################################
+The conformity (or "calibration") set
+################################################################
-**MAPIE** is based on two types of techniques:
+**MAPIE** is based on two types of techniques for measuring uncertainty in regression and classification:
- the split-conformal predictions,
- the cross-conformal predictions.
-In all cases, the training/calibration process can be broken down as follows:
+In all cases, the training/conformalization process can be broken down as follows:
-- Identify a basic model (or pre-trained model).
-- Wrap it with the MAPIE class.
-- Fit new model to calibration data (or full data if cross-validation) to estimate conformity scores.
-- Predict target on test data to obtain prediction intervals/sets based on conformity scores.
+- Train a model using the training set (or full dataset if cross-conformal).
+- Estimate conformity scores using the conformity set (or full dataset if cross-conformal).
+- Predict target on test data to obtain prediction intervals/sets based on these conformity scores.
1. Split conformal predictions
==============================
-- Construction of a conformity score.
-- Calibration of the conformity score on a calibration set not seen by the model during training.
+- Compute conformity scores ("conformalization") on a conformity set not seen by the model during training.
+ (Use :func:`~mapie_v1.utils.train_conformalize_test_split` to obtain the different sets.)
-**MAPIE** then uses the calibrated conformity scores to estimate sets associated with the desired coverage on new data with strong theoretical guarantees.
+**MAPIE** then uses the conformity scores to estimate sets associated with the desired coverage on new data with strong theoretical guarantees.
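+
+For instance, the three sets can be obtained with two successive calls to scikit-learn's
+``train_test_split`` (a minimal sketch; :func:`~mapie_v1.utils.train_conformalize_test_split`,
+mentioned above, produces the same sets in a single call, and the split proportions below are
+illustrative):
+
+.. code-block:: python
+
+    from sklearn.datasets import make_regression
+    from sklearn.model_selection import train_test_split
+
+    X, y = make_regression(n_samples=500, n_features=1, noise=20)
+
+    # Split off the test set, then split the rest into training and conformity sets
+    X_train_conf, X_test, y_train_conf, y_test = train_test_split(X, y, test_size=0.2)
+    X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+        X_train_conf, y_train_conf, test_size=0.5
+    )
+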
-.. image:: images/cp_split.png
+Split conformal predictions with a pre-trained model
+------------------------------------------------------------------------------------
+
+.. image:: images/cp_prefit.png
:width: 600
:align: center
-Prefit mode of split conformal predictions
-------------------------------------------
+Split conformal predictions with an untrained model
+------------------------------------------------------------------------------------
-.. image:: images/cp_prefit.png
+.. image:: images/cp_split.png
:width: 600
:align: center
@@ -39,7 +41,7 @@ Prefit mode of split conformal predictions
2. Cross conformal predictions
==============================
-- Conformity scores on the whole training set obtained by cross-validation,
+- Conformity scores on the whole dataset obtained by cross-validation,
- Perturbed models generated during the cross-validation.
**MAPIE** then combines all these elements in a way that provides prediction intervals on new data with strong theoretical guarantees.
diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst
index 9c8f6f336..3a3b56750 100644
--- a/doc/theoretical_description_binary_classification.rst
+++ b/doc/theoretical_description_binary_classification.rst
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
There are mainly three different ways to handle uncertainty quantification in binary classification:
calibration (see :doc:`theoretical_description_calibration`), confidence interval (CI) for the probability
:math:`P(Y \vert \hat{\mu}(X))` and prediction sets (see :doc:`theoretical_description_classification`).
diff --git a/doc/theoretical_description_calibration.rst b/doc/theoretical_description_calibration.rst
index c62540337..057163441 100644
--- a/doc/theoretical_description_calibration.rst
+++ b/doc/theoretical_description_calibration.rst
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
One method for multi-class calibration has been implemented in MAPIE so far :
Top-Label Calibration [1].
diff --git a/doc/theoretical_description_classification.rst b/doc/theoretical_description_classification.rst
index 5144c8487..d5899eed8 100644
--- a/doc/theoretical_description_classification.rst
+++ b/doc/theoretical_description_classification.rst
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
Three methods for multi-class uncertainty quantification have been implemented in MAPIE so far :
LAC (that stands for Least Ambiguous set-valued Classifier) [1], Adaptive Prediction Sets [2, 3] and Top-K [3].
The difference between these methods is the way the conformity scores are computed.
diff --git a/doc/theoretical_description_conformity_scores.rst b/doc/theoretical_description_conformity_scores.rst
index 5ec0aee4d..a843cd8d4 100644
--- a/doc/theoretical_description_conformity_scores.rst
+++ b/doc/theoretical_description_conformity_scores.rst
@@ -6,6 +6,13 @@
Theoretical Description for Conformity Scores
#############################################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
The :class:`mapie.conformity_scores.ConformityScore` class implements various
methods to compute conformity scores for regression.
We give here a brief theoretical description of the scores included in the module.
@@ -99,7 +106,8 @@ it is not proportional to the uncertainty.
Key takeaways
-------------
-- The absolute residual score is the basic conformity score and gives constant intervals. It is the one used by default by :class:`mapie.regression.MapieRegressor`.
+- The absolute residual score is the basic conformity score and gives constant intervals. It is the one used by default by regression methods
+ such as :class:`mapie_v1.regression.SplitConformalRegressor`.
- The gamma conformity score adds a notion of adaptivity by giving intervals of different sizes
and is proportional to the uncertainty.
- The residual normalized score is a conformity score that requires an additional model
diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst
index 6ac886886..e6b3b7f96 100644
--- a/doc/theoretical_description_metrics.rst
+++ b/doc/theoretical_description_metrics.rst
@@ -1,4 +1,4 @@
-.. title:: Theoretical Description Metrics : contents
+.. title:: How to measure conformal prediction performance?
.. _theoretical_description_metrics:
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately.
1. General Metrics
diff --git a/doc/theoretical_description_mondrian.rst b/doc/theoretical_description_mondrian.rst
index 7b93b3164..45ddd031b 100644
--- a/doc/theoretical_description_mondrian.rst
+++ b/doc/theoretical_description_mondrian.rst
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
Mondrian conformal prediction (MCP) [1] is a method that allows to build prediction sets with a group-conditional
coverage guarantee. The coverage guarantee is given by:
diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst
index e3ff05da3..6dd72bdb7 100644
--- a/doc/theoretical_description_multilabel_classification.rst
+++ b/doc/theoretical_description_multilabel_classification.rst
@@ -6,6 +6,13 @@
Theoretical Description
#######################
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
Three methods for multi-label uncertainty quantification have been implemented in MAPIE so far :
Risk-Controlling Prediction Sets (RCPS) [1], Conformal Risk Control (CRC) [2] and Learn Then Test (LTT) [3].
The difference between these methods is the way the conformity scores are computed.
diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst
index 09c55e74c..bf8784080 100644
--- a/doc/theoretical_description_regression.rst
+++ b/doc/theoretical_description_regression.rst
@@ -6,7 +6,14 @@
Theoretical Description
#######################
-The :class:`mapie.regression.MapieRegressor` class uses various
+Note: in theoretical parts of the documentation, we use the following terms employed in the scientific literature:
+
+- `alpha` is equivalent to `1 - confidence_level`. It can be seen as a *risk level*.
+- *calibrate* and *calibration* are equivalent to *conformalize* and *conformalization*.
+
+—
+
+The methods in `mapie_v1.regression` use various
resampling methods based on the jackknife strategy
recently introduced by Foygel-Barber et al. (2020) [1].
They allow the user to estimate robust prediction intervals with any kind of
diff --git a/doc/v1_migration_guide.rst b/doc/v1_migration_guide.rst
new file mode 100644
index 000000000..576f6ccf4
--- /dev/null
+++ b/doc/v1_migration_guide.rst
@@ -0,0 +1,365 @@
+Migrating to MAPIE v1
+===========================================
+
+MAPIE v1 introduces several updates, enhancements, and structural changes that simplify the API by breaking down ``MapieRegressor`` and ``MapieClassifier`` into dedicated classes for different conformal prediction techniques.
+
+This guide outlines the differences between MAPIE v0.x and MAPIE v1 and provides instructions for migrating your code to the new API.
+
+1. Python and NumPy versions support
+-------------------------------------------------
+
+We now support Python >=3.9 (formerly >=3.7) and NumPy >=1.23 (formerly >=1.21).
+
+MAPIE v1 may run with Python < 3.9, but we do not recommend it.
+
+2. Class restructuring
+-----------------------------------
+
+MAPIE v1 breaks down the ``MapieRegressor`` and ``MapieClassifier`` classes into 5 classes, each dedicated to a particular conformal prediction technique. ``MapieQuantileRegressor`` has also been revamped, and renamed ``ConformalizedQuantileRegressor``.
+
+The rationale behind this is that ``MapieRegressor`` and ``MapieClassifier`` managed several conformal techniques under a single interface, which led to parameter redundancy and ambiguity. In MAPIE v1, each class includes only the relevant parameters specific to its technique.
+
+The ``cv`` parameter is key to understanding which new class to use in the v1 API:
+
+.. list-table:: MAPIE v0.x -> v1 class correspondence
+ :header-rows: 1
+
+ * - v0.x class
+ - ``cv`` parameter value
+ - Corresponding v1 class
+ - Conformal prediction type
+ * - ``MapieRegressor``
+ - ``"split"`` or ``"prefit"``
+ - ``SplitConformalRegressor``
+ - Split
+ * - ``MapieRegressor``
+ - ``None``, integer, or any ``sklearn.model_selection.BaseCrossValidator``
+ - ``CrossConformalRegressor``
+ - Cross
+ * - ``MapieRegressor``
+ - ``subsample.Subsample``
+ - ``JackknifeAfterBootstrapRegressor``
+ - Cross
+ * - ``MapieQuantileRegressor``
+ - ``None``, ``"split"`` or ``"prefit"``
+ - ``ConformalizedQuantileRegressor``
+ - Split
+ * - ``MapieClassifier``
+ - ``"split"`` or ``"prefit"``
+ - ``SplitConformalClassifier``
+ - Split
+ * - ``MapieClassifier``
+ - ``None``, integer, or any ``sklearn.model_selection.BaseCrossValidator``
+ - ``CrossConformalClassifier``
+ - Cross
+
+For more details regarding the difference between split and cross conformal types, see :doc:`split_cross_conformal`.
+
+3. Method changes
+-----------------
+
+In MAPIE v1, the conformal prediction workflow is more streamlined and modular, with distinct methods for training, conformalization (named calibration in the scientific literature), and prediction. The conformalization process in v1 consists of four steps.
+
+Step 1: Data splitting
+~~~~~~~~~~~~~~~~~~~~~~
+In v0.x, data splitting is handled by MAPIE.
+
+In v1, data splitting is left to the user for split conformal techniques. The user can split the data into training, conformity, and test sets using scikit-learn's ``train_test_split`` or other methods.
+
+Steps 2 & 3: Model training and conformalization (i.e., calibration)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In v0.x, the ``fit`` method handled both model training and calibration.
+
+In v1, MAPIE separates training from calibration. We decided to name the *calibration* step *conformalization*, to avoid confusion with probability calibration.
+
+For split conformal techniques:
+
+``.fit()`` method:
+
+- In v1, ``fit`` only trains the model on training data, without handling conformalization.
+- Additional fitting parameters, like ``sample_weight``, should be included in ``fit_params``, keeping this method focused on training alone.
+
+``.conformalize()`` method:
+
+- Used in split conformal techniques only.
+- This new method performs conformalization after fitting, using separate conformity data ``(X_conformalize, y_conformalize)``.
+- ``predict_params`` should be passed to this method.
+
+For cross conformal techniques:
+
+``.fit_conformalize()`` method: because those techniques rely on fitting and conformalizing models in a cross-validation fashion, the fitting and conformalization steps are not distinct.
+
+Step 4: Making predictions (``predict`` and ``predict_interval`` methods)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In MAPIE v0.x, both point predictions and prediction intervals were produced through the ``predict`` method.
+
+MAPIE v1 introduces a new prediction method, ``.predict_interval()``, that behaves like the v0.x ``.predict(alpha=...)`` method: it returns both point predictions and prediction intervals.
+The ``.predict()`` method now focuses solely on producing point predictions.
+
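+Putting the four steps together for a split conformal technique (a minimal sketch: the
+dataset, model, and split proportions are illustrative):
+
+.. code-block:: python
+
+    from sklearn.datasets import make_regression
+    from sklearn.linear_model import LinearRegression
+    from sklearn.model_selection import train_test_split
+    from mapie_v1.regression import SplitConformalRegressor
+
+    # Step 1: data splitting, handled by the user in v1
+    X, y = make_regression(n_samples=500, n_features=2, noise=10)
+    X_train, X_temp, y_train, y_temp = train_test_split(X, y)
+    X_conformalize, X_test, y_conformalize, y_test = train_test_split(X_temp, y_temp)
+
+    # Steps 2 and 3: training, then conformalization on held-out data
+    mapie = SplitConformalRegressor(
+        LinearRegression(), confidence_level=0.9, prefit=False
+    )
+    mapie.fit(X_train, y_train)
+    mapie.conformalize(X_conformalize, y_conformalize)
+
+    # Step 4: point predictions alone, or point predictions with intervals
+    y_pred = mapie.predict(X_test)
+    y_pred, y_intervals = mapie.predict_interval(X_test)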
+
+
+4. Parameter changes
+------------------------
+
+``alpha``
+~~~~~~~~~~~~~~~~~~~~
+Indicates the desired coverage probability of the prediction intervals.
+
+- **v0.x**: Specified as ``alpha`` during prediction, representing the error rate.
+- **v1**: Replaced with ``confidence_level`` to denote the coverage rate directly. Set at model initialization, improving consistency and clarity. ``confidence_level`` is equivalent to ``1 - alpha``.
+
+``cv``
+~~~~~~~
+See the class restructuring section of this guide. The ``cv`` parameter is now declared only at initialization of cross conformal techniques.
+
+``conformity_score``
+~~~~~~~~~~~~~~~~~~~~
+A parameter used to specify the scoring approach for evaluating model predictions.
+
+- **v0.x**: Only allowed instances of ``BaseRegressionScore`` subclasses, like ``AbsoluteConformityScore()``.
+- **v1**: Now also accepts strings, like ``"absolute"`` (see the sketch below).
+
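+For instance (a minimal sketch: only the constructor call is shown):
+
+.. code-block:: python
+
+    from sklearn.linear_model import LinearRegression
+    from mapie_v1.regression import SplitConformalRegressor
+
+    # The string "absolute" replaces passing AbsoluteConformityScore() explicitly
+    mapie = SplitConformalRegressor(
+        LinearRegression(),
+        confidence_level=0.9,
+        conformity_score="absolute",
+    )
+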
+``method``
+~~~~~~~~~~
+Specifies the approach for calculating prediction intervals for cross conformal techniques.
+
+- **v0.x**: Part of ``MapieRegressor``. Configured for the main prediction process.
+- **v1**: Specific to ``CrossConformalRegressor`` and ``JackknifeAfterBootstrapRegressor``, indicating the interval calculation approach (``"base"``, ``"plus"``, or ``"minmax"``).
+
+``groups``
+~~~~~~~~~~~
+The ``groups`` parameter is used to specify group labels for cross-validation, ensuring that the same group is not present in both training and conformity sets.
+
+- **v0.x**: Passed as a parameter to the ``fit`` method.
+- **v1**: The ``groups`` parameter is now only present in ``CrossConformalRegressor``. It is passed to the ``.fit_conformalize()`` method instead of the ``.fit()`` method. In other classes (like ``SplitConformalRegressor``), groups can be directly handled by the user during data splitting.
+
+``prefit``
+~~~~~~~~~~
+Controls whether the model has been pre-fitted before applying conformal prediction.
+
+- **v0.x**: Indicated through ``cv="prefit"`` in ``MapieRegressor``.
+- **v1**: ``prefit`` is now a separate boolean parameter, allowing explicit control over whether the model has been pre-fitted before conformalizing. It is set by default to ``True`` for ``SplitConformalRegressor``, as we believe this will become MAPIE's nominal usage.
+
+``fit_params`` (includes ``sample_weight``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Dictionary of parameters specifically used during training, such as ``sample_weight`` in scikit-learn.
+
+- **v0.x**: Passed additional parameters in a flexible but less explicit manner.
+- **v1**: Now explicitly structured as a dedicated dictionary, ``fit_params``, ensuring parameters used during training are clearly defined and separated from other stages.
+
+``predict_params``
+~~~~~~~~~~~~~~~~~~
+Defines additional parameters exclusively for prediction.
+
+- **v0.x**: Passed additional parameters in a flexible but less explicit manner, sometimes mixed within training configurations.
+- **v1**: Now structured as a dedicated dictionary, ``predict_params``, to be used during calibration (``conformalize`` method) and prediction stages, ensuring no overlap with training parameters (see the sketch below).
+
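+A minimal sketch of how these dictionaries are passed in v1 (assuming a split conformal
+workflow; the dataset is illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+    from sklearn.datasets import make_regression
+    from sklearn.linear_model import LinearRegression
+    from sklearn.model_selection import train_test_split
+    from mapie_v1.regression import SplitConformalRegressor
+
+    X, y = make_regression(n_samples=200, n_features=2, noise=10)
+    X_train, X_conformalize, y_train, y_conformalize = train_test_split(X, y)
+    sample_weight = np.random.rand(X_train.shape[0])
+
+    mapie = SplitConformalRegressor(
+        LinearRegression(), confidence_level=0.9, prefit=False
+    )
+    # Training-only parameters go in fit_params
+    mapie.fit(X_train, y_train, fit_params={"sample_weight": sample_weight})
+    # predict_params, if needed, would be passed to conformalize (none here)
+    mapie.conformalize(X_conformalize, y_conformalize)
+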
+``agg_function`` and ``ensemble``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+How to aggregate predictions in cross conformal methods.
+
+- **v0.x**: The ``agg_function`` parameter served two purposes: aggregating predictions when ``ensemble=True`` was set in the ``predict`` method, and specifying the aggregation used in ``JackknifeAfterBootstrapRegressor``.
+- **v1**:
+
+ - The ``agg_function`` parameter has been split into two distinct parameters: ``aggregate_predictions`` and ``aggregation_method``. ``aggregate_predictions`` is specific to ``CrossConformalRegressor``, and it specifies how predictions from multiple conformal regressors are aggregated when making point predictions. ``aggregation_method`` is specific to ``JackknifeAfterBootstrapRegressor``, and it specifies the aggregation technique for combining predictions across different bootstrap samples during conformalization.
+  - Note that for both cross conformal techniques, point predictions are now computed by default using mean aggregation, to avoid point predictions falling outside the prediction intervals in the default setting (see the sketch below).
+
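+A minimal sketch (assuming an integer ``cv`` is still accepted, as in v0.x; the dataset is
+illustrative):
+
+.. code-block:: python
+
+    from sklearn.datasets import make_regression
+    from sklearn.linear_model import LinearRegression
+    from mapie_v1.regression import CrossConformalRegressor
+
+    X, y = make_regression(n_samples=200, n_features=2, noise=10)
+
+    mapie = CrossConformalRegressor(LinearRegression(), confidence_level=0.9, cv=5)
+    mapie.fit_conformalize(X, y)
+
+    # Point predictions use mean aggregation by default;
+    # "median" reproduces the v0.x agg_function="median" behaviour
+    y_pred = mapie.predict(X, aggregate_predictions="median")
+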
+``random_state``
+~~~~~~~~~~~~~~~~~~
+
+- **v0.x**: This parameter was used to control the randomness of the data splitting.
+- **v1**: This parameter has been removed in cases where data splitting is now manual. Future evolutions may reintroduce it as a general-purpose randomness control parameter.
+
+``symmetry``
+~~~~~~~~~~~~~~~~~~
+
+- **v0.x**: This parameter of the ``predict`` method of ``MapieQuantileRegressor`` was set to ``True`` by default.
+- **v1**: This parameter is now named ``symmetric_correction`` and is set to ``False`` by default, because the resulting intervals are smaller. It is used in the ``predict_interval`` method of ``ConformalizedQuantileRegressor`` (see the sketch below).
+
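+A minimal sketch (the constructor arguments of ``ConformalizedQuantileRegressor`` are
+assumptions based on the split API above; only ``symmetric_correction`` is taken from the
+change described here):
+
+.. code-block:: python
+
+    from sklearn.datasets import make_regression
+    from sklearn.ensemble import GradientBoostingRegressor
+    from sklearn.model_selection import train_test_split
+    from mapie_v1.regression import ConformalizedQuantileRegressor
+
+    X, y = make_regression(n_samples=500, n_features=1, noise=20)
+    X_train, X_conformalize, y_train, y_conformalize = train_test_split(X, y)
+
+    cqr = ConformalizedQuantileRegressor(
+        GradientBoostingRegressor(loss="quantile"),  # assumed constructor signature
+        confidence_level=0.9,
+        prefit=False,  # assumed parameter, by analogy with SplitConformalRegressor
+    )
+    cqr.fit(X_train, y_train)
+    cqr.conformalize(X_conformalize, y_conformalize)
+
+    # symmetric_correction now defaults to False; set it to True for the v0.x behaviour
+    y_pred, y_intervals = cqr.predict_interval(X_conformalize, symmetric_correction=True)
+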
+``optimize_beta``
+~~~~~~~~~~~~~~~~~~
+
+This parameter used during interval prediction in regression has been renamed ``minimize_interval_width`` for clarity.
+
+None defaults
+~~~~~~~~~~~~~~~~~~~~
+No more parameters with incorrect ``None`` defaults.
+
+- **v0.x**: E.g., ``estimator`` had a ``None`` default value, even though the actual default was ``LinearRegression()``. This was the case for other parameters as well.
+- **v1**: All parameters now have explicit defaults.
+
+
+5. Metrics changes
+----------------------------------------------------------------------------------------
+
+In MAPIE v1, metrics are divided into three modules: ``calibration``, ``classification``, and ``regression``, which changes the import paths.
+
+Below is an example of the import needed for the ``classification_coverage_score`` function:
+
+- **v0.x**:
+
+.. code-block:: python
+
+ from mapie.metrics import classification_coverage_score
+
+- **v1**:
+
+.. code-block:: python
+
+ from mapie.metrics.classification import classification_coverage_score
+
+
+Additionally, two regression functions have undergone some change from v0.x to v1:
+
+- **regression_coverage_score**: Merged with ``regression_coverage_score_v2``. In MAPIE v1, ``regression_coverage_score`` now corresponds to MAPIE v0.x's ``regression_coverage_score_v2`` (see the sketch below).
+- **regression_mwi_score**: Now takes ``confidence_level`` as input instead of ``alpha`` (``confidence_level`` is equivalent to ``1 - alpha``).
+
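+For example, coverage can be computed per confidence level with the new import path (a
+minimal sketch; the toy arrays are illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+    from mapie.metrics.regression import regression_coverage_score
+
+    y_true = np.array([1.0, 2.0, 3.0])
+    # Intervals of shape (n_samples, 2, n_confidence_levels),
+    # as returned by predict_interval
+    y_intervals = np.array([[[0.5], [1.5]],
+                            [[1.5], [2.5]],
+                            [[2.0], [2.8]]])
+
+    # One coverage value per confidence level; here, 2 out of 3 intervals cover y_true
+    coverage_per_level = regression_coverage_score(y_true, y_intervals)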
+
+6. Migration examples: MAPIE v0.x to MAPIE v1
+----------------------------------------------------------------------------------------
+
+Below is a side-by-side example of code in MAPIE v0.x and its equivalent in MAPIE v1.
+
+Example 1: Split Conformal Prediction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Description
+############
+Split conformal prediction is a widely used technique for generating prediction intervals. It splits the data into training, conformity, and test sets: the model is trained on the training set, conformalized on the conformity set, and then used to make predictions on the test set. In MAPIE v1, ``SplitConformalRegressor`` replaces the older ``MapieRegressor`` with a more modular design and a simplified API.
+
+MAPIE v0.x Code
+###############
+
+Below is the MAPIE v0.x code for split conformal prediction in the case of a pre-fitted model:
+
+.. testcode::
+
+ from sklearn.linear_model import LinearRegression
+ from mapie.regression import MapieRegressor
+ from mapie.conformity_scores import ResidualNormalisedScore
+ from sklearn.model_selection import train_test_split
+ from sklearn.datasets import make_regression
+
+ X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
+
+ X_train, X_conf_test, y_train, y_conf_test = train_test_split(X, y)
+ X_conf, X_test, y_conf, y_test = train_test_split(X_conf_test, y_conf_test)
+
+ prefit_model = LinearRegression().fit(X_train, y_train)
+
+ v0 = MapieRegressor(
+ estimator=prefit_model,
+ cv="prefit",
+ conformity_score=ResidualNormalisedScore()
+ )
+
+ v0.fit(X_conf, y_conf)
+
+ prediction_points_v0, prediction_intervals_v0 = v0.predict(X_test, alpha=0.1)
+ prediction_points_v0 = v0.predict(X_test)
+
+Equivalent MAPIE v1 code
+########################
+
+Below is the equivalent MAPIE v1 code for split conformal prediction:
+
+.. testcode::
+
+ from sklearn.linear_model import LinearRegression
+ from sklearn.model_selection import train_test_split
+ from mapie_v1.regression import SplitConformalRegressor
+ from sklearn.datasets import make_regression
+
+ X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
+
+ X_train, X_conf_test, y_train, y_conf_test = train_test_split(X, y)
+ X_conf, X_test, y_conf, y_test = train_test_split(X_conf_test, y_conf_test)
+
+ prefit_model = LinearRegression().fit(X_train, y_train)
+
+ v1 = SplitConformalRegressor(
+ estimator=prefit_model,
+ confidence_level=0.9,
+ conformity_score="residual_normalized",
+ )
+
+ # Here we're not using v1.fit(), because the provided model is already fitted
+ v1.conformalize(X_conf, y_conf)
+
+ prediction_points_v1, prediction_intervals_v1 = v1.predict_interval(X_test)
+ prediction_points_v1 = v1.predict(X_test)
+
+Example 2: Cross-Conformal Prediction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Description
+############
+
+Cross-conformal prediction extends split conformal prediction by using multiple cross-validation folds to improve the efficiency of the prediction intervals. In MAPIE v1, ``CrossConformalRegressor`` replaces the older ``MapieRegressor`` for this purpose.
+
+MAPIE v0.x code
+###############
+
+Below is the MAPIE v0.x code for cross-conformal prediction:
+
+.. testcode::
+
+ import numpy as np
+ from sklearn.ensemble import RandomForestRegressor
+ from mapie.regression import MapieRegressor
+ from sklearn.model_selection import train_test_split, GroupKFold
+ from sklearn.datasets import make_regression
+
+ X_full, y_full = make_regression(n_samples=100, n_features=2, noise=0.1)
+ X, X_test, y, y_test = train_test_split(X_full, y_full)
+ groups = np.random.randint(0, 10, X.shape[0])
+ sample_weight = np.random.rand(X.shape[0])
+
+ regression_model = RandomForestRegressor(
+ n_estimators=100,
+ max_depth=5
+ )
+
+ v0 = MapieRegressor(
+ estimator=regression_model,
+ cv=GroupKFold(),
+ agg_function="median",
+ )
+
+ v0.fit(X, y, sample_weight=sample_weight, groups=groups)
+
+ prediction_points_v0, prediction_intervals_v0 = v0.predict(X_test, alpha=0.1)
+ prediction_points_v0 = v0.predict(X_test, ensemble=True)
+
+Equivalent MAPIE v1 code
+########################
+
+Below is the equivalent MAPIE v1 code for cross-conformal prediction:
+
+.. testcode::
+
+ import numpy as np
+ from sklearn.ensemble import RandomForestRegressor
+ from sklearn.model_selection import train_test_split, GroupKFold
+ from mapie_v1.regression import CrossConformalRegressor
+ from sklearn.datasets import make_regression
+
+ X_full, y_full = make_regression(n_samples=100, n_features=2, noise=0.1)
+ X, X_test, y, y_test = train_test_split(X_full, y_full)
+ groups = np.random.randint(0, 10, X.shape[0])
+ sample_weight = np.random.rand(X.shape[0])
+
+ regression_model = RandomForestRegressor(
+ n_estimators=100,
+ max_depth=5
+ )
+
+ v1 = CrossConformalRegressor(
+ estimator=regression_model,
+ confidence_level=0.9,
+ cv=GroupKFold(),
+ conformity_score="absolute",
+ )
+
+ v1.fit_conformalize(X, y, groups=groups, fit_params={"sample_weight": sample_weight})
+
+ prediction_points_v1, prediction_intervals_v1 = v1.predict_interval(X_test)
+ prediction_points_v1 = v1.predict(X_test, aggregate_predictions="median")
diff --git a/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py b/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py
index 894add6aa..29c388248 100644
--- a/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py
+++ b/examples/calibration/1-quickstart/plot_calibration_hypothesis_testing.py
@@ -29,9 +29,11 @@
from matplotlib import pyplot as plt
from sklearn.utils import check_random_state
-from mapie._typing import NDArray
-from mapie.metrics import (cumulative_differences, kolmogorov_smirnov_p_value,
- length_scale)
+from numpy.typing import NDArray
+from mapie.metrics.calibration import (
+ cumulative_differences, kolmogorov_smirnov_p_value,
+ length_scale,
+)
####################################################################
# 1. Create 1-dimensional dataset and scores to test for calibration
diff --git a/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py b/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py
index ae5cb8b24..2bcccfffa 100644
--- a/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py
+++ b/examples/calibration/2-advanced-analysis/plot_asymptotic_convergence_of_p_values.py
@@ -39,9 +39,9 @@
from matplotlib import pyplot as plt
from sklearn.utils import check_random_state
-from mapie._typing import NDArray
-from mapie.metrics import (kolmogorov_smirnov_p_value, kuiper_p_value,
- spiegelhalter_p_value)
+from numpy.typing import NDArray
+from mapie.metrics.calibration import (kolmogorov_smirnov_p_value,
+                                       kuiper_p_value, spiegelhalter_p_value)
##############################################################################
# First we need to generate scores that are perfecty calibrated. To do so,
diff --git a/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py b/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py
index 014ed943a..cf02fb624 100644
--- a/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py
+++ b/examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py
@@ -53,10 +53,12 @@
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.classification import MapieClassifier
-from mapie.metrics import (classification_coverage_score,
- classification_mean_width_score)
+from mapie.metrics.classification import (
+ classification_coverage_score,
+ classification_mean_width_score,
+)
centers = [(0, 3.5), (-2, 0), (2, 0)]
covs = [np.eye(2), np.eye(2) * 2, np.diag([5, 1])]
diff --git a/examples/classification/4-tutorials/plot_crossconformal.py b/examples/classification/4-tutorials/plot_crossconformal.py
index f9469300b..a0d3677d5 100644
--- a/examples/classification/4-tutorials/plot_crossconformal.py
+++ b/examples/classification/4-tutorials/plot_crossconformal.py
@@ -35,10 +35,12 @@
from sklearn.naive_bayes import GaussianNB
from typing_extensions import TypedDict
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.classification import MapieClassifier
-from mapie.metrics import (classification_coverage_score,
- classification_mean_width_score)
+from mapie.metrics.classification import (
+ classification_coverage_score,
+ classification_mean_width_score,
+)
##############################################################################
# 1. Estimating the impact of train/calibration split on the prediction sets
diff --git a/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py b/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py
index f83d24011..dd852561f 100644
--- a/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py
+++ b/examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py
@@ -33,10 +33,12 @@
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.classification import MapieClassifier
-from mapie.metrics import (classification_coverage_score,
- classification_mean_width_score)
+from mapie.metrics.classification import (
+ classification_coverage_score,
+ classification_mean_width_score,
+)
##############################################################################
# 1. Conformal Prediction method using the softmax score of the true label
diff --git a/examples/classification/4-tutorials/plot_main-tutorial-classification.py b/examples/classification/4-tutorials/plot_main-tutorial-classification.py
index cd57da03a..db23118e7 100644
--- a/examples/classification/4-tutorials/plot_main-tutorial-classification.py
+++ b/examples/classification/4-tutorials/plot_main-tutorial-classification.py
@@ -23,8 +23,10 @@
from sklearn.naive_bayes import GaussianNB
from mapie.classification import MapieClassifier
-from mapie.metrics import (classification_coverage_score,
- classification_mean_width_score)
+from mapie.metrics.classification import (
+ classification_coverage_score,
+ classification_mean_width_score,
+)
##############################################################################
# 1. Conformal Prediction method using the softmax score of the true label
diff --git a/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py b/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py
index 6a58fe0fe..0dc5b761d 100644
--- a/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py
+++ b/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py
@@ -26,7 +26,7 @@
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
-from mapie.metrics import regression_coverage_score_v2
+from mapie.metrics.regression import regression_coverage_score
from mapie.mondrian import MondrianCP
from mapie.regression import MapieRegressor
@@ -152,10 +152,10 @@
coverages = {}
for group in np.unique(partition_test):
coverages[group] = {}
- coverages[group]["split"] = regression_coverage_score_v2(
+ coverages[group]["split"] = regression_coverage_score(
y_test[partition_test == group], y_pss_split[partition_test == group]
)
- coverages[group]["mondrian"] = regression_coverage_score_v2(
+ coverages[group]["mondrian"] = regression_coverage_score(
y_test[partition_test == group],
y_pss_mondrian[partition_test == group]
)
diff --git a/examples/regression/1-quickstart/README.rst b/examples/regression/1-quickstart/README.rst
index cb770c871..8538ded35 100644
--- a/examples/regression/1-quickstart/README.rst
+++ b/examples/regression/1-quickstart/README.rst
@@ -1,6 +1,6 @@
.. _regression_examples_1:
-1. Quickstart examples
+1. Quickstart
----------------------
The following examples present the main functionalities of MAPIE through basic quickstart regression problems.
\ No newline at end of file
diff --git a/examples/regression/1-quickstart/plot_compare_conformity_scores.py b/examples/regression/1-quickstart/plot_compare_conformity_scores.py
index 1dd0fc79a..b42144cb6 100644
--- a/examples/regression/1-quickstart/plot_compare_conformity_scores.py
+++ b/examples/regression/1-quickstart/plot_compare_conformity_scores.py
@@ -1,8 +1,10 @@
"""
-===========================================================
-Estimating prediction intervals of Gamma distributed target
-===========================================================
-This example uses :class:`~mapie.regression.MapieRegressor` to estimate
+==========================================================================================
+Use MAPIE on data with a Gamma distribution
+==========================================================================================
+
+This example uses :class:`~mapie_v1.regression.CrossConformalRegressor` to estimate
prediction intervals associated with Gamma distributed target.
The limit of the absolute residual conformity score is illustrated.
@@ -14,10 +16,10 @@
The data is modelled by a Random Forest model
:class:`~sklearn.ensemble.RandomForestRegressor` with a fixed parameter set.
The prediction intervals are determined by means of the MAPIE regressor
-:class:`~mapie.regression.MapieRegressor` considering two conformity scores:
-:class:`~mapie.conformity_scores.AbsoluteConformityScore` which
+:class:`~mapie_v1.regression.CrossConformalRegressor` considering two conformity scores:
+``"absolute"`` which
considers the absolute residuals as the conformity scores and
-:class:`~mapie.conformity_scores.GammaConformityScore` which
+``"gamma"`` which
considers the residuals divided by the predicted means as conformity scores.
We consider the standard CV+ resampling method.
@@ -28,7 +30,7 @@
overcomes this issue by considering prediction intervals with width
proportional to the predicted mean. For low prices, the Gamma prediction
intervals are narrower than the default ones, conversely to high prices
-for which the conficence intervals are higher but visually more relevant.
+for which the confidence intervals are higher but visually more relevant.
The empirical coverage is similar between the two conformity scores.
"""
import matplotlib.pyplot as plt
@@ -40,11 +42,10 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
-from mapie.conformity_scores import GammaConformityScore
-from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieRegressor
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import CrossConformalRegressor
-random_state = 42
+RANDOM_STATE = 42
# Parameters
features = [
@@ -56,8 +57,8 @@
]
target = "SalePrice"
-alpha = 0.05
-rf_kwargs = {"n_estimators": 10, "random_state": random_state}
+confidence_level = 0.95
+rf_kwargs = {"n_estimators": 10, "random_state": RANDOM_STATE}
model = RandomForestRegressor(**rf_kwargs)
##############################################################################
@@ -66,8 +67,6 @@
#
# We start by loading a dataset with a target following approximately
# a Gamma distribution.
-# The :class:`~mapie.conformity_scores.GammaConformityScore`` is relevant
-# in such cases.
# Two sub datasets are extracted: the training and test ones.
dataset_url = (
@@ -82,8 +81,8 @@
X = data[features]
y = data[target]
-X_train, X_test, y_train, y_test = train_test_split(
- X[features], y, test_size=0.2, random_state=random_state
+X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
+ X[features], y, test_size=0.2, random_state=RANDOM_STATE
)
##############################################################################
@@ -92,27 +91,29 @@
#
# Two models are trained with two different conformity score:
#
-# - :class:`~mapie.conformity_scores.AbsoluteConformityScore` (default
-# conformity score) relevant for target positive as well as negative.
+# - ``conformity_score = "absolute"`` (default
+# conformity score) is relevant for target positive as well as negative.
# The prediction interval widths are, in this case, approximately the same
# over the range of prediction.
#
-# - :class:`~mapie.conformity_scores.GammaConformityScore` relevant for target
+# - ``conformity_score = "gamma"`` is relevant for target
# following roughly a Gamma distribution. The prediction interval widths
# scale with the predicted value.
##############################################################################
# First, train model with
-# :class:`~mapie.conformity_scores.AbsoluteConformityScore`.
-mapie = MapieRegressor(model, random_state=random_state)
-mapie.fit(X_train, y_train)
-y_pred_absconfscore, y_pis_absconfscore = mapie.predict(
- X_test, alpha=alpha, ensemble=True
+# conformity_score = "absolute".
+mapie = CrossConformalRegressor(
+ model, confidence_level=confidence_level, conformity_score="absolute"
+)
+mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+y_pred_absconfscore, y_pis_absconfscore = mapie.predict_interval(X_test)
coverage_absconfscore = regression_coverage_score(
- y_test, y_pis_absconfscore[:, 0, 0], y_pis_absconfscore[:, 1, 0]
-)
+ y_test, y_pis_absconfscore
+)[0]
##############################################################################
# Prepare the results for matplotlib. Get the prediction intervals and their
@@ -135,19 +136,19 @@ def get_yerr(y_pred, y_pis):
)
##############################################################################
-# Then, train the model with
-# :class:`~mapie.conformity_scores.GammaConformityScore`.
-mapie = MapieRegressor(
- model, conformity_score=GammaConformityScore(), random_state=random_state
+# Then, train the model with
+# ``conformity_score = "gamma"``.
+mapie = CrossConformalRegressor(
+ model, confidence_level=confidence_level, conformity_score="gamma"
)
-mapie.fit(X_train, y_train)
-y_pred_gammaconfscore, y_pis_gammaconfscore = mapie.predict(
- X_test, alpha=[alpha], ensemble=True
+mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+y_pred_gammaconfscore, y_pis_gammaconfscore = mapie.predict_interval(X_test)
coverage_gammaconfscore = regression_coverage_score(
- y_test, y_pis_gammaconfscore[:, 0, 0], y_pis_gammaconfscore[:, 1, 0]
-)
+ y_test, y_pis_gammaconfscore
+)[0]
yerr_gammaconfscore = get_yerr(y_pred_gammaconfscore, y_pis_gammaconfscore)
pred_int_width_gammaconfscore = (
@@ -161,9 +162,9 @@ def get_yerr(y_pred, y_pis):
#
# Once the models have been trained, we now compare the prediction intervals
# obtained from the two conformity scores. We can see that the
-# :class:`~mapie.conformity_scores.AbsoluteConformityScore` generates
+# ``"absolute" ``conformity score generates
# prediction interval with almost the same width for all the predicted values.
-# Conversely, the `mapie.conformity_scores.GammaConformityScore`
+# Conversely, the ``"gamma"`` conformity score
# yields prediction interval with width scaling with the predicted values.
#
# The choice of the conformity score depends on the problem we face.
@@ -206,7 +207,7 @@ def get_yerr(y_pred, y_pis):
axs[1, img_id].set_ylim([ymin, ymax])
fig.suptitle(
- f"Predicted values with the prediction intervals of level {alpha}"
+ f"Predicted values with the prediction intervals of level {confidence_level}"
)
plt.subplots_adjust(wspace=0.3, hspace=0.3)
plt.show()
diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py
deleted file mode 100644
index 9fec3d91d..000000000
--- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""
-====================================
-Plotting CQR with symmetric argument
-====================================
-An example plot of :class:`~mapie.quantile_regression.MapieQuantileRegressor`
-illustrating the impact of the symmetry parameter.
-"""
-import numpy as np
-from matplotlib import pyplot as plt
-from sklearn.datasets import make_regression
-from sklearn.ensemble import GradientBoostingRegressor
-
-from mapie.metrics import regression_coverage_score
-from mapie.quantile_regression import MapieQuantileRegressor
-
-random_state = 2
-
-##############################################################################
-# We generate a synthetic data.
-
-X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
-
-# Define alpha level
-alpha = 0.2
-
-# Fit a Gradient Boosting Regressor for quantile regression
-gb_reg = GradientBoostingRegressor(
- loss="quantile", alpha=0.5, random_state=random_state
-)
-
-# MAPIE Quantile Regressor
-mapie_qr = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha)
-mapie_qr.fit(X, y, random_state=random_state)
-y_pred_sym, y_pis_sym = mapie_qr.predict(X, symmetry=True)
-y_pred_asym, y_pis_asym = mapie_qr.predict(X, symmetry=False)
-y_qlow = mapie_qr.estimators_[0].predict(X)
-y_qup = mapie_qr.estimators_[1].predict(X)
-
-# Calculate coverage scores
-coverage_score_sym = regression_coverage_score(
- y, y_pis_sym[:, 0], y_pis_sym[:, 1]
-)
-coverage_score_asym = regression_coverage_score(
- y, y_pis_asym[:, 0], y_pis_asym[:, 1]
-)
-
-# Sort the values for plotting
-order = np.argsort(X[:, 0])
-X_sorted = X[order]
-y_pred_sym_sorted = y_pred_sym[order]
-y_pis_sym_sorted = y_pis_sym[order]
-y_pred_asym_sorted = y_pred_asym[order]
-y_pis_asym_sorted = y_pis_asym[order]
-y_qlow = y_qlow[order]
-y_qup = y_qup[order]
-
-##############################################################################
-# We will plot the predictions and prediction intervals for both symmetric
-# and asymmetric intervals. The line represents the predicted values, the
-# dashed lines represent the prediction intervals, and the shaded area
-# represents the symmetric and asymmetric prediction intervals.
-
-plt.figure(figsize=(14, 7))
-
-plt.subplot(1, 2, 1)
-plt.xlabel("x")
-plt.ylabel("y")
-plt.scatter(X, y, alpha=0.3)
-plt.plot(X_sorted, y_qlow, color="C1")
-plt.plot(X_sorted, y_qup, color="C1")
-plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--")
-plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--")
-plt.fill_between(
- X_sorted.ravel(),
- y_pis_sym_sorted[:, 0].ravel(),
- y_pis_sym_sorted[:, 1].ravel(),
- alpha=0.2,
-)
-plt.title(
- f"Symmetric Intervals\n"
- f"Target and effective coverages for "
- f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})"
-)
-
-# Plot asymmetric prediction intervals
-plt.subplot(1, 2, 2)
-plt.xlabel("x")
-plt.ylabel("y")
-plt.scatter(X, y, alpha=0.3)
-plt.plot(X_sorted, y_qlow, color="C2")
-plt.plot(X_sorted, y_qup, color="C2")
-plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--")
-plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--")
-plt.fill_between(
- X_sorted.ravel(),
- y_pis_asym_sorted[:, 0].ravel(),
- y_pis_asym_sorted[:, 1].ravel(),
- alpha=0.2,
-)
-plt.title(
- f"Asymmetric Intervals\n"
- f"Target and effective coverages for "
- f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})"
-)
-plt.tight_layout()
-plt.show()
-
-##############################################################################
-# The symmetric intervals (`symmetry=True`) use a combined set of residuals
-# for both bounds, while the asymmetric intervals use distinct residuals for
-# each bound, allowing for more flexible and accurate intervals that reflect
-# the heteroscedastic nature of the data. The resulting effective coverages
-# demonstrate the theoretical guarantee of the target coverage level
-# ``1 - α``.
diff --git a/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py b/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py
index 28aedd9dc..f89ee5cde 100644
--- a/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py
+++ b/examples/regression/1-quickstart/plot_heteroscedastic_1d_data.py
@@ -1,14 +1,19 @@
"""
-============================================================
-Estimate the prediction intervals of 1D heteroscedastic data
-============================================================
+===========================================================================================
+Use MAPIE on data with uneven uncertainty
+===========================================================================================
-:class:`~mapie.regression.MapieRegressor` and
-:class:`~mapie.quantile_regression.MapieQuantileRegressor` is used
+
+:class:`~mapie_v1.regression.CrossConformalRegressor`,
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor`,
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor` are used
to estimate the prediction intervals of 1D heteroscedastic data using
-different strategies. The latter class should provide the same
-coverage for a lower width of intervals because it adapts the prediction
-intervals to the local heteroscedastic noise.
+different strategies.
+
+The example clearly shows that
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor`
+should provide the same coverage with narrower intervals, because it
+adapts the prediction intervals to the local heteroscedastic noise.
"""
from typing import Tuple
@@ -18,12 +23,16 @@
from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import train_test_split
-from mapie._typing import NDArray
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from numpy.typing import NDArray
+from mapie_v1.regression import (
+ CrossConformalRegressor,
+ JackknifeAfterBootstrapRegressor,
+ ConformalizedQuantileRegressor
+)
-random_state = 42
+RANDOM_STATE = 42
def f(x: NDArray) -> NDArray:
@@ -59,14 +68,14 @@ def get_heteroscedastic_data(
[3]: y_true
[4]: y_true_sigma
"""
- np.random.seed(random_state)
+ np.random.seed(RANDOM_STATE)
q95 = scipy.stats.norm.ppf(0.95)
X_train = np.linspace(0, 1, n_train)
X_true = np.linspace(0, 1, n_true)
y_train = f(X_train) + np.random.normal(0, sigma, n_train) * X_train
y_true = f(X_true)
y_true_sigma = q95 * sigma * X_true
- return X_train, y_train, X_true, y_true, y_true_sigma
+ return X_train.reshape(-1, 1), y_train, X_true.reshape(-1, 1), y_true, y_true_sigma
def plot_1d_data(
@@ -122,7 +131,9 @@ def plot_1d_data(
ax.legend()
-X_train, y_train, X_test, y_test, y_test_sigma = get_heteroscedastic_data()
+X_train_conformalize, y_train_conformalize, X_test, y_test, y_test_sigma = (
+ get_heteroscedastic_data()
+)
polyn_model = Pipeline(
[
@@ -141,47 +152,65 @@ def plot_1d_data(
)
STRATEGIES = {
- "jackknife": {"method": "base", "cv": -1},
- "jackknife_plus": {"method": "plus", "cv": -1},
- "jackknife_minmax": {"method": "minmax", "cv": -1},
- "cv_plus": {"method": "plus", "cv": 10},
- "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)},
- "conformalized_quantile_regression": {"method": "quantile", "cv": "split"},
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
2, 3, figsize=(3 * 6, 12)
)
axs = [ax1, ax2, ax3, ax4, ax5, ax6]
-for i, (strategy, params) in enumerate(STRATEGIES.items()):
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor( # type: ignore
- polyn_model_quant,
- **params
+for i, (strategy_name, strategy_params) in enumerate(STRATEGIES.items()):
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
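+        # CQR fits its quantile estimators on a training split and uses a
+        # separate conformalization split, hence the extra train_test_split.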
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
)
- mapie.fit(X_train.reshape(-1, 1), y_train, random_state=random_state)
- y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1))
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
+ mapie.fit(X_train, y_train)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred, y_pis = mapie.predict_interval(X_test)
else:
- mapie = MapieRegressor( # type: ignore
- polyn_model,
- agg_function="median",
- n_jobs=-1,
- **params
- )
- mapie.fit(X_train.reshape(-1, 1), y_train)
- y_pred, y_pis = mapie.predict(
- X_test.reshape(-1, 1),
- alpha=0.05,
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
)
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred, y_pis = mapie.predict_interval(X_test)
plot_1d_data(
- X_train,
- y_train,
- X_test,
+ X_train_conformalize.ravel(),
+ y_train_conformalize,
+ X_test.ravel(),
y_test,
y_test_sigma,
y_pred,
y_pis[:, 0, 0],
y_pis[:, 1, 0],
axs[i],
- strategy,
+ strategy_name,
)
plt.show()
diff --git a/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py b/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py
index 9340739af..504579248 100644
--- a/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py
+++ b/examples/regression/1-quickstart/plot_homoscedastic_1d_data.py
@@ -1,11 +1,15 @@
"""
-==========================================================
-Estimate the prediction intervals of 1D homoscedastic data
-==========================================================
+=========================================================================================
+Use MAPIE on data with constant uncertainty
+=========================================================================================
-:class:`~mapie.regression.MapieRegressor` and
-:class:`~mapie.quantile_regression.MapieQuantileRegressor`
-is used to estimate the prediction intervals of 1D homoscedastic
+
+We show here how to use various MAPIE methods on data with homoscedastic noise.
+
+:class:`~mapie_v1.regression.CrossConformalRegressor`,
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor`,
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor`,
+are used to estimate the prediction intervals of 1D homoscedastic
data using different strategies.
"""
from typing import Tuple
@@ -16,12 +20,16 @@
from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import train_test_split
-from mapie._typing import NDArray
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
-random_state = 42
+from numpy.typing import NDArray
+from mapie_v1.regression import (
+ CrossConformalRegressor,
+ JackknifeAfterBootstrapRegressor,
+ ConformalizedQuantileRegressor
+)
+RANDOM_STATE = 42
def f(x: NDArray) -> NDArray:
@@ -57,14 +65,16 @@ def get_homoscedastic_data(
[3]: y_true
[4]: y_true_sigma
"""
- np.random.seed(random_state)
+ rng = np.random.default_rng(RANDOM_STATE)
q95 = scipy.stats.norm.ppf(0.95)
X_train = np.linspace(0, 1, n_train)
X_true = np.linspace(0, 1, n_true)
- y_train = f(X_train) + np.random.normal(0, sigma, n_train)
+ y_train = f(X_train) + rng.normal(0, sigma, n_train)
y_true = f(X_true)
y_true_sigma = np.full(len(y_true), q95 * sigma)
- return X_train, y_train, X_true, y_true, y_true_sigma
+ return (
+ X_train.reshape(-1, 1), y_train, X_true.reshape(-1, 1), y_true, y_true_sigma
+ )
def plot_1d_data(
@@ -120,7 +130,10 @@ def plot_1d_data(
ax.legend()
-X_train, y_train, X_test, y_test, y_test_sigma = get_homoscedastic_data()
+X_train_conformalize, y_train_conformalize, X_test, y_test, y_test_sigma = (
+ get_homoscedastic_data()
+)
+
polyn_model = Pipeline(
[
@@ -139,47 +152,66 @@ def plot_1d_data(
)
STRATEGIES = {
- "jackknife": {"method": "base", "cv": -1},
- "jackknife_plus": {"method": "plus", "cv": -1},
- "jackknife_minmax": {"method": "minmax", "cv": -1},
- "cv_plus": {"method": "plus", "cv": 10},
- "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)},
- "conformalized_quantile_regression": {"method": "quantile", "cv": "split"},
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
2, 3, figsize=(3 * 6, 12)
)
axs = [ax1, ax2, ax3, ax4, ax5, ax6]
-for i, (strategy, params) in enumerate(STRATEGIES.items()):
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor( # type: ignore
- polyn_model_quant,
- **params
+for i, (strategy_name, strategy_params) in enumerate(STRATEGIES.items()):
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
)
- mapie.fit(X_train.reshape(-1, 1), y_train, random_state=random_state)
- y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1))
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
+ mapie.fit(X_train, y_train)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred, y_pis = mapie.predict_interval(X_test)
else:
- mapie = MapieRegressor( # type: ignore
- polyn_model,
- agg_function="median",
- n_jobs=-1,
- **params
- )
- mapie.fit(X_train.reshape(-1, 1), y_train)
- y_pred, y_pis = mapie.predict(
- X_test.reshape(-1, 1),
- alpha=0.05,
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
)
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred, y_pis = mapie.predict_interval(X_test)
plot_1d_data(
- X_train,
- y_train,
- X_test,
+ X_train_conformalize.ravel(),
+ y_train_conformalize,
+ X_test.ravel(),
y_test,
y_test_sigma,
y_pred,
y_pis[:, 0, 0],
y_pis[:, 1, 0],
axs[i],
- strategy,
+ strategy_name,
)
plt.show()
diff --git a/examples/regression/1-quickstart/plot_prefit.py b/examples/regression/1-quickstart/plot_prefit.py
index 91498c3ee..baaa1599f 100644
--- a/examples/regression/1-quickstart/plot_prefit.py
+++ b/examples/regression/1-quickstart/plot_prefit.py
@@ -1,19 +1,20 @@
"""
-===========================================================================
-Example use of the prefit parameter with neural networks and LGBM Regressor
-===========================================================================
+==========================================================================================================
+Use MAPIE with a pre-trained model
+==========================================================================================================
-:class:`~mapie.regression.MapieRegressor` and
-:class:`~mapie.quantile_regression.MapieQuantileRegressor`
-are used to calibrate uncertainties for large models for
+
+:class:`~mapie_v1.regression.SplitConformalRegressor` and
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor`
+are used to conformalize uncertainties for large models for
which the cost of cross-validation is too high. Typically,
neural networks rely on a single validation set.
In this example, we first fit a neural network on the training set. We
-then compute residuals on a validation set with the `cv="prefit"` parameter.
+then compute residuals on a validation set with the ``prefit=True`` parameter.
Finally, we evaluate the model with prediction intervals on a testing set.
-We will also show how to use the prefit method in the conformalized quantile
-regressor.
+In a second part, we will also show how to use the prefit method in the
+conformalized quantile regressor.
"""
@@ -23,24 +24,24 @@
import scipy
from lightgbm import LGBMRegressor
from matplotlib import pyplot as plt
-from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
-from mapie._typing import NDArray
-from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
+from numpy.typing import NDArray
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import SplitConformalRegressor, ConformalizedQuantileRegressor
+from mapie_v1.utils import train_conformalize_test_split
warnings.filterwarnings("ignore")
-alpha = 0.1
+RANDOM_STATE = 1
+confidence_level = 0.9
##############################################################################
-# 1. Generate dataset
-# -----------------------------------------------------------------------------
+#
#
# We start by defining a function that we will use to generate data. We then
# add random noise to the y values. Then we split the dataset to have
-# a training, calibration and test set.
+# a training, conformalize and test set.
def f(x: NDArray) -> NDArray:
@@ -49,90 +50,68 @@ def f(x: NDArray) -> NDArray:
# Generate data
+rng = np.random.default_rng(59)
sigma = 0.1
n_samples = 10000
X = np.linspace(0, 1, n_samples)
-y = f(X) + np.random.normal(0, sigma, n_samples)
+y = f(X) + rng.normal(0, sigma, n_samples)
-# Train/validation/test split
-X_train_cal, X_test, y_train_cal, y_test = train_test_split(
- X, y, test_size=1 / 10
-)
-X_train, X_cal, y_train, y_cal = train_test_split(
- X_train_cal, y_train_cal, test_size=1 / 9
+# Train/conformalize/test split
+(
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+) = train_conformalize_test_split(
+ X, y,
+ train_size=0.8, conformalize_size=0.1, test_size=0.1,
+ random_state=RANDOM_STATE
)
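+# The three proportions passed above (0.8, 0.1, 0.1) sum to 1: the full
+# dataset is split into training, conformalize and test sets.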
##############################################################################
-# 2. Pre-train models
+# 1. Use a neural network
+# -----------------------------------------------------------------------------
+# 1.1 Pre-train a neural network
# -----------------------------------------------------------------------------
#
# For this example, we will train a
# :class:`~sklearn.neural_network.MLPRegressor` for
-# :class:`~mapie.regression.MapieRegressor` and multiple LGBMRegressor with a
-# quantile objective as this is a requirement to perform conformalized
-# quantile regression using
-# :class:`~mapie.quanitle_regression.MapieQuantileRegressor`. Note that the
-# three estimators need to be trained at quantile values of
-# ``(α/2, 1-(α/2), 0.5)``.
+# :class:`~mapie_v1.regression.SplitConformalRegressor`.
-# Train a MLPRegressor for MapieRegressor
-est_mlp = MLPRegressor(activation="relu", random_state=1)
+# Train a MLPRegressor for SplitConformalRegressor
+est_mlp = MLPRegressor(activation="relu", random_state=RANDOM_STATE)
est_mlp.fit(X_train.reshape(-1, 1), y_train)
-# Train LGBMRegressor models for MapieQuantileRegressor
-list_estimators_cqr = []
-for alpha_ in [alpha / 2, (1 - (alpha / 2)), 0.5]:
- estimator_ = LGBMRegressor(
- objective='quantile',
- alpha=alpha_,
- )
- estimator_.fit(X_train.reshape(-1, 1), y_train)
- list_estimators_cqr.append(estimator_)
-
##############################################################################
-# 3. Using MAPIE to calibrate the models
+# 1.2 Use MAPIE to conformalize the models
# -----------------------------------------------------------------------------
#
-# We will now proceed to calibrate the models using MAPIE. To this aim, we set
-# `cv="prefit"` so that we use the models that we already trained prior.
-# We then precict using the test set and evaluate its coverage.
-
-
-# Calibrate uncertainties on calibration set
-mapie = MapieRegressor(est_mlp, cv="prefit")
-mapie.fit(X_cal.reshape(-1, 1), y_cal)
+# We will now proceed to conformalize the model using MAPIE. To this aim, we set
+# ``prefit=True`` so that we use the model we already trained.
+# We then predict using the test set and evaluate its coverage.
-# Evaluate prediction and coverage level on testing set
-y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
-coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
-# Calibrate uncertainties on calibration set
-mapie_cqr = MapieQuantileRegressor(list_estimators_cqr, cv="prefit")
-mapie_cqr.fit(X_cal.reshape(-1, 1), y_cal)
+# Conformalize uncertainties on conformalize set
+mapie = SplitConformalRegressor(
+ estimator=est_mlp, confidence_level=confidence_level, prefit=True
+)
+mapie.conformalize(X_conformalize.reshape(-1, 1), y_conformalize)
# Evaluate prediction and coverage level on testing set
-y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1))
-coverage_cqr = regression_coverage_score(
- y_test,
- y_pis_cqr[:, 0, 0],
- y_pis_cqr[:, 1, 0]
-)
+y_pred, y_pis = mapie.predict_interval(X_test.reshape(-1, 1))
+coverage = regression_coverage_score(y_test, y_pis)[0]
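+# As a quick check (a minimal sketch), the effective coverage should be close
+# to the requested confidence level:
+print(f"Effective coverage: {coverage:.3f} (target: {confidence_level})")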
##############################################################################
-# 4. Plots
+# 1.3 Plot results
# -----------------------------------------------------------------------------
#
-# In order to view the results shown above, we will plot each other predictions
-# with their prediction interval. The multi-layer perceptron (MLP) with
-# :class:`~mapie.regression.MapieRegressor` and LGBMRegressor with
-# :class:`~mapie.quantile_regression.MapieQuantileRegressor`.
+# In order to view the results, we will plot the predictions of the
+# multi-layer perceptron (MLP) with their prediction intervals calculated with
+# :class:`~mapie_v1.regression.SplitConformalRegressor`.
# Plot obtained prediction intervals on testing set
-theoretical_semi_width = scipy.stats.norm.ppf(1 - alpha) * sigma
+theoretical_semi_width = scipy.stats.norm.ppf(confidence_level) * sigma
y_test_theoretical = f(X_test)
order = np.argsort(X_test)
@@ -148,9 +127,106 @@ def f(x: NDArray) -> NDArray:
y_pis[:, 0, 0][order],
y_pis[:, 1, 0][order],
alpha=0.4,
- label="prediction intervals MP",
+ label="prediction intervals SCR",
color="green"
)
+
+plt.title(
+ f"Target and effective coverages for:\n "
+ f"MLP with SplitConformalRegressor, confidence_level={confidence_level}: "
+ + f"(coverage is {coverage:.3f})\n"
+)
+plt.scatter(X_test, y_test, color="red", alpha=0.7, label="testing", s=2)
+plt.plot(
+ X_test[order],
+ y_test_theoretical[order],
+ color="gray",
+ label="True confidence intervals",
+)
+plt.plot(
+ X_test[order],
+ y_test_theoretical[order] - theoretical_semi_width,
+ color="gray",
+ ls="--",
+)
+plt.plot(
+ X_test[order],
+ y_test_theoretical[order] + theoretical_semi_width,
+ color="gray",
+ ls="--",
+)
+plt.xlabel("x")
+plt.ylabel("y")
+plt.legend(
+ loc='upper center',
+ bbox_to_anchor=(0.5, -0.05),
+ fancybox=True,
+ shadow=True,
+ ncol=3
+)
+plt.show()
+
+
+##############################################################################
+# 2. Use LGBM models
+# -----------------------------------------------------------------------------
+# 2.1 Pre-train LGBM models
+# -----------------------------------------------------------------------------
+#
+# For this example, we will train multiple LGBMRegressor with a
+# quantile objective as this is a requirement to perform conformalized
+# quantile regression using
+# :class:`~mapie_v1.regression.ConformalizedQuantileRegressor`. Note that the
+# three estimators need to be trained at quantile values of
+# ``((1 - confidence_level) / 2, (1 + confidence_level) / 2, 0.5)``.
+
+# Train LGBMRegressor models for ConformalizedQuantileRegressor
+list_estimators_cqr = []
+for alpha_ in [(1 - confidence_level) / 2, (1 + confidence_level) / 2, 0.5]:
+ estimator_ = LGBMRegressor(
+ objective='quantile',
+ alpha=alpha_,
+ )
+ estimator_.fit(X_train.reshape(-1, 1), y_train)
+ list_estimators_cqr.append(estimator_)
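+# Note: the estimator trained at quantile 0.5 is the one providing the point
+# predictions of the conformalized quantile regressor.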
+
+##############################################################################
+# 2.2 Use MAPIE to conformalize the models
+# -----------------------------------------------------------------------------
+#
+# We will now proceed to conformalize the models using MAPIE. To this aim, we set
+# ``prefit=True`` so that we use the models we already trained.
+# We then predict using the test set and evaluate its coverage.
+
+# Conformalize uncertainties on conformalize set
+mapie_cqr = ConformalizedQuantileRegressor(
+ list_estimators_cqr, confidence_level=0.9, prefit=True
+)
+mapie_cqr.conformalize(X_conformalize.reshape(-1, 1), y_conformalize)
+
+# Evaluate prediction and coverage level on testing set
+y_pred_cqr, y_pis_cqr = mapie_cqr.predict_interval(X_test.reshape(-1, 1))
+coverage_cqr = regression_coverage_score(
+ y_test,
+ y_pis_cqr
+)[0]
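+# As an illustrative check (numpy only), the mean width of the CQR intervals
+# can be computed and contrasted with the intervals of part 1:
+mean_width_cqr = (y_pis_cqr[:, 1, 0] - y_pis_cqr[:, 0, 0]).mean()
+print(f"Mean CQR interval width: {mean_width_cqr:.3f}")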
+
+
+##############################################################################
+# 2.3 Plot results
+# -----------------------------------------------------------------------------
+#
+# As for the MLP predictions, we plot the predictions of the LGBMRegressor
+# with their prediction intervals calculated with
+# :class:`~mapie_v1.regression.ConformalizedQuantileRegressor`.
+
+# Plot obtained prediction intervals on testing set
+theoretical_semi_width = scipy.stats.norm.ppf(confidence_level) * sigma
+y_test_theoretical = f(X_test)
+order = np.argsort(X_test)
+
+plt.figure(figsize=(8, 8))
+
plt.plot(
X_test[order],
y_pred_cqr[order],
@@ -162,15 +238,13 @@ def f(x: NDArray) -> NDArray:
y_pis_cqr[:, 0, 0][order],
y_pis_cqr[:, 1, 0][order],
alpha=0.4,
- label="prediction intervals MQP",
+ label="prediction intervals CQR",
color="blue"
)
plt.title(
f"Target and effective coverages for:\n "
- f"MLP with MapieRegressor alpha={alpha}: "
- + f"({1 - alpha:.3f}, {coverage:.3f})\n"
- f"LGBM with MapieQuantileRegressor alpha={alpha}: "
- + f"({1 - alpha:.3f}, {coverage_cqr:.3f})"
+ f"LGBM with ConformalizedQuantileRegressor, confidence_level={confidence_level}: "
+ + f"(coverage is {coverage_cqr:.3f})"
)
plt.scatter(X_test, y_test, color="red", alpha=0.7, label="testing", s=2)
plt.plot(
diff --git a/examples/regression/1-quickstart/plot_timeseries_example.py b/examples/regression/1-quickstart/plot_timeseries_example.py
deleted file mode 100644
index f1027fe31..000000000
--- a/examples/regression/1-quickstart/plot_timeseries_example.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""
-=======================================================
-Estimating prediction intervals of time series forecast
-=======================================================
-This example uses :class:`~mapie.regression.MapieRegressor` to estimate
-prediction intervals associated with time series forecast. We use the
-standard cross-validation approach to estimate conformity scores and associated
-prediction intervals.
-
-We use here the Victoria electricity demand dataset used in the book
-"Forecasting: Principles and Practice" by R. J. Hyndman and G. Athanasopoulos.
-The electricity demand features daily and weekly seasonalities and is impacted
-by the temperature, considered here as a exogeneous variable.
-
-The data is modelled by a Random Forest model with a
-:class:`~sklearn.model_selection.RandomizedSearchCV` using a sequential
-:class:`~sklearn.model_selection.TimeSeriesSplit` cross validation, in which
-the training set is prior to the validation set.
-The best model is then feeded into :class:`~mapie.regression.MapieRegressor`
-to estimate the associated prediction intervals.
-We consider the standard CV+ resampling method.
-
-We wish to emphasize one main limitation with this example.
-We use a standard cross-validation in Mapie to estimate the prediction
-intervals, through the `sklearn.model_selection.KFold()` object.
-Residuals are therefore estimated using models trained on data with higher
-indices than the validation data, which is inappropriate for time-series data.
-However, using a `sklearn.model_selection.TimeSeriesSplit` cross validation
-object for estimating the residuals breaks the theoretical guarantees of the
-Jackknife+ and CV+ methods.
-"""
-import pandas as pd
-from matplotlib import pylab as plt
-from scipy.stats import randint
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
-
-from mapie.metrics import (regression_coverage_score,
- regression_mean_width_score)
-from mapie.regression import MapieRegressor
-
-# Load input data and feature engineering
-demand_df = pd.read_csv(
- "../../data/demand_temperature.csv", parse_dates=True, index_col=0
-)
-demand_df["Date"] = pd.to_datetime(demand_df.index)
-demand_df["Weekofyear"] = demand_df.Date.dt.isocalendar().week.astype("int64")
-demand_df["Weekday"] = demand_df.Date.dt.isocalendar().day.astype("int64")
-demand_df["Hour"] = demand_df.index.hour
-
-# Train/validation/test split
-num_test_steps = 24 * 7 * 2
-demand_train = demand_df.iloc[:-num_test_steps, :].copy()
-demand_test = demand_df.iloc[-num_test_steps:, :].copy()
-X_train = demand_train.loc[:, ["Weekofyear", "Weekday", "Hour", "Temperature"]]
-y_train = demand_train["Demand"]
-X_test = demand_test.loc[:, ["Weekofyear", "Weekday", "Hour", "Temperature"]]
-y_test = demand_test["Demand"]
-
-# CV parameter search
-n_iter = 10
-n_splits = 5
-tscv = TimeSeriesSplit(n_splits=n_splits)
-random_state = 59
-rf_model = RandomForestRegressor(random_state=random_state)
-rf_params = {"max_depth": randint(2, 30), "n_estimators": randint(10, 1e3)}
-cv_obj = RandomizedSearchCV(
- rf_model,
- param_distributions=rf_params,
- n_iter=n_iter,
- cv=tscv,
- scoring="neg_root_mean_squared_error",
- random_state=random_state,
- verbose=0,
- n_jobs=-1,
-)
-cv_obj.fit(X_train, y_train)
-best_est = cv_obj.best_estimator_
-
-# Estimate prediction intervals on test set with best estimator
-# Here, a non-nested CV approach is used for the sake of computational
-# time, but a nested CV approach is preferred.
-# See the dedicated example in the gallery for more information.
-alpha = 0.1
-mapie = MapieRegressor(
- best_est, method="plus", cv=n_splits, agg_function="median", n_jobs=-1
-)
-mapie.fit(X_train, y_train)
-y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
-coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
-width = regression_mean_width_score(y_pis[:, 0, 0], y_pis[:, 1, 0])
-
-# Print results
-print(
- "Coverage and prediction interval width mean for CV+: "
- f"{coverage:.3f}, {width:.3f}"
-)
-
-# Plot estimated prediction intervals on test set
-fig = plt.figure(figsize=(15, 5))
-ax = fig.add_subplot(1, 1, 1)
-ax.set_ylabel("Hourly demand (GW)")
-ax.plot(demand_test.Demand, lw=2, label="Test data", c="C1")
-ax.plot(demand_test.index, y_pred, lw=2, c="C2", label="Predictions")
-ax.fill_between(
- demand_test.index,
- y_pis[:, 0, 0],
- y_pis[:, 1, 0],
- color="C2",
- alpha=0.2,
- label="CV+ PIs",
-)
-ax.legend()
-plt.show()
diff --git a/examples/regression/1-quickstart/plot_toy_model.py b/examples/regression/1-quickstart/plot_toy_model.py
index 0435801d5..60148b267 100644
--- a/examples/regression/1-quickstart/plot_toy_model.py
+++ b/examples/regression/1-quickstart/plot_toy_model.py
@@ -1,51 +1,57 @@
"""
-======================================================
-Plotting MAPIE prediction intervals with a toy dataset
-======================================================
-An example plot of :class:`~mapie.regression.MapieRegressor` used
+=====================================================================================
+Use MAPIE to plot prediction intervals
+=====================================================================================
+An example plot of :class:`~mapie_v1.regression.SplitConformalRegressor` used
in the Quickstart.
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
-from sklearn.linear_model import LinearRegression
-from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieRegressor
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import SplitConformalRegressor
+from mapie_v1.utils import train_conformalize_test_split
RANDOM_STATE = 42
-regressor = LinearRegression()
-X, y = make_regression(
- n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE
+
+X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
+
+(
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+) = train_conformalize_test_split(
+ X, y,
+ train_size=0.6, conformalize_size=0.2, test_size=0.2,
+ random_state=RANDOM_STATE
)
-alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, method="plus", random_state=RANDOM_STATE)
-mapie.fit(X, y)
-y_pred, y_pis = mapie.predict(X, alpha=alpha)
+confidence_level = [0.95, 0.68]
+mapie_regressor = SplitConformalRegressor(
+ confidence_level=confidence_level, prefit=False
+)
+mapie_regressor.fit(X_train, y_train)
+mapie_regressor.conformalize(X_conformalize, y_conformalize)
+y_pred, y_pred_interval = mapie_regressor.predict_interval(X_test)
-coverage_scores = [
- regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
- for i, _ in enumerate(alpha)
-]
+coverage_scores = regression_coverage_score(y_test, y_pred_interval)
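+# As a quick check (a minimal sketch), each entry of ``coverage_scores``
+# matches one of the requested confidence levels:
+for level, score in zip(confidence_level, coverage_scores):
+    print(f"confidence_level={level:.2f}: effective coverage {score:.3f}")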
plt.xlabel("x")
plt.ylabel("y")
-plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_pred, color="C1")
-order = np.argsort(X[:, 0])
-plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
-plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
+plt.scatter(X_test, y_test, alpha=0.3)
+plt.plot(X_test, y_pred, color="C1")
+order = np.argsort(X_test[:, 0])
+plt.plot(X_test[order], y_pred_interval[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X_test[order], y_pred_interval[order][:, 1, 1], color="C1", ls="--")
plt.fill_between(
- X[order].ravel(),
- y_pis[order][:, 0, 0].ravel(),
- y_pis[order][:, 1, 0].ravel(),
+ X_test[order].ravel(),
+ y_pred_interval[order][:, 0, 0].ravel(),
+ y_pred_interval[order][:, 1, 0].ravel(),
alpha=0.2,
)
plt.title(
- f"Target and effective coverages for "
- f"alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
- f"Target and effective coverages for "
- f"alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
+ f"Effective coverage for "
+ f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
+ f"Effective coverage for "
+ f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
)
plt.show()
diff --git a/examples/regression/4-tutorials/plot_ts-tutorial.py b/examples/regression/1-quickstart/plot_ts-tutorial.py
similarity index 91%
rename from examples/regression/4-tutorials/plot_ts-tutorial.py
rename to examples/regression/1-quickstart/plot_ts-tutorial.py
index 13dde284e..c71ca949a 100644
--- a/examples/regression/4-tutorials/plot_ts-tutorial.py
+++ b/examples/regression/1-quickstart/plot_ts-tutorial.py
@@ -4,7 +4,7 @@
========================
In this tutorial we describe how to use
-:class:`~mapie.time_series_regression.MapieTimeSeriesRegressor`
+:class:`~mapie.regression.MapieTimeSeriesRegressor`
to estimate prediction intervals associated with time series forecast.
Here, we use the Victoria electricity demand dataset used in the book
@@ -24,7 +24,8 @@
the EnbPI method.
As its parent class :class:`~MapieRegressor`,
-:class:`~MapieTimeSeriesRegressor` has two main arguments : "cv", and "method".
+:class:`~mapie.regression.MapieTimeSeriesRegressor` has two main arguments:
+"cv" and "method".
In order to implement EnbPI, "method" must be set to "enbpi" (the default
value) while "cv" must be set to the :class:`~mapie.subsample.BlockBootstrap`
class that block bootstraps the training set.
@@ -34,8 +35,8 @@ class that block bootstraps the training set.
The EnbPI method allows you to update the residuals during the prediction,
each time new observations are available so that the deterioration of
predictions, or the increase of noise level, can be dynamically taken into
-account. It can be done with :class:`~MapieTimeSeriesRegressor` through
-the ``partial_fit`` class method called at every step.
+account. It can be done with :class:`~mapie.regression.MapieTimeSeriesRegressor`
+through the ``partial_fit`` class method called at every step.
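+Schematically, the update loop looks as follows (a minimal sketch, assuming
+``gap`` new observations become available at each step, as in the update
+loops used later in this tutorial):
+
+.. code-block:: python
+
+    for step in range(gap, len(X_test), gap):
+        mapie_enbpi.partial_fit(
+            X_test.iloc[(step - gap):step, :],
+            y_test.iloc[(step - gap):step],
+        )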
The ACI strategy allows you to adapt the conformal inference
@@ -55,8 +56,10 @@ class that block bootstraps the training set.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
-from mapie.metrics import (coverage_width_based, regression_coverage_score,
- regression_mean_width_score)
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score, coverage_width_based,
+)
from mapie.regression import MapieTimeSeriesRegressor
from mapie.subsample import BlockBootstrap
@@ -210,13 +213,13 @@ class that block bootstraps the training set.
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
y_pred_enbpi_npfit, y_pis_enbpi_npfit = mapie_enbpi.predict(
- X_test, alpha=alpha, ensemble=True, optimize_beta=True,
+ X_test, alpha=alpha, ensemble=True,
allow_infinite_bounds=True
)
y_pis_enbpi_npfit = np.clip(y_pis_enbpi_npfit, 1, 10)
coverage_enbpi_npfit = regression_coverage_score(
- y_test, y_pis_enbpi_npfit[:, 0, 0], y_pis_enbpi_npfit[:, 1, 0]
-)
+ y_test, y_pis_enbpi_npfit
+)[0]
width_enbpi_npfit = regression_mean_width_score(
y_pis_enbpi_npfit[:, 0, 0], y_pis_enbpi_npfit[:, 1, 0]
)
@@ -233,7 +236,7 @@ class that block bootstraps the training set.
y_pred_aci_npfit = np.zeros(y_pred_enbpi_npfit.shape)
y_pis_aci_npfit = np.zeros(y_pis_enbpi_npfit.shape)
y_pred_aci_npfit[:gap], y_pis_aci_npfit[:gap, :, :] = mapie_aci.predict(
- X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True,
+ X_test.iloc[:gap, :], alpha=alpha, ensemble=True,
allow_infinite_bounds=True
)
for step in range(gap, len(X_test), gap):
@@ -249,7 +252,6 @@ class that block bootstraps the training set.
X_test.iloc[step:(step + gap), :],
alpha=alpha,
ensemble=True,
- optimize_beta=True,
allow_infinite_bounds=True
)
y_pis_aci_npfit[step:step + gap, :, :] = np.clip(
@@ -257,8 +259,8 @@ class that block bootstraps the training set.
)
coverage_aci_npfit = regression_coverage_score(
- y_test, y_pis_aci_npfit[:, 0, 0], y_pis_aci_npfit[:, 1, 0]
-)
+ y_test, y_pis_aci_npfit
+)[0]
width_aci_npfit = regression_mean_width_score(
y_pis_aci_npfit[:, 0, 0], y_pis_aci_npfit[:, 1, 0]
)
@@ -284,7 +286,7 @@ class that block bootstraps the training set.
y_pred_enbpi_pfit = np.zeros(y_pred_enbpi_npfit.shape)
y_pis_enbpi_pfit = np.zeros(y_pis_enbpi_npfit.shape)
y_pred_enbpi_pfit[:gap], y_pis_enbpi_pfit[:gap, :, :] = mapie_enbpi.predict(
- X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True,
+ X_test.iloc[:gap, :], alpha=alpha, ensemble=True,
allow_infinite_bounds=True
)
@@ -300,15 +302,14 @@ class that block bootstraps the training set.
X_test.iloc[step:(step + gap), :],
alpha=alpha,
ensemble=True,
- optimize_beta=True,
allow_infinite_bounds=True
)
y_pis_enbpi_pfit[step:step + gap, :, :] = np.clip(
y_pis_enbpi_pfit[step:step + gap, :, :], 1, 10
)
coverage_enbpi_pfit = regression_coverage_score(
- y_test, y_pis_enbpi_pfit[:, 0, 0], y_pis_enbpi_pfit[:, 1, 0]
-)
+ y_test, y_pis_enbpi_pfit
+)[0]
width_enbpi_pfit = regression_mean_width_score(
y_pis_enbpi_pfit[:, 0, 0], y_pis_enbpi_pfit[:, 1, 0]
)
@@ -332,7 +333,7 @@ class that block bootstraps the training set.
y_pred_aci_pfit = np.zeros(y_pred_aci_npfit.shape)
y_pis_aci_pfit = np.zeros(y_pis_aci_npfit.shape)
y_pred_aci_pfit[:gap], y_pis_aci_pfit[:gap, :, :] = mapie_aci.predict(
- X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True,
+ X_test.iloc[:gap, :], alpha=alpha, ensemble=True,
allow_infinite_bounds=True
)
@@ -353,7 +354,6 @@ class that block bootstraps the training set.
X_test.iloc[step:(step + gap), :],
alpha=alpha,
ensemble=True,
- optimize_beta=True,
allow_infinite_bounds=True
)
y_pis_aci_pfit[step:step + gap, :, :] = np.clip(
@@ -361,8 +361,8 @@ class that block bootstraps the training set.
)
coverage_aci_pfit = regression_coverage_score(
- y_test, y_pis_aci_pfit[:, 0, 0], y_pis_aci_pfit[:, 1, 0]
-)
+ y_test, y_pis_aci_pfit
+)[0]
width_aci_pfit = regression_mean_width_score(
y_pis_aci_pfit[:, 0, 0], y_pis_aci_pfit[:, 1, 0]
)
@@ -463,28 +463,24 @@ class that block bootstraps the training set.
for i in range(window, len(y_test), 1):
rolling_coverage_aci_npfit.append(
regression_coverage_score(
- y_test[i-window:i], y_pis_aci_npfit[i-window:i, 0, 0],
- y_pis_aci_npfit[i-window:i, 1, 0]
- )
+ y_test[i-window:i], y_pis_aci_npfit[i-window:i]
+ )[0]
)
rolling_coverage_aci_pfit.append(
regression_coverage_score(
- y_test[i-window:i], y_pis_aci_pfit[i-window:i, 0, 0],
- y_pis_aci_pfit[i-window:i, 1, 0]
- )
+ y_test[i-window:i], y_pis_aci_pfit[i-window:i]
+ )[0]
)
rolling_coverage_enbpi_npfit.append(
regression_coverage_score(
- y_test[i-window:i], y_pis_enbpi_npfit[i-window:i, 0, 0],
- y_pis_enbpi_npfit[i-window:i, 1, 0]
- )
+ y_test[i-window:i], y_pis_enbpi_npfit[i-window:i]
+ )[0]
)
rolling_coverage_enbpi_pfit.append(
regression_coverage_score(
- y_test[i-window:i], y_pis_enbpi_pfit[i-window:i, 0, 0],
- y_pis_enbpi_pfit[i-window:i, 1, 0]
- )
+ y_test[i-window:i], y_pis_enbpi_pfit[i-window:i]
+ )[0]
)
plt.figure(figsize=(10, 5))
diff --git a/examples/regression/2-advanced-analysis/README.rst b/examples/regression/2-advanced-analysis/README.rst
index 9e0516c84..af6482625 100644
--- a/examples/regression/2-advanced-analysis/README.rst
+++ b/examples/regression/2-advanced-analysis/README.rst
@@ -1,5 +1,7 @@
.. _regression_examples_2:
+-----
+
2. Advanced analysis
--------------------
diff --git a/examples/regression/2-advanced-analysis/plot-coverage-width-based-criterion.py b/examples/regression/2-advanced-analysis/plot-coverage-width-based-criterion.py
index f9c7bdfb2..4425ff769 100644
--- a/examples/regression/2-advanced-analysis/plot-coverage-width-based-criterion.py
+++ b/examples/regression/2-advanced-analysis/plot-coverage-width-based-criterion.py
@@ -1,13 +1,16 @@
"""
-================================================
-Estimating coverage width based criterion
-================================================
-This example uses :class:`~mapie.regression.MapieRegressor`,
-:class:`~mapie.quantile_regression.MapieQuantileRegressor` and
+===============================================================================
+Focus on intervals width
+===============================================================================
+
+
+This example uses :class:`~mapie_v1.regression.CrossConformalRegressor`,
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor` and
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor`.
:class:`~mapie.metrics` is used to estimate the coverage width
based criterion of 1D homoscedastic data using different strategies.
The coverage width based criterion is computed with the function
-:func:`~mapie.metrics.coverage_width_based()`
+:func:`~mapie.metrics.regression.coverage_width_based`
"""
import os
@@ -19,16 +22,22 @@
from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import train_test_split
-from mapie.metrics import (coverage_width_based, regression_coverage_score,
- regression_mean_width_score)
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score, coverage_width_based,
+)
+from mapie_v1.regression import (
+ CrossConformalRegressor,
+ ConformalizedQuantileRegressor,
+ JackknifeAfterBootstrapRegressor)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
warnings.filterwarnings("ignore")
+RANDOM_STATE = 1
##############################################################################
# Estimating the aleatoric uncertainty of heteroscedastic noisy data
# ---------------------------------------------------------------------
@@ -37,6 +46,7 @@
# function that generates one-dimensional data with normal noise uniformly
# in a given interval.
+
def x_sinx(x):
"""One-dimensional x*sin(x) function."""
return x*np.sin(x)
@@ -74,7 +84,7 @@ def get_1d_data_with_heteroscedastic_noise(
min_x, max_x, n_samples, noise = 0, 5, 300, 0.5
(
- X_train, y_train, X_test, y_test, y_mesh
+ X_train_conformalize, y_train_conformalize, X_test, y_test, y_mesh
) = get_1d_data_with_heteroscedastic_noise(
x_sinx, min_x, max_x, n_samples, noise
)
@@ -85,7 +95,7 @@ def get_1d_data_with_heteroscedastic_noise(
plt.xlabel("x")
plt.ylabel("y")
-plt.scatter(X_train, y_train, color="C0")
+plt.scatter(X_train_conformalize, y_train_conformalize, color="C0")
plt.plot(X_test, y_mesh, color="C1")
plt.show()
@@ -118,35 +128,73 @@ def get_1d_data_with_heteroscedastic_noise(
# 0.05 in order to obtain a 95% confidence for our prediction intervals.
STRATEGIES = {
- "naive": dict(method="naive"),
- "jackknife": dict(method="base", cv=-1),
- "jackknife_plus": dict(method="plus", cv=-1),
- "jackknife_minmax": dict(method="minmax", cv=-1),
- "cv": dict(method="base", cv=10),
- "cv_plus": dict(method="plus", cv=10),
- "cv_minmax": dict(method="minmax", cv=10),
- "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
- "conformalized_quantile_regression": dict(
- method="quantile", cv="split", alpha=0.05
- )
+ "cv": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=10),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "cv_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "jackknife_minmax_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="minmax", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+
+
y_pred, y_pis = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor(polyn_model_quant, **params)
- mapie.fit(X_train, y_train, random_state=1)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(polyn_model, **params)
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
+ )
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
-
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
+ )
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
##############################################################################
# Once again, let’s compare the target confidence intervals with prediction
# intervals obtained with the Jackknife+, Jackknife-minmax, CV+, CV-minmax,
# Jackknife+-after-Bootstrap, and CQR strategies.
+
def plot_1d_data(
X_train,
y_train,
@@ -214,9 +262,8 @@ def plot_1d_data(
for strategy in STRATEGIES:
coverage_score[strategy] = regression_coverage_score(
y_test,
- y_pis[strategy][:, 0, 0],
- y_pis[strategy][:, 1, 0]
- )
+ y_pis[strategy]
+ )[0]
width_mean_score[strategy] = regression_mean_width_score(
y_pis[strategy][:, 0, 0],
y_pis[strategy][:, 1, 0]
diff --git a/examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py b/examples/regression/2-advanced-analysis/plot_ResidualNormalisedScore_tutorial.py
similarity index 67%
rename from examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py
rename to examples/regression/2-advanced-analysis/plot_ResidualNormalisedScore_tutorial.py
index 315830d17..0930ed7ae 100644
--- a/examples/regression/4-tutorials/plot_ResidualNormalisedScore_tutorial.py
+++ b/examples/regression/2-advanced-analysis/plot_ResidualNormalisedScore_tutorial.py
@@ -1,7 +1,9 @@
"""
-======================================
-Tutorial for residual normalised score
-======================================
+=====================================================================
+Focus on residual normalised score
+=====================================================================
+
+
We will use the sklearn california housing dataset to understand how the
residual normalised score works and show the multiple ways of using it.
@@ -19,15 +21,17 @@
from sklearn.model_selection import train_test_split
from mapie.conformity_scores import ResidualNormalisedScore
-from mapie.metrics import regression_coverage_score_v2, regression_ssc_score
-from mapie.regression import MapieRegressor
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_ssc_score,
+)
+from mapie_v1.regression import SplitConformalRegressor
+from mapie_v1.utils import train_conformalize_test_split
warnings.filterwarnings("ignore")
-random_state = 23
-rng = np.random.default_rng(random_state)
-round_to = 3
-
+RANDOM_STATE = 1
+rng = np.random.default_rng(RANDOM_STATE)
##############################################################################
# 1. Data
@@ -57,28 +61,26 @@
##############################################################################
# Let's now create the different splits for the dataset, with a training,
-# calibration, residual and test set. Recall that the calibration set is used
+# conformalize, residual and test set. Recall that the conformalize set is used
# for calibrating the prediction intervals and the residual set is used to fit
# the residual estimator used by the
# :class:`~mapie.conformity_scores.ResidualNormalisedScore`.
np.array(X)
np.array(y)
-X_train, X_test, y_train, y_test = train_test_split(
- X,
- y,
- random_state=random_state,
- test_size=0.02
-)
-X_train, X_calib, y_train, y_calib = train_test_split(
- X_train,
- y_train,
- random_state=random_state
+
+(
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+) = train_conformalize_test_split(
+ X, y,
+ train_size=0.7, conformalize_size=0.28, test_size=0.02,
+ random_state=RANDOM_STATE
)
-X_calib_prefit, X_res, y_calib_prefit, y_res = train_test_split(
- X_calib,
- y_calib,
- random_state=random_state,
+
+X_conformalize_prefit, X_res, y_conformalize_prefit, y_res = train_test_split(
+ X_conformalize,
+ y_conformalize,
+ random_state=RANDOM_STATE,
test_size=0.5
)
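+# Half of the conformalize set is kept for conformalization in the prefit
+# setups; the other half is used to fit the residual estimator.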
@@ -92,11 +94,11 @@
# a :class:`~sklearn.linear_model.LinearRegression` is used for the residual
# estimator. (Note that to avoid negative values it is trained with the log
# of the features and the exponential of the predictions are used).
-# It is also possible to use it with ``cv="prefit"`` i.e. with
+# It is also possible to use it with ``prefit=True`` i.e. with
# the base model trained beforehand. The third setup that we illustrate here
# is with the residual model prefitted: we can set the estimator in parameters
# of the class, not forgetting to specify ``prefit=True``. Finally, as an
-# example of the exotic parameterisation we can do : we use as a residual
+# example of a more exotic parameterisation, we use as a residual
# estimator a :class:`~sklearn.linear_model.LinearRegression` wrapped to avoid
# negative values like it is done by default in the class.
@@ -115,14 +117,14 @@ def predict(self, X):
return np.exp(y_pred)
-base_model = RandomForestRegressor(n_estimators=10, random_state=random_state)
+base_model = RandomForestRegressor(n_estimators=10, random_state=RANDOM_STATE)
base_model = base_model.fit(X_train, y_train)
residual_estimator = RandomForestRegressor(
n_estimators=20,
max_leaf_nodes=70,
min_samples_leaf=7,
- random_state=random_state
+ random_state=RANDOM_STATE
)
residual_estimator = residual_estimator.fit(
X_res, np.abs(np.subtract(y_res, base_model.predict(X_res)))
@@ -130,55 +132,76 @@ def predict(self, X):
wrapped_residual_estimator = PosEstim().fit(
X_res, np.abs(np.subtract(y_res, base_model.predict(X_res)))
)
+
+CONFIDENCE_LEVEL = 0.9
+
# Estimating prediction intervals
STRATEGIES = {
"Default": {
- "cv": "split",
- "conformity_score": ResidualNormalisedScore()
+ "class": SplitConformalRegressor,
+ "init_params": dict(
+ confidence_level=CONFIDENCE_LEVEL,
+ prefit=False,
+ conformity_score=ResidualNormalisedScore(),
+ ),
},
"Base model prefit": {
- "cv": "prefit",
- "estimator": base_model,
- "conformity_score": ResidualNormalisedScore(
- split_size=0.5, random_state=random_state
- )
+ "class": SplitConformalRegressor,
+ "init_params": dict(
+ estimator=base_model,
+ confidence_level=CONFIDENCE_LEVEL,
+ prefit=True,
+ conformity_score=ResidualNormalisedScore(
+ split_size=0.5,
+ random_state=RANDOM_STATE,
+ ),
+ ),
},
"Base and residual model prefit": {
- "cv": "prefit",
- "estimator": base_model,
- "conformity_score": ResidualNormalisedScore(
- residual_estimator=residual_estimator,
- random_state=random_state,
- prefit=True
- )
+ "class": SplitConformalRegressor,
+ "init_params": dict(
+ estimator=base_model,
+ confidence_level=CONFIDENCE_LEVEL,
+ prefit=True,
+ conformity_score=ResidualNormalisedScore(
+ residual_estimator=residual_estimator,
+ random_state=RANDOM_STATE,
+ prefit=True,
+ ),
+ ),
},
"Wrapped residual model": {
- "cv": "prefit",
- "estimator": base_model,
- "conformity_score": ResidualNormalisedScore(
- residual_estimator=wrapped_residual_estimator,
- random_state=random_state,
- prefit=True
- )
+ "class": SplitConformalRegressor,
+ "init_params": dict(
+ estimator=base_model,
+ confidence_level=CONFIDENCE_LEVEL,
+ prefit=True,
+ conformity_score=ResidualNormalisedScore(
+ residual_estimator=wrapped_residual_estimator,
+ random_state=RANDOM_STATE,
+ prefit=True,
+ ),
+ ),
},
}
-y_pred, intervals, coverage, cond_coverage = {}, {}, {}, {}
+y_pred, y_pis, coverage, cond_coverage = {}, {}, {}, {}
num_bins = 10
-alpha = 0.1
-for strategy, params in STRATEGIES.items():
- mapie = MapieRegressor(**params, random_state=random_state)
- if mapie.conformity_score.prefit:
- mapie.fit(X_calib_prefit, y_calib_prefit)
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ mapie = class_(**init_params)
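+    # `_prefit` is a private attribute; it is read here only to route the
+    # prefit strategies straight to conformalize().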
+ if mapie._prefit:
+ mapie.conformalize(X_conformalize_prefit, y_conformalize_prefit)
else:
- mapie.fit(X_calib, y_calib)
- y_pred[strategy], intervals[strategy] = mapie.predict(X_test, alpha=alpha)
-
- coverage[strategy] = regression_coverage_score_v2(
- y_test, intervals[strategy]
+ mapie.fit(X_train, y_train)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ coverage[strategy_name] = regression_coverage_score(
+ y_test, y_pis[strategy_name]
)
- cond_coverage[strategy] = regression_ssc_score(
- y_test, intervals[strategy], num_bins=num_bins
+ cond_coverage[strategy_name] = regression_ssc_score(
+ y_test, y_pis[strategy_name], num_bins=num_bins
)
@@ -272,19 +295,19 @@ def plot_predictions(y, y_pred, intervals, coverage, cond_coverage, ax=None):
plot_predictions(
y_test,
y_pred[strategy],
- intervals[strategy],
+ y_pis[strategy],
coverage[strategy][0],
cond_coverage[strategy][0],
ax=ax
)
-fig.suptitle(f"Predicted values and intervals of level {alpha}")
+fig.suptitle(f"Predicted values and intervals of level {CONFIDENCE_LEVEL}")
plt.tight_layout()
plt.show()
##############################################################################
-# The results show that all the setups reach the global coverage guaranteed of
-# 1-alpha.
+# The results show that all the setups reach the guaranteed global coverage of
+# confidence_level.
# It is interesting to note that the "base model prefit" and the "wrapped
# residual model" give exactly the same results. This is because they are
# the same model: one prefitted and one fitted directly in the class.
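For reference, the v1 workflow exercised by these strategies boils down to a
few lines. A minimal sketch, assuming the prefit ``base_model`` and the
conformalization data defined earlier in this example (an illustration, not
part of the example file):

    scr = SplitConformalRegressor(
        estimator=base_model,
        confidence_level=CONFIDENCE_LEVEL,
        prefit=True,
        conformity_score=ResidualNormalisedScore(
            split_size=0.5, random_state=RANDOM_STATE
        ),
    )
    # The estimator is already fitted, so only the conformalization step runs.
    scr.conformalize(X_conformalize_prefit, y_conformalize_prefit)
    y_pred_sketch, y_pis_sketch = scr.predict_interval(X_test)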
diff --git a/examples/regression/2-advanced-analysis/plot_both_uncertainties.py b/examples/regression/2-advanced-analysis/plot_both_uncertainties.py
index bd3a951a8..574164bbd 100644
--- a/examples/regression/2-advanced-analysis/plot_both_uncertainties.py
+++ b/examples/regression/2-advanced-analysis/plot_both_uncertainties.py
@@ -1,9 +1,12 @@
"""
-================================================
+===============================================================================
Estimating aleatoric and epistemic uncertainties
-================================================
-This example uses :class:`~mapie.regression.MapieRegressor` and
-:class:`~mapie.quantile_regression.MapieQuantileRegressor` to estimate
+===============================================================================
+
+
+This example uses :class:`~mapie_v1.regression.CrossConformalRegressor`,
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor` and
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor` to estimate
prediction intervals capturing both aleatoric and epistemic uncertainties
on a one-dimensional dataset with homoscedastic noise and normal sampling.
"""
@@ -14,13 +17,16 @@
from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import train_test_split
-from mapie._typing import NDArray
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from numpy.typing import NDArray
+from mapie_v1.regression import (
+ CrossConformalRegressor,
+ ConformalizedQuantileRegressor,
+ JackknifeAfterBootstrapRegressor)
F = TypeVar("F", bound=Callable[..., Any])
-random_state = 42
+RANDOM_STATE = 42
# Functions for generating our dataset
@@ -59,12 +65,12 @@ def get_1d_data_with_normal_distrib(
[3]: y_test
[4]: y_mesh
"""
- np.random.seed(random_state)
- X_train = np.random.normal(mu, sigma, n_samples)
+ rng = np.random.default_rng(RANDOM_STATE)
+ X_train = rng.normal(mu, sigma, n_samples)
X_test = np.arange(mu - 4 * sigma, mu + 4 * sigma, sigma / 20.0)
y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test)
- y_train += np.random.normal(0, noise, y_train.shape[0])
- y_test += np.random.normal(0, noise, y_test.shape[0])
+ y_train += rng.normal(0, noise, y_train.shape[0])
+ y_test += rng.normal(0, noise, y_test.shape[0])
return (
X_train.reshape(-1, 1),
y_train,
@@ -76,8 +82,9 @@ def get_1d_data_with_normal_distrib(
# Data generation
mu, sigma, n_samples, noise = 0, 2.5, 300, 0.5
-X_train, y_train, X_test, y_test, y_mesh = get_1d_data_with_normal_distrib(
- x_sinx, mu, sigma, n_samples, noise
+X_train_conformalize, y_train_conformalize, X_test, y_test, y_mesh = (
+ get_1d_data_with_normal_distrib(
+ x_sinx, mu, sigma, n_samples, noise)
)
# Definition of our base model
@@ -101,24 +108,44 @@ def get_1d_data_with_normal_distrib(
# Estimating prediction intervals
STRATEGIES = {
- "jackknife_plus": {"method": "plus", "cv": -1},
- "cv_plus": {"method": "plus", "cv": 10},
- "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)},
- "conformalized_quantile_regression": {"method": "quantile", "cv": "split"},
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
y_pred, y_pis = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor( # type: ignore
- polyn_model_quant,
- **params
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
)
- mapie.fit(X_train, y_train, random_state=random_state)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(polyn_model, **params) # type: ignore
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
+ )
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
# Visualization
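The loop above shows the two v1 workflows side by side. A condensed sketch,
assuming the models and splits defined earlier in this example (illustration
only):

    # Split-style workflow (CQR): fit and conformalize on separate data.
    cqr = ConformalizedQuantileRegressor(polyn_model_quant, confidence_level=0.95)
    cqr.fit(X_train, y_train)
    cqr.conformalize(X_conformalize, y_conformalize)

    # Cross-style workflows (CV+, jackknife+): one call fits and conformalizes.
    ccr = CrossConformalRegressor(
        polyn_model, confidence_level=0.95, method="plus", cv=10,
        random_state=RANDOM_STATE,
    )
    ccr.fit_conformalize(X_train_conformalize, y_train_conformalize)
    y_pred_sketch, y_pis_sketch = ccr.predict_interval(X_test)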
diff --git a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py
index 655df767f..3330627a0 100644
--- a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py
+++ b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py
@@ -1,17 +1,22 @@
"""
-===============================
-Estimating conditional coverage
-===============================
-This example uses :func:`~mapie.regression.MapieRegressor` with conformal
-scores that returns adaptive intervals i.e.
+==============================================================
+Focus on local (or "conditional") coverage
+==============================================================
+
+
+This example uses :class:`~mapie_v1.regression.SplitConformalRegressor`,
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor`,
+with conformal scores that return adaptive intervals, i.e.
(:class:`~mapie.conformity_scores.GammaConformityScore` and
:class:`~mapie.conformity_scores.ResidualNormalisedScore`) as well as
-:func:`~mapie.regression.MapieQuantileRegressor`.
+:class:`~mapie_v1.regression.ConformalizedQuantileRegressor` and
+:class:`~mapie_v1.regression.CrossConformalRegressor`.
The conditional coverage is computed with the three
functions that allow estimating the conditional coverage in regression:
:func:`~mapie.metrics.regression_ssc`,
:func:`~mapie.metrics.regression_ssc_score` and :func:`~mapie.metrics.hsic`.
"""
+
import warnings
from typing import Tuple, Union
@@ -19,21 +24,29 @@
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
+from sklearn.model_selection import train_test_split
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.conformity_scores import (GammaConformityScore,
ResidualNormalisedScore)
-from mapie.metrics import (hsic, regression_coverage_score_v2, regression_ssc,
- regression_ssc_score)
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_ssc,
+ regression_ssc_score, hsic,
+)
+from mapie_v1.regression import (
+ SplitConformalRegressor,
+ CrossConformalRegressor,
+ JackknifeAfterBootstrapRegressor,
+ ConformalizedQuantileRegressor
+)
warnings.filterwarnings("ignore")
-random_state = 42
+RANDOM_STATE = 42
split_size = 0.20
alpha = 0.05
-rng = np.random.default_rng(random_state)
+rng = np.random.default_rng(RANDOM_STATE)
# Functions for generating our dataset
@@ -81,71 +94,81 @@ def sin_with_controlled_noise(
# Data generation
min_x, max_x, n_samples = 0, 10, 3000
-X_train, y_train = sin_with_controlled_noise(min_x, max_x, n_samples)
+X_train_conformalize, y_train_conformalize = sin_with_controlled_noise(
+ min_x, max_x, n_samples)
X_test, y_test = sin_with_controlled_noise(min_x, max_x,
int(n_samples * split_size))
# Definition of our base models
-model = LGBMRegressor(random_state=random_state, alpha=0.5)
+model = LGBMRegressor(random_state=RANDOM_STATE, alpha=0.5)
model_quant = LGBMRegressor(
objective="quantile",
alpha=0.5,
- random_state=random_state
+ random_state=RANDOM_STATE
)
# Definition of the experimental set up
STRATEGIES = {
- "CV+": {
- "method": "plus",
- "cv": 10,
- },
- "JK+ab_Gamma": {
- "method": "plus",
- "cv": Subsample(n_resamplings=100),
- "conformity_score": GammaConformityScore()
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
},
- "ResidualNormalised": {
- "cv": "split",
- "conformity_score": ResidualNormalisedScore(
- residual_estimator=LGBMRegressor(
- alpha=0.5,
- random_state=random_state),
- split_size=0.7,
- random_state=random_state
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(
+ method="plus", resampling=100,
+ conformity_score=GammaConformityScore(),
)
},
- "CQR": {
- "method": "quantile", "cv": "split", "alpha": alpha
+ "residual_normalised": {
+ "class": SplitConformalRegressor,
+ "init_params": dict(
+ prefit=False,
+ conformity_score=ResidualNormalisedScore(
+ residual_estimator=LGBMRegressor(alpha=0.5, random_state=RANDOM_STATE),
+ split_size=0.7,
+ random_state=RANDOM_STATE,
+ ),
+ ),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
},
}
-y_pred, intervals, coverage, cond_coverage, coef_corr = {}, {}, {}, {}, {}
+y_pred, y_pis, coverage, cond_coverage, coef_corr = {}, {}, {}, {}, {}
num_bins = 10
-for strategy, params in STRATEGIES.items():
- # computing predictions
- if strategy == "CQR":
- mapie = MapieQuantileRegressor(
- model_quant,
- **params
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name in ["conformalized_quantile_regression", "residual_normalised"]:
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
)
- mapie.fit(X_train, y_train, random_state=random_state)
- y_pred[strategy], intervals[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(model, **params, random_state=random_state)
+ mapie = class_(model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], intervals[strategy] = mapie.predict(
- X_test, alpha=alpha
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
)
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
# computing metrics
- coverage[strategy] = regression_coverage_score_v2(
- y_test, intervals[strategy]
+ coverage[strategy_name] = regression_coverage_score(
+ y_test, y_pis[strategy_name]
)
- cond_coverage[strategy] = regression_ssc_score(
- y_test, intervals[strategy], num_bins=num_bins
+ cond_coverage[strategy_name] = regression_ssc_score(
+ y_test, y_pis[strategy_name], num_bins=num_bins
)
- coef_corr[strategy] = hsic(y_test, intervals[strategy])
+ coef_corr[strategy_name] = hsic(y_test, y_pis[strategy_name])
# Visualisation of the estimated conditional coverage
@@ -166,34 +189,27 @@ def sin_with_controlled_noise(
print(estimated_cond_cov)
##############################################################################
-# We can see here that the global coverage is approximately the same for
-# all methods. What we want to understand is : "Are these methods good
-# adaptive conformal methods ?". For this we have the two metrics
+# The global coverage is similar for all methods. To determine if these
+# methods are good adaptive conformal methods, we use two metrics:
# :func:`~mapie.metrics.regression_ssc_score` and :func:`~mapie.metrics.hsic`.
-# - SSC (Size Stratified Coverage) is the maximum violation of the coverage :
-# the intervals are grouped by width and the coverage is computed for each
-# group. The lower coverage is the maximum coverage violation. An adaptive
-# method is one where this maximum violation is as close as possible to the
-# global coverage. If we interpret the result for the four methods here :
-# CV+ seems to be the better one.
-# - And with the hsic correlation coefficient, we have the
-# same interpretation : :func:`~mapie.metrics.hsic` computes the correlation
-# between the coverage indicator and the interval size, a value of 0
-# translates an independence between the two.
#
-# We would like to highlight here the misinterpretation that can be made
-# with these metrics. In fact, here CV+ with the absolute residual score
-# calculates constant intervals which, by definition, are not adaptive.
-# Therefore, it is very important to check that the intervals widths are well
-# spread before drawing conclusions (with a plot of the distribution of
-# interval widths or a visualisation of the data for example).
+# - SSC (Size Stratified Coverage): This measures the maximum violation
+# of coverage by grouping intervals by width and computing coverage for
+# each group. An adaptive method has a maximum violation close to the global
+# coverage. Among the four methods, CV+ performs the best.
+# - HSIC (Hilbert-Schmidt Independence Criterion): This computes the
+# correlation between coverage and interval size. A value of 0 indicates
+# independence between the two.
#
-# In this example, with the hsic correlation coefficient, none of the methods
-# stand out from the others. However, the SSC score for the method using the
-# gamma score is significantly worse than for CQR and ResidualNormalisedScore,
-# even though their global coverage is similar. ResidualNormalisedScore and CQR
-# are very close here, with ResidualNormalisedScore being slightly more
-# conservative.
+# It's important to note that CV+ with the absolute residual score
+# calculates constant intervals, which are not adaptive. Therefore,
+# checking the distribution of interval widths is crucial before drawing conclusions.
+#
+# In this example, none of the methods stand out with the HSIC correlation coefficient.
+# However, the SSC score for the gamma score method is significantly worse than
+# for CQR and ResidualNormalisedScore, despite similar global coverage.
+# ResidualNormalisedScore and CQR are very close, with ResidualNormalisedScore
+# being slightly more conservative.
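Both metrics consume the stacked intervals returned by ``predict_interval``.
A short sketch, assuming ``y_test`` and the ``y_pis`` dictionary built above
(illustration only):

    # SSC: the worst coverage over num_bins groups of intervals of similar width.
    ssc = regression_ssc_score(y_test, y_pis["cv_plus"], num_bins=10)
    # HSIC: correlation between the coverage indicator and the interval width;
    # a value of 0 indicates independence.
    corr = hsic(y_test, y_pis["cv_plus"])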
# Visualisation of the data and predictions
@@ -281,7 +297,7 @@ def plot_coverage_by_width(y, intervals, num_bins, alpha, title="", ax=None):
max_width = np.max([
- np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0])
+ np.abs(y_pis[strategy][:, 0, 0] - y_pis[strategy][:, 1, 0])
for strategy in STRATEGIES.keys()])
fig_distr, axs_distr = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
@@ -292,16 +308,16 @@ def plot_coverage_by_width(y, intervals, num_bins, alpha, title="", ax=None):
axs_viz.flat, axs_hist.flat, axs_distr.flat, STRATEGIES.keys()
):
plot_intervals(
- X_test, y_test, y_pred[strategy], intervals[strategy],
+ X_test, y_test, y_pred[strategy], y_pis[strategy],
title=strategy, ax=ax_viz
)
plot_coverage_by_width(
- y_test, intervals[strategy],
+ y_test, y_pis[strategy],
num_bins=num_bins, alpha=alpha, title=strategy, ax=ax_hist
)
ax_distr.hist(
- np.abs(intervals[strategy][:, 0, 0] - intervals[strategy][:, 1, 0]),
+ np.abs(y_pis[strategy][:, 0, 0] - y_pis[strategy][:, 1, 0]),
bins=num_bins
)
ax_distr.set_xlabel("Interval width")
@@ -316,21 +332,19 @@ def plot_coverage_by_width(y, intervals, num_bins, alpha, title="", ax=None):
plt.show()
##############################################################################
-# With toy datasets like this, it is easy to compare visually the methods
-# with a plot of the data and predictions.
-# As mentionned above, a histogram of the ditribution of the interval widths is
-# important to accompany the metrics. It is clear from this histogram
-# that CV+ is not adaptive, the metrics presented here should not be used
-# to evaluate its adaptivity. A wider spread of intervals indicates a more
-# adaptive method.
-# Finally, with the plot of coverage by bins of intervals grouped by widths
-# (which is the output of :func:`~mapie.metrics.regression_ssc`), we want
-# the bins to be as constant as possible around the global coverage (here 0.9).
-
-# As the previous metrics show, gamma score does not perform well in terms of
-# size stratified coverage. It either over-covers or under-covers too much.
-# For ResidualNormalisedScore and CQR, while the first one has several bins
-# with over-coverage, the second one has more under-coverage. These results
-# are confirmed by the visualisation of the data: CQR is better when the data
-# are more spread out, whereas ResidualNormalisedScore is better with small
-# intervals.
+# With toy datasets, it's easy to visually compare methods using data and
+# prediction plots. A histogram of interval widths is crucial to accompany
+# the metrics. This histogram shows that CV+ is not adaptive, so the metrics
+# should not be used to evaluate its adaptivity. A wider spread of intervals
+# indicates a more adaptive method.
+#
+# The plot of coverage by bins of intervals grouped by widths
+# (output of :func:`~mapie.metrics.regression_ssc`) should
+# show bins as constant as possible around the global coverage (here 0.95).
+
+# The gamma score does not perform well in size stratified coverage,
+# often over-covering or under-covering. ResidualNormalisedScore has
+# several bins with over-coverage, while CQR has more under-coverage.
+# Visualizing the data confirms these results: CQR performs better
+# with spread-out data, whereas ResidualNormalisedScore is better
+# with small intervals.
diff --git a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py
index e8f368a56..8d0e17420 100644
--- a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py
+++ b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py
@@ -1,7 +1,8 @@
"""
-=================================================
-Make Conformal Predictive Distribution with MAPIE
-=================================================
+================================================================================
+Conformal Predictive Distribution with MAPIE
+================================================================================
+
"""
##############################################################################
@@ -22,15 +23,15 @@
from matplotlib import pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
-from sklearn.model_selection import train_test_split
from mapie.conformity_scores import (AbsoluteConformityScore,
ResidualNormalisedScore)
-from mapie.regression import MapieRegressor
+from mapie_v1.regression import SplitConformalRegressor
+from mapie_v1.utils import train_conformalize_test_split
warnings.filterwarnings('ignore')
-random_state = 15
+RANDOM_STATE = 15
##############################################################################
@@ -40,12 +41,18 @@
# Here, we simply generate data for a regression task, then split it.
X, y = make_regression(
- n_samples=1000, n_features=1, noise=20, random_state=random_state
+ n_samples=1000, n_features=1, noise=20, random_state=RANDOM_STATE
)
-X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.5, random_state=random_state
+
+(
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+) = train_conformalize_test_split(
+ X, y,
+ train_size=0.6, conformalize_size=0.2, test_size=0.2,
+ random_state=RANDOM_STATE
)
+
plt.xlabel("x")
plt.ylabel("y")
plt.scatter(X_train, y_train, alpha=0.3)
@@ -58,19 +65,19 @@
#
# To be able to obtain the cumulative distribution function of
# a prediction with MAPIE, we propose here to wrap the
-# :class:`~mapie.regression.MapieRegressor` to add a new method named
+# :class:`~mapie_v1.regression.SplitConformalRegressor` to add a new method named
# `get_cumulative_distribution_function`.
-class MapieConformalPredictiveDistribution(MapieRegressor):
+class MapieConformalPredictiveDistribution(SplitConformalRegressor):
- def __init__(self, **kwargs) -> None:
- super().__init__(**kwargs)
- self.conformity_score.sym = False
def get_cumulative_distribution_function(self, X):
- y_pred = self.predict(X)
- cs = self.conformity_scores_[~np.isnan(self.conformity_scores_)]
- res = self.conformity_score_function_.get_estimation_distribution(
+ y_pred, _ = self.predict_interval(X)
+ cs = self._mapie_regressor.conformity_scores_[
+ ~np.isnan(self._mapie_regressor.conformity_scores_)]
+ res = self._conformity_score.get_estimation_distribution(
y_pred.reshape((-1, 1)), cs, X=X
)
return res
@@ -78,30 +85,30 @@ def get_cumulative_distribution_function(self, X):
##############################################################################
# Now, we propose to use it with two different conformity scores -
-# :class:`~mapie.conformity_score.AbsoluteConformityScore` and
-# :class:`~mapie.conformity_score.ResidualNormalisedScore` - in split-conformal
-# inference.
+# :class:`~mapie.conformity_scores.AbsoluteConformityScore` and
+# :class:`~mapie.conformity_scores.ResidualNormalisedScore` -
+# in split-conformal inference.
mapie_regressor_1 = MapieConformalPredictiveDistribution(
estimator=LinearRegression(),
- conformity_score=AbsoluteConformityScore(),
- cv='split',
- random_state=random_state
+ conformity_score=AbsoluteConformityScore(sym=False),
+ prefit=False
)
mapie_regressor_1.fit(X_train, y_train)
-y_pred_1 = mapie_regressor_1.predict(X_test)
+mapie_regressor_1.conformalize(X_conformalize, y_conformalize)
+y_pred_1, _ = mapie_regressor_1.predict_interval(X_test)
y_cdf_1 = mapie_regressor_1.get_cumulative_distribution_function(X_test)
mapie_regressor_2 = MapieConformalPredictiveDistribution(
estimator=LinearRegression(),
- conformity_score=ResidualNormalisedScore(),
- cv='split',
- random_state=random_state
+ conformity_score=ResidualNormalisedScore(sym=False, random_state=RANDOM_STATE),
+ prefit=False
)
mapie_regressor_2.fit(X_train, y_train)
-y_pred_2 = mapie_regressor_2.predict(X_test)
+mapie_regressor_2.conformalize(X_conformalize, y_conformalize)
+y_pred_2, _ = mapie_regressor_2.predict_interval(X_test)
y_cdf_2 = mapie_regressor_2.get_cumulative_distribution_function(X_test)
plt.xlabel("x")
diff --git a/examples/regression/2-advanced-analysis/plot_coverage_validity.py b/examples/regression/2-advanced-analysis/plot_coverage_validity.py
index 75a868d35..b7f88c9da 100644
--- a/examples/regression/2-advanced-analysis/plot_coverage_validity.py
+++ b/examples/regression/2-advanced-analysis/plot_coverage_validity.py
@@ -1,12 +1,13 @@
"""
-================================================
-Coverage Validity with MAPIE for Regression Task
-================================================
+===============================================================================
+Coverage validity of MAPIE for regression tasks
+===============================================================================
+
This example verifies that conformal claims are valid in the MAPIE package
when using the CP prefit/split methods.
-This notebook is inspired of the notebook used for episode "Uncertainty
+This notebook is inspired by the notebook used for the episode "Uncertainty
Quantification: Avoid these Missteps in Validating Your Conformal Claims!"
(link to the [original notebook](https://github.com/mtorabirad/MLBoost)).
@@ -25,11 +26,11 @@
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
-from sklearn.model_selection import ShuffleSplit, train_test_split
+from sklearn.model_selection import train_test_split
-from mapie.regression import MapieRegressor
+from mapie_v1.regression import SplitConformalRegressor
from mapie.conformity_scores import AbsoluteConformityScore
-from mapie.metrics import regression_coverage_score_v2
+from mapie.metrics.regression import regression_coverage_score
from joblib import Parallel, delayed
@@ -41,11 +42,11 @@
##############################################################################
# Section 1: Comparison with the split conformalizer method (light version)
-# -------------------------------------------------------------------------
+# -----------------------------------------------------------------------------------------------
#
# We propose here to implement a lighter version of split CP by calculating
# the quantile with a small correction according to [1].
-# We prepare the fit/calibration/test routine in order to calculate the average
+# We prepare the fit/conformalize/test routine in order to calculate the average
# coverage over several simulations.
# Conformalizer Class
@@ -54,31 +55,33 @@ def __init__(
self,
pre_trained_model,
non_conformity_func,
- delta
+ confidence_level
):
# Initialize the conformalizer with required parameters
self.estimator = pre_trained_model
self.non_conformity_func = non_conformity_func
- self.delta = delta
+ self.confidence_level = confidence_level
- def _calculate_quantile(self, scores_calib):
+ def _calculate_quantile(self, scores_conformalize):
# Calculate the quantile value based on delta and non-conformity scores
- self.delta_cor = np.ceil(self.delta*(self.n_calib+1))/self.n_calib
- return np.quantile(scores_calib, self.delta_cor, method='lower')
+ self.delta_cor = np.ceil(
+ self.confidence_level * (self.n_conformalize + 1)) / self.n_conformalize
+ return np.quantile(scores_conformalize, self.delta_cor, method='lower')
- def _calibrate(self, X_calib, y_calib):
+ def _conformalize(self, X_conformalize, y_conformalize):
# Calibrate the conformalizer to calculate q_hat
- y_calib_pred = self.estimator.predict(X_calib)
- scores_calib = self.non_conformity_func(y_calib_pred, y_calib)
- self.q_hat = self._calculate_quantile(scores_calib)
+ y_conformalize_pred = self.estimator.predict(X_conformalize)
+ scores_conformalize = self.non_conformity_func(
+ y_conformalize_pred, y_conformalize)
+ self.q_hat = self._calculate_quantile(scores_conformalize)
def fit(self, X, y):
# Fit the conformalizer to the data and calculate q_hat
- self.n_calib = X.shape[0]
- self._calibrate(X, y)
+ self.n_conformalize = X.shape[0]
+ self._conformalize(X, y)
return self
- def predict(self, X, alpha=None):
+ def predict(self, X):
# Returns the predicted interval
y_pred = self.estimator.predict(X)
y_lower, y_upper = y_pred - self.q_hat, y_pred + self.q_hat
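As a quick numeric check of the correction implemented in
``_calculate_quantile`` (a sketch assuming ``confidence_level=0.8`` and
``n_conformalize=6``, the values used in Experiment 1 below):

    import numpy as np

    confidence_level, n_conformalize = 0.8, 6
    # ceil(0.8 * (6 + 1)) / 6 = ceil(5.6) / 6 = 6 / 6 = 1.0: with only six
    # conformalization scores, the corrected 'lower' quantile falls back to
    # the largest score.
    delta_cor = np.ceil(confidence_level * (n_conformalize + 1)) / n_conformalize
    print(delta_cor)  # 1.0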
@@ -91,16 +94,13 @@ def non_conformity_func(y, y_hat):
def get_coverage_prefit(
- conformalizer, data, target, delta, n_calib, random_state=None
+ conformalizer, data, target, n_conformalize, random_state=None
):
"""
Calculate the fraction of test samples within the predicted intervals.
- This function splits the data into a training set and a test set. If the
- cross-validation strategy of the mapie regressor is a ShuffleSplit, it fits
- the regressor to the entire training set. Otherwise, it further splits the
- training set into a calibration set and a training set, and fits the
- regressor to the calibration set. It then predicts intervals for the test
+ This function splits the data into a conformalize set and a test set, and
+ conformalizes the regressor on the conformalize set. It then predicts intervals for the test
set and calculates the fraction of test samples within these intervals.
Parameters:
@@ -109,29 +109,40 @@ def get_coverage_prefit(
A mapie regressor object.
data: array-like of shape (n_samples, n_features)
- The data to be split into a training set and a test set.
+ The data to be split into a train set and a test set.
target: array-like of shape (n_samples,)
The target values for the data.
- delta: float
- The level of confidence for the predicted intervals.
+ n_conformalize: int
+ The length of the conformalize set.
+
+ random_state: int
+ Random state for the data splits.
Returns:
--------
- fraction_within_bounds: float
- The fraction of test samples within the predicted intervals.
+ coverage: float
+ The coverage within the predicted intervals.
"""
# Split data step
- X_cal, X_test, y_cal, y_test = train_test_split(
- data, target, train_size=n_calib, random_state=random_state
+ X_conformalize, X_test, y_conformalize, y_test = train_test_split(
+ data, target, train_size=n_conformalize, random_state=random_state
)
- # Calibration step
- conformalizer.fit(X_cal, y_cal)
- # Prediction step
- _, y_pis = conformalizer.predict(X_test, alpha=1-delta)
- # Coverage step
- coverage = regression_coverage_score_v2(y_test, y_pis)
+ if isinstance(conformalizer, SplitConformalRegressor):
+ # Calibration step
+ conformalizer.conformalize(X_conformalize, y_conformalize)
+ # Prediction step
+ _, y_pis = conformalizer.predict_interval(X_test)
+ # Coverage step
+ coverage = regression_coverage_score(y_test, y_pis)
+ else:
+ # Calibration step
+ conformalizer.fit(X_conformalize, y_conformalize)
+ # Prediction step
+ _, y_pis = conformalizer.predict(X_test)
+ # Coverage step
+ coverage = regression_coverage_score(y_test, y_pis)
return coverage
@@ -151,7 +162,7 @@ def cumulative_average(arr):
Returns:
--------
- running_avg: List[float]
+ cumulative_avg: List[float]
The cumulative average of the input list.
"""
cumsum = np.cumsum(arr)
@@ -161,60 +172,76 @@ def cumulative_average(arr):
##############################################################################
-# Experiment 1: Coverage Validity for a given delta, n_calib
-# ----------------------------------------------------------
+# Experiment 1: Coverage Validity for given confidence_level and n_conformalize
+# --------------------------------------------------------------------------------
#
-# To begin, we propose to use ``delta=0.8`` and ``n_delta=6`` and compare
-# the coverage validity claim of the MAPIE class and the referenced class.
+# To begin, we propose to use ``confidence_level=0.8`` and
+# ``n_conformalize=6`` and compare the coverage validity claim
+# of the MAPIE class and the referenced class.
+
+RANDOM_STATE = 1
# Parameters of the modelling
-delta = 0.8
-n_calib = 6
+confidence_level = 0.8
+n_conformalize = 6
n_train = 1000
n_test = 1000
num_splits = 1000
# Load toy Data
-n_all = n_train + n_calib + n_test
-data, target = make_regression(n_all, random_state=1)
+n_all = n_train + n_conformalize + n_test
+data, target = make_regression(n_all, random_state=RANDOM_STATE)
-# Split dataset into training, calibration and validation sets
-X_train, X_cal_test, y_train, y_cal_test = train_test_split(
- data, target, train_size=n_train, random_state=1
+# Split dataset into train and conformalize/test sets
+X_train, X_conformalize_test, y_train, y_conformalize_test = train_test_split(
+ data, target, train_size=n_train, random_state=RANDOM_STATE
)
-# Create a regression model and fit it to the training data
-model = DecisionTreeRegressor()
+# Create a regression model and fit it to the train data
+model = DecisionTreeRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)
# Compute theoretical bounds and the exact coverage to attain
-lower_bound = delta
-upper_bound = (delta + 1/(n_calib+1))
-upper_bound_2 = (delta + 1/(n_calib/2+1))
-exact_cov = (np.ceil((n_calib+1)*delta))/(n_calib+1)
+lower_bound = confidence_level
+upper_bound = (confidence_level + 1 / (n_conformalize + 1))
+exact_cov = (np.ceil((n_conformalize + 1) * confidence_level)) / (n_conformalize + 1)
# Run the experiment
empirical_coverages_ref = []
empirical_coverages_mapie = []
-for i in range(1, num_splits):
+for random_state in range(1, num_splits):
# Compute empirical coverage for each trial with StandardConformalizer
- conformalizer = StandardConformalizer(model, non_conformity_func, delta)
+ conformalizer = StandardConformalizer(
+ pre_trained_model=model,
+ non_conformity_func=non_conformity_func,
+ confidence_level=confidence_level
+ )
coverage = get_coverage_prefit(
- conformalizer, X_cal_test, y_cal_test, delta, n_calib, random_state=i
+ conformalizer=conformalizer,
+ data=X_conformalize_test,
+ target=y_conformalize_test,
+ n_conformalize=n_conformalize,
+ random_state=random_state
)
empirical_coverages_ref.append(coverage)
- # Compute empirical coverage for each trial with MapieRegressor
- conformalizer = MapieRegressor(estimator=model, cv="prefit")
+ # Compute empirical coverage for each trial with SplitConformalRegressor
+ conformalizer = SplitConformalRegressor(
+ estimator=model, confidence_level=confidence_level, prefit=True
+ )
coverage = get_coverage_prefit(
- conformalizer, X_cal_test, y_cal_test, delta, n_calib, random_state=i
+ conformalizer=conformalizer,
+ data=X_conformalize_test,
+ target=y_conformalize_test,
+ n_conformalize=n_conformalize,
+ random_state=random_state
)
empirical_coverages_mapie.append(coverage)
-cumulative_averages_ref = cumulative_average(empirical_coverages_ref)
-cumulative_averages_mapie = cumulative_average(empirical_coverages_mapie)
+cumulative_averages_ref = cumulative_average(arr=empirical_coverages_ref)
+cumulative_averages_mapie = cumulative_average(arr=empirical_coverages_mapie)
# Plot the results
fig, ax = plt.subplots()
@@ -227,7 +254,10 @@ def cumulative_average(arr):
plt.xlabel(r'Split Number')
plt.ylabel(r'$\overline{\mathbb{C}}$')
-plt.title(r'$|D_{cal}| = $' + str(n_calib) + r' and $\delta = $' + str(delta))
+plt.title(
+ r'$|D_{cal}| = $' + str(n_conformalize) +
+ r' and $\delta = $' + str(confidence_level)
+)
plt.legend(loc="upper right", ncol=2)
plt.ylim(0.7, 1)
@@ -243,8 +273,8 @@ def cumulative_average(arr):
##############################################################################
-# Experiment 2: Again but without fixing random_state
-# ---------------------------------------------------
+# Experiment 2: Coverage validity with different random states
+# -----------------------------------------------------------------------------
#
# We reproduce the previous experiment, this time with different random
# states for the two methods. They therefore follow different trajectories but
@@ -254,18 +284,32 @@ def cumulative_average(arr):
empirical_coverages_ref = []
empirical_coverages_mapie = []
-for i in range(1, num_splits):
+for random_state in range(1, num_splits):
# Compute empirical coverage for each trial with StandardConformalizer
- conformalizer = StandardConformalizer(model, non_conformity_func, delta)
+ conformalizer = StandardConformalizer(
+ pre_trained_model=model,
+ non_conformity_func=non_conformity_func,
+ confidence_level=confidence_level
+ )
coverage = get_coverage_prefit(
- conformalizer, X_cal_test, y_cal_test, delta, n_calib
+ conformalizer=conformalizer,
+ data=X_conformalize_test,
+ target=y_conformalize_test,
+ n_conformalize=n_conformalize,
+ random_state=random_state
)
empirical_coverages_ref.append(coverage)
- # Compute empirical coverage for each trial with MapieRegressor
- conformalizer = MapieRegressor(estimator=model, cv="prefit")
+ # Compute empirical coverage for each trial with SplitConformalRegressor
+ conformalizer = SplitConformalRegressor(
+ estimator=model, confidence_level=confidence_level, prefit=True
+ )
coverage = get_coverage_prefit(
- conformalizer, X_cal_test, y_cal_test, delta, n_calib
+ conformalizer=conformalizer,
+ data=X_conformalize_test,
+ target=y_conformalize_test,
+ n_conformalize=n_conformalize,
+ random_state=num_splits + random_state
)
empirical_coverages_mapie.append(coverage)
@@ -283,7 +327,10 @@ def cumulative_average(arr):
plt.xlabel(r'Split Number')
plt.ylabel(r'$\overline{\mathbb{C}}$')
-plt.title(r'$|D_{cal}| = $' + str(n_calib) + r' and $\delta = $' + str(delta))
+plt.title(
+ r'$|D_{cal}| = $' + str(n_conformalize) +
+ r' and $\delta = $' + str(confidence_level)
+)
plt.legend(loc="upper right", ncol=2)
plt.ylim(0.7, 1)
@@ -293,100 +340,60 @@ def cumulative_average(arr):
##############################################################################
# Section 2: Comparison with different MAPIE CP methods
-# -----------------------------------------------------
+# -----------------------------------------------------------------------------
#
# We propose to reproduce the previous experiment with different methods of
# the MAPIE package (prefit, and prefit with asymmetrical non-conformity
# scores).
-def get_coverage_split(conformalizer, data, target, delta, random_state=None):
- """
- Calculate the fraction of test samples within the predicted intervals.
-
- This function splits the data into a training set and a test set. If the
- cross-validation strategy of the mapie regressor is a ShuffleSplit, it fits
- the regressor to the entire training set. Otherwise, it further splits the
- training set into a calibration set and a training set, and fits the
- regressor to the calibration set. It then predicts intervals for the test
- set and calculates the fraction of test samples within these intervals.
-
- Parameters:
- -----------
- conformalizer: object
- A mapie regressor object.
-
- data: array-like of shape (n_samples, n_features)
- The data to be split into a training set and a test set.
-
- target: array-like of shape (n_samples,)
- The target values for the data.
-
- delta: float
- The level of confidence for the predicted intervals.
-
- Returns:
- --------
- fraction_within_bounds: float
- The fraction of test samples within the predicted intervals.
- """
- # Split data step
- X_train_cal, X_test, y_train_cal, y_test = train_test_split(
- data, target, test_size=n_test
- )
-
- # Calibration step
- if isinstance(conformalizer, MapieRegressor) and \
- isinstance(conformalizer.cv, ShuffleSplit):
- conformalizer.fit(X_train_cal, y_train_cal)
- else:
- _, X_cal, _, y_cal = train_test_split(
- X_train_cal, y_train_cal, test_size=n_calib
+def run_get_coverage_prefit(
+ model, method, params, n_conformalize, data,
+ target, confidence_level, random_state, num_splits
+):
+ if method == "reference":
+ ref_reg = StandardConformalizer(
+ pre_trained_model=model,
+ non_conformity_func=non_conformity_func,
+ confidence_level=confidence_level
+ )
+ coverage = get_coverage_prefit(
+ conformalizer=ref_reg,
+ data=data,
+ target=target,
+ n_conformalize=n_conformalize,
+ random_state=random_state
)
- conformalizer.fit(X_cal, y_cal)
-
- # Prediction step
- if isinstance(conformalizer, StandardConformalizer):
- _, y_pis = conformalizer.predict(X_test)
else:
- _, y_pis = conformalizer.predict(X_test, alpha=1-delta)
-
- # Coverage step
- fraction_within_bounds = regression_coverage_score_v2(y_test, y_pis)
-
- return fraction_within_bounds
-
-
-def run_get_coverage_split(model, params, n_calib, data, target, delta):
- if not params:
- ref_reg = StandardConformalizer(model, non_conformity_func, delta)
- return get_coverage_split(ref_reg, data, target, delta)
- try:
- mapie_reg = MapieRegressor(estimator=model, **params(n_calib))
- coverage = get_coverage_split(mapie_reg, data, target, delta)
- except Exception:
- coverage = np.nan
+ mapie_reg = SplitConformalRegressor(
+ estimator=model, confidence_level=confidence_level, **params
+ )
+ coverage = get_coverage_prefit(
+ conformalizer=mapie_reg,
+ data=data,
+ target=target,
+ n_conformalize=n_conformalize,
+ random_state=num_splits + random_state
+ )
return coverage
STRATEGIES = {
"reference": None,
- "prefit": lambda n: dict(
- method="base",
- cv="prefit",
+ "prefit": dict(
+ prefit=True,
conformity_score=AbsoluteConformityScore(sym=True)
),
- "prefit_asym": lambda n: dict(
- method="base",
- cv="prefit",
+ "prefit_asym": dict(
+ prefit=True,
conformity_score=AbsoluteConformityScore(sym=False)
),
}
##############################################################################
-# Experiment 3: Again but with different MAPIE CP methods
-# -------------------------------------------------------
+# Experiment 3: Coverage with different MAPIE CP methods
+# -----------------------------------------------------------------------------
#
# The methods always follow different trajectories but always achieve the
# expected coverage.
@@ -394,8 +401,8 @@ def run_get_coverage_split(model, params, n_calib, data, target, delta):
# We should calculate them differently but that doesn't change our conclusion.
# Parameters of the modelling
-delta = 0.8
-n_calib = 12 # for asymmetric non-conformity scores
+confidence_level = 0.8
+n_conformalize = 12 # for asymmetric non-conformity scores
num_splits = 1000
# Run the experiment
@@ -403,12 +410,14 @@ def run_get_coverage_split(model, params, n_calib, data, target, delta):
for method, params in STRATEGIES.items():
coverages_list = []
- run_params = model, params, n_calib, data, target, delta
+ run_params = model, method, params, n_conformalize, data, target, confidence_level
coverages_list = Parallel(n_jobs=-1)(
- delayed(run_get_coverage_split)(*run_params)
- for _ in range(num_splits)
+ delayed(run_get_coverage_prefit)(
+ *run_params, num_splits=num_splits, random_state=random_state
+ )
+ for random_state in range(num_splits)
)
- cumulative_averages_dict[method] = cumulative_average(coverages_list)
+ cumulative_averages_dict[method] = cumulative_average(arr=coverages_list)
# Plot the results
fig, ax = plt.subplots()
@@ -421,7 +430,10 @@ def run_get_coverage_split(model, params, n_calib, data, target, delta):
plt.xlabel(r'Split Number')
plt.ylabel(r'$\overline{\mathbb{C}}$')
-plt.title(r'$|D_{cal}| = $' + str(n_calib) + r' and $\delta = $' + str(delta))
+plt.title(
+ r'$|D_{cal}| = $' + str(n_conformalize) +
+ r' and $\delta = $' + str(confidence_level)
+)
plt.legend(loc="upper right", ncol=2)
plt.ylim(0.7, 1)
@@ -431,7 +443,7 @@ def run_get_coverage_split(model, params, n_calib, data, target, delta):
##############################################################################
# Experiment 4: Extensive experimentation on different delta and n_calib
-# ----------------------------------------------------------------------
+# --------------------------------------------------------------------------------------------
#
# Here we propose to extend the experiment on different sizes of the
# calibration dataset and target coverage.
@@ -446,30 +458,28 @@ def run_get_coverage_split(model, params, n_calib, data, target, delta):
nc_min, nc_max = 10, 30
n_calib_array = np.arange(nc_min, nc_max+1, 2)
-delta = 0.8
-delta_array = [delta]
+confidence_level = 0.8
+confidence_level_array = [confidence_level]
final_coverage_dict = {
- method: {delta: [] for delta in delta_array}
- for method in STRATEGIES
-}
-effective_coverage_dict = {
- method: {delta: [] for delta in delta_array}
+ method: {confidence_level: [] for confidence_level in confidence_level_array}
for method in STRATEGIES
}
-
# Run experiment
for method, params in STRATEGIES.items():
- for n_calib in n_calib_array:
+ for n_conformalize in n_calib_array:
coverages_list = []
- run_params = model, params, n_calib, data, target, delta
+ run_params = (
+ model, method, params, n_conformalize, data, target, confidence_level
+ )
coverages_list = Parallel(n_jobs=-1)(
- delayed(run_get_coverage_split)(*run_params)
- for _ in range(num_splits)
+ delayed(run_get_coverage_prefit)(
+ *run_params, num_splits=num_splits, random_state=random_state)
+ for random_state in range(num_splits)
)
coverages_list = np.array(coverages_list)
final_coverage = cumulative_average(coverages_list)[-1]
- final_coverage_dict[method][delta].append(final_coverage)
+ final_coverage_dict[method][confidence_level].append(final_coverage)
# Theoretical bounds and the exact coverage to attain
@@ -486,12 +496,12 @@ def upper_bound_asym_fct(delta):
def exact_coverage_fct(delta):
- return np.ceil((n_calib_array+1)*delta)/(n_calib_array+1)
+ return np.ceil((n_calib_array + 1) * delta) / (n_calib_array + 1)
def exact_coverage_asym_fct(delta):
- new_n = n_calib_array//2-1
- return np.ceil((new_n+1)*delta)/(new_n+1)
+ new_n = n_calib_array//2 - 1
+ return np.ceil((new_n + 1) * delta) / (new_n + 1)
# Plot the results
@@ -500,29 +510,33 @@ def exact_coverage_asym_fct(delta):
fig, ax = plt.subplots(nrows=nrows, ncols=ncols)
-for i, method in enumerate(final_coverage_dict):
+for idx, method in enumerate(final_coverage_dict):
# Compute the different bounds, target
- cov = final_coverage_dict[method][delta]
- ub = upper_bound_fct(delta)
- lb = lower_bound_fct(delta)
- exact_cov = exact_coverage_fct(delta)
+ cov = final_coverage_dict[method][confidence_level]
+ ub = upper_bound_fct(confidence_level)
+ lb = lower_bound_fct(confidence_level)
+ exact_cov = exact_coverage_fct(confidence_level)
if 'asym' in method:
- ub = upper_bound_asym_fct(delta)
- exact_cov = exact_coverage_asym_fct(delta)
+ ub = upper_bound_asym_fct(confidence_level)
+ exact_cov = exact_coverage_asym_fct(confidence_level)
ub = np.clip(ub, a_min=0, a_max=1)
lb = np.clip(lb, a_min=0, a_max=1)
# Plot the results
- ax[i].plot(n_calib_array, cov, alpha=0.5, label=method, color='g')
- ax[i].plot(n_calib_array, lb, color='k', label='Lower Bound')
- ax[i].plot(n_calib_array, ub, color='b', label='Upper Bound')
- ax[i].plot(n_calib_array, exact_cov, color='g', ls='--', label='Exact Cov')
- ax[i].hlines(delta, nc_min, nc_max, color='r', ls='--', label='Target Cov')
-
- ax[i].legend(loc="upper right", ncol=2)
- ax[i].set_ylim(np.min(lb) - 0.05, 1.0)
- ax[i].set_xlabel(r'$n_{calib}$')
- ax[i].set_ylabel(r'$\overline{\mathbb{C}}$')
-
-fig.suptitle(r'$\delta = $' + str(delta))
+ ax[idx].plot(n_calib_array, cov, alpha=0.5, label=method, color='g')
+ ax[idx].plot(n_calib_array, lb, color='k', label='Lower Bound')
+ ax[idx].plot(n_calib_array, ub, color='b', label='Upper Bound')
+ ax[idx].plot(
+ n_calib_array, exact_cov, color='g', ls='--', label='Exact Cov'
+ )
+ ax[idx].hlines(
+ confidence_level, nc_min, nc_max, color='r', ls='--', label='Target Cov'
+ )
+
+ ax[idx].legend(loc="upper right", ncol=2)
+ ax[idx].set_ylim(np.min(lb) - 0.05, 1.0)
+ ax[idx].set_xlabel(r'$n_{calib}$')
+ ax[idx].set_ylabel(r'$\overline{\mathbb{C}}$')
+
+fig.suptitle(r'$\delta = $' + str(confidence_level))
plt.show()
diff --git a/examples/regression/2-advanced-analysis/plot_cqr_symmetry_difference.py b/examples/regression/2-advanced-analysis/plot_cqr_symmetry_difference.py
new file mode 100644
index 000000000..a93c025ea
--- /dev/null
+++ b/examples/regression/2-advanced-analysis/plot_cqr_symmetry_difference.py
@@ -0,0 +1,134 @@
+"""
+==========================================================================================================
+The symmetric_correction parameter of ConformalizedQuantileRegressor
+==========================================================================================================
+
+
+An example plot of :class:`~mapie_v1.regression.ConformalizedQuantileRegressor`
+illustrating the impact of the ``symmetric_correction`` parameter.
+"""
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.datasets import make_regression
+from sklearn.ensemble import GradientBoostingRegressor
+
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import ConformalizedQuantileRegressor
+from mapie_v1.utils import train_conformalize_test_split
+
+RANDOM_STATE = 1
+
+##############################################################################
+# We generate synthetic data.
+
+X, y = make_regression(
+ n_samples=1000, n_features=1, noise=20, random_state=RANDOM_STATE
+)
+
+(
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+) = train_conformalize_test_split(
+ X, y,
+ train_size=0.6, conformalize_size=0.2, test_size=0.2,
+ random_state=RANDOM_STATE
+)
+
+
+# Define confidence level
+confidence_level = 0.8
+
+# Initialize a Gradient Boosting Regressor for quantile regression
+gb_reg = GradientBoostingRegressor(
+ loss="quantile", alpha=0.5, random_state=RANDOM_STATE
+)
+
+# Using ConformalizedQuantileRegressor
+mapie_qr = ConformalizedQuantileRegressor(
+ estimator=gb_reg, confidence_level=confidence_level)
+mapie_qr.fit(X_train, y_train)
+mapie_qr.conformalize(X_conformalize, y_conformalize)
+y_pred_sym, y_pis_sym = mapie_qr.predict_interval(X_test, symmetric_correction=True)
+y_pred_asym, y_pis_asym = mapie_qr.predict_interval(X_test, symmetric_correction=False)
+y_qlow = mapie_qr._mapie_quantile_regressor.estimators_[0].predict(X_test)
+y_qup = mapie_qr._mapie_quantile_regressor.estimators_[1].predict(X_test)
+
+print(f"y.shape: {y.shape}")
+print(f"y_pis_sym[:, 0].shape: {y_pis_sym[:, 0].shape}")
+print(f"y_pis_sym[:, 1].shape: {y_pis_sym[:, 1].shape}")
+# Calculate coverage scores
+coverage_score_sym = regression_coverage_score(
+ y_test, y_pis_sym
+)[0]
+coverage_score_asym = regression_coverage_score(
+ y_test, y_pis_asym
+)[0]
+
+# Sort the values for plotting
+order = np.argsort(X_test[:, 0])
+X_test_sorted = X_test[order]
+y_pred_sym_sorted = y_pred_sym[order]
+y_pis_sym_sorted = y_pis_sym[order]
+y_pred_asym_sorted = y_pred_asym[order]
+y_pis_asym_sorted = y_pis_asym[order]
+y_qlow = y_qlow[order]
+y_qup = y_qup[order]
+
+##############################################################################
+# We will plot the predictions and prediction intervals for both the
+# symmetric and the asymmetric corrections. The solid lines represent the
+# estimated quantiles, the dashed lines the bounds of the prediction
+# intervals, and the shaded area the prediction intervals themselves.
+
+plt.figure(figsize=(14, 7))
+
+plt.subplot(1, 2, 1)
+plt.xlabel("x")
+plt.ylabel("y")
+plt.scatter(X_test, y_test, alpha=0.3)
+plt.plot(X_test_sorted, y_qlow, color="C1")
+plt.plot(X_test_sorted, y_qup, color="C1")
+plt.plot(X_test_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--")
+plt.plot(X_test_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--")
+plt.fill_between(
+ X_test_sorted.ravel(),
+ y_pis_sym_sorted[:, 0].ravel(),
+ y_pis_sym_sorted[:, 1].ravel(),
+ alpha=0.2,
+)
+plt.title(
+ f"Symmetric Intervals\n"
+ f"Target and effective coverages for "
+ f"confidence_level={confidence_level:.2f}; coverage={coverage_score_sym:.3f})"
+)
+
+# Plot asymmetric prediction intervals
+plt.subplot(1, 2, 2)
+plt.xlabel("x")
+plt.ylabel("y")
+plt.scatter(X_test, y_test, alpha=0.3)
+plt.plot(X_test_sorted, y_qlow, color="C2")
+plt.plot(X_test_sorted, y_qup, color="C2")
+plt.plot(X_test_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--")
+plt.plot(X_test_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--")
+plt.fill_between(
+ X_test_sorted.ravel(),
+ y_pis_asym_sorted[:, 0].ravel(),
+ y_pis_asym_sorted[:, 1].ravel(),
+ alpha=0.2,
+)
+plt.title(
+ f"Asymmetric Intervals\n"
+ f"Target and effective coverages for "
+ f"confidence_level={confidence_level:.2f}; coverage={coverage_score_sym:.3f})"
+)
+plt.tight_layout()
+plt.show()
+
+##############################################################################
+# The symmetric intervals (``symmetric_correction=True``) use a combined set of
+# residuals for both bounds, while the asymmetric intervals
+# (``symmetric_correction=False``) use distinct residuals for each bound,
+# allowing for more flexible and accurate intervals that reflect the
+# heteroscedastic nature of the data. The resulting effective coverages
+# demonstrate the theoretical guarantee of the target coverage level
+# ``confidence_level``.
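A quick way to quantify the difference, assuming the interval arrays computed
above and the (n_samples, 2, 1) interval shape used elsewhere in these
examples (illustration only): compare the average interval widths of the two
settings.

    # y_pis_* have shape (n_samples, 2, 1): lower and upper bounds.
    width_sym = np.abs(y_pis_sym[:, 1, 0] - y_pis_sym[:, 0, 0]).mean()
    width_asym = np.abs(y_pis_asym[:, 1, 0] - y_pis_asym[:, 0, 0]).mean()
    print(f"mean width (symmetric): {width_sym:.3f}")
    print(f"mean width (asymmetric): {width_asym:.3f}")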
diff --git a/examples/regression/4-tutorials/plot_cqr_tutorial.py b/examples/regression/2-advanced-analysis/plot_cqr_tutorial.py
similarity index 67%
rename from examples/regression/4-tutorials/plot_cqr_tutorial.py
rename to examples/regression/2-advanced-analysis/plot_cqr_tutorial.py
index e5dc76c7c..c0d590d61 100644
--- a/examples/regression/4-tutorials/plot_cqr_tutorial.py
+++ b/examples/regression/2-advanced-analysis/plot_cqr_tutorial.py
@@ -1,32 +1,20 @@
"""
-====================================================
-Tutorial for conformalized quantile regression (CQR)
-====================================================
+==========================================================================================================
+ConformalizedQuantileRegressor on gamma distributed data
+==========================================================================================================
+
We will use the sklearn california housing dataset as the base for the
-comparison of the different methods available on MAPIE. Two classes will
-be used: :class:`~mapie.quantile_regression.MapieQuantileRegressor` for CQR
-and :class:`~mapie.regression.MapieRegressor` for the other methods.
+comparison of the different methods available on MAPIE. Three classes will
+be used: :class:`~mapie_v1.regression.ConformalizedQuantileRegressor` for CQR,
+and :class:`~mapie_v1.regression.CrossConformalRegressor` and
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor` for the other methods.
For this example, the estimator will be :class:`~lightgbm.LGBMRegressor` with
``objective="quantile"`` as this is a necessary component for CQR, the
regression needs to be from a quantile regressor.
-For the conformalized quantile regression (CQR), we will use a split-conformal
-method meaning that we will split the training set into a training and
-calibration set. This means using
-:class:`~mapie.quantile_regression.MapieQuantileRegressor` with ``cv="split"``
-and the ``alpha`` parameter already defined. Recall that the ``alpha`` is
-`1 - target coverage`.
-
-For the other type of conformal methods, they are chosen with the
-parameter ``method`` of :class:`~mapie.regression.MapieRegressor` and the
-parameter ``cv`` is the strategy for cross-validation. In this method, to use a
-"leave-one-out" strategy, one would have to use ``cv=-1`` where a positive
-value would indicate the number of folds for a cross-validation strategy.
-Note that for the jackknife+ after boostrap, we need to use the
-class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
-defined in the ``predict`` for these methods).
+We then compare the coverage and the interval widths.
"""
import warnings
@@ -41,13 +29,17 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import KFold, RandomizedSearchCV, train_test_split
-from mapie.metrics import (regression_coverage_score,
- regression_mean_width_score)
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score,
+)
+from mapie_v1.regression import (
+ ConformalizedQuantileRegressor,
+ CrossConformalRegressor,
+ JackknifeAfterBootstrapRegressor)
-random_state = 18
-rng = np.random.default_rng(random_state)
+RANDOM_STATE = 1
+rng = np.random.default_rng(RANDOM_STATE)
round_to = 3
warnings.filterwarnings("ignore")
@@ -58,7 +50,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
# The target variable of this dataset is the median house value for the
# California districts. This dataset is composed of 8 features, including
# variables such as the age of the house, the median income of the
-# neighborhood, the average numbe rooms or bedrooms or even the location in
+# neighborhood, the average number of rooms or bedrooms, or even the location in
# latitude and longitude. In total there are around 20k observations.
# As the value is expressed in thousands of $ we will multiply it by 100 for
# better visualization (note that this will not affect the results).
@@ -92,14 +84,14 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
##############################################################################
# Let's now create the different splits for the dataset, with a training,
-# calibration and test set. Recall that the calibration set is used for
-# calibrating the prediction intervals.
+# conformalization and test set. Remember that the conformalization set is
+# used to conformalize the prediction intervals.
-X_train, X_test, y_train, y_test = train_test_split(
+X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
X,
y['MedHouseVal'],
- random_state=random_state
+ random_state=RANDOM_STATE
)
@@ -116,7 +108,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
estimator = LGBMRegressor(
objective='quantile',
alpha=0.5,
- random_state=random_state,
+ random_state=RANDOM_STATE,
verbose=-1
)
params_distributions = dict(
@@ -131,9 +123,9 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
n_jobs=-1,
n_iter=10,
cv=KFold(n_splits=5, shuffle=True),
- random_state=random_state
+ random_state=RANDOM_STATE
)
-optim_model.fit(X_train, y_train)
+optim_model.fit(X_train_conformalize, y_train_conformalize)
estimator = optim_model.best_estimator_
@@ -142,7 +134,7 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is
# --------------------------------------------------------------------------
# We will now proceed to compare the different methods available in MAPIE used
# for uncertainty quantification on regression settings. For this tutorial we
-# will compare the "naive", "Jackknife plus after Bootstrap", "cv plus" and
+# will compare the "cv", "cv plus", "jackknife plus after bootstrap" and
# "conformalized quantile regression". Please have a look at the theoretical
# description of the documentation for more details on these methods.
#
@@ -229,68 +221,69 @@ def plot_prediction_intervals(
##############################################################################
-# We proceed to using MAPIE to return the predictions and prediction intervals.
-# We will use an ``α=0.2``, this means a target coverage of 0.8
-# (recall that this parameter needs to be initialized directly when setting
-# :class:`~mapie.quantile_regression.MapieQuantileRegressor` and when using
-# :class:`~mapie.regression.MapieRegressor`, it needs to be set in the
-# ``predict``).
-# Note that for the CQR, there are two options for ``cv``:
-#
-# * ``cv="split"`` (by default), the split-conformal where MAPIE trains the
-# model on a training set and then calibrates on the calibration set.
-# * ``cv="prefit"`` meaning that you can train your models with the correct
-# quantile values (must be given in the following order:
-# ``(α, 1-(α/2), 0.5)`` and given to MAPIE as an iterable
-# object. (Check the examples for how to use prefit in MAPIE)
-#
-# Additionally, note that there is a list of accepted models by
-# :class:`~mapie.quantile_regression.MapieQuantileRegressor`
-# (``quantile_estimator_params``) and that we will use symmetrical residuals.
+# Here, we use MAPIE to return the predictions and prediction intervals.
+# We set ``confidence_level=CONFIDENCE_LEVEL`` (this is the target
+# coverage for our prediction intervals).
+# Note that we will use symmetrical residuals for the CQR.
STRATEGIES = {
- "naive": {"method": "naive"},
- "cv_plus": {"method": "plus", "cv": 10},
- "jackknife_plus_ab": {"method": "plus", "cv": Subsample(n_resamplings=50)},
- "cqr": {"method": "quantile", "cv": "split", "alpha": 0.2},
+ "cv": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=10),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+CONFIDENCE_LEVEL = 0.8
y_pred, y_pis = {}, {}
y_test_sorted, y_pred_sorted, lower_bound, upper_bound = {}, {}, {}, {}
coverage, width = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "cqr":
- mapie = MapieQuantileRegressor(estimator, **params)
- mapie.fit(
- X_train,
- y_train,
- calib_size=0.3,
- random_state=random_state
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
+ mapie = class_(estimator, confidence_level=CONFIDENCE_LEVEL, **init_params)
+ mapie.fit(X_train, y_train)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(
+ X_test, symmetric_correction=True)
else:
- mapie = MapieRegressor(
- estimator,
- test_size=0.3,
- random_state=random_state,
- **params
+ mapie = class_(
+ estimator, confidence_level=CONFIDENCE_LEVEL,
+ random_state=RANDOM_STATE, **init_params
)
- mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.2)
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
(
- y_test_sorted[strategy],
- y_pred_sorted[strategy],
- lower_bound[strategy],
- upper_bound[strategy]
- ) = sort_y_values(y_test, y_pred[strategy], y_pis[strategy])
- coverage[strategy] = regression_coverage_score(
+ y_test_sorted[strategy_name],
+ y_pred_sorted[strategy_name],
+ lower_bound[strategy_name],
+ upper_bound[strategy_name]
+ ) = sort_y_values(y_test, y_pred[strategy_name], y_pis[strategy_name])
+ coverage[strategy_name] = regression_coverage_score(
y_test,
- y_pis[strategy][:, 0, 0],
- y_pis[strategy][:, 1, 0]
- )
- width[strategy] = regression_mean_width_score(
- y_pis[strategy][:, 0, 0],
- y_pis[strategy][:, 1, 0]
+ y_pis[strategy_name]
+ )[0]
+ width[strategy_name] = regression_mean_width_score(
+ y_pis[strategy_name][:, 0, 0],
+ y_pis[strategy_name][:, 1, 0]
)
@@ -305,16 +298,16 @@ def plot_prediction_intervals(
)
fig, axs = plt.subplots(2, 2, figsize=(15, 13))
coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]]
-for strategy, coord in zip(STRATEGIES.keys(), coords):
+for strategy_name, coord in zip(STRATEGIES.keys(), coords):
plot_prediction_intervals(
- strategy,
+ strategy_name,
coord,
- y_test_sorted[strategy],
- y_pred_sorted[strategy],
- lower_bound[strategy],
- upper_bound[strategy],
- coverage[strategy],
- width[strategy],
+ y_test_sorted[strategy_name],
+ y_pred_sorted[strategy_name],
+ lower_bound[strategy_name],
+ upper_bound[strategy_name],
+ coverage[strategy_name],
+ width[strategy_name],
num_plots
)
lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes]
@@ -333,8 +326,7 @@ def plot_prediction_intervals(
##############################################################################
# We notice more adaptability of the prediction intervals for the
# conformalized quantile regression while the other methods have fixed
-# interval width. Indeed, as the prices get larger, the prediction intervals
-# are increased with the increase in price.
+# interval width.
def get_coverages_widths_by_bins(
@@ -352,11 +344,11 @@ def get_coverages_widths_by_bins(
or width per bin.
"""
cuts = []
- cuts_ = pd.qcut(y_test["naive"], bins).unique()[:-1]
+ cuts_ = pd.qcut(y_test["cv"], bins).unique()[:-1]
for item in cuts_:
cuts.append(item.left)
cuts.append(cuts_[-1].right)
- cuts.append(np.max(y_test["naive"])+1)
+ cuts.append(np.max(y_test["cv"])+1)
recap = {}
for i in range(len(cuts) - 1):
cut1, cut2 = cuts[i], cuts[i+1]
@@ -371,10 +363,8 @@ def get_coverages_widths_by_bins(
y_high_ = np.take(upper_bound[strategy], indices)
if want == "coverage":
recap[name].append(regression_coverage_score(
- y_test_trunc[0],
- y_low_[0],
- y_high_[0]
- ))
+ y_test_trunc[0], np.stack((y_low_[0], y_high_[0]), axis=-1)
+ )[0])
elif want == "width":
recap[name].append(
regression_mean_width_score(y_low_[0], y_high_[0])
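
The call above reflects the metric signature assumed throughout this diff:
``regression_coverage_score`` takes the ``(lower, upper)`` bounds stacked on the
last axis and returns one coverage per set of intervals, hence the trailing
``[0]``. A toy sketch of that convention::

    import numpy as np
    from mapie.metrics.regression import regression_coverage_score

    y_true = np.array([1.0, 2.0, 3.0])
    y_low = np.array([0.5, 2.5, 2.5])
    y_high = np.array([1.5, 3.5, 3.5])
    # Bounds are stacked on the last axis, giving shape (n_samples, 2)
    intervals = np.stack((y_low, y_high), axis=-1)
    coverage = regression_coverage_score(y_true, intervals)[0]
    print(coverage)  # 2/3 here: 1.0 and 3.0 fall inside, 2.0 does not
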
@@ -402,7 +392,7 @@ def get_coverages_widths_by_bins(
binned_data.T.plot.bar(figsize=(12, 4))
-plt.axhline(0.80, ls="--", color="k")
+plt.axhline(CONFIDENCE_LEVEL, ls="--", color="k")
plt.ylabel("Conditional coverage")
plt.xlabel("Binned house prices")
plt.xticks(rotation=345)
@@ -412,8 +402,8 @@ def get_coverages_widths_by_bins(
##############################################################################
-# What we observe from these results is that none of the methods seems to
-# have conditional coverage at the target ``1 - α``. However, we can
+# None of the methods seems to
+# have conditional coverage at the target ``confidence_level``. However, we can
# clearly notice that the CQR seems to better adapt to large prices. Its
# conditional coverage is closer to the target coverage not only for higher
# prices, but also for lower prices where the other methods have a higher
@@ -442,7 +432,6 @@ def get_coverages_widths_by_bins(
##############################################################################
# When observing the values of the interval width we again see what was
-# observed in the previous graphs with the interval widths. We can again see
-# that the prediction intervals are larger as the price of the houses
-# increases, interestingly, it's important to note that the prediction
+# observed in the previous graphs. It's important to
+# note that the prediction
# intervals are shorter when the estimator is more certain.
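
For readers migrating their own scripts, this is the v1 workflow the example
now follows; a minimal, self-contained sketch on toy data (the API calls
mirror those used in the hunks above)::

    import numpy as np
    from sklearn.linear_model import LinearRegression, QuantileRegressor
    from sklearn.model_selection import train_test_split
    from mapie_v1.regression import (
        ConformalizedQuantileRegressor, CrossConformalRegressor
    )

    rng = np.random.default_rng(1)
    X = rng.normal(size=(500, 2))
    y = X[:, 0] + rng.normal(scale=0.5, size=500)
    X_new = rng.normal(size=(5, 2))

    # Split-conformal CQR: explicit fit, then conformalize on held-out data
    X_train, X_conformalize, y_train, y_conformalize = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    cqr = ConformalizedQuantileRegressor(
        QuantileRegressor(solver="highs", alpha=0), confidence_level=0.8
    )
    cqr.fit(X_train, y_train)
    cqr.conformalize(X_conformalize, y_conformalize)
    y_pred_cqr, y_pis_cqr = cqr.predict_interval(X_new)

    # Cross-conformal: fitting and conformalization in a single call
    ccr = CrossConformalRegressor(
        LinearRegression(), confidence_level=0.8, method="plus", cv=10,
        random_state=1
    )
    ccr.fit_conformalize(X, y)
    y_pred_ccr, y_pis_ccr = ccr.predict_interval(X_new)
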
diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/2-advanced-analysis/plot_main-tutorial-regression.py
similarity index 65%
rename from examples/regression/4-tutorials/plot_main-tutorial-regression.py
rename to examples/regression/2-advanced-analysis/plot_main-tutorial-regression.py
index 3c5cdb8e0..0e4402fb4 100644
--- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py
+++ b/examples/regression/2-advanced-analysis/plot_main-tutorial-regression.py
@@ -1,7 +1,8 @@
r"""
-===============================
-Tutorial for tabular regression
-===============================
+================================================================================
+Comparison between conformalized quantile regressor and cross-conformal methods
+================================================================================
+
In this tutorial, we compare the prediction intervals estimated by MAPIE on a
simple, one-dimensional, ground truth function ``f(x) = x * sin(x)``.
@@ -34,10 +35,14 @@
from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import train_test_split
-from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieQuantileRegressor, MapieRegressor
-from mapie.subsample import Subsample
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import (
+ CrossConformalRegressor,
+ JackknifeAfterBootstrapRegressor,
+ ConformalizedQuantileRegressor
+)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
warnings.filterwarnings("ignore")
@@ -62,13 +67,13 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise):
Generate 1D noisy data uniformly from the given function
and standard deviation for the noise.
"""
- np.random.seed(59)
+ rng = np.random.default_rng(59)
X_train = np.linspace(min_x, max_x, n_samples)
- np.random.shuffle(X_train)
- X_test = np.linspace(min_x, max_x, n_samples*5)
+ rng.shuffle(X_train)
+ X_test = np.linspace(min_x, max_x, n_samples)
y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test)
- y_train += np.random.normal(0, noise, y_train.shape[0])
- y_test += np.random.normal(0, noise, y_test.shape[0])
+ y_train += rng.normal(0, noise, y_train.shape[0])
+ y_test += rng.normal(0, noise, y_test.shape[0])
return (
X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh
)
@@ -81,16 +86,17 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise):
min_x, max_x, n_samples, noise = -5, 5, 600, 0.5
-X_train, y_train, X_test, y_test, y_mesh = get_1d_data_with_constant_noise(
- x_sinx, min_x, max_x, n_samples, noise
+X_train_conformalize, y_train_conformalize, X_test, y_test, y_mesh = (
+ get_1d_data_with_constant_noise(
+ x_sinx, min_x, max_x, n_samples, noise
+ )
)
-
##############################################################################
# Let's visualize our noisy function.
plt.xlabel("x")
plt.ylabel("y")
-plt.scatter(X_train, y_train, color="C0")
+plt.scatter(X_train_conformalize, y_train_conformalize, color="C0")
_ = plt.plot(X_test, y_mesh, color="C1")
plt.show()
@@ -99,16 +105,16 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise):
# polynomial function. Here, we choose a degree equal to 10 so the function
# is able to perfectly fit ``x * sin(x)``.
-degree_polyn = 10
+DEGREE_POLYN = 10
polyn_model = Pipeline(
[
- ("poly", PolynomialFeatures(degree=degree_polyn)),
+ ("poly", PolynomialFeatures(degree=DEGREE_POLYN)),
("linear", LinearRegression())
]
)
polyn_model_quant = Pipeline(
[
- ("poly", PolynomialFeatures(degree=degree_polyn)),
+ ("poly", PolynomialFeatures(degree=DEGREE_POLYN)),
("linear", QuantileRegressor(
solver="highs",
alpha=0,
@@ -120,53 +126,85 @@ def get_1d_data_with_constant_noise(funct, min_x, max_x, n_samples, noise):
# We then estimate the prediction intervals for all the strategies very easily
# with a ``fit_conformalize`` (or ``fit`` then ``conformalize``) and
# ``predict_interval`` process. The prediction interval's lower and upper bounds
-# are then saved in a DataFrame. Here, we set an alpha value of 0.05
+# are then saved in a DataFrame. Here, we set ``confidence_level=0.95``
# in order to obtain a 95% confidence for our prediction intervals.
-
+RANDOM_STATE = 1
STRATEGIES = {
- "naive": dict(method="naive"),
- "jackknife": dict(method="base", cv=-1),
- "jackknife_plus": dict(method="plus", cv=-1),
- "jackknife_minmax": dict(method="minmax", cv=-1),
- "cv": dict(method="base", cv=10),
- "cv_plus": dict(method="plus", cv=10),
- "cv_minmax": dict(method="minmax", cv=10),
- "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
- "jackknife_minmax_ab": dict(
- method="minmax", cv=Subsample(n_resamplings=50)
- ),
- "conformalized_quantile_regression": dict(
- method="quantile", cv="split", alpha=0.05
- )
+ "cv": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=10),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "cv_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "jackknife_minmax_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="minmax", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+
+
y_pred, y_pis = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor(polyn_model_quant, **params)
- mapie.fit(X_train, y_train, random_state=1)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(polyn_model, **params)
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = (
+ train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
+ )
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
+ )
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
##############################################################################
# Let’s now compare the target confidence intervals with the predicted
# intervals obtained with the Jackknife+, Jackknife-minmax, CV+, CV-minmax,
# Jackknife+-after-Bootstrap, and conformalized quantile regression (CQR)
-# strategies. Note that for the Jackknife-after-Bootstrap method, we call the
-# :class:`~mapie.subsample.Subsample` object that allows us to train
-# bootstrapped models. Note also that the CQR method is called with
-# :class:`~mapie.quantile_regression.MapieQuantileRegressor` with a
-# "split" strategy.
+# strategies. Note that when the CQR method is used via
+# :class:`~mapie_v1.regression.ConformalizedQuantileRegressor` with ``prefit=False``,
+# it follows a "split" strategy.
def plot_1d_data(
- X_train,
- y_train,
X_test,
y_test,
+ y_mesh,
y_sigma,
y_pred,
y_pred_low,
@@ -180,14 +218,14 @@ def plot_1d_data(
X_test, y_pred_low, y_pred_up, alpha=0.3, label="Prediction intervals"
)
ax.scatter(
- X_train, y_train, color="red", alpha=0.3, label="Training data"
+ X_test, y_test, color="red", alpha=0.3, label="Test data"
)
- ax.plot(X_test, y_test, color="gray")
+ ax.plot(X_test, y_mesh, color="gray")
ax.plot(
- X_test, y_test - y_sigma, color="gray", ls="--",
+ X_test, y_mesh - y_sigma, color="gray", ls="--",
label="True confidence intervals"
)
- ax.plot(X_test, y_test + y_sigma, color="gray", ls="--")
+ ax.plot(X_test, y_mesh + y_sigma, color="gray", ls="--")
ax.plot(
X_test, y_pred, color="blue", alpha=0.5, label="y_pred"
)
@@ -204,14 +242,14 @@ def plot_1d_data(
"jackknife_plus_ab",
"conformalized_quantile_regression"
]
+
n_figs = len(strategies)
fig, axs = plt.subplots(3, 2, figsize=(9, 13))
coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1], axs[2, 0], axs[2, 1]]
for strategy, coord in zip(strategies, coords):
plot_1d_data(
- X_train.ravel(),
- y_train.ravel(),
X_test.ravel(),
+ y_test.ravel(),
y_mesh.ravel(),
np.full((X_test.shape[0]), 1.96*noise).ravel(),
y_pred[strategy].ravel(),
@@ -223,7 +261,7 @@ def plot_1d_data(
plt.show()
##############################################################################
-# At first glance, the four strategies give similar results and the
+# At first glance, the strategies give similar results and the
# prediction intervals are very close to the true confidence intervals.
# Let’s confirm this by comparing the prediction interval widths over
# `x` between all strategies.
@@ -244,12 +282,12 @@ def plot_1d_data(
##############################################################################
-# As expected, the prediction intervals estimated by the Naive method
-# are slightly too narrow. The Jackknife, Jackknife+, CV, CV+, JaB, and J+aB
+# The Jackknife, Jackknife+, CV, CV+, and J+aB
# give
# similar widths that are very close to the true width. On the other hand,
-# the width estimated by Jackknife-minmax and CV-minmax are slightly too
-# wide. Note that the widths given by the Naive, Jackknife, and CV strategies
+# the width estimated by Jackknife-minmax, Jackknife-minmax-after-Bootstrap
+# and CV-minmax are slightly too
+# wide. Note that the widths given by the Jackknife and CV strategies
# are constant because there is a single model used for prediction,
# perturbed models are ignored at prediction time.
#
@@ -266,8 +304,8 @@ def plot_1d_data(
pd.DataFrame([
[
regression_coverage_score(
- y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]
- ),
+ y_test, y_pis[strategy]
+ )[0],
(
y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]
).mean()
@@ -276,7 +314,7 @@ def plot_1d_data(
##############################################################################
-# All strategies except the Naive one give effective coverage close to the
+# All strategies give effective coverage close to the
# expected 0.95 value (recall that ``confidence_level=0.95``), confirming the
# theoretical guarantees.
@@ -296,17 +334,17 @@ def get_1d_data_with_heteroscedastic_noise(
Generate 1D noisy data uniformly from the given function
and standard deviation for the noise.
"""
- np.random.seed(59)
+ rng = np.random.default_rng(59)
X_train = np.linspace(min_x, max_x, n_samples)
- np.random.shuffle(X_train)
- X_test = np.linspace(min_x, max_x, n_samples*5)
+ rng.shuffle(X_train)
+ X_test = np.linspace(min_x, max_x, n_samples)
y_train = (
funct(X_train) +
- (np.random.normal(0, noise, len(X_train)) * X_train)
+ (rng.normal(0, noise, len(X_train)) * X_train)
)
y_test = (
funct(X_test) +
- (np.random.normal(0, noise, len(X_test)) * X_test)
+ (rng.normal(0, noise, len(X_test)) * X_test)
)
y_mesh = funct(X_test)
return (
@@ -322,7 +360,7 @@ def get_1d_data_with_heteroscedastic_noise(
min_x, max_x, n_samples, noise = 0, 5, 300, 0.5
(
- X_train, y_train, X_test, y_test, y_mesh
+ X_train_conformalize, y_train_conformalize, X_test, y_test, y_mesh
) = get_1d_data_with_heteroscedastic_noise(
x_sinx, min_x, max_x, n_samples, noise
)
@@ -334,7 +372,7 @@ def get_1d_data_with_heteroscedastic_noise(
plt.xlabel("x")
plt.ylabel("y")
-plt.scatter(X_train, y_train, color="C0")
+plt.scatter(X_train_conformalize, y_train_conformalize, color="C0")
plt.plot(X_test, y_mesh, color="C1")
plt.show()
@@ -343,16 +381,16 @@ def get_1d_data_with_heteroscedastic_noise(
# polynomial function. Here, we choose a degree equal to 10 so the function
# is able to perfectly fit ``x * sin(x)``.
-degree_polyn = 10
+DEGREE_POLYN = 10
polyn_model = Pipeline(
[
- ("poly", PolynomialFeatures(degree=degree_polyn)),
+ ("poly", PolynomialFeatures(degree=DEGREE_POLYN)),
("linear", LinearRegression())
]
)
polyn_model_quant = Pipeline(
[
- ("poly", PolynomialFeatures(degree=degree_polyn)),
+ ("poly", PolynomialFeatures(degree=DEGREE_POLYN)),
("linear", QuantileRegressor(
solver="highs",
alpha=0,
@@ -364,33 +402,67 @@ def get_1d_data_with_heteroscedastic_noise(
# We then estimate the prediction intervals for all the strategies very easily
# with a ``fit_conformalize`` (or ``fit`` then ``conformalize``) and
# ``predict_interval`` process. The prediction interval's lower and upper bounds
-# are then saved in a DataFrame. Here, we set an alpha value of 0.05
+# are then saved in a DataFrame. Here, we set ``confidence_level=0.95``
# in order to obtain a 95% confidence for our prediction intervals.
STRATEGIES = {
- "naive": dict(method="naive"),
- "jackknife": dict(method="base", cv=-1),
- "jackknife_plus": dict(method="plus", cv=-1),
- "jackknife_minmax": dict(method="minmax", cv=-1),
- "cv": dict(method="base", cv=10),
- "cv_plus": dict(method="plus", cv=10),
- "cv_minmax": dict(method="minmax", cv=10),
- "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
- "conformalized_quantile_regression": dict(
- method="quantile", cv="split", alpha=0.05
- )
+ "cv": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=10),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "cv_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "jackknife_minmax_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="minmax", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+
y_pred, y_pis = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor(polyn_model_quant, **params)
- mapie.fit(X_train, y_train, random_state=1)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(polyn_model, **params)
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
-
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
+ )
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
##############################################################################
# Once again, let’s compare the target confidence intervals with prediction
@@ -410,9 +482,8 @@ def get_1d_data_with_heteroscedastic_noise(
coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1], axs[2, 0], axs[2, 1]]
for strategy, coord in zip(strategies, coords):
plot_1d_data(
- X_train.ravel(),
- y_train.ravel(),
X_test.ravel(),
+ y_test.ravel(),
y_mesh.ravel(),
(1.96*noise*X_test).ravel(),
y_pred[strategy].ravel(),
@@ -452,7 +523,7 @@ def get_1d_data_with_heteroscedastic_noise(
# even slightly follows the true width, and therefore is the preferred method
# for heteroscedastic data. Notice also that the true width is greater (lower)
# than the predicted width from the other methods at ``x ≳ 3``
-# (``x ≤ 3``). This means that while the marginal coverage correct for
+# (``x ≤ 3``). This means that while the marginal coverage is correct for
# these methods, the conditional coverage is likely not guaranteed as we will
# observe in the next figure.
@@ -468,8 +539,8 @@ def get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins):
y_low_ = np.take(y_pis[strategy][:, 0, 0], indices)
y_high_ = np.take(y_pis[strategy][:, 1, 0], indices)
score_coverage = regression_coverage_score(
- y_test_trunc[0], y_low_[0], y_high_[0]
- )
+ y_test_trunc[0], np.stack((y_low_[0], y_high_[0]), axis=-1)
+ )[0]
recap[name].append(score_coverage)
recap_df = pd.DataFrame(recap, index=STRATEGIES)
return recap_df
@@ -499,8 +570,8 @@ def get_heteroscedastic_coverage(y_test, y_pis, STRATEGIES, bins):
pd.DataFrame([
[
regression_coverage_score(
- y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]
- ),
+ y_test, y_pis[strategy]
+ )[0],
(
y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]
).mean()
@@ -534,67 +605,103 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
Generate noisy 1D data with normal distribution from a given function
and noise standard deviation.
"""
- np.random.seed(59)
- X_train = np.random.normal(mu, sigma, n_samples)
- X_test = np.arange(mu-4*sigma, mu+4*sigma, sigma/20.)
+ rng = np.random.default_rng(59)
+ X_train = rng.normal(mu, sigma, n_samples)
+ X_test = np.arange(mu-4*sigma, mu+4*sigma, sigma/10.)
y_train, y_mesh, y_test = funct(X_train), funct(X_test), funct(X_test)
- y_train += np.random.normal(0, noise, y_train.shape[0])
- y_test += np.random.normal(0, noise, y_test.shape[0])
+ y_train += rng.normal(0, noise, y_train.shape[0])
+ y_test += rng.normal(0, noise, y_test.shape[0])
return (
X_train.reshape(-1, 1), y_train, X_test.reshape(-1, 1), y_test, y_mesh
)
mu, sigma, n_samples, noise = 0, 2, 1000, 0.
-X_train, y_train, X_test, y_test, y_mesh = get_1d_data_with_normal_distrib(
- x_sinx, mu, sigma, n_samples, noise
+X_train_conformalize, y_train_conformalize, X_test, y_test, y_mesh = (
+ get_1d_data_with_normal_distrib(
+ x_sinx, mu, sigma, n_samples, noise
+ )
)
plt.xlabel("x")
plt.ylabel("y")
-plt.scatter(X_train, y_train, color="C0")
+plt.scatter(X_train_conformalize, y_train_conformalize, color="C0")
_ = plt.plot(X_test, y_test, color="C1")
plt.show()
##############################################################################
# As before, we estimate the prediction intervals using a polynomial
-# function of degree 10 and show the results for the Jackknife+ and CV+
+# function of degree 10 and show the results for some of the
# strategies.
polyn_model_quant = Pipeline(
[
- ("poly", PolynomialFeatures(degree=degree_polyn)),
+ ("poly", PolynomialFeatures(degree=DEGREE_POLYN)),
("linear", QuantileRegressor(
solver="highs-ds",
alpha=0,
))
]
)
+
STRATEGIES = {
- "naive": dict(method="naive"),
- "jackknife": dict(method="base", cv=-1),
- "jackknife_plus": dict(method="plus", cv=-1),
- "jackknife_minmax": dict(method="minmax", cv=-1),
- "cv": dict(method="base", cv=10),
- "cv_plus": dict(method="plus", cv=10),
- "cv_minmax": dict(method="minmax", cv=10),
- "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
- "jackknife_minmax_ab": dict(
- method="minmax", cv=Subsample(n_resamplings=50)
- ),
- "conformalized_quantile_regression": dict(
- method="quantile", cv="split", alpha=0.05
- )
+ "cv": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=10),
+ },
+ "cv_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=10),
+ },
+ "cv_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=10),
+ },
+ "jackknife": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="base", cv=-1),
+ },
+ "jackknife_plus": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="plus", cv=-1),
+ },
+ "jackknife_minmax": {
+ "class": CrossConformalRegressor,
+ "init_params": dict(method="minmax", cv=-1),
+ },
+ "jackknife_plus_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="plus", resampling=50),
+ },
+ "jackknife_minmax_ab": {
+ "class": JackknifeAfterBootstrapRegressor,
+ "init_params": dict(method="minmax", resampling=50),
+ },
+ "conformalized_quantile_regression": {
+ "class": ConformalizedQuantileRegressor,
+ "init_params": dict(),
+ },
}
+
+
y_pred, y_pis = {}, {}
-for strategy, params in STRATEGIES.items():
- if strategy == "conformalized_quantile_regression":
- mapie = MapieQuantileRegressor(polyn_model_quant, **params)
- mapie.fit(X_train, y_train, random_state=1)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
- else:
- mapie = MapieRegressor(polyn_model, **params)
+for strategy_name, strategy_params in STRATEGIES.items():
+ init_params = strategy_params["init_params"]
+ class_ = strategy_params["class"]
+ if strategy_name == "conformalized_quantile_regression":
+ X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+ X_train_conformalize, y_train_conformalize,
+ test_size=0.3, random_state=RANDOM_STATE
+ )
+ mapie = class_(polyn_model_quant, confidence_level=0.95, **init_params)
mapie.fit(X_train, y_train)
- y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
+ mapie.conformalize(X_conformalize, y_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
+ else:
+ mapie = class_(
+ polyn_model, confidence_level=0.95, random_state=RANDOM_STATE, **init_params
+ )
+ mapie.fit_conformalize(X_train_conformalize, y_train_conformalize)
+ y_pred[strategy_name], y_pis[strategy_name] = mapie.predict_interval(X_test)
strategies = [
"jackknife_plus",
@@ -609,9 +716,8 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
coords = [axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1], axs[2, 0], axs[2, 1]]
for strategy, coord in zip(strategies, coords):
plot_1d_data(
- X_train.ravel(),
- y_train.ravel(),
X_test.ravel(),
+ y_test.ravel(),
y_mesh.ravel(),
1.96*noise,
y_pred[strategy].ravel(),
@@ -633,7 +739,6 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
# Let's now compare the prediction interval widths between all strategies.
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
-ax.set_yscale("log")
for strategy in STRATEGIES:
ax.plot(
X_test,
@@ -647,9 +752,9 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
##############################################################################
# The prediction interval widths start to increase exponentially
-# for ``|x| > 4`` for the CV+, CV-minmax, Jackknife-minmax, and quantile
+# for ``|x| > 4`` for the CV+, CV-minmax, Jackknife-minmax, and Jackknife-after-Bootstrap
# strategies. On the other hand, the prediction intervals estimated by
-# Jackknife+ remain roughly constant until ``|x| ≈ 5`` before
+# Jackknife+ remain roughly constant until ``|x| ≈ 6`` before
# increasing.
# The CQR strategy seems to perform well; however, on the extreme values
# of the data the quantile regression fails to give reliable results as it
@@ -662,8 +767,8 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
pd.DataFrame([
[
regression_coverage_score(
- y_test, y_pis[strategy][:, 0, 0], y_pis[strategy][:, 1, 0]
- ),
+ y_test, y_pis[strategy]
+ )[0],
(
y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]
).mean()
@@ -671,11 +776,11 @@ def get_1d_data_with_normal_distrib(funct, mu, sigma, n_samples, noise):
], index=STRATEGIES, columns=["Coverage", "Width average"]).round(3)
##############################################################################
-# In conclusion, the Jackknife-minmax, CV+, CV-minmax, or Jackknife-minmax-ab
+# In conclusion, the Jackknife-minmax, CV+, CV-minmax, or Jackknife-after-Bootstrap
# strategies are more
# conservative than the Jackknife+ strategy, and tend to result in more
# reliable coverages for *out-of-distribution* data. It is therefore
-# advised to use the three former strategies for predictions with new
+# advised to use these more conservative strategies for predictions with new
# out-of-distribution data.
# Note however that there are no theoretical guarantees on the coverage level
# for out-of-distribution data.
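
Throughout these examples, widths are computed directly from the interval
array. A small sketch of the layout implied by the indexing used above,
``y_pis[:, bound, confidence_level_index]``::

    import numpy as np

    # Shape (n_samples, 2, n_confidence_levels): axis 1 holds the lower (0)
    # and upper (1) bounds; with a single confidence_level the last index
    # is always 0.
    y_pis = np.array([[[0.5], [1.5]],
                      [[2.5], [4.5]]])
    widths = y_pis[:, 1, 0] - y_pis[:, 0, 0]
    print(widths)         # [1. 2.]
    print(widths.mean())  # 1.5, the "Width average" reported in the tables
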
diff --git a/examples/regression/2-advanced-analysis/plot_nested-cv.py b/examples/regression/2-advanced-analysis/plot_nested-cv.py
index 1613dff08..78fb9f6e5 100644
--- a/examples/regression/2-advanced-analysis/plot_nested-cv.py
+++ b/examples/regression/2-advanced-analysis/plot_nested-cv.py
@@ -1,39 +1,33 @@
"""
-===========================================================
-Nested cross-validation for estimating prediction intervals
-===========================================================
-
-This example compares non-nested and nested cross-validation strategies for
-estimating prediction intervals with :class:`~mapie.regression.MapieRegressor`.
-
-In the regular sequential method, a cross-validation parameter search is
-carried out over the entire training set.
-The model with the set of parameters that gives the best score is then used in
-MAPIE to estimate the prediction intervals associated with the predictions.
-A limitation of this method is that residuals used by MAPIE are computed on
-the validation dataset, which can be subject to overfitting as far as
-hyperparameter tuning is concerned.
-
-This fools MAPIE into being slightly too optimistic with confidence intervals.
+==========================================================================================
+Hyperparameter tuning with CrossConformalRegressor
+==========================================================================================
+
+
+This example compares non-nested and nested cross-validation strategies
+when using
+:class:`~mapie_v1.regression.CrossConformalRegressor`.
+
+In the regular sequential method, a cross-validation parameter search is performed
+on the entire training set. The best model is then used in MAPIE to estimate
+prediction intervals. However, as MAPIE computes residuals on
+the validation dataset used during hyperparameter tuning, it can lead to
+overfitting. This fools MAPIE into being slightly too optimistic with confidence
+intervals.
+
To solve this problem, an alternative option is to perform a nested
cross-validation parameter search directly within the MAPIE estimator on each
*out-of-fold* dataset.
-For each testing fold used by MAPIE to store residuals, an internal
-cross-validation occurs on the training fold, optimizing hyperparameters.
This ensures that residuals seen by MAPIE are never seen by the algorithm
beforehand. However, this method is much heavier computationally since
it results in ``N * P`` calculations, where *N* is the number of
*out-of-fold* models and *P* the number of parameter search cross-validations,
versus ``N + P`` for the non-nested approach.
-Here, we compare the two strategies on a toy dataset. We use the Random
-Forest Regressor as a base regressor for the CV+ strategy. For the sake of
-light computation, we adopt a RandomizedSearchCV parameter search strategy
-with a low number of iterations and with a reproducible random state.
+Here, we compare the two strategies on a toy dataset.
The two approaches give slightly different predictions with the nested CV
-approach estimating slightly larger prediction interval widths by a
-few percents at most (apart from a handful of exceptions).
+approach estimating larger prediction intervals on average.
For this example, the two approaches result in identical scores and identical
effective coverages.
@@ -47,32 +41,32 @@
import numpy as np
from scipy.stats import randint
from sklearn.ensemble import RandomForestRegressor
-from sklearn.metrics import mean_squared_error
+from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.datasets import make_sparse_uncorrelated
-from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieRegressor
+from mapie.metrics.regression import regression_coverage_score
+from mapie_v1.regression import CrossConformalRegressor
-random_state = 42
+RANDOM_STATE = 42
# Load the toy data
-X, y = make_sparse_uncorrelated(500, random_state=random_state)
+X, y = make_sparse_uncorrelated(500, random_state=RANDOM_STATE)
# Split the data into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.2, random_state=random_state
+ X, y, test_size=0.2, random_state=RANDOM_STATE
)
# Define the Random Forest model as base regressor with parameter ranges.
-rf_model = RandomForestRegressor(random_state=random_state, verbose=0)
+rf_model = RandomForestRegressor(random_state=RANDOM_STATE, verbose=0)
rf_params = {"max_depth": randint(2, 10), "n_estimators": randint(10, 100)}
# Cross-validation and prediction-interval parameters.
cv = 10
n_iter = 5
-alpha = 0.05
+confidence_level = 0.95
# Non-nested approach with the CV+ strategy using the Random Forest model.
cv_obj = RandomizedSearchCV(
@@ -83,24 +77,24 @@
scoring="neg_root_mean_squared_error",
return_train_score=True,
verbose=0,
- random_state=random_state,
+ random_state=RANDOM_STATE,
n_jobs=-1,
)
cv_obj.fit(X_train, y_train)
best_est = cv_obj.best_estimator_
-mapie_non_nested = MapieRegressor(
- best_est, method="plus", cv=cv, agg_function="median", n_jobs=-1,
- random_state=random_state
+mapie_non_nested = CrossConformalRegressor(
+ estimator=best_est, method="plus", cv=cv, n_jobs=-1,
+ confidence_level=confidence_level, random_state=RANDOM_STATE
)
-mapie_non_nested.fit(X_train, y_train)
-y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict(
- X_test, alpha=alpha
+mapie_non_nested.fit_conformalize(X_train, y_train)
+y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict_interval(
+ X_test, aggregate_predictions='median'
)
widths_non_nested = y_pis_non_nested[:, 1, 0] - y_pis_non_nested[:, 0, 0]
coverage_non_nested = regression_coverage_score(
- y_test, y_pis_non_nested[:, 0, 0], y_pis_non_nested[:, 1, 0]
-)
-score_non_nested = mean_squared_error(y_test, y_pred_non_nested, squared=False)
+ y_test, y_pis_non_nested
+)[0]
+score_non_nested = root_mean_squared_error(y_test, y_pred_non_nested)
# Nested approach with the CV+ strategy using the Random Forest model.
cv_obj = RandomizedSearchCV(
@@ -111,20 +105,22 @@
scoring="neg_root_mean_squared_error",
return_train_score=True,
verbose=0,
- random_state=random_state,
+ random_state=RANDOM_STATE,
n_jobs=-1,
)
-mapie_nested = MapieRegressor(
- cv_obj, method="plus", cv=cv, agg_function="median",
- random_state=random_state
+mapie_nested = CrossConformalRegressor(
+ estimator=cv_obj, method="plus", cv=cv, n_jobs=-1,
+ confidence_level=confidence_level, random_state=RANDOM_STATE
+)
+mapie_nested.fit_conformalize(X_train, y_train)
+y_pred_nested, y_pis_nested = mapie_nested.predict_interval(
+ X_test, aggregate_predictions='median'
)
-mapie_nested.fit(X_train, y_train)
-y_pred_nested, y_pis_nested = mapie_nested.predict(X_test, alpha=alpha)
widths_nested = y_pis_nested[:, 1, 0] - y_pis_nested[:, 0, 0]
coverage_nested = regression_coverage_score(
- y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0]
-)
-score_nested = mean_squared_error(y_test, y_pred_nested, squared=False)
+ y_test, y_pis_nested
+)[0]
+score_nested = root_mean_squared_error(y_test, y_pred_nested)
# Print scores and effective coverages.
print(
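
As a rough order-of-magnitude check of the ``N * P`` versus ``N + P``
trade-off described in the module docstring, with this example's settings
(a sketch that ignores the search's own internal folds)::

    # N: out-of-fold models fitted by CrossConformalRegressor (cv = 10)
    # P: parameter-search fits per RandomizedSearchCV call (n_iter = 5)
    N, P = 10, 5
    print(N + P)  # non-nested: one search, then one cross-conformal pass -> 15
    print(N * P)  # nested: a full search inside each of the N folds -> 50
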
diff --git a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py
index 6dda6b113..042c9c849 100644
--- a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py
+++ b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py
@@ -1,6 +1,6 @@
"""
==================================================================
-Estimating prediction intervals of time series forecast with EnbPI
+Time series: example of the EnbPI technique
==================================================================
This example uses
@@ -34,9 +34,11 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
-from mapie._typing import NDArray
-from mapie.metrics import (regression_coverage_score,
- regression_mean_width_score)
+from numpy.typing import NDArray
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score,
+)
from mapie.regression import MapieTimeSeriesRegressor
from mapie.subsample import BlockBootstrap
@@ -120,8 +122,8 @@
X_test, alpha=alpha, ensemble=True, optimize_beta=True
)
coverage_npfit_enbpi = regression_coverage_score(
- y_test, y_pis_npfit_enbpi[:, 0, 0], y_pis_npfit_enbpi[:, 1, 0]
-)
+ y_test, y_pis_npfit_enbpi
+)[0]
width_npfit_enbpi = regression_mean_width_score(
y_pis_npfit_enbpi[:, 1, 0], y_pis_npfit_enbpi[:, 0, 0]
@@ -152,11 +154,10 @@
X_test.iloc[step:(step + step_size), :],
alpha=alpha,
ensemble=True,
- optimize_beta=True,
)
coverage_pfit_enbpi = regression_coverage_score(
- y_test, y_pis_pfit_enbpi[:, 0, 0], y_pis_pfit_enbpi[:, 1, 0]
-)
+ y_test, y_pis_pfit_enbpi
+)[0]
width_pfit_enbpi = regression_mean_width_score(
y_pis_pfit_enbpi[:, 1, 0], y_pis_pfit_enbpi[:, 0, 0]
)
diff --git a/examples/regression/3-scientific-articles/README.rst b/examples/regression/3-scientific-articles/README.rst
index e53e2c121..7cdd4cc67 100644
--- a/examples/regression/3-scientific-articles/README.rst
+++ b/examples/regression/3-scientific-articles/README.rst
@@ -1,5 +1,7 @@
.. _regression_examples_3:
+-----
+
3. Simulations from scientific articles
---------------------------------------
diff --git a/examples/regression/3-scientific-articles/plot_barber2020_simulations.py b/examples/regression/3-scientific-articles/plot_barber2020_simulations.py
index 010be9b8b..222c7cb45 100644
--- a/examples/regression/3-scientific-articles/plot_barber2020_simulations.py
+++ b/examples/regression/3-scientific-articles/plot_barber2020_simulations.py
@@ -1,9 +1,10 @@
"""
-============================================================
-Reproducing the simulations from Foygel-Barber et al. (2020)
-============================================================
+===========================================================================================
+Predictive inference with the jackknife+, Foygel-Barber et al. (2020)
+===========================================================================================
-:class:`~mapie.regression.MapieRegressor` is used to investigate
+
+:class:`~mapie_v1.regression.CrossConformalRegressor` is used to investigate
the coverage level and the prediction interval width as a function
of the dimension using simulated data points as introduced in
Foygel-Barber et al. (2021) [1].
@@ -34,24 +35,29 @@
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
-from mapie._typing import NDArray
-from mapie.metrics import (regression_coverage_score,
- regression_mean_width_score)
-from mapie.regression import MapieRegressor
+from numpy.typing import NDArray
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score,
+)
+from mapie_v1.regression import CrossConformalRegressor
+
+RANDOM_STATE = 1
def PIs_vs_dimensions(
strategies: Dict[str, Any],
- alpha: float,
+ confidence_level: float,
n_trial: int,
dimensions: NDArray,
+ random_state: int = 1
) -> Dict[str, Dict[int, Dict[str, NDArray]]]:
"""
Compute the prediction intervals for a linear regression problem.
Function adapted from Foygel-Barber et al. (2020).
It repeatedly generates linear data with random noise whose
- signal-to-noise is equal to 10 and for several given dimensions,
+ signal-to-noise ratio is equal to 10, for several given dimensions,
given by the dimensions list.
Here we use MAPIE, with a LinearRegression base model, to estimate
@@ -69,8 +75,8 @@ def PIs_vs_dimensions(
strategies : Dict[str, Dict[str, Any]]
List of strategies for estimating prediction intervals,
with corresponding parameters.
- alpha : float
- 1 - (target coverage level).
+ confidence_level : float
+ Target coverage level.
n_trial : int
Number of trials for each dimension for estimating
prediction intervals.
@@ -87,6 +93,7 @@ def PIs_vs_dimensions(
n_train = 100
n_test = 100
SNR = 10
+ rng = np.random.default_rng(random_state)
results: Dict[str, Dict[int, Dict[str, NDArray]]] = {
strategy: {
dimension: {
@@ -99,28 +106,31 @@ def PIs_vs_dimensions(
}
for dimension in dimensions:
for trial in range(n_trial):
- beta = np.random.normal(size=dimension)
+ beta = rng.normal(size=dimension)
beta_norm = np.sqrt(np.square(beta).sum())
beta = beta / beta_norm * np.sqrt(SNR)
- X_train = np.random.normal(size=(n_train, dimension))
- noise_train = np.random.normal(size=n_train)
- noise_test = np.random.normal(size=n_test)
+ X_train = rng.normal(size=(n_train, dimension))
+ noise_train = rng.normal(size=n_train)
+ noise_test = rng.normal(size=n_test)
y_train = X_train.dot(beta) + noise_train
- X_test = np.random.normal(size=(n_test, dimension))
+ X_test = rng.normal(size=(n_test, dimension))
y_test = X_test.dot(beta) + noise_test
for strategy, params in strategies.items():
- mapie = MapieRegressor(
- LinearRegression(),
- agg_function="median",
+ mapie = CrossConformalRegressor(
+ estimator=LinearRegression(),
+ confidence_level=confidence_level,
n_jobs=-1,
+ random_state=random_state,
**params
)
- mapie.fit(X_train, y_train)
- _, y_pis = mapie.predict(X_test, alpha=alpha)
- coverage = regression_coverage_score(
- y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
+ mapie.fit_conformalize(X_train, y_train)
+ _, y_pis = mapie.predict_interval(
+ X_test, aggregate_predictions="median"
)
+ coverage = regression_coverage_score(
+ y_test, y_pis
+ )[0]
results[strategy][dimension]["coverage"][trial] = coverage
width_mean = regression_mean_width_score(
y_pis[:, 0, 0], y_pis[:, 1, 0]
@@ -177,7 +187,7 @@ def plot_simulation_results(
width_mean + width_SE,
alpha=0.25,
)
- ax1.axhline(1 - alpha, linestyle="dashed", c="k")
+ ax1.axhline(confidence_level, linestyle="dashed", c="k")
ax1.set_ylim(0.0, 1.0)
ax1.set_xlabel("Dimension d")
ax1.set_ylabel("Coverage")
@@ -189,13 +199,17 @@ def plot_simulation_results(
STRATEGIES = {
- "naive": dict(method="naive"),
"cv": dict(method="base", cv=5),
"cv_plus": dict(method="plus", cv=5),
}
-alpha = 0.1
+confidence_level = 0.9
ntrial = 3
dimensions = np.arange(10, 150, 10)
-results = PIs_vs_dimensions(STRATEGIES, alpha, ntrial, dimensions)
+results = PIs_vs_dimensions(
+ strategies=STRATEGIES,
+ confidence_level=confidence_level,
+ n_trial=ntrial,
+ dimensions=dimensions,
+ random_state=RANDOM_STATE)
plot_simulation_results(results, title="Coverages and interval widths")
plt.show()
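
The rescaling of ``beta`` in ``PIs_vs_dimensions`` pins the signal variance to
``SNR``: with standard-normal features, ``Var(X @ beta) = ||beta||^2``, which
the code sets to ``SNR`` while the additive noise keeps unit variance. A quick
numerical check of that reading::

    import numpy as np

    rng = np.random.default_rng(0)
    dimension, SNR = 50, 10
    beta = rng.normal(size=dimension)
    beta = beta / np.sqrt(np.square(beta).sum()) * np.sqrt(SNR)
    X = rng.normal(size=(100_000, dimension))
    print(np.var(X @ beta))  # ~= SNR = 10, against a noise variance of 1
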
diff --git a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py
index 10795aea7..699bd320e 100644
--- a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py
+++ b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py
@@ -1,11 +1,13 @@
"""
-==================================================
-Reproducing the simulations from Kim et al. (2020)
-==================================================
+=====================================================================================================
+Predictive inference is free with the Jackknife+-after-Bootstrap, Kim et al. (2020)
+=====================================================================================================
-:class:`~mapie.regression.MapieRegressor` is used to reproduce the simulations
-by Kim et al. (2020) [1] in their article which introduces the
-jackknife+-after-bootstrap method.
+
+:class:`~mapie_v1.regression.JackknifeAfterBootstrapRegressor` and
+:class:`~mapie_v1.regression.CrossConformalRegressor` are used to
+reproduce the simulations by Kim et al. (2020) [1] in their article
+which introduces the jackknife+-after-bootstrap method.
For a given model, the simulation fits MAPIE regressors with jackknife+ and
jackknife+-after-bootstrap methods, on different resamplings of a data set
@@ -43,10 +45,15 @@
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
-from mapie._typing import ArrayLike, NDArray
-from mapie.metrics import (regression_coverage_score,
- regression_mean_width_score)
-from mapie.regression import MapieRegressor
+from numpy.typing import ArrayLike, NDArray
+from mapie.metrics.regression import (
+ regression_coverage_score,
+ regression_mean_width_score,
+)
+from mapie_v1.regression import (
+ JackknifeAfterBootstrapRegressor,
+ CrossConformalRegressor
+)
from mapie.subsample import Subsample
@@ -138,8 +145,9 @@ def compute_PIs(
X_test: NDArray,
method: str,
cv: Any,
- alpha: float,
+ confidence_level: float,
agg_function: Optional[str] = None,
+ random_state: int = 1
) -> pd.DataFrame:
"""
Train and test a model with a MAPIE method,
@@ -160,27 +168,40 @@ def compute_PIs(
Method for estimating prediction intervals.
cv : Any
Strategy for computing conformity scores.
- alpha : float
- 1 - (target coverage level).
+ confidence_level : float
+ Target coverage level.
agg_function: str
'mean' or 'median'.
Function to aggregate the predictions of the B estimators.
+ random_state: int
+ The random state used for reproducibility.
Returns
-------
pd.DataFrame
DataFrame of upper and lower predictions.
"""
- mapie_estimator = MapieRegressor(
- estimator=estimator,
- method=method,
- cv=cv,
- n_jobs=-1,
- agg_function=agg_function,
- )
-
- mapie_estimator = mapie_estimator.fit(X=X_train, y=y_train)
- _, y_pis = mapie_estimator.predict(X=X_test, alpha=alpha)
+ if cv == -1:
+ mapie_estimator = CrossConformalRegressor(
+ estimator=estimator,
+ confidence_level=confidence_level,
+ method=method,
+ cv=cv,
+ n_jobs=-1,
+ random_state=random_state,
+ )
+ else:
+ mapie_estimator = JackknifeAfterBootstrapRegressor(
+ estimator=estimator,
+ confidence_level=confidence_level,
+ method=method,
+ resampling=cv,
+ n_jobs=-1,
+ aggregation_method=agg_function,
+ random_state=random_state,
+ )
+ mapie_estimator = mapie_estimator.fit_conformalize(X=X_train, y=y_train)
+ _, y_pis = mapie_estimator.predict_interval(X=X_test)
PI = np.c_[y_pis[:, 0, 0], y_pis[:, 1, 0]]
return pd.DataFrame(PI, columns=["lower", "upper"])
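
With this dispatch, callers of ``compute_PIs`` select the jackknife+ variant
via ``cv=-1`` and the J+aB variant by passing a
:class:`~mapie.subsample.Subsample` object as ``cv``. A minimal usage sketch of
the two branches on toy data (same classes and parameters as above)::

    import numpy as np
    from sklearn.linear_model import Ridge
    from mapie.subsample import Subsample
    from mapie_v1.regression import (
        CrossConformalRegressor, JackknifeAfterBootstrapRegressor
    )

    rng = np.random.default_rng(1)
    X = rng.normal(size=(200, 5))
    y = X @ rng.normal(size=5) + rng.normal(size=200)

    # Jackknife+ (leave-one-out) branch, taken when cv == -1
    jackknife_plus = CrossConformalRegressor(
        estimator=Ridge(), confidence_level=0.9, method="plus", cv=-1,
        random_state=1,
    )
    jackknife_plus.fit_conformalize(X, y)
    _, y_pis_jplus = jackknife_plus.predict_interval(X)

    # Jackknife+-after-bootstrap branch, taken for a Subsample object
    jab = JackknifeAfterBootstrapRegressor(
        estimator=Ridge(), confidence_level=0.9, method="plus",
        resampling=Subsample(n_resamplings=30), aggregation_method="mean",
        random_state=1,
    )
    jab.fit_conformalize(X, y)
    _, y_pis_jab = jab.predict_interval(X)
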
@@ -205,8 +226,8 @@ def get_coverage_width(PIs: pd.DataFrame, y: NDArray) -> Tuple[float, float]:
The mean coverage and width of the PIs.
"""
coverage = regression_coverage_score(
- y_true=y, y_pred_low=PIs["lower"], y_pred_up=PIs["upper"]
- )
+ y_true=y, y_intervals=np.stack((PIs["lower"], PIs["upper"]), axis=-1)
+ )[0]
width = regression_mean_width_score(
y_pred_low=PIs["lower"], y_pred_up=PIs["upper"]
)
@@ -256,7 +277,7 @@ def B_random_from_B_fixed(
def comparison_JAB(
model: BaseEstimator = Ridge2(),
agg_function: str = "mean",
- alpha: float = 0.1,
+ confidence_level: float = 0.9,
trials: int = 10,
train_size: int = 200,
boostrap_size: int = 10,
@@ -274,8 +295,8 @@ def comparison_JAB(
Base model. By default, Ridge2.
agg_function: str
Aggregation function to test.
- alpha : float
- 1 - (target coverage level).
+ confidence_level : float
+ Target coverage level.
trials: int
Number of trials launched for a given bootstrap set size.
train_size : int
@@ -326,7 +347,7 @@ def comparison_JAB(
X_test=X_test,
method="plus",
cv=-1,
- alpha=alpha,
+ confidence_level=confidence_level,
agg_function=agg_function,
)
(coverage, width) = get_coverage_width(PIs, y_test)
@@ -358,7 +379,7 @@ def comparison_JAB(
X_test=X_test,
method="plus",
cv=subsample_B_random,
- alpha=alpha,
+ confidence_level=confidence_level,
agg_function=agg_function,
)
(coverage, width) = get_coverage_width(PIs, y_test)
@@ -386,7 +407,7 @@ def comparison_JAB(
X_test=X_test,
method="plus",
cv=subsample_B_fixed,
- alpha=alpha,
+ confidence_level=confidence_level,
agg_function=agg_function,
)
(coverage, width) = get_coverage_width(PIs, y_test)
@@ -400,7 +421,7 @@ def comparison_JAB(
]
result_index += 1
results["agg_function"] = agg_function
- results["alpha"] = alpha
+ results["confidence_level"] = confidence_level
results = results.astype(
{
"itrial": int,
@@ -439,7 +460,7 @@ def plot_results(results: pd.DataFrame, score: str) -> None:
data_fix = res.loc[res.fixed_random == "Fixed", ["ratio", score]]
data_random = res.loc[res.fixed_random == "Random", ["ratio", score]]
- alpha = pd.unique(results["alpha"])[0]
+ confidence_level = pd.unique(results["confidence_level"])[0]
# plot the comparison between J+ vs J+AB
fig, axes = plt.subplots(1, 2, figsize=(8, 6), sharey=True)
@@ -447,8 +468,8 @@ def plot_results(results: pd.DataFrame, score: str) -> None:
data_JaB.boxplot(by="ratio", ax=axes[1])
if score == "coverage":
- axes[0].axhline(y=1 - alpha, color="red")
- axes[1].axhline(y=1 - alpha, color="red")
+ axes[0].axhline(y=confidence_level, color="red")
+ axes[1].axhline(y=confidence_level, color="red")
xticks = mtick.PercentFormatter(1, decimals=0)
axes[0].yaxis.set_major_formatter(xticks)
axes[0].set_title("J+")
@@ -471,8 +492,8 @@ def plot_results(results: pd.DataFrame, score: str) -> None:
axes[0].set_title("Fixed B", fontsize=14)
axes[1].set_title("Random B", fontsize=14)
if score == "coverage":
- axes[0].axhline(y=1 - alpha, color="red")
- axes[1].axhline(y=1 - alpha, color="red")
+ axes[0].axhline(y=confidence_level, color="red")
+ axes[1].axhline(y=confidence_level, color="red")
axes[0].yaxis.set_major_formatter(xticks)
axes[0].set_ylabel(score)
axes[1].set_ylabel("")
@@ -490,7 +511,7 @@ def plot_results(results: pd.DataFrame, score: str) -> None:
results_coverages_widths = comparison_JAB(
model=Ridge2(),
- alpha=0.1,
+ confidence_level=0.9,
trials=2,
train_size=40,
boostrap_size=5,
diff --git a/examples/regression/3-scientific-articles/plot_zaffran2022_comparison.py b/examples/regression/3-scientific-articles/plot_zaffran2022_comparison.py
index d994763ea..d62a1efc3 100644
--- a/examples/regression/3-scientific-articles/plot_zaffran2022_comparison.py
+++ b/examples/regression/3-scientific-articles/plot_zaffran2022_comparison.py
@@ -1,7 +1,8 @@
"""
-======================================================================
-Reproduction of part of the paper experiments of Zaffran et al. (2022)
-======================================================================
+=====================================================================
+Adaptive conformal predictions for time series, Zaffran et al. (2022)
+=====================================================================
+
:class:`~mapie.regression.MapieTimeSeriesRegressor` is used to reproduce a
part of the paper experiments of Zaffran et al. (2022) in their article [1]
@@ -44,7 +45,7 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import PredefinedSplit
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.conformity_scores import AbsoluteConformityScore
from mapie.time_series_regression import MapieTimeSeriesRegressor
diff --git a/examples/regression/4-other-notebooks/README.rst b/examples/regression/4-other-notebooks/README.rst
new file mode 100644
index 000000000..02adbac0e
--- /dev/null
+++ b/examples/regression/4-other-notebooks/README.rst
@@ -0,0 +1,11 @@
+.. _regression_examples_5:
+
+-----
+
+4. Other notebooks
+--------------------------------------------
+
+This section lists a series of Jupyter notebooks hosted on the MAPIE GitHub repository that can be run on Google Colab:
+
+ - `Estimating prediction intervals for time series forecast with EnbPI and ACI `_
+
diff --git a/examples/regression/4-tutorials/README.rst b/examples/regression/4-tutorials/README.rst
deleted file mode 100644
index 65af0e28f..000000000
--- a/examples/regression/4-tutorials/README.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-.. _regression_examples_4:
-
-4. Tutorials
-------------
-
-The following examples present pedagogical tutorials explaining how to use MAPIE on different regression taks.
\ No newline at end of file
diff --git a/examples/regression/README.rst b/examples/regression/README.rst
index a43004049..5306c9e7b 100644
--- a/examples/regression/README.rst
+++ b/examples/regression/README.rst
@@ -1,4 +1,6 @@
.. _regression_examples:
-Regression examples
-===================
\ No newline at end of file
+All regression examples
+========================
+
+The following is a collection of notebooks demonstrating how to use MAPIE.
\ No newline at end of file
diff --git a/mapie/_compatibility.py b/mapie/_compatibility.py
deleted file mode 100644
index 5ead5c201..000000000
--- a/mapie/_compatibility.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from typing import Any
-
-import numpy as np
-from packaging.version import parse as parse_version
-
-from ._typing import ArrayLike, NDArray
-
-
-def np_quantile_version_below_122(
- a: ArrayLike,
- q: ArrayLike,
- method: str = "linear",
- **kwargs: Any
-) -> NDArray:
- """Wrapper of np.quantile function for numpy version < 1.22."""
- return np.quantile(a, q, interpolation=method, **kwargs) # type: ignore
-
-
-def np_quantile_version_above_122(
- a: ArrayLike,
- q: ArrayLike,
- method: str = "linear",
- **kwargs: Any
-) -> NDArray:
- """Wrapper of np.quantile function for numpy version >= 1.22."""
- return np.quantile(a, q, method=method, **kwargs) # type: ignore
-
-
-def np_nanquantile_version_below_122(
- a: ArrayLike,
- q: ArrayLike,
- method: str = "linear",
- **kwargs: Any
-) -> NDArray:
- """Wrapper of np.quantile function for numpy version < 1.22."""
- # Does not work if `a` is of dtype object. Converting `a` to a float array
- # is necessary in order to use this function safely.
- return np.nanquantile(a, q, interpolation=method, **kwargs)
-
-
-def np_nanquantile_version_above_122(
- a: ArrayLike,
- q: ArrayLike,
- method: str = "linear",
- **kwargs: Any
-) -> NDArray:
- """Wrapper of np.quantile function for numpy version >= 1.22."""
- return np.nanquantile(a, q, method=method, **kwargs) # type: ignore
-
-
-numpy_version = parse_version(np.__version__)
-if numpy_version < parse_version("1.22"):
- np_quantile = np_quantile_version_below_122
- np_nanquantile = np_nanquantile_version_below_122
-
-else:
- np_quantile = np_quantile_version_above_122
- np_nanquantile = np_nanquantile_version_above_122
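The shim deleted above existed only to dispatch on the ``interpolation`` to ``method`` keyword rename introduced in numpy 1.22. With all supported numpy versions at or above 1.22, ``np.quantile``/``np.nanquantile`` can be called directly, as the hunks below do. A minimal sketch:

```python
import numpy as np

# numpy >= 1.22 accepts the `method` keyword directly, so the
# version-dispatch wrappers are no longer needed.
a = np.array([1.0, 2.0, np.nan, 4.0])
print(np.nanquantile(a, 0.5, method="lower"))  # 2.0
```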
diff --git a/mapie/_typing.py b/mapie/_typing.py
deleted file mode 100644
index af5839e8c..000000000
--- a/mapie/_typing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from numpy.typing import ArrayLike, NDArray
-
-__all__ = ["ArrayLike", "NDArray"]
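Since ``mapie._typing`` merely re-exported the numpy aliases, ``from numpy.typing import ArrayLike, NDArray`` is a drop-in replacement, which is exactly what the hunks below apply file by file. A sketch (``mean_of`` is a hypothetical helper, used only to show the aliases behave identically):

```python
import numpy as np
from numpy.typing import ArrayLike, NDArray


def mean_of(values: ArrayLike) -> float:
    # Same annotations as before; only the import location changed.
    arr: NDArray = np.asarray(values, dtype=float)
    return float(arr.mean())
```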
diff --git a/mapie/aggregation_functions.py b/mapie/aggregation_functions.py
index ad9a8181c..ad35f2214 100644
--- a/mapie/aggregation_functions.py
+++ b/mapie/aggregation_functions.py
@@ -2,7 +2,7 @@
import numpy as np
-from ._typing import NDArray
+from numpy.typing import NDArray
def phi1D(
diff --git a/mapie/calibration.py b/mapie/calibration.py
index ea3834a38..09edf9957 100644
--- a/mapie/calibration.py
+++ b/mapie/calibration.py
@@ -12,7 +12,7 @@
from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
indexable)
-from ._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from .utils import (check_estimator_classification,
check_estimator_fit_predict, check_n_features_in,
check_null_weight, fit_estimator, get_calib_set)
diff --git a/mapie/classification.py b/mapie/classification.py
index 5eab26e10..da5f208f8 100644
--- a/mapie/classification.py
+++ b/mapie/classification.py
@@ -10,7 +10,7 @@
from sklearn.utils import check_random_state
from sklearn.utils.validation import (_check_y, check_is_fitted, indexable)
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseClassificationScore
from mapie.conformity_scores.sets.raps import RAPSConformityScore
from mapie.conformity_scores.utils import (
@@ -47,12 +47,12 @@ class MapieClassifier(BaseEstimator, ClassifierMixin):
- ``"naive"``, sum of the probabilities until the 1-alpha threshold.
- - ``"lac"`` (formerly called ``"score"``), Least Ambiguous set-valued
+ - ``"lac"``, Least Ambiguous set-valued
Classifier. It is based on the scores
(i.e. 1 minus the softmax score of the true label)
on the calibration set. See [1] for more details.
- - ``"aps"`` (formerly called "cumulated_score"), Adaptive Prediction
+ - ``"aps"``, Adaptive Prediction
Sets method. It is based on the sum of the softmax outputs of the
labels until the true label is reached, on the calibration set.
See [2] for more details.
@@ -288,10 +288,10 @@ def _get_classes_info(
)
if n_classes > n_unique_y_labels:
warnings.warn(
- "WARNING: your calibration dataset has less labels"
+ "WARNING: your conformity dataset has less labels"
+ " than your training dataset (training"
+ f" has {n_classes} unique labels while"
- + f" calibration have {n_unique_y_labels} unique labels"
+ + f" conformity have {n_unique_y_labels} unique labels"
)
else:
@@ -395,7 +395,7 @@ def _check_fit_parameter(
):
raise ValueError(
"RAPS method can only be used "
- "with ``cv='split'`` and ``cv='prefit'``."
+ "with SplitConformalClassifier."
)
# Cast
diff --git a/mapie/conformity_scores/bounds/absolute.py b/mapie/conformity_scores/bounds/absolute.py
index 90c1c3e94..7a53d3cf5 100644
--- a/mapie/conformity_scores/bounds/absolute.py
+++ b/mapie/conformity_scores/bounds/absolute.py
@@ -1,6 +1,6 @@
import numpy as np
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseRegressionScore
diff --git a/mapie/conformity_scores/bounds/gamma.py b/mapie/conformity_scores/bounds/gamma.py
index 09f161e02..5eb05b031 100644
--- a/mapie/conformity_scores/bounds/gamma.py
+++ b/mapie/conformity_scores/bounds/gamma.py
@@ -1,6 +1,6 @@
import numpy as np
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseRegressionScore
diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py
index 5ce0d799a..f41a257f4 100644
--- a/mapie/conformity_scores/bounds/residuals.py
+++ b/mapie/conformity_scores/bounds/residuals.py
@@ -9,7 +9,7 @@
from sklearn.utils.validation import (check_is_fitted, check_random_state,
indexable)
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseRegressionScore
diff --git a/mapie/conformity_scores/classification.py b/mapie/conformity_scores/classification.py
index 5dda679cf..0239089d6 100644
--- a/mapie/conformity_scores/classification.py
+++ b/mapie/conformity_scores/classification.py
@@ -6,7 +6,7 @@
from mapie.conformity_scores.interface import BaseConformityScore
from mapie.estimator.classifier import EnsembleClassifier
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
class BaseClassificationScore(BaseConformityScore, metaclass=ABCMeta):
diff --git a/mapie/conformity_scores/conformity_scores.py b/mapie/conformity_scores/conformity_scores.py
index e6953a81d..691f76163 100644
--- a/mapie/conformity_scores/conformity_scores.py
+++ b/mapie/conformity_scores/conformity_scores.py
@@ -4,7 +4,7 @@
from mapie.conformity_scores.regression import BaseConformityScore
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
+from numpy.typing import NDArray
@deprecated(
diff --git a/mapie/conformity_scores/interface.py b/mapie/conformity_scores/interface.py
index 07345d3e4..e8dd3834b 100644
--- a/mapie/conformity_scores/interface.py
+++ b/mapie/conformity_scores/interface.py
@@ -4,8 +4,7 @@
import numpy as np
from sklearn.base import BaseEstimator
-from mapie._compatibility import np_nanquantile
-from mapie._typing import NDArray
+from numpy.typing import NDArray
class BaseConformityScore(metaclass=ABCMeta):
@@ -155,7 +154,7 @@ def get_quantile(
# Otherwise, the quantile is calculated as the corrected lower quantile
# of the signed conformity scores.
quantile = signed * np.column_stack([
- np_nanquantile(
+ np.nanquantile(
signed * conformity_scores, _alpha_cor,
axis=axis, method="lower"
) if not (unbounded and _alpha >= 1) else np.inf * np.ones(n_ref)
diff --git a/mapie/conformity_scores/regression.py b/mapie/conformity_scores/regression.py
index b58f4a264..e28867f17 100644
--- a/mapie/conformity_scores/regression.py
+++ b/mapie/conformity_scores/regression.py
@@ -7,9 +7,8 @@
from mapie.conformity_scores.interface import BaseConformityScore
from mapie.estimator.regressor import EnsembleRegressor
-from mapie._compatibility import np_nanquantile
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
+from numpy.typing import NDArray
class BaseRegressionScore(BaseConformityScore, metaclass=ABCMeta):
@@ -238,13 +237,13 @@ def _beta_optimize(
num=len(lower_bounds),
endpoint=True,
)
- one_alpha_beta = np_nanquantile(
+ one_alpha_beta = np.nanquantile(
upper_bounds.astype(float),
1 - _alpha + betas,
axis=1,
method="higher",
)
- beta = np_nanquantile(
+ beta = np.nanquantile(
lower_bounds.astype(float),
betas,
axis=1,
@@ -326,7 +325,7 @@ def get_bounds(
"""
if self.sym and optimize_beta:
raise ValueError(
- "Beta optimisation cannot be used with "
+ "Interval width minimization cannot be used with a "
+ "symmetrical conformity score function."
)
diff --git a/mapie/conformity_scores/sets/aps.py b/mapie/conformity_scores/sets/aps.py
index 9847f8b7d..ba4be1ebf 100644
--- a/mapie/conformity_scores/sets/aps.py
+++ b/mapie/conformity_scores/sets/aps.py
@@ -11,7 +11,7 @@
from mapie.estimator.classifier import EnsembleClassifier
from mapie._machine_precision import EPSILON
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.utils import compute_quantiles
@@ -360,15 +360,40 @@ def get_prediction_sets(
By default ``"mean"``.
include_last_label: Optional[Union[bool, str]]
- Whether or not to include last label in prediction sets.
- Choose among ``False``, ``True`` or ``"randomized"``.
+ Whether or not to include the last label in
+ prediction sets for the "aps" method. Choose among:
- By default, ``True``.
+ - False, does not include the label whose cumulated score is just
+ over the quantile.
+ - True, includes the label whose cumulated score is just over the
+ quantile, unless there is only one label in the prediction set.
+ - "randomized", randomly includes the label whose cumulated score
+ is just over the quantile, based on the comparison of a uniform
+ number and the difference between the cumulated score of
+ the last label and the quantile.
+
+ When set to ``True`` or ``False``, this may result in a coverage
+ higher than ``1 - alpha`` (because, unlike the "randomized"
+ setting, neither of these options creates empty prediction sets). See
+ [1] and [2] for more details.
+
+ By default ``True``.
Returns
--------
NDArray
Array of quantiles with respect to alpha_np.
+
+ References
+ ----------
+ [1] Yaniv Romano, Matteo Sesia and Emmanuel J. Candès.
+ "Classification with Valid and Adaptive Coverage."
+ NeurIPS 2020 (spotlight).
+
+ [2] Anastasios Nikolas Angelopoulos, Stephen Bates, Michael Jordan
+ and Jitendra Malik.
+ "Uncertainty Sets for Image Classifiers using Conformal Prediction."
+ International Conference on Learning Representations 2021.
"""
include_last_label = check_include_last_label(include_last_label)
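For intuition, here is a simplified NumPy sketch of the "randomized" rule documented above, following Romano et al. (2020); variable names and values are illustrative, not MAPIE's internals:

```python
import numpy as np

rng = np.random.default_rng(0)
quantile = 0.9      # calibrated quantile of the cumulated scores
cum_score = 0.95    # cumulated softmax up to and including the last label
last_score = 0.10   # softmax score of that last label

# Fraction of the last label's score that overshoots the quantile.
overshoot = (cum_score - quantile) / last_score
# Drop the last label when a uniform draw falls below the overshoot
# fraction, keep it otherwise.
include_last = rng.uniform() >= overshoot
print(include_last)
```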
diff --git a/mapie/conformity_scores/sets/lac.py b/mapie/conformity_scores/sets/lac.py
index e5f088158..0a94e0258 100644
--- a/mapie/conformity_scores/sets/lac.py
+++ b/mapie/conformity_scores/sets/lac.py
@@ -7,7 +7,7 @@
from mapie.estimator.classifier import EnsembleClassifier
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.utils import compute_quantiles
diff --git a/mapie/conformity_scores/sets/naive.py b/mapie/conformity_scores/sets/naive.py
index 09bafa181..1a2a868e7 100644
--- a/mapie/conformity_scores/sets/naive.py
+++ b/mapie/conformity_scores/sets/naive.py
@@ -9,7 +9,7 @@
from mapie.estimator.classifier import EnsembleClassifier
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
+from numpy.typing import NDArray
class NaiveConformityScore(BaseClassificationScore):
diff --git a/mapie/conformity_scores/sets/raps.py b/mapie/conformity_scores/sets/raps.py
index 435c135ba..8f19a78b0 100644
--- a/mapie/conformity_scores/sets/raps.py
+++ b/mapie/conformity_scores/sets/raps.py
@@ -11,8 +11,8 @@
from mapie.estimator.classifier import EnsembleClassifier
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
-from mapie.metrics import classification_mean_width_score
+from numpy.typing import NDArray
+from mapie.metrics.classification import classification_mean_width_score
from mapie.utils import check_alpha_and_n_samples, compute_quantiles
@@ -413,6 +413,10 @@ def get_conformity_score_quantiles(
By default, ``True``.
+ See the docstring of
+ :meth:`conformity_scores.sets.aps.APSConformityScore.get_prediction_sets`
+ for more details.
+
X_raps: NDArray of shape (n_samples, n_features)
Observed feature values for the RAPS method (split data).
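The import change above reflects the split of the flat ``mapie.metrics`` module (deleted further down) into task-specific submodules. Assuming the function keeps its v0 behavior, usage becomes:

```python
import numpy as np
# v0: from mapie.metrics import classification_mean_width_score
from mapie.metrics.classification import classification_mean_width_score

y_pred_set = np.array([
    [False, False, True, True],
    [False, True, False, True],
])
print(classification_mean_width_score(y_pred_set))  # 2.0
```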
diff --git a/mapie/conformity_scores/sets/topk.py b/mapie/conformity_scores/sets/topk.py
index cfad29a0a..3ec248b5b 100644
--- a/mapie/conformity_scores/sets/topk.py
+++ b/mapie/conformity_scores/sets/topk.py
@@ -9,7 +9,7 @@
from mapie.estimator.classifier import EnsembleClassifier
from mapie._machine_precision import EPSILON
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.utils import compute_quantiles
diff --git a/mapie/conformity_scores/sets/utils.py b/mapie/conformity_scores/sets/utils.py
index 5912607fb..46ad064cc 100644
--- a/mapie/conformity_scores/sets/utils.py
+++ b/mapie/conformity_scores/sets/utils.py
@@ -1,7 +1,7 @@
from typing import Optional, Union
import numpy as np
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie._machine_precision import EPSILON
diff --git a/mapie/conformity_scores/utils.py b/mapie/conformity_scores/utils.py
index b995926c9..abee2a6a4 100644
--- a/mapie/conformity_scores/utils.py
+++ b/mapie/conformity_scores/utils.py
@@ -12,13 +12,11 @@
RAPSConformityScore, TopKConformityScore
)
-from mapie._typing import ArrayLike
+from numpy.typing import ArrayLike
METHOD_SCORE_MAP = {
- 'score': lambda: LACConformityScore(),
'lac': lambda: LACConformityScore(),
- 'cumulated_score': lambda: APSConformityScore(),
'aps': lambda: APSConformityScore(),
'naive': lambda: NaiveConformityScore(),
'raps': lambda: RAPSConformityScore(),
@@ -75,36 +73,6 @@ def check_regression_conformity_score(
)
-def check_depreciated_score(
- method: str
-) -> None:
- """
- Check if the chosen method is outdated.
-
- Raises
- ------
- Warning
- If method is ``"score"`` (not ``"lac"``) or
- if method is ``"cumulated_score"`` (not ``"aps"``).
- """
- if method == "score":
- warnings.warn(
- "WARNING: Deprecated method. "
- "The method \"score\" is outdated. "
- "Prefer to use \"lac\" instead to keep "
- "the same behavior in the next release.",
- DeprecationWarning
- )
- if method == "cumulated_score":
- warnings.warn(
- "WARNING: Deprecated method. "
- "The method \"cumulated_score\" is outdated. "
- "Prefer to use \"aps\" instead to keep "
- "the same behavior in the next release.",
- DeprecationWarning
- )
-
-
def check_depreciated_size_raps(
size_raps: Optional[float]
) -> None:
@@ -156,10 +124,8 @@ def check_target(
not isinstance(conformity_score, LACConformityScore)
):
raise ValueError(
- "Invalid method for binary target. "
- "Your target is not of type multiclass and "
- "allowed values for binary type are "
- f"{['score', 'lac']}."
+ "Invalid conformity score for binary target. "
+ "The only valid score is 'lac'."
)
@@ -217,11 +183,10 @@ def check_classification_conformity_score(
)
elif method is not None:
if isinstance(method, str) and method in METHOD_SCORE_MAP:
- check_depreciated_score(method)
return METHOD_SCORE_MAP[method]()
else:
raise ValueError(
- "Invalid method. "
+ "Invalid conformity score. "
f"Allowed values are {list(METHOD_SCORE_MAP.keys())}."
)
else:
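With the deprecation shim removed, only the canonical names left in ``METHOD_SCORE_MAP`` are accepted; the former aliases now raise instead of warning. A sketch, assuming the ``method`` keyword of the v0 signature:

```python
from mapie.conformity_scores.utils import (
    check_classification_conformity_score,
)

check_classification_conformity_score(method="lac")    # returns a LACConformityScore
check_classification_conformity_score(method="score")  # now raises ValueError
```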
diff --git a/mapie/control_risk/ltt.py b/mapie/control_risk/ltt.py
index 67686df54..9b7d7e124 100644
--- a/mapie/control_risk/ltt.py
+++ b/mapie/control_risk/ltt.py
@@ -3,7 +3,7 @@
import numpy as np
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from .p_values import compute_hoeffdding_bentkus_p_value
diff --git a/mapie/control_risk/p_values.py b/mapie/control_risk/p_values.py
index 63623f3e5..81800cd5b 100644
--- a/mapie/control_risk/p_values.py
+++ b/mapie/control_risk/p_values.py
@@ -3,7 +3,7 @@
import numpy as np
from scipy.stats import binom
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.utils import check_alpha
diff --git a/mapie/control_risk/risks.py b/mapie/control_risk/risks.py
index 76bbf6c7b..490be62a6 100644
--- a/mapie/control_risk/risks.py
+++ b/mapie/control_risk/risks.py
@@ -3,7 +3,7 @@
import numpy as np
from sklearn.utils.validation import column_or_1d
-from mapie._typing import NDArray
+from numpy.typing import NDArray
def compute_risk_recall(
diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py
index 9cd45e64e..e5eeb5a64 100644
--- a/mapie/estimator/classifier.py
+++ b/mapie/estimator/classifier.py
@@ -9,7 +9,7 @@
from sklearn.utils import _safe_indexing
from sklearn.utils.validation import _num_samples, check_is_fitted
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes
diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py
index 3ec18bb16..be03fe4dc 100644
--- a/mapie/estimator/regressor.py
+++ b/mapie/estimator/regressor.py
@@ -9,7 +9,7 @@
from sklearn.utils import _safe_indexing, deprecated
from sklearn.utils.validation import _num_samples, check_is_fitted
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.aggregation_functions import aggregate_all, phi2D
from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
fit_estimator)
@@ -204,6 +204,8 @@ def _fit_oof_estimator(
RegressorMixin
Fitted estimator.
"""
+ # TODO back-end: avoid using private utilities from sklearn like
+ # _safe_indexing (may break anytime without notice)
X_train = _safe_indexing(X, train_index)
y_train = _safe_indexing(y, train_index)
if not (sample_weight is None):
@@ -284,9 +286,7 @@ def _aggregate_with_mask(
"""
if self.method in self.no_agg_methods_ or self.use_split_method_:
raise ValueError(
- "There should not be aggregation of predictions "
- f"if cv is in '{self.no_agg_cv_}', if cv >=2 "
- f"or if method is in '{self.no_agg_methods_}'."
+ "There should not be aggregation of predictions."
)
elif self.agg_function == "median":
return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1))
@@ -299,7 +299,9 @@ def _aggregate_with_mask(
K = np.nan_to_num(k, nan=0.0)
return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T)
else:
- raise ValueError("The value of self.agg_function is not correct")
+ raise ValueError(
+ "The value of the aggregation function is not correct"
+ )
def _pred_multi(self, X: ArrayLike, **predict_params) -> NDArray:
"""
diff --git a/mapie/metrics.py b/mapie/metrics.py
deleted file mode 100644
index 9990f4cb9..000000000
--- a/mapie/metrics.py
+++ /dev/null
@@ -1,1635 +0,0 @@
-from typing import Optional, Tuple, Union, cast
-
-import numpy as np
-import scipy
-from sklearn.utils import check_random_state
-from sklearn.utils.validation import check_array, column_or_1d
-
-from ._machine_precision import EPSILON
-from ._typing import ArrayLike, NDArray
-from .utils import (calc_bins, check_alpha, check_array_inf, check_array_nan,
- check_array_shape_classification, check_split_strategy,
- check_array_shape_regression, check_arrays_length,
- check_binary_zero_one, check_nb_intervals_sizes,
- check_nb_sets_sizes, check_number_bins)
-
-
-def regression_coverage_score(
- y_true: ArrayLike,
- y_pred_low: ArrayLike,
- y_pred_up: ArrayLike,
-) -> float:
- """
- Effective coverage score obtained by the prediction intervals.
-
- The effective coverage is obtained by estimating the fraction
- of true labels that lie within the prediction intervals.
-
- Parameters
- ----------
- y_true: ArrayLike of shape (n_samples,)
- True labels.
- y_pred_low: ArrayLike of shape (n_samples,)
- Lower bound of prediction intervals.
- y_pred_up: ArrayLike of shape (n_samples,)
- Upper bound of prediction intervals.
-
- Returns
- -------
- float
- Effective coverage obtained by the prediction intervals.
-
- Examples
- ---------
- >>> from mapie.metrics import regression_coverage_score
- >>> import numpy as np
- >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
- >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
- >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
- >>> print(regression_coverage_score(y_true, y_pred_low, y_pred_up))
- 0.8
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_pred_low = cast(NDArray, column_or_1d(y_pred_low))
- y_pred_up = cast(NDArray, column_or_1d(y_pred_up))
-
- check_arrays_length(y_true, y_pred_low, y_pred_up)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_pred_low)
- check_array_inf(y_pred_low)
- check_array_nan(y_pred_up)
- check_array_inf(y_pred_up)
-
- coverage = np.mean(
- ((y_pred_low <= y_true) & (y_pred_up >= y_true))
- )
- return float(coverage)
-
-
-def classification_coverage_score(
- y_true: ArrayLike,
- y_pred_set: ArrayLike
-) -> float:
- """
- Effective coverage score obtained by the prediction sets.
-
- The effective coverage is obtained by estimating the fraction
- of true labels that lie within the prediction sets.
-
- Parameters
- ----------
- y_true: ArrayLike of shape (n_samples,)
- True labels.
- y_pred_set: ArrayLike of shape (n_samples, n_class)
- Prediction sets given by booleans of labels.
-
- Returns
- -------
- float
- Effective coverage obtained by the prediction sets.
-
- Examples
- --------
- >>> from mapie.metrics import classification_coverage_score
- >>> import numpy as np
- >>> y_true = np.array([3, 3, 1, 2, 2])
- >>> y_pred_set = np.array([
- ... [False, False, True, True],
- ... [False, True, False, True],
- ... [False, True, True, False],
- ... [False, False, True, True],
- ... [False, True, False, True]
- ... ])
- >>> print(classification_coverage_score(y_true, y_pred_set))
- 0.8
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_pred_set = cast(
- NDArray,
- check_array(
- y_pred_set, force_all_finite=True, dtype=["bool"]
- )
- )
-
- check_arrays_length(y_true, y_pred_set)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_pred_set)
- check_array_inf(y_pred_set)
-
- coverage = np.take_along_axis(
- y_pred_set, y_true.reshape(-1, 1), axis=1
- ).mean()
- return float(coverage)
-
-
-def regression_mean_width_score(
- y_pred_low: ArrayLike,
- y_pred_up: ArrayLike
-) -> float:
- """
- Effective mean width score obtained by the prediction intervals.
-
- Parameters
- ----------
- y_pred_low: ArrayLike of shape (n_samples,)
- Lower bound of prediction intervals.
- y_pred_up: ArrayLike of shape (n_samples,)
- Upper bound of prediction intervals.
-
- Returns
- -------
- float
- Effective mean width of the prediction intervals.
-
- Examples
- --------
- >>> from mapie.metrics import regression_mean_width_score
- >>> import numpy as np
- >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
- >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
- >>> print(regression_mean_width_score(y_pred_low, y_pred_up))
- 2.3
- """
- y_pred_low = cast(NDArray, column_or_1d(y_pred_low))
- y_pred_up = cast(NDArray, column_or_1d(y_pred_up))
-
- check_arrays_length(y_pred_low, y_pred_up)
- check_array_nan(y_pred_low)
- check_array_inf(y_pred_low)
- check_array_nan(y_pred_up)
- check_array_inf(y_pred_up)
-
- mean_width = np.abs(y_pred_up - y_pred_low).mean()
- return float(mean_width)
-
-
-def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
- """
- Mean width of prediction set output by
- :class:`~mapie.classification.MapieClassifier`.
-
- Parameters
- ----------
- y_pred_set: ArrayLike of shape (n_samples, n_class)
- Prediction sets given by booleans of labels.
-
- Returns
- -------
- float
- Mean width of the prediction set.
-
- Examples
- --------
- >>> from mapie.metrics import classification_mean_width_score
- >>> import numpy as np
- >>> y_pred_set = np.array([
- ... [False, False, True, True],
- ... [False, True, False, True],
- ... [False, True, True, False],
- ... [False, False, True, True],
- ... [False, True, False, True]
- ... ])
- >>> print(classification_mean_width_score(y_pred_set))
- 2.0
- """
- y_pred_set = cast(
- NDArray,
- check_array(
- y_pred_set, force_all_finite=True, dtype=["bool"]
- )
- )
- check_array_nan(y_pred_set)
- check_array_inf(y_pred_set)
- mean_width = y_pred_set.sum(axis=1).mean()
- return float(mean_width)
-
-
-def expected_calibration_error(
- y_true: ArrayLike,
- y_scores: ArrayLike,
- num_bins: int = 50,
- split_strategy: Optional[str] = None,
-) -> float:
- """
- The expected calibration error, which is the difference between
- the confidence scores and accuracy per bin [1].
-
- [1] Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht.
- "Obtaining well calibrated probabilities using bayesian binning."
- Twenty-Ninth AAAI Conference on Artificial Intelligence. 2015.
-
- Parameters
- ----------
- y_true: ArrayLike of shape (n_samples,)
- The target values for the calibrator.
- y_score: ArrayLike of shape (n_samples,) or (n_samples, n_classes)
- The predictions scores.
- num_bins: int
- Number of bins to make the split in the y_score. The allowed
- values are num_bins above 0.
- split_strategy: str
- The way of splitting the predictions into different bins.
- The allowed split strategies are "uniform", "quantile" and
- "array split".
- Returns
- -------
- float
- The score of ECE (Expected Calibration Error).
- """
- split_strategy = check_split_strategy(split_strategy)
- num_bins = check_number_bins(num_bins)
- y_true_ = check_binary_zero_one(y_true)
- y_scores = cast(NDArray, y_scores)
-
- check_arrays_length(y_true_, y_scores)
- check_array_nan(y_true_)
- check_array_inf(y_true_)
- check_array_nan(y_scores)
- check_array_inf(y_scores)
-
- if np.size(y_scores.shape) == 2:
- y_score = cast(
- NDArray, column_or_1d(np.nanmax(y_scores, axis=1))
- )
- else:
- y_score = cast(NDArray, column_or_1d(y_scores))
-
- _, bin_accs, bin_confs, bin_sizes = calc_bins(
- y_true_, y_score, num_bins, split_strategy
- )
-
- return np.divide(
- np.sum(bin_sizes * np.abs(bin_accs - bin_confs)),
- np.sum(bin_sizes)
- )
-
-
-def top_label_ece(
- y_true: ArrayLike,
- y_scores: ArrayLike,
- y_score_arg: Optional[ArrayLike] = None,
- num_bins: int = 50,
- split_strategy: Optional[str] = None,
- classes: Optional[ArrayLike] = None,
-) -> float:
- """
- The Top-Label ECE which is a method adapted to fit the
- ECE to a Top-Label setting [2].
-
- [2] Gupta, Chirag, and Aaditya K. Ramdas.
- "Top-label calibration and multiclass-to-binary reductions."
- arXiv preprint arXiv:2107.08353 (2021).
-
- Parameters
- ----------
- y_true: ArrayLike of shape (n_samples,)
- The target values for the calibrator.
- y_scores: ArrayLike of shape (n_samples, n_classes)
- or (n_samples,)
- The predictions scores, either the maximum score and the
- argmax needs to be inputted or in the form of the prediction
- probabilities.
- y_score_arg: Optional[ArrayLike] of shape (n_samples,)
- If only the maximum is provided in the y_scores, the argmax must
- be provided here. This is optional and could be directly infered
- from the y_scores.
- num_bins: int
- Number of bins to make the split in the y_score. The allowed
- values are num_bins above 0.
- split_strategy: str
- The way of splitting the predictions into different bins.
- The allowed split strategies are "uniform", "quantile" and
- "array split".
- classes: ArrayLike of shape (n_samples,)
- The different classes, in order of the indices that would be
- present in a pred_proba.
-
- Returns
- -------
- float
- The ECE score adapted in the top label setting.
- """
- y_scores = cast(NDArray, y_scores)
- y_true = cast(NDArray, y_true)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_scores)
- check_array_inf(y_scores)
-
- if y_score_arg is None:
- check_arrays_length(y_true, y_scores)
- else:
- y_score_arg = cast(NDArray, y_score_arg)
- check_array_nan(y_score_arg)
- check_array_inf(y_score_arg)
- check_arrays_length(y_true, y_scores, y_score_arg)
-
- ece = float(0.)
- split_strategy = check_split_strategy(split_strategy)
- num_bins = check_number_bins(num_bins)
- y_true = cast(NDArray, column_or_1d(y_true))
- if y_score_arg is None:
- y_score = cast(
- NDArray, column_or_1d(np.nanmax(y_scores, axis=1))
- )
- if classes is None:
- y_score_arg = cast(
- NDArray, column_or_1d(np.nanargmax(y_scores, axis=1))
- )
- else:
- classes = cast(NDArray, classes)
- y_score_arg = cast(
- NDArray, column_or_1d(classes[np.nanargmax(y_scores, axis=1)])
- )
- else:
- y_score = cast(NDArray, column_or_1d(y_scores))
- y_score_arg = cast(NDArray, column_or_1d(y_score_arg))
- labels = np.unique(y_score_arg)
-
- for label in labels:
- label_ind = np.where(label == y_score_arg)[0]
- y_true_ = np.array(y_true[label_ind] == label, dtype=int)
- ece += expected_calibration_error(
- y_true_,
- y_scores=y_score[label_ind],
- num_bins=num_bins,
- split_strategy=split_strategy
- )
- ece /= len(labels)
- return ece
-
-
-def regression_coverage_score_v2(
- y_true: NDArray,
- y_intervals: NDArray,
-) -> NDArray:
- """
- Effective coverage score obtained by the prediction intervals.
-
- The effective coverage is obtained by estimating the fraction
- of true labels that lie within the prediction intervals.
-
- It is different from ``regression_coverage_score`` because it uses
- directly the output of ``predict`` method and can compute the
- coverage for each alpha.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples, n_alpha) or (n_samples,)
- True labels.
- y_intervals: NDArray of shape (n_samples, 2, n_alpha)
- Lower and upper bound of prediction intervals
- with different alpha risks.
-
- Returns
- -------
- NDArray of shape (n_alpha,)
- Effective coverage obtained by the prediction intervals.
- """
- check_arrays_length(y_true, y_intervals)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_intervals)
- check_array_inf(y_intervals)
-
- y_intervals = check_array_shape_regression(y_true, y_intervals)
- if len(y_true.shape) != 2:
- y_true = cast(NDArray, column_or_1d(y_true))
- y_true = np.expand_dims(y_true, axis=1)
- coverages = np.mean(
- np.logical_and(
- np.less_equal(y_intervals[:, 0, :], y_true),
- np.greater_equal(y_intervals[:, 1, :], y_true)
- ),
- axis=0
- )
- return coverages
-
-
-def classification_coverage_score_v2(
- y_true: NDArray,
- y_pred_set: NDArray
-) -> NDArray:
- """
- Effective coverage score obtained by the prediction sets.
-
- The effective coverage is obtained by estimating the fraction
- of true labels that lie within the prediction sets.
-
- It is different from ``classification_coverage_score`` because it uses
- directly the output of ``predict`` method and can compute the
- coverage for each alpha.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples, n_alpha) or (n_samples,)
- True labels.
- y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
- Prediction sets given by booleans of labels.
-
- Returns
- -------
- NDArray of shape (n_alpha,)
- Effective coverage obtained by the prediction sets.
- """
- check_arrays_length(y_true, y_pred_set)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_pred_set)
- check_array_inf(y_pred_set)
-
- y_pred_set = check_array_shape_classification(y_true, y_pred_set)
- if len(y_true.shape) != 2:
- y_true = cast(NDArray, column_or_1d(y_true))
- y_true = np.expand_dims(y_true, axis=1)
- y_true = np.expand_dims(y_true, axis=1)
- coverage = np.nanmean(
- np.take_along_axis(y_pred_set, y_true, axis=1),
- axis=0
- )
- return coverage[0]
-
-
-def regression_ssc(
- y_true: NDArray,
- y_intervals: NDArray,
- num_bins: int = 3
-) -> NDArray:
- """
- Compute Size-Stratified Coverage metrics proposed in [3] that is
- the conditional coverage conditioned by the size of the intervals.
- The intervals are ranked by their size (ascending) and then divided into
- num_bins groups: one value of coverage by groups is computed.
-
- Warning: This metric should be used only with non constant intervals
- (intervals of different sizes), with constant intervals the result
- may be misinterpreted.
-
- [3] Angelopoulos, A. N., & Bates, S. (2021).
- A gentle introduction to conformal prediction and
- distribution-free uncertainty quantification.
- arXiv preprint arXiv:2107.07511.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples,)
- True labels.
- y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
- Prediction intervals given by booleans of labels.
- num_bins: int n
- Number of groups. Should be less than the number of different
- interval widths.
-
- Returns
- -------
- NDArray of shape (n_alpha, num_bins)
-
- Examples
- --------
- >>> from mapie.metrics import regression_ssc
- >>> import numpy as np
- >>> y_true = np.array([5, 7.5, 9.5])
- >>> y_intervals = np.array([
- ... [4, 6],
- ... [6.0, 9.0],
- ... [9, 10.0]
- ... ])
- >>> print(regression_ssc(y_true, y_intervals, num_bins=2))
- [[1. 1.]]
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_intervals = check_array_shape_regression(y_true, y_intervals)
- check_number_bins(num_bins)
- widths = np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :])
- check_nb_intervals_sizes(widths, num_bins)
-
- check_arrays_length(y_true, y_intervals)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_intervals)
- check_array_inf(y_intervals)
-
- indexes_sorted = np.argsort(widths, axis=0)
- indexes_bybins = np.array_split(indexes_sorted, num_bins, axis=0)
- coverages = np.zeros((y_intervals.shape[2], num_bins))
- for i, indexes in enumerate(indexes_bybins):
- intervals_binned = np.stack([
- np.take_along_axis(y_intervals[:, 0, :], indexes, axis=0),
- np.take_along_axis(y_intervals[:, 1, :], indexes, axis=0)
- ], axis=1)
- coverages[:, i] = regression_coverage_score_v2(y_true[indexes],
- intervals_binned)
-
- return coverages
-
-
-def regression_ssc_score(
- y_true: NDArray,
- y_intervals: NDArray,
- num_bins: int = 3
-) -> NDArray:
- """
- Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]:
- returns the maximum violation of the conditional coverage
- (with the groups defined).
-
- Warning: This metric should be used only with non constant intervals
- (intervals of different sizes), with constant intervals the result
- may be misinterpreted.
-
- [3] Angelopoulos, A. N., & Bates, S. (2021).
- A gentle introduction to conformal prediction and
- distribution-free uncertainty quantification.
- arXiv preprint arXiv:2107.07511.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples,)
- True labels.
- y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
- Prediction intervals given by booleans of labels.
- num_bins: int n
- Number of groups. Should be less than the number of different
- interval widths.
-
- Returns
- -------
- NDArray of shape (n_alpha,)
-
- Examples
- --------
- >>> from mapie.metrics import regression_ssc_score
- >>> import numpy as np
- >>> y_true = np.array([5, 7.5, 9.5])
- >>> y_intervals = np.array([
- ... [[4, 4], [6, 7.5]],
- ... [[6.0, 8], [9.0, 10]],
- ... [[9, 9], [10.0, 10.0]]
- ... ])
- >>> print(regression_ssc_score(y_true, y_intervals, num_bins=2))
- [1. 0.5]
- """
- return np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1)
-
-
-def classification_ssc(
- y_true: NDArray,
- y_pred_set: NDArray,
- num_bins: Union[int, None] = None
-) -> NDArray:
- """
- Compute Size-Stratified Coverage metrics proposed in [3] that is
- the conditional coverage conditioned by the size of the predictions sets.
- The sets are ranked by their size (ascending) and then divided into
- num_bins groups: one value of coverage by groups is computed.
-
- [3] Angelopoulos, A. N., & Bates, S. (2021).
- A gentle introduction to conformal prediction and
- distribution-free uncertainty quantification.
- arXiv preprint arXiv:2107.07511.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples,)
- True labels.
- y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
- or (n_samples, n_class)
- Prediction sets given by booleans of labels.
- num_bins: int or None
- Number of groups. If None, one value of coverage by possible
- size of sets (n_classes +1) is computed. Should be less than the
- number of different set sizes.
-
- Returns
- -------
- NDArray of shape (n_alpha, num_bins)
-
- Examples
- --------
- >>> from mapie.metrics import classification_ssc
- >>> import numpy as np
- >>> y_true = y_true_class = np.array([3, 3, 1, 2, 2])
- >>> y_pred_set = np.array([
- ... [True, True, True, True],
- ... [False, True, False, True],
- ... [True, True, True, False],
- ... [False, False, True, True],
- ... [True, True, False, True]])
- >>> print(classification_ssc(y_true, y_pred_set, num_bins=2))
- [[1. 0.66666667]]
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_pred_set = check_array_shape_classification(y_true, y_pred_set)
-
- check_arrays_length(y_true, y_pred_set)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_pred_set)
- check_array_inf(y_pred_set)
-
- sizes = np.sum(y_pred_set, axis=1)
- n_classes = y_pred_set.shape[1]
- if num_bins is None:
- bins = list(range(n_classes + 1))
- else:
- check_nb_sets_sizes(sizes, num_bins)
- check_number_bins(num_bins)
- bins = [
- b[0] for b in np.array_split(range(n_classes + 1), num_bins)
- ]
-
- digitized_sizes = np.digitize(sizes, bins)
- coverages = np.zeros((y_pred_set.shape[2], len(bins)))
- for alpha in range(y_pred_set.shape[2]):
- indexes_bybins = [
- np.argwhere(digitized_sizes[:, alpha] == i)
- for i in range(1, len(bins)+1)
- ]
-
- for i, indexes in enumerate(indexes_bybins):
- coverages[alpha, i] = classification_coverage_score_v2(
- y_true[indexes],
- np.take_along_axis(
- y_pred_set[:, :, alpha],
- indexes,
- axis=0
- )
- )
- return coverages
-
-
-def classification_ssc_score(
- y_true: NDArray,
- y_pred_set: NDArray,
- num_bins: Union[int, None] = None
-) -> NDArray:
- """
- Aggregate by the minimum for each alpha the Size-Stratified Coverage [3]:
- returns the maximum violation of the conditional coverage
- (with the groups defined).
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples,)
- True labels.
- y_pred_set: NDArray of shape (n_samples, n_class, n_alpha)
- or (n_samples, n_class)
- Prediction sets given by booleans of labels.
- num_bins: int or None
- Number of groups. If None, one value of coverage by possible
- size of sets (n_classes +1) is computed. Should be less than
- the number of different set sizes.
-
- Returns
- -------
- NDArray of shape (n_alpha,)
-
- Examples
- --------
- >>> from mapie.metrics import classification_ssc_score
- >>> import numpy as np
- >>> y_true = y_true_class = np.array([3, 3, 1, 2, 2])
- >>> y_pred_set = np.array([
- ... [True, True, True, True],
- ... [False, True, False, True],
- ... [True, True, True, False],
- ... [False, False, True, True],
- ... [True, True, False, True]])
- >>> print(classification_ssc_score(y_true, y_pred_set, num_bins=2))
- [0.66666667]
- """
- check_arrays_length(y_true, y_pred_set)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_pred_set)
- check_array_inf(y_pred_set)
-
- return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
-
-
-def _gaussian_kernel(
- x: NDArray,
- kernel_size: int
-) -> NDArray:
- """
- Computes the gaussian kernel of x. (Used in hsic function)
-
- Parameters
- ----------
- x: NDArray
- The values from which to compute the gaussian kernel.
- kernel_size: int
- The variance (sigma), this coefficient controls the width of the curve.
- """
- norm_x = x ** 2
- dist = -2 * np.matmul(x, x.transpose((0, 2, 1))) \
- + norm_x + norm_x.transpose((0, 2, 1))
- return np.exp(-dist / kernel_size)
-
-
-def hsic(
- y_true: NDArray,
- y_intervals: NDArray,
- kernel_sizes: ArrayLike = (1, 1)
-) -> NDArray:
- """
- Compute the square root of the hsic coefficient. HSIC is Hilbert-Schmidt
- independence criterion that is a correlation measure. Here we use it as
- proposed in [4], to compute the correlation between the indicator of
- coverage and the interval size.
-
- If hsic is 0, the two variables (the indicator of coverage and the
- interval size) are independant.
-
- Warning: This metric should be used only with non constant intervals
- (intervals of different sizes), with constant intervals the result
- may be misinterpreted.
-
- [4] Feldman, S., Bates, S., & Romano, Y. (2021).
- Improving conditional coverage via orthogonal quantile regression.
- Advances in Neural Information Processing Systems, 34, 2060-2071.
-
- Parameters
- ----------
- y_true: NDArray of shape (n_samples,)
- True labels.
- y_intervals: NDArray of shape (n_samples, 2, n_alpha) or (n_samples, 2)
- Prediction sets given by booleans of labels.
- kernel_sizes: ArrayLike of size (2,)
- The variance (sigma) for each variable (the indicator of coverage and
- the interval size), this coefficient controls the width of the curve.
-
- Returns
- -------
- NDArray of shape (n_alpha,)
- One hsic correlation coefficient by alpha.
-
- Raises
- ------
- ValueError
- If kernel_sizes has a length different from 2
- and if it has negative or null values.
-
- Examples
- --------
- >>> from mapie.metrics import hsic
- >>> import numpy as np
- >>> y_true = np.array([9.5, 10.5, 12.5])
- >>> y_intervals = np.array([
- ... [[9, 9], [10.0, 10.0]],
- ... [[8.5, 9], [12.5, 12]],
- ... [[10.5, 10.5], [12.0, 12]]
- ... ])
- >>> print(hsic(y_true, y_intervals))
- [0.31787614 0.2962914 ]
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_intervals = check_array_shape_regression(y_true, y_intervals)
-
- check_arrays_length(y_true, y_intervals)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_intervals)
- check_array_inf(y_intervals)
-
- kernel_sizes = cast(NDArray, column_or_1d(kernel_sizes))
- if len(kernel_sizes) != 2:
- raise ValueError(
- "kernel_sizes should be an ArrayLike of length 2"
- )
- if (kernel_sizes <= 0).any():
- raise ValueError(
- "kernel_size should be positive"
- )
- n_samples, _, n_alpha = y_intervals.shape
- y_true_per_alpha = np.tile(y_true, (n_alpha, 1)).transpose()
- widths = np.expand_dims(
- np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]).transpose(),
- axis=2
- )
- cov_ind = np.expand_dims(
- np.int_(
- ((y_intervals[:, 0, :] <= y_true_per_alpha) &
- (y_intervals[:, 1, :] >= y_true_per_alpha))
- ).transpose(),
- axis=2
- )
-
- k_mat = _gaussian_kernel(widths, kernel_sizes[0])
- l_mat = _gaussian_kernel(cov_ind, kernel_sizes[1])
- h_mat = np.eye(n_samples) - 1 / n_samples * np.ones((n_samples, n_samples))
- hsic_mat = np.matmul(l_mat, np.matmul(h_mat, np.matmul(k_mat, h_mat)))
- hsic_mat /= ((n_samples - 1) ** 2)
- coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2))
-
- return coef_hsic
-
-
-def coverage_width_based(
- y_true: ArrayLike,
- y_pred_low: ArrayLike,
- y_pred_up: ArrayLike,
- eta: float,
- alpha: float
-) -> float:
- """
- Coverage Width-based Criterion (CWC) obtained by the prediction intervals.
-
- The effective coverage score is a criterion used to evaluate the quality
- of prediction intervals (PIs) based on their coverage and width.
-
- Khosravi, Abbas, Saeid Nahavandi, and Doug Creighton.
- "Construction of optimal prediction intervals for load forecasting
- problems."
- IEEE Transactions on Power Systems 25.3 (2010): 1496-1503.
-
- Parameters
- ----------
- Coverage score : float
- Prediction interval coverage probability (Coverage score), which is
- the estimated fraction of true labels that lie within the prediction
- intervals.
- Mean Width Score : float
- Prediction interval normalized average width (Mean Width Score),
- calculated as the average width of the prediction intervals.
- eta : int
- A user-defined parameter that balances the contributions of
- Mean Width Score and Coverage score in the CWC calculation.
- alpha : float
- A user-defined parameter representing the designed confidence level of
- the PI.
-
- Returns
- -------
- float
- Effective coverage score (CWC) obtained by the prediction intervals.
-
- Notes
- -----
- The effective coverage score (CWC) is calculated using the following
- formula:
- CWC = (1 - Mean Width Score) * exp(-eta * (Coverage score - (1-alpha))**2)
-
- The CWC penalizes under- and overcoverage in the same way and summarizes
- the quality of the prediction intervals in a single value.
-
- High Eta (Large Positive Value):
-
- When eta is a high positive value, it will strongly
- emphasize the contribution of (1-Mean Width Score). This means that the
- algorithm will prioritize reducing the average width of the prediction
- intervals (Mean Width Score) over achieving a high coverage probability
- (Coverage score). The exponential term np.exp(-eta*(Coverage score -
- (1-alpha))**2) will have a sharp decline as Coverage score deviates
- from (1-alpha). So, achieving a high Coverage score becomes less important
- compared to minimizing Mean Width Score.
- The impact will be narrower prediction intervals on average, which may
- result in more precise but less conservative predictions.
-
- Low Eta (Small Positive Value):
-
- When eta is a low positive value, it will still
- prioritize reducing the average width of the prediction intervals
- (Mean Width Score) but with less emphasis compared to higher
- eta values.
- The exponential term will be less steep, meaning that deviations of
- Coverage score from (1-alpha) will have a moderate impact.
- You'll get a balance between prediction precision and coverage, but the
- exact balance will depend on the specific value of eta.
-
- Negative Eta (Any Negative Value):
-
- When eta is negative, it will have a different effect on the formula.
- Negative values of eta will cause the exponential term
- np.exp(-eta*(Coverage score - (1-alpha))**2) to become larger as
- Coverage score deviates from (1-alpha). This means that
- a negative eta prioritizes achieving a high coverage probability
- (Coverage score) over minimizing Mean Width Score.
- In this case, the algorithm will aim to produce wider prediction intervals
- to ensure a higher likelihood of capturing the true values within those
- intervals, even if it sacrifices precision.
- Negative eta values might be used in scenarios where avoiding errors or
- outliers is critical.
-
- Null Eta (Eta = 0):
-
- Specifically, when eta is zero, the CWC score becomes equal to
- (1 - Mean Width Score), which is equivalent to
- (1 - average width of the prediction intervals).
- Therefore, in this case, the CWC score is primarily based on the size of
- the prediction interval.
-
- Examples
- --------
- >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
- >>> y_preds_low = np.array([4, 6, 9, 8.5, 10.5])
- >>> y_preds_up = np.array([6, 9, 10, 12.5, 12])
- >>> eta = 0.01
- >>> alpha = 0.1
- >>> cwb = coverage_width_based(y_true, y_preds_low, y_preds_up, eta, alpha)
- >>> print(np.round(cwb ,2))
- 0.69
- """
- y_true = cast(NDArray, column_or_1d(y_true))
- y_pred_low = cast(NDArray, column_or_1d(y_pred_low))
- y_pred_up = cast(NDArray, column_or_1d(y_pred_up))
-
- check_alpha(1-alpha)
-
- coverage_score = regression_coverage_score(
- y_true,
- y_pred_low,
- y_pred_up
- )
- mean_width = regression_mean_width_score(
- y_pred_low,
- y_pred_up
- )
- ref_length = np.subtract(
- float(y_true.max()),
- float(y_true.min())
- )
- avg_length = mean_width / ref_length
-
- cwc = (1-avg_length)*np.exp(-eta*(coverage_score-(1-alpha))**2)
-
- return float(cwc)
-
-
-def add_jitter(
- x: NDArray,
- noise_amplitude: float = 1e-8,
- random_state: Optional[Union[int, np.random.RandomState]] = None
-) -> NDArray:
- """
- Add a tiny normal distributed perturbation to an array x.
-
- Parameters
- ----------
- x : NDArray
- The array to jitter.
-
- noise_amplitude : float, optional
- The tiny relative noise amplitude to add, by default 1e-8.
-
- random_state: Optional[Union[int, RandomState]]
- Pseudo random number generator state used for random sampling.
- Pass an int for reproducible output across multiple function calls.
-
- Returns
- -------
- NDArray
- The array x jittered.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import add_jitter
- >>> x = np.array([0, 1, 2, 3, 4])
- >>> res = add_jitter(x, random_state=1)
- >>> res
- array([0. , 0.99999999, 1.99999999, 2.99999997, 4.00000003])
- """
- n = len(x)
- random_state_np = check_random_state(random_state)
- noise = noise_amplitude * random_state_np.normal(size=n)
- x_jittered = x * (1 + noise)
- return x_jittered
-
-
-def sort_xy_by_y(x: NDArray, y: NDArray) -> Tuple[NDArray, NDArray]:
- """
- Sort two arrays x and y according to y values.
-
- Parameters
- ----------
- x : NDArray of size (n_samples,)
- The array to sort according to y.
- y : NDArray of size (n_samples,)
- The array to sort.
-
- Returns
- -------
- Tuple[NDArray, NDArray]
- Both arrays sorted.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import sort_xy_by_y
- >>> x = np.array([1, 2, 3, 4, 5])
- >>> y = np.array([5, 4, 3, 1, 2])
- >>> x_sorted, y_sorted = sort_xy_by_y(x, y)
- >>> print(x_sorted)
- [4 5 3 2 1]
- >>> print(y_sorted)
- [1 2 3 4 5]
- """
- x = column_or_1d(x)
- y = column_or_1d(y)
- sort_index = np.argsort(y)
- x_sorted = x[sort_index]
- y_sorted = y[sort_index]
- return x_sorted, y_sorted
-
-
-def cumulative_differences(
- y_true: NDArray,
- y_score: NDArray,
- noise_amplitude: float = 1e-8,
- random_state: Optional[Union[int, np.random.RandomState]] = 1
-) -> NDArray:
- """
- Compute the cumulative difference between y_true and y_score, both ordered
- according to y_scores array.
-
- Parameters
- ----------
- y_true : NDArray of size (n_samples,)
- An array of ground truths.
-
- y_score : NDArray of size (n_samples,)
- An array of scores.
-
- noise_amplitude : float, optional
- The tiny relative noise amplitude to add, by default 1e-8.
-
- random_state: Optional[Union[int, RandomState]]
- Pseudo random number generator state used for random sampling.
- Pass an int for reproducible output across multiple function calls.
-
- Returns
- -------
- NDArray
- The mean cumulative difference between y_true and y_score.
-
- References
- ----------
- Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
- Metrics of calibration for probabilistic predictions.
- The Journal of Machine Learning Research.
- 2022 Jan 1;23(1):15886-940.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import cumulative_differences
- >>> y_true = np.array([1, 0, 0])
- >>> y_score = np.array([0.7, 0.3, 0.6])
- >>> cum_diff = cumulative_differences(y_true, y_score)
- >>> print(len(cum_diff))
- 3
- >>> print(np.max(cum_diff) <= 1)
- True
- >>> print(np.min(cum_diff) >= -1)
- True
- >>> cum_diff
- array([-0.1, -0.3, -0.2])
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- n = len(y_true)
- y_score_jittered = add_jitter(
- y_score,
- noise_amplitude=noise_amplitude,
- random_state=random_state
- )
- y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered)
- cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted)/n
- return cumulative_differences
-
-
-def length_scale(s: NDArray) -> float:
- """
- Compute the mean square root of the sum of s * (1 - s).
- This is basically the standard deviation of the
- cumulative differences.
-
- Parameters
- ----------
- s : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- The length_scale array.
-
- References
- ----------
- Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
- Metrics of calibration for probabilistic predictions.
- The Journal of Machine Learning Research.
- 2022 Jan 1;23(1):15886-940.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import length_scale
- >>> s = np.array([0, 0, 0.4, 0.3, 0.8])
- >>> res = length_scale(s)
- >>> print(np.round(res, 2))
- 0.16
- """
- n = len(s)
- length_scale = np.sqrt(np.sum(s * (1 - s)))/n
- return length_scale
-
-
-def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Kolmogorov-smirnov's statistic for calibration test.
- Also called ECCE-MAD
- (Estimated Cumulative Calibration Errors - Maximum Absolute Deviation).
- The closer to zero, the better the scores are calibrated.
- Indeed, if the scores are perfectly calibrated,
- the cumulative differences between ``y_true`` and ``y_score``
- should share the same properties of a standard Brownian motion
- asymptotically.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores..
-
- Returns
- -------
- float
- Kolmogorov-smirnov's statistic.
-
- References
- ----------
- Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
- Metrics of calibration for probabilistic predictions.
- The Journal of Machine Learning Research.
- 2022 Jan 1;23(1):15886-940.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import kolmogorov_smirnov_statistic
- >>> y_true = np.array([0, 1, 0, 1, 0])
- >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
- >>> print(np.round(kolmogorov_smirnov_statistic(y_true, y_score), 3))
- 0.978
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- y_true = column_or_1d(y_true)
- y_score = column_or_1d(y_score)
-
- cum_diff = cumulative_differences(y_true, y_score)
- sigma = length_scale(y_score)
- ks_stat = np.max(np.abs(cum_diff)) / sigma
- return ks_stat
-
-
-def kolmogorov_smirnov_cdf(x: float) -> float:
- """
- Compute the Kolmogorov-smirnov cumulative distribution
- function (CDF) for the float x.
- This is interpreted as the CDF of the maximum absolute value
- of the standard Brownian motion over the unit interval [0, 1].
- The function is approximated by its power series, truncated so as to hit
- machine precision error.
-
- Parameters
- ----------
- x : float
- The float x to compute the cumulative distribution function on.
-
- Returns
- -------
- float
- The Kolmogorov-smirnov cumulative distribution function.
-
- References
- ----------
- Tygert M.
- Calibration of P-values for calibration and for deviation
- of a subpopulation from the full population.
- arXiv preprint arXiv:2202.00100.
- 2022 Jan 31.
-
- D. A. Darling. A. J. F. Siegert.
- The First Passage Problem for a Continuous Markov Process.
- Ann. Math. Statist. 24 (4) 624 - 639, December,
- 1953.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import kolmogorov_smirnov_cdf
- >>> print(np.round(kolmogorov_smirnov_cdf(1), 4))
- 0.3708
- """
- kmax = np.ceil(
- 0.5 + x * np.sqrt(2) / np.pi * np.sqrt(np.log(4 / (np.pi*EPSILON)))
- )
- c = 0.0
- for k in range(int(kmax)):
- kplus = k + 1 / 2
- c += (-1)**k / kplus * np.exp(-kplus**2 * np.pi**2 / (2 * x**2))
- c *= 2 / np.pi
- return c
-
-
-def kolmogorov_smirnov_p_value(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Kolmogorov Smirnov p-value.
- Deduced from the corresponding statistic and CDF.
- It represents the probability of the observed statistic
- under the null hypothesis of perfect calibration.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- The Kolmogorov Smirnov p-value.
-
- References
- ----------
- Tygert M.
- Calibration of P-values for calibration and for deviation
- of a subpopulation from the full population.
- arXiv preprint arXiv:2202.00100.
- 2022 Jan 31.
-
- D. A. Darling. A. J. F. Siegert.
- The First Passage Problem for a Continuous Markov Process.
- Ann. Math. Statist. 24 (4) 624 - 639, December,
- 1953.
-
- Examples
- --------
- >>> import pandas as pd
- >>> import numpy as np
- >>> from mapie.metrics import kolmogorov_smirnov_p_value
- >>> y_true = np.array([1, 0, 1, 0, 1, 0])
- >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
- >>> ks_p_value = kolmogorov_smirnov_p_value(y_true, y_score)
- >>> print(np.round(ks_p_value, 4))
- 0.7857
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- ks_stat = kolmogorov_smirnov_statistic(y_true, y_score)
- ks_p_value = 1 - kolmogorov_smirnov_cdf(ks_stat)
- return ks_p_value
-
-
-def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Kuiper's statistic for calibration test.
- Also called ECCE-R (Estimated Cumulative Calibration Errors - Range).
- The closer to zero, the better the scores are calibrated.
- Indeed, if the scores are perfectly calibrated,
- the cumulative differences between ``y_true`` and ``y_score``
- should share the same properties of a standard Brownian motion
- asymptotically.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- Kuiper's statistic.
-
- References
- ----------
- Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
- Metrics of calibration for probabilistic predictions.
- The Journal of Machine Learning Research.
- 2022 Jan 1;23(1):15886-940.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import kuiper_statistic
- >>> y_true = np.array([0, 1, 0, 1, 0])
- >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
- >>> print(np.round(kuiper_statistic(y_true, y_score), 3))
- 0.857
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- y_true = column_or_1d(y_true)
- y_score = column_or_1d(y_score)
- cum_diff = cumulative_differences(y_true, y_score)
- sigma = length_scale(y_score)
- ku_stat = (np.max(cum_diff) - np.min(cum_diff)) / sigma
- return ku_stat
-
-
-def kuiper_cdf(x: float) -> float:
- """
- Compute the Kuiper cumulative distribution function (CDF) for the float x.
- This is interpreted as the CDF of the range
- of the standard Brownian motion over the unit interval [0, 1].
- The function is approximated by its power series, truncated so as to hit
- machine precision error.
-
- Parameters
- ----------
- x : float
- The float x to compute the cumulative distribution function.
-
- Returns
- -------
- float
- The Kuiper cumulative distribution function.
-
- References
- ----------
- Tygert M.
- Calibration of P-values for calibration and for deviation
- of a subpopulation from the full population.
- arXiv preprint arXiv:2202.00100.
- 2022 Jan 31.
-
- William Feller.
- The Asymptotic Distribution of the Range of Sums of
- Independent Random Variables.
- Ann. Math. Statist. 22 (3) 427 - 432
- September, 1951.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import kuiper_cdf
- >>> print(np.round(kuiper_cdf(1), 4))
- 0.0634
- """
- kmax = np.ceil(
- (
- 0.5 + x / (np.pi * np.sqrt(2)) *
- np.sqrt(
- np.log(
- 4 / (np.sqrt(2 * np.pi) * EPSILON) * (1 / x + x / np.pi**2)
- )
- )
- )
- )
- c = 0.0
- for k in range(int(kmax)):
- kplus = k + 1 / 2
- c += (
- (8 / x**2 + 2 / kplus**2 / np.pi**2) *
- np.exp(-2 * kplus**2 * np.pi**2 / x**2)
- )
- return c
-
-
-def kuiper_p_value(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Kuiper statistic p-value.
- Deduced from the corresponding statistic and CDF.
- It represents the probability of the observed statistic
- under the null hypothesis of perfect calibration.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- The Kuiper p-value.
-
- References
- ----------
- Tygert M.
- Calibration of P-values for calibration and for deviation
- of a subpopulation from the full population.
- arXiv preprint arXiv:2202.00100.
- 2022 Jan 31.
-
- William Feller.
- The Asymptotic Distribution of the Range of Sums of
- Independent Random Variables.
- Ann. Math. Statist. 22 (3) 427 - 432
- September, 1951.
-
- Examples
- --------
- >>> import pandas as pd
- >>> import numpy as np
- >>> from mapie.metrics import kuiper_p_value
- >>> y_true = np.array([1, 0, 1, 0, 1, 0])
- >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
- >>> ku_p_value = kuiper_p_value(y_true, y_score)
- >>> print(np.round(ku_p_value, 4))
- 0.9684
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- ku_stat = kuiper_statistic(y_true, y_score)
- ku_p_value = 1 - kuiper_cdf(ku_stat)
- return ku_p_value
-
-
-def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Spiegelhalter's statistic for calibration test.
- The closer to zero, the better the scores are calibrated.
- Indeed, if the scores are perfectly calibrated,
- the Brier score simplifies to an expression whose expectancy
- and variance are easy to compute. The statistic is no more that
- a z-score on this normalized expression.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- Spiegelhalter's statistic.
-
- References
- ----------
- Spiegelhalter DJ.
- Probabilistic prediction in patient management and clinical trials.
- Statistics in medicine.
- 1986 Sep;5(5):421-33.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import spiegelhalter_statistic
- >>> y_true = np.array([0, 1, 0, 1, 0])
- >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
- >>> print(np.round(spiegelhalter_statistic(y_true, y_score), 3))
- -0.757
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
-
- y_true = column_or_1d(y_true)
- y_score = column_or_1d(y_score)
- numerator = np.sum(
- (y_true - y_score) * (1 - 2 * y_score)
- )
- denominator = np.sqrt(
- np.sum(
- (1 - 2 * y_score) ** 2 * y_score * (1 - y_score)
- )
- )
- sp_stat = numerator/denominator
- return sp_stat
-
-
-def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
- """
- Compute Spiegelhalter statistic p-value.
- Deduced from the corresponding statistic and CDF,
- which is no more than the normal distribution.
- It represents the probability of the observed statistic
- under the null hypothesis of perfect calibration.
-
- Parameters
- ----------
- y_true : NDArray of shape (n_samples,)
- An array of ground truth.
-
- y_score : NDArray of shape (n_samples,)
- An array of scores.
-
- Returns
- -------
- float
- The Spiegelhalter statistic p_value.
-
- References
- ----------
- Spiegelhalter DJ.
- Probabilistic prediction in patient management and clinical trials.
- Statistics in medicine.
- 1986 Sep;5(5):421-33.
-
- Examples
- --------
- >>> import numpy as np
- >>> from mapie.metrics import spiegelhalter_p_value
- >>> y_true = np.array([1, 0, 1, 0, 1, 0])
- >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
- >>> sp_p_value = spiegelhalter_p_value(y_true, y_score)
- >>> print(np.round(sp_p_value, 4))
- 0.8486
- """
- check_arrays_length(y_true, y_score)
- check_array_nan(y_true)
- check_array_inf(y_true)
- check_array_nan(y_score)
- check_array_inf(y_score)
- sp_stat = spiegelhalter_statistic(y_true, y_score)
- sp_p_value = 1 - scipy.stats.norm.cdf(sp_stat)
- return sp_p_value
-
-
-def regression_mwi_score(
- y_true: NDArray,
- y_pis: NDArray,
- alpha: float
-) -> float:
- """
- The Winkler score, proposed by Winkler (1972), is a measure used to
- evaluate prediction intervals, combining the length of the interval
- with a penalty that increases proportionally to the distance of an
- observation outside the interval.
-
- Parameters
- ----------
- y_true: ArrayLike of shape (n_samples,)
- Ground truth values
- y_pis: ArrayLike of shape (n_samples, 2, 1)
- Lower and upper bounds of prediction intervals
- output from a MAPIE regressor
- alpha: float
- The value of alpha
-
- Returns
- -------
- float
- The mean Winkler interval score
-
- References
- ----------
- [1] Robert L. Winkler
- "A Decision-Theoretic Approach to Interval Estimation",
- Journal of the American Statistical Association,
- volume 67, pages 187-191 (1972)
- (https://doi.org/10.1080/01621459.1972.10481224)
- [2] Tilmann Gneiting and Adrian E Raftery
- "Strictly Proper Scoring Rules, Prediction, and Estimation",
- Journal of the American Statistical Association,
- volume 102, pages 359-378 (2007)
- (https://doi.org/10.1198/016214506000001437) (Section 6.2)
- """
-
- # Undo any possible quantile crossing
- y_pred_low = np.minimum(y_pis[:, 0, 0], y_pis[:, 1, 0])
- y_pred_up = np.maximum(y_pis[:, 0, 0], y_pis[:, 1, 0])
-
- check_arrays_length(y_true, y_pred_low, y_pred_up)
-
- # Checking for NaN and inf values
- for array in (y_true, y_pred_low, y_pred_up):
- check_array_nan(array)
- check_array_inf(array)
-
- width = np.sum(y_pred_up) - np.sum(y_pred_low)
- error_above = np.sum((y_true - y_pred_up)[y_true > y_pred_up])
- error_below = np.sum((y_pred_low - y_true)[y_true < y_pred_low])
- total_error = error_above + error_below
- mwi = (width + total_error * 2 / alpha) / len(y_true)
- return mwi
diff --git a/mapie/metrics/__init__.py b/mapie/metrics/__init__.py
new file mode 100644
index 000000000..fffdb45f5
--- /dev/null
+++ b/mapie/metrics/__init__.py
@@ -0,0 +1,9 @@
+from . import classification
+from . import calibration
+from . import regression
+
+__all__ = [
+ "classification",
+ "calibration",
+ "regression",
+]
diff --git a/mapie/metrics/calibration.py b/mapie/metrics/calibration.py
new file mode 100644
index 000000000..fa48eac4d
--- /dev/null
+++ b/mapie/metrics/calibration.py
@@ -0,0 +1,794 @@
+from typing import Optional, Tuple, Union, cast
+
+import numpy as np
+import scipy
+from numpy.typing import ArrayLike, NDArray
+from sklearn.utils import check_random_state
+from sklearn.utils.validation import column_or_1d
+
+from mapie._machine_precision import EPSILON
+from mapie.utils import (
+    calc_bins,
+    check_array_inf,
+    check_array_nan,
+    check_arrays_length,
+    check_binary_zero_one,
+    check_number_bins,
+    check_split_strategy,
+)
+
+
+def expected_calibration_error(
+ y_true: ArrayLike,
+ y_scores: ArrayLike,
+ num_bins: int = 50,
+ split_strategy: Optional[str] = None,
+) -> float:
+ """
+ The expected calibration error, which is the difference between
+ the confidence scores and accuracy per bin [1].
+
+ [1] Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht.
+ "Obtaining well calibrated probabilities using bayesian binning."
+ Twenty-Ninth AAAI Conference on Artificial Intelligence. 2015.
+
+ Parameters
+ ----------
+ y_true: ArrayLike of shape (n_samples,)
+ The target values for the calibrator.
+ y_scores: ArrayLike of shape (n_samples,) or (n_samples, n_classes)
+ The predictions scores.
+ num_bins: int
+ Number of bins to make the split in the y_score. The allowed
+ values are num_bins above 0.
+ split_strategy: str
+ The way of splitting the predictions into different bins.
+ The allowed split strategies are "uniform", "quantile" and
+ "array split".
+ Returns
+ -------
+ float
+ The score of ECE (Expected Calibration Error).
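+
+    Notes
+    -----
+    Denoting by ``B_k`` the bins, by ``acc(B_k)`` the mean of ``y_true``
+    over ``B_k`` and by ``conf(B_k)`` the mean score over ``B_k``, the
+    returned value is
+    ``sum_k |B_k| * abs(acc(B_k) - conf(B_k)) / sum_k |B_k|``.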
+ """
+ split_strategy = check_split_strategy(split_strategy)
+ num_bins = check_number_bins(num_bins)
+ y_true_ = check_binary_zero_one(y_true)
+ y_scores = cast(NDArray, y_scores)
+
+ check_arrays_length(y_true_, y_scores)
+ check_array_nan(y_true_)
+ check_array_inf(y_true_)
+ check_array_nan(y_scores)
+ check_array_inf(y_scores)
+
+    if np.ndim(y_scores) == 2:
+ y_score = cast(
+ NDArray, column_or_1d(np.nanmax(y_scores, axis=1))
+ )
+ else:
+ y_score = cast(NDArray, column_or_1d(y_scores))
+
+ _, bin_accs, bin_confs, bin_sizes = calc_bins(
+ y_true_, y_score, num_bins, split_strategy
+ )
+
+ return np.divide(
+ np.sum(bin_sizes * np.abs(bin_accs - bin_confs)),
+ np.sum(bin_sizes)
+ )
+
+
+def top_label_ece(
+ y_true: ArrayLike,
+ y_scores: ArrayLike,
+ y_score_arg: Optional[ArrayLike] = None,
+ num_bins: int = 50,
+ split_strategy: Optional[str] = None,
+ classes: Optional[ArrayLike] = None,
+) -> float:
+ """
+ The Top-Label ECE which is a method adapted to fit the
+ ECE to a Top-Label setting [2].
+
+ [2] Gupta, Chirag, and Aaditya K. Ramdas.
+ "Top-label calibration and multiclass-to-binary reductions."
+ arXiv preprint arXiv:2107.08353 (2021).
+
+ Parameters
+ ----------
+ y_true: ArrayLike of shape (n_samples,)
+ The target values for the calibrator.
+ y_scores: ArrayLike of shape (n_samples, n_classes)
+ or (n_samples,)
+ The predictions scores, either the maximum score and the
+ argmax needs to be inputted or in the form of the prediction
+ probabilities.
+ y_score_arg: Optional[ArrayLike] of shape (n_samples,)
+ If only the maximum is provided in the y_scores, the argmax must
+ be provided here. This is optional and could be directly infered
+ from the y_scores.
+ num_bins: int
+ Number of bins to make the split in the y_score. The allowed
+ values are num_bins above 0.
+ split_strategy: str
+ The way of splitting the predictions into different bins.
+ The allowed split strategies are "uniform", "quantile" and
+ "array split".
+ classes: ArrayLike of shape (n_samples,)
+ The different classes, in order of the indices that would be
+ present in a pred_proba.
+
+ Returns
+ -------
+ float
+ The ECE score adapted in the top label setting.
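+
+    Notes
+    -----
+    For each label ``l`` predicted as top label, the binary ECE of the
+    event ``y_true == l`` is computed on the samples whose top label is
+    ``l``, using their top-label scores; the returned value is the
+    average of these per-label ECEs.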
+ """
+ y_scores = cast(NDArray, y_scores)
+ y_true = cast(NDArray, y_true)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_scores)
+ check_array_inf(y_scores)
+
+ if y_score_arg is None:
+ check_arrays_length(y_true, y_scores)
+ else:
+ y_score_arg = cast(NDArray, y_score_arg)
+ check_array_nan(y_score_arg)
+ check_array_inf(y_score_arg)
+ check_arrays_length(y_true, y_scores, y_score_arg)
+
+    ece = 0.0
+ split_strategy = check_split_strategy(split_strategy)
+ num_bins = check_number_bins(num_bins)
+ y_true = cast(NDArray, column_or_1d(y_true))
+ if y_score_arg is None:
+ y_score = cast(
+ NDArray, column_or_1d(np.nanmax(y_scores, axis=1))
+ )
+ if classes is None:
+ y_score_arg = cast(
+ NDArray, column_or_1d(np.nanargmax(y_scores, axis=1))
+ )
+ else:
+ classes = cast(NDArray, classes)
+ y_score_arg = cast(
+ NDArray, column_or_1d(classes[np.nanargmax(y_scores, axis=1)])
+ )
+ else:
+ y_score = cast(NDArray, column_or_1d(y_scores))
+ y_score_arg = cast(NDArray, column_or_1d(y_score_arg))
+ labels = np.unique(y_score_arg)
+
+ for label in labels:
+ label_ind = np.where(label == y_score_arg)[0]
+ y_true_ = np.array(y_true[label_ind] == label, dtype=int)
+ ece += expected_calibration_error(
+ y_true_,
+ y_scores=y_score[label_ind],
+ num_bins=num_bins,
+ split_strategy=split_strategy
+ )
+ ece /= len(labels)
+ return ece
+
+
+def add_jitter(
+ x: NDArray,
+ noise_amplitude: float = 1e-8,
+ random_state: Optional[Union[int, np.random.RandomState]] = None
+) -> NDArray:
+ """
+    Add a tiny, normally distributed relative perturbation to an array x.
+
+ Parameters
+ ----------
+ x : NDArray
+ The array to jitter.
+
+ noise_amplitude : float, optional
+ The tiny relative noise amplitude to add, by default 1e-8.
+
+ random_state: Optional[Union[int, RandomState]]
+ Pseudo random number generator state used for random sampling.
+ Pass an int for reproducible output across multiple function calls.
+
+ Returns
+ -------
+ NDArray
+ The array x jittered.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import add_jitter
+ >>> x = np.array([0, 1, 2, 3, 4])
+ >>> res = add_jitter(x, random_state=1)
+ >>> res
+ array([0. , 0.99999999, 1.99999999, 2.99999997, 4.00000003])
+ """
+ n = len(x)
+ random_state_np = check_random_state(random_state)
+ noise = noise_amplitude * random_state_np.normal(size=n)
+ x_jittered = x * (1 + noise)
+ return x_jittered
+
+
+def sort_xy_by_y(x: NDArray, y: NDArray) -> Tuple[NDArray, NDArray]:
+ """
+ Sort two arrays x and y according to y values.
+
+ Parameters
+ ----------
+ x : NDArray of size (n_samples,)
+ The array to sort according to y.
+ y : NDArray of size (n_samples,)
+ The array to sort.
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Both arrays sorted.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import sort_xy_by_y
+ >>> x = np.array([1, 2, 3, 4, 5])
+ >>> y = np.array([5, 4, 3, 1, 2])
+ >>> x_sorted, y_sorted = sort_xy_by_y(x, y)
+ >>> print(x_sorted)
+ [4 5 3 2 1]
+ >>> print(y_sorted)
+ [1 2 3 4 5]
+ """
+ x = column_or_1d(x)
+ y = column_or_1d(y)
+ sort_index = np.argsort(y)
+ x_sorted = x[sort_index]
+ y_sorted = y[sort_index]
+ return x_sorted, y_sorted
+
+
+def cumulative_differences(
+ y_true: NDArray,
+ y_score: NDArray,
+ noise_amplitude: float = 1e-8,
+ random_state: Optional[Union[int, np.random.RandomState]] = 1
+) -> NDArray:
+ """
+    Compute the cumulative differences between y_true and y_score,
+    both ordered according to the (jittered) y_score array.
+
+ Parameters
+ ----------
+ y_true : NDArray of size (n_samples,)
+ An array of ground truths.
+
+ y_score : NDArray of size (n_samples,)
+ An array of scores.
+
+ noise_amplitude : float, optional
+ The tiny relative noise amplitude to add, by default 1e-8.
+
+ random_state: Optional[Union[int, RandomState]]
+ Pseudo random number generator state used for random sampling.
+ Pass an int for reproducible output across multiple function calls.
+
+ Returns
+ -------
+ NDArray
+        The cumulative differences between y_true and y_score,
+        normalized by the number of samples.
+
+ References
+ ----------
+ Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
+ Metrics of calibration for probabilistic predictions.
+ The Journal of Machine Learning Research.
+ 2022 Jan 1;23(1):15886-940.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import cumulative_differences
+ >>> y_true = np.array([1, 0, 0])
+ >>> y_score = np.array([0.7, 0.3, 0.6])
+ >>> cum_diff = cumulative_differences(y_true, y_score)
+ >>> print(len(cum_diff))
+ 3
+ >>> print(np.max(cum_diff) <= 1)
+ True
+ >>> print(np.min(cum_diff) >= -1)
+ True
+ >>> cum_diff
+ array([-0.1, -0.3, -0.2])
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ n = len(y_true)
+ y_score_jittered = add_jitter(
+ y_score,
+ noise_amplitude=noise_amplitude,
+ random_state=random_state
+ )
+ y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered)
+    cum_diffs = np.cumsum(y_true_sorted - y_score_sorted) / n
+    return cum_diffs
+
+
+def length_scale(s: NDArray) -> float:
+ """
+    Compute the square root of the sum of s * (1 - s),
+    divided by the number of samples.
+    This is essentially the standard deviation of the
+    cumulative differences.
+
+ Parameters
+ ----------
+ s : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+        The length scale, as a float.
+
+ References
+ ----------
+ Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
+ Metrics of calibration for probabilistic predictions.
+ The Journal of Machine Learning Research.
+ 2022 Jan 1;23(1):15886-940.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import length_scale
+ >>> s = np.array([0, 0, 0.4, 0.3, 0.8])
+ >>> res = length_scale(s)
+ >>> print(np.round(res, 2))
+ 0.16
+ """
+ n = len(s)
+    return np.sqrt(np.sum(s * (1 - s))) / n
+
+
+def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute the Kolmogorov-Smirnov statistic for a calibration test.
+    Also called ECCE-MAD
+    (Estimated Cumulative Calibration Errors - Maximum Absolute Deviation).
+    The closer to zero, the better the scores are calibrated.
+    Indeed, if the scores are perfectly calibrated,
+    the cumulative differences between ``y_true`` and ``y_score``
+    asymptotically share the properties of a standard Brownian motion.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+        An array of scores.
+
+ Returns
+ -------
+ float
+        The Kolmogorov-Smirnov statistic.
+
+ References
+ ----------
+ Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
+ Metrics of calibration for probabilistic predictions.
+ The Journal of Machine Learning Research.
+ 2022 Jan 1;23(1):15886-940.
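+
+    Notes
+    -----
+    The statistic is ``max_k |C_k| / sigma``, where ``C`` is the output
+    of ``cumulative_differences(y_true, y_score)`` and
+    ``sigma = length_scale(y_score)``.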
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kolmogorov_smirnov_statistic
+ >>> y_true = np.array([0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
+ >>> print(np.round(kolmogorov_smirnov_statistic(y_true, y_score), 3))
+ 0.978
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ y_true = column_or_1d(y_true)
+ y_score = column_or_1d(y_score)
+
+ cum_diff = cumulative_differences(y_true, y_score)
+ sigma = length_scale(y_score)
+ ks_stat = np.max(np.abs(cum_diff)) / sigma
+ return ks_stat
+
+
+def kolmogorov_smirnov_cdf(x: float) -> float:
+ """
+    Compute the Kolmogorov-Smirnov cumulative distribution
+    function (CDF) at the float x.
+    This is interpreted as the CDF of the maximum absolute value
+    of the standard Brownian motion over the unit interval [0, 1].
+    The function is approximated by its power series, truncated so that
+    the remainder is below machine precision.
+
+ Parameters
+ ----------
+ x : float
+        The point at which to evaluate the cumulative distribution function.
+
+ Returns
+ -------
+ float
+        The Kolmogorov-Smirnov CDF evaluated at x.
+
+ References
+ ----------
+ Tygert M.
+ Calibration of P-values for calibration and for deviation
+ of a subpopulation from the full population.
+ arXiv preprint arXiv:2202.00100.
+ 2022 Jan 31.
+
+ D. A. Darling. A. J. F. Siegert.
+ The First Passage Problem for a Continuous Markov Process.
+ Ann. Math. Statist. 24 (4) 624 - 639, December,
+ 1953.
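+
+    Notes
+    -----
+    The implemented truncated series is
+    ``(2 / pi) * sum_k (-1)**k / (k + 1/2)
+    * exp(-(k + 1/2)**2 * pi**2 / (2 * x**2))``,
+    where the number of terms is chosen so that the remainder is below
+    machine precision.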
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kolmogorov_smirnov_cdf
+ >>> print(np.round(kolmogorov_smirnov_cdf(1), 4))
+ 0.3708
+ """
+ kmax = np.ceil(
+ 0.5 + x * np.sqrt(2) / np.pi * np.sqrt(np.log(4 / (np.pi*EPSILON)))
+ )
+ c = 0.0
+ for k in range(int(kmax)):
+ kplus = k + 1 / 2
+ c += (-1)**k / kplus * np.exp(-kplus**2 * np.pi**2 / (2 * x**2))
+ c *= 2 / np.pi
+ return c
+
+
+def kolmogorov_smirnov_p_value(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute the Kolmogorov-Smirnov p-value,
+    deduced from the corresponding statistic and CDF.
+    It is the probability of observing a statistic at least as large,
+    under the null hypothesis of perfect calibration.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+        The Kolmogorov-Smirnov p-value.
+
+ References
+ ----------
+ Tygert M.
+ Calibration of P-values for calibration and for deviation
+ of a subpopulation from the full population.
+ arXiv preprint arXiv:2202.00100.
+ 2022 Jan 31.
+
+ D. A. Darling. A. J. F. Siegert.
+ The First Passage Problem for a Continuous Markov Process.
+ Ann. Math. Statist. 24 (4) 624 - 639, December,
+ 1953.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kolmogorov_smirnov_p_value
+ >>> y_true = np.array([1, 0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
+ >>> ks_p_value = kolmogorov_smirnov_p_value(y_true, y_score)
+ >>> print(np.round(ks_p_value, 4))
+ 0.7857
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ ks_stat = kolmogorov_smirnov_statistic(y_true, y_score)
+ ks_p_value = 1 - kolmogorov_smirnov_cdf(ks_stat)
+ return ks_p_value
+
+
+def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute the Kuiper statistic for a calibration test.
+    Also called ECCE-R (Estimated Cumulative Calibration Errors - Range).
+    The closer to zero, the better the scores are calibrated.
+    Indeed, if the scores are perfectly calibrated,
+    the cumulative differences between ``y_true`` and ``y_score``
+    asymptotically share the properties of a standard Brownian motion.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+ Kuiper's statistic.
+
+ References
+ ----------
+ Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C.
+ Metrics of calibration for probabilistic predictions.
+ The Journal of Machine Learning Research.
+ 2022 Jan 1;23(1):15886-940.
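+
+    Notes
+    -----
+    The statistic is ``(max_k C_k - min_k C_k) / sigma``, where ``C`` is
+    the output of ``cumulative_differences(y_true, y_score)`` and
+    ``sigma = length_scale(y_score)``.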
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kuiper_statistic
+ >>> y_true = np.array([0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
+ >>> print(np.round(kuiper_statistic(y_true, y_score), 3))
+ 0.857
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ y_true = column_or_1d(y_true)
+ y_score = column_or_1d(y_score)
+ cum_diff = cumulative_differences(y_true, y_score)
+ sigma = length_scale(y_score)
+ ku_stat = (np.max(cum_diff) - np.min(cum_diff)) / sigma
+ return ku_stat
+
+
+def kuiper_cdf(x: float) -> float:
+ """
+    Compute the Kuiper cumulative distribution function (CDF) at the float x.
+    This is interpreted as the CDF of the range
+    of the standard Brownian motion over the unit interval [0, 1].
+    The function is approximated by its power series, truncated so that
+    the remainder is below machine precision.
+
+ Parameters
+ ----------
+ x : float
+        The point at which to evaluate the cumulative distribution function.
+
+ Returns
+ -------
+ float
+ The Kuiper cumulative distribution function.
+
+ References
+ ----------
+ Tygert M.
+ Calibration of P-values for calibration and for deviation
+ of a subpopulation from the full population.
+ arXiv preprint arXiv:2202.00100.
+ 2022 Jan 31.
+
+ William Feller.
+ The Asymptotic Distribution of the Range of Sums of
+ Independent Random Variables.
+ Ann. Math. Statist. 22 (3) 427 - 432
+ September, 1951.
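+
+    Notes
+    -----
+    The implemented truncated series is
+    ``sum_k (8 / x**2 + 2 / ((k + 1/2)**2 * pi**2))
+    * exp(-2 * (k + 1/2)**2 * pi**2 / x**2)``,
+    where the number of terms is chosen so that the remainder is below
+    machine precision.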
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kuiper_cdf
+ >>> print(np.round(kuiper_cdf(1), 4))
+ 0.0634
+ """
+ kmax = np.ceil(
+ (
+ 0.5 + x / (np.pi * np.sqrt(2)) *
+ np.sqrt(
+ np.log(
+ 4 / (np.sqrt(2 * np.pi) * EPSILON) * (1 / x + x / np.pi**2)
+ )
+ )
+ )
+ )
+ c = 0.0
+ for k in range(int(kmax)):
+ kplus = k + 1 / 2
+ c += (
+ (8 / x**2 + 2 / kplus**2 / np.pi**2) *
+ np.exp(-2 * kplus**2 * np.pi**2 / x**2)
+ )
+ return c
+
+
+def kuiper_p_value(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute the Kuiper p-value,
+    deduced from the corresponding statistic and CDF.
+    It is the probability of observing a statistic at least as large,
+    under the null hypothesis of perfect calibration.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+ The Kuiper p-value.
+
+ References
+ ----------
+ Tygert M.
+ Calibration of P-values for calibration and for deviation
+ of a subpopulation from the full population.
+ arXiv preprint arXiv:2202.00100.
+ 2022 Jan 31.
+
+ William Feller.
+ The Asymptotic Distribution of the Range of Sums of
+ Independent Random Variables.
+ Ann. Math. Statist. 22 (3) 427 - 432
+ September, 1951.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import kuiper_p_value
+ >>> y_true = np.array([1, 0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
+ >>> ku_p_value = kuiper_p_value(y_true, y_score)
+ >>> print(np.round(ku_p_value, 4))
+ 0.9684
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ ku_stat = kuiper_statistic(y_true, y_score)
+ ku_p_value = 1 - kuiper_cdf(ku_stat)
+ return ku_p_value
+
+
+def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute Spiegelhalter's statistic for a calibration test.
+    The closer to zero, the better the scores are calibrated.
+    Indeed, if the scores are perfectly calibrated,
+    the Brier score simplifies to an expression whose expectation
+    and variance are easy to compute. The statistic is simply
+    a z-score of this normalized expression.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+ Spiegelhalter's statistic.
+
+ References
+ ----------
+ Spiegelhalter DJ.
+ Probabilistic prediction in patient management and clinical trials.
+ Statistics in medicine.
+ 1986 Sep;5(5):421-33.
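+
+    Notes
+    -----
+    With scores p_i and labels y_i, the returned statistic is
+    ``sum_i (y_i - p_i) * (1 - 2 * p_i)
+    / sqrt(sum_i (1 - 2 * p_i)**2 * p_i * (1 - p_i))``.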
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import spiegelhalter_statistic
+ >>> y_true = np.array([0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.1, 0.9, 0.21, 0.9, 0.5])
+ >>> print(np.round(spiegelhalter_statistic(y_true, y_score), 3))
+ -0.757
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+
+ y_true = column_or_1d(y_true)
+ y_score = column_or_1d(y_score)
+ numerator = np.sum(
+ (y_true - y_score) * (1 - 2 * y_score)
+ )
+ denominator = np.sqrt(
+ np.sum(
+ (1 - 2 * y_score) ** 2 * y_score * (1 - y_score)
+ )
+ )
+ sp_stat = numerator/denominator
+ return sp_stat
+
+
+def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
+ """
+    Compute the p-value of Spiegelhalter's statistic,
+    deduced from the corresponding statistic and CDF,
+    which is simply the standard normal distribution.
+    It is the probability of observing a statistic at least as large,
+    under the null hypothesis of perfect calibration.
+
+ Parameters
+ ----------
+ y_true : NDArray of shape (n_samples,)
+ An array of ground truth.
+
+ y_score : NDArray of shape (n_samples,)
+ An array of scores.
+
+ Returns
+ -------
+ float
+        The p-value of Spiegelhalter's statistic.
+
+ References
+ ----------
+ Spiegelhalter DJ.
+ Probabilistic prediction in patient management and clinical trials.
+ Statistics in medicine.
+ 1986 Sep;5(5):421-33.
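+
+    Notes
+    -----
+    The p-value is the one-sided tail ``1 - norm.cdf(z)``, where ``z`` is
+    Spiegelhalter's statistic and ``norm.cdf`` the standard normal CDF.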
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie.metrics.calibration import spiegelhalter_p_value
+ >>> y_true = np.array([1, 0, 1, 0, 1, 0])
+ >>> y_score = np.array([0.8, 0.3, 0.5, 0.5, 0.7, 0.1])
+ >>> sp_p_value = spiegelhalter_p_value(y_true, y_score)
+ >>> print(np.round(sp_p_value, 4))
+ 0.8486
+ """
+ check_arrays_length(y_true, y_score)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_score)
+ check_array_inf(y_score)
+ sp_stat = spiegelhalter_statistic(y_true, y_score)
+ sp_p_value = 1 - scipy.stats.norm.cdf(sp_stat)
+ return sp_p_value
diff --git a/mapie/metrics/classification.py b/mapie/metrics/classification.py
new file mode 100644
index 000000000..1f48f7aa4
--- /dev/null
+++ b/mapie/metrics/classification.py
@@ -0,0 +1,289 @@
+from typing import cast, Union
+
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+from sklearn.utils import column_or_1d, check_array
+
+from mapie.utils import (
+    check_arrays_length,
+    check_array_nan,
+    check_array_inf,
+    check_array_shape_classification,
+    check_nb_sets_sizes,
+    check_number_bins,
+)
+
+
+def classification_coverage_score(
+ y_true: ArrayLike,
+ y_pred_set: ArrayLike
+) -> float:
+ """
+ Effective coverage score obtained by the prediction sets.
+
+ The effective coverage is obtained by estimating the fraction
+ of true labels that lie within the prediction sets.
+
+ Parameters
+ ----------
+ y_true: ArrayLike of shape (n_samples,)
+ True labels.
+ y_pred_set: ArrayLike of shape (n_samples, n_class)
+ Prediction sets given by booleans of labels.
+
+ Returns
+ -------
+ float
+ Effective coverage obtained by the prediction sets.
+
+ Examples
+ --------
+ >>> from mapie.metrics.classification import classification_coverage_score
+ >>> import numpy as np
+ >>> y_true = np.array([3, 3, 1, 2, 2])
+ >>> y_pred_set = np.array([
+ ... [False, False, True, True],
+ ... [False, True, False, True],
+ ... [False, True, True, False],
+ ... [False, False, True, True],
+ ... [False, True, False, True]
+ ... ])
+ >>> print(classification_coverage_score(y_true, y_pred_set))
+ 0.8
+ """
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_pred_set = cast(
+ NDArray,
+ check_array(
+ y_pred_set, force_all_finite=True, dtype=["bool"]
+ )
+ )
+
+ check_arrays_length(y_true, y_pred_set)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_pred_set)
+ check_array_inf(y_pred_set)
+
+ coverage = np.take_along_axis(
+ y_pred_set, y_true.reshape(-1, 1), axis=1
+ ).mean()
+ return float(coverage)
+
+
+def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
+ """
+ Mean width of prediction set output by
+ :class:`~mapie.classification.MapieClassifier`.
+
+ Parameters
+ ----------
+ y_pred_set: ArrayLike of shape (n_samples, n_class)
+ Prediction sets given by booleans of labels.
+
+ Returns
+ -------
+ float
+ Mean width of the prediction set.
+
+ Examples
+ --------
+ >>> from mapie.metrics.classification import classification_mean_width_score
+ >>> import numpy as np
+ >>> y_pred_set = np.array([
+ ... [False, False, True, True],
+ ... [False, True, False, True],
+ ... [False, True, True, False],
+ ... [False, False, True, True],
+ ... [False, True, False, True]
+ ... ])
+ >>> print(classification_mean_width_score(y_pred_set))
+ 2.0
+ """
+ y_pred_set = cast(
+ NDArray,
+ check_array(
+ y_pred_set, force_all_finite=True, dtype=["bool"]
+ )
+ )
+ check_array_nan(y_pred_set)
+ check_array_inf(y_pred_set)
+ mean_width = y_pred_set.sum(axis=1).mean()
+ return float(mean_width)
+
+
+def classification_coverage_score_v2(
+ y_true: NDArray,
+ y_pred_set: NDArray
+) -> NDArray:
+ """
+ Effective coverage score obtained by the prediction sets.
+
+ The effective coverage is obtained by estimating the fraction
+ of true labels that lie within the prediction sets.
+
+    It differs from ``classification_coverage_score`` in that it directly
+    consumes the output of the ``predict`` method and computes the
+    coverage for each confidence level.
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples, n_confidence_level) or (n_samples,)
+ True labels.
+ y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
+ Prediction sets given by booleans of labels.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level,)
+ Effective coverage obtained by the prediction sets.
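+
+    Examples
+    --------
+    A minimal sketch (the three-dimensional prediction-set array below is
+    hypothetical and mimics a single confidence level):
+
+    >>> import numpy as np
+    >>> from mapie.metrics.classification import classification_coverage_score_v2
+    >>> y_true = np.array([3, 3, 1, 2, 2])
+    >>> y_pred_set = np.expand_dims(np.array([
+    ...     [False, False, True, True],
+    ...     [False, True, False, True],
+    ...     [False, True, True, False],
+    ...     [False, False, True, True],
+    ...     [False, True, False, True]
+    ... ]), axis=2)
+    >>> print(classification_coverage_score_v2(y_true, y_pred_set))
+    [0.8]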
+ """
+ check_arrays_length(y_true, y_pred_set)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_pred_set)
+ check_array_inf(y_pred_set)
+
+ y_pred_set = check_array_shape_classification(y_true, y_pred_set)
+ if len(y_true.shape) != 2:
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_true = np.expand_dims(y_true, axis=1)
+ y_true = np.expand_dims(y_true, axis=1)
+ coverage = np.nanmean(
+ np.take_along_axis(y_pred_set, y_true, axis=1),
+ axis=0
+ )
+ return coverage[0]
+
+
+def classification_ssc(
+ y_true: NDArray,
+ y_pred_set: NDArray,
+ num_bins: Union[int, None] = None
+) -> NDArray:
+ """
+    Compute the Size-Stratified Coverage metric proposed in [3], that is,
+    the coverage conditioned on the size of the prediction sets.
+    The sets are ranked by their size (ascending) and then divided into
+    num_bins groups: one coverage value per group is computed.
+
+ [3] Angelopoulos, A. N., & Bates, S. (2021).
+ A gentle introduction to conformal prediction and
+ distribution-free uncertainty quantification.
+ arXiv preprint arXiv:2107.07511.
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+ y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
+ or (n_samples, n_class)
+ Prediction sets given by booleans of labels.
+ num_bins: int or None
+        Number of groups. If None, one coverage value per possible set
+        size (i.e. n_classes + 1 values) is computed. Should be less
+        than the number of different set sizes.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level, num_bins)
+
+ Examples
+ --------
+ >>> from mapie.metrics.classification import classification_ssc
+ >>> import numpy as np
+    >>> y_true = np.array([3, 3, 1, 2, 2])
+ >>> y_pred_set = np.array([
+ ... [True, True, True, True],
+ ... [False, True, False, True],
+ ... [True, True, True, False],
+ ... [False, False, True, True],
+ ... [True, True, False, True]])
+ >>> print(classification_ssc(y_true, y_pred_set, num_bins=2))
+ [[1. 0.66666667]]
+ """
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_pred_set = check_array_shape_classification(y_true, y_pred_set)
+
+ check_arrays_length(y_true, y_pred_set)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_pred_set)
+ check_array_inf(y_pred_set)
+
+ sizes = np.sum(y_pred_set, axis=1)
+ n_classes = y_pred_set.shape[1]
+ if num_bins is None:
+ bins = list(range(n_classes + 1))
+ else:
+ check_nb_sets_sizes(sizes, num_bins)
+ check_number_bins(num_bins)
+ bins = [
+ b[0] for b in np.array_split(range(n_classes + 1), num_bins)
+ ]
+
+ digitized_sizes = np.digitize(sizes, bins)
+ coverages = np.zeros((y_pred_set.shape[2], len(bins)))
+ for alpha in range(y_pred_set.shape[2]):
+ indexes_bybins = [
+ np.argwhere(digitized_sizes[:, alpha] == i)
+ for i in range(1, len(bins)+1)
+ ]
+
+ for i, indexes in enumerate(indexes_bybins):
+ coverages[alpha, i] = classification_coverage_score_v2(
+ y_true[indexes],
+ np.take_along_axis(
+ y_pred_set[:, :, alpha],
+ indexes,
+ axis=0
+ )
+ )
+ return coverages
+
+
+def classification_ssc_score(
+ y_true: NDArray,
+ y_pred_set: NDArray,
+ num_bins: Union[int, None] = None
+) -> NDArray:
+ """
+    Aggregate the Size-Stratified Coverage [3] by taking the minimum for
+    each confidence level: this returns the maximum violation of the
+    conditional coverage (with respect to the groups defined).
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+ y_pred_set: NDArray of shape (n_samples, n_class, n_confidence_level)
+ or (n_samples, n_class)
+ Prediction sets given by booleans of labels.
+ num_bins: int or None
+        Number of groups. If None, one coverage value per possible set
+        size (i.e. n_classes + 1 values) is computed. Should be less
+        than the number of different set sizes.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level,)
+
+ Examples
+ --------
+ >>> from mapie.metrics.classification import classification_ssc_score
+ >>> import numpy as np
+    >>> y_true = np.array([3, 3, 1, 2, 2])
+ >>> y_pred_set = np.array([
+ ... [True, True, True, True],
+ ... [False, True, False, True],
+ ... [True, True, True, False],
+ ... [False, False, True, True],
+ ... [True, True, False, True]])
+ >>> print(classification_ssc_score(y_true, y_pred_set, num_bins=2))
+ [0.66666667]
+ """
+ check_arrays_length(y_true, y_pred_set)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_pred_set)
+ check_array_inf(y_pred_set)
+
+ return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
diff --git a/mapie/metrics/regression.py b/mapie/metrics/regression.py
new file mode 100644
index 000000000..1ff8a4fa1
--- /dev/null
+++ b/mapie/metrics/regression.py
@@ -0,0 +1,566 @@
+from typing import cast
+
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+from sklearn.utils import column_or_1d
+
+from mapie.utils import (
+ check_arrays_length,
+ check_array_nan,
+ check_array_inf,
+ check_array_shape_regression,
+ check_number_bins,
+ check_nb_intervals_sizes,
+ check_alpha,
+)
+
+
+def regression_mean_width_score(
+ y_pred_low: ArrayLike,
+ y_pred_up: ArrayLike
+) -> float:
+ """
+ Effective mean width score obtained by the prediction intervals.
+
+ Parameters
+ ----------
+ y_pred_low: ArrayLike of shape (n_samples,)
+ Lower bound of prediction intervals.
+ y_pred_up: ArrayLike of shape (n_samples,)
+ Upper bound of prediction intervals.
+
+ Returns
+ -------
+ float
+ Effective mean width of the prediction intervals.
+
+ Examples
+ --------
+ >>> from mapie.metrics.regression import regression_mean_width_score
+ >>> import numpy as np
+ >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
+ >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
+ >>> print(regression_mean_width_score(y_pred_low, y_pred_up))
+ 2.3
+ """
+ y_pred_low = cast(NDArray, column_or_1d(y_pred_low))
+ y_pred_up = cast(NDArray, column_or_1d(y_pred_up))
+
+ check_arrays_length(y_pred_low, y_pred_up)
+ check_array_nan(y_pred_low)
+ check_array_inf(y_pred_low)
+ check_array_nan(y_pred_up)
+ check_array_inf(y_pred_up)
+
+ mean_width = np.abs(y_pred_up - y_pred_low).mean()
+ return float(mean_width)
+
+
+def regression_coverage_score(
+ y_true: NDArray,
+ y_intervals: NDArray,
+) -> NDArray:
+ """
+ Effective coverage obtained by the prediction intervals.
+
+ Intervals given by the ``predict_interval`` method can be passed directly
+ to the ``y_intervals`` argument (see example below).
+
+    Besides this intended use, this function also works with:
+
+    - ``y_true`` of shape (n_samples,) and ``y_intervals`` of shape
+      (n_samples, 2)
+    - ``y_true`` of shape (n_samples, n_experiment) and ``y_intervals``
+      of shape (n_samples, 2, n_experiment)
+
+ The effective coverage is obtained by computing the fraction
+ of true labels that lie within the prediction intervals.
+
+ Parameters
+    ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+
+ y_intervals: NDArray of shape (n_samples, 2, n_confidence_level)
+        Lower and upper bounds of prediction intervals
+        with different confidence levels, given by the
+        ``predict_interval`` method.
+
+ Returns
+    -------
+ NDArray of shape (n_confidence_level,)
+ Effective coverage obtained by the prediction intervals
+ for each confidence level.
+
+ Examples
+    --------
+ >>> from mapie.metrics.regression import regression_coverage_score
+ >>> from mapie_v1.regression import SplitConformalRegressor
+ >>> from mapie_v1.utils import train_conformalize_test_split
+ >>> from sklearn.datasets import make_regression
+ >>> from sklearn.linear_model import Ridge
+
+ >>> X, y = make_regression(n_samples=500, n_features=2, noise=1.0)
+ >>> (
+ ... X_train, X_conformalize, X_test,
+ ... y_train, y_conformalize, y_test
+ ... ) = train_conformalize_test_split(
+ ... X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
+ ... )
+
+ >>> mapie_regressor = SplitConformalRegressor(
+ ... estimator=Ridge(),
+ ... confidence_level=0.95,
+ ... prefit=False,
+ ... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)
+
+ >>> predicted_points, predicted_intervals = mapie_regressor.predict_interval(X_test)
+ >>> coverage = regression_coverage_score(y_test, predicted_intervals)[0]
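+
+    A minimal check on hand-built intervals (hypothetical values), using
+    the alternative ``(n_samples, 2)`` input shape mentioned above:
+
+    >>> import numpy as np
+    >>> y_true = np.array([5.0, 7.5, 9.5])
+    >>> y_intervals = np.array([[4.0, 6.0], [6.0, 9.0], [9.0, 10.0]])
+    >>> print(regression_coverage_score(y_true, y_intervals))
+    [1.]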
+ """
+ check_arrays_length(y_true, y_intervals)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_intervals)
+ check_array_inf(y_intervals)
+
+ y_intervals = check_array_shape_regression(y_true, y_intervals)
+ if len(y_true.shape) != 2:
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_true = np.expand_dims(y_true, axis=1)
+ coverages = np.mean(
+ np.logical_and(
+ np.less_equal(y_intervals[:, 0, :], y_true),
+ np.greater_equal(y_intervals[:, 1, :], y_true)
+ ),
+ axis=0
+ )
+ return coverages
+
+
+def regression_ssc(
+ y_true: NDArray,
+ y_intervals: NDArray,
+ num_bins: int = 3
+) -> NDArray:
+ """
+    Compute the Size-Stratified Coverage metric proposed in [3], that is,
+    the coverage conditioned on the size of the intervals.
+    The intervals are ranked by their size (ascending) and then divided into
+    num_bins groups: one coverage value per group is computed.
+
+    Warning: this metric should only be used with non-constant intervals
+    (intervals of different sizes); with constant intervals the result
+    may be misinterpreted.
+
+ [3] Angelopoulos, A. N., & Bates, S. (2021).
+ A gentle introduction to conformal prediction and
+ distribution-free uncertainty quantification.
+ arXiv preprint arXiv:2107.07511.
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+    y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
+        Lower and upper bounds of the prediction intervals.
+    num_bins: int
+        Number of groups. Should be less than the number of different
+        interval widths.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level, num_bins)
+
+ Examples
+ --------
+ >>> from mapie.metrics.regression import regression_ssc
+ >>> import numpy as np
+ >>> y_true = np.array([5, 7.5, 9.5])
+ >>> y_intervals = np.array([
+ ... [4, 6],
+ ... [6.0, 9.0],
+ ... [9, 10.0]
+ ... ])
+ >>> print(regression_ssc(y_true, y_intervals, num_bins=2))
+ [[1. 1.]]
+ """
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_intervals = check_array_shape_regression(y_true, y_intervals)
+ check_number_bins(num_bins)
+ widths = np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :])
+ check_nb_intervals_sizes(widths, num_bins)
+
+ check_arrays_length(y_true, y_intervals)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_intervals)
+ check_array_inf(y_intervals)
+
+ indexes_sorted = np.argsort(widths, axis=0)
+ indexes_bybins = np.array_split(indexes_sorted, num_bins, axis=0)
+ coverages = np.zeros((y_intervals.shape[2], num_bins))
+ for i, indexes in enumerate(indexes_bybins):
+ intervals_binned = np.stack([
+ np.take_along_axis(y_intervals[:, 0, :], indexes, axis=0),
+ np.take_along_axis(y_intervals[:, 1, :], indexes, axis=0)
+ ], axis=1)
+ coverages[:, i] = regression_coverage_score(y_true[indexes], intervals_binned)
+
+ return coverages
+
+
+def regression_ssc_score(
+ y_true: NDArray,
+ y_intervals: NDArray,
+ num_bins: int = 3
+) -> NDArray:
+ """
+    Aggregate the Size-Stratified Coverage [3] by taking the minimum for
+    each confidence level: this returns the maximum violation of the
+    conditional coverage (with respect to the groups defined).
+
+    Warning: this metric should only be used with non-constant intervals
+    (intervals of different sizes); with constant intervals the result
+    may be misinterpreted.
+
+ [3] Angelopoulos, A. N., & Bates, S. (2021).
+ A gentle introduction to conformal prediction and
+ distribution-free uncertainty quantification.
+ arXiv preprint arXiv:2107.07511.
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+    y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
+        Lower and upper bounds of the prediction intervals.
+    num_bins: int
+        Number of groups. Should be less than the number of different
+        interval widths.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level,)
+
+ Examples
+ --------
+ >>> from mapie.metrics.regression import regression_ssc_score
+ >>> import numpy as np
+ >>> y_true = np.array([5, 7.5, 9.5])
+ >>> y_intervals = np.array([
+ ... [[4, 4], [6, 7.5]],
+ ... [[6.0, 8], [9.0, 10]],
+ ... [[9, 9], [10.0, 10.0]]
+ ... ])
+ >>> print(regression_ssc_score(y_true, y_intervals, num_bins=2))
+ [1. 0.5]
+ """
+ return np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1)
+
+
+def _gaussian_kernel(
+ x: NDArray,
+ kernel_size: int
+) -> NDArray:
+ """
+    Compute the Gaussian kernel matrix of x (used in the ``hsic`` function).
+
+    Parameters
+    ----------
+    x: NDArray
+        The values from which to compute the Gaussian kernel.
+    kernel_size: int
+        The kernel bandwidth (acting as a variance): this coefficient
+        controls the width of the kernel.
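+
+    Notes
+    -----
+    The pairwise squared distances are computed via the expansion
+    ``||x_i - x_j||**2 = ||x_i||**2 - 2 * x_i @ x_j + ||x_j||**2``
+    and the kernel matrix is ``exp(-dist / kernel_size)``.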
+ """
+ norm_x = x ** 2
+ dist = -2 * np.matmul(x, x.transpose((0, 2, 1))) \
+ + norm_x + norm_x.transpose((0, 2, 1))
+ return np.exp(-dist / kernel_size)
+
+
+def hsic(
+ y_true: NDArray,
+ y_intervals: NDArray,
+ kernel_sizes: ArrayLike = (1, 1)
+) -> NDArray:
+ """
+    Compute the square root of the HSIC coefficient. HSIC, the
+    Hilbert-Schmidt independence criterion, is a dependence measure.
+    Here we use it as proposed in [4], to compute the dependence between
+    the indicator of coverage and the interval size.
+
+    If HSIC is 0, the two variables (the indicator of coverage and the
+    interval size) are independent.
+
+    Warning: this metric should only be used with non-constant intervals
+    (intervals of different sizes); with constant intervals the result
+    may be misinterpreted.
+
+ [4] Feldman, S., Bates, S., & Romano, Y. (2021).
+ Improving conditional coverage via orthogonal quantile regression.
+ Advances in Neural Information Processing Systems, 34, 2060-2071.
+
+ Parameters
+ ----------
+ y_true: NDArray of shape (n_samples,)
+ True labels.
+    y_intervals: NDArray of shape (n_samples, 2, n_confidence_level) or (n_samples, 2)
+        Lower and upper bounds of the prediction intervals.
+ kernel_sizes: ArrayLike of size (2,)
+ The variance (sigma) for each variable (the indicator of coverage and
+ the interval size), this coefficient controls the width of the curve.
+
+ Returns
+ -------
+ NDArray of shape (n_confidence_level,)
+ One hsic correlation coefficient by confidence level.
+
+ Raises
+ ------
+    ValueError
+        If ``kernel_sizes`` has a length different from 2,
+        or if it has negative or null values.
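+
+    Notes
+    -----
+    With Gaussian kernel matrices K (computed on the interval widths) and
+    L (computed on the coverage indicators), and the centering matrix
+    H = I - (1/n) * ones((n, n)), the empirical HSIC value computed here
+    is trace(L @ H @ K @ H) / (n - 1)**2; the returned coefficient is its
+    square root, computed independently for each confidence level.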
+
+ Examples
+ --------
+ >>> from mapie.metrics.regression import hsic
+ >>> import numpy as np
+ >>> y_true = np.array([9.5, 10.5, 12.5])
+ >>> y_intervals = np.array([
+ ... [[9, 9], [10.0, 10.0]],
+ ... [[8.5, 9], [12.5, 12]],
+ ... [[10.5, 10.5], [12.0, 12]]
+ ... ])
+ >>> print(hsic(y_true, y_intervals))
+ [0.31787614 0.2962914 ]
+ """
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_intervals = check_array_shape_regression(y_true, y_intervals)
+
+ check_arrays_length(y_true, y_intervals)
+ check_array_nan(y_true)
+ check_array_inf(y_true)
+ check_array_nan(y_intervals)
+ check_array_inf(y_intervals)
+
+ kernel_sizes = cast(NDArray, column_or_1d(kernel_sizes))
+ if len(kernel_sizes) != 2:
+ raise ValueError(
+ "kernel_sizes should be an ArrayLike of length 2"
+ )
+ if (kernel_sizes <= 0).any():
+ raise ValueError(
+ "kernel_size should be positive"
+ )
+ n_samples, _, n_confidence_level = y_intervals.shape
+ y_true_per_alpha = np.tile(y_true, (n_confidence_level, 1)).transpose()
+ widths = np.expand_dims(
+ np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]).transpose(),
+ axis=2
+ )
+ cov_ind = np.expand_dims(
+ np.int_(
+ ((y_intervals[:, 0, :] <= y_true_per_alpha) &
+ (y_intervals[:, 1, :] >= y_true_per_alpha))
+ ).transpose(),
+ axis=2
+ )
+
+ k_mat = _gaussian_kernel(widths, kernel_sizes[0])
+ l_mat = _gaussian_kernel(cov_ind, kernel_sizes[1])
+ h_mat = np.eye(n_samples) - 1 / n_samples * np.ones((n_samples, n_samples))
+ hsic_mat = np.matmul(l_mat, np.matmul(h_mat, np.matmul(k_mat, h_mat)))
+ hsic_mat /= ((n_samples - 1) ** 2)
+ coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2))
+
+ return coef_hsic
+
+
+def coverage_width_based(
+ y_true: ArrayLike,
+ y_pred_low: ArrayLike,
+ y_pred_up: ArrayLike,
+ eta: float,
+ alpha: float
+) -> float:
+ """
+ Coverage Width-based Criterion (CWC) obtained by the prediction intervals.
+
+ The effective coverage score is a criterion used to evaluate the quality
+ of prediction intervals (PIs) based on their coverage and width.
+
+ Khosravi, Abbas, Saeid Nahavandi, and Doug Creighton.
+ "Construction of optimal prediction intervals for load forecasting
+ problems."
+ IEEE Transactions on Power Systems 25.3 (2010): 1496-1503.
+
+    Parameters
+    ----------
+    y_true : ArrayLike of shape (n_samples,)
+        True labels.
+    y_pred_low : ArrayLike of shape (n_samples,)
+        Lower bound of prediction intervals.
+    y_pred_up : ArrayLike of shape (n_samples,)
+        Upper bound of prediction intervals.
+    eta : float
+        A user-defined parameter that balances the contributions of
+        the mean width score and the coverage score in the CWC
+        calculation.
+    alpha : float
+        A user-defined parameter representing the designed confidence
+        level of the PI.
+
+ Returns
+ -------
+ float
+ Effective coverage score (CWC) obtained by the prediction intervals.
+
+ Notes
+ -----
+ The effective coverage score (CWC) is calculated using the following
+ formula:
+ CWC = (1 - Mean Width Score) * exp(-eta * (Coverage score - (1-alpha))**2)
+
+ The CWC penalizes under- and overcoverage in the same way and summarizes
+ the quality of the prediction intervals in a single value.
+
+    High eta (large positive value):
+
+    The exponential term np.exp(-eta*(Coverage score - (1-alpha))**2)
+    decays sharply as the coverage score deviates from (1-alpha).
+    Matching the target coverage therefore dominates the criterion, and
+    minimizing the mean width score becomes comparatively less important.
+    The impact is a strong penalty on under- and overcoverage, at the
+    price of potentially wider, more conservative intervals.
+
+    Low eta (small positive value):
+
+    The exponential term is flatter, so deviations of the coverage score
+    from (1-alpha) have only a moderate impact, and the criterion is
+    mostly driven by (1 - Mean Width Score), that is, by the average
+    width of the prediction intervals.
+    You get a balance between prediction precision and coverage, the
+    exact balance depending on the specific value of eta.
+
+    Negative eta (any negative value):
+
+    The exponential term np.exp(-eta*(Coverage score - (1-alpha))**2)
+    becomes larger as the coverage score deviates from (1-alpha), so
+    deviations from the target coverage are rewarded instead of
+    penalized. This degenerate behaviour is rarely desirable, and
+    negative values should be used with caution, if at all.
+
+    Null eta (eta = 0):
+
+    The CWC score becomes equal to (1 - Mean Width Score), which is
+    equivalent to (1 - normalized average width of the prediction
+    intervals). In this case, the score is based solely on the size of
+    the prediction intervals.
+
+ Examples
+ --------
+ >>> from mapie.metrics.regression import coverage_width_based
+ >>> import numpy as np
+ >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
+ >>> y_preds_low = np.array([4, 6, 9, 8.5, 10.5])
+ >>> y_preds_up = np.array([6, 9, 10, 12.5, 12])
+ >>> eta = 0.01
+ >>> alpha = 0.1
+ >>> cwb = coverage_width_based(y_true, y_preds_low, y_preds_up, eta, alpha)
+ >>> print(np.round(cwb, 2))
+ 0.69
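+
+ With eta = 0, the exponential term equals 1, so only the normalized mean
+ width matters (a hand-checked sketch on the same toy data; the rounded
+ value happens to coincide with the eta = 0.01 case):
+
+ >>> cwb_zero = coverage_width_based(y_true, y_preds_low, y_preds_up, 0, alpha)
+ >>> print(np.round(cwb_zero, 2))
+ 0.69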
+ """
+ y_true = cast(NDArray, column_or_1d(y_true))
+ y_pred_low = cast(NDArray, column_or_1d(y_pred_low))
+ y_pred_up = cast(NDArray, column_or_1d(y_pred_up))
+
+ check_alpha(1-alpha)
+
+ coverage_score = regression_coverage_score(
+ y_true,
+ np.column_stack((y_pred_low, y_pred_up)),
+ )[0]
+ mean_width = regression_mean_width_score(
+ y_pred_low,
+ y_pred_up
+ )
+ ref_length = np.subtract(
+ float(y_true.max()),
+ float(y_true.min())
+ )
+ avg_length = mean_width / ref_length
+
+ cwc = (1-avg_length)*np.exp(-eta*(coverage_score-(1-alpha))**2)
+
+ return float(cwc)
+
+
+def regression_mwi_score(
+ y_true: NDArray,
+ y_pis: NDArray,
+ confidence_level: float
+) -> float:
+ """
+ The Winkler score, proposed by Winkler (1972), is a measure used to
+ evaluate prediction intervals, combining the length of the interval
+ with a penalty that increases proportionally to the distance of an
+ observation outside the interval.
+
+ Parameters
+ ----------
+ y_true: ArrayLike of shape (n_samples,)
+ Ground truth values.
+ y_pis: ArrayLike of shape (n_samples, 2, 1)
+ Lower and upper bounds of prediction intervals,
+ output from a MAPIE regressor.
+ confidence_level: float
+ The confidence level (1 - alpha) targeted by the prediction
+ intervals.
+
+ Returns
+ -------
+ float
+ The mean Winkler interval score
+
+ References
+ ----------
+ [1] Robert L. Winkler
+ "A Decision-Theoretic Approach to Interval Estimation",
+ Journal of the American Statistical Association,
+ volume 67, pages 187-191 (1972)
+ (https://doi.org/10.1080/01621459.1972.10481224)
+ [2] Tilmann Gneiting and Adrian E Raftery
+ "Strictly Proper Scoring Rules, Prediction, and Estimation",
+ Journal of the American Statistical Association,
+ volume 102, pages 359-378 (2007)
+ (https://doi.org/10.1198/016214506000001437) (Section 6.2)
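+
+ Examples
+ --------
+ A hand-checked sketch: three intervals with total width 6 and one
+ observation 0.5 above its upper bound, at confidence level 0.9, give
+ (6 + 2 * 0.5 / 0.1) / 3:
+
+ >>> import numpy as np
+ >>> from mapie.metrics.regression import regression_mwi_score
+ >>> y_true = np.array([5.0, 7.5, 10.5])
+ >>> y_pis = np.array([[[4.0], [6.0]], [[6.0], [9.0]], [[9.0], [10.0]]])
+ >>> print(np.round(regression_mwi_score(y_true, y_pis, 0.9), 2))
+ 5.33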
+ """
+
+ # Undo any possible quantile crossing
+ y_pred_low = np.minimum(y_pis[:, 0, 0], y_pis[:, 1, 0])
+ y_pred_up = np.maximum(y_pis[:, 0, 0], y_pis[:, 1, 0])
+
+ check_arrays_length(y_true, y_pred_low, y_pred_up)
+
+ # Checking for NaN and inf values
+ for array in (y_true, y_pred_low, y_pred_up):
+ check_array_nan(array)
+ check_array_inf(array)
+
+ width = np.sum(y_pred_up) - np.sum(y_pred_low)
+ error_above = np.sum((y_true - y_pred_up)[y_true > y_pred_up])
+ error_below = np.sum((y_pred_low - y_true)[y_true < y_pred_low])
+ total_error = error_above + error_below
+ mwi = (width + total_error * 2 / (1 - confidence_level)) / len(y_true)
+ return mwi
diff --git a/mapie/mondrian.py b/mapie/mondrian.py
index 86c76549f..564816583 100644
--- a/mapie/mondrian.py
+++ b/mapie/mondrian.py
@@ -24,7 +24,7 @@
MapieTimeSeriesRegressor
)
from mapie.utils import check_alpha
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
class MondrianCP(BaseEstimator):
@@ -39,7 +39,7 @@ class MondrianCP(BaseEstimator):
The underlying estimator must be used with `cv='prefit'` and the
conformity score must be one of the following:
- - For `MapieClassifier`: 'lac', 'score', 'cumulated_score', 'aps' or 'topk'
+ - For `MapieClassifier`: 'lac', 'aps' or 'topk'
- For `MapieRegressor`: 'absolute' or 'gamma'
Parameters
@@ -48,7 +48,7 @@ class MondrianCP(BaseEstimator):
The estimator for which the Mondrian method will be applied.
It must be used with `cv='prefit'` and the
conformity score must be one of the following:
- - For `MapieClassifier`: 'lac', 'score', 'cumulated_score', 'aps' or
+ - For `MapieClassifier`: 'lac', 'aps' or
'topk'
- For `MapieRegressor`: 'absolute' or 'gamma'
@@ -101,7 +101,7 @@ class MondrianCP(BaseEstimator):
MapieTimeSeriesRegressor
)
allowed_classification_ncs_str = [
- "lac", "score", "cumulated_score", "aps", "top_k"
+ "lac", "aps", "top_k"
]
allowed_classification_ncs_class = (
LACConformityScore, NaiveConformityScore, APSConformityScore,
@@ -242,8 +242,8 @@ def _check_cv(self):
"""
if not self.mapie_estimator.cv == "prefit":
raise ValueError(
- "Mondrian can only be used if the underlying Mapie" +
- "estimator uses cv='prefit'."
+ "Mondrian can only be used if the underlying" +
+ "estimator is used in prefit mode."
)
def _check_partition_fit(self, X: NDArray, partition: NDArray):
@@ -348,7 +348,7 @@ def _check_estimator(self):
"""
if isinstance(self.mapie_estimator, self.not_allowed_estimators):
raise ValueError(
- "The estimator must be a MapieClassifier or MapieRegressor"
+ "The estimator must be a classifier or regressor"
)
def _check_confomity_score(self):
@@ -371,7 +371,7 @@ def _check_confomity_score(self):
if self.mapie_estimator.method not in \
self.allowed_classification_ncs_str:
raise ValueError(
- "The conformity score for the MapieClassifier must " +
+ "The conformity score for the classifier must " +
f"be one of {self.allowed_classification_ncs_str}"
)
@@ -379,7 +379,7 @@ def _check_confomity_score(self):
if type(self.mapie_estimator.conformity_score) not in \
self.allowed_classification_ncs_class:
raise ValueError(
- "The conformity score for the MapieClassifier must" +
+ "The conformity score for the classifier must" +
f" be one of {self.allowed_classification_ncs_class}"
)
else:
@@ -387,7 +387,7 @@ def _check_confomity_score(self):
if not isinstance(self.mapie_estimator.conformity_score,
self.allowed_regression_ncs):
raise ValueError(
- "The conformity score for the MapieRegressor must " +
+ "The conformity score for the regressor must " +
f"be one of {self.allowed_regression_ncs}"
)
diff --git a/mapie/multi_label_classification.py b/mapie/multi_label_classification.py
index c21a3da16..73ad21b65 100644
--- a/mapie/multi_label_classification.py
+++ b/mapie/multi_label_classification.py
@@ -14,7 +14,7 @@
from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
indexable)
-from ._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from .control_risk.crc_rcps import find_lambda_star, get_r_hat_plus
from .control_risk.ltt import find_lambda_control_star, ltt_procedure
from .control_risk.risks import compute_risk_precision, compute_risk_recall
@@ -351,7 +351,7 @@ def _check_estimator(
)
if (estimator is None) and (_refit):
estimator = MultiOutputClassifier(
- LogisticRegression(multi_class="multinomial")
+ LogisticRegression()
)
X_train, X_calib, y_train, y_calib = train_test_split(
X,
@@ -362,8 +362,8 @@ def _check_estimator(
estimator.fit(X_train, y_train)
warnings.warn(
"WARNING: To avoid overffiting, X has been splitted"
- + "into X_train and X_calib. The calibraiton will only"
- + "be done on X_calib"
+ + "into X_train and X_conf. The conformalization will only"
+ + "be done on X_conf"
)
return estimator, X_calib, y_calib
diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py
index 90654864f..ec62b3026 100644
--- a/mapie/regression/quantile_regression.py
+++ b/mapie/regression/quantile_regression.py
@@ -11,8 +11,7 @@
from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
indexable)
-from mapie._compatibility import np_quantile
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.utils import (check_alpha_and_n_samples,
check_defined_variables_predict_cqr,
check_estimator_fit_predict, check_lower_upper_bounds,
@@ -197,13 +196,14 @@ def _check_alpha(
if isinstance(alpha, float):
if np.any(np.logical_or(alpha <= 0, alpha >= 1.0)):
raise ValueError(
- "Invalid alpha. Allowed values are between 0.0 and 1.0."
+ "Invalid confidence_level. "
+ "Allowed values are between 0.0 and 1.0."
)
else:
alpha_np = np.array([alpha / 2, 1 - alpha / 2, 0.5])
else:
raise ValueError(
- "Invalid alpha. Allowed values are float."
+ "Invalid confidence_level. Allowed values are float."
)
return alpha_np
@@ -296,8 +296,7 @@ def _check_estimator(
)
else:
raise ValueError(
- "The base model does not seem to be accepted"
- + " by MapieQuantileRegressor. \n"
+ "The base model is not supported. \n"
"Give a base model among: \n"
f"{self.quantile_estimator_params.keys()} "
"Or, add your base model to"
@@ -415,8 +414,9 @@ def _check_prefit_params(
else:
raise ValueError(
"You need to have provided 3 different estimators, they"
- " need to be preset with alpha values in the following"
- " order [alpha/2, 1 - alpha/2, 0.5]."
+ " need to be preset with alpha values"
+ "(alpha = 1 - confidence_level)"
+ "in the following order [alpha/2, 1 - alpha/2, 0.5]."
)
def fit(
@@ -744,17 +744,17 @@ def predict(
if symmetry:
quantile = np.full(
2,
- np_quantile(
+ np.quantile(
self.conformity_scores_[2], q, method="higher"
)
)
else:
quantile = np.array(
[
- np_quantile(
+ np.quantile(
self.conformity_scores_[0], q, method="higher"
),
- np_quantile(
+ np.quantile(
self.conformity_scores_[1], q, method="higher"
)
]
diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py
index bfca560f6..88963dd39 100644
--- a/mapie/regression/regression.py
+++ b/mapie/regression/regression.py
@@ -10,7 +10,7 @@
from sklearn.utils import check_random_state
from sklearn.utils.validation import _check_y, check_is_fitted, indexable
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import (BaseRegressionScore,
ResidualNormalisedScore)
from mapie.conformity_scores.utils import check_regression_conformity_score
@@ -317,8 +317,7 @@ def _check_agg_function(
type(self.cv).__name__ in self.cv_need_agg_function_
):
raise ValueError(
- "You need to specify an aggregation function when "
- f"cv's type is in {self.cv_need_agg_function_}."
+ "You need to specify an aggregation function."
)
elif agg_function is not None:
return agg_function
@@ -385,8 +384,8 @@ def _check_ensemble(
"""
if ensemble and (self.agg_function is None):
raise ValueError(
- "If ensemble is True, the aggregation function has to be "
- f"in '{self.ensemble_agg_functions_}'."
+ "The aggregation function has to be in "
+ f"{self.ensemble_agg_functions_}."
)
def _check_fit_parameters(
@@ -442,7 +441,7 @@ def _check_fit_parameters(
self.cv not in ["split", "prefit"]:
raise ValueError(
"The ResidualNormalisedScore can be used only with "
- "``cv='split'`` and ``cv='prefit'``"
+ "``SplitConformalRegressor``"
)
X, y = indexable(X, y)
diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py
index e4e6f5520..3d74e2fec 100644
--- a/mapie/regression/time_series_regression.py
+++ b/mapie/regression/time_series_regression.py
@@ -8,7 +8,7 @@
from sklearn.model_selection import BaseCrossValidator
from sklearn.utils.validation import check_is_fitted
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import BaseRegressionScore
from mapie.regression import MapieRegressor
from mapie.utils import check_alpha, check_gamma
diff --git a/mapie/subsample.py b/mapie/subsample.py
index 88293bc5e..3bef3c393 100644
--- a/mapie/subsample.py
+++ b/mapie/subsample.py
@@ -9,7 +9,7 @@
from sklearn.utils import check_random_state, resample
from sklearn.utils.validation import _num_samples
-from ._typing import NDArray
+from numpy.typing import NDArray
from .utils import check_n_samples
diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py
index 0057d7fdc..5f3da959d 100644
--- a/mapie/tests/test_calibration.py
+++ b/mapie/tests/test_calibration.py
@@ -16,7 +16,8 @@
from sklearn.preprocessing import OneHotEncoder
from mapie.calibration import MapieCalibrator
-from mapie.metrics import expected_calibration_error, top_label_ece
+from mapie.metrics.calibration import top_label_ece
+from mapie.metrics.calibration import expected_calibration_error
random_state = 20
@@ -59,19 +60,19 @@
results_binary = {
"split": {
"y_score": [
- [0.76226014, np.nan],
- [0.39557708, np.nan],
+ [0.74020596, np.nan],
+ [0.4247601, np.nan],
[np.nan, 0.66666667],
- [0.75506701, np.nan],
+ [0.72980855, np.nan],
[np.nan, 0.66666667],
- [0.81175724, np.nan],
- [0.77294068, np.nan],
- [0.62599563, np.nan],
+ [0.81058943, np.nan],
+ [0.7551083, np.nan],
+ [0.59798388, np.nan],
[np.nan, 0.66666667],
[np.nan, 0.66666667],
],
- "top_label_ece": 0.30562,
- "ece": 0.56657,
+ "top_label_ece": 0.315922,
+ "ece": 0.554227,
},
"prefit": {
"y_score": [
diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index 6b9cb502c..319b61610 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -22,12 +22,12 @@
from sklearn.utils.validation import check_is_fitted
from typing_extensions import TypedDict
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.classification import MapieClassifier
from mapie.conformity_scores import LACConformityScore
from mapie.conformity_scores.utils import METHOD_SCORE_MAP
from mapie.conformity_scores.sets.utils import check_proba_normalized
-from mapie.metrics import classification_coverage_score
+from mapie.metrics.classification import classification_coverage_score
random_state = 42
@@ -952,7 +952,7 @@ def test_warning_binary_classif(cv: str, method: str) -> None:
random_state=random_state,
)
with pytest.raises(
- ValueError, match=r".*Invalid method for binary target.*"
+ ValueError, match=r".*Invalid conformity score for binary target.*"
):
mapie_clf.fit(X, y)
@@ -968,7 +968,7 @@ def test_binary_classif_same_result() -> None:
random_state=random_state,
)
mapie_predict = mapie_clf.fit(X, y).predict(X)
- lr = LogisticRegression(multi_class="multinomial").fit(X, y)
+ lr = LogisticRegression().fit(X, y)
lr_predict = lr.predict(X)
np.testing.assert_allclose(mapie_predict, lr_predict)
@@ -1000,7 +1000,7 @@ def test_valid_method(method: str) -> None:
)
def test_valid_cv(cv: Any) -> None:
"""Test that valid cv raises no errors."""
- model = LogisticRegression(multi_class="multinomial")
+ model = LogisticRegression()
model.fit(X_toy, y_toy)
mapie_clf = MapieClassifier(
estimator=model, cv=cv, random_state=random_state
@@ -1423,7 +1423,7 @@ def test_results_with_groups() -> None:
)
def test_valid_prediction(alpha: Any) -> None:
"""Test fit and predict."""
- model = LogisticRegression(multi_class="multinomial")
+ model = LogisticRegression()
model.fit(X_toy, y_toy)
mapie_clf = MapieClassifier(
estimator=model, cv="prefit", random_state=random_state
@@ -1665,7 +1665,9 @@ def test_method_error_in_fit(monkeypatch: Any, method: str) -> None:
mapie_clf = MapieClassifier(
method=method, random_state=random_state
)
- with pytest.raises(ValueError, match=r".*Invalid method.*"):
+ with pytest.raises(
+ ValueError, match=r".*(Invalid method.)|(Invalid conformity score.)*"
+ ):
mapie_clf.fit(X_toy, y_toy)
@@ -1835,7 +1837,7 @@ def test_warning_not_all_label_in_calib() -> None:
cv="prefit", random_state=random_state
)
with pytest.warns(
- UserWarning, match=r".*WARNING: your calibration dataset.*"
+ UserWarning, match=r".*WARNING: your conformity dataset.*"
):
mapie_clf.fit(X_mapie, y_mapie)
@@ -1942,23 +1944,6 @@ def test_raise_error_new_class() -> None:
mapie_clf.fit(X, y)
-@pytest.mark.parametrize("method", ["score", "cumulated_score"])
-def test_deprecated_method_warning(method: str) -> None:
- """
- Test that a warning is raised if choose a deprecated method.
- """
- clf = LogisticRegression()
- clf.fit(X_toy, y_toy)
- mapie_clf = MapieClassifier(
- estimator=clf, method=method,
- cv="prefit", random_state=random_state
- )
- with pytest.warns(
- DeprecationWarning, match=r".*WARNING: Deprecated method.*"
- ):
- mapie_clf.fit(X_toy, y_toy)
-
-
def test_fit_parameters_passing() -> None:
"""
Test passing fit parameters, here early stopping at iteration 3.
diff --git a/mapie/tests/test_common.py b/mapie/tests/test_common.py
index 9e4901181..557da1a76 100644
--- a/mapie/tests/test_common.py
+++ b/mapie/tests/test_common.py
@@ -11,7 +11,7 @@
from sklearn.utils.estimator_checks import parametrize_with_checks
from sklearn.utils.validation import check_is_fitted
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.classification import MapieClassifier
from mapie.regression import MapieQuantileRegressor, MapieRegressor
@@ -159,7 +159,9 @@ def test_valid_prefit_estimator(
def test_invalid_method(MapieEstimator: BaseEstimator, method: str) -> None:
"""Test that invalid methods raise errors."""
mapie_estimator = MapieEstimator(method=method)
- with pytest.raises(ValueError, match=r".*Invalid method.*"):
+ with pytest.raises(
+ ValueError, match="(Invalid method.)|(Invalid conformity score.)*"
+ ):
mapie_estimator.fit(X_toy, y_toy)
diff --git a/mapie/tests/test_conformity_scores_bounds.py b/mapie/tests/test_conformity_scores_bounds.py
index bd7b9209d..ac30e7a57 100644
--- a/mapie/tests/test_conformity_scores_bounds.py
+++ b/mapie/tests/test_conformity_scores_bounds.py
@@ -5,7 +5,7 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.conformity_scores import (
AbsoluteConformityScore, BaseRegressionScore, GammaConformityScore,
ResidualNormalisedScore
diff --git a/mapie/tests/test_conformity_scores_sets.py b/mapie/tests/test_conformity_scores_sets.py
index 2e258a160..02b86032a 100644
--- a/mapie/tests/test_conformity_scores_sets.py
+++ b/mapie/tests/test_conformity_scores_sets.py
@@ -5,7 +5,7 @@
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.classification import MapieClassifier
from mapie.conformity_scores import BaseClassificationScore
from mapie.conformity_scores.sets import (
@@ -112,7 +112,9 @@ def test_check_wrong_classification_method(
Test that the function check_classification_conformity_score raises
a ValueError when using a wrong method.
"""
- with pytest.raises(ValueError, match="Invalid method.*"):
+ with pytest.raises(
+ ValueError, match="(Invalid method.)|(Invalid conformity score.)*"
+ ):
check_classification_conformity_score(method=method)
diff --git a/mapie/tests/test_conformity_scores_utils.py b/mapie/tests/test_conformity_scores_utils.py
index 9d07fa8bc..4636d2396 100644
--- a/mapie/tests/test_conformity_scores_utils.py
+++ b/mapie/tests/test_conformity_scores_utils.py
@@ -4,7 +4,7 @@
import pytest
from mapie.conformity_scores.sets.utils import get_true_label_position
-from mapie._typing import NDArray
+from numpy.typing import NDArray
Y_TRUE_PROBA_PLACE = [
[
diff --git a/mapie/tests/test_control_risk.py b/mapie/tests/test_control_risk.py
index 4d2709f76..66eaab09f 100644
--- a/mapie/tests/test_control_risk.py
+++ b/mapie/tests/test_control_risk.py
@@ -7,7 +7,7 @@
import numpy as np
import pytest
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.control_risk.ltt import find_lambda_control_star, ltt_procedure
from mapie.control_risk.p_values import compute_hoeffdding_bentkus_p_value
from mapie.control_risk.risks import (compute_risk_precision,
@@ -173,13 +173,13 @@ def test_warning_valid_index_empty() -> None:
def test_invalid_alpha_hb() -> None:
"""Test error message when invalid alpha"""
- with pytest.raises(ValueError, match=r".*Invalid alpha"):
+ with pytest.raises(ValueError, match=r".*Invalid confidence_level"):
compute_hoeffdding_bentkus_p_value(r_hat, n, wrong_alpha)
def test_invalid_shape_alpha_hb() -> None:
"""Test error message when invalid alpha shape"""
- with pytest.raises(ValueError, match=r".*Invalid alpha"):
+ with pytest.raises(ValueError, match=r".*Invalid confidence_level"):
compute_hoeffdding_bentkus_p_value(r_hat, n, wrong_alpha_shape)
diff --git a/mapie/tests/test_metrics.py b/mapie/tests/test_metrics.py
index 5607c2b98..b8bc2939e 100644
--- a/mapie/tests/test_metrics.py
+++ b/mapie/tests/test_metrics.py
@@ -8,22 +8,35 @@
from numpy.random import RandomState
from typing_extensions import TypedDict
-from mapie._typing import ArrayLike, NDArray
-from mapie.metrics import (add_jitter, classification_coverage_score,
- classification_coverage_score_v2,
- classification_mean_width_score, classification_ssc,
- classification_ssc_score, coverage_width_based,
- cumulative_differences, expected_calibration_error,
- hsic, kolmogorov_smirnov_cdf,
- kolmogorov_smirnov_p_value,
- kolmogorov_smirnov_statistic, kuiper_cdf,
- kuiper_p_value, kuiper_statistic, length_scale,
- regression_coverage_score,
- regression_coverage_score_v2,
- regression_mean_width_score, regression_mwi_score,
- regression_ssc, regression_ssc_score, sort_xy_by_y,
- spiegelhalter_p_value, spiegelhalter_statistic,
- top_label_ece)
+from numpy.typing import ArrayLike, NDArray
+from mapie.metrics.calibration import (
+ add_jitter,
+ cumulative_differences,
+ expected_calibration_error,
+ kolmogorov_smirnov_cdf,
+ kolmogorov_smirnov_p_value,
+ kolmogorov_smirnov_statistic,
+ kuiper_cdf,
+ kuiper_p_value,
+ kuiper_statistic,
+ length_scale,
+ sort_xy_by_y,
+ spiegelhalter_p_value,
+ spiegelhalter_statistic,
+ top_label_ece,
+)
+from mapie.metrics.classification import (
+ classification_coverage_score,
+ classification_mean_width_score,
+ classification_coverage_score_v2,
+ classification_ssc,
+ classification_ssc_score,
+)
+from mapie.metrics.regression import (
+ coverage_width_based,
+ hsic,
+ regression_coverage_score,
+ regression_mean_width_score,
+ regression_mwi_score,
+ regression_ssc,
+ regression_ssc_score,
+)
y_toy = np.array([5, 7.5, 9.5, 10.5, 12.5])
y_preds = np.array([
@@ -180,8 +193,6 @@
def test_regression_ypredlow_shape() -> None:
"""Test shape of y_pred_low."""
- with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
- regression_coverage_score(y_toy, y_preds[:, :2], y_preds[:, 2])
with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
regression_mean_width_score(y_preds[:, :2], y_preds[:, 2])
with pytest.raises(ValueError):
@@ -192,8 +203,6 @@ def test_regression_ypredlow_shape() -> None:
def test_regression_ypredup_shape() -> None:
"""Test shape of y_pred_up."""
- with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
- regression_coverage_score(y_toy, y_preds[:, 1], y_preds[:, 1:])
with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
regression_mean_width_score(y_preds[:, :2], y_preds[:, 2])
with pytest.raises(ValueError):
@@ -237,8 +246,6 @@ def test_regression_valid_input_shape() -> None:
def test_regression_same_length() -> None:
"""Test when y_true and y_preds have different lengths."""
- with pytest.raises(ValueError, match=r".*arrays with different len*"):
- regression_coverage_score(y_toy, y_preds[:-1, 1], y_preds[:-1, 2])
with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
regression_mean_width_score(y_preds[:, :2], y_preds[:, 2])
with pytest.raises(ValueError, match=r".*shape mismatch*"):
@@ -255,25 +262,7 @@ def test_regression_same_length() -> None:
def test_regression_toydata_coverage_score() -> None:
"""Test coverage_score for toy data."""
- scr = regression_coverage_score(y_toy, y_preds[:, 1], y_preds[:, 2])
- assert scr == 0.8
-
-
-def test_regression_ytrue_type_coverage_score() -> None:
- """Test that list(y_true) gives right coverage."""
- scr = regression_coverage_score(list(y_toy), y_preds[:, 1], y_preds[:, 2])
- assert scr == 0.8
-
-
-def test_regression_ypredlow_type_coverage_score() -> None:
- """Test that list(y_pred_low) gives right coverage."""
- scr = regression_coverage_score(y_toy, list(y_preds[:, 1]), y_preds[:, 2])
- assert scr == 0.8
-
-
-def test_regression_ypredup_type_coverage_score() -> None:
- """Test that list(y_pred_up) gives right coverage."""
- scr = regression_coverage_score(y_toy, y_preds[:, 1], list(y_preds[:, 2]))
+ scr = regression_coverage_score(y_toy, y_preds[:, 1:])[0]
assert scr == 0.8
@@ -568,27 +557,15 @@ def test_hsic_correlation_value() -> None:
np.testing.assert_allclose(coef, np.array([0.16829506, 0.3052798]))
-def test_regression_coverage_v1andv2() -> None:
- """
- Test that ``regression_coverage_score`` and
- ```regression_coverage_score_v2``` returns the same results
- """
- cov_v1 = regression_coverage_score(
- y_toy, intervals[:, 0, 0], intervals[:, 1, 0]
- )
- cov_v2 = regression_coverage_score_v2(np.expand_dims(y_toy, 1), intervals)
- np.testing.assert_allclose(cov_v1, cov_v2[0])
-
-
-def test_regression_coverage_score_v2_ytrue_valid_shape() -> None:
+def test_regression_coverage_score_ytrue_valid_shape() -> None:
"""Test that no error is raised if y_true has the shape (n_samples,)."""
- regression_coverage_score_v2(y_toy, intervals)
+ regression_coverage_score(y_toy, intervals)
-def test_regression_coverage_score_v2_intervals_invalid_shape() -> None:
+def test_regression_coverage_score_intervals_invalid_shape() -> None:
"""Test that an error is raised if intervals has not the good shape."""
with pytest.raises(ValueError):
- regression_coverage_score_v2(
+ regression_coverage_score(
np.expand_dims(y_toy, 1), intervals[:, 0, 0]
)
@@ -823,5 +800,5 @@ def test_regression_mwi_score() -> None:
alpha = 0.1
- mwi_score = regression_mwi_score(y_true, y_pis, alpha)
+ mwi_score = regression_mwi_score(y_true, y_pis, 1 - alpha)
np.testing.assert_allclose(mwi_score, 82.25, rtol=1e-2)
diff --git a/mapie/tests/test_mondrian.py b/mapie/tests/test_mondrian.py
index a1b3d3bd6..96b9ada64 100644
--- a/mapie/tests/test_mondrian.py
+++ b/mapie/tests/test_mondrian.py
@@ -32,11 +32,6 @@
)
VALID_MAPIE_ESTIMATORS = {
- "classif_score": {
- "estimator": MapieClassifier,
- "task": "classification",
- "kwargs": {"method": "score"}
- },
"classif_lac": {
"estimator": MapieClassifier,
"task": "classification",
@@ -47,11 +42,6 @@
"task": "classification",
"kwargs": {"method": "aps"}
},
- "classif_cumulated_score": {
- "estimator": MapieClassifier,
- "task": "classification",
- "kwargs": {"method": "cumulated_score"}
- },
"classif_topk": {
"estimator": MapieClassifier,
"task": "classification",
@@ -170,7 +160,7 @@
"calibration": LogisticRegression(),
"classification": LogisticRegression(),
"multilabel_classification": MultiOutputClassifier(
- LogisticRegression(multi_class="multinomial")
+ LogisticRegression()
),
"regression": LinearRegression(),
}
@@ -242,7 +232,9 @@ def test_invalid_cv_fails(mapie_estimator_name, non_valid_cv):
estimator=model, cv=non_valid_cv, **mapie_kwargs
)
)
- with pytest.raises(ValueError, match=r".*estimator uses cv='prefit'*"):
+ with pytest.raises(
+ ValueError, match=r".*Mondrian can only be used if the underlying*"
+ ):
mondrian_cp.fit(x, y, partition=partition)
diff --git a/mapie/tests/test_multi_label_classification.py b/mapie/tests/test_multi_label_classification.py
index ad21c027a..a9ec5546d 100644
--- a/mapie/tests/test_multi_label_classification.py
+++ b/mapie/tests/test_multi_label_classification.py
@@ -13,7 +13,7 @@
from sklearn.utils.validation import check_is_fitted
from typing_extensions import TypedDict
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.multi_label_classification import MapieMultiLabelClassifier
Params = TypedDict(
@@ -421,7 +421,7 @@ def test_results_single_and_multi_jobs(strategy: str) -> None:
def test_valid_prediction(alpha: Any, delta: Any, bound: Any) -> None:
"""Test fit and predict."""
model = MultiOutputClassifier(
- LogisticRegression(multi_class="multinomial")
+ LogisticRegression()
)
model.fit(X_toy, y_toy)
mapie_clf = MapieMultiLabelClassifier(
diff --git a/mapie/tests/test_quantile_regression.py b/mapie/tests/test_quantile_regression.py
index a1791f482..47aa9555c 100644
--- a/mapie/tests/test_quantile_regression.py
+++ b/mapie/tests/test_quantile_regression.py
@@ -18,8 +18,10 @@
from sklearn.utils.validation import check_is_fitted
from typing_extensions import TypedDict
-from mapie._typing import NDArray
-from mapie.metrics import regression_coverage_score
+from numpy.typing import NDArray
+from mapie.metrics.regression import (
+ regression_coverage_score,
+)
from mapie.regression import MapieQuantileRegressor
X_toy = np.array(
@@ -352,7 +354,7 @@ def test_wrong_alphas_types(alphas: float) -> None:
"""Checking for wrong type of alphas"""
with pytest.raises(
ValueError,
- match=r".*Invalid alpha. Allowed values are float.*",
+ match=r".*Invalid confidence_level. Allowed values are float.*",
):
mapie_reg = MapieQuantileRegressor(alpha=alphas)
mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib)
@@ -363,7 +365,7 @@ def test_wrong_alphas(alphas: float) -> None:
"""Checking for alphas values that are too big according to all value."""
with pytest.raises(
ValueError,
- match=r".*Invalid alpha. Allowed values are between .*",
+ match=r".*Invalid confidence_level. Allowed values are between .*",
):
mapie_reg = MapieQuantileRegressor(alpha=alphas)
mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib)
@@ -449,7 +451,7 @@ def test_estimators_not_in_list(est: RegressorMixin) -> None:
"""
with pytest.raises(
ValueError,
- match=r".*The base model does not seem to be accepted by.*",
+ match=r".*The base model is not supported.*",
):
mapie_reg = MapieQuantileRegressor(estimator=est)
mapie_reg.fit(
@@ -514,7 +516,7 @@ def test_linear_regression_results(strategy: str) -> None:
_, y_pis = mapie.predict(X)
y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
width_mean = (y_pred_up - y_pred_low).mean()
- coverage = regression_coverage_score(y, y_pred_low, y_pred_up)
+ coverage = regression_coverage_score(y, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS[strategy], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES[strategy], rtol=1e-2)
diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py
index 1a7abc7c5..7dca5368e 100644
--- a/mapie/tests/test_regression.py
+++ b/mapie/tests/test_regression.py
@@ -24,14 +24,16 @@
from sklearn.utils.validation import check_is_fitted
from typing_extensions import TypedDict
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.aggregation_functions import aggregate_all
from mapie.conformity_scores import (
AbsoluteConformityScore, BaseRegressionScore, GammaConformityScore,
ResidualNormalisedScore
)
from mapie.estimator.regressor import EnsembleRegressor
-from mapie.metrics import regression_coverage_score
+from mapie.metrics.regression import (
+ regression_coverage_score,
+)
from mapie.regression import MapieRegressor
from mapie.subsample import Subsample
@@ -232,7 +234,10 @@ def test_invalid_agg_function(agg_function: Any) -> None:
mapie_reg.fit(X_toy, y_toy)
mapie_reg = MapieRegressor(agg_function=None)
- with pytest.raises(ValueError, match=r".*If ensemble is True*"):
+ with pytest.raises(
+ ValueError,
+ match=r".*aggregation function has to be in ['median', 'mean']*"
+ ):
mapie_reg.fit(X_toy, y_toy)
mapie_reg.predict(X_toy, ensemble=True)
@@ -309,8 +314,7 @@ def test_coverage_validity(delta: float, n_calib: int) -> None:
Xc, Xt, yc, yt = train_test_split(Xct, yct, test_size=n_test)
mapie_reg.fit(Xc, yc)
_, y_pis = mapie_reg.predict(Xt, alpha=1-delta)
- y_low, y_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
- coverage = regression_coverage_score(yt, y_low, y_up)
+ coverage = regression_coverage_score(yt, y_pis)[0]
cov_list.append(coverage)
# Here we are testing whether the average coverage is statistically
@@ -414,33 +418,6 @@ def test_calibration_data_size_asymmetric_score(delta: float) -> None:
mapie_reg.predict(Xt, alpha=1-delta)
-def test_same_results_prefit_split() -> None:
- """
- Test checking that if split and prefit method have exactly
- the same data split, then we have exactly the same results.
- """
- X, y = make_regression(
- n_samples=500, n_features=10, noise=1.0, random_state=1
- )
- cv = ShuffleSplit(n_splits=1, test_size=0.1, random_state=random_state)
- train_index, val_index = list(cv.split(X))[0]
- X_train, X_calib = X[train_index], X[val_index]
- y_train, y_calib = y[train_index], y[val_index]
-
- mapie_reg = MapieRegressor(method='base', cv=cv)
- mapie_reg.fit(X, y)
- y_pred_1, y_pis_1 = mapie_reg.predict(X, alpha=0.1)
-
- model = LinearRegression().fit(X_train, y_train)
- mapie_reg = MapieRegressor(estimator=model, method='base', cv="prefit")
- mapie_reg.fit(X_calib, y_calib)
- y_pred_2, y_pis_2 = mapie_reg.predict(X, alpha=0.1)
-
- np.testing.assert_allclose(y_pred_1, y_pred_2)
- np.testing.assert_allclose(y_pis_1[:, 0, 0], y_pis_2[:, 0, 0])
- np.testing.assert_allclose(y_pis_1[:, 1, 0], y_pis_2[:, 1, 0])
-
-
@pytest.mark.parametrize("strategy", [*STRATEGIES])
def test_results_for_same_alpha(strategy: str) -> None:
"""
@@ -644,7 +621,7 @@ def test_linear_regression_results(strategy: str) -> None:
_, y_pis = mapie.predict(X, alpha=0.05)
y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
width_mean = (y_pred_up - y_pred_low).mean()
- coverage = regression_coverage_score(y, y_pred_low, y_pred_up)
+ coverage = regression_coverage_score(y, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS[strategy], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES[strategy], rtol=1e-2)
@@ -674,7 +651,7 @@ def test_results_prefit_naive() -> None:
mapie_reg.fit(X, y)
_, y_pis = mapie_reg.predict(X, alpha=0.05)
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
- coverage = regression_coverage_score(y, y_pis[:, 0, 0], y_pis[:, 1, 0])
+ coverage = regression_coverage_score(y, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS["naive"], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES["naive"], rtol=1e-2)
@@ -689,9 +666,7 @@ def test_results_prefit() -> None:
mapie_reg.fit(X_calib, y_calib)
_, y_pis = mapie_reg.predict(X_calib, alpha=0.05)
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
- coverage = regression_coverage_score(
- y_calib, y_pis[:, 0, 0], y_pis[:, 1, 0]
- )
+ coverage = regression_coverage_score(y_calib, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2)
@@ -739,7 +714,7 @@ def test_aggregate_with_mask_with_prefit() -> None:
mapie_reg = mapie_reg.fit(X, y)
with pytest.raises(
ValueError,
- match=r".*There should not be aggregation of predictions if cv is*",
+ match=r".*There should not be aggregation of predictions.*",
):
mapie_reg.estimator_._aggregate_with_mask(k, k)
@@ -758,7 +733,7 @@ def test_aggregate_with_mask_with_invalid_agg_function() -> None:
ens_reg.use_split_method_ = False
with pytest.raises(
ValueError,
- match=r".*The value of self.agg_function is not correct*",
+ match=r".*The value of the aggregation function is not correct*",
):
ens_reg._aggregate_with_mask(k, k)
diff --git a/mapie/tests/test_time_series_regression.py b/mapie/tests/test_time_series_regression.py
index 77e4607b4..528c84323 100644
--- a/mapie/tests/test_time_series_regression.py
+++ b/mapie/tests/test_time_series_regression.py
@@ -10,10 +10,12 @@
from sklearn.utils.estimator_checks import check_estimator
from typing_extensions import TypedDict
-from mapie._typing import NDArray
+from numpy.typing import NDArray
from mapie.aggregation_functions import aggregate_all
from mapie.conformity_scores import AbsoluteConformityScore
-from mapie.metrics import regression_coverage_score
+from mapie.metrics.regression import (
+ regression_coverage_score,
+)
from mapie.regression import MapieTimeSeriesRegressor
from mapie.subsample import BlockBootstrap
@@ -133,7 +135,7 @@ def test_invalid_agg_function(agg_function: Any) -> None:
mapie_reg.fit(X_toy, y_toy)
mapie_reg = MapieTimeSeriesRegressor(agg_function=None)
- with pytest.raises(ValueError, match=r".*If ensemble is True*"):
+ with pytest.raises(ValueError, match=r".*The aggregation function*"):
mapie_reg.fit(X_toy, y_toy)
mapie_reg.predict(X_toy, ensemble=True)
@@ -284,7 +286,7 @@ def test_linear_regression_results(strategy: str) -> None:
y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
width_mean = (y_pred_up - y_pred_low).mean()
- coverage = regression_coverage_score(y, y_pred_low, y_pred_up)
+ coverage = regression_coverage_score(y, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS[strategy], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES[strategy], rtol=1e-2)
@@ -304,9 +306,7 @@ def test_results_prefit() -> None:
mapie_ts_reg.fit(X_val, y_val)
_, y_pis = mapie_ts_reg.predict(X_test, alpha=0.05)
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
- coverage = regression_coverage_score(
- y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
- )
+ coverage = regression_coverage_score(y_test, y_pis)[0]
np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2)
np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2)
@@ -399,7 +399,7 @@ def test_MapieTimeSeriesRegressor_beta_optimize_error() -> None:
cv=-1, conformity_score=AbsoluteConformityScore(sym=True)
).fit(X_toy, y_toy)
with pytest.raises(
- ValueError, match=r"Beta optimisation cannot be used*"
+ ValueError, match=r"Interval width minimization cannot be used*"
):
mapie_ts_reg.predict(X_toy, alpha=0.4, optimize_beta=True)
@@ -423,9 +423,7 @@ def test_interval_prediction_with_beta_optimize() -> None:
mapie_ts_reg.update(X_val, y_val)
_, y_pis = mapie_ts_reg.predict(X_test, alpha=0.05, optimize_beta=True)
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
- coverage = regression_coverage_score(
- y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
- )
+ coverage = regression_coverage_score(y_test, y_pis)[0]
np.testing.assert_allclose(width_mean, 3.67, rtol=1e-2)
np.testing.assert_allclose(coverage, 0.916, rtol=1e-2)
diff --git a/mapie/tests/test_utils.py b/mapie/tests/test_utils.py
index 6249e2335..057ee0335 100644
--- a/mapie/tests/test_utils.py
+++ b/mapie/tests/test_utils.py
@@ -13,7 +13,7 @@
ShuffleSplit)
from sklearn.utils.validation import check_is_fitted
-from mapie._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
from mapie.regression import MapieQuantileRegressor
from mapie.utils import (check_alpha, check_alpha_and_n_samples,
check_array_inf, check_array_nan, check_arrays_length,
@@ -131,7 +131,7 @@ def test_fit_estimator_sample_weight() -> None:
@pytest.mark.parametrize("alpha", [-1, 0, 1, 2, 2.5, "a", ["a", "b"]])
def test_invalid_alpha(alpha: Any) -> None:
"""Test that invalid alphas raise errors."""
- with pytest.raises(ValueError, match=r".*Invalid alpha.*"):
+ with pytest.raises(ValueError, match=r".*Invalid confidence_level.*"):
check_alpha(alpha=alpha)
diff --git a/mapie/utils.py b/mapie/utils.py
index 037707fae..4a8b6d03b 100644
--- a/mapie/utils.py
+++ b/mapie/utils.py
@@ -15,8 +15,7 @@
from sklearn.utils.validation import (_check_sample_weight, _num_features,
check_is_fitted, column_or_1d)
-from ._compatibility import np_quantile
-from ._typing import ArrayLike, NDArray
+from numpy.typing import ArrayLike, NDArray
SPLIT_STRATEGIES = ["uniform", "quantile", "array split"]
@@ -76,6 +75,8 @@ def check_null_weight(
return sample_weight, X, y
+# TODO back-end: this will be useless in v1 because we'll not distinguish
+# sample_weight from other fit_params
def fit_estimator(
estimator: Union[RegressorMixin, ClassifierMixin],
X: ArrayLike,
@@ -193,7 +194,8 @@ def check_cv(
else:
raise ValueError(
"Invalid cv argument. "
- "Allowed integer values are -1 or int >= 2."
+ "Allowed integer values are -1 or int >= 2, "
+ "or a suitable BaseCrossValidator object"
)
elif isinstance(cv, BaseCrossValidator):
return cv
@@ -208,8 +210,8 @@ def check_cv(
else:
raise ValueError(
"Invalid cv argument. "
- "Allowed values are None, -1, int >= 2, 'prefit', 'split', "
- "or a BaseCrossValidator object (Kfold, LeaveOneOut)."
+ "Allowed values are -1, int >= 2, "
+ "or a suitable BaseCrossValidator object"
)
@@ -315,19 +317,21 @@ def check_alpha(
alpha_np = np.array(alpha)
else:
raise ValueError(
- "Invalid alpha. Allowed values are float or Iterable."
+ "Invalid confidence_level or alpha. Allowed values are float or Iterable."
)
if len(alpha_np.shape) != 1:
raise ValueError(
- "Invalid alpha."
+ "Invalid confidence_level or alpha. "
"Please provide a one-dimensional list of values."
)
if alpha_np.dtype.type not in [np.float64, np.float32]:
raise ValueError(
- "Invalid alpha. Allowed values are Iterable of floats."
+ "Invalid confidence_level or alpha. Allowed values are Iterable of floats."
)
if np.any(np.logical_or(alpha_np < 0, alpha_np > 1)):
- raise ValueError("Invalid alpha. Allowed values are between 0 and 1.")
+ raise ValueError(
+ "Invalid confidence_level or alpha. Allowed values are between 0 and 1."
+ )
return alpha_np
@@ -429,7 +433,7 @@ def get_effective_calibration_samples(scores: NDArray, sym: bool):
n: int
The effective number of calibration samples.
"""
- n = np.sum(~np.isnan(scores))
+ n: int = np.sum(~np.isnan(scores))
if not sym:
n //= 2
return n
@@ -467,16 +471,19 @@ def check_alpha_and_n_samples(
... print(exception)
...
Number of samples of the score is too low,
- 1/alpha (or 1/(1 - alpha)) must be lower than the number of samples.
+ 1/confidence_level and 1/(1 - confidence_level) must be
+ lower than the number of samples.
"""
if isinstance(alphas, float):
- alphas = np.array([alphas])
- for alpha in alphas:
+ alphas_: Iterable[float] = [alphas]
+ else:
+ alphas_ = alphas
+ for alpha in alphas_:
if n < np.max([1/alpha, 1/(1-alpha)]):
raise ValueError(
"Number of samples of the score is too low,\n"
- "1/alpha (or 1/(1 - alpha)) must be lower "
- "than the number of samples."
+ "1/confidence_level and 1/(1 - confidence_level) must be\n"
+ "lower than the number of samples."
)
@@ -685,8 +692,8 @@ def check_alpha_and_last_axis(vector: NDArray, alpha_np: NDArray):
"""
if len(alpha_np) != vector.shape[2]:
raise ValueError(
- "In case of the vector has 3 dimensions, the dimension\n"
- + "of his last axis must be equal to the number of alphas"
+ "In case of the vector has 3 dimensions, the dimension of its"
+ + "last axis must be equal to the number of confidence levels"
)
else:
return vector, alpha_np
@@ -713,7 +720,7 @@ def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray:
if len(vector.shape) <= 2:
quantiles_ = np.stack(
[
- np_quantile(
+ np.quantile(
vector,
((n + 1) * (1 - _alpha)) / n,
method="higher",
@@ -843,7 +850,7 @@ def check_estimator_classification(
If the estimator is not fitted and ``cv`` attribute is "prefit".
"""
if estimator is None:
- return LogisticRegression(multi_class="multinomial").fit(X, y)
+ return LogisticRegression().fit(X, y)
if isinstance(estimator, Pipeline):
est = estimator[-1]
@@ -1131,7 +1138,7 @@ def check_array_shape_classification(
if len(y_pred_set.shape) != 2:
raise ValueError(
"y_pred_set should be a 3D array of shape \
- (n_obs, n_classes, n_alpha)"
+ (n_obs, n_classes, n_confidence_levels)"
)
else:
y_pred_set = np.expand_dims(y_pred_set, axis=2)
@@ -1167,7 +1174,8 @@ def check_array_shape_regression(
if len(y_intervals.shape) != 3:
if len(y_intervals.shape) != 2:
raise ValueError(
- "y_intervals should be a 3D array of shape (n_obs, 2, n_alpha)"
+ "y_intervals should be a 3D array of shape"
+ " (n_obs, 2, n_confidence_levels)"
)
else:
y_intervals = np.expand_dims(y_intervals, axis=2)
diff --git a/mapie_v1/__init__.py b/mapie_v1/__init__.py
new file mode 100644
index 000000000..afe5cfcf3
--- /dev/null
+++ b/mapie_v1/__init__.py
@@ -0,0 +1,8 @@
+from . import classification, regression
+from ._version import __version__
+
+__all__ = [
+ "regression",
+ "classification",
+ "__version__"
+]
diff --git a/mapie_v1/_version.py b/mapie_v1/_version.py
new file mode 100644
index 000000000..70ce1d69a
--- /dev/null
+++ b/mapie_v1/_version.py
@@ -0,0 +1 @@
+__version__ = "1.0.0-alpha1"
diff --git a/mapie_v1/classification.py b/mapie_v1/classification.py
new file mode 100644
index 000000000..e525bab74
--- /dev/null
+++ b/mapie_v1/classification.py
@@ -0,0 +1,391 @@
+from __future__ import annotations
+
+from typing import Optional, Union, Tuple, Iterable
+from typing_extensions import Self
+
+import numpy as np
+from sklearn.base import ClassifierMixin, clone
+from sklearn.model_selection import BaseCrossValidator
+from sklearn.linear_model import LogisticRegression
+
+from numpy.typing import ArrayLike, NDArray
+from mapie.classification import MapieClassifier
+from mapie.conformity_scores import BaseClassificationScore
+from mapie_v1.utils import (
+ transform_confidence_level_to_alpha_list,
+ prepare_params,
+ cast_predictions_to_ndarray_tuple,
+ cast_point_predictions_to_ndarray,
+ raise_error_if_previous_method_not_called,
+ raise_error_if_method_already_called,
+ raise_error_if_fit_called_in_prefit_mode,
+ check_cv_not_string,
+ prepare_fit_params_and_sample_weight,
+)
+from mapie_v1.conformity_scores._utils import check_and_select_conformity_score
+
+
+class SplitConformalClassifier:
+ """
+ Computes prediction sets using the split conformal classification technique:
+
+ 1. The ``fit`` method (optional) fits the base classifier to the training data.
+ 2. The ``conformalize`` method estimates the uncertainty of the base classifier by
+ computing conformity scores on the conformity set.
+ 3. The ``predict_set`` method predicts labels and sets of labels.
+
+ Parameters
+ ----------
+ estimator : ClassifierMixin, default=LogisticRegression()
+ The base classifier used to predict labels.
+
+ confidence_level : Union[float, List[float]], default=0.9
+ The confidence level(s) for the prediction sets, indicating the
+ desired coverage probability of the prediction sets. If a float is
+ provided, it represents a single confidence level. If a list, multiple
+ prediction sets for each specified confidence level are returned.
+
+ conformity_score : Union[str, BaseClassificationScore], default="lac"
+ The method used to compute conformity scores.
+
+ Valid options:
+
+ - "lac"
+ - "top_k"
+ - "aps"
+ - "raps"
+ - Any subclass of BaseClassificationScore
+
+ A custom score function inheriting from BaseClassificationScore may also
+ be provided.
+
+ See :ref:`theoretical_description_classification`.
+
+ prefit : bool, default=True
+ If True, the base classifier must be fitted, and the ``fit``
+ method must be skipped.
+
+ If False, the base classifier will be fitted during the ``fit`` method.
+
+ n_jobs : Optional[int], default=None
+ The number of jobs to run in parallel when applicable.
+
+ verbose : int, default=0
+ Controls the verbosity level.
+ Higher values increase the output details.
+
+ Examples
+ --------
+ >>> from mapie_v1.classification import SplitConformalClassifier
+ >>> from mapie_v1.utils import train_conformalize_test_split
+ >>> from sklearn.datasets import make_classification
+ >>> from sklearn.neighbors import KNeighborsClassifier
+
+ >>> X, y = make_classification(n_samples=500)
+ >>> (
+ ... X_train, X_conformalize, X_test,
+ ... y_train, y_conformalize, y_test
+ ... ) = train_conformalize_test_split(
+ ... X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
+ ... )
+
+ >>> mapie_classifier = SplitConformalClassifier(
+ ... estimator=KNeighborsClassifier(),
+ ... confidence_level=0.95,
+ ... prefit=False,
+ ... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)
+
+ >>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X_test)
+ """
+
+ def __init__(
+ self,
+ estimator: ClassifierMixin = LogisticRegression(),
+ confidence_level: Union[float, Iterable[float]] = 0.9,
+ conformity_score: Union[str, BaseClassificationScore] = "lac",
+ prefit: bool = True,
+ n_jobs: Optional[int] = None,
+ verbose: int = 0,
+ random_state: Optional[Union[int, np.random.RandomState]] = None,
+ ) -> None:
+ self._estimator = estimator
+ self._alphas = transform_confidence_level_to_alpha_list(
+ confidence_level
+ )
+ self._conformity_score = check_and_select_conformity_score(
+ conformity_score,
+ BaseClassificationScore
+ )
+ self._prefit = prefit
+ self._is_fitted = prefit
+ self._is_conformalized = False
+
+ # Note to developers: to implement this v1 class without touching the
+ # v0 backend, we're for now using a hack. We always set cv="prefit",
+ # and we fit the estimator if needed. See the .fit method below.
+ self._mapie_classifier = MapieClassifier(
+ estimator=self._estimator,
+ cv="prefit",
+ n_jobs=n_jobs,
+ verbose=verbose,
+ conformity_score=self._conformity_score,
+ random_state=random_state,
+ )
+ self._predict_params: dict = {}
+
+ def fit(
+ self,
+ X_train: ArrayLike,
+ y_train: ArrayLike,
+ fit_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Fits the base classifier to the training data.
+
+ Parameters
+ ----------
+ X_train : ArrayLike
+ Training data features.
+
+ y_train : ArrayLike
+ Training data targets.
+
+ fit_params : Optional[dict], default=None
+ Parameters to pass to the ``fit`` method of the base classifier.
+
+ Returns
+ -------
+ Self
+ The fitted SplitConformalClassifier instance.
+ """
+ raise_error_if_fit_called_in_prefit_mode(self._prefit)
+ raise_error_if_method_already_called("fit", self._is_fitted)
+
+ cloned_estimator = clone(self._estimator)
+ fit_params_ = prepare_params(fit_params)
+ cloned_estimator.fit(X_train, y_train, **fit_params_)
+ self._mapie_classifier.estimator = cloned_estimator
+
+ self._is_fitted = True
+ return self
+
+ def conformalize(
+ self,
+ X_conformalize: ArrayLike,
+ y_conformalize: ArrayLike,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Estimates the uncertainty of the base classifier by computing
+ conformity scores on the conformity set.
+
+ Parameters
+ ----------
+ X_conformalize : ArrayLike
+ Features of the conformity set.
+
+ y_conformalize : ArrayLike
+ Targets of the conformity set.
+
+ predict_params : Optional[dict], default=None
+ Parameters to pass to the ``predict`` and ``predict_proba`` methods
+ of the base classifier. These parameters will also be used in the
+ ``predict_set`` and ``predict`` methods of this SplitConformalClassifier.
+
+ Returns
+ -------
+ Self
+ The conformalized SplitConformalClassifier instance.
+ """
+ raise_error_if_previous_method_not_called(
+ "conformalize",
+ "fit",
+ self._is_fitted,
+ )
+ raise_error_if_method_already_called(
+ "conformalize",
+ self._is_conformalized,
+ )
+
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_classifier.fit(
+ X_conformalize,
+ y_conformalize,
+ predict_params=self._predict_params,
+ )
+
+ self._is_conformalized = True
+ return self
+
+ def predict_set(
+ self,
+ X: ArrayLike,
+ conformity_score_params: Optional[dict] = None,
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ For each sample in X, returns the predicted label and a set of labels.
+
+ If several confidence levels were provided during initialisation, several
+ sets will be predicted for each sample. See the return signature.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ conformity_score_params : dict, default=None
+ Parameters specific to conformity scores, used at prediction time.
+
+ The only example for now is ``include_last_label``, available for `aps`
+ and `raps` conformity scores. For detailed information on
+ ``include_last_label``, see the docstring of
+ :meth:`conformity_scores.sets.aps.APSConformityScore.get_prediction_sets`.
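+
+ For instance, a call could look like:
+ ``predict_set(X, conformity_score_params={"include_last_label": False})``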
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Two arrays:
+
+ - Prediction labels, of shape ``(n_samples,)``
+ - Prediction sets, of shape ``(n_samples, n_class, n_confidence_levels)``
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_set",
+ "conformalize",
+ self._is_conformalized,
+ )
+ conformity_score_params_ = prepare_params(conformity_score_params)
+ predictions = self._mapie_classifier.predict(
+ X,
+ alpha=self._alphas,
+ include_last_label=conformity_score_params_.get("include_last_label", True),
+ **self._predict_params,
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(self, X: ArrayLike) -> NDArray:
+ """
+ For each sample in X, returns the predicted label.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ Returns
+ -------
+ NDArray
+ Array of predicted labels, with shape (n_samples,).
+ """
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "conformalize",
+ self._is_conformalized,
+ )
+ predictions = self._mapie_classifier.predict(
+ X,
+ alpha=None,
+ **self._predict_params,
+ )
+ return cast_point_predictions_to_ndarray(predictions)
+
+
+class CrossConformalClassifier:
+ def __init__(
+ self,
+ estimator: ClassifierMixin = LogisticRegression(),
+ confidence_level: Union[float, Iterable[float]] = 0.9,
+ conformity_score: Union[str, BaseClassificationScore] = "lac",
+ cv: Union[int, BaseCrossValidator] = 5,
+ n_jobs: Optional[int] = None,
+ verbose: int = 0,
+ random_state: Optional[Union[int, np.random.RandomState]] = None,
+ ) -> None:
+ """
+ Computes prediction sets using cross-conformal classification.
+ All conformity scores are supported, except "raps" and "top_k".
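+
+ Examples
+ --------
+ A minimal usage sketch, mirroring the SplitConformalClassifier example
+ (dataset and model choices are illustrative only):
+
+ >>> from mapie_v1.classification import CrossConformalClassifier
+ >>> from sklearn.datasets import make_classification
+ >>> from sklearn.linear_model import LogisticRegression
+
+ >>> X, y = make_classification(n_samples=500)
+ >>> mapie_classifier = CrossConformalClassifier(
+ ... estimator=LogisticRegression(),
+ ... confidence_level=0.95,
+ ... cv=5,
+ ... ).fit_conformalize(X, y)
+
+ >>> predicted_labels, predicted_sets = mapie_classifier.predict_set(X)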
+ """
+ check_cv_not_string(cv)
+
+ self._mapie_classifier = MapieClassifier(
+ estimator=estimator,
+ cv=cv,
+ n_jobs=n_jobs,
+ verbose=verbose,
+ conformity_score=check_and_select_conformity_score(
+ conformity_score,
+ BaseClassificationScore,
+ ),
+ random_state=random_state,
+ )
+
+ self._alphas = transform_confidence_level_to_alpha_list(
+ confidence_level
+ )
+ self.is_fitted_and_conformalized = False
+
+ self._predict_params: dict = {}
+
+ def fit_conformalize(
+ self,
+ X: ArrayLike,
+ y: ArrayLike,
+ groups: Optional[ArrayLike] = None,
+ fit_params: Optional[dict] = None,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
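+ """
+ Fits the base classifier and estimates its uncertainty in a single
+ step, using cross-validation: conformity scores are computed on
+ out-of-fold predictions over (X, y).
+ """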
+ raise_error_if_method_already_called(
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ fit_params_, sample_weight = prepare_fit_params_and_sample_weight(
+ fit_params
+ )
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_classifier.fit(
+ X=X,
+ y=y,
+ sample_weight=sample_weight,
+ groups=groups,
+ fit_params=fit_params_,
+ predict_params=self._predict_params
+ )
+
+ self.is_fitted_and_conformalized = True
+ return self
+
+ def predict_set(
+ self,
+ X: ArrayLike,
+ conformity_score_params: Optional[dict] = None,
+ agg_scores: str = "mean",
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ For each sample in X, returns the predicted label and prediction
+ set(s), with shapes (n_samples,) and
+ (n_samples, n_classes, n_confidence_levels) respectively.
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_set",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ conformity_score_params_ = prepare_params(conformity_score_params)
+ predictions = self._mapie_classifier.predict(
+ X,
+ alpha=self._alphas,
+ include_last_label=conformity_score_params_.get("include_last_label", True),
+ agg_scores=agg_scores,
+ **self._predict_params,
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(self, X: ArrayLike) -> NDArray:
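+ """
+ For each sample in X, returns the predicted label, as an array of
+ shape (n_samples,).
+ """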
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+ predictions = self._mapie_classifier.predict(
+ X, alpha=None, **self._predict_params,
+ )
+ return cast_point_predictions_to_ndarray(predictions)
diff --git a/mapie_v1/conformity_scores/__init__.py b/mapie_v1/conformity_scores/__init__.py
new file mode 100644
index 000000000..f93d23e45
--- /dev/null
+++ b/mapie_v1/conformity_scores/__init__.py
@@ -0,0 +1,25 @@
+from mapie.conformity_scores import (
+ AbsoluteConformityScore,
+ GammaConformityScore,
+ ResidualNormalisedScore,
+ LACConformityScore,
+ TopKConformityScore,
+ APSConformityScore,
+ RAPSConformityScore,
+ BaseRegressionScore,
+ BaseClassificationScore,
+)
+
+CONFORMITY_SCORES_STRING_MAP = {
+ BaseRegressionScore: {
+ "absolute": AbsoluteConformityScore,
+ "gamma": GammaConformityScore,
+ "residual_normalized": ResidualNormalisedScore,
+ },
+ BaseClassificationScore: {
+ "lac": LACConformityScore,
+ "top_k": TopKConformityScore,
+ "aps": APSConformityScore,
+ "raps": RAPSConformityScore,
+ },
+}
diff --git a/mapie_v1/conformity_scores/_utils.py b/mapie_v1/conformity_scores/_utils.py
new file mode 100644
index 000000000..ad241546b
--- /dev/null
+++ b/mapie_v1/conformity_scores/_utils.py
@@ -0,0 +1,12 @@
+from typing import no_type_check
+from . import CONFORMITY_SCORES_STRING_MAP
+
+
+@no_type_check # Cumbersome to type
+def check_and_select_conformity_score(conformity_score, conformity_score_type):
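+ # Accepts either an already-instantiated conformity score, returned
+ # as-is, or a string alias, which is looked up in
+ # CONFORMITY_SCORES_STRING_MAP and instantiated with default parameters.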
+ if isinstance(conformity_score, conformity_score_type):
+ return conformity_score
+ elif conformity_score in CONFORMITY_SCORES_STRING_MAP[conformity_score_type]:
+ return CONFORMITY_SCORES_STRING_MAP[conformity_score_type][conformity_score]()
+ else:
+ raise ValueError("Invalid conformity_score parameter")
diff --git a/mapie_v1/regression.py b/mapie_v1/regression.py
new file mode 100644
index 000000000..0c9742791
--- /dev/null
+++ b/mapie_v1/regression.py
@@ -0,0 +1,1165 @@
+from typing import Optional, Union, List, Tuple, Iterable
+from typing_extensions import Self
+
+import numpy as np
+from sklearn.linear_model import LinearRegression
+from sklearn.base import RegressorMixin, clone
+from sklearn.model_selection import BaseCrossValidator
+from sklearn.pipeline import Pipeline
+
+from mapie.subsample import Subsample
+from numpy.typing import ArrayLike, NDArray
+from mapie.conformity_scores import BaseRegressionScore
+from mapie.regression import MapieRegressor, MapieQuantileRegressor
+from mapie.utils import check_estimator_fit_predict
+from mapie_v1.conformity_scores._utils import check_and_select_conformity_score
+from mapie_v1.utils import (
+ transform_confidence_level_to_alpha_list,
+ check_if_param_in_allowed_values,
+ check_cv_not_string,
+ cast_point_predictions_to_ndarray,
+ cast_predictions_to_ndarray_tuple,
+ prepare_params,
+ prepare_fit_params_and_sample_weight,
+ raise_error_if_previous_method_not_called,
+ raise_error_if_method_already_called,
+ raise_error_if_fit_called_in_prefit_mode, transform_confidence_level_to_alpha,
+)
+
+
+class SplitConformalRegressor:
+ """
+ Computes prediction intervals using the split conformal regression technique:
+
+ 1. The ``fit`` method (optional) fits the base regressor to the training data.
+ 2. The ``conformalize`` method estimates the uncertainty of the base regressor by
+ computing conformity scores on the conformity set.
+ 3. The ``predict_interval`` method predicts points and intervals.
+
+ Parameters
+ ----------
+ estimator : RegressorMixin, default=LinearRegression()
+ The base regressor used to predict points.
+
+ confidence_level : Union[float, List[float]], default=0.9
+ The confidence level(s) for the prediction intervals, indicating the
+ desired coverage probability of the prediction intervals. If a float is
+ provided, it represents a single confidence level. If a list is provided,
+ one prediction interval per confidence level is returned.
+
+ conformity_score : Union[str, BaseRegressionScore], default="absolute"
+ The method used to compute conformity scores.
+
+ Valid options:
+
+ - "absolute"
+ - "gamma"
+ - "residual_normalized"
+ - Any subclass of BaseRegressionScore
+
+ A custom score function inheriting from BaseRegressionScore may also
+ be provided.
+
+ See :ref:`theoretical_description_conformity_scores`.
+
+ prefit : bool, default=True
+ If True, the base regressor must be fitted, and the ``fit``
+ method must be skipped.
+
+ If False, the base regressor will be fitted during the ``fit`` method.
+
+ n_jobs : Optional[int], default=None
+ The number of jobs to run in parallel when applicable.
+
+ verbose : int, default=0
+ Controls the verbosity level.
+ Higher values increase the output details.
+
+ Examples
+ --------
+ >>> from mapie_v1.regression import SplitConformalRegressor
+ >>> from mapie_v1.utils import train_conformalize_test_split
+ >>> from sklearn.datasets import make_regression
+ >>> from sklearn.linear_model import Ridge
+
+ >>> X, y = make_regression(n_samples=500, n_features=2, noise=1.0)
+ >>> (
+ ... X_train, X_conformalize, X_test,
+ ... y_train, y_conformalize, y_test
+ ... ) = train_conformalize_test_split(
+ ... X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
+ ... )
+
+ >>> mapie_regressor = SplitConformalRegressor(
+ ... estimator=Ridge(),
+ ... confidence_level=0.95,
+ ... prefit=False,
+ ... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)
+
+ >>> predicted_points, predicted_intervals = mapie_regressor.predict_interval(X_test)
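+
+ Empirical coverage can then be sanity-checked with plain numpy (a sketch,
+ not a MAPIE API; ``y_test`` and ``predicted_intervals`` come from the lines
+ above, and indices 0/1 on the second axis are assumed to be the lower/upper
+ bounds)::
+
+ import numpy as np
+ lower, upper = predicted_intervals[:, 0, 0], predicted_intervals[:, 1, 0]
+ coverage = np.mean((y_test >= lower) & (y_test <= upper)) # close to 0.95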
+ """
+
+ def __init__(
+ self,
+ estimator: RegressorMixin = LinearRegression(),
+ confidence_level: Union[float, Iterable[float]] = 0.9,
+ conformity_score: Union[str, BaseRegressionScore] = "absolute",
+ prefit: bool = True,
+ n_jobs: Optional[int] = None,
+ verbose: int = 0,
+ ) -> None:
+ check_estimator_fit_predict(estimator)
+ self._estimator = estimator
+ self._prefit = prefit
+ self._is_fitted = prefit
+ self._is_conformalized = False
+ self._conformity_score = check_and_select_conformity_score(
+ conformity_score,
+ BaseRegressionScore,
+ )
+
+ # Note to developers: to implement this v1 class without touching the
+ # v0 backend, we're for now using a hack. We always set cv="prefit",
+ # and we fit the estimator if needed. See the .fit method below.
+ self._mapie_regressor = MapieRegressor(
+ estimator=self._estimator,
+ method="base",
+ cv="prefit",
+ n_jobs=n_jobs,
+ verbose=verbose,
+ conformity_score=self._conformity_score,
+ )
+
+ self._alphas = transform_confidence_level_to_alpha_list(
+ confidence_level
+ )
+ self._predict_params: dict = {}
+
+ def fit(
+ self,
+ X_train: ArrayLike,
+ y_train: ArrayLike,
+ fit_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Fits the base regressor to the training data.
+
+ Parameters
+ ----------
+ X_train : ArrayLike
+ Training data features.
+
+ y_train : ArrayLike
+ Training data targets.
+
+ fit_params : Optional[dict], default=None
+ Parameters to pass to the ``fit`` method of the base regressor.
+
+ Returns
+ -------
+ Self
+ The fitted SplitConformalRegressor instance.
+ """
+ raise_error_if_fit_called_in_prefit_mode(self._prefit)
+ raise_error_if_method_already_called("fit", self._is_fitted)
+
+ cloned_estimator = clone(self._estimator)
+ fit_params_ = prepare_params(fit_params)
+ cloned_estimator.fit(X_train, y_train, **fit_params_)
+ self._mapie_regressor.estimator = cloned_estimator
+
+ self._is_fitted = True
+ return self
+
+ def conformalize(
+ self,
+ X_conformalize: ArrayLike,
+ y_conformalize: ArrayLike,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Estimates the uncertainty of the base regressor by computing
+ conformity scores on the conformity set.
+
+ Parameters
+ ----------
+ X_conformalize : ArrayLike
+ Features of the conformity set.
+
+ y_conformalize : ArrayLike
+ Targets of the conformity set.
+
+ predict_params : Optional[dict], default=None
+ Parameters to pass to the ``predict`` method of the base regressor.
+ These parameters will also be used in the ``predict_interval``
+ and ``predict`` methods of this SplitConformalRegressor.
+
+ Returns
+ -------
+ Self
+ The conformalized SplitConformalRegressor instance.
+ """
+ raise_error_if_previous_method_not_called(
+ "conformalize",
+ "fit",
+ self._is_fitted,
+ )
+ raise_error_if_method_already_called(
+ "conformalize",
+ self._is_conformalized,
+ )
+
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_regressor.fit(
+ X_conformalize,
+ y_conformalize,
+ predict_params=self._predict_params
+ )
+
+ self._is_conformalized = True
+ return self
+
+ def predict_interval(
+ self,
+ X: ArrayLike,
+ minimize_interval_width: bool = False,
+ allow_infinite_bounds: bool = False,
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ Predicts points and intervals.
+
+ If several confidence levels were provided during initialisation, several
+ intervals will be predicted for each sample. See the return signature.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ minimize_interval_width : bool, default=False
+ If True, attempts to minimize the interval width.
+
+ allow_infinite_bounds : bool, default=False
+ If True, allows prediction intervals with infinite bounds.
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Two arrays:
+
+ - Prediction points, of shape ``(n_samples,)``
+ - Prediction intervals, of shape ``(n_samples, 2, n_confidence_levels)``
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_interval",
+ "conformalize",
+ self._is_conformalized,
+ )
+ predictions = self._mapie_regressor.predict(
+ X,
+ alpha=self._alphas,
+ optimize_beta=minimize_interval_width,
+ allow_infinite_bounds=allow_infinite_bounds,
+ **self._predict_params,
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(
+ self,
+ X: ArrayLike,
+ ) -> NDArray:
+ """
+ Predicts points.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ Returns
+ -------
+ NDArray
+ Array of point predictions, with shape (n_samples,).
+ """
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "conformalize",
+ self._is_conformalized,
+ )
+ predictions = self._mapie_regressor.predict(
+ X,
+ alpha=None,
+ **self._predict_params
+ )
+ return cast_point_predictions_to_ndarray(predictions)
+
+
+class CrossConformalRegressor:
+ """
+ Computes prediction intervals using the cross conformal regression technique:
+
+ 1. The ``fit_conformalize`` method estimates the uncertainty of the base regressor
+ in a cross-validation style. It fits the base regressor on folds of the dataset
+ and computes conformity scores on the out-of-fold data.
+ 2. The ``predict_interval`` computes prediction points and intervals.
+
+ Parameters
+ ----------
+ estimator : RegressorMixin, default=LinearRegression()
+ The base regressor used to predict points.
+
+ confidence_level : Union[float, List[float]], default=0.9
+ The confidence level(s) for the prediction intervals, indicating the
+ desired coverage probability of the prediction intervals. If a float is
+ provided, it represents a single confidence level. If a list is provided,
+ one prediction interval per confidence level is returned.
+
+ conformity_score : Union[str, BaseRegressionScore], default="absolute"
+ The method used to compute conformity scores.
+
+ Valid options:
+
+ - "absolute"
+ - "gamma"
+ - The corresponding subclasses of BaseRegressionScore
+
+ A custom score function inheriting from BaseRegressionScore may also
+ be provided.
+
+ See :ref:`theoretical_description_conformity_scores`.
+
+ method : str, default="plus"
+ The method used to compute prediction intervals. Options are:
+
+ - "base": Based on the conformity scores from each fold.
+ - "plus": Based on the conformity scores from each fold and
+ the test set predictions.
+ - "minmax": Based on the conformity scores from each fold and
+ the test set predictions, using the minimum and maximum among
+ each fold models.
+
+ cv : Union[int, BaseCrossValidator], default=5
+ The cross-validator used to compute conformity scores.
+ Valid options:
+
+ - integer, to specify the number of folds
+ - any ``sklearn.model_selection.BaseCrossValidator`` suitable for
+ regression, or a custom cross-validator inheriting from it.
+
+ Main variants in the cross conformal setting are:
+
+ - ``sklearn.model_selection.KFold`` (vanilla cross conformal)
+ - ``sklearn.model_selection.LeaveOneOut`` (jackknife)
+
+ n_jobs : Optional[int], default=None
+ The number of jobs to run in parallel when applicable.
+
+ verbose : int, default=0
+ Controls the verbosity level. Higher values increase the
+ output details.
+
+ random_state : Optional[Union[int, np.random.RandomState]], default=None
+ A seed or random state instance to ensure reproducibility in any random
+ operations within the regressor.
+
+ Examples
+ --------
+ >>> from mapie_v1.regression import CrossConformalRegressor
+ >>> from sklearn.datasets import make_regression
+ >>> from sklearn.model_selection import train_test_split
+ >>> from sklearn.linear_model import Ridge
+
+ >>> X_full, y_full = make_regression(n_samples=500, n_features=2, noise=1.0)
+ >>> X, X_test, y, y_test = train_test_split(X_full, y_full)
+
+ >>> mapie_regressor = CrossConformalRegressor(
+ ... estimator=Ridge(),
+ ... confidence_level=0.95,
+ ... cv=10
+ ... ).fit_conformalize(X, y)
+
+ >>> predicted_points, predicted_intervals = mapie_regressor.predict_interval(X_test)
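+
+ Several confidence levels can be requested at once, yielding one interval
+ per level (a sketch; shapes follow the ``predict_interval`` return
+ signature)::
+
+ mapie_regressor = CrossConformalRegressor(
+ estimator=Ridge(), confidence_level=[0.9, 0.95], cv=10
+ ).fit_conformalize(X, y)
+ _, intervals = mapie_regressor.predict_interval(X_test)
+ # intervals.shape == (n_samples, 2, 2): lower/upper bound per level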
+ """
+
+ _VALID_METHODS = ["base", "plus", "minmax"]
+
+ def __init__(
+ self,
+ estimator: RegressorMixin = LinearRegression(),
+ confidence_level: Union[float, Iterable[float]] = 0.9,
+ conformity_score: Union[str, BaseRegressionScore] = "absolute",
+ method: str = "plus",
+ cv: Union[int, BaseCrossValidator] = 5,
+ n_jobs: Optional[int] = None,
+ verbose: int = 0,
+ random_state: Optional[Union[int, np.random.RandomState]] = None
+ ) -> None:
+ check_if_param_in_allowed_values(
+ method,
+ "method",
+ CrossConformalRegressor._VALID_METHODS
+ )
+ check_cv_not_string(cv)
+
+ self._mapie_regressor = MapieRegressor(
+ estimator=estimator,
+ method=method,
+ cv=cv,
+ n_jobs=n_jobs,
+ verbose=verbose,
+ conformity_score=check_and_select_conformity_score(
+ conformity_score,
+ BaseRegressionScore,
+ ),
+ random_state=random_state,
+ )
+
+ self._alphas = transform_confidence_level_to_alpha_list(
+ confidence_level
+ )
+ self.is_fitted_and_conformalized = False
+
+ self._predict_params: dict = {}
+
+ def fit_conformalize(
+ self,
+ X: ArrayLike,
+ y: ArrayLike,
+ groups: Optional[ArrayLike] = None,
+ fit_params: Optional[dict] = None,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Estimates the uncertainty of the base regressor in a cross-validation style:
+ fits the base regressor on different folds of the dataset
+ and computes conformity scores on the corresponding out-of-fold data.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ y : ArrayLike
+ Targets
+
+ groups : Optional[ArrayLike] of shape (n_samples,), default=None
+ Groups to pass to the cross-validator; see the sketch in the Notes
+ section below.
+
+ fit_params : Optional[dict], default=None
+ Parameters to pass to the ``fit`` method of the base regressor.
+
+ predict_params : Optional[dict], default=None
+ Parameters to pass to the ``predict`` method of the base regressor.
+ These parameters will also be used in the ``predict_interval``
+ and ``predict`` methods of this CrossConformalRegressor.
+
+ Returns
+ -------
+ Self
+ The fitted CrossConformalRegressor instance.
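+
+ Notes
+ -----
+ Grouped folds can be obtained by combining a group-aware cross-validator
+ with the ``groups`` argument (a sketch; ``GroupKFold`` is scikit-learn's,
+ and ``group_labels`` is a hypothetical array of group ids)::
+
+ from sklearn.model_selection import GroupKFold
+ regressor = CrossConformalRegressor(cv=GroupKFold(n_splits=5))
+ regressor.fit_conformalize(X, y, groups=group_labels)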
+ """
+ raise_error_if_method_already_called(
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ fit_params_, sample_weight = prepare_fit_params_and_sample_weight(
+ fit_params
+ )
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_regressor.fit(
+ X,
+ y,
+ sample_weight,
+ groups,
+ fit_params=fit_params_,
+ predict_params=self._predict_params
+ )
+
+ self.is_fitted_and_conformalized = True
+ return self
+
+ def predict_interval(
+ self,
+ X: ArrayLike,
+ aggregate_predictions: Optional[str] = "mean",
+ minimize_interval_width: bool = False,
+ allow_infinite_bounds: bool = False,
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ Predicts points and intervals.
+
+ If several confidence levels were provided during initialisation, several
+ intervals will be predicted for each sample. See the return signature.
+
+ By default, points are predicted using an aggregation.
+ See the ``aggregate_predictions`` parameter.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ aggregate_predictions : Optional[str], default="mean"
+ The method to predict a point. Options:
+
+ - None: a point is predicted using the regressor trained on the entire dataset
+ - "mean": Averages the predictions of the regressors trained on each
+ cross-validation fold
+ - "median": Aggregates (using median) the predictions of the regressors
+ trained on each cross-validation fold
+
+ minimize_interval_width : bool, default=False
+ If True, attempts to minimize the interval width.
+
+ allow_infinite_bounds : bool, default=False
+ If True, allows prediction intervals with infinite bounds.
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Two arrays:
+
+ - Prediction points, of shape ``(n_samples,)``
+ - Prediction intervals, of shape ``(n_samples, 2, n_confidence_levels)``
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_interval",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ ensemble = self._set_aggregate_predictions_and_return_ensemble(
+ aggregate_predictions
+ )
+ predictions = self._mapie_regressor.predict(
+ X,
+ alpha=self._alphas,
+ optimize_beta=minimize_interval_width,
+ allow_infinite_bounds=allow_infinite_bounds,
+ ensemble=ensemble,
+ **self._predict_params,
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(
+ self,
+ X: ArrayLike,
+ aggregate_predictions: Optional[str] = "mean",
+ ) -> NDArray:
+ """
+ Predicts points.
+
+ By default, points are predicted using an aggregation.
+ See the ``aggregate_predictions`` parameter.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ aggregate_predictions : Optional[str], default="mean"
+ The method to predict a point. Options:
+
+ - None: a point is predicted using the regressor trained on the entire dataset
+ - "mean": Averages the predictions of the regressors trained on each
+ cross-validation fold
+ - "median": Aggregates (using median) the predictions of the regressors
+ trained on each cross-validation fold
+
+ Returns
+ -------
+ NDArray
+ Array of point predictions, with shape ``(n_samples,)``.
+ """
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ ensemble = self._set_aggregate_predictions_and_return_ensemble(
+ aggregate_predictions
+ )
+ predictions = self._mapie_regressor.predict(
+ X, alpha=None, ensemble=ensemble, **self._predict_params,
+ )
+ return cast_point_predictions_to_ndarray(predictions)
+
+ def _set_aggregate_predictions_and_return_ensemble(
+ self, aggregate_predictions: Optional[str]
+ ) -> bool:
+ if not aggregate_predictions:
+ ensemble = False
+ else:
+ ensemble = True
+ self._mapie_regressor._check_agg_function(aggregate_predictions)
+ # A hack here, to allow choosing the aggregation function at prediction time
+ self._mapie_regressor.agg_function = aggregate_predictions
+ return ensemble
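+
+ # Behaviour sketch (illustrative):
+ # aggregate_predictions=None -> ensemble=False: the single model trained
+ # on the full dataset predicts
+ # aggregate_predictions="median" -> ensemble=True, agg_function="median":
+ # fold models' predictions are aggregated by the v0 backend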
+
+
+class JackknifeAfterBootstrapRegressor:
+ """
+ Computes prediction intervals using the jackknife-after-bootstrap technique:
+
+ 1. The ``fit_conformalize`` method estimates the uncertainty of the base regressor
+ using bootstrap sampling. It fits the base regressor on samples of the dataset
+ and computes conformity scores on the out-of-sample data.
+ 2. The ``predict_interval`` computes prediction points and intervals.
+
+ Parameters
+ ----------
+ estimator : RegressorMixin, default=LinearRegression()
+ The base regressor used to predict points.
+
+ confidence_level : Union[float, List[float]], default=0.9
+ The confidence level(s) for the prediction intervals, indicating the
+ desired coverage probability of the prediction intervals. If a float is
+ provided, it represents a single confidence level. If a list is provided,
+ one prediction interval per confidence level is returned.
+
+ conformity_score : Union[str, BaseRegressionScore], default="absolute"
+ The method used to compute conformity scores.
+
+ Valid options:
+
+ - "absolute"
+ - "gamma"
+ - The corresponding subclasses of BaseRegressionScore
+
+ A custom score function inheriting from BaseRegressionScore may also
+ be provided.
+
+ See :ref:`theoretical_description_conformity_scores`.
+
+ method : str, default="plus"
+
+ The method used to compute prediction intervals. Options are:
+
+ - "plus": Based on the conformity scores from each bootstrap sample and
+ the testing prediction.
+ - "minmax": Based on the minimum and maximum conformity scores from
+ each bootstrap sample.
+
+ Note: The "base" method is not mentioned in the conformal inference
+ literature for Jackknife after bootstrap strategies, hence not provided
+ here.
+
+ resampling : Union[int, Subsample], default=30
+ Number of bootstrap resamples or an instance of ``Subsample`` for
+ custom sampling strategy.
+
+ aggregation_method : str, default="mean"
+ Aggregation method for predictions across bootstrap samples. Options:
+
+ - "mean"
+ - "median"
+
+ n_jobs : Optional[int], default=None
+ The number of jobs to run in parallel when applicable.
+
+ verbose : int, default=0
+ Controls the verbosity level. Higher values increase the output
+ details.
+
+ random_state : Optional[Union[int, np.random.RandomState]], default=None
+ A seed or random state instance to ensure reproducibility in any random
+ operations within the regressor.
+
+ Examples
+ --------
+ >>> from mapie_v1.regression import JackknifeAfterBootstrapRegressor
+ >>> from sklearn.datasets import make_regression
+ >>> from sklearn.model_selection import train_test_split
+ >>> from sklearn.linear_model import Ridge
+
+ >>> X_full, y_full = make_regression(n_samples=500, n_features=2, noise=1.0)
+ >>> X, X_test, y, y_test = train_test_split(X_full, y_full)
+
+ >>> mapie_regressor = JackknifeAfterBootstrapRegressor(
+ ... estimator=Ridge(),
+ ... confidence_level=0.95,
+ ... resampling=25,
+ ... ).fit_conformalize(X, y)
+
+ >>> predicted_points, predicted_intervals = mapie_regressor.predict_interval(X_test)
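+
+ A custom bootstrap can be passed through ``resampling`` (a sketch;
+ ``Subsample`` comes from ``mapie.subsample``)::
+
+ from mapie.subsample import Subsample
+ mapie_regressor = JackknifeAfterBootstrapRegressor(
+ estimator=Ridge(),
+ confidence_level=0.95,
+ resampling=Subsample(n_resamplings=50, random_state=0),
+ ).fit_conformalize(X, y)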
+ """
+
+ _VALID_METHODS = ["plus", "minmax"]
+ _VALID_AGGREGATION_METHODS = ["mean", "median"]
+
+ def __init__(
+ self,
+ estimator: RegressorMixin = LinearRegression(),
+ confidence_level: Union[float, Iterable[float]] = 0.9,
+ conformity_score: Union[str, BaseRegressionScore] = "absolute",
+ method: str = "plus",
+ resampling: Union[int, Subsample] = 30,
+ aggregation_method: str = "mean",
+ n_jobs: Optional[int] = None,
+ verbose: int = 0,
+ random_state: Optional[Union[int, np.random.RandomState]] = None,
+ ) -> None:
+ check_if_param_in_allowed_values(
+ method,
+ "method",
+ JackknifeAfterBootstrapRegressor._VALID_METHODS
+ )
+ check_if_param_in_allowed_values(
+ aggregation_method,
+ "aggregation_method",
+ JackknifeAfterBootstrapRegressor._VALID_AGGREGATION_METHODS
+ )
+
+ if isinstance(resampling, int):
+ cv = Subsample(n_resamplings=resampling)
+ elif isinstance(resampling, Subsample):
+ cv = resampling
+ else:
+ raise ValueError(
+ "resampling must be an integer or a Subsample instance"
+ )
+
+ self._mapie_regressor = MapieRegressor(
+ estimator=estimator,
+ method=method,
+ cv=cv,
+ n_jobs=n_jobs,
+ verbose=verbose,
+ agg_function=aggregation_method,
+ conformity_score=check_and_select_conformity_score(
+ conformity_score,
+ BaseRegressionScore,
+ ),
+ random_state=random_state,
+ )
+
+ self._alphas = transform_confidence_level_to_alpha_list(
+ confidence_level
+ )
+
+ self.is_fitted_and_conformalized = False
+ self._predict_params: dict = {}
+
+ def fit_conformalize(
+ self,
+ X: ArrayLike,
+ y: ArrayLike,
+ fit_params: Optional[dict] = None,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Estimates the uncertainty of the base regressor using bootstrap sampling:
+ fits the base regressor on (potentially overlapping) samples of the dataset,
+ and computes conformity scores on the corresponding out-of-sample data.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ y : ArrayLike
+ Targets
+
+ fit_params : Optional[dict], default=None
+ Parameters to pass to the ``fit`` method of the base regressor.
+
+ predict_params : Optional[dict], default=None
+ Parameters to pass to the ``predict`` method of the base regressor.
+ These parameters will also be used in the ``predict_interval``
+ and ``predict`` methods of this JackknifeAfterBootstrapRegressor.
+
+ Returns
+ -------
+ Self
+ The fitted JackknifeAfterBootstrapRegressor instance.
+ """
+ raise_error_if_method_already_called(
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ fit_params_, sample_weight = prepare_fit_params_and_sample_weight(
+ fit_params
+ )
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_regressor.fit(
+ X,
+ y,
+ sample_weight,
+ fit_params=fit_params_,
+ predict_params=self._predict_params,
+ )
+
+ self.is_fitted_and_conformalized = True
+ return self
+
+ def predict_interval(
+ self,
+ X: ArrayLike,
+ ensemble: bool = True,
+ minimize_interval_width: bool = False,
+ allow_infinite_bounds: bool = False,
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ Predicts points and intervals.
+
+ If several confidence levels were provided during initialisation, several
+ intervals will be predicted for each sample. See the return signature.
+
+ By default, points are predicted using an aggregation.
+ See the ``ensemble`` parameter.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Test data for prediction intervals.
+
+ ensemble : bool, default=True
+ If True, a predicted point is an aggregation of the predictions of the
+ regressors trained on each bootstrap sample. This aggregation depends on
+ the ``aggregation_method`` provided during initialisation.
+
+ If False, a point is predicted using the regressor trained on the entire
+ dataset.
+
+ minimize_interval_width : bool, default=False
+ If True, attempts to minimize the interval width.
+
+ allow_infinite_bounds : bool, default=False
+ If True, allows prediction intervals with infinite bounds.
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Two arrays:
+
+ - Prediction points, of shape ``(n_samples,)``
+ - Prediction intervals, of shape ``(n_samples, 2, n_confidence_levels)``
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_interval",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ predictions = self._mapie_regressor.predict(
+ X,
+ alpha=self._alphas,
+ optimize_beta=minimize_interval_width,
+ allow_infinite_bounds=allow_infinite_bounds,
+ ensemble=ensemble,
+ **self._predict_params,
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(
+ self,
+ X: ArrayLike,
+ ensemble: bool = True,
+ ) -> NDArray:
+ """
+ Predicts points.
+
+ By default, points are predicted using an aggregation.
+ See the ``ensemble`` parameter.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Data features for generating point predictions.
+
+ ensemble : bool, default=True
+ If True, a predicted point is an aggregation of the predictions of the
+ regressors trained on each bootstrap sample. This aggregation depends on
+ the ``aggregation_method`` provided during initialisation.
+
+ If False, a point is predicted using the regressor trained on the entire
+ dataset.
+
+ Returns
+ -------
+ NDArray
+ Array of point predictions, with shape ``(n_samples,)``.
+ """
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "fit_conformalize",
+ self.is_fitted_and_conformalized,
+ )
+
+ predictions = self._mapie_regressor.predict(
+ X, alpha=None, ensemble=ensemble, **self._predict_params,
+ )
+ return cast_point_predictions_to_ndarray(predictions)
+
+
+class ConformalizedQuantileRegressor:
+ """
+ Computes prediction intervals using the conformalized quantile regression technique:
+
+ 1. The ``fit`` method fits three models to the training data using the provided
+ regressor: a model to predict the target, and models to predict upper
+ and lower quantiles around the target.
+ 2. The ``conformalize`` method estimates the uncertainty of the quantile models
+ using the conformity set.
+ 3. The ``predict_interval`` computes prediction points and intervals.
+
+ Parameters
+ ----------
+ estimator : Union[``RegressorMixin``, ``Pipeline``, \
+``List[Union[RegressorMixin, Pipeline]]``]
+ The regressor used to predict points and quantiles.
+
+ When ``prefit=False`` (default), a single regressor that supports the quantile
+ loss must be passed. Valid options:
+
+ - ``sklearn.linear_model.QuantileRegressor``
+ - ``sklearn.ensemble.GradientBoostingRegressor``
+ - ``sklearn.ensemble.HistGradientBoostingRegressor``
+ - ``lightgbm.LGBMRegressor``
+
+ When ``prefit=True``, a list of three fitted quantile regressors predicting the
+ lower, upper, and median quantiles must be passed (in that order).
+ These quantiles must be:
+
+ - ``lower quantile = (1 - confidence_level) / 2``
+ - ``upper quantile = (1 + confidence_level) / 2``
+ - ``median quantile = 0.5``
+
+ confidence_level : float, default=0.9
+ The confidence level for the prediction intervals, indicating the
+ desired coverage probability of the prediction intervals.
+
+ prefit : bool, default=False
+ If True, three fitted quantile regressors must be provided, and the ``fit``
+ method must be skipped.
+
+ If False, the three regressors will be fitted during the ``fit`` method.
+
+ Examples
+ --------
+ >>> from mapie_v1.regression import ConformalizedQuantileRegressor
+ >>> from mapie_v1.utils import train_conformalize_test_split
+ >>> from sklearn.datasets import make_regression
+ >>> from sklearn.linear_model import QuantileRegressor
+
+ >>> X, y = make_regression(n_samples=500, n_features=2, noise=1.0)
+ >>> (
+ ... X_train, X_conformalize, X_test,
+ ... y_train, y_conformalize, y_test
+ ... ) = train_conformalize_test_split(
+ ... X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
+ ... )
+
+ >>> mapie_regressor = ConformalizedQuantileRegressor(
+ ... estimator=QuantileRegressor(),
+ ... confidence_level=0.95,
+ ... ).fit(X_train, y_train).conformalize(X_conformalize, y_conformalize)
+
+ >>> predicted_points, predicted_intervals = mapie_regressor.predict_interval(X_test)
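+
+ In ``prefit=True`` mode, three fitted quantile regressors are passed
+ instead, ordered as documented above (a sketch, assuming gradient boosting
+ with the quantile loss)::
+
+ from sklearn.ensemble import GradientBoostingRegressor
+ quantiles = [0.025, 0.975, 0.5] # lower, upper, median for confidence 0.95
+ estimators = [
+ GradientBoostingRegressor(loss="quantile", alpha=q).fit(X_train, y_train)
+ for q in quantiles
+ ]
+ mapie_regressor = ConformalizedQuantileRegressor(
+ estimators, confidence_level=0.95, prefit=True
+ ).conformalize(X_conformalize, y_conformalize)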
+ """
+
+ def __init__(
+ self,
+ estimator: Optional[
+ Union[
+ RegressorMixin,
+ Pipeline,
+ List[Union[RegressorMixin, Pipeline]]
+ ]
+ ] = None,
+ confidence_level: float = 0.9,
+ prefit: bool = False,
+ ) -> None:
+ self._alpha = transform_confidence_level_to_alpha(confidence_level)
+ self._prefit = prefit
+ self._is_fitted = prefit
+ self._is_conformalized = False
+
+ self._mapie_quantile_regressor = MapieQuantileRegressor(
+ estimator=estimator,
+ method="quantile",
+ cv="prefit" if prefit else "split",
+ alpha=self._alpha,
+ )
+
+ self._sample_weight: Optional[ArrayLike] = None
+ self._predict_params: dict = {}
+
+ def fit(
+ self,
+ X_train: ArrayLike,
+ y_train: ArrayLike,
+ fit_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Fits three models using the regressor provided at initialisation:
+
+ - a model to predict the target
+ - a model to predict the upper quantile of the target
+ - a model to predict the lower quantile of the target
+
+ Parameters
+ ----------
+ X_train : ArrayLike
+ Training data features.
+
+ y_train : ArrayLike
+ Training data targets.
+
+ fit_params : Optional[dict], default=None
+ Parameters to pass to the ``fit`` method of the regressors.
+
+ Returns
+ -------
+ Self
+ The fitted ConformalizedQuantileRegressor instance.
+ """
+ raise_error_if_fit_called_in_prefit_mode(self._prefit)
+ raise_error_if_method_already_called("fit", self._is_fitted)
+
+ fit_params_, self._sample_weight = prepare_fit_params_and_sample_weight(
+ fit_params
+ )
+ self._mapie_quantile_regressor._initialize_fit_conformalize()
+ self._mapie_quantile_regressor._fit_estimators(
+ X=X_train,
+ y=y_train,
+ sample_weight=self._sample_weight,
+ **fit_params_,
+ )
+
+ self._is_fitted = True
+ return self
+
+ def conformalize(
+ self,
+ X_conformalize: ArrayLike,
+ y_conformalize: ArrayLike,
+ predict_params: Optional[dict] = None,
+ ) -> Self:
+ """
+ Estimates the uncertainty of the quantile regressors by computing
+ conformity scores on the conformity set.
+
+ Parameters
+ ----------
+ X_conformalize : ArrayLike
+ Features of the conformity set.
+
+ y_conformalize : ArrayLike
+ Targets of the conformity set.
+
+ predict_params : Optional[dict], default=None
+ Parameters to pass to the ``predict`` method of the regressors.
+ These parameters will also be used in the ``predict_interval``
+ and ``predict`` methods of this ConformalizedQuantileRegressor.
+
+ Returns
+ -------
+ Self
+ The ConformalizedQuantileRegressor instance.
+ """
+ raise_error_if_previous_method_not_called(
+ "conformalize",
+ "fit",
+ self._is_fitted,
+ )
+ raise_error_if_method_already_called(
+ "conformalize",
+ self._is_conformalized,
+ )
+
+ self._predict_params = prepare_params(predict_params)
+ self._mapie_quantile_regressor.conformalize(
+ X_conformalize,
+ y_conformalize,
+ **self._predict_params
+ )
+
+ self._is_conformalized = True
+ return self
+
+ def predict_interval(
+ self,
+ X: ArrayLike,
+ minimize_interval_width: bool = False,
+ allow_infinite_bounds: bool = False,
+ symmetric_correction: bool = False,
+ ) -> Tuple[NDArray, NDArray]:
+ """
+ Predicts points and intervals.
+
+ The returned NDArray containing the prediction intervals is of shape
+ (n_samples, 2, 1). The third dimension is unnecessary, but kept for consistency
+ with the other conformal regression methods available in MAPIE.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ minimize_interval_width : bool, default=False
+ If True, attempts to minimize the interval width.
+
+ allow_infinite_bounds : bool, default=False
+ If True, allows prediction intervals with infinite bounds.
+
+ symmetric_correction : bool, default=False
+ To produce prediction intervals, the conformalized quantile regression
+ technique corrects the predictions of the upper and lower quantile
+ regressors by adding a constant.
+
+ If ``symmetric_correction`` is set to ``False``, this constant is different
+ for the upper and the lower quantile predictions. If set to ``True``,
+ this constant is the same for both.
+
+ Returns
+ -------
+ Tuple[NDArray, NDArray]
+ Two arrays:
+
+ - Prediction points, of shape ``(n_samples,)``
+ - Prediction intervals, of shape ``(n_samples, 2, 1)``
+ """
+ raise_error_if_previous_method_not_called(
+ "predict_interval",
+ "conformalize",
+ self._is_conformalized,
+ )
+
+ predictions = self._mapie_quantile_regressor.predict(
+ X,
+ optimize_beta=minimize_interval_width,
+ allow_infinite_bounds=allow_infinite_bounds,
+ symmetry=symmetric_correction,
+ **self._predict_params
+ )
+ return cast_predictions_to_ndarray_tuple(predictions)
+
+ def predict(
+ self,
+ X: ArrayLike,
+ ) -> NDArray:
+ """
+ Predicts points.
+
+ Parameters
+ ----------
+ X : ArrayLike
+ Features
+
+ Returns
+ -------
+ NDArray
+ Array of point predictions with shape ``(n_samples,)``.
+ """
+ raise_error_if_previous_method_not_called(
+ "predict",
+ "conformalize",
+ self._is_conformalized,
+ )
+
+ estimator = self._mapie_quantile_regressor
+ predictions, _ = estimator.predict(X, **self._predict_params)
+ return predictions
diff --git a/mapie_v1/utils.py b/mapie_v1/utils.py
new file mode 100644
index 000000000..fecec8d9b
--- /dev/null
+++ b/mapie_v1/utils.py
@@ -0,0 +1,271 @@
+import copy
+from typing import Union, Tuple, cast, Optional, Iterable
+from collections.abc import Iterable as IterableType
+
+from numpy.typing import ArrayLike, NDArray
+from sklearn.model_selection import BaseCrossValidator
+from sklearn.model_selection import train_test_split
+from decimal import Decimal
+from math import isclose
+
+
+def train_conformalize_test_split(
+ X: NDArray,
+ y: NDArray,
+ train_size: Union[float, int],
+ conformalize_size: Union[float, int],
+ test_size: Union[float, int],
+ random_state: Optional[int] = None,
+ shuffle: bool = True,
+) -> Tuple[NDArray, NDArray, NDArray, NDArray, NDArray, NDArray]:
+ """Split arrays or matrices into train, conformalize and test subsets.
+
+ Utility similar to sklearn.model_selection.train_test_split
+ for splitting data into 3 sets.
+
+ We advise giving most of the data points to the train set
+ and at least 200 data points to the conformalize set.
+
+ Parameters
+ ----------
+ X : indexable with same type and length / shape[0] as "y"
+ Allowed inputs are lists, numpy arrays, scipy-sparse
+ matrices or pandas dataframes.
+
+ y : indexable with same type and length / shape[0] as "X"
+ Allowed inputs are lists, numpy arrays, scipy-sparse
+ matrices or pandas dataframes.
+
+ train_size : float or int
+ If float, should be between 0.0 and 1.0 and represent the
+ proportion of the dataset to include in the train split. If
+ int, represents the absolute number of train samples.
+
+ conformalize_size : float or int
+ If float, should be between 0.0 and 1.0 and represent the proportion
+ of the dataset to include in the conformalize split. If int, represents the
+ absolute number of conformalize samples.
+
+ test_size : float or int
+ If float, should be between 0.0 and 1.0 and represent the proportion
+ of the dataset to include in the test split. If int, represents the
+ absolute number of test samples.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls the shuffling applied to the data before applying the split.
+ Pass an int for reproducible output across multiple function calls.
+
+ shuffle : bool, default=True
+ Whether or not to shuffle the data before splitting.
+
+ Returns
+ -------
+ X_train, X_conformalize, X_test, y_train, y_conformalize, y_test :
+ 6 array-like splits of inputs.
+ Output types are the same as the input types.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from mapie_v1.utils import train_conformalize_test_split
+ >>> X, y = np.arange(10).reshape((5, 2)), range(5)
+ >>> X
+ array([[0, 1],
+ [2, 3],
+ [4, 5],
+ [6, 7],
+ [8, 9]])
+ >>> list(y)
+ [0, 1, 2, 3, 4]
+ >>> (
+ ... X_train, X_conformalize, X_test,
+ ... y_train, y_conformalize, y_test
+ ... ) = train_conformalize_test_split(
+ ... X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
+ ... )
+ >>> X_train
+ array([[8, 9],
+ [0, 1],
+ [6, 7]])
+ >>> X_conformalize
+ array([[2, 3]])
+ >>> X_test
+ array([[4, 5]])
+ >>> y_train
+ [4, 0, 3]
+ >>> y_conformalize
+ [1]
+ >>> y_test
+ [2]
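+
+ Sizes can also be given as absolute numbers of samples; they must then sum
+ to ``len(X)`` (a sketch)::
+
+ train_conformalize_test_split(
+ X, y, train_size=3, conformalize_size=1, test_size=1
+ )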
+ """
+
+ _check_train_conf_test_proportions(
+ train_size, conformalize_size, test_size, len(X)
+ )
+
+ X_train, X_conformalize_test, y_train, y_conformalize_test = train_test_split(
+ X, y,
+ train_size=train_size,
+ random_state=random_state,
+ shuffle=shuffle,
+ )
+
+ if isinstance(train_size, float):
+ test_size_after_split = test_size / (1 - train_size)
+ else:
+ test_size_after_split = test_size
+
+ X_conformalize, X_test, y_conformalize, y_test = train_test_split(
+ X_conformalize_test, y_conformalize_test,
+ test_size=test_size_after_split,
+ random_state=random_state,
+ shuffle=shuffle,
+ )
+
+ return X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
+
+
+def _check_train_conf_test_proportions(
+ train_size: Union[float, int],
+ conformalize_size: Union[float, int],
+ test_size: Union[float, int],
+ dataset_size: int,
+) -> None:
+ count_input_proportions = sum([test_size, train_size, conformalize_size])
+
+ if isinstance(train_size, float) and \
+ isinstance(conformalize_size, float) and \
+ isinstance(test_size, float):
+ if not isclose(1, count_input_proportions):
+ raise ValueError(
+ "When using floats, train_size + conformalize_size"
+ " + test_size must be equal to 1."
+ )
+
+ elif isinstance(train_size, int) and \
+ isinstance(conformalize_size, int) and \
+ isinstance(test_size, int):
+ if count_input_proportions != dataset_size:
+ raise ValueError(
+ "When using integers, train_size + "
+ "conformalize_size + test_size must be equal "
+ "to the size of the input data."
+ )
+
+ else:
+ raise TypeError(
+ "train_size, conformalize_size and test_size"
+ "should be either all int or all float."
+ )
+
+
+def transform_confidence_level_to_alpha(
+ confidence_level: float,
+) -> float:
+ # Using decimals to avoid weird-looking float approximations
+ # when computing alpha = 1 - confidence_level
+ # Such approximations arise even with simple confidence levels like 0.9
+ confidence_level_decimal = Decimal(str(confidence_level))
+ alpha_decimal = Decimal("1") - confidence_level_decimal
+ return float(alpha_decimal)
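+
+
+# Illustration of the float artifact avoided above:
+# 1 - 0.9 evaluates to 0.09999999999999998 in plain float arithmetic,
+# whereas transform_confidence_level_to_alpha(0.9) returns 0.1.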
+
+
+def transform_confidence_level_to_alpha_list(
+ confidence_level: Union[float, Iterable[float]]
+) -> Iterable[float]:
+ if isinstance(confidence_level, IterableType):
+ confidence_levels = confidence_level
+ else:
+ confidence_levels = [confidence_level]
+ return [
+ transform_confidence_level_to_alpha(confidence_level)
+ for confidence_level in confidence_levels
+ ]
+
+
+# Could be replaced by using sklearn _validate_params (ideally wrapping it)
+def check_if_param_in_allowed_values(
+ param: str, param_name: str, allowed_values: list
+) -> None:
+ if param not in allowed_values:
+ raise ValueError(
+ f"'{param}' option not valid for parameter '{param_name}'"
+ f"Available options are: {allowed_values}"
+ )
+
+
+def check_cv_not_string(cv: Union[int, str, BaseCrossValidator]) -> None:
+ if isinstance(cv, str):
+ raise ValueError(
+ "'cv' string options not available in MAPIE >= v1.0.0"
+ "Use SplitConformalClassifier or SplitConformalRegressor"
+ 'for "split" and "prefit" modes.'
+ )
+
+
+def cast_point_predictions_to_ndarray(
+ point_predictions: Union[NDArray, Tuple[NDArray, NDArray]]
+) -> NDArray:
+ if isinstance(point_predictions, tuple):
+ raise TypeError(
+ "Developer error: use this function to cast point predictions only, "
+ "not points + intervals."
+ )
+ return cast(NDArray, point_predictions)
+
+
+def cast_predictions_to_ndarray_tuple(
+ predictions: Union[NDArray, Tuple[NDArray, NDArray]]
+) -> Tuple[NDArray, NDArray]:
+ if not isinstance(predictions, tuple):
+ raise TypeError(
+ "Developer error: use this function to cast predictions containing points "
+ "and intervals, not points only."
+ )
+ return cast(Tuple[NDArray, NDArray], predictions)
+
+
+def prepare_params(params: Union[dict, None]) -> dict:
+ return copy.deepcopy(params) if params else {}
+
+
+def prepare_fit_params_and_sample_weight(
+ fit_params: Union[dict, None]
+) -> Tuple[dict, Optional[ArrayLike]]:
+ fit_params_ = prepare_params(fit_params)
+ sample_weight = fit_params_.pop("sample_weight", None)
+ return fit_params_, sample_weight
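+
+
+# Sketch: sample_weight is popped so the v0 backends can receive it as a
+# dedicated argument, e.g. (with ``w`` a hypothetical weight array):
+# prepare_fit_params_and_sample_weight({"sample_weight": w, "verbose": 0})
+# -> ({"verbose": 0}, w)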
+
+
+def raise_error_if_previous_method_not_called(
+ current_method_name: str,
+ previous_method_name: str,
+ was_previous_method_called: bool,
+) -> None:
+ if not was_previous_method_called:
+ raise ValueError(
+ f"Incorrect method order: call {previous_method_name} "
+ f"before calling {current_method_name}."
+ )
+
+
+def raise_error_if_method_already_called(
+ method_name: str,
+ was_method_called: bool,
+) -> None:
+ if was_method_called:
+ raise ValueError(
+ f"{method_name} method already called. "
+ f"MAPIE does not currently support calling {method_name} several times."
+ )
+
+
+def raise_error_if_fit_called_in_prefit_mode(
+ is_mode_prefit: bool,
+) -> None:
+ if is_mode_prefit:
+ raise ValueError(
+ "The fit method must be skipped when the prefit parameter is set to True. "
+ "Use the conformalize method directly after instanciation."
+ )
diff --git a/notebooks/calibration/top_label_calibration.ipynb b/notebooks/calibration/top_label_calibration.ipynb
index 40f768106..2329185c0 100644
--- a/notebooks/calibration/top_label_calibration.ipynb
+++ b/notebooks/calibration/top_label_calibration.ipynb
@@ -21,10 +21,10 @@
"output_type": "stream",
"text": [
"Cloning into 'df-posthoc-calibration'...\n",
- "remote: Enumerating objects: 309, done.\u001b[K\n",
- "remote: Counting objects: 100% (197/197), done.\u001b[K\n",
- "remote: Compressing objects: 100% (105/105), done.\u001b[K\n",
- "remote: Total 309 (delta 101), reused 183 (delta 89), pack-reused 112\u001b[K\n",
+ "remote: Enumerating objects: 309, done.\u001B[K\n",
+ "remote: Counting objects: 100% (197/197), done.\u001B[K\n",
+ "remote: Compressing objects: 100% (105/105), done.\u001B[K\n",
+ "remote: Total 309 (delta 101), reused 183 (delta 89), pack-reused 112\u001B[K\n",
"Receiving objects: 100% (309/309), 60.76 MiB | 8.86 MiB/s, done.\n",
"Resolving deltas: 100% (132/132), done.\n",
"Note: switching to '109da93c1487cb38ee51fcac47088cdd29854999'.\n",
@@ -81,7 +81,7 @@
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"from mapie.calibration import MapieCalibrator\n",
- "from mapie.metrics import top_label_ece\n",
+ "from mapie.metrics.calibration import top_label_ece\n",
"\n",
"import sys\n",
"\n",
diff --git a/notebooks/classification/Cifar10.ipynb b/notebooks/classification/Cifar10.ipynb
index 0d2b2400d..f7e7ece20 100755
--- a/notebooks/classification/Cifar10.ipynb
+++ b/notebooks/classification/Cifar10.ipynb
@@ -76,7 +76,7 @@
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import label_binarize\n",
"\n",
- "from mapie.metrics import classification_coverage_score\n",
+ "from mapie.metrics.classification import classification_coverage_score\n",
"from mapie.classification import MapieClassifier\n",
"\n",
"warnings.filterwarnings('ignore')\n",
@@ -301,7 +301,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\u001b[1mDownloading and preparing dataset 162.17 MiB (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to ~/tensorflow_datasets/cifar10/3.0.2...\u001b[0m\n"
+ "\u001B[1mDownloading and preparing dataset 162.17 MiB (download: 162.17 MiB, generated: 132.40 MiB, total: 294.58 MiB) to ~/tensorflow_datasets/cifar10/3.0.2...\u001B[0m\n"
]
},
{
@@ -441,7 +441,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "\u001b[1mDataset cifar10 downloaded and prepared to ~/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.\u001b[0m\n"
+ "\u001B[1mDataset cifar10 downloaded and prepared to ~/tensorflow_datasets/cifar10/3.0.2. Subsequent calls will reuse this data.\u001B[0m\n"
]
},
{
diff --git a/notebooks/regression/exoplanets.ipynb b/notebooks/regression/exoplanets.ipynb
index f1fecaf99..bdff00d26 100755
--- a/notebooks/regression/exoplanets.ipynb
+++ b/notebooks/regression/exoplanets.ipynb
@@ -26,35 +34,30 @@
},
{
"cell_type": "code",
- "execution_count": 1,
"id": "03861a87-5d8e-40d7-bb2b-9b40e7c716f9",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: mapie in /Users/vtaquet/Library/CloudStorage/OneDrive-Quantmetry/Perso/missions/simai/MAPIE (0.3.2)\n",
- "Requirement already satisfied: scikit-learn in /Users/vtaquet/miniforge3/envs/mapie-notebooks/lib/python3.10/site-packages (from mapie) (1.0.1)\n",
- "Requirement already satisfied: numpy>=1.21 in /Users/vtaquet/miniforge3/envs/mapie-notebooks/lib/python3.10/site-packages (from mapie) (1.22.3)\n",
- "Requirement already satisfied: scipy>=1.1.0 in /Users/vtaquet/miniforge3/envs/mapie-notebooks/lib/python3.10/site-packages (from scikit-learn->mapie) (1.7.3)\n",
- "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/vtaquet/miniforge3/envs/mapie-notebooks/lib/python3.10/site-packages (from scikit-learn->mapie) (2.2.0)\n",
- "Requirement already satisfied: joblib>=0.11 in /Users/vtaquet/miniforge3/envs/mapie-notebooks/lib/python3.10/site-packages (from scikit-learn->mapie) (1.1.0)\n"
- ]
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:41.027075Z",
+ "start_time": "2025-04-03T13:46:40.994718Z"
}
- ],
+ },
"source": [
- "install_mapie = True\n",
+ "install_mapie = False\n",
"if install_mapie:\n",
" !pip install mapie"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 3
},
{
"cell_type": "code",
- "execution_count": 2,
"id": "c067a082-3d1b-4662-b24f-3322e75f6d30",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:41.666948Z",
+ "start_time": "2025-04-03T13:46:41.092780Z"
+ }
+ },
"source": [
"from typing_extensions import TypedDict\n",
"from typing import Union\n",
@@ -76,12 +79,14 @@
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
- "from mapie.metrics import regression_coverage_score\n",
- "from mapie.regression import MapieRegressor\n",
+ "from mapie.metrics.regression import regression_coverage_score\n",
+ "from mapie_v1.regression import CrossConformalRegressor, JackknifeAfterBootstrapRegressor\n",
"from mapie.subsample import Subsample\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 4
},
{
"cell_type": "markdown",
@@ -101,20 +106,32 @@
},
{
"cell_type": "code",
- "execution_count": 3,
"id": "38e46833-67d7-4162-9fec-cd3db0d9320f",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:42.271399Z",
+ "start_time": "2025-04-03T13:46:41.677837Z"
+ }
+ },
"source": [
"url_file = \"https://raw.githubusercontent.com/scikit-learn-contrib/MAPIE/master/notebooks/regression/exoplanets_mass.csv\"\n",
"exo_df = pd.read_csv(url_file, index_col=0)"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 5
},
{
"cell_type": "code",
- "execution_count": 4,
"id": "cb401421-e2ee-48ff-af59-fa4b4b2af5f2",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:42.324984Z",
+ "start_time": "2025-04-03T13:46:42.305416Z"
+ }
+ },
+ "source": [
+ "exo_df.info()"
+ ],
"outputs": [
{
"name": "stdout",
@@ -152,9 +169,7 @@
]
}
],
- "source": [
- "exo_df.info()"
- ]
+ "execution_count": 6
},
{
"cell_type": "markdown",
@@ -174,10 +189,13 @@
},
{
"cell_type": "code",
- "execution_count": 5,
"id": "d4cadab1-b8e8-4ea6-bc31-148f25494d39",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:42.356187Z",
+ "start_time": "2025-04-03T13:46:42.348793Z"
+ }
+ },
"source": [
"exo_df[\"Stellar_Mass_[Solar_mass]\"] = exo_df[\"Stellar_Mass_[Solar_mass]\"].replace(0, np.nan)\n",
"vars2log = [\n",
@@ -191,27 +209,107 @@
"]\n",
"for var in vars2log:\n",
" exo_df[var+\"_log\"] = np.log(exo_df[var])"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 7
},
{
"cell_type": "code",
- "execution_count": 6,
"id": "90f427b0-10f8-4c94-83eb-ff6c19f93001",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:42.404629Z",
+ "start_time": "2025-04-03T13:46:42.400155Z"
+ }
+ },
"source": [
"vars2keep = list(set(exo_df.columns) - set(vars2log))\n",
"exo_df = exo_df[vars2keep]"
- ]
+ ],
+ "outputs": [],
+ "execution_count": 8
},
{
"cell_type": "code",
- "execution_count": 7,
"id": "08416c2b-1db4-4ccd-9884-9e23b2671d25",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-04-03T13:46:42.464362Z",
+ "start_time": "2025-04-03T13:46:42.447896Z"
+ }
+ },
+ "source": [
+ "exo_df.head()"
+ ],
"outputs": [
{
"data": {
+ "text/plain": [
+ " Planet_Eccentricity Planet_Mass_Provenance Number_Stars \\\n",
+ "1 NaN Mass 1 \n",
+ "2 NaN Mass 1 \n",
+ "3 NaN Mass 1 \n",
+ "4 NaN Mass 1 \n",
+ "5 NaN Mass 1 \n",
+ "\n",
+ " Stellar_Metallicity_Radio Planet_Orbital_Period_[day]_log Discovery_Method \\\n",
+ "1 NaN NaN Imaging \n",
+ "2 NaN NaN Imaging \n",
+ "3 NaN NaN Imaging \n",
+ "4 NaN NaN Imaging \n",
+ "5 [Fe/H] NaN Imaging \n",
+ "\n",
+ " Planet_Radius_[Earth_radius]_log Solution_type \\\n",
+ "1 NaN Published Confirmed \n",
+ "2 NaN Published Confirmed \n",
+ "3 NaN Published Confirmed \n",
+ "4 2.925685 Published Confirmed \n",
+ "5 NaN Published Confirmed \n",
+ "\n",
+ " Stellar_Mass_[Solar_mass]_log System_Ks_Magnitude ... \\\n",
+ "1 -0.162519 8.916 ... \n",
+ "2 -0.162519 8.916 ... \n",
+ "3 -0.478036 8.916 ... \n",
+ "4 -0.162519 8.916 ... \n",
+ "5 -1.771957 10.386 ... \n",
+ "\n",
+ " Planet_Eq_Temperature_[K] Stellar_Effective_Temperature_[K]_log \\\n",
+ "1 1700.0 8.308938 \n",
+ "2 1700.0 8.308938 \n",
+ "3 1800.0 8.292298 \n",
+ "4 1800.0 8.308938 \n",
+ "5 1450.0 8.039157 \n",
+ "\n",
+ " Planet_Mass_[Earth_mass]_log System_Gaia_Magnitude \\\n",
+ "1 8.294050 12.0572 \n",
+ "2 8.006368 12.0572 \n",
+ "3 8.006368 12.0572 \n",
+ "4 7.841100 12.0572 \n",
+ "5 7.147806 14.3374 \n",
+ "\n",
+ " Planet_Orbital_SemiMajorAxis_[day]_log Number_Planets \\\n",
+ "1 5.799093 1 \n",
+ "2 5.799093 1 \n",
+ "3 5.799093 1 \n",
+ "4 5.799093 1 \n",
+ "5 4.770685 1 \n",
+ "\n",
+ " Stellar_Metallicity_[dex] Discovery_Year \\\n",
+ "1 NaN 2008 \n",
+ "2 NaN 2008 \n",
+ "3 NaN 2008 \n",
+ "4 NaN 2008 \n",
+ "5 0.01 2021 \n",
+ "\n",
+ " Stellar_Surface_Gravity_[log10(cm/s**2)] System_Distance_[pc] \n",
+ "1 NaN 139.135 \n",
+ "2 NaN 139.135 \n",
+ "3 4.0 139.135 \n",
+ "4 NaN 139.135 \n",
+ "5 4.5 128.484 \n",
+ "\n",
+ "[5 rows x 22 columns]"
+ ],
"text/html": [
"\n",
"