From 534e833c132626e4e6784b09e6a43f00d73cc1b1 Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Mon, 15 Apr 2024 17:03:21 +0200
Subject: [PATCH 1/9] Try : beginning of the Ensemble_Classifier

---
 mapie/estimator/Ensemble_Classifier.py       | 425 ++++++++++++++++++
 .../Ensemble_Estimator_Classification.py     |  36 ++
 2 files changed, 461 insertions(+)
 create mode 100644 mapie/estimator/Ensemble_Classifier.py
 create mode 100644 mapie/estimator/Ensemble_Estimator_Classification.py

diff --git a/mapie/estimator/Ensemble_Classifier.py b/mapie/estimator/Ensemble_Classifier.py
new file mode 100644
index 000000000..186a0c95f
--- /dev/null
+++ b/mapie/estimator/Ensemble_Classifier.py
@@ -0,0 +1,425 @@
+###TEST####
+
+from __future__ import annotations
+import warnings
+
+from typing import Any, Iterable, List, Optional, Tuple, Union, cast
+
+import numpy as np
+from joblib import Parallel, delayed
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+from sklearn.model_selection import BaseCrossValidator, ShuffleSplit
+from sklearn.preprocessing import LabelEncoder, label_binarize
+from sklearn.utils import _safe_indexing, check_random_state
+from sklearn.utils.multiclass import (check_classification_targets,
+                                      type_of_target)
+from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
+                                      indexable)
+from mapie._typing import ArrayLike, NDArray
+from mapie.aggregation_functions import aggregate_all, phi2D
+from mapie.estimator.interface import EnsembleEstimator
+from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
+                         fit_estimator)
+
+
+class EnsembleClassifier(EnsembleEstimator):
+
+    raps_valid_cv_ = ["prefit", "split"]
+    valid_methods_ = [
+        "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps"
+    ]
+    fit_attributes = [
+        "single_estimator_",
+        "estimators_",
+        "k_",
+        "n_features_in_",
+        "conformity_scores_",
+        "classes_",
+        "label_encoder_"
+    ]
+
+    # TODO: in the __init__ section, not sure whether to keep the "None" defaults
+    def __init__(
+        self,
+        estimator: Optional[ClassifierMixin] = None,
+        method: str = "lac",
+        cv: Optional[Union[int, str, BaseCrossValidator]] = None,
+        agg_function: Optional[str] = None,
+        n_jobs: Optional[int] = None,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
+        test_size: Optional[Union[int, float]] = None,
+        verbose: int = 0
+    ) -> None:
+        self.estimator = estimator
+        self.method = method
+        self.cv = cv
+        self.agg_function = agg_function  # TODO: decide whether to keep this argument (not present in MapieClassifier)
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.test_size = test_size
+        self.verbose = verbose
+
+    def _fit_and_predict_oof_estimator(
+        self,
+        estimator: ClassifierMixin,
+        X: ArrayLike,
+        y: ArrayLike,
+        train_index: ArrayLike,
+        val_index: ArrayLike,
+        k: int,
+        sample_weight: Optional[ArrayLike] = None,
+        **fit_params,
+    ) -> Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]:
+
+        X_train = _safe_indexing(X, train_index)
+        y_train = _safe_indexing(y, train_index)
+        X_val = _safe_indexing(X, val_index)
+        y_val = _safe_indexing(y, val_index)
+        # TODO: resume here
+        if not (sample_weight is None):
+            sample_weight = _safe_indexing(sample_weight, train_index)
+            sample_weight = cast(NDArray, sample_weight)
+
+        estimator = fit_estimator(
+            estimator,
+            X_train,
+            y_train,
+            sample_weight=sample_weight,
+            **fit_params
+        )
+        return estimator
+
+    @staticmethod
+    def _predict_oof_estimator(
+        estimator: RegressorMixin,
+        X: ArrayLike,
+        val_index: ArrayLike,
+    ) -> Tuple[NDArray, ArrayLike]:
+        """
+        Perform predictions on a single out-of-fold model on a validation set.
+
+        Parameters
+        ----------
+        estimator: RegressorMixin
+            Estimator to train.
+
+        X: ArrayLike of shape (n_samples, n_features)
+            Input data.
+
+        val_index: ArrayLike of shape (n_samples_val)
+            Validation data indices.
+
+        Returns
+        -------
+        Tuple[NDArray, ArrayLike]
+            Predictions of estimator from val_index of X.
+        """
+        X_val = _safe_indexing(X, val_index)
+        if _num_samples(X_val) > 0:
+            y_pred = estimator.predict(X_val)
+        else:
+            y_pred = np.array([])
+        return y_pred, val_index
+
+    def _aggregate_with_mask(
+        self,
+        x: NDArray,
+        k: NDArray
+    ) -> NDArray:
+        """
+        Take the array of predictions, made by the refitted estimators,
+        on the testing set, and the 1-or-nan array indicating for each training
+        sample which one to integrate, and aggregate to produce phi-{t}(x_t)
+        for each training sample x_t.
+
+        Parameters
+        ----------
+        x: ArrayLike of shape (n_samples_test, n_estimators)
+            Array of predictions, made by the refitted estimators,
+            for each sample of the testing set.
+
+        k: ArrayLike of shape (n_samples_training, n_estimators)
+            1-or-nan array: indicates whether to integrate the prediction
+            of a given estimator into the aggregation, for each training
+            sample.
+
+        Returns
+        -------
+        ArrayLike of shape (n_samples_test,)
+            Array of aggregated predictions for each testing sample.
+        """
+        if self.method in self.no_agg_methods_ or self.use_split_method_:
+            raise ValueError(
+                "There should not be aggregation of predictions "
+                f"if cv is in '{self.no_agg_cv_}', if cv >=2 "
+                f"or if method is in '{self.no_agg_methods_}'."
+            )
+        elif self.agg_function == "median":
+            return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1))
+        # To aggregate with mean() the aggregation could be done
+        # with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1)).
+        # However, phi2D contains a np.apply_along_axis loop which
+        # is much slower than the matrix multiplication that can
+        # be used to compute the means.
+        elif self.agg_function in ["mean", None]:
+            K = np.nan_to_num(k, nan=0.0)
+            return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T)
+        else:
+            raise ValueError("The value of self.agg_function is not correct")
+
+    def _pred_multi(self, X: ArrayLike) -> NDArray:
+        """
+        Return a prediction per train sample for each test sample, by
+        aggregation with matrix ``k_``.
+
+        Parameters
+        ----------
+        X: ArrayLike of shape (n_samples_test, n_features)
+            Input data
+
+        Returns
+        -------
+        NDArray of shape (n_samples_test, n_samples_train)
+        """
+        y_pred_multi = np.column_stack(
+            [e.predict(X) for e in self.estimators_]
+        )
+        # At this point, y_pred_multi is of shape
+        # (n_samples_test, n_estimators_). The method
+        # ``_aggregate_with_mask`` fits it to the right size
+        # thanks to the shape of k_.
+        y_pred_multi = self._aggregate_with_mask(y_pred_multi, self.k_)
+        return y_pred_multi
+
+    def predict_calib(
+        self,
+        X: ArrayLike,
+        y: Optional[ArrayLike] = None,
+        groups: Optional[ArrayLike] = None
+    ) -> NDArray:
+        """
+        Perform predictions on X: the calibration set.
+
+        Parameters
+        ----------
+        X: ArrayLike of shape (n_samples_test, n_features)
+            Input data
+
+        y: Optional[ArrayLike] of shape (n_samples_test,)
+            Input labels.
+
+            By default ``None``.
+
+        groups: Optional[ArrayLike] of shape (n_samples_test,)
+            Group labels for the samples used while splitting the dataset into
+            train/test set.
+
+            By default ``None``.
+
+        Returns
+        -------
+        NDArray of shape (n_samples_test, 1)
+            The predictions.
+        """
+        check_is_fitted(self, self.fit_attributes)
+
+        if self.cv == "prefit":
+            y_pred = self.single_estimator_.predict(X)
+        else:
+            if self.method == "naive":
+                y_pred = self.single_estimator_.predict(X)
+            else:
+                cv = cast(BaseCrossValidator, self.cv)
+                outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
+                    delayed(self._predict_oof_estimator)(
+                        estimator, X, calib_index,
+                    )
+                    for (_, calib_index), estimator in zip(
+                        cv.split(X, y, groups),
+                        self.estimators_
+                    )
+                )
+                predictions, indices = map(
+                    list, zip(*outputs)
+                )
+                n_samples = _num_samples(X)
+                pred_matrix = np.full(
+                    shape=(n_samples, cv.get_n_splits(X, y, groups)),
+                    fill_value=np.nan,
+                    dtype=float,
+                )
+                for i, ind in enumerate(indices):
+                    pred_matrix[ind, i] = np.array(
+                        predictions[i], dtype=float
+                    )
+                    self.k_[ind, i] = 1
+                check_nan_in_aposteriori_prediction(pred_matrix)
+
+                y_pred = aggregate_all(self.agg_function, pred_matrix)
+
+        return y_pred
+
+    def fit(
+        self,
+        X: ArrayLike,
+        y: ArrayLike,
+        sample_weight: Optional[ArrayLike] = None,
+        groups: Optional[ArrayLike] = None,
+        **fit_params,
+    ) -> EnsembleRegressor:
+        """
+        Fit the base estimator under the ``single_estimator_`` attribute.
+        Fit all cross-validated estimator clones
+        and rearrange them into a list, the ``estimators_`` attribute.
+        Out-of-fold conformity scores are stored under
+        the ``conformity_scores_`` attribute.
+
+        Parameters
+        ----------
+        X: ArrayLike of shape (n_samples, n_features)
+            Input data.
+
+        y: ArrayLike of shape (n_samples,)
+            Input labels.
+
+        sample_weight: Optional[ArrayLike] of shape (n_samples,)
+            Sample weights. If None, then samples are equally weighted.
+
+            By default ``None``.
+
+        groups: Optional[ArrayLike] of shape (n_samples,)
+            Group labels for the samples used while splitting the dataset into
+            train/test set.
+
+            By default ``None``.
+
+        **fit_params : dict
+            Additional fit parameters.
+
+        Returns
+        -------
+        EnsembleRegressor
+            The estimator fitted.
+        """
+        # Initialization
+        single_estimator_: RegressorMixin
+        estimators_: List[RegressorMixin] = []
+        full_indexes = np.arange(_num_samples(X))
+        cv = self.cv
+        self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
+        estimator = self.estimator
+        n_samples = _num_samples(y)
+
+        # Computation
+        if cv == "prefit":
+            single_estimator_ = estimator
+            self.k_ = np.full(
+                shape=(n_samples, 1), fill_value=np.nan, dtype=float
+            )
+        else:
+            single_estimator_ = self._fit_oof_estimator(
+                clone(estimator),
+                X,
+                y,
+                full_indexes,
+                sample_weight,
+                **fit_params
+            )
+            cv = cast(BaseCrossValidator, cv)
+            self.k_ = np.full(
+                shape=(n_samples, cv.get_n_splits(X, y, groups)),
+                fill_value=np.nan,
+                dtype=float,
+            )
+            if self.method == "naive":
+                estimators_ = [single_estimator_]
+            else:
+                estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
+                    delayed(self._fit_oof_estimator)(
+                        clone(estimator),
+                        X,
+                        y,
+                        train_index,
+                        sample_weight,
+                        **fit_params
+                    )
+                    for train_index, _ in cv.split(X, y, groups)
+                )
+            # In split-CP, we keep only the model fitted on train dataset
+            if self.use_split_method_:
+                single_estimator_ = estimators_[0]
+
+        self.single_estimator_ = single_estimator_
+        self.estimators_ = estimators_
+
+        return self
+
+    def predict(
+        self,
+        X: ArrayLike,
+        ensemble: bool = False,
+        return_multi_pred: bool = True
+    ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]:
+        """
+        Predict target from X. It also computes the prediction per train sample
+        for each test sample according to ``self.method``.
+
+        Parameters
+        ----------
+        X: ArrayLike of shape (n_samples, n_features)
+            Test data.
+
+        ensemble: bool
+            Boolean determining whether the predictions are ensembled or not.
+            If ``False``, predictions are those of the model trained on the
+            whole training set.
+            If ``True``, predictions from perturbed models are aggregated by
+            the aggregation function specified in the ``agg_function``
+            attribute.
+
+            If ``cv`` is ``"prefit"`` or ``"split"``, ``ensemble`` is ignored.
+
+            By default ``False``.
+
+        return_multi_pred: bool
+            If ``True`` the method returns the predictions and the multiple
+            predictions (3 arrays). If ``False`` the method return the
+            simple predictions only.
+
+        Returns
+        -------
+        Tuple[NDArray, NDArray, NDArray]
+            - Predictions
+            - The multiple predictions for the lower bound of the intervals.
+            - The multiple predictions for the upper bound of the intervals.
+        """
+        check_is_fitted(self, self.fit_attributes)
+
+        y_pred = self.single_estimator_.predict(X)
+        if not return_multi_pred and not ensemble:
+            return y_pred
+
+        if self.method in self.no_agg_methods_ or self.use_split_method_:
+            y_pred_multi_low = y_pred[:, np.newaxis]
+            y_pred_multi_up = y_pred[:, np.newaxis]
+        else:
+            y_pred_multi = self._pred_multi(X)
+
+            if self.method == "minmax":
+                y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True)
+                y_pred_multi_up = np.max(y_pred_multi, axis=1, keepdims=True)
+            elif self.method == "plus":
+                y_pred_multi_low = y_pred_multi
+                y_pred_multi_up = y_pred_multi
+            else:
+                y_pred_multi_low = y_pred[:, np.newaxis]
+                y_pred_multi_up = y_pred[:, np.newaxis]
+
+            if ensemble:
+                y_pred = aggregate_all(self.agg_function, y_pred_multi)
+
+        if return_multi_pred:
+            return y_pred, y_pred_multi_low, y_pred_multi_up
+        else:
+            return y_pred
diff --git a/mapie/estimator/Ensemble_Estimator_Classification.py b/mapie/estimator/Ensemble_Estimator_Classification.py
new file mode 100644
index 000000000..5a4d99578
--- /dev/null
+++ b/mapie/estimator/Ensemble_Estimator_Classification.py
@@ -0,0 +1,36 @@
+##TEST###
+
+from __future__ import annotations
+
+from abc import ABCMeta, abstractmethod
+from typing import Optional, Tuple, Union
+
+from sklearn.base import BaseEstimator, ClassifierMixin
+
+from mapie._typing import ArrayLike, NDArray
+
+
+class EnsembleEstimator(ClassifierMixin, BaseEstimator):
+    """
+    This class implements methods to handle the training and usage of the
+    estimator. This estimator can be unique or composed by cross validated
+    estimators.
+    """
+
+    @abstractmethod
+    def fit(
+        self,
+        X: ArrayLike,
+        y: ArrayLike,
+        sample_weight: Optional[ArrayLike] = None,
+        groups: Optional[ArrayLike] = None,
+        **fit_params
+    ) -> EnsembleEstimator:
+        """
+        Fit the ensemble estimator.
+        """
+
+    @abstractmethod
+    def predict(
+        self,
+        X: ArrayLike,
+        ensemble: bool = False,
+        return_multi_pred: bool = True
+    ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]:
+        """
+        Predict target from X.
+        """
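An aside on the aggregation trick in ``_aggregate_with_mask`` above: the comment
there argues that the masked ``nanmean`` can be replaced by a single matrix
multiplication after zeroing the NaN entries of ``k_`` and row-normalising.
A minimal, self-contained NumPy sketch of that equivalence (illustrative data
only, not MAPIE API):

    import numpy as np

    # 3 test samples x 4 out-of-fold estimators
    x = np.array([[0.1, 0.2, 0.3, 0.4],
                  [0.5, 0.6, 0.7, 0.8],
                  [0.9, 1.0, 1.1, 1.2]])
    # 2 training samples: 1.0 where an estimator's prediction counts, NaN otherwise
    k = np.array([[1.0, np.nan, 1.0, np.nan],
                  [np.nan, 1.0, 1.0, 1.0]])

    # Explicit reference: nanmean over the masked predictions, per training sample
    ref = np.stack([np.nanmean(x * row, axis=1) for row in k], axis=1)

    # Fast path used in the code: zero the NaNs, row-normalise, one matmul
    K = np.nan_to_num(k, nan=0.0)
    fast = np.matmul(x, (K / K.sum(axis=1, keepdims=True)).T)

    assert np.allclose(ref, fast)  # both have shape (n_samples_test, n_samples_train)

The matmul form avoids the ``np.apply_along_axis`` loop inside ``phi2D``, which is
why the mean aggregation takes this shortcut while the median cannot.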
From 98309b680cd0514f90027380a942687d46cc76aa Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Tue, 16 Apr 2024 15:09:15 +0200
Subject: [PATCH 2/9] TRY - Further exploration of the Ensemble Classifier

---
 mapie/estimator/Ensemble_Classifier.py | 541 +++++++++++++++++--------
 1 file changed, 382 insertions(+), 159 deletions(-)

diff --git a/mapie/estimator/Ensemble_Classifier.py b/mapie/estimator/Ensemble_Classifier.py
index 186a0c95f..a4f895521 100644
--- a/mapie/estimator/Ensemble_Classifier.py
+++ b/mapie/estimator/Ensemble_Classifier.py
@@ -15,11 +15,17 @@
                                       type_of_target)
 from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
                                       indexable)
+from mapie._machine_precision import EPSILON
 from mapie._typing import ArrayLike, NDArray
 from mapie.aggregation_functions import aggregate_all, phi2D
+from mapie.metrics import classification_mean_width_score
 from mapie.estimator.interface import EnsembleEstimator
 from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
                          fit_estimator)
+from mapie.utils import (check_alpha, check_alpha_and_n_samples, check_cv,
+                         check_estimator_classification, check_n_features_in,
+                         check_n_jobs, check_null_weight, check_verbose,
+                         compute_quantiles, fit_estimator, fix_number_of_classes)
 
 
 class EnsembleClassifier(EnsembleEstimator):
@@ -38,7 +44,7 @@ class EnsembleClassifier(EnsembleEstimator):
         "label_encoder_"
     ]
 
-    # TODO: in the __init__ section, not sure whether to keep the "None" defaults
+    # TODO: in the __init__ section, not sure whether to keep the "None" defaults present in MapieClassifier
     def __init__(
         self,
         estimator: Optional[ClassifierMixin] = None,
@@ -53,7 +59,6 @@ def __init__(
        self.estimator = estimator
         self.method = method
         self.cv = cv
-        self.agg_function = agg_function  # TODO: decide whether to keep this argument (not present in MapieClassifier)
         self.n_jobs = n_jobs
         self.random_state = random_state
         self.test_size = test_size
@@ -76,51 +81,55 @@ def __init__(
         y_train = _safe_indexing(y, train_index)
         X_val = _safe_indexing(X, val_index)
         y_val = _safe_indexing(y, val_index)
-        # TODO: resume here
-        if not (sample_weight is None):
-            sample_weight = _safe_indexing(sample_weight, train_index)
-            sample_weight = cast(NDArray, sample_weight)
-
-        estimator = fit_estimator(
-            estimator,
-            X_train,
-            y_train,
-            sample_weight=sample_weight,
-            **fit_params
-        )
-        return estimator
+
+        if sample_weight is None:
+            estimator = fit_estimator(
+                estimator, X_train, y_train, **fit_params
+            )
+        else:
+            sample_weight_train = _safe_indexing(sample_weight, train_index)
+            estimator = fit_estimator(
+                estimator, X_train, y_train, sample_weight_train, **fit_params
+            )
+        if _num_samples(X_val) > 0:
+            y_pred_proba = self._predict_oof_model(estimator, X_val)
+        else:
+            y_pred_proba = np.array([])
+        val_id = np.full_like(y_val, k, dtype=int)
+        return estimator, y_pred_proba, val_id, val_index
 
-    @staticmethod
-    def _predict_oof_estimator(
-        estimator: RegressorMixin,
+    def _predict_oof_model(
+        self,
+        estimator: ClassifierMixin,
         X: ArrayLike,
-        val_index: ArrayLike,
-    ) -> Tuple[NDArray, ArrayLike]:
+    ) -> NDArray:
         """
-        Perform predictions on a single out-of-fold
model on a validation set. + Predict probabilities of a test set from a fitted estimator. Parameters ---------- - estimator: RegressorMixin - Estimator to train. + estimator: ClassifierMixin + Fitted estimator. - X: ArrayLike of shape (n_samples, n_features) - Input data. - - val_index: ArrayLike of shape (n_samples_val) - Validation data indices. + X: ArrayLike + Test set. Returns ------- - Tuple[NDArray, ArrayLike] - Predictions of estimator from val_index of X. + ArrayLike + Predicted probabilities. """ - X_val = _safe_indexing(X, val_index) - if _num_samples(X_val) > 0: - y_pred = estimator.predict(X_val) - else: - y_pred = np.array([]) - return y_pred, val_index + y_pred_proba = estimator.predict_proba(X) + # we enforce y_pred_proba to contain all labels included in y + if len(estimator.classes_) != self.n_classes_: + y_pred_proba = fix_number_of_classes( + self.n_classes_, + estimator.classes_, + y_pred_proba + ) + y_pred_proba = self._check_proba_normalized(y_pred_proba) + return y_pred_proba def _aggregate_with_mask( self, @@ -265,161 +274,375 @@ def fit( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, + size_raps: Optional[float] = .2, groups: Optional[ArrayLike] = None, **fit_params, - ) -> EnsembleRegressor: - """ - Fit the base estimator under the ``single_estimator_`` attribute. - Fit all cross-validated estimator clones - and rearrange them into a list, the ``estimators_`` attribute. - Out-of-fold conformity scores are stored under - the ``conformity_scores_`` attribute. + ) -> EnsembleClassifier: + # Checks - Parameters - ---------- - X: ArrayLike of shape (n_samples, n_features) - Input data. + self._check_parameters() + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) + X, y = indexable(X, y) + y = _check_y(y) - y: ArrayLike of shape (n_samples,) - Input labels. + sample_weight = cast(Optional[NDArray], sample_weight) + groups = cast(Optional[NDArray], groups) + sample_weight, X, y = check_null_weight(sample_weight, X, y) - sample_weight: Optional[ArrayLike] of shape (n_samples,) - Sample weights. If None, then samples are equally weighted. + y = cast(NDArray, y) - By default ``None``. + estimator = check_estimator_classification( + X, + y, + cv, + self.estimator + ) + self.n_features_in_ = check_n_features_in(X, cv, estimator) - groups: Optional[ArrayLike] of shape (n_samples,) - Group labels for the samples used while splitting the dataset into - train/test set. + n_samples = _num_samples(y) - By default ``None``. + self.n_classes_, self.classes_ = self._get_classes_info( + estimator, y + ) + enc = LabelEncoder() + enc.fit(self.classes_) + y_enc = enc.transform(y) - **fit_params : dict - Additional fit parameters. + self.label_encoder_ = enc + self._check_target(y) - Returns - ------- - EnsembleRegressor - The estimator fitted. 
- """ # Initialization - single_estimator_: RegressorMixin - estimators_: List[RegressorMixin] = [] - full_indexes = np.arange(_num_samples(X)) - cv = self.cv - self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_) - estimator = self.estimator - n_samples = _num_samples(y) + self.estimators_: List[ClassifierMixin] = [] + self.k_ = np.empty_like(y, dtype=int) + self.n_samples_ = _num_samples(X) - # Computation - if cv == "prefit": - single_estimator_ = estimator - self.k_ = np.full( - shape=(n_samples, 1), fill_value=np.nan, dtype=float + if self.method == "raps": + raps_split = ShuffleSplit( + 1, test_size=size_raps, random_state=self.random_state ) - else: - single_estimator_ = self._fit_oof_estimator( - clone(estimator), - X, - y, - full_indexes, - sample_weight, - **fit_params + train_raps_index, val_raps_index = next(raps_split.split(X)) + X, self.X_raps, y_enc, self.y_raps = \ + _safe_indexing(X, train_raps_index), \ + _safe_indexing(X, val_raps_index), \ + _safe_indexing(y_enc, train_raps_index), \ + _safe_indexing(y_enc, val_raps_index) + self.y_raps_no_enc = self.label_encoder_.inverse_transform( + self.y_raps ) + y = self.label_encoder_.inverse_transform(y_enc) + y_enc = cast(NDArray, y_enc) + n_samples = _num_samples(y_enc) + if sample_weight is not None: + sample_weight = sample_weight[train_raps_index] + sample_weight = cast(NDArray, sample_weight) + if groups is not None: + groups = groups[train_raps_index] + groups = cast(NDArray, groups) + + # Work + if cv == "prefit": + self.single_estimator_ = estimator + y_pred_proba = self.single_estimator_.predict_proba(X) + y_pred_proba = self._check_proba_normalized(y_pred_proba) + + else: cv = cast(BaseCrossValidator, cv) - self.k_ = np.full( - shape=(n_samples, cv.get_n_splits(X, y, groups)), - fill_value=np.nan, - dtype=float, + self.single_estimator_ = fit_estimator( + clone(estimator), X, y, sample_weight, **fit_params ) - if self.method == "naive": - estimators_ = [single_estimator_] - else: - estimators_ = Parallel(self.n_jobs, verbose=self.verbose)( - delayed(self._fit_oof_estimator)( - clone(estimator), - X, - y, - train_index, - sample_weight, - **fit_params - ) - for train_index, _ in cv.split(X, y, groups) + y_pred_proba = np.empty( + (n_samples, self.n_classes_), + dtype=float + ) + outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( + delayed(self._fit_and_predict_oof_model)( + clone(estimator), + X, + y, + train_index, + val_index, + k, + sample_weight, + **fit_params, + ) + for k, (train_index, val_index) in enumerate( + cv.split(X, y_enc, groups) ) - # In split-CP, we keep only the model fitted on train dataset - if self.use_split_method_: - single_estimator_ = estimators_[0] + ) + ( + self.estimators_, + predictions_list, + val_ids_list, + val_indices_list + ) = map(list, zip(*outputs)) + predictions = np.concatenate( + cast(List[NDArray], predictions_list) + ) + val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) + val_indices = np.concatenate( + cast(List[NDArray], val_indices_list) + ) + self.k_[val_indices] = val_ids + y_pred_proba[val_indices] = predictions + + if isinstance(cv, ShuffleSplit): + # Should delete values indices that + # are not used during calibration + self.k_ = self.k_[val_indices] + y_pred_proba = y_pred_proba[val_indices] + y_enc = y_enc[val_indices] + y = cast(NDArray, y)[val_indices] + + # RAPS: compute y_pred and position on the RAPS validation dataset + if self.method == "raps": + self.y_pred_proba_raps = self.single_estimator_.predict_proba( + self.X_raps 
+ ) + self.position_raps = self._get_true_label_position( + self.y_pred_proba_raps, + self.y_raps + ) + + # Conformity scores + if self.method == "naive": + self.conformity_scores_ = np.empty( + y_pred_proba.shape, + dtype="float" + ) + elif self.method in ["score", "lac"]: + self.conformity_scores_ = np.take_along_axis( + 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 + ) + elif self.method in ["cumulated_score", "aps", "raps"]: + self.conformity_scores_, self.cutoff = ( + self._get_true_label_cumsum_proba( + y, + y_pred_proba + ) + ) + y_proba_true = np.take_along_axis( + y_pred_proba, y_enc.reshape(-1, 1), axis=1 + ) + random_state = check_random_state(self.random_state) + u = random_state.uniform(size=len(y_pred_proba)).reshape(-1, 1) + self.conformity_scores_ -= u * y_proba_true + elif self.method == "top_k": + # Here we reorder the labels by decreasing probability + # and get the position of each label from decreasing + # probability + self.conformity_scores_ = self._get_true_label_position( + y_pred_proba, + y_enc + ) + else: + raise ValueError( + "Invalid method. " + f"Allowed values are {self.valid_methods_}." + ) - self.single_estimator_ = single_estimator_ - self.estimators_ = estimators_ + if isinstance(cv, ShuffleSplit): + self.single_estimator_ = self.estimators_[0] return self def predict( self, X: ArrayLike, - ensemble: bool = False, - return_multi_pred: bool = True - ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: - """ - Predict target from X. It also computes the prediction per train sample - for each test sample according to ``self.method``. - - Parameters - ---------- - X: ArrayLike of shape (n_samples, n_features) - Test data. - - ensemble: bool - Boolean determining whether the predictions are ensembled or not. - If ``False``, predictions are those of the model trained on the - whole training set. - If ``True``, predictions from perturbed models are aggregated by - the aggregation function specified in the ``agg_function`` - attribute. - - If ``cv`` is ``"prefit"`` or ``"split"``, ``ensemble`` is ignored. + alpha: Optional[Union[float, Iterable[float]]] = None, + include_last_label: Optional[Union[bool, str]] = True, + agg_scores: Optional[str] = "mean" + ) -> Union[NDArray, Tuple[NDArray, NDArray]]: + + if self.method == "top_k": + agg_scores = "mean" + # Checks + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) + include_last_label = self._check_include_last_label(include_last_label) + alpha = cast(Optional[NDArray], check_alpha(alpha)) + check_is_fitted(self, self.fit_attributes) + lambda_star, k_star = None, None + # Estimate prediction sets + y_pred = self.single_estimator_.predict(X) - By default ``False``. + if alpha is None: + return y_pred - return_multi_pred: bool - If ``True`` the method returns the predictions and the multiple - predictions (3 arrays). If ``False`` the method return the - simple predictions only. + n = len(self.conformity_scores_) - Returns - ------- - Tuple[NDArray, NDArray, NDArray] - - Predictions - - The multiple predictions for the lower bound of the intervals. - - The multiple predictions for the upper bound of the intervals. 
- """ - check_is_fitted(self, self.fit_attributes) + # Estimate of probabilities from estimator(s) + # In all cases: len(y_pred_proba.shape) == 3 + # with (n_test, n_classes, n_alpha or n_train_samples) + alpha_np = cast(NDArray, alpha) + check_alpha_and_n_samples(alpha_np, n) + if cv == "prefit": + y_pred_proba = self.single_estimator_.predict_proba(X) + y_pred_proba = np.repeat( + y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 + ) + else: + y_pred_proba_k = np.asarray( + Parallel( + n_jobs=self.n_jobs, verbose=self.verbose + )( + delayed(self._predict_oof_model)(estimator, X) + for estimator in self.estimators_ + ) + ) + if agg_scores == "crossval": + y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) + elif agg_scores == "mean": + y_pred_proba = np.mean(y_pred_proba_k, axis=0) + y_pred_proba = np.repeat( + y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 + ) + else: + raise ValueError("Invalid 'agg_scores' argument.") + # Check that sum of probas is equal to 1 + y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) - y_pred = self.single_estimator_.predict(X) - if not return_multi_pred and not ensemble: - return y_pred + # Choice of the quantile + check_alpha_and_n_samples(alpha_np, n) - if self.method in self.no_agg_methods_ or self.use_split_method_: - y_pred_multi_low = y_pred[:, np.newaxis] - y_pred_multi_up = y_pred[:, np.newaxis] + if self.method == "naive": + self.quantiles_ = 1 - alpha_np else: - y_pred_multi = self._pred_multi(X) - - if self.method == "minmax": - y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True) - y_pred_multi_up = np.max(y_pred_multi, axis=1, keepdims=True) - elif self.method == "plus": - y_pred_multi_low = y_pred_multi - y_pred_multi_up = y_pred_multi + if (cv == "prefit") or (agg_scores in ["mean"]): + if self.method == "raps": + check_alpha_and_n_samples(alpha_np, len(self.X_raps)) + k_star = compute_quantiles( + self.position_raps, + alpha_np + ) + 1 + y_pred_proba_raps = np.repeat( + self.y_pred_proba_raps[:, :, np.newaxis], + len(alpha_np), + axis=2 + ) + lambda_star = self._find_lambda_star( + y_pred_proba_raps, + alpha_np, + include_last_label, + k_star + ) + self.conformity_scores_regularized = ( + self._regularize_conformity_score( + k_star, + lambda_star, + self.conformity_scores_, + self.cutoff + ) + ) + self.quantiles_ = compute_quantiles( + self.conformity_scores_regularized, + alpha_np + ) + else: + self.quantiles_ = compute_quantiles( + self.conformity_scores_, + alpha_np + ) else: - y_pred_multi_low = y_pred[:, np.newaxis] - y_pred_multi_up = y_pred[:, np.newaxis] + self.quantiles_ = (n + 1) * (1 - alpha_np) - if ensemble: - y_pred = aggregate_all(self.agg_function, y_pred_multi) + # Build prediction sets + if self.method in ["score", "lac"]: + if (cv == "prefit") or (agg_scores == "mean"): + prediction_sets = np.greater_equal( + y_pred_proba - (1 - self.quantiles_), -EPSILON + ) + else: + y_pred_included = np.less_equal( + (1 - y_pred_proba) - self.conformity_scores_.ravel(), + EPSILON + ).sum(axis=2) + prediction_sets = np.stack( + [ + np.greater_equal( + y_pred_included - _alpha * (n - 1), -EPSILON + ) + for _alpha in alpha_np + ], axis=2 + ) - if return_multi_pred: - return y_pred, y_pred_multi_low, y_pred_multi_up + elif self.method in ["naive", "cumulated_score", "aps", "raps"]: + # specify which thresholds will be used + if (cv == "prefit") or (agg_scores in ["mean"]): + thresholds = self.quantiles_ + else: + thresholds = self.conformity_scores_.ravel() + # sort labels by decreasing 
probability + y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last = ( + self._get_last_included_proba( + y_pred_proba, + thresholds, + include_last_label, + lambda_star, + k_star, + ) + ) + # get the prediction set by taking all probabilities + # above the last one + if (cv == "prefit") or (agg_scores in ["mean"]): + y_pred_included = np.greater_equal( + y_pred_proba - y_pred_proba_last, -EPSILON + ) + else: + y_pred_included = np.less_equal( + y_pred_proba - y_pred_proba_last, EPSILON + ) + # remove last label randomly + if include_last_label == "randomized": + y_pred_included = self._add_random_tie_breaking( + y_pred_included, + y_pred_index_last, + y_pred_proba_cumsum, + y_pred_proba_last, + thresholds, + lambda_star, + k_star + ) + if (cv == "prefit") or (agg_scores in ["mean"]): + prediction_sets = y_pred_included + else: + # compute the number of times the inequality is verified + prediction_sets_summed = y_pred_included.sum(axis=2) + prediction_sets = np.less_equal( + prediction_sets_summed[:, :, np.newaxis] + - self.quantiles_[np.newaxis, np.newaxis, :], + EPSILON + ) + elif self.method == "top_k": + y_pred_proba = y_pred_proba[:, :, 0] + index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) + y_pred_index_last = np.stack( + [ + index_sorted[:, quantile] + for quantile in self.quantiles_ + ], axis=1 + ) + y_pred_proba_last = np.stack( + [ + np.take_along_axis( + y_pred_proba, + y_pred_index_last[:, iq].reshape(-1, 1), + axis=1 + ) + for iq, _ in enumerate(self.quantiles_) + ], axis=2 + ) + prediction_sets = np.greater_equal( + y_pred_proba[:, :, np.newaxis] + - y_pred_proba_last, + -EPSILON + ) else: - return y_pred + raise ValueError( + "Invalid method. " + f"Allowed values are {self.valid_methods_}." + ) + return y_pred, prediction_sets + From d27fc907d684598ee506b1e003af3bdc03794059 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 18 Apr 2024 10:52:01 +0200 Subject: [PATCH 3/9] ENH: add global check fit parameters in classif --- mapie/classification.py | 77 ++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index b636bd6ab..d55b64716 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1047,6 +1047,46 @@ def _get_classes_info( return n_classes, classes + def _check_fit_parameter(self, X, y, sample_weight, groups): + self._check_parameters() + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) + X, y = indexable(X, y) + y = _check_y(y) + + sample_weight = cast(Optional[NDArray], sample_weight) + groups = cast(Optional[NDArray], groups) + sample_weight, X, y = check_null_weight(sample_weight, X, y) + + y = cast(NDArray, y) + + estimator = check_estimator_classification( + X, + y, + cv, + self.estimator + ) + self.n_features_in_ = check_n_features_in(X, cv, estimator) + + n_samples = _num_samples(y) + + self.n_classes_, self.classes_ = self._get_classes_info( + estimator, y + ) + enc = LabelEncoder() + enc.fit(self.classes_) + y_enc = enc.transform(y) + + self.label_encoder_ = enc + self._check_target(y) + + return ( + estimator, cv, X, y, y_enc, + sample_weight, groups, + n_samples + ) + def fit( self, X: ArrayLike, @@ -1097,38 +1137,13 @@ def fit( The model itself. 
""" # Checks - self._check_parameters() - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state + ( + estimator, cv, X, y, y_enc, + sample_weight, groups, + n_samples + ) = self._check_fit_parameter( + X, y, sample_weight, groups ) - X, y = indexable(X, y) - y = _check_y(y) - - sample_weight = cast(Optional[NDArray], sample_weight) - groups = cast(Optional[NDArray], groups) - sample_weight, X, y = check_null_weight(sample_weight, X, y) - - y = cast(NDArray, y) - - estimator = check_estimator_classification( - X, - y, - cv, - self.estimator - ) - self.n_features_in_ = check_n_features_in(X, cv, estimator) - - n_samples = _num_samples(y) - - self.n_classes_, self.classes_ = self._get_classes_info( - estimator, y - ) - enc = LabelEncoder() - enc.fit(self.classes_) - y_enc = enc.transform(y) - - self.label_encoder_ = enc - self._check_target(y) # Initialization self.estimators_: List[ClassifierMixin] = [] From aec9ed509548023a67745dc27e5569f021c36d8f Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 18 Apr 2024 11:07:55 +0200 Subject: [PATCH 4/9] ENH: move RAPS split data in class method --- mapie/classification.py | 51 +++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index d55b64716..7c65caafc 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1087,6 +1087,31 @@ def _check_fit_parameter(self, X, y, sample_weight, groups): n_samples ) + def _split_raps_data(self, X, y_enc, sample_weight, groups, size_raps): + raps_split = ShuffleSplit( + 1, test_size=size_raps, random_state=self.random_state + ) + train_raps_index, val_raps_index = next(raps_split.split(X)) + X, self.X_raps, y_enc, self.y_raps = \ + _safe_indexing(X, train_raps_index), \ + _safe_indexing(X, val_raps_index), \ + _safe_indexing(y_enc, train_raps_index), \ + _safe_indexing(y_enc, val_raps_index) + self.y_raps_no_enc = self.label_encoder_.inverse_transform( + self.y_raps + ) + y = self.label_encoder_.inverse_transform(y_enc) + y_enc = cast(NDArray, y_enc) + n_samples = _num_samples(y_enc) + if sample_weight is not None: + sample_weight = sample_weight[train_raps_index] + sample_weight = cast(NDArray, sample_weight) + if groups is not None: + groups = groups[train_raps_index] + groups = cast(NDArray, groups) + + return X, y_enc, y, n_samples, sample_weight, groups + def fit( self, X: ArrayLike, @@ -1151,27 +1176,13 @@ def fit( self.n_samples_ = _num_samples(X) if self.method == "raps": - raps_split = ShuffleSplit( - 1, test_size=size_raps, random_state=self.random_state - ) - train_raps_index, val_raps_index = next(raps_split.split(X)) - X, self.X_raps, y_enc, self.y_raps = \ - _safe_indexing(X, train_raps_index), \ - _safe_indexing(X, val_raps_index), \ - _safe_indexing(y_enc, train_raps_index), \ - _safe_indexing(y_enc, val_raps_index) - self.y_raps_no_enc = self.label_encoder_.inverse_transform( - self.y_raps + ( + X, y_enc, y, n_samples, + sample_weight, groups + ) = self._split_raps_data( + X, y_enc, sample_weight, + groups, size_raps ) - y = self.label_encoder_.inverse_transform(y_enc) - y_enc = cast(NDArray, y_enc) - n_samples = _num_samples(y_enc) - if sample_weight is not None: - sample_weight = sample_weight[train_raps_index] - sample_weight = cast(NDArray, sample_weight) - if groups is not None: - groups = groups[train_raps_index] - groups = cast(NDArray, groups) # Work if cv == "prefit": From c71db010084c0efe875bf5f11a863c403c949a38 Mon Sep 17 00:00:00 2001 
From: Baptiste Calot Date: Thu, 18 Apr 2024 11:59:39 +0200 Subject: [PATCH 5/9] ADD: initialize EnsembleClassfier --- mapie/estimator/estimator.py | 551 ++++++++++++++++++++++++++++++++++- 1 file changed, 550 insertions(+), 1 deletion(-) diff --git a/mapie/estimator/estimator.py b/mapie/estimator/estimator.py index b8c7d4ecf..47a7f48e1 100644 --- a/mapie/estimator/estimator.py +++ b/mapie/estimator/estimator.py @@ -4,7 +4,7 @@ import numpy as np from joblib import Parallel, delayed -from sklearn.base import RegressorMixin, clone +from sklearn.base import ClassifierMixin, RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator from sklearn.utils import _safe_indexing from sklearn.utils.validation import _num_samples, check_is_fitted @@ -561,3 +561,552 @@ def predict( return y_pred, y_pred_multi_low, y_pred_multi_up else: return y_pred + + +class EnsembleClassifier(EnsembleEstimator): + """ + This class implements methods to handle the training and usage of the + estimator. This estimator can be unique or composed by cross validated + estimators. + + Parameters + ---------- + estimator: Optional[RegressorMixin] + Any regressor with scikit-learn API + (i.e. with ``fit`` and ``predict`` methods). + If ``None``, estimator defaults to a ``LinearRegression`` instance. + + By default ``None``. + + method: str + Method to choose for prediction interval estimates. + Choose among: + + - ``"naive"``, based on training set conformity scores, + - ``"base"``, based on validation sets conformity scores, + - ``"plus"``, based on validation conformity scores and + testing predictions, + - ``"minmax"``, based on validation conformity scores and + testing predictions (min/max among cross-validation clones). + + By default ``"plus"``. + + cv: Optional[Union[int, str, BaseCrossValidator]] + The cross-validation strategy for computing conformity scores. + It directly drives the distinction between jackknife and cv variants. + Choose among: + + - ``None``, to use the default 5-fold cross-validation + - integer, to specify the number of folds. + If equal to ``-1``, equivalent to + ``sklearn.model_selection.LeaveOneOut()``. + - CV splitter: any ``sklearn.model_selection.BaseCrossValidator`` + Main variants are: + - ``sklearn.model_selection.LeaveOneOut`` (jackknife), + - ``sklearn.model_selection.KFold`` (cross-validation), + - ``subsample.Subsample`` object (bootstrap). + - ``"split"``, does not involve cross-validation but a division + of the data into training and calibration subsets. The splitter + used is the following: ``sklearn.model_selection.ShuffleSplit``. + - ``"prefit"``, assumes that ``estimator`` has been fitted already, + and the ``method`` parameter is ignored. + All data provided in the ``fit`` method is then used + for computing conformity scores only. + At prediction time, quantiles of these conformity scores are used + to provide a prediction interval with fixed width. + The user has to take care manually that data for model fitting and + conformity scores estimate are disjoint. + + By default ``None``. + + test_size: Optional[Union[int, float]] + If ``float``, should be between ``0.0`` and ``1.0`` and represent the + proportion of the dataset to include in the test split. If ``int``, + represents the absolute number of test samples. If ``None``, + it will be set to ``0.1``. + + If cv is not ``"split"``, ``test_size`` is ignored. + + By default ``None``. + + n_jobs: Optional[int] + Number of jobs for parallel processing using joblib + via the "locky" backend. 
+ If ``-1`` all CPUs are used. + If ``1`` is given, no parallel computing code is used at all, + which is useful for debugging. + For ``n_jobs`` below ``-1``, ``(n_cpus + 1 - n_jobs)`` are used. + ``None`` is a marker for `unset` that will be interpreted as + ``n_jobs=1`` (sequential execution). + + By default ``None``. + + agg_function: Optional[str] + Determines how to aggregate predictions from perturbed models, both at + training and prediction time. + + If ``None``, it is ignored except if ``cv`` class is ``Subsample``, + in which case an error is raised. + If ``"mean"`` or ``"median"``, returns the mean or median of the + predictions computed from the out-of-folds models. + Note: if you plan to set the ``ensemble`` argument to ``True`` in the + ``predict`` method, you have to specify an aggregation function. + Otherwise an error would be raised. + + The Jackknife+ interval can be interpreted as an interval around the + median prediction, and is guaranteed to lie inside the interval, + unlike the single estimator predictions. + + When the cross-validation strategy is ``Subsample`` (i.e. for the + Jackknife+-after-Bootstrap method), this function is also used to + aggregate the training set in-sample predictions. + + If ``cv`` is ``"prefit"`` or ``"split"``, ``agg_function`` is ignored. + + By default ``"mean"``. + + verbose: int + The verbosity level, used with joblib for multiprocessing. + The frequency of the messages increases with the verbosity level. + If it more than ``10``, all iterations are reported. + Above ``50``, the output is sent to stdout. + + By default ``0``. + + random_state: Optional[Union[int, RandomState]] + Pseudo random number generator state used for random sampling. + Pass an int for reproducible output across multiple function calls. + + By default ``None``. + + Attributes + ---------- + single_estimator_: sklearn.RegressorMixin + Estimator fitted on the whole training set. + + estimators_: list + List of out-of-folds estimators. + + k_: ArrayLike + - Array of nans, of shape (len(y), 1) if ``cv`` is ``"prefit"`` + (defined but not used) + - Dummy array of folds containing each training sample, otherwise. + Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)). + """ + no_agg_cv_ = ["prefit", "split"] + no_agg_methods_ = ["naive", "base"] + fit_attributes = [ + "single_estimator_", + "estimators_", + "k_", + "use_split_method_", + ] + + def __init__( + self, + estimator: Optional[RegressorMixin], + method: str, + cv: Optional[Union[int, str, BaseCrossValidator]], + agg_function: Optional[str], + n_jobs: Optional[int], + random_state: Optional[Union[int, np.random.RandomState]], + test_size: Optional[Union[int, float]], + verbose: int + ): + self.estimator = estimator + self.method = method + self.cv = cv + self.agg_function = agg_function + self.n_jobs = n_jobs + self.random_state = random_state + self.test_size = test_size + self.verbose = verbose + + @staticmethod + def _fit_oof_estimator( + estimator: RegressorMixin, + X: ArrayLike, + y: ArrayLike, + train_index: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + **fit_params, + ) -> RegressorMixin: + """ + Fit a single out-of-fold model on a given training set. + + Parameters + ---------- + estimator: RegressorMixin + Estimator to train. + + X: ArrayLike of shape (n_samples, n_features) + Input data. + + y: ArrayLike of shape (n_samples,) + Input labels. + + train_index: ArrayLike of shape (n_samples_train) + Training data indices. 
+ + sample_weight: Optional[ArrayLike] of shape (n_samples,) + Sample weights. If None, then samples are equally weighted. + By default ``None``. + + **fit_params : dict + Additional fit parameters. + + Returns + ------- + RegressorMixin + Fitted estimator. + """ + X_train = _safe_indexing(X, train_index) + y_train = _safe_indexing(y, train_index) + if not (sample_weight is None): + sample_weight = _safe_indexing(sample_weight, train_index) + sample_weight = cast(NDArray, sample_weight) + + estimator = fit_estimator( + estimator, + X_train, + y_train, + sample_weight=sample_weight, + **fit_params + ) + return estimator + + @staticmethod + def _predict_oof_estimator( + estimator: RegressorMixin, + X: ArrayLike, + val_index: ArrayLike, + ) -> Tuple[NDArray, ArrayLike]: + """ + Perform predictions on a single out-of-fold model on a validation set. + + Parameters + ---------- + estimator: RegressorMixin + Estimator to train. + + X: ArrayLike of shape (n_samples, n_features) + Input data. + + val_index: ArrayLike of shape (n_samples_val) + Validation data indices. + + Returns + ------- + Tuple[NDArray, ArrayLike] + Predictions of estimator from val_index of X. + """ + X_val = _safe_indexing(X, val_index) + if _num_samples(X_val) > 0: + y_pred = estimator.predict(X_val) + else: + y_pred = np.array([]) + return y_pred, val_index + + def _aggregate_with_mask( + self, + x: NDArray, + k: NDArray + ) -> NDArray: + """ + Take the array of predictions, made by the refitted estimators, + on the testing set, and the 1-or-nan array indicating for each training + sample which one to integrate, and aggregate to produce phi-{t}(x_t) + for each training sample x_t. + + Parameters + ---------- + x: ArrayLike of shape (n_samples_test, n_estimators) + Array of predictions, made by the refitted estimators, + for each sample of the testing set. + + k: ArrayLike of shape (n_samples_training, n_estimators) + 1-or-nan array: indicates whether to integrate the prediction + of a given estimator into the aggregation, for each training + sample. + + Returns + ------- + ArrayLike of shape (n_samples_test,) + Array of aggregated predictions for each testing sample. + """ + if self.method in self.no_agg_methods_ or self.use_split_method_: + raise ValueError( + "There should not be aggregation of predictions " + f"if cv is in '{self.no_agg_cv_}', if cv >=2 " + f"or if method is in '{self.no_agg_methods_}'." + ) + elif self.agg_function == "median": + return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1)) + # To aggregate with mean() the aggregation coud be done + # with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1). + # However, phi2D contains a np.apply_along_axis loop which + # is much slower than the matrices multiplication that can + # be used to compute the means. + elif self.agg_function in ["mean", None]: + K = np.nan_to_num(k, nan=0.0) + return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T) + else: + raise ValueError("The value of self.agg_function is not correct") + + def _pred_multi(self, X: ArrayLike) -> NDArray: + """ + Return a prediction per train sample for each test sample, by + aggregation with matrix ``k_``. + + Parameters + ---------- + X: ArrayLike of shape (n_samples_test, n_features) + Input data + + Returns + ------- + NDArray of shape (n_samples_test, n_samples_train) + """ + y_pred_multi = np.column_stack( + [e.predict(X) for e in self.estimators_] + ) + # At this point, y_pred_multi is of shape + # (n_samples_test, n_estimators_). 
The method + # ``_aggregate_with_mask`` fits it to the right size + # thanks to the shape of k_. + y_pred_multi = self._aggregate_with_mask(y_pred_multi, self.k_) + return y_pred_multi + + def predict_calib( + self, + X: ArrayLike, + y: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None + ) -> NDArray: + """ + Perform predictions on X : the calibration set. + + Parameters + ---------- + X: ArrayLike of shape (n_samples_test, n_features) + Input data + + y: Optional[ArrayLike] of shape (n_samples_test,) + Input labels. + + By default ``None``. + + groups: Optional[ArrayLike] of shape (n_samples_test,) + Group labels for the samples used while splitting the dataset into + train/test set. + + By default ``None``. + + Returns + ------- + NDArray of shape (n_samples_test, 1) + The predictions. + """ + check_is_fitted(self, self.fit_attributes) + + if self.cv == "prefit": + y_pred = self.single_estimator_.predict(X) + else: + if self.method == "naive": + y_pred = self.single_estimator_.predict(X) + else: + cv = cast(BaseCrossValidator, self.cv) + outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( + delayed(self._predict_oof_estimator)( + estimator, X, calib_index, + ) + for (_, calib_index), estimator in zip( + cv.split(X, y, groups), + self.estimators_ + ) + ) + predictions, indices = map( + list, zip(*outputs) + ) + n_samples = _num_samples(X) + pred_matrix = np.full( + shape=(n_samples, cv.get_n_splits(X, y, groups)), + fill_value=np.nan, + dtype=float, + ) + for i, ind in enumerate(indices): + pred_matrix[ind, i] = np.array( + predictions[i], dtype=float + ) + self.k_[ind, i] = 1 + check_nan_in_aposteriori_prediction(pred_matrix) + + y_pred = aggregate_all(self.agg_function, pred_matrix) + + return y_pred + + def fit( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **fit_params, + ) -> EnsembleRegressor: + """ + Fit the base estimator under the ``single_estimator_`` attribute. + Fit all cross-validated estimator clones + and rearrange them into a list, the ``estimators_`` attribute. + Out-of-fold conformity scores are stored under + the ``conformity_scores_`` attribute. + + Parameters + ---------- + X: ArrayLike of shape (n_samples, n_features) + Input data. + + y: ArrayLike of shape (n_samples,) + Input labels. + + sample_weight: Optional[ArrayLike] of shape (n_samples,) + Sample weights. If None, then samples are equally weighted. + + By default ``None``. + + groups: Optional[ArrayLike] of shape (n_samples,) + Group labels for the samples used while splitting the dataset into + train/test set. + + By default ``None``. + + **fit_params : dict + Additional fit parameters. + + Returns + ------- + EnsembleRegressor + The estimator fitted. 
+ """ + # Initialization + single_estimator_: RegressorMixin + estimators_: List[RegressorMixin] = [] + full_indexes = np.arange(_num_samples(X)) + cv = self.cv + self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_) + estimator = self.estimator + n_samples = _num_samples(y) + + # Computation + if cv == "prefit": + single_estimator_ = estimator + self.k_ = np.full( + shape=(n_samples, 1), fill_value=np.nan, dtype=float + ) + else: + single_estimator_ = self._fit_oof_estimator( + clone(estimator), + X, + y, + full_indexes, + sample_weight, + **fit_params + ) + cv = cast(BaseCrossValidator, cv) + self.k_ = np.full( + shape=(n_samples, cv.get_n_splits(X, y, groups)), + fill_value=np.nan, + dtype=float, + ) + if self.method == "naive": + estimators_ = [single_estimator_] + else: + estimators_ = Parallel(self.n_jobs, verbose=self.verbose)( + delayed(self._fit_oof_estimator)( + clone(estimator), + X, + y, + train_index, + sample_weight, + **fit_params + ) + for train_index, _ in cv.split(X, y, groups) + ) + # In split-CP, we keep only the model fitted on train dataset + if self.use_split_method_: + single_estimator_ = estimators_[0] + + self.single_estimator_ = single_estimator_ + self.estimators_ = estimators_ + + return self + + def predict( + self, + X: ArrayLike, + ensemble: bool = False, + return_multi_pred: bool = True + ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: + """ + Predict target from X. It also computes the prediction per train sample + for each test sample according to ``self.method``. + + Parameters + ---------- + X: ArrayLike of shape (n_samples, n_features) + Test data. + + ensemble: bool + Boolean determining whether the predictions are ensembled or not. + If ``False``, predictions are those of the model trained on the + whole training set. + If ``True``, predictions from perturbed models are aggregated by + the aggregation function specified in the ``agg_function`` + attribute. + + If ``cv`` is ``"prefit"`` or ``"split"``, ``ensemble`` is ignored. + + By default ``False``. + + return_multi_pred: bool + If ``True`` the method returns the predictions and the multiple + predictions (3 arrays). If ``False`` the method return the + simple predictions only. + + Returns + ------- + Tuple[NDArray, NDArray, NDArray] + - Predictions + - The multiple predictions for the lower bound of the intervals. + - The multiple predictions for the upper bound of the intervals. 
+ """ + check_is_fitted(self, self.fit_attributes) + + y_pred = self.single_estimator_.predict(X) + if not return_multi_pred and not ensemble: + return y_pred + + if self.method in self.no_agg_methods_ or self.use_split_method_: + y_pred_multi_low = y_pred[:, np.newaxis] + y_pred_multi_up = y_pred[:, np.newaxis] + else: + y_pred_multi = self._pred_multi(X) + + if self.method == "minmax": + y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True) + y_pred_multi_up = np.max(y_pred_multi, axis=1, keepdims=True) + elif self.method == "plus": + y_pred_multi_low = y_pred_multi + y_pred_multi_up = y_pred_multi + else: + y_pred_multi_low = y_pred[:, np.newaxis] + y_pred_multi_up = y_pred[:, np.newaxis] + + if ensemble: + y_pred = aggregate_all(self.agg_function, y_pred_multi) + + if return_multi_pred: + return y_pred, y_pred_multi_low, y_pred_multi_up + else: + return y_pred + + From c9575ed1eca35cb74df09c64d575918bcf5925e6 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 18 Apr 2024 18:29:19 +0200 Subject: [PATCH 6/9] WIP: global architecture of EnsembleClassifier --- mapie/classification.py | 186 +++--------------------- mapie/estimator/estimator.py | 273 +++++++++++++++-------------------- 2 files changed, 143 insertions(+), 316 deletions(-) diff --git a/mapie/classification.py b/mapie/classification.py index 7c65caafc..0c5536175 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1,11 +1,10 @@ from __future__ import annotations import warnings -from typing import Any, Iterable, List, Optional, Tuple, Union, cast +from typing import Any, Iterable, Optional, Tuple, Union, cast import numpy as np -from joblib import Parallel, delayed -from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.model_selection import BaseCrossValidator, ShuffleSplit from sklearn.preprocessing import LabelEncoder, label_binarize from sklearn.utils import _safe_indexing, check_random_state @@ -16,11 +15,12 @@ from ._machine_precision import EPSILON from ._typing import ArrayLike, NDArray +from .estimator.estimator import EnsembleClassifier from .metrics import classification_mean_width_score from .utils import (check_alpha, check_alpha_and_n_samples, check_cv, check_estimator_classification, check_n_features_in, check_n_jobs, check_null_weight, check_verbose, - compute_quantiles, fit_estimator, fix_number_of_classes) + compute_quantiles, fix_number_of_classes) class MapieClassifier(BaseEstimator, ClassifierMixin): @@ -190,9 +190,6 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps" ] fit_attributes = [ - "single_estimator_", - "estimators_", - "k_", "n_features_in_", "conformity_scores_", "classes_", @@ -577,81 +574,6 @@ def _predict_oof_model( y_pred_proba = self._check_proba_normalized(y_pred_proba) return y_pred_proba - def _fit_and_predict_oof_model( - self, - estimator: ClassifierMixin, - X: ArrayLike, - y: ArrayLike, - train_index: ArrayLike, - val_index: ArrayLike, - k: int, - sample_weight: Optional[ArrayLike] = None, - **fit_params, - ) -> Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike]: - """ - Fit a single out-of-fold model on a given training set and - perform predictions on a test set. - - Parameters - ---------- - estimator: ClassifierMixin - Estimator to train. - - X: ArrayLike of shape (n_samples, n_features) - Input data. - - y: ArrayLike of shape (n_samples,) - Input labels. 
- - train_index: np.ndarray of shape (n_samples_train) - Training data indices. - - val_index: np.ndarray of shape (n_samples_val) - Validation data indices. - - k: int - Split identification number. - - sample_weight: Optional[ArrayLike] of shape (n_samples,) - Sample weights. If None, then samples are equally weighted. - By default None. - - **fit_params : dict - Additional fit parameters. - - Returns - ------- - Tuple[ClassifierMixin, NDArray, NDArray, ArrayLike] - - - [0]: ClassifierMixin, fitted estimator - - [1]: NDArray of shape (n_samples_val,), - Estimator predictions on the validation fold, - - [2]: NDArray of shape (n_samples_val,) - Identification number of the validation fold, - - [3]: ArrayLike of shape (n_samples_val,) - Validation data indices - """ - X_train = _safe_indexing(X, train_index) - y_train = _safe_indexing(y, train_index) - X_val = _safe_indexing(X, val_index) - y_val = _safe_indexing(y, val_index) - - if sample_weight is None: - estimator = fit_estimator( - estimator, X_train, y_train, **fit_params - ) - else: - sample_weight_train = _safe_indexing(sample_weight, train_index) - estimator = fit_estimator( - estimator, X_train, y_train, sample_weight_train, **fit_params - ) - if _num_samples(X_val) > 0: - y_pred_proba = self._predict_oof_model(estimator, X_val) - else: - y_pred_proba = np.array([]) - val_id = np.full_like(y_val, k, dtype=int) - return estimator, y_pred_proba, val_id, val_index - def _get_true_label_cumsum_proba( self, y: ArrayLike, @@ -1170,8 +1092,6 @@ def fit( X, y, sample_weight, groups ) - # Initialization - self.estimators_: List[ClassifierMixin] = [] self.k_ = np.empty_like(y, dtype=int) self.n_samples_ = _num_samples(X) @@ -1185,62 +1105,24 @@ def fit( ) # Work - if cv == "prefit": - self.single_estimator_ = estimator - y_pred_proba = self.single_estimator_.predict_proba(X) - y_pred_proba = self._check_proba_normalized(y_pred_proba) - - else: - cv = cast(BaseCrossValidator, cv) - self.single_estimator_ = fit_estimator( - clone(estimator), X, y, sample_weight, **fit_params - ) - y_pred_proba = np.empty( - (n_samples, self.n_classes_), - dtype=float - ) - outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( - delayed(self._fit_and_predict_oof_model)( - clone(estimator), - X, - y, - train_index, - val_index, - k, - sample_weight, - **fit_params, - ) - for k, (train_index, val_index) in enumerate( - cv.split(X, y_enc, groups) - ) - ) - ( - self.estimators_, - predictions_list, - val_ids_list, - val_indices_list - ) = map(list, zip(*outputs)) - predictions = np.concatenate( - cast(List[NDArray], predictions_list) - ) - val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) - val_indices = np.concatenate( - cast(List[NDArray], val_indices_list) - ) - self.k_[val_indices] = val_ids - y_pred_proba[val_indices] = predictions + self.estimator_ = EnsembleClassifier( + estimator, + self.n_classes_, + cv, + self.n_jobs, + self.random_state, + self.test_size, + self.verbose + ) - if isinstance(cv, ShuffleSplit): - # Should delete values indices that - # are not used during calibration - self.k_ = self.k_[val_indices] - y_pred_proba = y_pred_proba[val_indices] - y_enc = y_enc[val_indices] - y = cast(NDArray, y)[val_indices] + self.estimator_.fit(X, y, y_enc, sample_weight, groups, **fit_params) + y_pred_proba, y, y_enc = self.estimator_.predict_proba_calib( + X, y, y_enc, groups + ) # RAPS: compute y_pred and position on the RAPS validation dataset if self.method == "raps": - self.y_pred_proba_raps = self.single_estimator_.predict_proba( + 
self.y_pred_proba_raps = self.estimator_.single_estimator_.predict_proba( self.X_raps ) self.position_raps = self._get_true_label_position( @@ -1285,9 +1167,6 @@ def fit( f"Allowed values are {self.valid_methods_}." ) - if isinstance(cv, ShuffleSplit): - self.single_estimator_ = self.estimators_[0] - return self def predict( @@ -1373,7 +1252,7 @@ def predict( check_is_fitted(self, self.fit_attributes) lambda_star, k_star = None, None # Estimate prediction sets - y_pred = self.single_estimator_.predict(X) + y_pred = self.estimator_.single_estimator_.predict(X) if alpha is None: return y_pred @@ -1385,31 +1264,14 @@ def predict( # with (n_test, n_classes, n_alpha or n_train_samples) alpha_np = cast(NDArray, alpha) check_alpha_and_n_samples(alpha_np, n) - if cv == "prefit": - y_pred_proba = self.single_estimator_.predict_proba(X) + y_pred_proba = self.estimator_.predict( + X, agg_scores + ) + y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) + if (cv == "prefit") or (agg_scores in ["mean"]): y_pred_proba = np.repeat( y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 ) - else: - y_pred_proba_k = np.asarray( - Parallel( - n_jobs=self.n_jobs, verbose=self.verbose - )( - delayed(self._predict_oof_model)(estimator, X) - for estimator in self.estimators_ - ) - ) - if agg_scores == "crossval": - y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) - elif agg_scores == "mean": - y_pred_proba = np.mean(y_pred_proba_k, axis=0) - y_pred_proba = np.repeat( - y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 - ) - else: - raise ValueError("Invalid 'agg_scores' argument.") - # Check that sum of probas is equal to 1 - y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) # Choice of the quantile check_alpha_and_n_samples(alpha_np, n) diff --git a/mapie/estimator/estimator.py b/mapie/estimator/estimator.py index 47a7f48e1..64ce1ac10 100644 --- a/mapie/estimator/estimator.py +++ b/mapie/estimator/estimator.py @@ -5,7 +5,7 @@ import numpy as np from joblib import Parallel, delayed from sklearn.base import ClassifierMixin, RegressorMixin, clone -from sklearn.model_selection import BaseCrossValidator +from sklearn.model_selection import BaseCrossValidator, ShuffleSplit from sklearn.utils import _safe_indexing from sklearn.utils.validation import _num_samples, check_is_fitted @@ -13,7 +13,7 @@ from mapie.aggregation_functions import aggregate_all, phi2D from mapie.estimator.interface import EnsembleEstimator from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv, - fit_estimator) + fit_estimator, fix_number_of_classes) class EnsembleRegressor(EnsembleEstimator): @@ -578,44 +578,27 @@ class EnsembleClassifier(EnsembleEstimator): By default ``None``. - method: str - Method to choose for prediction interval estimates. - Choose among: - - - ``"naive"``, based on training set conformity scores, - - ``"base"``, based on validation sets conformity scores, - - ``"plus"``, based on validation conformity scores and - testing predictions, - - ``"minmax"``, based on validation conformity scores and - testing predictions (min/max among cross-validation clones). - - By default ``"plus"``. - - cv: Optional[Union[int, str, BaseCrossValidator]] - The cross-validation strategy for computing conformity scores. + cv: Optional[str] + The cross-validation strategy for computing scores. It directly drives the distinction between jackknife and cv variants. Choose among: - ``None``, to use the default 5-fold cross-validation - integer, to specify the number of folds. 
- If equal to ``-1``, equivalent to + If equal to -1, equivalent to ``sklearn.model_selection.LeaveOneOut()``. - CV splitter: any ``sklearn.model_selection.BaseCrossValidator`` Main variants are: - - ``sklearn.model_selection.LeaveOneOut`` (jackknife), - - ``sklearn.model_selection.KFold`` (cross-validation), - - ``subsample.Subsample`` object (bootstrap). + - ``sklearn.model_selection.LeaveOneOut`` (jackknife), + - ``sklearn.model_selection.KFold`` (cross-validation) - ``"split"``, does not involve cross-validation but a division of the data into training and calibration subsets. The splitter used is the following: ``sklearn.model_selection.ShuffleSplit``. - - ``"prefit"``, assumes that ``estimator`` has been fitted already, - and the ``method`` parameter is ignored. + - ``"prefit"``, assumes that ``estimator`` has been fitted already. All data provided in the ``fit`` method is then used - for computing conformity scores only. - At prediction time, quantiles of these conformity scores are used - to provide a prediction interval with fixed width. - The user has to take care manually that data for model fitting and - conformity scores estimate are disjoint. + to calibrate the predictions through the score computation. + At prediction time, quantiles of these scores are used to estimate + prediction sets. By default ``None``. @@ -641,44 +624,22 @@ class EnsembleClassifier(EnsembleEstimator): By default ``None``. - agg_function: Optional[str] - Determines how to aggregate predictions from perturbed models, both at - training and prediction time. - - If ``None``, it is ignored except if ``cv`` class is ``Subsample``, - in which case an error is raised. - If ``"mean"`` or ``"median"``, returns the mean or median of the - predictions computed from the out-of-folds models. - Note: if you plan to set the ``ensemble`` argument to ``True`` in the - ``predict`` method, you have to specify an aggregation function. - Otherwise an error would be raised. - - The Jackknife+ interval can be interpreted as an interval around the - median prediction, and is guaranteed to lie inside the interval, - unlike the single estimator predictions. - - When the cross-validation strategy is ``Subsample`` (i.e. for the - Jackknife+-after-Bootstrap method), this function is also used to - aggregate the training set in-sample predictions. - - If ``cv`` is ``"prefit"`` or ``"split"``, ``agg_function`` is ignored. + random_state: Optional[Union[int, RandomState]] + Pseudo random number generator state used for random uniform sampling + for evaluation quantiles and prediction sets. + Pass an int for reproducible output across multiple function calls. - By default ``"mean"``. + By default ``None``. - verbose: int + verbose: int, optional The verbosity level, used with joblib for multiprocessing. + At this moment, parallel processing is disabled. The frequency of the messages increases with the verbosity level. If it more than ``10``, all iterations are reported. Above ``50``, the output is sent to stdout. By default ``0``. - random_state: Optional[Union[int, RandomState]] - Pseudo random number generator state used for random sampling. - Pass an int for reproducible output across multiple function calls. - - By default ``None``. - Attributes ---------- single_estimator_: sklearn.RegressorMixin @@ -694,7 +655,6 @@ class EnsembleClassifier(EnsembleEstimator): Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)). 
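To make the ``cv`` choices described above concrete, here is a minimal, self-contained sketch (plain scikit-learn on toy data, not MAPIE's internal API) of fitting one out-of-fold estimator per fold, which is what this class does under a CV splitter:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

X = np.arange(20).reshape(-1, 1)
y = np.array([0, 1] * 10)

cv = KFold(n_splits=5, shuffle=True, random_state=0)
# One estimator per fold, each fitted on the complement of its
# validation set; the fold index later identifies out-of-fold rows.
estimators = [
    LogisticRegression().fit(X[train_index], y[train_index])
    for train_index, _ in cv.split(X, y)
]
assert len(estimators) == cv.get_n_splits()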
""" no_agg_cv_ = ["prefit", "split"] - no_agg_methods_ = ["naive", "base"] fit_attributes = [ "single_estimator_", "estimators_", @@ -704,19 +664,17 @@ class EnsembleClassifier(EnsembleEstimator): def __init__( self, - estimator: Optional[RegressorMixin], - method: str, + estimator: Optional[ClassifierMixin], + n_classes: int, cv: Optional[Union[int, str, BaseCrossValidator]], - agg_function: Optional[str], n_jobs: Optional[int], random_state: Optional[Union[int, np.random.RandomState]], test_size: Optional[Union[int, float]], verbose: int ): self.estimator = estimator - self.method = method + self.n_classes = n_classes self.cv = cv - self.agg_function = agg_function self.n_jobs = n_jobs self.random_state = random_state self.test_size = test_size @@ -724,13 +682,13 @@ def __init__( @staticmethod def _fit_oof_estimator( - estimator: RegressorMixin, + estimator: ClassifierMixin, X: ArrayLike, y: ArrayLike, train_index: ArrayLike, sample_weight: Optional[ArrayLike] = None, **fit_params, - ) -> RegressorMixin: + ) -> ClassifierMixin: """ Fit a single out-of-fold model on a given training set. @@ -774,12 +732,23 @@ def _fit_oof_estimator( **fit_params ) return estimator + + def _predict_proba_oof_estimator(self, estimator, X): + y_pred_proba = estimator.predict_proba(X) + if len(estimator.classes_) != self.n_classes: + y_pred_proba = fix_number_of_classes( + self.n_classes, + estimator.classes_, + y_pred_proba + ) + return y_pred_proba - @staticmethod - def _predict_oof_estimator( - estimator: RegressorMixin, + def _predict_proba_calib_oof_estimator( + self, + estimator: ClassifierMixin, X: ArrayLike, val_index: ArrayLike, + k: int ) -> Tuple[NDArray, ArrayLike]: """ Perform predictions on a single out-of-fold model on a validation set. @@ -800,12 +769,16 @@ def _predict_oof_estimator( Tuple[NDArray, ArrayLike] Predictions of estimator from val_index of X. 
""" + X_val = _safe_indexing(X, val_index) if _num_samples(X_val) > 0: - y_pred = estimator.predict(X_val) + y_pred_proba = self._predict_proba_oof_estimator( + estimator, X_val + ) else: - y_pred = np.array([]) - return y_pred, val_index + y_pred_proba = np.array([]) + val_id = np.full(len(X_val), k, dtype=int) + return y_pred_proba, val_id, val_index def _aggregate_with_mask( self, @@ -877,10 +850,11 @@ def _pred_multi(self, X: ArrayLike) -> NDArray: y_pred_multi = self._aggregate_with_mask(y_pred_multi, self.k_) return y_pred_multi - def predict_calib( + def predict_proba_calib( self, X: ArrayLike, y: Optional[ArrayLike] = None, + y_enc=None, groups: Optional[ArrayLike] = None ) -> NDArray: """ @@ -910,45 +884,53 @@ def predict_calib( check_is_fitted(self, self.fit_attributes) if self.cv == "prefit": - y_pred = self.single_estimator_.predict(X) + y_pred_proba = self.single_estimator_.predict_proba(X) else: - if self.method == "naive": - y_pred = self.single_estimator_.predict(X) - else: - cv = cast(BaseCrossValidator, self.cv) - outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( - delayed(self._predict_oof_estimator)( - estimator, X, calib_index, - ) - for (_, calib_index), estimator in zip( - cv.split(X, y, groups), - self.estimators_ - ) - ) - predictions, indices = map( - list, zip(*outputs) - ) - n_samples = _num_samples(X) - pred_matrix = np.full( - shape=(n_samples, cv.get_n_splits(X, y, groups)), - fill_value=np.nan, - dtype=float, + y_pred_proba = np.empty( + (len(X), self.n_classes), + dtype=float + ) + cv = cast(BaseCrossValidator, self.cv) + outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( + delayed(self._predict_proba_calib_oof_estimator)( + estimator, X, calib_index, k ) - for i, ind in enumerate(indices): - pred_matrix[ind, i] = np.array( - predictions[i], dtype=float - ) - self.k_[ind, i] = 1 - check_nan_in_aposteriori_prediction(pred_matrix) + for k, ((_, calib_index), estimator) in enumerate(zip( + cv.split(X, y, groups), + self.estimators_ + )) + ) + ( + predictions_list, + val_ids_list, + val_indices_list + ) = map(list, zip(*outputs)) + + predictions = np.concatenate( + cast(List[NDArray], predictions_list) + ) + val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) + val_indices = np.concatenate( + cast(List[NDArray], val_indices_list) + ) + self.k_[val_indices] = val_ids + y_pred_proba[val_indices] = predictions - y_pred = aggregate_all(self.agg_function, pred_matrix) + if isinstance(cv, ShuffleSplit): + # Should delete values indices that + # are not used during calibration + self.k_ = self.k_[val_indices] + y_pred_proba = y_pred_proba[val_indices] + y_enc = y_enc[val_indices] + y = cast(NDArray, y)[val_indices] - return y_pred + return y_pred_proba, y, y_enc def fit( self, X: ArrayLike, y: ArrayLike, + y_enc: ArrayLike, sample_weight: Optional[ArrayLike] = None, groups: Optional[ArrayLike] = None, **fit_params, @@ -988,8 +970,8 @@ def fit( The estimator fitted. 
""" # Initialization - single_estimator_: RegressorMixin - estimators_: List[RegressorMixin] = [] + single_estimator_: ClassifierMixin + estimators_: List[ClassifierMixin] = [] full_indexes = np.arange(_num_samples(X)) cv = self.cv self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_) @@ -1012,28 +994,22 @@ def fit( **fit_params ) cv = cast(BaseCrossValidator, cv) - self.k_ = np.full( - shape=(n_samples, cv.get_n_splits(X, y, groups)), - fill_value=np.nan, - dtype=float, - ) - if self.method == "naive": - estimators_ = [single_estimator_] - else: - estimators_ = Parallel(self.n_jobs, verbose=self.verbose)( - delayed(self._fit_oof_estimator)( - clone(estimator), - X, - y, - train_index, - sample_weight, - **fit_params - ) - for train_index, _ in cv.split(X, y, groups) + self.k_ = np.empty_like(y, dtype=int) + + estimators_ = Parallel(self.n_jobs, verbose=self.verbose)( + delayed(self._fit_oof_estimator)( + clone(estimator), + X, + y_enc, + train_index, + sample_weight, + **fit_params ) - # In split-CP, we keep only the model fitted on train dataset - if self.use_split_method_: - single_estimator_ = estimators_[0] + for train_index, _ in cv.split(X, y, groups) + ) + # In split-CP, we keep only the model fitted on train dataset + if self.use_split_method_: + single_estimator_ = estimators_[0] self.single_estimator_ = single_estimator_ self.estimators_ = estimators_ @@ -1043,8 +1019,7 @@ def fit( def predict( self, X: ArrayLike, - ensemble: bool = False, - return_multi_pred: bool = True + agg_scores ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample @@ -1081,32 +1056,22 @@ def predict( """ check_is_fitted(self, self.fit_attributes) - y_pred = self.single_estimator_.predict(X) - if not return_multi_pred and not ensemble: - return y_pred - - if self.method in self.no_agg_methods_ or self.use_split_method_: - y_pred_multi_low = y_pred[:, np.newaxis] - y_pred_multi_up = y_pred[:, np.newaxis] + if self.cv == "prefit": + y_pred_proba = self.single_estimator_.predict_proba(X) else: - y_pred_multi = self._pred_multi(X) - - if self.method == "minmax": - y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True) - y_pred_multi_up = np.max(y_pred_multi, axis=1, keepdims=True) - elif self.method == "plus": - y_pred_multi_low = y_pred_multi - y_pred_multi_up = y_pred_multi + y_pred_proba_k = np.asarray( + Parallel( + n_jobs=self.n_jobs, verbose=self.verbose + )( + delayed(self._predict_proba_oof_estimator)(estimator, X) + for estimator in self.estimators_ + ) + ) + if agg_scores == "crossval": + y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) + elif agg_scores == "mean": + y_pred_proba = np.mean(y_pred_proba_k, axis=0) else: - y_pred_multi_low = y_pred[:, np.newaxis] - y_pred_multi_up = y_pred[:, np.newaxis] - - if ensemble: - y_pred = aggregate_all(self.agg_function, y_pred_multi) - - if return_multi_pred: - return y_pred, y_pred_multi_low, y_pred_multi_up - else: - return y_pred - - + raise ValueError("Invalid 'agg_scores' argument.") + # y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) + return y_pred_proba From 11b5a514653a183a921964ccb0dce740ddbc1d47 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 26 Apr 2024 18:41:20 +0200 Subject: [PATCH 7/9] add print in draft python files in order to understand MapieClassifier --- mapie/classification_draft.py | 1472 ++++++++++++++++++++++++++++ mapie/estimator/estimator_draft.py | 587 +++++++++++ 2 files changed, 2059 
 insertions(+)
 create mode 100644 mapie/classification_draft.py
 create mode 100644 mapie/estimator/estimator_draft.py

diff --git a/mapie/classification_draft.py b/mapie/classification_draft.py
new file mode 100644
index 000000000..7c9c11597
--- /dev/null
+++ b/mapie/classification_draft.py
@@ -0,0 +1,1472 @@
+from __future__ import annotations
+
+import warnings
+from typing import Any, Iterable, Optional, Tuple, Union, cast
+
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.model_selection import BaseCrossValidator, ShuffleSplit
+from sklearn.preprocessing import LabelEncoder, label_binarize
+from sklearn.utils import _safe_indexing, check_random_state
+from sklearn.utils.multiclass import (check_classification_targets,
+                                      type_of_target)
+from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
+                                      indexable)
+
+from ._machine_precision import EPSILON
+from ._typing import ArrayLike, NDArray
+from .estimator.estimator_draft import EnsembleClassifier
+from .metrics import classification_mean_width_score
+from .utils import (check_alpha, check_alpha_and_n_samples, check_cv,
+                    check_estimator_classification, check_n_features_in,
+                    check_n_jobs, check_null_weight, check_verbose,
+                    compute_quantiles, fix_number_of_classes)
+
+
+class MapieClassifier(BaseEstimator, ClassifierMixin):
+    """
+    Prediction sets for classification.
+
+    This class implements several conformal prediction strategies for
+    estimating prediction sets for classification. Instead of giving a
+    single predicted label, the idea is to give a set of predicted labels
+    (or prediction sets) which come with mathematically guaranteed coverages.
+
+    Parameters
+    ----------
+    estimator: Optional[ClassifierMixin]
+        Any classifier with scikit-learn API
+        (i.e. with fit, predict, and predict_proba methods), by default None.
+        If ``None``, estimator defaults to a ``LogisticRegression`` instance.
+
+    method: Optional[str]
+        Method to choose for prediction interval estimates.
+        Choose among:
+
+        - ``"naive"``, sum of the probabilities until the 1 - alpha threshold.
+
+        - ``"lac"`` (formerly called ``"score"``), Least Ambiguous set-valued
+          Classifier. It is based on the scores
+          (i.e. 1 minus the softmax score of the true label)
+          on the calibration set. See [1] for more details.
+
+        - ``"aps"`` (formerly called "cumulated_score"), Adaptive Prediction
+          Sets method. It is based on the sum of the softmax outputs of the
+          labels until the true label is reached, on the calibration set.
+          See [2] for more details.
+
+        - ``"raps"``, Regularized Adaptive Prediction Sets method. It uses the
+          same technique as the ``"aps"`` method but with a penalty term
+          to reduce the size of prediction sets. See [3] for more
+          details. For now, this method only works with the ``"prefit"`` and
+          ``"split"`` strategies.
+
+        - ``"top_k"``, based on the sorted index of the probability of the
+          true label in the softmax outputs, on the calibration set. In case
+          two probabilities are equal, both are taken, thus the size of some
+          prediction sets may differ from the others. See [3] for
+          more details.
+
+        By default ``"lac"``.
+
+    cv: Optional[str]
+        The cross-validation strategy for computing scores.
+        It directly drives the distinction between jackknife and cv variants.
+        Choose among:
+
+        - ``None``, to use the default 5-fold cross-validation
+        - integer, to specify the number of folds.
+          If equal to -1, equivalent to
+          ``sklearn.model_selection.LeaveOneOut()``.
+        - CV splitter: any ``sklearn.model_selection.BaseCrossValidator``
+          Main variants are:
+          - ``sklearn.model_selection.LeaveOneOut`` (jackknife),
+          - ``sklearn.model_selection.KFold`` (cross-validation)
+        - ``"split"``, does not involve cross-validation but a division
+          of the data into training and calibration subsets. The splitter
+          used is the following: ``sklearn.model_selection.ShuffleSplit``.
+        - ``"prefit"``, assumes that ``estimator`` has been fitted already.
+          All data provided in the ``fit`` method is then used
+          to calibrate the predictions through the score computation.
+          At prediction time, quantiles of these scores are used to estimate
+          prediction sets.
+
+        By default ``None``.
+
+    test_size: Optional[Union[int, float]]
+        If float, should be between 0.0 and 1.0 and represent the proportion
+        of the dataset to include in the test split. If int, represents the
+        absolute number of test samples. If None, it will be set to 0.1.
+
+        If cv is not ``"split"``, ``test_size`` is ignored.
+
+        By default ``None``.
+
+    n_jobs: Optional[int]
+        Number of jobs for parallel processing using joblib
+        via the "loky" backend.
+        At this moment, parallel processing is disabled.
+        If ``-1`` all CPUs are used.
+        If ``1`` is given, no parallel computing code is used at all,
+        which is useful for debugging.
+        For n_jobs below ``-1``, ``(n_cpus + 1 + n_jobs)`` are used.
+        None is a marker for `unset` that will be interpreted as ``n_jobs=1``
+        (sequential execution).
+
+        By default ``None``.
+
+    random_state: Optional[Union[int, RandomState]]
+        Pseudo random number generator state used for random uniform sampling
+        for evaluation quantiles and prediction sets.
+        Pass an int for reproducible output across multiple function calls.
+
+        By default ``None``.
+
+    verbose: int, optional
+        The verbosity level, used with joblib for multiprocessing.
+        At this moment, parallel processing is disabled.
+        The frequency of the messages increases with the verbosity level.
+        If it is more than ``10``, all iterations are reported.
+        Above ``50``, the output is sent to stdout.
+
+        By default ``0``.
+
+    Attributes
+    ----------
+    valid_methods: List[str]
+        List of all valid methods.
+
+    single_estimator_: sklearn.ClassifierMixin
+        Estimator fitted on the whole training set.
+
+    n_features_in_: int
+        Number of features passed to the fit method.
+
+    conformity_scores_: ArrayLike of shape (n_samples_train)
+        The conformity scores used to calibrate the prediction sets.
+
+    quantiles_: ArrayLike of shape (n_alpha)
+        The quantiles estimated from ``conformity_scores_`` and alpha values.
+
+    References
+    ----------
+    [1] Mauricio Sadinle, Jing Lei, and Larry Wasserman.
+    "Least Ambiguous Set-Valued Classifiers with Bounded Error Levels.",
+    Journal of the American Statistical Association, 114, 2019.
+
+    [2] Yaniv Romano, Matteo Sesia and Emmanuel J. Candès.
+    "Classification with Valid and Adaptive Coverage."
+    NeurIPS 2020 (spotlight).
+
+    [3] Anastasios Nikolas Angelopoulos, Stephen Bates, Michael Jordan
+    and Jitendra Malik.
+    "Uncertainty Sets for Image Classifiers using Conformal Prediction."
+    International Conference on Learning Representations 2021.
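Before the doctest below, a back-of-the-envelope sketch of the ``"lac"`` score from [1] (illustrative only: MAPIE applies a finite-sample-corrected quantile, not the plain one used here):

import numpy as np

y_pred_proba = np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1],
                         [0.3, 0.3, 0.4]])
y_true = np.array([0, 1, 2])
# Conformity score: 1 minus the softmax score of the true label.
scores = 1 - y_pred_proba[np.arange(len(y_true)), y_true]
alpha = 0.2
# "higher" interpolation needs NumPy >= 1.22.
quantile = np.quantile(scores, 1 - alpha, method="higher")
# Keep every label whose probability exceeds 1 - quantile.
prediction_sets = y_pred_proba >= 1 - quantile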
+ + Examples + -------- + >>> import numpy as np + >>> from sklearn.naive_bayes import GaussianNB + >>> from mapie.classification import MapieClassifier + >>> X_toy = np.arange(9).reshape(-1, 1) + >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2]) + >>> clf = GaussianNB().fit(X_toy, y_toy) + >>> mapie = MapieClassifier(estimator=clf, cv="prefit").fit(X_toy, y_toy) + >>> _, y_pi_mapie = mapie.predict(X_toy, alpha=0.2) + >>> print(y_pi_mapie[:, :, 0]) + [[ True False False] + [ True False False] + [ True True False] + [ True True False] + [False True False] + [False True True] + [False True True] + [False False True] + [False False True]] + """ + + raps_valid_cv_ = ["prefit", "split"] + valid_methods_ = [ + "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps" + ] + fit_attributes = [ + "n_features_in_", + "conformity_scores_", + "classes_", + "label_encoder_" + ] + + def __init__( + self, + estimator: Optional[ClassifierMixin] = None, + method: str = "lac", + cv: Optional[Union[int, str, BaseCrossValidator]] = None, + test_size: Optional[Union[int, float]] = None, + n_jobs: Optional[int] = None, + random_state: Optional[Union[int, np.random.RandomState]] = None, + verbose: int = 0 + ) -> None: + print() + print("use of _init") + self.estimator = estimator + print("estimator", estimator) + self.method = method + print("method:", method) + self.cv = cv + print("cv:", cv) + self.test_size = test_size + print("test_size:", test_size) + self.n_jobs = n_jobs + print("n_jobs:", n_jobs) + self.random_state = random_state + print("random_state:", random_state) + self.verbose = verbose + print("verbose:", verbose) + print() + + def _check_parameters(self) -> None: + """ + Perform several checks on input parameters. + + Raises + ------ + ValueError + If parameters are not valid. + """ + print() + print("use of _check_parameters") + if self.method not in self.valid_methods_: + raise ValueError( + "Invalid method. " + f"Allowed values are {self.valid_methods_}." + ) + check_n_jobs(self.n_jobs) + check_verbose(self.verbose) + check_random_state(self.random_state) + self._check_depreciated() + self._check_raps() + + def _check_depreciated(self) -> None: + """ + Check if the chosen method is outdated. + + Raises + ------ + Warning + If method is ``"score"`` (not ``"lac"``) or + if method is ``"cumulated_score"`` (not ``"aps"``). + """ + print() + print("use of _check_depreciated") + if self.method == "score": + warnings.warn( + "WARNING: Deprecated method. " + + "The method \"score\" is outdated. " + + "Prefer to use \"lac\" instead to keep " + + "the same behavior in the next release.", + DeprecationWarning + ) + if self.method == "cumulated_score": + warnings.warn( + "WARNING: Deprecated method. " + + "The method \"cumulated_score\" is outdated. " + + "Prefer to use \"aps\" instead to keep " + + "the same behavior in the next release.", + DeprecationWarning + ) + + def _check_target(self, y: ArrayLike) -> None: + """ + Check that if the type of target is binary, + (then the method have to be ``"lac"``), or multi-class. + + Parameters + ---------- + y: NDArray of shape (n_samples,) + Training labels. + + Raises + ------ + ValueError + If type of target is binary and method is not ``"lac"`` + or ``"score"`` or if type of target is not multi-class. + """ + print() + print("use of _check_target") + check_classification_targets(y) + if type_of_target(y) == "binary" and \ + self.method not in ["score", "lac"]: + raise ValueError( + "Invalid method for binary target. 
" + "Your target is not of type multiclass and " + "allowed values for binary type are " + f"{['score', 'lac']}." + ) + + def _check_raps(self): + """ + Check that if the method used is ``"raps"``, then + the cross validation strategy is ``"prefit"``. + + Raises + ------ + ValueError + If ``method`` is ``"raps"`` and ``cv`` is not ``"prefit"``. + """ + print() + print("use of _check_raps") + if (self.method == "raps") and ( + (self.cv not in self.raps_valid_cv_) + or isinstance(self.cv, ShuffleSplit) + ): + raise ValueError( + "RAPS method can only be used " + f"with cv in {self.raps_valid_cv_}." + ) + + def _check_include_last_label( + self, + include_last_label: Optional[Union[bool, str]] + ) -> Optional[Union[bool, str]]: + """ + Check if ``include_last_label`` is a boolean or a string. + Else raise error. + + Parameters + ---------- + include_last_label: Optional[Union[bool, str]] + Whether or not to include last label in + prediction sets for the ``"aps"`` method. Choose among: + + - ``False``, does not include label whose cumulated score is just + over the quantile. + - ``True``, includes label whose cumulated score is just over the + quantile, unless there is only one label in the prediction set. + - ``"randomized"``, randomly includes label whose cumulated score + is just over the quantile based on the comparison of a uniform + number and the difference between the cumulated score of the last + label and the quantile. + + Returns + ------- + Optional[Union[bool, str]] + + Raises + ------ + ValueError + "Invalid include_last_label argument. " + "Should be a boolean or 'randomized'." + """ + print() + print("use of _check_include_last_label") + if ( + (not isinstance(include_last_label, bool)) and + (not include_last_label == "randomized") + ): + raise ValueError( + "Invalid include_last_label argument. " + "Should be a boolean or 'randomized'." + ) + else: + return include_last_label + + def _check_proba_normalized( + self, + y_pred_proba: ArrayLike, + axis: int = 1 + ) -> NDArray: + """ + Check if, for all the observations, the sum of + the probabilities is equal to one. + + Parameters + ---------- + y_pred_proba: ArrayLike of shape + (n_samples, n_classes) or + (n_samples, n_train_samples, n_classes) + Softmax output of a model. + + Returns + ------- + ArrayLike of shape (n_samples, n_classes) + Softmax output of a model if the scores all sum + to one. + + Raises + ------ + ValueError + If the sum of the scores is not equal to one. + """ + print() + print("use of _check_proba_normalized") + np.testing.assert_allclose( + np.sum(y_pred_proba, axis=axis), + 1, + err_msg="The sum of the scores is not equal to one.", + rtol=1e-5 + ) + y_pred_proba = cast(NDArray, y_pred_proba).astype(np.float64) + return y_pred_proba + + def _get_last_index_included( + self, + y_pred_proba_cumsum: NDArray, + threshold: NDArray, + include_last_label: Optional[Union[bool, str]] + ) -> NDArray: + """ + Return the index of the last included sorted probability + depending if we included the first label over the quantile + or not. + + Parameters + ---------- + y_pred_proba_cumsum: NDArray of shape (n_samples, n_classes) + Cumsumed probabilities in the original order. 
+ + threshold: NDArray of shape (n_alpha,) or shape (n_samples_train,) + Threshold to compare with y_proba_last_cumsum, can be either: + + - the quantiles associated with alpha values when + ``cv`` == "prefit", ``cv`` == "split" + or ``agg_scores`` is "mean" + - the conformity score from training samples otherwise + (i.e., when ``cv`` is a CV splitter and + ``agg_scores`` is "crossval") + + include_last_label: Union[bool, str] + Whether or not include the last label. If 'randomized', + the last label is included. + + Returns + ------- + NDArray of shape (n_samples, n_alpha) + Index of the last included sorted probability. + """ + print() + print("use of _get_last_index_included") + if ( + (include_last_label) or + (include_last_label == 'randomized') + ): + y_pred_index_last = ( + np.ma.masked_less( + y_pred_proba_cumsum + - threshold[np.newaxis, :], + -EPSILON + ).argmin(axis=1) + ) + elif (include_last_label is False): + max_threshold = np.maximum( + threshold[np.newaxis, :], + np.min(y_pred_proba_cumsum, axis=1) + ) + y_pred_index_last = np.argmax( + np.ma.masked_greater( + y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], + EPSILON + ), axis=1 + ) + else: + raise ValueError( + "Invalid include_last_label argument. " + "Should be a boolean or 'randomized'." + ) + return y_pred_index_last[:, np.newaxis, :] + + def _add_random_tie_breaking( + self, + prediction_sets: NDArray, + y_pred_index_last: NDArray, + y_pred_proba_cumsum: NDArray, + y_pred_proba_last: NDArray, + threshold: NDArray, + lambda_star: Union[NDArray, float, None], + k_star: Union[NDArray, None] + ) -> NDArray: + """ + Randomly remove last label from prediction set based on the + comparison between a random number and the difference between + cumulated score of the last included label and the quantile. + + Parameters + ---------- + prediction_sets: NDArray of shape + (n_samples, n_classes, n_threshold) + Prediction set for each observation and each alpha. + + y_pred_index_last: NDArray of shape (n_samples, threshold) + Index of the last included label. + + y_pred_proba_cumsum: NDArray of shape (n_samples, n_classes) + Cumsumed probability of the model in the original order. + + y_pred_proba_last: NDArray of shape (n_samples, 1, threshold) + Last included probability. + + threshold: NDArray of shape (n_alpha,) or shape (n_samples_train,) + Threshold to compare with y_proba_last_cumsum, can be either: + + - the quantiles associated with alpha values when + ``cv`` == "prefit", ``cv`` == "split" or + ``agg_scores`` is "mean" + - the conformity score from training samples otherwise + (i.e., when ``cv`` is a CV splitter and + ``agg_scores`` is "crossval") + + lambda_star: Union[NDArray, float, None] of shape (n_alpha): + Optimal value of the regulizer lambda. + + k_star: Union[NDArray, None] of shape (n_alpha): + Optimal value of the regulizer k. + + Returns + ------- + NDArray of shape (n_samples, n_classes, n_alpha) + Updated version of prediction_sets with randomly removed + labels. 
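The formula being implemented is easier to see on a toy case. A sketch of the randomized tie-break of [2] (values are made up): the last label stays in the set only when V = (cumulated score of last label - threshold) / (probability of last label) does not exceed a uniform draw u:

import numpy as np

rng = np.random.RandomState(0)
cumsum_last = np.array([0.85, 0.95])  # cumulated score at the last label
proba_last = np.array([0.30, 0.25])   # probability of that last label
threshold = 0.8

vs = (cumsum_last - threshold) / proba_last
us = rng.uniform(size=2)
keep_last = vs <= us  # True: keep the last label, False: remove it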
+ """ + print() + print("use of _add_random_tie_breaking") + # get cumsumed probabilities up to last retained label + y_proba_last_cumsumed = np.squeeze( + np.take_along_axis( + y_pred_proba_cumsum, + y_pred_index_last, + axis=1 + ), axis=1 + ) + + if self.method in ["cumulated_score", "aps"]: + # compute V parameter from Romano+(2020) + vs = ( + (y_proba_last_cumsumed - threshold.reshape(1, -1)) / + y_pred_proba_last[:, 0, :] + ) + else: + # compute V parameter from Angelopoulos+(2020) + L = np.sum(prediction_sets, axis=1) + vs = ( + (y_proba_last_cumsumed - threshold.reshape(1, -1)) / + ( + y_pred_proba_last[:, 0, :] - + lambda_star * np.maximum(0, L - k_star) + + lambda_star * (L > k_star) + ) + ) + + # get random numbers for each observation and alpha value + random_state = check_random_state(self.random_state) + us = random_state.uniform(size=(prediction_sets.shape[0], 1)) + # remove last label from comparison between uniform number and V + vs_less_than_us = np.less_equal(vs - us, EPSILON) + np.put_along_axis( + prediction_sets, + y_pred_index_last, + vs_less_than_us[:, np.newaxis, :], + axis=1 + ) + return prediction_sets + + def _predict_oof_model( + self, + estimator: ClassifierMixin, + X: ArrayLike, + ) -> NDArray: + """ + Predict probabilities of a test set from a fitted estimator. + + Parameters + ---------- + estimator: ClassifierMixin + Fitted estimator. + + X: ArrayLike + Test set. + + Returns + ------- + ArrayLike + Predicted probabilities. + """ + print() + print("use of _predict_oof_model") + y_pred_proba = estimator.predict_proba(X) + # we enforce y_pred_proba to contain all labels included in y + if len(estimator.classes_) != self.n_classes_: + y_pred_proba = fix_number_of_classes( + self.n_classes_, + estimator.classes_, + y_pred_proba + ) + y_pred_proba = self._check_proba_normalized(y_pred_proba) + return y_pred_proba + + def _get_true_label_cumsum_proba( + self, + y: ArrayLike, + y_pred_proba: NDArray + ) -> Tuple[NDArray, NDArray]: + """ + Compute the cumsumed probability of the true label. + + Parameters + ---------- + y: NDArray of shape (n_samples, ) + Array with the labels. + y_pred_proba: NDArray of shape (n_samples, n_classes) + Predictions of the model. + + Returns + ------- + Tuple[NDArray, NDArray] of shapes + (n_samples, 1) and (n_samples, ). The first element + is the cumsum probability of the true label. The second + is the sorted position of the true label. + """ + print() + print("use of _get_true_label_cumsum_proba") + y_true = label_binarize( + y=y, classes=self.classes_ + ) + index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) + y_pred_proba_sorted = np.take_along_axis( + y_pred_proba, index_sorted, axis=1 + ) + y_true_sorted = np.take_along_axis(y_true, index_sorted, axis=1) + y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) + cutoff = np.argmax(y_true_sorted, axis=1) + true_label_cumsum_proba = np.take_along_axis( + y_pred_proba_sorted_cumsum, cutoff.reshape(-1, 1), axis=1 + ) + return true_label_cumsum_proba, cutoff + 1 + + def _regularize_conformity_score( + self, + k_star: NDArray, + lambda_: Union[NDArray, float], + conf_score: NDArray, + cutoff: NDArray + ) -> NDArray: + """ + Regularize the conformity scores with the ``"raps"`` + method. See algo. 2 in [3]. + + Parameters + ---------- + k_star: NDArray of shape (n_alphas, ) + Optimal value of k (called k_reg in the paper). There + is one value per alpha. + + lambda_: Union[NDArray, float] of shape (n_alphas, ) + One value of lambda for each alpha. 
+ + conf_score: NDArray of shape (n_samples, 1) + Conformity scores. + + cutoff: NDArray of shape (n_samples, 1) + Position of the true label. + + Returns + ------- + NDArray of shape (n_samples, 1, n_alphas) + Regularized conformity scores. The regularization + depends on the value of alpha. + """ + print() + print("use of _regularize_conformity_score") + conf_score = np.repeat( + conf_score[:, :, np.newaxis], len(k_star), axis=2 + ) + cutoff = np.repeat( + cutoff[:, np.newaxis], len(k_star), axis=1 + ) + conf_score += np.maximum( + np.expand_dims( + lambda_ * (cutoff - k_star), + axis=1 + ), + 0 + ) + return conf_score + + def _get_true_label_position( + self, + y_pred_proba: NDArray, + y: NDArray + ) -> NDArray: + """ + Return the sorted position of the true label in the + prediction + + Parameters + ---------- + y_pred_proba: NDArray of shape (n_samples, n_calsses) + Model prediction. + + y: NDArray of shape (n_samples) + Labels. + + Returns + ------- + NDArray of shape (n_samples, 1) + Position of the true label in the prediction. + """ + print() + print("use of _get_true_label_position") + index = np.argsort( + np.fliplr(np.argsort(y_pred_proba, axis=1)) + ) + position = np.take_along_axis( + index, + y.reshape(-1, 1), + axis=1 + ) + return position + + def _get_last_included_proba( + self, + y_pred_proba: NDArray, + thresholds: NDArray, + include_last_label: Union[bool, str, None], + lambda_: Union[NDArray, float, None], + k_star: Union[NDArray, Any] + ) -> Tuple[NDArray, NDArray, NDArray]: + """ + Function that returns the smallest score + among those which are included in the prediciton set. + + Parameters + ---------- + y_pred_proba: NDArray of shape (n_samples, n_classes) + Predictions of the model. + + thresholds: NDArray of shape (n_alphas, ) + Quantiles that have been computed from the conformity + scores. + + include_last_label: Union[bool, str, None] + Whether to include or not the label whose score + exceeds the threshold. + + lambda_: Union[NDArray, float, None] of shape (n_alphas) + Values of lambda for the regularization. + + k_star: Union[NDArray, Any] + Values of k for the regularization. + + Returns + ------- + Tuple[ArrayLike, ArrayLike, ArrayLike] + Arrays of shape (n_samples, n_classes, n_alphas), + (n_samples, 1, n_alphas) and (n_samples, 1, n_alphas). + They are respectively the cumsumed scores in the original + order which can be different according to the value of alpha + with the RAPS method, the index of the last included score + and the value of the last included score. 
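A toy sketch of the mechanics described above (sort probabilities in decreasing order, cumulate, locate the last included label); the RAPS penalty term is omitted for brevity:

import numpy as np

y_pred_proba = np.array([[0.5, 0.3, 0.2]])
threshold = 0.85

index_sorted = np.argsort(y_pred_proba, axis=1)[:, ::-1]
proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1)
cumsum_sorted = np.cumsum(proba_sorted, axis=1)  # [[0.5, 0.8, 1.0]]
# First rank whose cumulated score reaches the threshold; whether that
# label itself enters the set is what include_last_label controls.
last = np.argmax(cumsum_sorted >= threshold, axis=1)  # [2]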
+ """ + print() + print("use of _get_last_included_proba") + index_sorted = np.flip( + np.argsort(y_pred_proba, axis=1), axis=1 + ) + # sort probabilities by decreasing order + y_pred_proba_sorted = np.take_along_axis( + y_pred_proba, index_sorted, axis=1 + ) + # get sorted cumulated score + y_pred_proba_sorted_cumsum = np.cumsum( + y_pred_proba_sorted, axis=1 + ) + + if self.method == "raps": + y_pred_proba_sorted_cumsum += lambda_ * np.maximum( + 0, + np.cumsum( + np.ones(y_pred_proba_sorted_cumsum.shape), + axis=1 + ) - k_star + ) + # get cumulated score at their original position + y_pred_proba_cumsum = np.take_along_axis( + y_pred_proba_sorted_cumsum, + np.argsort(index_sorted, axis=1), + axis=1 + ) + # get index of the last included label + y_pred_index_last = self._get_last_index_included( + y_pred_proba_cumsum, + thresholds, + include_last_label + ) + # get the probability of the last included label + y_pred_proba_last = np.take_along_axis( + y_pred_proba, + y_pred_index_last, + axis=1 + ) + + zeros_scores_proba_last = (y_pred_proba_last <= EPSILON) + + # If the last included proba is zero, change it to the + # smallest non-zero value to avoid inluding them in the + # prediction sets. + if np.sum(zeros_scores_proba_last) > 0: + y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims( + np.min( + np.ma.masked_less( + y_pred_proba, + EPSILON + ).filled(fill_value=np.inf), + axis=1 + ), axis=1 + )[zeros_scores_proba_last] + return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last + + def _update_size_and_lambda( + self, + best_sizes: NDArray, + alpha_np: NDArray, + y_ps: NDArray, + lambda_: Union[NDArray, float], + lambda_star: NDArray + ) -> Tuple[NDArray, NDArray]: + """Update the values of the optimal lambda if the + average size of the prediction sets decreases with + this new value of lambda. + + Parameters + ---------- + best_sizes: NDArray of shape (n_alphas, ) + Smallest average prediciton set size before testing + for the new value of lambda_ + + alpha_np: NDArray of shape (n_alphas) + Level of confidences. + + y_ps: NDArray of shape (n_samples, n_classes, n_alphas) + Prediction sets computed with the RAPS method and the + new value of lambda_ + + lambda_: NDArray of shape (n_alphas, ) + New value of lambda_star to test + + lambda_star: NDArray of shape (n_alphas, ) + Actual optimal lambda values for each alpha. + + Returns + ------- + Tuple[NDArray, NDArray] + Arrays of shape (n_alphas, ) and (n_alpha, ) which + respectively represent the updated values of lambda_star + and the new best sizes. + """ + print() + print('use of _update_size_and_lambda') + sizes = [ + classification_mean_width_score( + y_ps[:, :, i] + ) for i in range(len(alpha_np)) + ] + + sizes_improve = (sizes < best_sizes - EPSILON) + lambda_star = ( + sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star + ) + best_sizes = sizes_improve * sizes + (1 - sizes_improve) * best_sizes + return lambda_star, best_sizes + + def _find_lambda_star( + self, + y_pred_proba_raps: NDArray, + alpha_np: NDArray, + include_last_label: Union[bool, str, None], + k_star: NDArray + ) -> Union[NDArray, float]: + """Find the optimal value of lambda for each alpha. + + Parameters + ---------- + y_pred_proba_raps: NDArray of shape (n_samples, n_labels, n_alphas) + Predictions of the model repeated on the last axis as many times + as the number of alphas + + alpha_np: NDArray of shape (n_alphas, ) + Levels of confidences. 
+ + include_last_label: bool + Whether to include or not last label in + the prediction sets + + k_star: NDArray of shape (n_alphas, ) + Values of k for the regularization. + + Returns + ------- + ArrayLike of shape (n_alphas, ) + Optimal values of lambda. + """ + print() + print("use of _find_lambda_star") + lambda_star = np.zeros(len(alpha_np)) + best_sizes = np.full(len(alpha_np), np.finfo(np.float64).max) + + for lambda_ in [.001, .01, .1, .2, .5]: # values given in paper[3] + true_label_cumsum_proba, cutoff = ( + self._get_true_label_cumsum_proba( + self.y_raps_no_enc, + y_pred_proba_raps[:, :, 0], + ) + ) + + true_label_cumsum_proba_reg = self._regularize_conformity_score( + k_star, + lambda_, + true_label_cumsum_proba, + cutoff + ) + + quantiles_ = compute_quantiles( + true_label_cumsum_proba_reg, + alpha_np + ) + + _, _, y_pred_proba_last = self._get_last_included_proba( + y_pred_proba_raps, + quantiles_, + include_last_label, + lambda_, + k_star + ) + + y_ps = np.greater_equal( + y_pred_proba_raps - y_pred_proba_last, -EPSILON + ) + lambda_star, best_sizes = self._update_size_and_lambda( + best_sizes, alpha_np, y_ps, lambda_, lambda_star + ) + if len(lambda_star) == 1: + lambda_star = lambda_star[0] + return lambda_star + + def _get_classes_info( + self, estimator: ClassifierMixin, y: NDArray + ) -> Tuple[int, NDArray]: + """ + Compute the number of classes and the classes values + according to either the pre-trained model or to the + values in y. + + Parameters + ---------- + estimator: ClassifierMixin + Estimator pre-fitted or not. + + y: NDArray + Values to predict. + + Returns + ------- + Tuple[int, NDArray] + The number of unique classes and their unique + values. + + Raises + ------ + ValueError + If `cv="prefit"` and that classes in `y` are not included into + `estimator.classes_`. + + Warning + If number of calibration labels is lower than number of labels + for training (in prefit setting) + """ + print() + print("use of _get_classes_info") + n_unique_y_labels = len(np.unique(y)) + if self.cv == "prefit": + classes = estimator.classes_ + n_classes = len(np.unique(classes)) + if not set(np.unique(y)).issubset(classes): + raise ValueError( + "Values in y do not matched values in estimator.classes_." 
+ + " Check that you are not adding any new label" + ) + if n_classes > n_unique_y_labels: + warnings.warn( + "WARNING: your calibration dataset has less labels" + + " than your training dataset (training" + + f" has {n_classes} unique labels while" + + f" calibration have {n_unique_y_labels} unique labels" + ) + + else: + n_classes = n_unique_y_labels + classes = np.unique(y) + + return n_classes, classes + + def _check_fit_parameter(self, X, y, sample_weight, groups): + print() + print("use of _check_fit_parameters") + self._check_parameters() + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) + X, y = indexable(X, y) + y = _check_y(y) + + sample_weight = cast(Optional[NDArray], sample_weight) + groups = cast(Optional[NDArray], groups) + sample_weight, X, y = check_null_weight(sample_weight, X, y) + + y = cast(NDArray, y) + + estimator = check_estimator_classification( + X, + y, + cv, + self.estimator + ) + self.n_features_in_ = check_n_features_in(X, cv, estimator) + + n_samples = _num_samples(y) + + self.n_classes_, self.classes_ = self._get_classes_info( + estimator, y + ) + enc = LabelEncoder() + enc.fit(self.classes_) + y_enc = enc.transform(y) + + self.label_encoder_ = enc + self._check_target(y) + + return ( + estimator, cv, X, y, y_enc, + sample_weight, groups, + n_samples, + ) + + def _split_raps_data(self, X, y_enc, sample_weight, groups, size_raps): + print() + print("use of _split_raps_data") + raps_split = ShuffleSplit( + 1, test_size=size_raps, random_state=self.random_state + ) + train_raps_index, val_raps_index = next(raps_split.split(X)) + X, self.X_raps, y_enc, self.y_raps = \ + _safe_indexing(X, train_raps_index), \ + _safe_indexing(X, val_raps_index), \ + _safe_indexing(y_enc, train_raps_index), \ + _safe_indexing(y_enc, val_raps_index) + self.y_raps_no_enc = self.label_encoder_.inverse_transform( + self.y_raps + ) + y = self.label_encoder_.inverse_transform(y_enc) + y_enc = cast(NDArray, y_enc) + n_samples = _num_samples(y_enc) + if sample_weight is not None: + sample_weight = sample_weight[train_raps_index] + sample_weight = cast(NDArray, sample_weight) + if groups is not None: + groups = groups[train_raps_index] + groups = cast(NDArray, groups) + + return X, y_enc, y, n_samples, sample_weight, groups + + def fit( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + size_raps: Optional[float] = .2, + groups: Optional[ArrayLike] = None, + **fit_params, + ) -> MapieClassifier: + """ + Fit the base estimator or use the fitted base estimator. + + Parameters + ---------- + X: ArrayLike of shape (n_samples, n_features) + Training data. + + y: NDArray of shape (n_samples,) + Training labels. + + sample_weight: Optional[ArrayLike] of shape (n_samples,) + Sample weights for fitting the out-of-fold models. + If None, then samples are equally weighted. + If some weights are null, + their corresponding observations are removed + before the fitting process and hence have no prediction sets. + + By default ``None``. + + size_raps: Optional[float] + Percentage of the data to be used for choosing lambda_star and + k_star for the RAPS method. + + By default ``.2``. + + groups: Optional[ArrayLike] of shape (n_samples,) + Group labels for the samples used while splitting the dataset into + train/test set. + + By default ``None``. + + **fit_params : dict + Additional fit parameters. + + Returns + ------- + MapieClassifier + The model itself. 
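To see what the ``fit`` body below computes for ``"aps"``, a small sketch of the conformity score (cumulated probability down to the true label, randomized by u times the true-label probability); data are made up:

import numpy as np

rng = np.random.RandomState(0)
y_pred_proba = np.array([[0.5, 0.3, 0.2],
                         [0.2, 0.6, 0.2]])
y = np.array([1, 0])
n = len(y)

index_sorted = np.argsort(y_pred_proba, axis=1)[:, ::-1]
ranks = np.argsort(index_sorted, axis=1)  # rank of each label
proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1)
cumsum_sorted = np.cumsum(proba_sorted, axis=1)
cumsum = np.take_along_axis(cumsum_sorted, ranks, axis=1)
scores = cumsum[np.arange(n), y]  # cumulated score up to the true label
u = rng.uniform(size=n)
scores = scores - u * y_pred_proba[np.arange(n), y]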
+ """ + print() + print("USE OF FIT") + # Checks + ( + estimator, cv, X, y, y_enc, + sample_weight, groups, + n_samples + ) = self._check_fit_parameter( + X, y, sample_weight, groups + ) + print() + print(( + "estimator:",estimator, "cv:",cv, "X_train:", X, "y_train:", y, "y_enc:" ,y_enc, + "sample_weight:",sample_weight, "groups:",groups, + "n_samples",n_samples)) + + + self.k_ = np.empty_like(y, dtype=int) + print() + print("self.k_", self.k_, "shape_of k_ :", self.k_.shape,"type :" , type(self.k_), "unique_values :", list(set(self.k_))) + self.n_samples_ = _num_samples(X) + print() + print("self.n_samples",self.n_samples_) + + if self.method == "raps": + ( + X, y_enc, y, n_samples, + sample_weight, groups + ) = self._split_raps_data( + X, y_enc, sample_weight, + groups, size_raps + ) + + # Work + self.estimator_ = EnsembleClassifier( + estimator, + self.n_classes_, + cv, + self.n_jobs, + self.random_state, + self.test_size, + self.verbose + ) + + self.estimator_.fit(X, y, y_enc, sample_weight, groups, **fit_params) + + y_pred_proba, y, y_enc = self.estimator_.predict_proba_calib( + X, y, y_enc, groups + ) + + # RAPS: compute y_pred and position on the RAPS validation dataset + if self.method == "raps": + self.y_pred_proba_raps = self.estimator_.single_estimator_.predict_proba( + self.X_raps + ) + self.position_raps = self._get_true_label_position( + self.y_pred_proba_raps, + self.y_raps + ) + + # Conformity scores + if self.method == "naive": + self.conformity_scores_ = np.empty( + y_pred_proba.shape, + dtype="float" + ) + elif self.method in ["score", "lac"]: + self.conformity_scores_ = np.take_along_axis( + 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 + ) + elif self.method in ["cumulated_score", "aps", "raps"]: + self.conformity_scores_, self.cutoff = ( + self._get_true_label_cumsum_proba( + y, + y_pred_proba + ) + ) + y_proba_true = np.take_along_axis( + y_pred_proba, y_enc.reshape(-1, 1), axis=1 + ) + random_state = check_random_state(self.random_state) + u = random_state.uniform(size=len(y_pred_proba)).reshape(-1, 1) + self.conformity_scores_ -= u * y_proba_true + elif self.method == "top_k": + # Here we reorder the labels by decreasing probability + # and get the position of each label from decreasing + # probability + self.conformity_scores_ = self._get_true_label_position( + y_pred_proba, + y_enc + ) + else: + raise ValueError( + "Invalid method. " + f"Allowed values are {self.valid_methods_}." + ) + + return self + + def predict( + self, + X: ArrayLike, + alpha: Optional[Union[float, Iterable[float]]] = None, + include_last_label: Optional[Union[bool, str]] = True, + agg_scores: Optional[str] = "mean" + ) -> Union[NDArray, Tuple[NDArray, NDArray]]: + """ + Prediction prediction sets on new samples based on target confidence + interval. + Prediction sets for a given ``alpha`` are deduced from: + + - quantiles of softmax scores (``"lac"`` method) + - quantiles of cumulated scores (``"aps"`` method) + + Parameters + ---------- + X: ArrayLike of shape (n_samples, n_features) + Test data. + + alpha: Optional[Union[float, Iterable[float]]] + Can be a float, a list of floats, or a ``ArrayLike`` of floats. + Between 0 and 1, represent the uncertainty of the confidence + interval. + Lower ``alpha`` produce larger (more conservative) prediction + sets. + ``alpha`` is the complement of the target coverage level. + + By default ``None``. + + include_last_label: Optional[Union[bool, str]] + Whether or not to include last label in + prediction sets for the "aps" method. 
Choose among: + + - False, does not include label whose cumulated score is just over + the quantile. + - True, includes label whose cumulated score is just over the + quantile, unless there is only one label in the prediction set. + - "randomized", randomly includes label whose cumulated score is + just over the quantile based on the comparison of a uniform + number and the difference between the cumulated score of + the last label and the quantile. + + When set to ``True`` or ``False``, it may result in a coverage + higher than ``1 - alpha`` (because contrary to the "randomized" + setting, none of this methods create empty prediction sets). See + [2] and [3] for more details. + + By default ``True``. + + agg_scores: Optional[str] + + How to aggregate the scores output by the estimators on test data + if a cross-validation strategy is used. Choose among: + + - "mean", take the mean of scores. + - "crossval", compare the scores between all training data and each + test point for each label to estimate if the label must be + included in the prediction set. Follows algorithm 2 of + Romano+2020. + + By default "mean". + + Returns + ------- + Union[NDArray, Tuple[NDArray, NDArray]] + + - NDArray of shape (n_samples,) if alpha is None. + + - Tuple[NDArray, NDArray] of shapes + (n_samples,) and (n_samples, n_classes, n_alpha) if alpha is not None. + """ + print() + print("use of predict") + if self.method == "top_k": + agg_scores = "mean" + # Checks + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) + include_last_label = self._check_include_last_label(include_last_label) + alpha = cast(Optional[NDArray], check_alpha(alpha)) + check_is_fitted(self, self.fit_attributes) + lambda_star, k_star = None, None + # Estimate prediction sets + y_pred = self.estimator_.single_estimator_.predict(X) + + if alpha is None: + return y_pred + + n = len(self.conformity_scores_) + + # Estimate of probabilities from estimator(s) + # In all cases: len(y_pred_proba.shape) == 3 + # with (n_test, n_classes, n_alpha or n_train_samples) + alpha_np = cast(NDArray, alpha) + check_alpha_and_n_samples(alpha_np, n) + y_pred_proba = self.estimator_.predict( + X, agg_scores + ) + y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) + if (cv == "prefit") or (agg_scores in ["mean"]): + y_pred_proba = np.repeat( + y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2 + ) + + # Choice of the quantile + check_alpha_and_n_samples(alpha_np, n) + + if self.method == "naive": + self.quantiles_ = 1 - alpha_np + else: + if (cv == "prefit") or (agg_scores in ["mean"]): + if self.method == "raps": + check_alpha_and_n_samples(alpha_np, len(self.X_raps)) + k_star = compute_quantiles( + self.position_raps, + alpha_np + ) + 1 + y_pred_proba_raps = np.repeat( + self.y_pred_proba_raps[:, :, np.newaxis], + len(alpha_np), + axis=2 + ) + lambda_star = self._find_lambda_star( + y_pred_proba_raps, + alpha_np, + include_last_label, + k_star + ) + self.conformity_scores_regularized = ( + self._regularize_conformity_score( + k_star, + lambda_star, + self.conformity_scores_, + self.cutoff + ) + ) + self.quantiles_ = compute_quantiles( + self.conformity_scores_regularized, + alpha_np + ) + else: + self.quantiles_ = compute_quantiles( + self.conformity_scores_, + alpha_np + ) + else: + self.quantiles_ = (n + 1) * (1 - alpha_np) + + # Build prediction sets + if self.method in ["score", "lac"]: + if (cv == "prefit") or (agg_scores == "mean"): + prediction_sets = np.greater_equal( + y_pred_proba - (1 - 
self.quantiles_), -EPSILON + ) + else: + y_pred_included = np.less_equal( + (1 - y_pred_proba) - self.conformity_scores_.ravel(), + EPSILON + ).sum(axis=2) + prediction_sets = np.stack( + [ + np.greater_equal( + y_pred_included - _alpha * (n - 1), -EPSILON + ) + for _alpha in alpha_np + ], axis=2 + ) + + elif self.method in ["naive", "cumulated_score", "aps", "raps"]: + # specify which thresholds will be used + if (cv == "prefit") or (agg_scores in ["mean"]): + thresholds = self.quantiles_ + else: + thresholds = self.conformity_scores_.ravel() + # sort labels by decreasing probability + y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last = ( + self._get_last_included_proba( + y_pred_proba, + thresholds, + include_last_label, + lambda_star, + k_star, + ) + ) + # get the prediction set by taking all probabilities + # above the last one + if (cv == "prefit") or (agg_scores in ["mean"]): + y_pred_included = np.greater_equal( + y_pred_proba - y_pred_proba_last, -EPSILON + ) + else: + y_pred_included = np.less_equal( + y_pred_proba - y_pred_proba_last, EPSILON + ) + # remove last label randomly + if include_last_label == "randomized": + y_pred_included = self._add_random_tie_breaking( + y_pred_included, + y_pred_index_last, + y_pred_proba_cumsum, + y_pred_proba_last, + thresholds, + lambda_star, + k_star + ) + if (cv == "prefit") or (agg_scores in ["mean"]): + prediction_sets = y_pred_included + else: + # compute the number of times the inequality is verified + prediction_sets_summed = y_pred_included.sum(axis=2) + prediction_sets = np.less_equal( + prediction_sets_summed[:, :, np.newaxis] + - self.quantiles_[np.newaxis, np.newaxis, :], + EPSILON + ) + elif self.method == "top_k": + y_pred_proba = y_pred_proba[:, :, 0] + index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) + y_pred_index_last = np.stack( + [ + index_sorted[:, quantile] + for quantile in self.quantiles_ + ], axis=1 + ) + y_pred_proba_last = np.stack( + [ + np.take_along_axis( + y_pred_proba, + y_pred_index_last[:, iq].reshape(-1, 1), + axis=1 + ) + for iq, _ in enumerate(self.quantiles_) + ], axis=2 + ) + prediction_sets = np.greater_equal( + y_pred_proba[:, :, np.newaxis] + - y_pred_proba_last, + -EPSILON + ) + else: + raise ValueError( + "Invalid method. " + f"Allowed values are {self.valid_methods_}." + ) + return y_pred, prediction_sets diff --git a/mapie/estimator/estimator_draft.py b/mapie/estimator/estimator_draft.py new file mode 100644 index 000000000..15fa6694f --- /dev/null +++ b/mapie/estimator/estimator_draft.py @@ -0,0 +1,587 @@ +from __future__ import annotations + +from typing import List, Optional, Tuple, Union, cast + +import numpy as np +from joblib import Parallel, delayed +from sklearn.base import ClassifierMixin, RegressorMixin, clone +from sklearn.model_selection import BaseCrossValidator, ShuffleSplit +from sklearn.utils import _safe_indexing +from sklearn.utils.validation import _num_samples, check_is_fitted + +from mapie._typing import ArrayLike, NDArray +from mapie.aggregation_functions import aggregate_all, phi2D +from mapie.estimator.interface import EnsembleEstimator +from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv, + fit_estimator, fix_number_of_classes) + + +class EnsembleClassifier(EnsembleEstimator): + """ + This class implements methods to handle the training and usage of the + estimator. This estimator can be unique or composed by cross validated + estimators. 
+
+    Parameters
+    ----------
+    estimator: Optional[ClassifierMixin]
+        Any classifier with scikit-learn API
+        (i.e. with ``fit``, ``predict`` and ``predict_proba`` methods).
+        If ``None``, estimator defaults to a ``LogisticRegression`` instance.
+
+        By default ``None``.
+
+    cv: Optional[Union[int, str, BaseCrossValidator]]
+        The cross-validation strategy for computing scores.
+        It directly drives the distinction between jackknife and cv variants.
+        Choose among:
+
+        - ``None``, to use the default 5-fold cross-validation
+        - integer, to specify the number of folds.
+          If equal to -1, equivalent to
+          ``sklearn.model_selection.LeaveOneOut()``.
+        - CV splitter: any ``sklearn.model_selection.BaseCrossValidator``.
+          Main variants are:
+          - ``sklearn.model_selection.LeaveOneOut`` (jackknife),
+          - ``sklearn.model_selection.KFold`` (cross-validation).
+        - ``"split"``, does not involve cross-validation but a division
+          of the data into training and calibration subsets. The splitter
+          used is the following: ``sklearn.model_selection.ShuffleSplit``.
+        - ``"prefit"``, assumes that ``estimator`` has been fitted already.
+          All data provided in the ``fit`` method is then used
+          to calibrate the predictions through the score computation.
+          At prediction time, quantiles of these scores are used to estimate
+          prediction sets.
+
+        By default ``None``.
+
+    test_size: Optional[Union[int, float]]
+        If ``float``, should be between ``0.0`` and ``1.0`` and represent the
+        proportion of the dataset to include in the test split. If ``int``,
+        represents the absolute number of test samples. If ``None``,
+        it will be set to ``0.1``.
+
+        If ``cv`` is not ``"split"``, ``test_size`` is ignored.
+
+        By default ``None``.
+
+    n_jobs: Optional[int]
+        Number of jobs for parallel processing using joblib
+        via the "loky" backend.
+        If ``-1`` all CPUs are used.
+        If ``1`` is given, no parallel computing code is used at all,
+        which is useful for debugging.
+        For ``n_jobs`` below ``-1``, ``(n_cpus + 1 + n_jobs)`` CPUs are used.
+        ``None`` is a marker for `unset` that will be interpreted as
+        ``n_jobs=1`` (sequential execution).
+
+        By default ``None``.
+
+    random_state: Optional[Union[int, RandomState]]
+        Pseudo random number generator state used for random uniform sampling
+        when evaluating quantiles and prediction sets.
+        Pass an int for reproducible output across multiple function calls.
+
+        By default ``None``.
+
+    verbose: int, optional
+        The verbosity level, used with joblib for multiprocessing.
+        At this moment, parallel processing is disabled.
+        The frequency of the messages increases with the verbosity level.
+        If it is more than ``10``, all iterations are reported.
+        Above ``50``, the output is sent to stdout.
+
+        By default ``0``.
+
+    Attributes
+    ----------
+    single_estimator_: sklearn.ClassifierMixin
+        Estimator fitted on the whole training set.
+
+    estimators_: list
+        List of out-of-fold estimators.
+
+    k_: ArrayLike
+        - Array of nans, of shape (len(y), 1) if ``cv`` is ``"prefit"``
+          (defined but not used)
+        - Dummy array of folds containing each training sample, otherwise.
+          Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)).
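A hypothetical sketch of how these ``cv`` values could be resolved (it mirrors the docstring above, not the package's actual ``check_cv`` helper; the ``0.1`` fallback is the documented ``test_size`` default):

    from sklearn.model_selection import (BaseCrossValidator, KFold,
                                         LeaveOneOut, ShuffleSplit)

    def resolve_cv(cv=None, test_size=None, random_state=None):
        if cv is None:              # default 5-fold cross-validation
            return KFold(n_splits=5)
        if isinstance(cv, int):     # -1 -> jackknife, else K folds
            return LeaveOneOut() if cv == -1 else KFold(n_splits=cv)
        if cv == "split":           # one train/calibration division
            return ShuffleSplit(
                n_splits=1,
                test_size=0.1 if test_size is None else test_size,
                random_state=random_state,
            )
        if cv == "prefit" or isinstance(cv, BaseCrossValidator):
            return cv               # passed through unchanged
        raise ValueError(f"Invalid cv argument: {cv}")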
+ """ + no_agg_cv_ = ["prefit", "split"] + fit_attributes = [ + "single_estimator_", + "estimators_", + "k_", + "use_split_method_", + ] + + def __init__( + self, + estimator: Optional[ClassifierMixin], + n_classes: int, + cv: Optional[Union[int, str, BaseCrossValidator]], + n_jobs: Optional[int], + random_state: Optional[Union[int, np.random.RandomState]], + test_size: Optional[Union[int, float]], + verbose: int + ): + print() + print("EC : USE OF INIT") + self.estimator = estimator + print() + print("estimator", estimator) + self.n_classes = n_classes + print() + print("n_classes", n_classes) + self.cv = cv + print() + print("cv", cv) + self.n_jobs = n_jobs + print() + print("n_jobs", n_jobs) + self.random_state = random_state + print() + print("random_state", random_state) + self.test_size = test_size + print() + print("test_size", test_size) + self.verbose = verbose + print() + print("verbose", verbose) + + @staticmethod + def _fit_oof_estimator( + estimator: ClassifierMixin, + X: ArrayLike, + y: ArrayLike, + train_index: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + **fit_params, + ) -> ClassifierMixin: + """ + Fit a single out-of-fold model on a given training set. + + Parameters + ---------- + estimator: RegressorMixin + Estimator to train. + + X: ArrayLike of shape (n_samples, n_features) + Input data. + + y: ArrayLike of shape (n_samples,) + Input labels. + + train_index: ArrayLike of shape (n_samples_train) + Training data indices. + + sample_weight: Optional[ArrayLike] of shape (n_samples,) + Sample weights. If None, then samples are equally weighted. + By default ``None``. + + **fit_params : dict + Additional fit parameters. + + Returns + ------- + RegressorMixin + Fitted estimator. + """ + print() + print("EC : use of _fit_oof_estimator :") + X_train = _safe_indexing(X, train_index) + print() + print("X_train", X_train, "shape_X_train", X_train.shape) + y_train = _safe_indexing(y, train_index) + print() + print("y_train", y_train,"shape_y_train", y_train.shape) + if not (sample_weight is None): + sample_weight = _safe_indexing(sample_weight, train_index) + sample_weight = cast(NDArray, sample_weight) + print() + print("sample_weight", sample_weight) + + estimator = fit_estimator( + estimator, + X_train, + y_train, + sample_weight=sample_weight, + **fit_params + ) + print() + print("estimator:", estimator) + return estimator + + def _predict_proba_oof_estimator(self, estimator, X): + print() + print("EC : use of _predict_proba_oof_estimator") + y_pred_proba = estimator.predict_proba(X) + if len(estimator.classes_) != self.n_classes: + y_pred_proba = fix_number_of_classes( + self.n_classes, + estimator.classes_, + y_pred_proba + ) + return y_pred_proba + + def _predict_proba_calib_oof_estimator( + self, + estimator: ClassifierMixin, + X: ArrayLike, + val_index: ArrayLike, + k: int + ) -> Tuple[NDArray, ArrayLike]: + """ + Perform predictions on a single out-of-fold model on a validation set. + + Parameters + ---------- + estimator: RegressorMixin + Estimator to train. + + X: ArrayLike of shape (n_samples, n_features) + Input data. + + val_index: ArrayLike of shape (n_samples_val) + Validation data indices. + + Returns + ------- + Tuple[NDArray, ArrayLike] + Predictions of estimator from val_index of X. 
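The per-fold probability fix-up used above is needed because a fold may never see some labels; a sketch of what a ``fix_number_of_classes``-style remap has to do (an illustrative re-implementation, not the helper from ``mapie.utils``):

    import numpy as np

    def pad_missing_classes(n_classes, fold_classes, y_pred_proba):
        # Re-map the fold's probability columns onto the full class set,
        # leaving zero probability for classes the fold never saw.
        out = np.zeros((y_pred_proba.shape[0], n_classes))
        out[:, fold_classes] = y_pred_proba
        return out

    # A fold that only saw classes 0 and 2 out of 3:
    proba = np.array([[0.7, 0.3], [0.1, 0.9]])
    print(pad_missing_classes(3, np.array([0, 2]), proba))
    # [[0.7 0.  0.3]
    #  [0.1 0.  0.9]]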
+ """ + print() + print("EC : use of _predict_proba_calib_oof_estimator") + X_val = _safe_indexing(X, val_index) + if _num_samples(X_val) > 0: + y_pred_proba = self._predict_proba_oof_estimator( + estimator, X_val + ) + else: + y_pred_proba = np.array([]) + val_id = np.full(len(X_val), k, dtype=int) + return y_pred_proba, val_id, val_index + + def _aggregate_with_mask( + self, + x: NDArray, + k: NDArray + ) -> NDArray: + """ + Take the array of predictions, made by the refitted estimators, + on the testing set, and the 1-or-nan array indicating for each training + sample which one to integrate, and aggregate to produce phi-{t}(x_t) + for each training sample x_t. + + Parameters + ---------- + x: ArrayLike of shape (n_samples_test, n_estimators) + Array of predictions, made by the refitted estimators, + for each sample of the testing set. + + k: ArrayLike of shape (n_samples_training, n_estimators) + 1-or-nan array: indicates whether to integrate the prediction + of a given estimator into the aggregation, for each training + sample. + + Returns + ------- + ArrayLike of shape (n_samples_test,) + Array of aggregated predictions for each testing sample. + """ + print() + print("EC : use of _aggregate_with_mask") + if self.method in self.no_agg_methods_ or self.use_split_method_: + raise ValueError( + "There should not be aggregation of predictions " + f"if cv is in '{self.no_agg_cv_}', if cv >=2 " + f"or if method is in '{self.no_agg_methods_}'." + ) + elif self.agg_function == "median": + return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1)) + # To aggregate with mean() the aggregation coud be done + # with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1). + # However, phi2D contains a np.apply_along_axis loop which + # is much slower than the matrices multiplication that can + # be used to compute the means. + elif self.agg_function in ["mean", None]: + K = np.nan_to_num(k, nan=0.0) + return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T) + else: + raise ValueError("The value of self.agg_function is not correct") + + def _pred_multi(self, X: ArrayLike) -> NDArray: + """ + Return a prediction per train sample for each test sample, by + aggregation with matrix ``k_``. + + Parameters + ---------- + X: ArrayLike of shape (n_samples_test, n_features) + Input data + + Returns + ------- + NDArray of shape (n_samples_test, n_samples_train) + """ + print() + print("EC : use of _pred_multi") + y_pred_multi = np.column_stack( + [e.predict(X) for e in self.estimators_] + ) + # At this point, y_pred_multi is of shape + # (n_samples_test, n_estimators_). The method + # ``_aggregate_with_mask`` fits it to the right size + # thanks to the shape of k_. + y_pred_multi = self._aggregate_with_mask(y_pred_multi, self.k_) + return y_pred_multi + + def predict_proba_calib( + self, + X: ArrayLike, + y: Optional[ArrayLike] = None, + y_enc=None, + groups: Optional[ArrayLike] = None + ) -> NDArray: + """ + Perform predictions on X : the calibration set. + + Parameters + ---------- + X: ArrayLike of shape (n_samples_test, n_features) + Input data + + y: Optional[ArrayLike] of shape (n_samples_test,) + Input labels. + + By default ``None``. + + groups: Optional[ArrayLike] of shape (n_samples_test,) + Group labels for the samples used while splitting the dataset into + train/test set. + + By default ``None``. + + Returns + ------- + NDArray of shape (n_samples_test, 1) + The predictions. 
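Mechanically, ``predict_proba_calib`` gathers each fold's out-of-fold probabilities and scatters them back into one array by validation index; a toy sketch of that scatter (shapes and values are illustrative):

    import numpy as np

    n_samples, n_classes = 6, 2
    y_pred_proba = np.empty((n_samples, n_classes))

    # Each fold returns (probabilities, fold id, validation indices).
    fold_outputs = [
        (np.full((3, n_classes), 0.25), 0, np.array([0, 2, 4])),
        (np.full((3, n_classes), 0.75), 1, np.array([1, 3, 5])),
    ]
    k_ = np.empty(n_samples, dtype=int)
    for proba, k, val_index in fold_outputs:
        k_[val_index] = k                # which fold predicted each sample
        y_pred_proba[val_index] = proba  # scatter the fold's predictions back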
+ """ + print() + print("EC : use of predict_proba_calib") + check_is_fitted(self, self.fit_attributes) + + if self.cv == "prefit": + y_pred_proba = self.single_estimator_.predict_proba(X) + else: + y_pred_proba = np.empty( + (len(X), self.n_classes), + dtype=float + ) + cv = cast(BaseCrossValidator, self.cv) + outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( + delayed(self._predict_proba_calib_oof_estimator)( + estimator, X, calib_index, k + ) + for k, ((_, calib_index), estimator) in enumerate(zip( + cv.split(X, y, groups), + self.estimators_ + )) + ) + ( + predictions_list, + val_ids_list, + val_indices_list + ) = map(list, zip(*outputs)) + + predictions = np.concatenate( + cast(List[NDArray], predictions_list) + ) + val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) + val_indices = np.concatenate( + cast(List[NDArray], val_indices_list) + ) + self.k_[val_indices] = val_ids + y_pred_proba[val_indices] = predictions + + if isinstance(cv, ShuffleSplit): + # Should delete values indices that + # are not used during calibration + self.k_ = self.k_[val_indices] + y_pred_proba = y_pred_proba[val_indices] + y_enc = y_enc[val_indices] + y = cast(NDArray, y)[val_indices] + + return y_pred_proba, y, y_enc + + def fit( + self, + X: ArrayLike, + y: ArrayLike, + y_enc: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **fit_params, + ) -> EnsembleClassifier: + """ + Fit the base estimator under the ``single_estimator_`` attribute. + Fit all cross-validated estimator clones + and rearrange them into a list, the ``estimators_`` attribute. + Out-of-fold conformity scores are stored under + the ``conformity_scores_`` attribute. + + Parameters + ---------- + X: ArrayLike of shape (n_samples, n_features) + Input data. + + y: ArrayLike of shape (n_samples,) + Input labels. + + sample_weight: Optional[ArrayLike] of shape (n_samples,) + Sample weights. If None, then samples are equally weighted. + + By default ``None``. + + groups: Optional[ArrayLike] of shape (n_samples,) + Group labels for the samples used while splitting the dataset into + train/test set. + + By default ``None``. + + **fit_params : dict + Additional fit parameters. + + Returns + ------- + EnsembleRegressor + The estimator fitted. 
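The cross-fitting described here reduces to one ``joblib`` task per fold; a condensed sketch under the same scikit-learn/joblib APIs the file already imports (data and fold count are illustrative):

    import numpy as np
    from joblib import Parallel, delayed
    from sklearn.base import clone
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import KFold

    rng = np.random.RandomState(0)
    X, y = rng.normal(size=(30, 4)), rng.randint(0, 3, size=30)
    estimator, cv = LogisticRegression(), KFold(n_splits=5)

    def fit_fold(train_index):
        # Each worker fits a fresh clone on its fold's training indices.
        return clone(estimator).fit(X[train_index], y[train_index])

    estimators_ = Parallel(n_jobs=2)(
        delayed(fit_fold)(train_index) for train_index, _ in cv.split(X, y)
    )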
+        """
+        print()
+        print("EC : USE OF FIT")
+        # Initialization
+        single_estimator_: ClassifierMixin
+        estimators_: List[ClassifierMixin] = []
+        full_indexes = np.arange(_num_samples(X))
+        print()
+        print("full_indexes", full_indexes)
+        cv = self.cv
+        print()
+        print("cv", cv)
+        self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
+        print()
+        print("self.use_split_method_",self.use_split_method_)
+        estimator = self.estimator
+        print()
+        print("estimator: ", estimator)
+        n_samples = _num_samples(y)
+        print()
+        print("n_samples", n_samples)
+
+        # Computation
+        if cv == "prefit":
+            single_estimator_ = estimator
+            self.k_ = np.full(
+                shape=(n_samples, 1), fill_value=np.nan, dtype=float
+            )
+        else:
+            single_estimator_ = self._fit_oof_estimator(
+                clone(estimator),
+                X,
+                y,
+                full_indexes,
+                sample_weight,
+                **fit_params
+            )
+            print()
+            print("single_estimator_ :",single_estimator_)
+            cv = cast(BaseCrossValidator, cv)
+            print()
+            print("cv: ", cv)
+            self.k_ = np.empty_like(y, dtype=int)
+            print("self.k_", self.k_, "shape_of k_ :", self.k_.shape, "unique_values :", np.unique(self.k_))
+            estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
+                delayed(self._fit_oof_estimator)(
+                    clone(estimator),
+                    X,
+                    y_enc,
+                    train_index,
+                    sample_weight,
+                    **fit_params
+                )
+                for train_index, _ in cv.split(X, y, groups)
+            )
+        # In split-CP, we keep only the model fitted on train dataset
+        if self.use_split_method_:
+            single_estimator_ = estimators_[0]
+
+        self.single_estimator_ = single_estimator_
+        print()
+        print("self.single_estimator_", self.single_estimator_)
+        self.estimators_ = estimators_
+        print()
+        print("self.estimators_", self.estimators_)
+
+        return self
+
+    def predict(
+        self,
+        X: ArrayLike,
+        agg_scores: str
+    ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]:
+        """
+        Predict class probabilities for X, aggregating the out-of-fold
+        estimators according to ``agg_scores``.
+
+        Parameters
+        ----------
+        X: ArrayLike of shape (n_samples, n_features)
+            Test data.
+
+        agg_scores: str
+            How to aggregate the probabilities output by the out-of-fold
+            estimators. ``"mean"`` averages them over the estimators;
+            ``"crossval"`` keeps, for each training sample, the
+            probabilities predicted by the estimator that did not see it
+            during fit.
+
+            If ``cv`` is ``"prefit"``, ``agg_scores`` is ignored and the
+            probabilities of the prefitted estimator are returned.
+
+        Returns
+        -------
+        NDArray
+            - Of shape (n_samples, n_classes) if ``cv`` is ``"prefit"``
+              or ``agg_scores`` is ``"mean"``.
+            - Of shape (n_samples, n_classes, n_samples_train) if
+              ``agg_scores`` is ``"crossval"``.
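The two ``agg_scores`` modes handled in the body below differ mainly in which axis survives aggregation; a shape-level sketch (toy arrays; ``k_`` is assumed to hold, for each training sample, the index of the fold that held it out):

    import numpy as np

    n_estimators, n_test, n_classes, n_train = 3, 4, 2, 5
    rng = np.random.RandomState(0)
    y_pred_proba_k = rng.dirichlet(np.ones(n_classes), (n_estimators, n_test))
    k_ = rng.randint(0, n_estimators, n_train)  # fold of each training sample

    # "mean": average over the estimators -> (n_test, n_classes)
    mean_agg = np.mean(y_pred_proba_k, axis=0)

    # "crossval": for each training sample, keep the probabilities of the
    # estimator that did not see it -> (n_test, n_classes, n_train)
    crossval_agg = np.moveaxis(y_pred_proba_k[k_], 0, 2)
    print(mean_agg.shape, crossval_agg.shape)  # (4, 2) (4, 2, 5)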
+ """ + print() + print("EC : use of predict") + check_is_fitted(self, self.fit_attributes) + + if self.cv == "prefit": + y_pred_proba = self.single_estimator_.predict_proba(X) + else: + y_pred_proba_k = np.asarray( + Parallel( + n_jobs=self.n_jobs, verbose=self.verbose + )( + delayed(self._predict_proba_oof_estimator)(estimator, X) + for estimator in self.estimators_ + ) + ) + if agg_scores == "crossval": + y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) + elif agg_scores == "mean": + y_pred_proba = np.mean(y_pred_proba_k, axis=0) + else: + raise ValueError("Invalid 'agg_scores' argument.") + # y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) + return y_pred_proba From 04d44b8271b44f457c27318fa602bb72f9a9db5c Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Mon, 29 Apr 2024 18:06:16 +0200 Subject: [PATCH 8/9] add new print --- mapie/classification_draft.py | 30 ++++++++++++++++++++- mapie/estimator/estimator_draft.py | 42 +++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/mapie/classification_draft.py b/mapie/classification_draft.py index 7c9c11597..333d7176f 100644 --- a/mapie/classification_draft.py +++ b/mapie/classification_draft.py @@ -1194,6 +1194,12 @@ def fit( self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 ) + + print() + print("conformity_scores",self.conformity_scores_ ) + print() + print("y_enc", y_enc) + elif self.method in ["cumulated_score", "aps", "raps"]: self.conformity_scores_, self.cutoff = ( self._get_true_label_cumsum_proba( @@ -1296,30 +1302,52 @@ def predict( (n_samples,) and (n_samples, n_classes, n_alpha) if alpha is not None. """ print() - print("use of predict") + print("USE OF PREDICT") if self.method == "top_k": agg_scores = "mean" # Checks cv = check_cv( self.cv, test_size=self.test_size, random_state=self.random_state ) + print() + print("cv",cv) include_last_label = self._check_include_last_label(include_last_label) + print() + print("include_last_label",include_last_label) alpha = cast(Optional[NDArray], check_alpha(alpha)) + print() + print("alpha", alpha) check_is_fitted(self, self.fit_attributes) lambda_star, k_star = None, None # Estimate prediction sets + + print() + print(self.estimator_.single_estimator_) y_pred = self.estimator_.single_estimator_.predict(X) + print() + print("y_pred", y_pred) if alpha is None: return y_pred n = len(self.conformity_scores_) + + print() + print("n",n) + + print() + print("alpha",alpha) # Estimate of probabilities from estimator(s) # In all cases: len(y_pred_proba.shape) == 3 # with (n_test, n_classes, n_alpha or n_train_samples) alpha_np = cast(NDArray, alpha) + + print() + print("alpha_np",alpha_np) + check_alpha_and_n_samples(alpha_np, n) + y_pred_proba = self.estimator_.predict( X, agg_scores ) diff --git a/mapie/estimator/estimator_draft.py b/mapie/estimator/estimator_draft.py index 15fa6694f..967a86577 100644 --- a/mapie/estimator/estimator_draft.py +++ b/mapie/estimator/estimator_draft.py @@ -3,6 +3,7 @@ from typing import List, Optional, Tuple, Union, cast import numpy as np +import inspect from joblib import Parallel, delayed from sklearn.base import ClassifierMixin, RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator, ShuffleSplit @@ -223,6 +224,7 @@ def _predict_proba_oof_estimator(self, estimator, X): y_pred_proba ) return y_pred_proba + def _predict_proba_calib_oof_estimator( self, @@ -253,13 +255,19 @@ def _predict_proba_calib_oof_estimator( print() print("EC : 
use of _predict_proba_calib_oof_estimator") X_val = _safe_indexing(X, val_index) + print() + print("X_val:", X_val, "shape", X_val.shape) if _num_samples(X_val) > 0: y_pred_proba = self._predict_proba_oof_estimator( estimator, X_val ) + print() + print("y_pred_proba", y_pred_proba, "shape:", y_pred_proba.shape) else: y_pred_proba = np.array([]) val_id = np.full(len(X_val), k, dtype=int) + print() + print("val_id :", val_id, "val_index: ", val_index) return y_pred_proba, val_id, val_index def _aggregate_with_mask( @@ -368,26 +376,35 @@ def predict_proba_calib( The predictions. """ print() - print("EC : use of predict_proba_calib") + print("EC : USE OF PREDICT_PROBA_CALIB") check_is_fitted(self, self.fit_attributes) if self.cv == "prefit": y_pred_proba = self.single_estimator_.predict_proba(X) + print() + print("dans le cas prefit: ", y_pred_proba) else: y_pred_proba = np.empty( (len(X), self.n_classes), dtype=float ) + print() + print("y_pred_proba", y_pred_proba,"y_pred_proba_shape", y_pred_proba.shape, "y_pred_proba_max :", np.max(y_pred_proba), "y_pred_proba_min :", np.min(y_pred_proba)) cv = cast(BaseCrossValidator, self.cv) + print() + print("cv", cv) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( - delayed(self._predict_proba_calib_oof_estimator)( + delayed( + self._predict_proba_calib_oof_estimator)( estimator, X, calib_index, k ) + for k, ((_, calib_index), estimator) in enumerate(zip( cv.split(X, y, groups), self.estimators_ )) ) + ( predictions_list, val_ids_list, @@ -397,20 +414,38 @@ def predict_proba_calib( predictions = np.concatenate( cast(List[NDArray], predictions_list) ) + print() + print("predictions",predictions, "shape", predictions.shape) val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) + print() + print("val_ids", val_ids) val_indices = np.concatenate( cast(List[NDArray], val_indices_list) ) + print() + print("val_indices", val_indices) self.k_[val_indices] = val_ids + print() + print("self.k_[val_indices]", self.k_[val_indices]) y_pred_proba[val_indices] = predictions + print() + print("y_pred_proba[val_indices]: ", y_pred_proba[val_indices]) if isinstance(cv, ShuffleSplit): # Should delete values indices that # are not used during calibration + print() + print("on est dans le cas cv = shuffle split") self.k_ = self.k_[val_indices] + print() + print("self.k_ :", self.k_) y_pred_proba = y_pred_proba[val_indices] + print() + print("y_pred_proba", y_pred_proba) y_enc = y_enc[val_indices] + print("y_enc", y_enc) y = cast(NDArray, y)[val_indices] + print("y", y) return y_pred_proba, y, y_enc @@ -514,7 +549,6 @@ def fit( # In split-CP, we keep only the model fitted on train dataset if self.use_split_method_: single_estimator_ = estimators_[0] - self.single_estimator_ = single_estimator_ print() print("self.single_estimator_", self.single_estimator_) @@ -577,6 +611,7 @@ def predict( for estimator in self.estimators_ ) ) + if agg_scores == "crossval": y_pred_proba = np.moveaxis(y_pred_proba_k[self.k_], 0, 2) elif agg_scores == "mean": @@ -584,4 +619,5 @@ def predict( else: raise ValueError("Invalid 'agg_scores' argument.") # y_pred_proba = self._check_proba_normalized(y_pred_proba, axis=1) + return y_pred_proba From 3c3546e2c6a2b76c516e5e87483f97e75515a164 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Tue, 30 Apr 2024 17:33:09 +0200 Subject: [PATCH 9/9] add new print --- mapie/classification_draft.py | 11 +++++++++-- mapie/estimator/estimator_draft.py | 21 ++++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git 
a/mapie/classification_draft.py b/mapie/classification_draft.py
index 333d7176f..e783dfabc 100644
--- a/mapie/classification_draft.py
+++ b/mapie/classification_draft.py
@@ -1169,6 +1169,8 @@ def fit(
         )
 
         self.estimator_.fit(X, y, y_enc, sample_weight, groups, **fit_params)
+        print()
+        print("self.estimator_", self.estimator_)
 
         y_pred_proba, y, y_enc = self.estimator_.predict_proba_calib(
             X, y, y_enc, groups
@@ -1196,7 +1198,7 @@ def fit(
         )
 
         print()
-        print("conformity_scores",self.conformity_scores_ )
+        print("conformity_scores",self.conformity_scores_, "shape", self.conformity_scores_.shape)
         print()
         print("y_enc", y_enc)
@@ -1356,7 +1358,6 @@ def predict(
             y_pred_proba = np.repeat(
                 y_pred_proba[:, :, np.newaxis], len(alpha_np), axis=2
             )
-
         # Choice of the quantile
         check_alpha_and_n_samples(alpha_np, n)
@@ -1407,6 +1408,12 @@ def predict(
                 prediction_sets = np.greater_equal(
                     y_pred_proba - (1 - self.quantiles_), -EPSILON
                 )
+
+                print()
+                print("self.quantiles_", self.quantiles_)
+                print()
+                print("prediction_sets", prediction_sets)
+
             else:
                 y_pred_included = np.less_equal(
                     (1 - y_pred_proba) - self.conformity_scores_.ravel(),
diff --git a/mapie/estimator/estimator_draft.py b/mapie/estimator/estimator_draft.py
index 967a86577..b56917d9d 100644
--- a/mapie/estimator/estimator_draft.py
+++ b/mapie/estimator/estimator_draft.py
@@ -344,6 +344,17 @@ def _pred_multi(self, X: ArrayLike) -> NDArray:
         y_pred_multi = self._aggregate_with_mask(y_pred_multi, self.k_)
         return y_pred_multi
 
+
+    def process_estimator(self, k, calib_index, estimator, X):
+        print()
+        print("USE OF PROCESS ESTIMATOR")
+        print()
+        print("k:", k, "calib_index:", calib_index, "estimator:", estimator)  # display the values received by each worker
+        return self._predict_proba_calib_oof_estimator(estimator, X, calib_index, k)
+
+
+
+
     def predict_proba_calib(
         self,
         X: ArrayLike,
@@ -393,6 +404,12 @@ def predict_proba_calib(
         cv = cast(BaseCrossValidator, self.cv)
         print()
         print("cv", cv)
+
+        # outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
+        #     delayed(self.process_estimator)(k, calib_index, estimator, X)
+        #     for k, ((_, calib_index), estimator) in enumerate(zip(cv.split(X, y, groups), self.estimators_))
+        # )
+
         outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
             delayed(
                 self._predict_proba_calib_oof_estimator)(
                 estimator, X, calib_index, k
             )
@@ -548,13 +565,15 @@ def fit(
             )
         # In split-CP, we keep only the model fitted on train dataset
         if self.use_split_method_:
+            print()
+            print("estimators_", estimators_)
             single_estimator_ = estimators_[0]
         self.single_estimator_ = single_estimator_
         print()
         print("self.single_estimator_", self.single_estimator_)
         self.estimators_ = estimators_
         print()
-        print("self.estimators_", self.estimators_)
+        print("estimators_", estimators_)
 
         return self
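For reference, the ``lac`` branch whose intermediate values the added prints expose reduces to a single threshold test per label; a worked toy example (``EPSILON`` stands in for the module's small tolerance constant):

    import numpy as np

    EPSILON = np.finfo(np.float64).eps  # stand-in for the module's tolerance

    y_pred_proba = np.array([[0.60, 0.30, 0.10],
                             [0.40, 0.35, 0.25]])[:, :, np.newaxis]
    quantiles_ = np.array([0.72])  # (1 - alpha) quantile of conformity scores

    # A label enters the set when its conformity score 1 - p does not
    # exceed the quantile, i.e. p >= 1 - quantile (up to the tolerance).
    prediction_sets = np.greater_equal(
        y_pred_proba - (1 - quantiles_), -EPSILON
    )
    print(prediction_sets[:, :, 0])
    # [[ True  True False]
    #  [ True  True False]]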