ENH _fit_and_score now returns a dictionary #17332

Merged (7 commits) on Jun 26, 2020

40 changes: 13 additions & 27 deletions sklearn/model_selection/_search.py
@@ -368,13 +368,12 @@ def fit_grid_point(X, y, estimator, parameters, train, test, scorer,
# NOTE we are not using the return value as the scorer by itself should be
# validated before. We use check_scoring only to reject multimetric scorer
check_scoring(estimator, scorer)
scores, n_samples_test = _fit_and_score(estimator, X, y,
scorer, train,
test, verbose, parameters,
fit_params=fit_params,
return_n_test_samples=True,
error_score=error_score)
return scores, parameters, n_samples_test
results = _fit_and_score(estimator, X, y, scorer, train,
test, verbose, parameters,
fit_params=fit_params,
return_n_test_samples=True,
error_score=error_score)
return results["test_scores"], parameters, results["n_test_samples"]
Member:
Would it make sense to get the parameters from the dict?

Suggested change
return results["test_scores"], parameters, results["n_test_samples"]
return results["test_scores"], results["parameters"], results["n_test_samples"]

Member Author:
In this case, I would say no: return_parameters is set to False, so "parameters" is not in the dict, and fit_grid_point is deprecated.

Member:
OK, makes sense.
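
For context, a minimal sketch (not library code) of which keys end up in the dictionary that _fit_and_score now returns, depending on the return_* flags; fit_grid_point above only requests return_n_test_samples=True, which is why "parameters" is absent there:

# Illustrative only: mirrors the flag handling shown in the _fit_and_score diff further down.
def result_keys(return_train_score=False, return_n_test_samples=False,
                return_times=False, return_parameters=False,
                return_estimator=False):
    keys = ["test_scores"]          # always present
    if return_train_score:
        keys.append("train_scores")
    if return_n_test_samples:
        keys.append("n_test_samples")
    if return_times:
        keys += ["fit_time", "score_time"]
    if return_parameters:
        keys.append("parameters")
    if return_estimator:
        keys.append("estimator")
    return keys

print(result_keys(return_n_test_samples=True))
# ['test_scores', 'n_test_samples']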



def _check_param_grid(param_grid):
@@ -805,20 +804,7 @@ def evaluate_candidates(candidate_params):

def _format_results(self, candidate_params, scorers, n_splits, out):
n_candidates = len(candidate_params)

# if one choose to see train score, "out" will contain train score info
if self.return_train_score:
(train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
score_time) = zip(*out)
else:
(test_score_dicts, test_sample_counts, fit_time,
score_time) = zip(*out)

# test_score_dicts and train_score dicts are lists of dictionaries and
# we make them into dict of lists
test_scores = _aggregate_score_dicts(test_score_dicts)
if self.return_train_score:
train_scores = _aggregate_score_dicts(train_score_dicts)
out = _aggregate_score_dicts(out)

results = {}

Expand Down Expand Up @@ -846,8 +832,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
results["rank_%s" % key_name] = np.asarray(
rankdata(-array_means, method='min'), dtype=np.int32)

_store('fit_time', fit_time)
_store('score_time', score_time)
_store('fit_time', out["fit_time"])
_store('score_time', out["score_time"])
# Use one MaskedArray and mask all the places where the param is not
# applicable for that candidate. Use defaultdict as each candidate may
# not contain all the params
@@ -866,11 +852,11 @@
# Store a list of param dicts at the key 'params'
results['params'] = candidate_params

# NOTE test_sample counts (weights) remain the same for all candidates
test_sample_counts = np.array(test_sample_counts[:n_splits],
dtype=int)
test_scores = _aggregate_score_dicts(out["test_scores"])
if self.return_train_score:
train_scores = _aggregate_score_dicts(out["train_scores"])

for scorer_name in scorers.keys():
for scorer_name in test_scores:
# Computed the (weighted) mean and std for test scores alone
_store('test_%s' % scorer_name, test_scores[scorer_name],
splits=True, rank=True,
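
Both _format_results above and cross_validate below lean on _aggregate_score_dicts to flip the list of per-split result dicts returned by the parallel loop into a single dict of per-key arrays. A rough sketch of the idea, assuming numeric values only (the real helper also has to cope with non-numeric entries such as fitted estimators or per-split score dicts):

import numpy as np

def aggregate_score_dicts(scores):
    # One dict per CV split in, one array per key out.
    return {key: np.asarray([score[key] for score in scores])
            for key in scores[0]}

per_split = [{"fit_time": 0.10, "score_time": 0.01},
             {"fit_time": 0.12, "score_time": 0.02},
             {"fit_time": 0.11, "score_time": 0.01}]
print(aggregate_score_dicts(per_split))
# {'fit_time': array([0.1 , 0.12, 0.11]), 'score_time': array([0.01, 0.02, 0.01])}
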
108 changes: 54 additions & 54 deletions sklearn/model_selection/_validation.py
@@ -239,35 +239,34 @@ def cross_validate(estimator, X, y=None, *, groups=None, scoring=None, cv=None,
# independent, and that it is pickle-able.
parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
pre_dispatch=pre_dispatch)
scores = parallel(
results = parallel(
delayed(_fit_and_score)(
clone(estimator), X, y, scorers, train, test, verbose, None,
fit_params, return_train_score=return_train_score,
return_times=True, return_estimator=return_estimator,
error_score=error_score)
for train, test in cv.split(X, y, groups))

zipped_scores = list(zip(*scores))
if return_train_score:
train_scores = zipped_scores.pop(0)
train_scores = _aggregate_score_dicts(train_scores)
results = _aggregate_score_dicts(results)
if return_estimator:
fitted_estimators = zipped_scores.pop()
test_scores, fit_times, score_times = zipped_scores
test_scores = _aggregate_score_dicts(test_scores)
fitted_estimators = results["estimator"]

ret = {}
ret['fit_time'] = np.array(fit_times)
ret['score_time'] = np.array(score_times)
ret['fit_time'] = results["fit_time"]
ret['score_time'] = results["score_time"]

if return_estimator:
ret['estimator'] = fitted_estimators

for name in scorers:
ret['test_%s' % name] = np.array(test_scores[name])
test_scores = _aggregate_score_dicts(results["test_scores"])
if return_train_score:
train_scores = _aggregate_score_dicts(results["train_scores"])

for name in test_scores:
ret['test_%s' % name] = test_scores[name]
if return_train_score:
key = 'train_%s' % name
ret[key] = np.array(train_scores[name])
ret[key] = train_scores[name]

return ret
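
The user-facing contract of cross_validate is unchanged by this refactor; only the internal bookkeeping moves from positional unpacking to dict lookups. A short usage sketch of the returned dict and its keys:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

X, y = load_iris(return_X_y=True)
cv_results = cross_validate(LogisticRegression(max_iter=1000), X, y, cv=5,
                            scoring=("accuracy", "f1_macro"),
                            return_train_score=True)
print(sorted(cv_results))
# ['fit_time', 'score_time', 'test_accuracy', 'test_f1_macro',
#  'train_accuracy', 'train_f1_macro']
print(cv_results["test_accuracy"].shape)   # (5,): one entry per CV split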

@@ -484,27 +483,22 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,

Returns
-------
train_scores : dict of scorer name -> float
Score on training set (for all the scorers),
returned only if `return_train_score` is `True`.

test_scores : dict of scorer name -> float
Score on testing set (for all the scorers).

n_test_samples : int
Number of test samples.

fit_time : float
Time spent for fitting in seconds.

score_time : float
Time spent for scoring in seconds.

parameters : dict or None
The parameters that have been evaluated.

estimator : estimator object
The fitted estimator
result : dict with the following attributes
train_scores : dict of scorer name -> float
Score on training set (for all the scorers),
returned only if `return_train_score` is `True`.
test_scores : dict of scorer name -> float
Score on testing set (for all the scorers).
n_test_samples : int
Number of test samples.
fit_time : float
Time spent for fitting in seconds.
score_time : float
Time spent for scoring in seconds.
parameters : dict or None
The parameters that have been evaluated.
estimator : estimator object
The fitted estimator.
"""
progress_msg = ""
if verbose > 2:
@@ -529,7 +523,6 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
fit_params = fit_params if fit_params is not None else {}
fit_params = _check_fit_params(X, fit_params, train)

train_scores = {}
if parameters is not None:
# clone after setting parameters in case any parameters
# are estimators (like pipeline steps)
@@ -545,6 +538,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
X_train, y_train = _safe_split(estimator, X, y, train)
X_test, y_test = _safe_split(estimator, X, y, test, train)

result = {}
try:
if y_train is None:
estimator.fit(X_train, **fit_params)
@@ -575,7 +569,6 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
raise ValueError("error_score must be the string 'raise' or a"
" numeric value. (Hint: if using 'raise', please"
" make sure that it has been spelled correctly.)")

else:
fit_time = time.time() - start_time
test_scores = _score(estimator, X_test, y_test, scorer)
@@ -602,17 +595,19 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
end_msg += result_msg
print(end_msg)

ret = [train_scores, test_scores] if return_train_score else [test_scores]

result["test_scores"] = test_scores
if return_train_score:
result["train_scores"] = train_scores
if return_n_test_samples:
ret.append(_num_samples(X_test))
result["n_test_samples"] = _num_samples(X_test)
if return_times:
ret.extend([fit_time, score_time])
result["fit_time"] = fit_time
result["score_time"] = score_time
if return_parameters:
ret.append(parameters)
result["parameters"] = parameters
if return_estimator:
ret.append(estimator)
return ret
result["estimator"] = estimator
return result
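
Stepping back from the diff: downstream callers now read optional fields by name instead of tracking tuple positions that shift with the return_* flags. A tiny sketch with made-up values (key names as in the Returns section above):

result = {"test_scores": {"accuracy": 0.93}, "fit_time": 0.12,
          "score_time": 0.01, "n_test_samples": 30}

print(result["test_scores"]["accuracy"])   # 0.93
print(result.get("parameters"))            # None: only present when
                                           # return_parameters=True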


def _score(estimator, X_test, y_test, scorer):
@@ -1296,23 +1291,27 @@ def learning_curve(estimator, X, y, *, groups=None,
out = parallel(delayed(_incremental_fit_estimator)(
clone(estimator), X, y, classes, train, test, train_sizes_abs,
scorer, verbose, return_times) for train, test in cv_iter)
out = np.asarray(out).transpose((2, 1, 0))
else:
train_test_proportions = []
for train, test in cv_iter:
for n_train_samples in train_sizes_abs:
train_test_proportions.append((train[:n_train_samples], test))

out = parallel(delayed(_fit_and_score)(
results = parallel(delayed(_fit_and_score)(
clone(estimator), X, y, scorer, train, test, verbose,
parameters=None, fit_params=None, return_train_score=True,
error_score=error_score, return_times=return_times)
for train, test in train_test_proportions)
out = np.array(out)
n_cv_folds = out.shape[0] // n_unique_ticks
dim = 4 if return_times else 2
out = out.reshape(n_cv_folds, n_unique_ticks, dim)
results = _aggregate_score_dicts(results)
train_scores = results["train_scores"].reshape(-1, n_unique_ticks).T
test_scores = results["test_scores"].reshape(-1, n_unique_ticks).T
out = [train_scores, test_scores]

out = np.asarray(out).transpose((2, 1, 0))
if return_times:
fit_times = results["fit_time"].reshape(-1, n_unique_ticks).T
score_times = results["score_time"].reshape(-1, n_unique_ticks).T
out.extend([fit_times, score_times])

ret = train_sizes_abs, out[0], out[1]
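
The reshape bookkeeping above relies on the parallel results coming back fold-major: for each CV split, every train-size tick in order, exactly as train_test_proportions was built. A small numeric sketch with made-up scores (validation_curve below applies the same pattern with n_params in place of n_unique_ticks):

import numpy as np

n_ticks = 2   # two train-size ticks over three CV folds
flat_test_scores = np.array([0.70, 0.80,    # fold 0: tick 0, tick 1
                             0.65, 0.85,    # fold 1
                             0.60, 0.90])   # fold 2

test_scores = flat_test_scores.reshape(-1, n_ticks).T
print(test_scores.shape)   # (2, 3): one row per tick, one column per fold
print(test_scores[0])      # [0.7  0.65 0.6 ]  test scores at the first tick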

@@ -1522,18 +1521,19 @@ def validation_curve(estimator, X, y, *, param_name, param_range, groups=None,

parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
verbose=verbose)
out = parallel(delayed(_fit_and_score)(
results = parallel(delayed(_fit_and_score)(
clone(estimator), X, y, scorer, train, test, verbose,
parameters={param_name: v}, fit_params=None, return_train_score=True,
error_score=error_score)
# NOTE do not change order of iteration to allow one time cv splitters
for train, test in cv.split(X, y, groups) for v in param_range)
out = np.asarray(out)
n_params = len(param_range)
n_cv_folds = out.shape[0] // n_params
out = out.reshape(n_cv_folds, n_params, 2).transpose((2, 1, 0))

return out[0], out[1]
results = _aggregate_score_dicts(results)
train_scores = results["train_scores"].reshape(-1, n_params).T
test_scores = results["test_scores"].reshape(-1, n_params).T

return train_scores, test_scores


def _aggregate_score_dicts(scores):
2 changes: 1 addition & 1 deletion sklearn/model_selection/tests/test_validation.py
@@ -1700,7 +1700,7 @@ def test_fit_and_score_working():
'return_parameters': True}
result = _fit_and_score(*fit_and_score_args,
**fit_and_score_kwargs)
assert result[-1] == fit_and_score_kwargs['parameters']
assert result['parameters'] == fit_and_score_kwargs['parameters']


def three_params_scorer(i, j, k):