Skip to content

Commit 14e7c32

Browse files
jnothman authored and ogrisel committed
Restructure access to vendored/site Joblib (scikit-learn#11471)
In order to fix scikit-learn#11408, this swaps `joblib` and `_joblib`. However, it allows users to access joblib's `Memory` or `Parallel` functionality without accessing `sklearn.externals._joblib`, by importing `Memory`, `Parallel`, etc. into `sklearn.utils`.
1 parent 8a15053 commit 14e7c32

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+151
-88
lines changed

.travis.yml

+5-3
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ matrix:
3838
NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2"
3939
PILLOW_VERSION="4.0.0" COVERAGE=true
4040
if: type != cron
41-
# This environment tests the newest supported Anaconda release (5.0.0)
42-
# It also runs tests requiring Pandas and PyAMG
41+
# This environment tests the newest supported Anaconda release.
42+
# It runs tests requiring pandas and PyAMG.
43+
# It also runs with the site joblib instead of the vendored copy of joblib.
4344
- env: DISTRIB="conda" PYTHON_VERSION="3.6.2" INSTALL_MKL="true"
4445
NUMPY_VERSION="1.14.2" SCIPY_VERSION="1.0.0" PANDAS_VERSION="0.20.3"
4546
CYTHON_VERSION="0.26.1" PYAMG_VERSION="3.3.2" PILLOW_VERSION="4.3.0"
46-
COVERAGE=true
47+
JOBLIB_VERSION="0.12.0" COVERAGE=true
4748
CHECK_PYTEST_SOFT_DEPENDENCY="true" TEST_DOCSTRINGS="true"
49+
SKLEARN_SITE_JOBLIB=1
4850
if: type != cron
4951
# flake8 linting on diff wrt common ancestor with upstream/master
5052
- env: RUN_FLAKE8="true" SKIP_TESTS="true"

benchmarks/bench_covertype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
6060
from sklearn.ensemble import GradientBoostingClassifier
6161
from sklearn.metrics import zero_one_loss
62-
from sklearn.externals.joblib import Memory
62+
from sklearn.utils import Memory
6363
from sklearn.utils import check_array
6464

6565
# Memoize the data extraction and memory map the resulting

benchmarks/bench_mnist.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from sklearn.ensemble import ExtraTreesClassifier
4242
from sklearn.ensemble import RandomForestClassifier
4343
from sklearn.dummy import DummyClassifier
44-
from sklearn.externals.joblib import Memory
44+
from sklearn.utils import Memory
4545
from sklearn.kernel_approximation import Nystroem
4646
from sklearn.kernel_approximation import RBFSampler
4747
from sklearn.metrics import zero_one_loss

benchmarks/bench_plot_nmf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from sklearn.decomposition.nmf import _initialize_nmf
2323
from sklearn.decomposition.nmf import _beta_divergence
2424
from sklearn.decomposition.nmf import INTEGER_TYPES, _check_init
25-
from sklearn.externals.joblib import Memory
25+
from sklearn.utils import Memory
2626
from sklearn.exceptions import ConvergenceWarning
2727
from sklearn.utils.extmath import safe_sparse_dot, squared_norm
2828
from sklearn.utils import check_array

benchmarks/bench_rcv1_logreg_convergence.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import gc
99
import time
1010

11-
from sklearn.externals.joblib import Memory
11+
from sklearn.utils import Memory
1212
from sklearn.linear_model import (LogisticRegression, SGDClassifier)
1313
from sklearn.datasets import fetch_rcv1
1414
from sklearn.linear_model.sag import get_auto_step_size

benchmarks/bench_saga.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
from sklearn.datasets import fetch_rcv1, load_iris, load_digits, \
1414
fetch_20newsgroups_vectorized
15-
from sklearn.externals.joblib import delayed, Parallel, Memory
15+
from sklearn.utils import delayed, Parallel, Memory
1616
from sklearn.linear_model import LogisticRegression
1717
from sklearn.metrics import log_loss
1818
from sklearn.model_selection import train_test_split

benchmarks/bench_tsne_mnist.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import json
1616
import argparse
1717

18-
from sklearn.externals.joblib import Memory
18+
from sklearn.utils import Memory
1919
from sklearn.datasets import fetch_mldata
2020
from sklearn.manifold import TSNE
2121
from sklearn.neighbors import NearestNeighbors

build_tools/travis/install.sh

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ if [[ "$DISTRIB" == "conda" ]]; then
5959
TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
6060
fi
6161

62+
if [[ -n "$JOBLIB_VERSION" ]]; then
63+
TO_INSTALL="$TO_INSTALL joblib=$JOBLIB_VERSION"
64+
fi
65+
6266
conda create -n testenv --yes $TO_INSTALL
6367
source activate testenv
6468

doc/developers/utilities.rst

+1-1

doc/glossary.rst

+1-1

doc/modules/classes.rst

+16

doc/whats_new/v0.20.rst

+3-8

examples/applications/wikipedia_principal_eigenvector.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from scipy import sparse
4646

4747
from sklearn.decomposition import randomized_svd
48-
from sklearn.externals.joblib import Memory
48+
from sklearn.utils import Memory
4949
from sklearn.externals.six.moves.urllib.request import urlopen
5050
from sklearn.externals.six import iteritems
5151

examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from sklearn.cluster import FeatureAgglomeration
3131
from sklearn.linear_model import BayesianRidge
3232
from sklearn.pipeline import Pipeline
33-
from sklearn.externals.joblib import Memory
33+
from sklearn.utils import Memory
3434
from sklearn.model_selection import GridSearchCV
3535
from sklearn.model_selection import KFold
3636

examples/compose/plot_compare_reduction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104

105105
from tempfile import mkdtemp
106106
from shutil import rmtree
107-
from sklearn.externals.joblib import Memory
107+
from sklearn.utils import Memory
108108

109109
# Create a temporary folder to store the transformers of the pipeline
110110
cachedir = mkdtemp()

sklearn/cluster/k_means_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
from ..utils import gen_batches
3030
from ..utils.validation import check_is_fitted
3131
from ..utils.validation import FLOAT_DTYPES
32-
from ..externals.joblib import Parallel
33-
from ..externals.joblib import delayed
32+
from ..utils import Parallel
33+
from ..utils import delayed
3434
from ..externals.six import string_types
3535
from ..exceptions import ConvergenceWarning
3636
from . import _k_means

sklearn/cluster/mean_shift_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
from ..base import BaseEstimator, ClusterMixin
2525
from ..neighbors import NearestNeighbors
2626
from ..metrics.pairwise import pairwise_distances_argmin
27-
from ..externals.joblib import Parallel
28-
from ..externals.joblib import delayed
27+
from ..utils import Parallel
28+
from ..utils import delayed
2929

3030

3131
def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,

sklearn/compose/_column_transformer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from scipy import sparse
1313

1414
from ..base import clone, TransformerMixin
15-
from ..externals.joblib import Parallel, delayed
15+
from ..utils import Parallel, delayed
1616
from ..externals import six
1717
from ..pipeline import (
1818
_fit_one_transformer, _fit_transform_one, _transform_one, _name_estimators)

sklearn/covariance/graph_lasso_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from ..linear_model import lars_path
2424
from ..linear_model import cd_fast
2525
from ..model_selection import check_cv, cross_val_score
26-
from ..externals.joblib import Parallel, delayed
26+
from ..utils import Parallel, delayed
2727

2828

2929
# Helper functions to compute the objective and dual objective functions

sklearn/datasets/lfw.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from .base import get_data_home, _fetch_remote, RemoteFileMetadata
3333
from ..utils import Bunch
34-
from ..externals.joblib import Memory
34+
from ..utils import Memory
3535
from ..externals.six import b
3636

3737
logger = logging.getLogger(__name__)

sklearn/datasets/svmlight_format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def load_svmlight_file(f, n_features=None, dtype=np.float64,
132132
--------
133133
To use joblib.Memory to cache the svmlight file::
134134
135-
from sklearn.externals.joblib import Memory
135+
from sklearn.utils import Memory
136136
from sklearn.datasets import load_svmlight_file
137137
mem = Memory("./mycache")
138138

sklearn/decomposition/dict_learning.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from numpy.lib.stride_tricks import as_strided
1616

1717
from ..base import BaseEstimator, TransformerMixin
18-
from ..externals.joblib import Parallel, delayed, cpu_count
18+
from ..utils import Parallel, delayed, cpu_count
1919
from ..externals.six.moves import zip
2020
from ..utils import (check_array, check_random_state, gen_even_slices,
2121
gen_batches, _get_n_jobs)

sklearn/decomposition/online_lda.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
gen_batches, gen_even_slices, _get_n_jobs)
2222
from ..utils.fixes import logsumexp
2323
from ..utils.validation import check_non_negative
24-
from ..externals.joblib import Parallel, delayed
24+
from ..utils import Parallel, delayed
2525
from ..externals.six.moves import xrange
2626
from ..exceptions import NotFittedError
2727

sklearn/decomposition/tests/test_sparse_pca.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def test_mini_batch_fit_transform():
151151
U1 = spca_lars.transform(Y)
152152
# Test multiple CPUs
153153
if sys.platform == 'win32': # fake parallelism for win32
154-
import sklearn.externals.joblib.parallel as joblib_par
154+
import sklearn.utils._joblib.parallel as joblib_par
155155
_mp = joblib_par.multiprocessing
156156
joblib_par.multiprocessing = None
157157
try:

sklearn/ensemble/bagging.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from .base import BaseEnsemble, _partition_estimators
1515
from ..base import ClassifierMixin, RegressorMixin
16-
from ..externals.joblib import Parallel, delayed
16+
from ..utils import Parallel, delayed
1717
from ..externals.six import with_metaclass
1818
from ..externals.six.moves import zip
1919
from ..metrics import r2_score, accuracy_score

sklearn/ensemble/forest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
5252

5353

5454
from ..base import ClassifierMixin, RegressorMixin
55-
from ..externals.joblib import Parallel, delayed
55+
from ..utils import Parallel, delayed
5656
from ..externals import six
5757
from ..metrics import r2_score
5858
from ..preprocessing import OneHotEncoder

sklearn/ensemble/partial_dependence.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from scipy.stats.mstats import mquantiles
1111

1212
from ..utils.extmath import cartesian
13-
from ..externals.joblib import Parallel, delayed
13+
from ..utils import Parallel, delayed
1414
from ..externals import six
1515
from ..externals.six.moves import map, range, zip
1616
from ..utils import check_array

sklearn/ensemble/voting_classifier.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ..base import TransformerMixin
1919
from ..base import clone
2020
from ..preprocessing import LabelEncoder
21-
from ..externals.joblib import Parallel, delayed
21+
from ..utils import Parallel, delayed
2222
from ..utils.validation import has_fit_parameter, check_is_fitted
2323
from ..utils.metaestimators import _BaseComposition
2424
from ..utils import Bunch

sklearn/externals/copy_joblib.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ else
1212
fi
1313

1414
pip install $JOBLIB --target $INSTALL_FOLDER
15-
cp -r $INSTALL_FOLDER/joblib _joblib
15+
cp -r $INSTALL_FOLDER/joblib joblib
1616
rm -rf $INSTALL_FOLDER
1717

1818
# Needed to rewrite the doctests
1919
# Note: BSD sed -i needs an argument unders OSX
2020
# so first renaming to .bak and then deleting backup files
21-
find _joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
22-
find _joblib -name "*.bak" | xargs rm
21+
find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
22+
find joblib -name "*.bak" | xargs rm
2323

2424
# Remove the tests folders to speed-up test time for scikit-learn.
2525
# joblib is already tested on its own CI infrastructure upstream.

sklearn/externals/_joblib/__init__.py sklearn/externals/joblib/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
inputs and outputs: Python functions. Joblib can save their
5959
computation to disk and rerun it only if necessary::
6060
61-
>>> from sklearn.externals.joblib import Memory
61+
>>> from sklearn.utils import Memory
6262
>>> mem = Memory(cachedir='/tmp/joblib')
6363
>>> import numpy as np
6464
>>> a = np.vander(np.arange(3)).astype(np.float)
@@ -77,7 +77,7 @@
7777
2) **Embarrassingly parallel helper:** to make it easy to write readable
7878
parallel code and debug it quickly::
7979
80-
>>> from sklearn.externals.joblib import Parallel, delayed
80+
>>> from sklearn.utils import Parallel, delayed
8181
>>> from math import sqrt
8282
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
8383
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

sklearn/externals/_joblib/parallel.py sklearn/externals/joblib/parallel.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -384,15 +384,15 @@ class Parallel(Logger):
384384
A simple example:
385385
386386
>>> from math import sqrt
387-
>>> from sklearn.externals.joblib import Parallel, delayed
387+
>>> from sklearn.utils import Parallel, delayed
388388
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
389389
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
390390
391391
Reshaping the output when the function has several return
392392
values:
393393
394394
>>> from math import modf
395-
>>> from sklearn.externals.joblib import Parallel, delayed
395+
>>> from sklearn.utils import Parallel, delayed
396396
>>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
397397
>>> res, i = zip(*r)
398398
>>> res
@@ -404,7 +404,7 @@ class Parallel(Logger):
404404
messages:
405405
406406
>>> from time import sleep
407-
>>> from sklearn.externals.joblib import Parallel, delayed
407+
>>> from sklearn.utils import Parallel, delayed
408408
>>> r = Parallel(n_jobs=2, verbose=5)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
409409
[Parallel(n_jobs=2)]: Done 1 out of 10 | elapsed: 0.1s remaining: 0.9s
410410
[Parallel(n_jobs=2)]: Done 3 out of 10 | elapsed: 0.2s remaining: 0.5s
@@ -418,7 +418,7 @@ class Parallel(Logger):
418418
child process:
419419
420420
>>> from heapq import nlargest
421-
>>> from sklearn.externals.joblib import Parallel, delayed
421+
>>> from sklearn.utils import Parallel, delayed
422422
>>> Parallel(n_jobs=2)(delayed(nlargest)(2, n) for n in (range(4), 'abcde', 3)) #doctest: +SKIP
423423
#...
424424
---------------------------------------------------------------------------
@@ -449,7 +449,7 @@ class Parallel(Logger):
449449
number of iterations cannot be reported in the progress messages:
450450
451451
>>> from math import sqrt
452-
>>> from sklearn.externals.joblib import Parallel, delayed
452+
>>> from sklearn.utils import Parallel, delayed
453453
>>> def producer():
454454
... for i in range(6):
455455
... print('Produced %s' % i)
File renamed without changes.

sklearn/externals/setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@
44
def configuration(parent_package='', top_path=None):
55
from numpy.distutils.misc_util import Configuration
66
config = Configuration('externals', parent_package, top_path)
7-
config.add_subpackage('_joblib')
7+
config.add_subpackage('joblib')
88

99
return config

sklearn/feature_selection/rfe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from ..base import MetaEstimatorMixin
1616
from ..base import clone
1717
from ..base import is_classifier
18-
from ..externals.joblib import Parallel, delayed
18+
from ..utils import Parallel, delayed
1919
from ..model_selection import check_cv
2020
from ..model_selection._validation import _score
2121
from ..metrics.scorer import check_scoring

0 commit comments

Comments
 (0)