Skip to content

Commit 4c33ea2

Browse files
glemaitre authored and jnothman committed
[MRG+1] EHN/DOC Make error msg about Memory more explicit (scikit-learn#8865)
1 parent 33f50ad commit 4c33ea2

File tree

5 files changed

+59
-15
lines changed

5 files changed

+59
-15
lines changed

sklearn/cluster/hierarchical.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,8 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
609609
"manhattan", "cosine", or 'precomputed'.
610610
If linkage is "ward", only "euclidean" is accepted.
611611
612-
memory : Instance of joblib.Memory or string (optional)
612+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
613+
(default=None)
613614
Used to cache the output of the computation of the tree.
614615
By default, no caching is done. If a string is given, it is the
615616
path to the caching directory.
@@ -690,8 +691,10 @@ def fit(self, X, y=None):
690691
elif isinstance(memory, six.string_types):
691692
memory = Memory(cachedir=memory, verbose=0)
692693
elif not isinstance(memory, Memory):
693-
raise ValueError('`memory` has to be a `str` or a `joblib.Memory`'
694-
' instance')
694+
raise ValueError("'memory' should either be a string or"
695+
" a sklearn.externals.joblib.Memory"
696+
" instance, got 'memory={!r}' instead.".format(
697+
type(memory)))
695698

696699
if self.n_clusters <= 0:
697700
raise ValueError("n_clusters should be an integer greater than 0."
@@ -776,7 +779,8 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
776779
"manhattan", "cosine", or 'precomputed'.
777780
If linkage is "ward", only "euclidean" is accepted.
778781
779-
memory : Instance of joblib.Memory or string, optional
782+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
783+
(default=None)
780784
Used to cache the output of the computation of the tree.
781785
By default, no caching is done. If a string is given, it is the
782786
path to the caching directory.

sklearn/linear_model/randomized_l1.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,15 @@ def fit(self, X, y):
9999

100100
estimator_func, params = self._make_estimator_and_params(X, y)
101101
memory = self.memory
102-
if isinstance(memory, six.string_types):
103-
memory = Memory(cachedir=memory)
102+
if memory is None:
103+
memory = Memory(cachedir=None, verbose=0)
104+
elif isinstance(memory, six.string_types):
105+
memory = Memory(cachedir=memory, verbose=0)
106+
elif not isinstance(memory, Memory):
107+
raise ValueError("'memory' should either be a string or"
108+
" a sklearn.externals.joblib.Memory"
109+
" instance, got 'memory={!r}' instead.".format(
110+
type(memory)))
104111

105112
scores_ = memory.cache(
106113
_resample_model, ignore=['verbose', 'n_jobs', 'pre_dispatch']
@@ -265,7 +272,8 @@ class RandomizedLasso(BaseRandomizedLinearModel):
265272
- A string, giving an expression as a function of n_jobs,
266273
as in '2*n_jobs'
267274
268-
memory : Instance of joblib.Memory or string
275+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
276+
(default=None)
269277
Used for internal caching. By default, no caching is done.
270278
If a string is given, it is the path to the caching directory.
271279
@@ -307,7 +315,7 @@ def __init__(self, alpha='aic', scaling=.5, sample_fraction=.75,
307315
max_iter=500,
308316
eps=np.finfo(np.float).eps, random_state=None,
309317
n_jobs=1, pre_dispatch='3*n_jobs',
310-
memory=Memory(cachedir=None, verbose=0)):
318+
memory=None):
311319
self.alpha = alpha
312320
self.scaling = scaling
313321
self.sample_fraction = sample_fraction
@@ -456,7 +464,8 @@ class RandomizedLogisticRegression(BaseRandomizedLinearModel):
456464
- A string, giving an expression as a function of n_jobs,
457465
as in '2*n_jobs'
458466
459-
memory : Instance of joblib.Memory or string
467+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
468+
(default=None)
460469
Used for internal caching. By default, no caching is done.
461470
If a string is given, it is the path to the caching directory.
462471
@@ -498,7 +507,7 @@ def __init__(self, C=1, scaling=.5, sample_fraction=.75,
498507
normalize=True,
499508
random_state=None,
500509
n_jobs=1, pre_dispatch='3*n_jobs',
501-
memory=Memory(cachedir=None, verbose=0)):
510+
memory=None):
502511
self.C = C
503512
self.scaling = scaling
504513
self.sample_fraction = sample_fraction

sklearn/linear_model/tests/test_randomized_l1.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
# Authors: Alexandre Gramfort <[email protected]>
22
# License: BSD 3 clause
3+
from tempfile import mkdtemp
4+
import shutil
35

46
import numpy as np
57
from scipy import sparse
68

79
from sklearn.utils.testing import assert_equal
810
from sklearn.utils.testing import assert_array_equal
911
from sklearn.utils.testing import assert_raises
12+
from sklearn.utils.testing import assert_raises_regex
1013

1114
from sklearn.linear_model.randomized_l1 import (lasso_stability_path,
1215
RandomizedLasso,
@@ -38,6 +41,19 @@ def test_lasso_stability_path():
3841
np.argsort(np.sum(scores_path, axis=1))[-3:])
3942

4043

44+
def test_randomized_lasso_error_memory():
45+
scaling = 0.3
46+
selection_threshold = 0.5
47+
tempdir = 5
48+
clf = RandomizedLasso(verbose=False, alpha=[1, 0.8], random_state=42,
49+
scaling=scaling,
50+
selection_threshold=selection_threshold,
51+
memory=tempdir)
52+
assert_raises_regex(ValueError, "'memory' should either be a string or"
53+
" a sklearn.externals.joblib.Memory instance",
54+
clf.fit, X, y)
55+
56+
4157
def test_randomized_lasso():
4258
# Check randomized lasso
4359
scaling = 0.3
@@ -57,6 +73,18 @@ def test_randomized_lasso():
5773
feature_scores = clf.fit(X, y).scores_
5874
assert_equal(clf.all_scores_.shape, (X.shape[1], 2))
5975
assert_array_equal(np.argsort(F)[-3:], np.argsort(feature_scores)[-3:])
76+
# test caching
77+
try:
78+
tempdir = mkdtemp()
79+
clf = RandomizedLasso(verbose=False, alpha=[1, 0.8], random_state=42,
80+
scaling=scaling,
81+
selection_threshold=selection_threshold,
82+
memory=tempdir)
83+
feature_scores = clf.fit(X, y).scores_
84+
assert_equal(clf.all_scores_.shape, (X.shape[1], 2))
85+
assert_array_equal(np.argsort(F)[-3:], np.argsort(feature_scores)[-3:])
86+
finally:
87+
shutil.rmtree(tempdir)
6088

6189
X_r = clf.transform(X)
6290
X_full = clf.inverse_transform(X_r)

sklearn/pipeline.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ class Pipeline(_BaseComposition):
5252
chained, in the order in which they are chained, with the last object
5353
an estimator.
5454
55-
memory : Instance of joblib.Memory or string, optional (default=None)
55+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
56+
(default=None)
5657
Used to cache the fitted transformers of the pipeline. By default,
5758
no caching is performed. If a string is given, it is the path to
5859
the caching directory. Enabling caching triggers a clone of
@@ -193,8 +194,9 @@ def _fit(self, X, y=None, **fit_params):
193194
memory = Memory(cachedir=memory, verbose=0)
194195
elif not isinstance(memory, Memory):
195196
raise ValueError("'memory' should either be a string or"
196-
" a joblib.Memory instance, got"
197-
" 'memory={!r}' instead.".format(memory))
197+
" a sklearn.externals.joblib.Memory"
198+
" instance, got 'memory={!r}' instead.".format(
199+
type(memory)))
198200

199201
fit_transform_one_cached = memory.cache(_fit_transform_one)
200202

@@ -536,7 +538,8 @@ def make_pipeline(*steps, **kwargs):
536538
----------
537539
*steps : list of estimators,
538540
539-
memory : Instance of joblib.Memory or string, optional (default=None)
541+
memory : Instance of sklearn.externals.joblib.Memory or string, optional \
542+
(default=None)
540543
Used to cache the fitted transformers of the pipeline. By default,
541544
no caching is performed. If a string is given, it is the path to
542545
the caching directory. Enabling caching triggers a clone of

sklearn/tests/test_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -853,7 +853,7 @@ def test_pipeline_wrong_memory():
853853
cached_pipe = Pipeline([('transf', DummyTransf()), ('svc', SVC())],
854854
memory=memory)
855855
assert_raises_regex(ValueError, "'memory' should either be a string or a"
856-
" joblib.Memory instance, got 'memory=1' instead.",
856+
" sklearn.externals.joblib.Memory instance, got",
857857
cached_pipe.fit, X, y)
858858

859859

0 commit comments

Comments
 (0)