-
Notifications
You must be signed in to change notification settings - Fork 418
added y argument to fit methods #59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,12 +3,37 @@ | |
from sklearn.utils import tosequence | ||
|
||
|
||
def _call_fit(fit_method, X, y=None, **kwargs): | ||
""" | ||
helper function, calls the fit or fit_transform method with the correct | ||
number of parameters | ||
|
||
fit_method: fit or fit_transform method of the transformer | ||
X: the data to fit | ||
y: the target vector relative to X, optional | ||
kwargs: any keyword arguments to the fit method | ||
|
||
return: the result of the fit or fit_transform method | ||
|
||
WARNING: if this function raises a TypeError exception, test the fit | ||
or fit_transform method passed to it in isolation as _call_fit will not | ||
distinguish TypeError due to incorrect number of arguments from | ||
other TypeErrors | ||
""" | ||
try: | ||
return fit_method(X, y, **kwargs) | ||
except TypeError: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about:
I know it looks a bit hacky but I guess it will solve your warning above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is pretty clever, I hadn't thought of doing something like this before. Unfortunately when I tested it, the error message accompanying a TypeError varies between Python 2 and 3. On 2, it's "test_func() takes exactly 2 arguments (1 given)" but on 3 it's "test_func() missing 1 required positional argument:". It's probably safer to leave as is. |
||
# fit takes only one argument | ||
return fit_method(X, **kwargs) | ||
|
||
|
||
class TransformerPipeline(Pipeline): | ||
""" | ||
Pipeline that expects all steps to be transformers taking a single argument | ||
Pipeline that expects all steps to be transformers taking a single X argument, | ||
an optional y argument, | ||
and having fit and transform methods. | ||
|
||
Code is copied from sklearn's Pipeline, leaving out the `y=None` argument. | ||
Code is copied from sklearn's Pipeline | ||
""" | ||
def __init__(self, steps): | ||
names, estimators = zip(*steps) | ||
|
@@ -31,31 +56,34 @@ def __init__(self, steps): | |
"'%s' (type %s) doesn't)" | ||
% (estimator, type(estimator))) | ||
|
||
def _pre_transform(self, X, **fit_params): | ||
def _pre_transform(self, X, y=None, **fit_params): | ||
fit_params_steps = dict((step, {}) for step, _ in self.steps) | ||
for pname, pval in six.iteritems(fit_params): | ||
step, param = pname.split('__', 1) | ||
fit_params_steps[step][param] = pval | ||
Xt = X | ||
for name, transform in self.steps[:-1]: | ||
if hasattr(transform, "fit_transform"): | ||
Xt = transform.fit_transform(Xt, **fit_params_steps[name]) | ||
Xt = _call_fit(transform.fit_transform, | ||
Xt, y, **fit_params_steps[name]) | ||
else: | ||
Xt = transform.fit(Xt, **fit_params_steps[name]) \ | ||
.transform(Xt) | ||
Xt = _call_fit(transform.fit, | ||
Xt, y, **fit_params_steps[name]).transform(Xt) | ||
return Xt, fit_params_steps[self.steps[-1][0]] | ||
|
||
def fit(self, X, **fit_params): | ||
Xt, fit_params = self._pre_transform(X, **fit_params) | ||
self.steps[-1][-1].fit(Xt, **fit_params) | ||
def fit(self, X, y=None, **fit_params): | ||
Xt, fit_params = self._pre_transform(X, y, **fit_params) | ||
_call_fit(self.steps[-1][-1].fit, Xt, y, **fit_params) | ||
return self | ||
|
||
def fit_transform(self, X, **fit_params): | ||
Xt, fit_params = self._pre_transform(X, **fit_params) | ||
def fit_transform(self, X, y=None, **fit_params): | ||
Xt, fit_params = self._pre_transform(X, y, **fit_params) | ||
if hasattr(self.steps[-1][-1], 'fit_transform'): | ||
return self.steps[-1][-1].fit_transform(Xt, **fit_params) | ||
return _call_fit(self.steps[-1][-1].fit_transform, | ||
Xt, y, **fit_params) | ||
else: | ||
return self.steps[-1][-1].fit(Xt, **fit_params).transform(Xt) | ||
return _call_fit(self.steps[-1][-1].fit, | ||
Xt, y, **fit_params).transform(Xt) | ||
|
||
|
||
def make_transformer_pipeline(*steps): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Why does one need to pass "1" as second argument to this transform, and why is the output different from the previous case in the last column?
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
The "1" is the second argument of np.round. This test case was failing even though I don't think I modified anything that affected it. The issue seems to be that on my machine np.round(-0.3) equals "0.", not "-0." Changing it to round to 1 decimal place fixed the test case.