Add RMSEs and targets #182
File: `.gitignore`

```diff
@@ -25,6 +25,7 @@ share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
+*.vscode
 MANIFEST
 *.idea
 *.vscode
```
File: `doubleml/double_ml.py`

```diff
@@ -3,6 +3,7 @@
 import warnings

 from sklearn.base import is_regressor, is_classifier
+from sklearn.metrics import mean_squared_error

 from scipy.stats import norm

```
```diff
@@ -45,8 +46,10 @@ def __init__(self,
         self._learner = None
         self._params = None

-        # initialize predictions to None which are only stored if method fit is called with store_predictions=True
+        # initialize predictions and targets to None which are only stored if method fit is called with store_predictions=True
         self._predictions = None
+        self._nuisance_targets = None
+        self._rmses = None

         # initialize models to None which are only stored if method fit is called with store_models=True
         self._models = None
```
```diff
@@ -129,6 +132,11 @@ def __str__(self):
         learner_info = ''
         for key, value in self.learner.items():
             learner_info += f'Learner {key}: {str(value)}\n'
+        if self.rmses is not None:
+            learner_info += 'Out-of-sample Performance:\n'
+            for learner in self.params_names:
+                learner_info += f'Learner {learner} RMSE: {self.rmses[learner]}\n'

         if self._is_cluster_data:
             resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \
                               f'No. folds: {self.n_folds}\n' \
```
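With RMSEs stored, the printed model summary gains a performance block. A sketch of the resulting output, following the f-strings above (learner names from the IRM example below, values hypothetical):

```
Out-of-sample Performance:
Learner ml_g0 RMSE: [[1.023]]
Learner ml_g1 RMSE: [[0.987]]
Learner ml_m RMSE: [[0.412]]
```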
```diff
@@ -231,6 +239,20 @@ def predictions(self):
         """
         return self._predictions

+    @property
+    def nuisance_targets(self):
+        """
+        The outcome of the nuisance models.
+        """
+        return self._nuisance_targets
+
+    @property
+    def rmses(self):
+        """
+        The root-mean-squared-errors of the nuisance models.
+        """
+        return self._rmses
+
     @property
     def models(self):
         """
```
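The new containers share the learner-keyed layout of `predictions`. A minimal sketch of the shapes after fitting, taken from the initializers further below (`dml_obj` is a hypothetical fitted model):

```python
# Each property maps a nuisance learner name to a numpy array:
preds = dml_obj.predictions['ml_m']         # shape (n_obs, n_rep, n_coefs)
targets = dml_obj.nuisance_targets['ml_m']  # shape (n_obs, n_rep, n_coefs)
rmses = dml_obj.rmses['ml_m']               # shape (n_rep, n_coefs)
```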
```diff
@@ -434,7 +456,7 @@ def __psi_deriv(self):
     def __all_se(self):
         return self._all_se[self._i_treat, self._i_rep]

-    def fit(self, n_jobs_cv=None, store_predictions=False, store_models=False):
+    def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False):
```
> **Review comment:** Thanks @SvenKlaassen - I think it's a good idea to set the default for …
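Since predictions are now stored by default, nuisance targets and RMSEs are available right after fitting. A minimal sketch, reusing the fitted `dml_irm_obj` from the `evaluate_learners` docstring example further below:

```python
dml_irm_obj.fit()                 # store_predictions=True is now the default
print(dml_irm_obj.rmses['ml_m'])  # per-repetition, per-coefficient RMSEs
```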
```diff
         """
         Estimate DoubleML models.
```
```diff
@@ -471,8 +493,11 @@ def fit(self, n_jobs_cv=None, store_predictions=False, store_models=False):
             raise TypeError('store_models must be True or False. '
                             f'Got {str(store_models)}.')

+        # initialize rmse arrays for nuisance functions evaluation
+        self._initialize_rmses()
+
         if store_predictions:
-            self._initialize_predictions()
+            self._initialize_predictions_and_targets()

         if store_models:
             self._initialize_models()
```
```diff
@@ -491,8 +516,10 @@ def fit(self, n_jobs_cv=None, store_predictions=False, store_models=False):

                 self._set_score_elements(score_elements, self._i_rep, self._i_treat)

+                # calculate rmses and store predictions and targets of the nuisance models
+                self._calc_rmses(preds['predictions'], preds['targets'])
+
```
> **Review comment:** Hi @SvenKlaassen, thanks for this PR! I think adding some diagnostics is really nice. It should be possible to make this a bit more general by using (maybe only a subset of) sklearn's metrics, either by letting users pass a …

> **Review comment:** I guess that sklearn's measures have built-in methods to handle exceptions that might occur …

> **Review comment:** Thanks @PhilippBach, that would be helpful. I think I can add this, but keeping RMSE as the default would be useful, since one would otherwise have to specify a different metric for each learner. RMSE is still useful for classification.
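A sketch of the "pass a callable" idea from this thread, using the `evaluate_learners` API added below (the custom metric `median_abs_error` is hypothetical; `dml_irm_obj` is the fitted object from the docstring example):

```python
import numpy as np

# Hypothetical custom metric: evaluate_learners expects a callable taking
# y_pred and y_true of shape (1, n) and returning a finite scalar.
def median_abs_error(y_pred, y_true):
    return np.median(np.abs(y_true - y_pred))

res = dml_irm_obj.evaluate_learners(metric=median_abs_error)
```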
```diff
                 if store_predictions:
-                    self._store_predictions(preds['predictions'])
+                    self._store_predictions_and_targets(preds['predictions'], preds['targets'])
                 if store_models:
                     self._store_models(preds['models'])
```
```diff
@@ -990,22 +1017,103 @@ def _initialize_boot_arrays(self, n_rep_boot):
         boot_t_stat = np.full((self._dml_data.n_coefs, n_rep_boot * self.n_rep), np.nan)
         return n_rep_boot, boot_coef, boot_t_stat

-    def _initialize_predictions(self):
+    def _initialize_predictions_and_targets(self):
         self._predictions = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
                              for learner in self.params_names}
+        self._nuisance_targets = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
+                                  for learner in self.params_names}
+
+    def _initialize_rmses(self):
+        self._rmses = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan)
+                       for learner in self.params_names}

     def _initialize_models(self):
         self._models = {learner: {treat_var: [None] * self.n_rep for treat_var in self._dml_data.d_cols}
                         for learner in self.params_names}

-    def _store_predictions(self, preds):
+    def _store_predictions_and_targets(self, preds, targets):
         for learner in self.params_names:
             self._predictions[learner][:, self._i_rep, self._i_treat] = preds[learner]
+            self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner]
+
+    def _calc_rmses(self, preds, targets):
+        for learner in self.params_names:
+            if targets[learner] is None:
+                self._rmses[learner][self._i_rep, self._i_treat] = np.nan
+            else:
+                sq_error = np.power(targets[learner] - preds[learner], 2)
+                self._rmses[learner][self._i_rep, self._i_treat] = np.sqrt(np.mean(sq_error, 0))

     def _store_models(self, models):
         for learner in self.params_names:
             self._models[learner][self._dml_data.d_cols[self._i_treat]][self._i_rep] = models[learner]
```
```diff
+    def evaluate_learners(self, learners=None, metric=mean_squared_error):
+        """
+        Evaluate fitted learners for DoubleML models on cross-validated predictions.
+
+        Parameters
+        ----------
+        learners : list
+            A list of strings which correspond to the nuisance functions of the model.
+
+        metric : callable
+            A callable function with inputs ``y_pred`` and ``y_true`` of shape ``(1, n)``,
+            where ``n`` specifies the number of observations.
+            Default is the mean squared error (``sklearn.metrics.mean_squared_error``).
+
+        Returns
+        -------
+        dist : dict
+            A dictionary containing the evaluated metric for each learner.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import doubleml as dml
+        >>> from sklearn.metrics import mean_absolute_error
+        >>> from doubleml.datasets import make_irm_data
+        >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+        >>> np.random.seed(3141)
+        >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+        >>> ml_m = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+        >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame')
+        >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
+        >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
+        >>> dml_irm_obj.fit()
+        >>> dml_irm_obj.evaluate_learners(metric=mean_absolute_error)
```
> **Review comment:** I'm afraid we get a little problem here if we use callables for classification, like for example if we run instead: …

> **Review comment:** I think we'd have to check whether a learner is a regression or a classification learner and then (optionally) pass two callables, one for the regression tasks and one for the classification tasks. Alternatively, one could pass through a keyword referring to the … I think the default can still be RMSE (or another regression measure) for all nuisance parts, but the option to use a classification measure is probably reasonable. What do you think?
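A sketch of the two-callables idea from this review thread, dispatching on the learner type via sklearn's type checks (the helper `_pick_metric` is hypothetical and not part of this PR):

```python
from sklearn.base import is_classifier
from sklearn.metrics import mean_squared_error, log_loss

def _pick_metric(learner, reg_metric=mean_squared_error, clf_metric=log_loss):
    # Hypothetical helper: classification learners get a classification
    # metric, everything else falls back to the regression default.
    return clf_metric if is_classifier(learner) else reg_metric
```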
```diff
+        {'ml_g0': array([[1.13318973]]),
+         'ml_g1': array([[0.91659939]]),
+         'ml_m': array([[0.36350912]])}
+        """
+        # if no learners are provided try to evaluate all learners
+        if learners is None:
+            learners = self.params_names
+
+        # check metric
+        if not callable(metric):
+            raise TypeError('metric should be a callable. '
+                            '%r was passed.' % metric)
+
+        if all(learner in self.params_names for learner in learners):
+            if self.nuisance_targets is None:
+                raise ValueError('Apply fit() before evaluate_learners().')
+            else:
+                dist = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan)
+                        for learner in learners}
+                for learner in learners:
+                    for rep in range(self.n_rep):
+                        for coef_idx in range(self._dml_data.n_coefs):
+                            res = metric(y_pred=self.predictions[learner][:, rep, coef_idx].reshape(1, -1),
+                                         y_true=self.nuisance_targets[learner][:, rep, coef_idx].reshape(1, -1))
+                            if not np.isfinite(res):
+                                raise ValueError(f'Evaluation from learner {str(learner)} is not finite.')
+                            dist[learner][rep, coef_idx] = res
+                return dist
+        else:
+            raise ValueError(f'The learners have to be a subset of {str(self.params_names)}. '
+                             f'Learners {str(learners)} provided.')

     def draw_sample_splitting(self):
         """
         Draw sample splitting for DoubleML models.
```
> **Review comment:** As far as I see, we're not using the `train_targets` right now, right? I guess that's alright and we can leave an in-sample (fold-wise cross-validated) vs. out-of-sample (cross-fitted) evaluation for later. I'm wondering how we can implement this in a clever way... 🤔