Extend Default Setting Unit Tests and check propensities #183

Merged · 4 commits · Jan 20, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -27,3 +27,4 @@ share/python-wheels/
 *.egg
 MANIFEST
 *.idea
+*.vscode
9 changes: 9 additions & 0 deletions doubleml/_utils.py
@@ -1,4 +1,5 @@
 import numpy as np
+import warnings

 from sklearn.model_selection import cross_val_predict
 from sklearn.base import clone
@@ -214,3 +215,11 @@ def _check_finite_predictions(preds, learner, learner_name, smpls):
     if not np.all(np.isfinite(preds[test_indices])):
         raise ValueError(f'Predictions from learner {str(learner)} for {learner_name} are not finite.')
     return
+
+
+def _check_is_propensity(preds, learner, learner_name, smpls, eps=1e-12):
+    test_indices = np.concatenate([test_index for _, test_index in smpls])
+    if any((preds[test_indices] < eps) | (preds[test_indices] > 1 - eps)):
+        warnings.warn(f'Propensity predictions from learner {str(learner)} for'
+                      f' {learner_name} are close to zero or one (eps={eps}).')
+    return
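For context, a minimal sketch of how the new helper behaves. It is a private utility, so the import path is an assumption and may change; the entries of smpls are (train, test) index pairs, matching the convention used by _check_finite_predictions above:

import warnings
import numpy as np
from sklearn.linear_model import LogisticRegression

from doubleml._utils import _check_is_propensity

# two folds; the test indices jointly cover all observations
smpls = [(np.array([0, 1]), np.array([2, 3])),
         (np.array([2, 3]), np.array([0, 1]))]
# one prediction is numerically indistinguishable from one
preds = np.array([0.2, 0.7, 1.0 - 1e-15, 0.5])

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')
    _check_is_propensity(preds, LogisticRegression(), 'ml_m', smpls, eps=1e-12)
assert any('close to zero or one' in str(wi.message) for wi in w)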
9 changes: 9 additions & 0 deletions doubleml/double_ml.py
@@ -114,6 +114,7 @@ def __init__(self,

         # also initialize bootstrap arrays with the default number of bootstrap replications
         self._n_rep_boot, self._boot_coef, self._boot_t_stat = self._initialize_boot_arrays(n_rep_boot=500)
+        self._boot_method = None

         # initialize instance attributes which are later used for iterating
         self._i_rep = None
@@ -181,6 +182,13 @@ def n_rep_boot(self):
         """
         return self._n_rep_boot

+    @property
+    def boot_method(self):
+        """
+        The method to construct the bootstrap replications.
+        """
+        return self._boot_method
+
     @property
     def score(self):
         """
@@ -567,6 +575,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
                 self._boot_coef[self._i_treat, i_start:i_end], self._boot_t_stat[self._i_treat, i_start:i_end] =\
                     self._compute_bootstrap(weights)

+        self._boot_method = method
         return self

     def confint(self, joint=False, level=0.95):
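A short usage sketch of the new boot_method property; the dataset generator and learners are illustrative choices, not part of this PR:

import numpy as np
from sklearn.linear_model import Lasso
from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018

np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
dml_plr = DoubleMLPLR(dml_data, Lasso(), Lasso())

print(dml_plr.boot_method)  # None: bootstrap() has not been called yet
dml_plr.fit()
dml_plr.bootstrap(method='wild', n_rep_boot=200)
print(dml_plr.boot_method)  # 'wild': records the method of the last bootstrap() call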
7 changes: 6 additions & 1 deletion doubleml/double_ml_iivm.py
@@ -5,7 +5,7 @@
 from .double_ml import DoubleML
 from .double_ml_data import DoubleMLData
 from .double_ml_score_mixins import LinearScoreMixin
-from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
+from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions, _check_is_propensity


 class DoubleMLIIVM(LinearScoreMixin, DoubleML):
@@ -251,6 +251,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
                                  'observed to be binary with values 0 and 1. Make sure that for classifiers '
                                  'probabilities and not labels are predicted.')

+        _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12)
+
         g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv,
                                  est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
                                  return_models=return_models)
@@ -265,11 +267,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
                                  'observed to be binary with values 0 and 1. Make sure that for classifiers '
                                  'probabilities and not labels are predicted.')

+        _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12)
+
         # nuisance m
         m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv,
                                 est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
                                 return_models=return_models)
         _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+        _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)

         # nuisance r
         if self.subgroups['always_takers']:
3 changes: 2 additions & 1 deletion doubleml/double_ml_irm.py
@@ -10,7 +10,7 @@
 from .double_ml_data import DoubleMLData
 from .double_ml_score_mixins import LinearScoreMixin

-from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
+from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions, _check_is_propensity


 class DoubleMLIRM(LinearScoreMixin, DoubleML):
@@ -227,6 +227,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
                                 est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
                                 return_models=return_models)
         _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+        _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)

         psi_a, psi_b = self._score_elements(y, d,
                                             g_hat0['preds'], g_hat1['preds'], m_hat['preds'],
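The check matters for IRM in particular because the score weights observations by 1/m_hat(X) and 1/(1 - m_hat(X)), so propensities numerically at 0 or 1 make the score elements explode. A hedged sketch of a learner that can trigger the new warning; whether it actually fires depends on the simulated data draw, but a single fully grown tree typically predicts class probabilities of exactly 0 or 1:

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLIRM
from doubleml.datasets import make_irm_data

np.random.seed(42)
dml_data = make_irm_data(n_obs=200)
ml_g = RandomForestRegressor(n_estimators=50)
ml_m = RandomForestClassifier(n_estimators=1)  # deep single tree -> 0/1 probabilities
dml_irm = DoubleMLIRM(dml_data, ml_g, ml_m)
dml_irm.fit()  # expected to warn: propensity predictions close to zero or one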
4 changes: 3 additions & 1 deletion doubleml/double_ml_plr.py
@@ -9,7 +9,7 @@
 from .double_ml import DoubleML
 from .double_ml_data import DoubleMLData
 from .double_ml_score_mixins import LinearScoreMixin
-from ._utils import _dml_cv_predict, _dml_tune, _check_finite_predictions
+from ._utils import _dml_cv_predict, _dml_tune, _check_finite_predictions, _check_is_propensity


 # To be removed in version 0.6.0
@@ -219,6 +219,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
                                 est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
                                 return_models=return_models)
         _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+        if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True):
+            _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)

         if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]:
             binary_preds = (type_of_target(m_hat['preds']) == 'binary')
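Unlike IRM and IIVM, the PLR model also accepts a regressor for ml_m (continuous treatment), whose predictions of E[D|X] are not probabilities, so here the range check is gated on the learner type. A minimal sketch of the assumed return convention of _check_learner (True for classifiers, False for regressors; both the staticmethod access and the boolean return are assumptions, hedged accordingly):

from sklearn.linear_model import Lasso, LogisticRegression
from doubleml import DoubleMLPLR

# assumption: _check_learner is a static helper returning learner_is_classifier
assert DoubleMLPLR._check_learner(LogisticRegression(), 'ml_m',
                                  regressor=True, classifier=True)
assert not DoubleMLPLR._check_learner(Lasso(), 'ml_m',
                                      regressor=True, classifier=True)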
2 changes: 1 addition & 1 deletion doubleml/tests/test_blp.py
@@ -89,4 +89,4 @@ def test_dml_blp_ci_2(dml_blp_fixture):

 def test_dml_blp_return_types(dml_blp_fixture):
     assert isinstance(dml_blp_fixture['blp_model'].__str__(), str)
-    assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
\ No newline at end of file
+    assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
24 changes: 24 additions & 0 deletions doubleml/tests/test_doubleml_model_defaults.py
@@ -17,13 +17,37 @@
 dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression())
 dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression())

+dml_plr.fit()
+dml_pliv.fit()
+dml_irm.fit()
+dml_iivm.fit()
+
+dml_plr.bootstrap()
+dml_pliv.bootstrap()
+dml_irm.bootstrap()
+dml_iivm.bootstrap()
+

 def _assert_resampling_default_settings(dml_obj):
     assert dml_obj.n_folds == 5
     assert dml_obj.n_rep == 1
     assert dml_obj.draw_sample_splitting
     assert dml_obj.apply_cross_fitting
+
+    # fit method
+    assert dml_obj.predictions is None
+    assert dml_obj.models is None
+
+    # bootstrap method
+    assert dml_obj.boot_method == 'normal'
+    assert dml_obj.n_rep_boot == 500
+
+    # confint method
+    assert dml_obj.confint().equals(dml_obj.confint(joint=False, level=0.95))
+
+    # p_adjust method
+    assert dml_obj.p_adjust().equals(dml_obj.p_adjust(method='romano-wolf'))


 @pytest.mark.ci
 def test_plr_defaults():
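The diff is truncated at this point; presumably each per-model test simply delegates to the shared helper on the module-level fitted objects, along the lines of this assumed sketch:

@pytest.mark.ci
def test_plr_defaults():
    _assert_resampling_default_settings(dml_plr)  # assumed body, not shown in the diff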