Commit 4ced5e3

Merge pull request #202 from DoubleML/s-incr-dev-version

Extend unit tests and bug fixes

2 parents f64630c + 10841c3

14 files changed: +165 -192 lines

.coverage

68 KB (binary file not shown)

doubleml/double_ml.py

Lines changed: 8 additions & 5 deletions
@@ -1064,7 +1064,7 @@ def evaluate_learners(self, learners=None, metric=_rmse):
         where ``n`` specifies the number of observations. Remark that some models like IRM are
         not able to provide all values for ``y_true`` for all learners and might contain
         some ``nan`` values in the target vector.
-        Default is the euclidean distance.
+        Default is the root-mean-square error.

         Returns
         -------
@@ -1085,10 +1085,13 @@ def evaluate_learners(self, learners=None, metric=_rmse):
         >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
         >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
         >>> dml_irm_obj.fit()
-        >>> dml_irm_obj.evaluate_learners(metric=mean_absolute_error)
-        {'ml_g0': array([[1.13318973]]),
-         'ml_g1': array([[0.91659939]]),
-         'ml_m': array([[0.36350912]])}
+        >>> def mae(y_true, y_pred):
+        >>>     subset = np.logical_not(np.isnan(y_true))
+        >>>     return mean_absolute_error(y_true[subset], y_pred[subset])
+        >>> dml_irm_obj.evaluate_learners(metric=mae)
+        {'ml_g0': array([[0.85974356]]),
+         'ml_g1': array([[0.85280376]]),
+         'ml_m': array([[0.35365143]])}
         """
         # if no learners are provided try to evaluate all learners
         if learners is None:
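The updated doctest masks the ``nan`` targets that the docstring warns about before scoring. The same masking pattern works for any metric passed to ``evaluate_learners``; a minimal sketch of a NaN-aware RMSE (the function name is illustrative, not part of the commit):

    import numpy as np
    from sklearn.metrics import mean_squared_error

    def rmse_ignoring_nans(y_true, y_pred):
        # Drop entries without a target value; in IRM, ml_g0 and ml_g1
        # each cover only one treatment arm, so the other arm is nan.
        subset = np.logical_not(np.isnan(y_true))
        return np.sqrt(mean_squared_error(y_true[subset], y_pred[subset]))

    # dml_irm_obj.evaluate_learners(metric=rmse_ignoring_nans)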

doubleml/double_ml_pliv.py

Lines changed: 0 additions & 46 deletions
@@ -281,17 +281,6 @@ def _check_data(self, obj_dml_data):
                              'use DoubleMLPLR instead of DoubleMLPLIV.')
         return

-    # To be removed in version 0.6.0
-    def set_ml_nuisance_params(self, learner, treat_var, params):
-        if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the argument learner accordingly. "
-                           "The provided parameters are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            learner = 'ml_l'
-        super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params)
-
     def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
         if self.partialX & (not self.partialZ):
             psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models)
@@ -523,41 +512,6 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv, return_models=False):

         return psi_elements, preds

-    # To be removed in version 0.6.0
-    def tune(self,
-             param_grids,
-             tune_on_folds=False,
-             scoring_methods=None,  # if None the estimator's score method is used
-             n_folds_tune=5,
-             search_mode='grid_search',
-             n_iter_randomized_search=100,
-             n_jobs_cv=None,
-             set_as_params=True,
-             return_tune_res=False):
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \
-                ('ml_g' in param_grids) and ('ml_l' not in param_grids):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of param_grids accordingly. "
-                           "The provided param_grids for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            param_grids['ml_l'] = param_grids.pop('ml_g')
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \
-                ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of scoring_methods accordingly. "
-                           "The provided scoring_methods for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            scoring_methods['ml_l'] = scoring_methods.pop('ml_g')
-
-        tune_res = super(DoubleMLPLIV, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune,
-                                                  search_mode, n_iter_randomized_search, n_jobs_cv, set_as_params,
-                                                  return_tune_res)
-        return tune_res
-
     def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
                                    search_mode, n_iter_randomized_search):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
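With the redirection shim removed, callers must address the partialling-out nuisance learner by its current name ``ml_l``; a stale ``ml_g`` key should now be rejected by the base-class name check rather than silently rewritten. A minimal sketch, assuming an already constructed DoubleMLPLIV model ``dml_pliv`` with score='partialling out' (the parameter values are illustrative):

    # 'ml_l' is the learner for E[Y|X]; 'ml_g' is no longer redirected here.
    dml_pliv.set_ml_nuisance_params('ml_l', 'd', {'n_estimators': 100})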

doubleml/double_ml_plr.py

Lines changed: 0 additions & 45 deletions
@@ -163,17 +163,6 @@ def _check_data(self, obj_dml_data):
                              'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.')
         return

-    # To be removed in version 0.6.0
-    def set_ml_nuisance_params(self, learner, treat_var, params):
-        if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the argument learner accordingly. "
-                           "The provided parameters are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            learner = 'ml_l'
-        super(DoubleMLPLR, self).set_ml_nuisance_params(learner, treat_var, params)
-
     def _nuisance_est(self, smpls, n_jobs_cv, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
                          force_all_finite=False)
@@ -252,40 +241,6 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls):

         return psi_a, psi_b

-    # To be removed in version 0.6.0
-    def tune(self,
-             param_grids,
-             tune_on_folds=False,
-             scoring_methods=None,  # if None the estimator's score method is used
-             n_folds_tune=5,
-             search_mode='grid_search',
-             n_iter_randomized_search=100,
-             n_jobs_cv=None,
-             set_as_params=True,
-             return_tune_res=False):
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \
-                ('ml_g' in param_grids) and ('ml_l' not in param_grids):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of param_grids accordingly. "
-                           "The provided param_grids for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            param_grids['ml_l'] = param_grids.pop('ml_g')
-
-        if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \
-                ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods):
-            warnings.warn(("Learner ml_g was renamed to ml_l. "
-                           "Please adapt the key of scoring_methods accordingly. "
-                           "The provided scoring_methods for ml_g are set for ml_l. "
-                           "The redirection will be removed in a future version."),
-                          DeprecationWarning, stacklevel=2)
-            scoring_methods['ml_l'] = scoring_methods.pop('ml_g')
-
-        tune_res = super(DoubleMLPLR, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune, search_mode,
-                                                 n_iter_randomized_search, n_jobs_cv, set_as_params, return_tune_res)
-        return tune_res
-
     def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
                          search_mode, n_iter_randomized_search):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y,
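The same applies to tuning: ``param_grids`` and any ``scoring_methods`` must now be keyed by the current learner names directly. A minimal sketch for a DoubleMLPLR model ``dml_plr`` with score='partialling out' and random-forest learners (the grids are illustrative):

    param_grids = {'ml_l': {'n_estimators': [50, 100]},
                   'ml_m': {'n_estimators': [50, 100]}}
    dml_plr.tune(param_grids, tune_on_folds=False, n_folds_tune=5)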

doubleml/tests/_utils_lpq_manual.py

Lines changed: 7 additions & 6 deletions
@@ -11,6 +11,7 @@ def fit_lpq(y, x, d, z, quantile,
             learner_g, learner_m, all_smpls, treatment, dml_procedure, n_rep=1,
             trimming_rule='truncate',
             trimming_threshold=1e-2,
+            kde=_default_kde,
             normalize_ipw=True, m_z_params=None,
             m_d_z0_params=None, m_d_z1_params=None,
             g_du_z0_params=None, g_du_z1_params=None):
@@ -37,10 +38,10 @@ def fit_lpq(y, x, d, z, quantile,
                                g_du_z1_params=g_du_z1_params)
         if dml_procedure == 'dml1':
             lpqs[i_rep], ses[i_rep] = lpq_dml1(y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat,
-                                               treatment, quantile, ipw_vec, coef_bounds, smpls)
+                                               treatment, quantile, ipw_vec, coef_bounds, smpls, kde)
         else:
             lpqs[i_rep], ses[i_rep] = lpq_dml2(y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat,
-                                               treatment, quantile, ipw_vec, coef_bounds)
+                                               treatment, quantile, ipw_vec, coef_bounds, kde)

     lpq = np.median(lpqs)
     se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(lpqs - lpq, 2)) / n_obs)
@@ -200,7 +201,7 @@ def ipw_score(theta):
     return m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat, ipw_vec, coef_bounds


-def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, smpls):
+def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, smpls, kde):
     thetas = np.zeros(len(smpls))
     n_obs = len(y)
     ipw_est = ipw_vec.mean()
@@ -211,17 +212,17 @@ def lpq_dml1(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw

     theta_hat = np.mean(thetas)

-    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs))
+    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs, kde))

     return theta_hat, se


-def lpq_dml2(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds):
+def lpq_dml2(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw_vec, coef_bounds, kde):
     n_obs = len(y)
     ipw_est = ipw_vec.mean()
     theta_hat = lpq_est(m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, ipw_est, coef_bounds)

-    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs))
+    se = np.sqrt(lpq_var_est(theta_hat, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, n_obs, kde))

     return theta_hat, se
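The test helpers now thread a ``kde`` callable (defaulting to ``_default_kde``) through to ``lpq_var_est``, mirroring the estimator's public ``kde`` option. A sketch of a compatible callable, assuming the package convention that it receives the score residuals ``u`` and observation ``weights`` and returns the density estimate at zero:

    from scipy.stats import gaussian_kde

    def gaussian_kde_at_zero(u, weights):
        # Weighted Gaussian KDE of the residuals, evaluated at u = 0.
        return gaussian_kde(u, weights=weights).evaluate(0)[0]

    # fit_lpq(..., kde=gaussian_kde_at_zero)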

doubleml/tests/test_blp.py

Lines changed: 8 additions & 2 deletions
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+import copy

 import doubleml as dml

@@ -26,7 +27,10 @@ def dml_blp_fixture(ci_joint, ci_level):
     random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
     random_signal = np.random.normal(0, 1, size=(n, ))

-    blp = dml.DoubleMLBLP(random_signal, random_basis).fit()
+    blp = dml.DoubleMLBLP(random_signal, random_basis)
+
+    blp_obj = copy.copy(blp)
+    blp.fit()
     blp_manual = fit_blp(random_signal, random_basis)

     np.random.seed(42)
@@ -47,7 +51,8 @@ def dml_blp_fixture(ci_joint, ci_level):
            'ci_1': ci_1,
            'ci_2': ci_2,
            'ci_manual': ci_manual,
-           'blp_model': blp}
+           'blp_model': blp,
+           'unfitted_blp_model': blp_obj}

     return res_dict

@@ -91,6 +96,7 @@ def test_dml_blp_ci_2(dml_blp_fixture):
 def test_dml_blp_return_types(dml_blp_fixture):
     assert isinstance(dml_blp_fixture['blp_model'].__str__(), str)
     assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
+    assert isinstance(dml_blp_fixture['unfitted_blp_model'].summary, pd.DataFrame)


 @pytest.mark.ci
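The fixture now keeps a shallow copy taken before ``fit()`` so the return-type test can assert that ``summary`` already yields a DataFrame on an unfitted model instead of raising. A minimal usage sketch of the behavior under test (data shapes are illustrative):

    import numpy as np
    import pandas as pd
    import doubleml as dml

    np.random.seed(42)
    basis = pd.DataFrame(np.random.normal(0, 1, size=(50, 3)))
    signal = np.random.normal(0, 1, size=(50, ))

    blp = dml.DoubleMLBLP(signal, basis)
    print(type(blp.summary))  # DataFrame even before fit(), per the new assertion
    blp.fit()
    print(blp.summary)        # populated after fitting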

doubleml/tests/test_cvar_tune.py

Lines changed: 0 additions & 5 deletions
@@ -3,8 +3,6 @@
 import math

 from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

 import doubleml as dml
@@ -58,9 +56,6 @@ def tune_on_folds(request):
 def get_par_grid(learner):
     if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]:
         par_grid = {'n_estimators': [5, 10, 15, 20]}
-    else:
-        assert learner.__class__ in [LogisticRegression]
-        par_grid = {'C': np.logspace(-4, 2, 10)}
     return par_grid



doubleml/tests/test_dml_data.py

Lines changed: 29 additions & 1 deletion
@@ -5,9 +5,27 @@
 from doubleml import DoubleMLData, DoubleMLPLR, DoubleMLClusterData, DoubleMLDIDCS
 from doubleml.datasets import make_plr_CCDDHNR2018, _make_pliv_data, make_pliv_CHS2015,\
     make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020
+from doubleml.double_ml_data import DoubleMLBaseData
+
 from sklearn.linear_model import Lasso, LogisticRegression


+class DummyDataClass(DoubleMLBaseData):
+    def __init__(self, data):
+        DoubleMLBaseData.__init__(self, data)
+
+    @property
+    def n_coefs(self):
+        return 1
+
+
+@pytest.mark.ci
+def test_doubleml_basedata():
+    dummy_dml_data = DummyDataClass(pd.DataFrame(np.zeros((100, 10))))
+    assert dummy_dml_data.d_cols[0] == 'theta'
+    assert dummy_dml_data.n_treat == 1
+    assert dummy_dml_data.n_coefs == 1
+
 @pytest.fixture(scope="module")
 def dml_data_fixture(generate_data1):
     data = generate_data1
@@ -157,12 +175,22 @@ def test_dml_data_no_instr_no_time():


 @pytest.mark.ci
-def test_dml_cluster_summary_with_time():
+def test_dml_summary_with_time():
     dml_data_did_cs = make_did_SZ2020(n_obs=200, cross_sectional_data=True)
     dml_did_cs = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression())
     assert isinstance(dml_did_cs.__str__(), str)
     assert isinstance(dml_did_cs.summary, pd.DataFrame)

+    dml_data = make_plr_CCDDHNR2018(n_obs=100)
+    df = dml_data.data.copy().iloc[:, :11]
+    df.columns = [f'X{i + 1}' for i in np.arange(8)] + ['y', 'd1', 'd2']
+    print(df)
+    dml_data = DoubleMLClusterData(df, 'y', ['d1', 'd2'],
+                                   cluster_cols=[f'X{i + 1}' for i in [5, 6]],
+                                   x_cols=[f'X{i + 1}' for i in np.arange(5)],
+                                   t_col='X8')
+    assert isinstance(dml_data._data_summary_str(), str)
+

 @pytest.mark.ci
 def test_x_cols_setter_defaults():
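The renamed test also exercises the cluster-data summary path directly via ``_data_summary_str()``. In user code the same summary surfaces when printing the data object; a minimal sketch reusing the constructor call from the test (and assuming ``__str__`` delegates to the summary helper):

    import numpy as np
    from doubleml import DoubleMLClusterData
    from doubleml.datasets import make_plr_CCDDHNR2018

    df = make_plr_CCDDHNR2018(n_obs=100).data.iloc[:, :11].copy()
    df.columns = [f'X{i + 1}' for i in np.arange(8)] + ['y', 'd1', 'd2']
    dml_cluster_data = DoubleMLClusterData(df, 'y', ['d1', 'd2'],
                                           cluster_cols=['X6', 'X7'],
                                           x_cols=[f'X{i + 1}' for i in np.arange(5)],
                                           t_col='X8')
    print(dml_cluster_data)  # includes cluster and time columns in the data summary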
