Skip to content

Commit 701fedc

Browse files
authored
Merge pull request #227 from DoubleML/s-remove-apply-crossfitting
Remove `apply_crossfitting` and `dml_procedure` options from DoubleML class
2 parents 1ad6ec2 + e8384cf commit 701fedc

File tree

88 files changed

+532
-2718
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

88 files changed

+532
-2718
lines changed

doubleml/did/did.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
4949
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
5050
Default is ``True``.
5151
52-
dml_procedure : str
53-
A str (``'dml1'`` or ``'dml2'``) specifying the double machine learning algorithm.
54-
Default is ``'dml2'``.
55-
5652
trimming_rule : str
5753
A str (``'truncate'`` is the only choice) specifying the trimming approach.
5854
Default is ``'truncate'``.
@@ -65,10 +61,6 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
6561
Indicates whether the sample splitting should be drawn during initialization of the object.
6662
Default is ``True``.
6763
68-
apply_cross_fitting : bool
69-
Indicates whether cross-fitting should be applied.
70-
Default is ``True``.
71-
7264
Examples
7365
--------
7466
>>> import numpy as np
@@ -93,18 +85,14 @@ def __init__(self,
9385
n_rep=1,
9486
score='observational',
9587
in_sample_normalization=True,
96-
dml_procedure='dml2',
9788
trimming_rule='truncate',
9889
trimming_threshold=1e-2,
99-
draw_sample_splitting=True,
100-
apply_cross_fitting=True):
90+
draw_sample_splitting=True):
10191
super().__init__(obj_dml_data,
10292
n_folds,
10393
n_rep,
10494
score,
105-
dml_procedure,
106-
draw_sample_splitting,
107-
apply_cross_fitting)
95+
draw_sample_splitting)
10896

10997
self._check_data(self._dml_data)
11098
valid_scores = ['observational', 'experimental']

doubleml/did/did_cs.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
4949
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
5050
Default is ``True``.
5151
52-
dml_procedure : str
53-
A str (``'dml1'`` or ``'dml2'``) specifying the double machine learning algorithm.
54-
Default is ``'dml2'``.
55-
5652
trimming_rule : str
5753
A str (``'truncate'`` is the only choice) specifying the trimming approach.
5854
Default is ``'truncate'``.
@@ -65,10 +61,6 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
6561
Indicates whether the sample splitting should be drawn during initialization of the object.
6662
Default is ``True``.
6763
68-
apply_cross_fitting : bool
69-
Indicates whether cross-fitting should be applied.
70-
Default is ``True``.
71-
7264
Examples
7365
--------
7466
>>> import numpy as np
@@ -93,18 +85,14 @@ def __init__(self,
9385
n_rep=1,
9486
score='observational',
9587
in_sample_normalization=True,
96-
dml_procedure='dml2',
9788
trimming_rule='truncate',
9889
trimming_threshold=1e-2,
99-
draw_sample_splitting=True,
100-
apply_cross_fitting=True):
90+
draw_sample_splitting=True):
10191
super().__init__(obj_dml_data,
10292
n_folds,
10393
n_rep,
10494
score,
105-
dml_procedure,
106-
draw_sample_splitting,
107-
apply_cross_fitting)
95+
draw_sample_splitting)
10896

10997
self._check_data(self._dml_data)
11098
valid_scores = ['observational', 'experimental']

doubleml/did/tests/_utils_did_cs_manual.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
from sklearn.base import clone
33

44
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
5-
from ._utils_did_manual import did_dml1, did_dml2
5+
from ._utils_did_manual import did_dml2
66

77

88
def fit_did_cs(y, x, d, t,
9-
learner_g, learner_m, all_smpls, dml_procedure, score, in_sample_normalization,
9+
learner_g, learner_m, all_smpls, score, in_sample_normalization,
1010
n_rep=1, g_d0_t0_params=None, g_d0_t1_params=None,
1111
g_d1_t0_params=None, g_d1_t1_params=None, m_params=None,
1212
trimming_threshold=1e-2):
@@ -57,11 +57,7 @@ def fit_did_cs(y, x, d, t,
5757
all_psi_a.append(psi_a)
5858
all_psi_b.append(psi_b)
5959

60-
if dml_procedure == 'dml1':
61-
thetas[i_rep], ses[i_rep] = did_dml1(psi_a, psi_b, smpls)
62-
else:
63-
assert dml_procedure == 'dml2'
64-
thetas[i_rep], ses[i_rep] = did_dml2(psi_a, psi_b)
60+
thetas[i_rep], ses[i_rep] = did_dml2(psi_a, psi_b)
6561

6662
theta = np.median(thetas)
6763
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)

doubleml/did/tests/_utils_did_manual.py

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
def fit_did(y, x, d,
9-
learner_g, learner_m, all_smpls, dml_procedure, score, in_sample_normalization,
9+
learner_g, learner_m, all_smpls, score, in_sample_normalization,
1010
n_rep=1, g0_params=None, g1_params=None, m_params=None,
1111
trimming_threshold=1e-2):
1212
n_obs = len(y)
@@ -43,11 +43,7 @@ def fit_did(y, x, d,
4343
all_psi_a.append(psi_a)
4444
all_psi_b.append(psi_b)
4545

46-
if dml_procedure == 'dml1':
47-
thetas[i_rep], ses[i_rep] = did_dml1(psi_a, psi_b, smpls)
48-
else:
49-
assert dml_procedure == 'dml2'
50-
thetas[i_rep], ses[i_rep] = did_dml2(psi_a, psi_b)
46+
thetas[i_rep], ses[i_rep] = did_dml2(psi_a, psi_b)
5147

5248
theta = np.median(thetas)
5349
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
@@ -107,25 +103,6 @@ def compute_did_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, s
107103
return resid_d0, g_hat0, g_hat1, m_hat, p_hat
108104

109105

110-
def did_dml1(psi_a, psi_b, smpls):
111-
thetas = np.zeros(len(smpls))
112-
n_obs = len(psi_a)
113-
114-
for idx, (_, test_index) in enumerate(smpls):
115-
thetas[idx] = - np.mean(psi_b[test_index]) / np.mean(psi_a[test_index])
116-
theta_hat = np.mean(thetas)
117-
118-
if len(smpls) > 1:
119-
se = np.sqrt(var_did(theta_hat, psi_a, psi_b, n_obs))
120-
else:
121-
assert len(smpls) == 1
122-
test_index = smpls[0][1]
123-
n_obs = len(test_index)
124-
se = np.sqrt(var_did(theta_hat, psi_a[test_index], psi_b[test_index], n_obs))
125-
126-
return theta_hat, se
127-
128-
129106
def did_dml2(psi_a, psi_b):
130107
n_obs = len(psi_a)
131108
theta_hat = - np.mean(psi_b) / np.mean(psi_a)
@@ -176,7 +153,6 @@ def var_did(theta, psi_a, psi_b, n_obs):
176153

177154
def boot_did(y, thetas, ses, all_psi_a, all_psi_b,
178155
all_smpls, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True):
179-
all_boot_theta = list()
180156
all_boot_t_stat = list()
181157
for i_rep in range(n_rep):
182158
smpls = all_smpls[i_rep]
@@ -186,16 +162,14 @@ def boot_did(y, thetas, ses, all_psi_a, all_psi_b,
186162
test_index = smpls[0][1]
187163
n_obs = len(test_index)
188164
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
189-
boot_theta, boot_t_stat = boot_did_single_split(
165+
boot_t_stat = boot_did_single_split(
190166
thetas[i_rep], all_psi_a[i_rep], all_psi_b[i_rep], smpls,
191167
ses[i_rep], weights, n_rep_boot, apply_cross_fitting)
192-
all_boot_theta.append(boot_theta)
193168
all_boot_t_stat.append(boot_t_stat)
194169

195-
boot_theta = np.hstack(all_boot_theta)
196170
boot_t_stat = np.hstack(all_boot_t_stat)
197171

198-
return boot_theta, boot_t_stat
172+
return boot_t_stat
199173

200174

201175
def boot_did_single_split(theta, psi_a, psi_b,
@@ -208,9 +182,9 @@ def boot_did_single_split(theta, psi_a, psi_b,
208182
J = np.mean(psi_a[test_index])
209183

210184
psi = np.multiply(psi_a, theta) + psi_b
211-
boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting)
185+
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting)
212186

213-
return boot_theta, boot_t_stat
187+
return boot_t_stat
214188

215189

216190
def tune_nuisance_did(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune,

doubleml/did/tests/test_did.py

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,21 +34,14 @@ def in_sample_normalization(request):
3434
return request.param
3535

3636

37-
@pytest.fixture(scope='module',
38-
params=['dml1', 'dml2'])
39-
def dml_procedure(request):
40-
return request.param
41-
42-
4337
@pytest.fixture(scope='module',
4438
params=[0.1])
4539
def trimming_threshold(request):
4640
return request.param
4741

4842

4943
@pytest.fixture(scope='module')
50-
def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
51-
dml_procedure, trimming_threshold):
44+
def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, trimming_threshold):
5245
boot_methods = ['normal']
5346
n_folds = 2
5447
n_rep_boot = 499
@@ -71,7 +64,6 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
7164
n_folds,
7265
score=score,
7366
in_sample_normalization=in_sample_normalization,
74-
dml_procedure=dml_procedure,
7567
draw_sample_splitting=False,
7668
trimming_threshold=trimming_threshold)
7769

@@ -82,7 +74,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
8274
np.random.seed(3141)
8375
res_manual = fit_did(y, x, d,
8476
clone(learner[0]), clone(learner[1]),
85-
all_smpls, dml_procedure, score, in_sample_normalization,
77+
all_smpls, score, in_sample_normalization,
8678
trimming_threshold=trimming_threshold)
8779

8880
res_dict = {'coef': dml_did_obj.coef,
@@ -93,15 +85,13 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
9385

9486
for bootstrap in boot_methods:
9587
np.random.seed(3141)
96-
boot_theta, boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
97-
res_manual['all_psi_a'], res_manual['all_psi_b'],
98-
all_smpls, bootstrap, n_rep_boot)
88+
boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
89+
res_manual['all_psi_a'], res_manual['all_psi_b'],
90+
all_smpls, bootstrap, n_rep_boot)
9991

10092
np.random.seed(3141)
10193
dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
102-
res_dict['boot_coef' + bootstrap] = dml_did_obj.boot_coef
10394
res_dict['boot_t_stat' + bootstrap] = dml_did_obj.boot_t_stat
104-
res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
10595
res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat
10696

10797
# sensitivity tests
@@ -137,9 +127,6 @@ def test_dml_did_se(dml_did_fixture):
137127
@pytest.mark.ci
138128
def test_dml_did_boot(dml_did_fixture):
139129
for bootstrap in dml_did_fixture['boot_methods']:
140-
assert np.allclose(dml_did_fixture['boot_coef' + bootstrap],
141-
dml_did_fixture['boot_coef' + bootstrap + '_manual'],
142-
rtol=1e-9, atol=1e-4)
143130
assert np.allclose(dml_did_fixture['boot_t_stat' + bootstrap],
144131
dml_did_fixture['boot_t_stat' + bootstrap + '_manual'],
145132
rtol=1e-9, atol=1e-4)

doubleml/did/tests/test_did_cs.py

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,21 +35,14 @@ def in_sample_normalization(request):
3535
return request.param
3636

3737

38-
@pytest.fixture(scope='module',
39-
params=['dml1', 'dml2'])
40-
def dml_procedure(request):
41-
return request.param
42-
43-
4438
@pytest.fixture(scope='module',
4539
params=[0.1])
4640
def trimming_threshold(request):
4741
return request.param
4842

4943

5044
@pytest.fixture(scope='module')
51-
def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization,
52-
dml_procedure, trimming_threshold):
45+
def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, trimming_threshold):
5346
boot_methods = ['normal']
5447
n_folds = 2
5548
n_rep_boot = 499
@@ -73,7 +66,6 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
7366
n_folds,
7467
score=score,
7568
in_sample_normalization=in_sample_normalization,
76-
dml_procedure=dml_procedure,
7769
draw_sample_splitting=False,
7870
trimming_threshold=trimming_threshold)
7971

@@ -84,7 +76,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
8476
np.random.seed(3141)
8577
res_manual = fit_did_cs(y, x, d, t,
8678
clone(learner[0]), clone(learner[1]),
87-
all_smpls, dml_procedure, score, in_sample_normalization,
79+
all_smpls, score, in_sample_normalization,
8880
trimming_threshold=trimming_threshold)
8981

9082
res_dict = {'coef': dml_did_cs_obj.coef,
@@ -95,15 +87,13 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
9587

9688
for bootstrap in boot_methods:
9789
np.random.seed(3141)
98-
boot_theta, boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
99-
res_manual['all_psi_a'], res_manual['all_psi_b'],
100-
all_smpls, bootstrap, n_rep_boot)
90+
boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
91+
res_manual['all_psi_a'], res_manual['all_psi_b'],
92+
all_smpls, bootstrap, n_rep_boot)
10193

10294
np.random.seed(3141)
10395
dml_did_cs_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
104-
res_dict['boot_coef' + bootstrap] = dml_did_cs_obj.boot_coef
10596
res_dict['boot_t_stat' + bootstrap] = dml_did_cs_obj.boot_t_stat
106-
res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
10797
res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat
10898

10999
# sensitivity tests
@@ -138,9 +128,6 @@ def test_dml_did_cs_se(dml_did_cs_fixture):
138128
@pytest.mark.ci
139129
def test_dml_did_cs_boot(dml_did_cs_fixture):
140130
for bootstrap in dml_did_cs_fixture['boot_methods']:
141-
assert np.allclose(dml_did_cs_fixture['boot_coef' + bootstrap],
142-
dml_did_cs_fixture['boot_coef' + bootstrap + '_manual'],
143-
rtol=1e-9, atol=1e-4)
144131
assert np.allclose(dml_did_cs_fixture['boot_t_stat' + bootstrap],
145132
dml_did_cs_fixture['boot_t_stat' + bootstrap + '_manual'],
146133
rtol=1e-9, atol=1e-4)

doubleml/did/tests/test_didcs_external_predictions.py renamed to doubleml/did/tests/test_did_cs_external_predictions.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,13 @@ def did_score(request):
1313
return request.param
1414

1515

16-
@pytest.fixture(scope="module", params=["dml1", "dml2"])
17-
def dml_procedure(request):
18-
return request.param
19-
20-
2116
@pytest.fixture(scope="module", params=[1, 3])
2217
def n_rep(request):
2318
return request.param
2419

2520

2621
@pytest.fixture(scope="module")
27-
def doubleml_didcs_fixture(did_score, dml_procedure, n_rep):
22+
def doubleml_didcs_fixture(did_score, n_rep):
2823
ext_predictions = {"d": {}}
2924
dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData")
3025
all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d)
@@ -33,7 +28,6 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep):
3328
"score": did_score,
3429
"n_rep": n_rep,
3530
"n_folds": 5,
36-
"dml_procedure": dml_procedure,
3731
"draw_sample_splitting": False
3832
}
3933
dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)

0 commit comments

Comments
 (0)