
Commit 6115f25

Michaela Kecskésová committed:
Save fitted models under nonignorable nonresponse

1 parent: 7bd83d7

File tree: 1 file changed

doubleml/double_ml_ssm.py (+38 −20 lines)
```diff
@@ -58,7 +58,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
 
     normalize_ipw : bool
         Indicates whether the inverse probability weights are normalized.
-        Default is ``True``.
+        Default is ``False``.
 
     trimming_rule : str
         A str (``'truncate'`` is the only choice) specifying the trimming approach.
```
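This hunk corrects the documented default for `normalize_ipw` from ``True`` to ``False``. Below is a minimal construction sketch, not part of this commit, that spells the flag out explicitly so the documented default cannot surprise callers; `make_ssm_data` and the `'nonignorable'` score name follow the doubleml documentation and should be treated as assumptions here.

```python
# Hedged sketch: explicit normalize_ipw under the nonignorable score.
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLSSM
from doubleml.datasets import make_ssm_data

# mar=False simulates selection that depends on unobservables
# (assumption: signature per the doubleml docs)
dml_data = make_ssm_data(n_obs=2000, mar=False)

dml_ssm = DoubleMLSSM(dml_data,
                      ml_g=RandomForestRegressor(),
                      ml_pi=RandomForestClassifier(),
                      ml_m=RandomForestClassifier(),
                      score='nonignorable',
                      normalize_ipw=False)  # documented default, stated explicitly
```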
```diff
@@ -136,7 +136,7 @@ def __init__(self,
                          apply_cross_fitting)
 
         self._external_predictions_implemented = False
-        self._sensitivity_implemented = True
+        self._sensitivity_implemented = False
         self._normalize_ipw = normalize_ipw
 
         self._trimming_rule = trimming_rule
```
```diff
@@ -264,15 +264,33 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         pi_hat = copy.deepcopy(g_hat_d1)
         m_hat = copy.deepcopy(g_hat_d1)
 
-        # create strata for splitting
-        strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.t.reshape(-1, 1)
-
         # pi_hat - used for preliminary estimation of propensity score pi, overwritten in each iteration
         pi_hat_prelim = {'models': None,
                          'targets': [],
                          'preds': []
                          }
 
+        # initialize models
+        fitted_models = {}
+        for learner in self.params_names:
+            # set nuisance model parameters
+            est_params = self._get_params(learner)
+
+            if learner == 'ml_g_d1' or learner == 'ml_g_d0':
+                nuisance = 'ml_g'
+            else:
+                nuisance = learner
+
+            if est_params is not None:
+                fitted_models[learner] = [
+                    clone(self._learner[nuisance]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds)
+                ]
+            else:
+                fitted_models[learner] = [clone(self._learner[nuisance]) for i_fold in range(self.n_folds)]
+
+        # create strata for splitting
+        strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.t.reshape(-1, 1)
+
         # calculate nuisance functions over different folds - nested cross-fitting
         for i_fold in range(self.n_folds):
             train_inds = smpls[i_fold][0]
```
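The new `fitted_models` block keeps one clone of each nuisance learner per fold, applying fold-specific tuned parameters when available, instead of re-cloning inside the loop and discarding the fit afterwards. Keeping the clones in a dict keyed by `params_names` is what later lets `return_models` hand them back to the caller. A standalone sketch of the same pattern in plain scikit-learn; the names here are illustrative, not the DoubleML internals:

```python
# Illustrative sketch of the per-fold clone pattern (not DoubleML internals).
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

n_folds = 5
base_learner = LogisticRegression()
# fold-specific hyperparameters, e.g. from per-fold tuning; None means defaults
est_params = [{'C': c} for c in (0.1, 0.5, 1.0, 2.0, 5.0)]

if est_params is not None:
    # one pre-configured, unfitted clone per fold
    fold_models = [clone(base_learner).set_params(**est_params[i]) for i in range(n_folds)]
else:
    fold_models = [clone(base_learner) for _ in range(n_folds)]
```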
```diff
@@ -286,12 +304,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
             s_train_1 = s[train_inds_1]
             dx_train_1 = dx[train_inds_1, :]
 
-            # preliminary propensity score for selection
-            ml_pi_prelim = clone(self._learner['ml_pi'])
-
-            # fit on first part of training set
-            ml_pi_prelim.fit(dx_train_1, s_train_1)
-            pi_hat_prelim['preds'] = _predict_zero_one_propensity(ml_pi_prelim, dx)
+            # fit propensity score for selection on first part of training set
+            fitted_models['ml_pi'][i_fold].fit(dx_train_1, s_train_1)
+            pi_hat_prelim['preds'] = _predict_zero_one_propensity(fitted_models['ml_pi'][i_fold], dx)
             pi_hat_prelim['targets'] = s
 
             # predictions for small pi in denominator
```
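Both the selection and treatment propensities are read off via `_predict_zero_one_propensity`. Its actual implementation lives in doubleml's utilities and may differ; a hedged sketch of what such a helper conventionally does:

```python
# Hedged sketch only: the real _predict_zero_one_propensity may handle
# edge cases (missing classes, multi-output) differently.
def predict_zero_one_propensity(clf, X):
    """Return the estimated P(target = 1 | X) from a fitted binary classifier."""
    proba = clf.predict_proba(X)
    # scikit-learn orders predict_proba columns by clf.classes_; with
    # {0, 1} targets the second column is the probability of class 1
    return proba[:, 1]
```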
```diff
@@ -306,10 +321,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
             d_train_2 = d[train_inds_2]
             xpi_test = xpi[test_inds, :]
 
-            ml_m = clone(self._learner['ml_m'])
-            ml_m.fit(xpi_train_2, d_train_2)
+            fitted_models['ml_m'][i_fold].fit(xpi_train_2, d_train_2)
 
-            m_hat['preds'][test_inds] = _predict_zero_one_propensity(ml_m, xpi_test)
+            m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], xpi_test)
             m_hat['targets'][test_inds] = d[test_inds]
 
             # estimate conditional outcome g on second training sample - treatment
```
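The `xpi_*` arrays feed the treatment propensity `ml_m` and both outcome learners. Under nonignorable nonresponse the construction suggested by the variable names, which is an assumption here rather than something shown in this diff, appends the preliminary selection propensity to the covariates as a control function:

```python
# Assumption from the xpi_* naming: covariates augmented with preliminary pi_hat.
import numpy as np

rng = np.random.default_rng(42)
x = rng.normal(size=(100, 5))               # covariates
pi_preds = rng.uniform(0.1, 0.9, size=100)  # preliminary selection propensities
xpi = np.column_stack((x, pi_preds))        # control-function style augmentation
```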
```diff
@@ -318,11 +332,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
             xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :]
             y_s1_d1_train_2 = y[s1_d1_train_2_indices]
 
-            ml_g_d1_prelim = clone(self._learner['ml_g'])
-            ml_g_d1_prelim.fit(xpi_s1_d1_train_2, y_s1_d1_train_2)
+            fitted_models['ml_g_d1'][i_fold].fit(xpi_s1_d1_train_2, y_s1_d1_train_2)
 
             # predict conditional outcome
-            g_hat_d1['preds'][test_inds] = ml_g_d1_prelim.predict(xpi_test)
+            g_hat_d1['preds'][test_inds] = fitted_models['ml_g_d1'][i_fold].predict(xpi_test)
             g_hat_d1['targets'][test_inds] = y[test_inds]
 
             # estimate conditional outcome on second training sample - control
```
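The `s1_d1_*` and `s1_d0_*` names indicate that each outcome learner is fit only on second-split training units that are selected and carry the matching treatment status. A hedged reading of that index construction, with illustrative data:

```python
# Hedged reading of the s1_d1_* / s1_d0_* index construction (illustrative data).
import numpy as np

rng = np.random.default_rng(0)
s = rng.binomial(1, 0.7, size=100)  # selection indicator
d = rng.binomial(1, 0.5, size=100)  # treatment indicator
train_inds_2 = np.arange(50, 100)   # second part of the training split

# selected AND treated units within the second training split
s1_d1_train_2_indices = train_inds_2[(s[train_inds_2] == 1) & (d[train_inds_2] == 1)]
# the control-arm learner mirrors this with d == 0
s1_d0_train_2_indices = train_inds_2[(s[train_inds_2] == 1) & (d[train_inds_2] == 0)]
```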
```diff
@@ -331,13 +344,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
             xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :]
             y_s1_d0_train_2 = y[s1_d0_train_2_indices]
 
-            ml_g_d0_prelim = clone(self._learner['ml_g'])
-            ml_g_d0_prelim.fit(xpi_s1_d0_train_2, y_s1_d0_train_2)
+            fitted_models['ml_g_d0'][i_fold].fit(xpi_s1_d0_train_2, y_s1_d0_train_2)
 
             # predict conditional outcome
-            g_hat_d0['preds'][test_inds] = ml_g_d0_prelim.predict(xpi_test)
+            g_hat_d0['preds'][test_inds] = fitted_models['ml_g_d0'][i_fold].predict(xpi_test)
             g_hat_d0['targets'][test_inds] = y[test_inds]
 
+        if return_models:
+            g_hat_d1['models'] = fitted_models['ml_g_d1']
+            g_hat_d0['models'] = fitted_models['ml_g_d0']
+            pi_hat['models'] = fitted_models['ml_pi']
+            m_hat['models'] = fitted_models['ml_m']
+
         m_hat['preds'] = _trimm(m_hat['preds'], self._trimming_rule, self._trimming_threshold)
 
         # treatment indicator
```
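With the fitted clones now written back into the nuisance dicts whenever `return_models` is set, the models become reachable through DoubleML's public API. A hedged usage sketch, continuing the construction sketch above; the `store_models` flag and the nesting of `.models` follow the general DoubleML docs and are assumptions for this class:

```python
# Hedged usage sketch: retrieving the stored nuisance models after fitting.
dml_ssm.fit(store_models=True)       # forwards return_models=True to _nuisance_est
pi_models = dml_ssm.models['ml_pi']  # fitted selection learners, nested by
                                     # treatment variable / repetition / fold
```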
