Skip to content

Commit 57720b2

Browse files
committed
update DoubleMLQTE for Framework
1 parent d0b4722 commit 57720b2

File tree

1 file changed

+68
-183
lines changed

1 file changed

+68
-183
lines changed

doubleml/irm/qte.py

Lines changed: 68 additions & 183 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import numpy as np
22
import pandas as pd
3-
from scipy.stats import norm
43

54
from sklearn.base import clone
65

@@ -12,7 +11,7 @@
1211
from .cvar import DoubleMLCVAR
1312
from ..double_ml_framework import concat
1413

15-
from ..utils._estimation import _draw_weights, _default_kde
14+
from ..utils._estimation import _default_kde
1615
from ..utils.resampling import DoubleMLResampling
1716
from ..utils._checks import _check_score, _check_trimming, _check_zero_one_treatment
1817

@@ -155,13 +154,6 @@ def __init__(self,
155154
# initialize all models
156155
self._modellist_0, self._modellist_1 = self._initialize_models()
157156

158-
# initialize arrays according to obj_dml_data and the resampling settings
159-
self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv, \
160-
self._coef, self._se, self._all_coef, self._all_se = self._initialize_arrays()
161-
162-
# also initialize bootstrap arrays with the default number of bootstrap replications
163-
self._n_rep_boot, self._boot_coef, self._boot_t_stat = self._initialize_boot_arrays(n_rep_boot=500)
164-
165157
def __str__(self):
166158
class_name = self.__class__.__name__
167159
header = f'================== {class_name} Object ==================\n'
@@ -276,52 +268,69 @@ def trimming_threshold(self):
276268
@property
277269
def coef(self):
278270
"""
279-
Estimates for the causal parameter(s) after calling :meth:`fit`.
271+
Estimates for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)).
280272
"""
281-
return self._coef
282-
283-
@coef.setter
284-
def coef(self, value):
285-
self._coef = value
273+
if self._framework is None:
274+
coef = None
275+
else:
276+
coef = self.framework.thetas
277+
return coef
286278

287279
@property
288280
def all_coef(self):
289281
"""
290-
Estimates of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`.
282+
Estimates of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`
283+
(shape (``n_quantiles``, ``n_rep``)).
291284
"""
292-
return self._all_coef
285+
if self._framework is None:
286+
all_coef = None
287+
else:
288+
all_coef = self.framework.all_thetas
289+
return all_coef
293290

294291
@property
295292
def se(self):
296293
"""
297-
Standard errors for the causal parameter(s) after calling :meth:`fit`.
294+
Standard errors for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)).
298295
"""
299-
return self._se
296+
if self._framework is None:
297+
se = None
298+
else:
299+
se = self.framework.ses
300+
return se
300301

301-
@se.setter
302-
def se(self, value):
303-
self._se = value
302+
@property
303+
def all_se(self):
304+
"""
305+
Standard errors of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit`
306+
(shape (``n_quantiles``, ``n_rep``)).
307+
"""
308+
if self._framework is None:
309+
all_se = None
310+
else:
311+
all_se = self.framework.all_ses
312+
return all_se
304313

305314
@property
306315
def t_stat(self):
307316
"""
308-
t-statistics for the causal parameter(s) after calling :meth:`fit`.
317+
t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)).
309318
"""
310319
t_stat = self.coef / self.se
311320
return t_stat
312321

313322
@property
314323
def pval(self):
315324
"""
316-
p-values for the causal parameter(s) after calling :meth:`fit`.
325+
p-values for the causal parameter(s) (shape (``n_quantiles``,)).
317326
"""
318-
pval = 2 * norm.cdf(-np.abs(self.t_stat))
319-
return pval
327+
return self.framework.pvals
320328

321329
@property
322330
def boot_t_stat(self):
323331
"""
324-
Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap`.
332+
Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap`
333+
(shape (``n_rep_boot``, ``n_quantiles``, ``n_rep``)).
325334
"""
326335
if self._framework is None:
327336
boot_t_stat = None
@@ -362,30 +371,6 @@ def summary(self):
362371
df_summary = df_summary.join(ci)
363372
return df_summary
364373

365-
# The private properties with __ always deliver the single treatment, single (cross-fitting) sample subselection.
366-
# The slicing is based on the two properties self._i_quant, the index of the quantile, and
367-
# self._i_rep, the index of the cross-fitting sample.
368-
369-
@property
370-
def __psi0(self):
371-
return self._psi0[:, self._i_rep, self._i_quant]
372-
373-
@property
374-
def __psi0_deriv(self):
375-
return self._psi0_deriv[:, self._i_rep, self._i_quant]
376-
377-
@property
378-
def __psi1(self):
379-
return self._psi1[:, self._i_rep, self._i_quant]
380-
381-
@property
382-
def __psi1_deriv(self):
383-
return self._psi1_deriv[:, self._i_rep, self._i_quant]
384-
385-
@property
386-
def __all_se(self):
387-
return self._all_se[self._i_quant, self._i_rep]
388-
389374
def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None):
390375
"""
391376
Estimate DoubleMLQTE models.
@@ -436,26 +421,6 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
436421
framework_list[self._i_quant] = self._modellist_1[self._i_quant].framework - \
437422
self._modellist_0[self._i_quant].framework
438423

439-
# treatment Effects
440-
self._all_coef[self._i_quant, :] = self.modellist_1[self._i_quant].all_coef - \
441-
self.modellist_0[self._i_quant].all_coef
442-
443-
# save scores and derivatives
444-
self._psi0[:, :, self._i_quant] = np.squeeze(self.modellist_0[i_quant].psi, 2)
445-
self._psi1[:, :, self._i_quant] = np.squeeze(self.modellist_1[i_quant].psi, 2)
446-
447-
self._psi0_deriv[:, :, self._i_quant] = np.squeeze(self.modellist_0[i_quant].psi_deriv, 2)
448-
self._psi1_deriv[:, :, self._i_quant] = np.squeeze(self.modellist_1[i_quant].psi_deriv, 2)
449-
450-
# Estimate the variance
451-
for i_rep in range(self.n_rep):
452-
self._i_rep = i_rep
453-
454-
self._all_se[self._i_quant, self._i_rep] = self._se_causal_pars()
455-
456-
# aggregated parameter estimates and standard errors from repeated cross-fitting
457-
self._agg_cross_fit()
458-
459424
# aggregate all frameworks
460425
self._framework = concat(framework_list)
461426

@@ -478,33 +443,10 @@ def bootstrap(self, method='normal', n_rep_boot=500):
478443
-------
479444
self : object
480445
"""
481-
if np.isnan(self.coef).all():
446+
if self._framework is None:
482447
raise ValueError('Apply fit() before bootstrap().')
448+
self._framework.bootstrap(method=method, n_rep_boot=n_rep_boot)
483449

484-
if (not isinstance(method, str)) | (method not in ['Bayes', 'normal', 'wild']):
485-
raise ValueError('Method must be "Bayes", "normal" or "wild". '
486-
f'Got {str(method)}.')
487-
488-
if not isinstance(n_rep_boot, int):
489-
raise TypeError('The number of bootstrap replications must be of int type. '
490-
f'{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed.')
491-
if n_rep_boot < 1:
492-
raise ValueError('The number of bootstrap replications must be positive. '
493-
f'{str(n_rep_boot)} was passed.')
494-
495-
self._n_rep_boot, self._boot_coef, self._boot_t_stat = self._initialize_boot_arrays(n_rep_boot)
496-
497-
for i_rep in range(self.n_rep):
498-
self._i_rep = i_rep
499-
500-
n_obs = self._dml_data.n_obs
501-
weights = _draw_weights(method, n_rep_boot, n_obs)
502-
for i_quant in range(self.n_quantiles):
503-
self._i_quant = i_quant
504-
i_start = self._i_rep * self.n_rep_boot
505-
i_end = (self._i_rep + 1) * self.n_rep_boot
506-
self._boot_coef[self._i_quant, i_start:i_end], self._boot_t_stat[self._i_quant, i_start:i_end] =\
507-
self._compute_bootstrap(weights)
508450
return self
509451

510452
def draw_sample_splitting(self):
@@ -526,15 +468,6 @@ def draw_sample_splitting(self):
526468

527469
return self
528470

529-
def _compute_bootstrap(self, weights):
530-
J0 = np.mean(self.__psi0_deriv)
531-
J1 = np.mean(self.__psi1_deriv)
532-
scaled_score = self.__psi1 / J1 - self.__psi0 / J0
533-
534-
boot_coef = np.matmul(weights, scaled_score) / self._dml_data.n_obs
535-
boot_t_stat = np.matmul(weights, scaled_score) / (self._dml_data.n_obs * self.__all_se)
536-
return boot_coef, boot_t_stat
537-
538471
def confint(self, joint=False, level=0.95):
539472
"""
540473
Confidence intervals for DoubleML models.
@@ -555,97 +488,49 @@ def confint(self, joint=False, level=0.95):
555488
A data frame with the confidence interval(s).
556489
"""
557490

558-
if not isinstance(joint, bool):
559-
raise TypeError('joint must be True or False. '
560-
f'Got {str(joint)}.')
561-
562-
if not isinstance(level, float):
563-
raise TypeError('The confidence level must be of float type. '
564-
f'{str(level)} of type {str(type(level))} was passed.')
565-
if (level <= 0) | (level >= 1):
566-
raise ValueError('The confidence level must be in (0,1). '
567-
f'{str(level)} was passed.')
568-
569-
a = (1 - level)
570-
ab = np.array([a / 2, 1. - a / 2])
571-
if joint:
572-
if np.isnan(self.boot_coef).all():
573-
raise ValueError('Apply fit() & bootstrap() before confint(joint=True).')
574-
sim = np.amax(np.abs(self.boot_t_stat), 0)
575-
hatc = np.quantile(sim, 1 - a)
576-
ci = np.vstack((self.coef - self.se * hatc, self.coef + self.se * hatc)).T
577-
else:
578-
if np.isnan(self.coef).all():
579-
raise ValueError('Apply fit() before confint().')
580-
fac = norm.ppf(ab)
581-
ci = np.vstack((self.coef + self.se * fac[0], self.coef + self.se * fac[1])).T
582-
583-
df_ci = pd.DataFrame(ci,
584-
columns=['{:.1f} %'.format(i * 100) for i in ab],
585-
index=self._quantiles)
586-
return df_ci
587-
588-
def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_models=False):
589-
590-
model_0 = self.modellist_0[i_quant]
591-
model_1 = self.modellist_1[i_quant]
592-
593-
model_0.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models)
594-
model_1.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models)
595-
596-
return model_0, model_1
491+
if self.framework is None:
492+
raise ValueError('Apply fit() before confint().')
597493

598-
def _agg_cross_fit(self):
599-
# aggregate parameters from the repeated cross-fitting
600-
# don't use the getter (always for one treatment variable and one sample), but the private variable
601-
self.coef = np.median(self._all_coef, 1)
494+
df_ci = self.framework.confint(joint=joint, level=level)
495+
df_ci.set_index(pd.Index(self._quantiles), inplace=True)
602496

603-
# TODO: In the documentation of standard errors we need to cleary state what we return here, i.e.,
604-
# the asymptotic variance sigma_hat/N and not sigma_hat (which sometimes is also called the asympt var)!
605-
# TODO: In the edge case of repeated no-cross-fitting, the test sets might have different size and therefore
606-
# it would note be valid to always use the same self._var_scaling_factor
607-
xx = np.tile(self.coef.reshape(-1, 1), self.n_rep)
608-
self.se = np.sqrt(np.divide(np.median(np.multiply(np.power(self._all_se, 2), self._var_scaling_factor) +
609-
np.power(self._all_coef - xx, 2), 1), self._var_scaling_factor))
497+
return df_ci
610498

611-
def _var_est(self):
612-
"""
613-
Estimate the standard errors of the structural parameter
499+
def p_adjust(self, method='romano-wolf'):
614500
"""
615-
J0 = self._psi0_deriv[:, self._i_rep, self._i_quant].mean()
616-
J1 = self._psi1_deriv[:, self._i_rep, self._i_quant].mean()
617-
score0 = self._psi0[:, self._i_rep, self._i_quant]
618-
score1 = self._psi1[:, self._i_rep, self._i_quant]
619-
omega = score1 / J1 - score0 / J0
501+
Multiple testing adjustment for DoubleML models.
620502
621-
self._var_scaling_factor = self._dml_data.n_obs
622-
sigma2_hat = 1 / self._var_scaling_factor * np.mean(np.power(omega, 2))
503+
Parameters
504+
----------
505+
method : str
506+
A str (``'romano-wolf''``, ``'bonferroni'``, ``'holm'``, etc) specifying the adjustment method.
507+
In addition to ``'romano-wolf''``, all methods implemented in
508+
:py:func:`statsmodels.stats.multitest.multipletests` can be applied.
509+
Default is ``'romano-wolf'``.
623510
624-
return sigma2_hat
511+
Returns
512+
-------
513+
p_val : pd.DataFrame
514+
A data frame with adjusted p-values.
515+
"""
625516

626-
def _se_causal_pars(self):
627-
se = np.sqrt(self._var_est())
628-
return se
517+
if self.framework is None:
518+
raise ValueError('Apply fit() before p_adjust().')
629519

630-
def _initialize_arrays(self):
631-
psi0 = np.full((self._dml_data.n_obs, self.n_rep, self.n_quantiles), np.nan)
632-
psi0_deriv = np.full((self._dml_data.n_obs, self.n_rep, self.n_quantiles), np.nan)
520+
p_val, _ = self.framework.p_adjust(method=method)
521+
p_val.set_index(pd.Index(self._quantiles), inplace=True)
633522

634-
psi1 = np.full((self._dml_data.n_obs, self.n_rep, self.n_quantiles), np.nan)
635-
psi1_deriv = np.full((self._dml_data.n_obs, self.n_rep, self.n_quantiles), np.nan)
523+
return p_val
636524

637-
coef = np.full(self.n_quantiles, np.nan)
638-
se = np.full(self.n_quantiles, np.nan)
525+
def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_models=False):
639526

640-
all_coef = np.full((self.n_quantiles, self.n_rep), np.nan)
641-
all_se = np.full((self.n_quantiles, self.n_rep), np.nan)
527+
model_0 = self.modellist_0[i_quant]
528+
model_1 = self.modellist_1[i_quant]
642529

643-
return psi0, psi1, psi0_deriv, psi1_deriv, coef, se, all_coef, all_se
530+
model_0.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models)
531+
model_1.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models)
644532

645-
def _initialize_boot_arrays(self, n_rep_boot):
646-
boot_coef = np.full((self.n_quantiles, n_rep_boot * self.n_rep), np.nan)
647-
boot_t_stat = np.full((self.n_quantiles, n_rep_boot * self.n_rep), np.nan)
648-
return n_rep_boot, boot_coef, boot_t_stat
533+
return model_0, model_1
649534

650535
def _check_data(self, obj_dml_data):
651536
if not isinstance(obj_dml_data, DoubleMLData):

0 commit comments

Comments
 (0)