Skip to content

Restructure doubleml #225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
de5ee08
move tests for dummy learners to utils
SvenKlaassen Dec 21, 2023
69282b0
move and rename utils
SvenKlaassen Dec 21, 2023
da17874
move blp and policytree to utils
SvenKlaassen Dec 22, 2023
b8764d0
update utils init
SvenKlaassen Dec 22, 2023
441c7fa
first iv_model submodule
SvenKlaassen Dec 22, 2023
b185bd8
fis plr and irm blp model
SvenKlaassen Dec 22, 2023
f3224b8
rename sensitivity not implemented error
SvenKlaassen Dec 22, 2023
a6b47a2
move pliv tests to submodule
SvenKlaassen Dec 22, 2023
93c246b
fix unit tests
SvenKlaassen Dec 22, 2023
969b240
create did_model submodule
SvenKlaassen Dec 22, 2023
01246d3
shorten submodule names
SvenKlaassen Dec 22, 2023
cdc2354
updated submodule description
SvenKlaassen Dec 22, 2023
53786fa
create plr submodule
SvenKlaassen Dec 22, 2023
6cb1c7a
create irm submodule
SvenKlaassen Dec 22, 2023
6c22f02
move tests to irm submodule
SvenKlaassen Dec 22, 2023
d679446
rename plr to plm and move pliv to plm submodule
SvenKlaassen Dec 22, 2023
5cdfad2
move iivm to irm submodule
SvenKlaassen Dec 22, 2023
62be459
remove iv submodule
SvenKlaassen Dec 22, 2023
7e4f1c5
remove doubleml from test names
SvenKlaassen Dec 29, 2023
b9bf726
Reduce deprication warnings did
SvenKlaassen Dec 29, 2023
54d3863
fix tests
SvenKlaassen Jan 2, 2024
fffa952
fix codacy issues
SvenKlaassen Jan 2, 2024
ee6f755
Update _utils_irm_manual.py
SvenKlaassen Jan 2, 2024
f2b3113
simplify checks and initialization in doubleml fit
SvenKlaassen Jan 2, 2024
557741e
simplify doubleml fit method
SvenKlaassen Jan 2, 2024
0347057
update resampling for stratification
SvenKlaassen Jan 2, 2024
0698a48
fix unit tests for sample splits
SvenKlaassen Jan 2, 2024
b7127c2
fix propensity score adjustments
SvenKlaassen Jan 3, 2024
8d34a73
update trimming for external predictions
SvenKlaassen Jan 5, 2024
9c51b93
Merge branch 'main' into s-restructure-doubleml
SvenKlaassen Feb 2, 2024
7bb829c
move test for weighted irm
SvenKlaassen Feb 2, 2024
12cad97
Merge branch 'main' into s-restructure-doubleml
SvenKlaassen Feb 2, 2024
ece1b45
update github workflow actions to node.js 20
SvenKlaassen Feb 2, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ jobs:

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
queries: +security-and-quality

- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@v3

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{ matrix.language }}"
6 changes: 3 additions & 3 deletions .github/workflows/deploy_pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
persist-credentials: false

- name: Install python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'

Expand All @@ -32,7 +32,7 @@ jobs:
pip install wheel
python setup.py sdist bdist_wheel

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: DoubleML-pkg
path: dist/
4 changes: 2 additions & 2 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ jobs:
- {os: 'ubuntu-latest', python-version: '3.11'}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Set up Python ${{ matrix.config.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.config.python-version }}
- name: Install OpenMP runtime for unit tests with xgboost learners
Expand Down
27 changes: 14 additions & 13 deletions doubleml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,33 @@
from pkg_resources import get_distribution

from .double_ml_plr import DoubleMLPLR
from .double_ml_pliv import DoubleMLPLIV
from .double_ml_irm import DoubleMLIRM
from .double_ml_iivm import DoubleMLIIVM
from .plm.plr import DoubleMLPLR
from .plm.pliv import DoubleMLPLIV
from .irm.irm import DoubleMLIRM
from .irm.iivm import DoubleMLIIVM
from .double_ml_data import DoubleMLData, DoubleMLClusterData
from .double_ml_blp import DoubleMLBLP
from .double_ml_did import DoubleMLDID
from .double_ml_did_cs import DoubleMLDIDCS
from .double_ml_qte import DoubleMLQTE
from .double_ml_pq import DoubleMLPQ
from .double_ml_lpq import DoubleMLLPQ
from .double_ml_cvar import DoubleMLCVAR
from .double_ml_policytree import DoubleMLPolicyTree
from .did.did import DoubleMLDID
from .did.did_cs import DoubleMLDIDCS
from .irm.qte import DoubleMLQTE
from .irm.pq import DoubleMLPQ
from .irm.lpq import DoubleMLLPQ
from .irm.cvar import DoubleMLCVAR

from .utils.blp import DoubleMLBLP
from .utils.policytree import DoubleMLPolicyTree

__all__ = ['DoubleMLPLR',
'DoubleMLPLIV',
'DoubleMLIRM',
'DoubleMLIIVM',
'DoubleMLData',
'DoubleMLClusterData',
'DoubleMLBLP',
'DoubleMLDID',
'DoubleMLDIDCS',
'DoubleMLPQ',
'DoubleMLQTE',
'DoubleMLLPQ',
'DoubleMLCVAR',
'DoubleMLBLP',
'DoubleMLPolicyTree']

__version__ = get_distribution('doubleml').version
11 changes: 11 additions & 0 deletions doubleml/did/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
The :mod:`doubleml.did` module implements double machine learning estimates based on difference in differences models.
"""

from .did import DoubleMLDID
from .did_cs import DoubleMLDIDCS

__all__ = [
"DoubleMLDID",
"DoubleMLDIDCS",
]
12 changes: 7 additions & 5 deletions doubleml/double_ml_did.py → doubleml/did/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from sklearn.utils.multiclass import type_of_target
import warnings

from .double_ml import DoubleML
from .double_ml_data import DoubleMLData
from .double_ml_score_mixins import LinearScoreMixin
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin

from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm
from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm
from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity


class DoubleMLDID(LinearScoreMixin, DoubleML):
Expand Down Expand Up @@ -117,6 +117,8 @@ def __init__(self,

# set stratification for resampling
self._strata = self._dml_data.d
if draw_sample_splitting:
self.draw_sample_splitting()

# check learners
ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
Expand Down
12 changes: 7 additions & 5 deletions doubleml/double_ml_did_cs.py → doubleml/did/did_cs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
from sklearn.utils.multiclass import type_of_target
import warnings

from .double_ml import DoubleML
from .double_ml_data import DoubleMLData
from .double_ml_score_mixins import LinearScoreMixin
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin

from ._utils import _dml_cv_predict, _trimm, _get_cond_smpls_2d, _dml_tune
from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
from ..utils._estimation import _dml_cv_predict, _trimm, _get_cond_smpls_2d, _dml_tune
from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity


class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
Expand Down Expand Up @@ -117,6 +117,8 @@ def __init__(self,

# set stratification for resampling
self._strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.t.reshape(-1, 1)
if draw_sample_splitting:
self.draw_sample_splitting()

# check learners
ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
Expand Down
Empty file added doubleml/did/tests/__init__.py
Empty file.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
from sklearn.base import clone

from ._utils import fit_predict, fit_predict_proba, tune_grid_search
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
from ._utils_did_manual import did_dml1, did_dml2


Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import numpy as np
from sklearn.base import clone

from ._utils_boot import boot_manual, draw_weights
from ._utils import fit_predict, fit_predict_proba, tune_grid_search
from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search


def fit_did(y, x, d,
Expand Down
38 changes: 38 additions & 0 deletions doubleml/did/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import numpy as np
import pytest

from doubleml.datasets import make_did_SZ2020


@pytest.fixture(scope='session', params=[(500, 1), (1000, 1), (1000, 2)])
def generate_data_did(request):
    """Session-scoped fixture providing simulated DiD data as arrays.

    Each fixture parameter is a pair ``(n, dgp_type)``: the first entry is
    passed positionally to ``make_did_SZ2020`` and the second is forwarded
    as its ``dgp_type`` keyword. The global NumPy random state is seeded
    with 1111 before the data is generated.

    Returns whatever ``make_did_SZ2020(..., return_type='array')`` yields.
    """
    n_obs, dgp_type = request.param

    # fix the global NumPy seed before drawing the data
    np.random.seed(1111)

    return make_did_SZ2020(n_obs, dgp_type=dgp_type, return_type='array')


@pytest.fixture(scope='session', params=[(500, 1), (1000, 1), (1000, 2)])
def generate_data_did_cs(request):
    """Session-scoped fixture providing simulated cross-sectional DiD data.

    Each fixture parameter is a pair ``(n, dgp_type)``: the first entry is
    passed positionally to ``make_did_SZ2020`` and the second is forwarded
    as its ``dgp_type`` keyword. The generator is called with
    ``cross_sectional_data=True``. The global NumPy random state is seeded
    with 1111 before the data is generated.

    Returns whatever ``make_did_SZ2020(..., return_type='array')`` yields.
    """
    n_obs, dgp_type = request.param

    # fix the global NumPy seed before drawing the data
    np.random.seed(1111)

    return make_did_SZ2020(n_obs,
                           dgp_type=dgp_type,
                           cross_sectional_data=True,
                           return_type='array')
10 changes: 5 additions & 5 deletions doubleml/tests/test_did.py → doubleml/did/tests/test_did.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import doubleml as dml

from ._utils import draw_smpls
from ...tests._utils import draw_smpls
from ._utils_did_manual import fit_did, boot_did, fit_sensitivity_elements_did


Expand Down Expand Up @@ -122,14 +122,14 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,

@pytest.mark.ci
def test_dml_did_coef(dml_did_fixture):
assert math.isclose(dml_did_fixture['coef'],
assert math.isclose(dml_did_fixture['coef'][0],
dml_did_fixture['coef_manual'],
rel_tol=1e-9, abs_tol=1e-4)


@pytest.mark.ci
def test_dml_did_se(dml_did_fixture):
assert math.isclose(dml_did_fixture['se'],
assert math.isclose(dml_did_fixture['se'][0],
dml_did_fixture['se_manual'],
rel_tol=1e-9, abs_tol=1e-4)

Expand Down Expand Up @@ -189,8 +189,8 @@ def test_dml_did_experimental(generate_data_did, in_sample_normalization, learne
score='experimental',
in_sample_normalization=in_sample_normalization)
dml_did_obj_with_ml_m.fit()
assert math.isclose(dml_did_obj_with_ml_m.coef,
dml_did_obj_without_ml_m.coef,
assert math.isclose(dml_did_obj_with_ml_m.coef[0],
dml_did_obj_without_ml_m.coef[0],
rel_tol=1e-9, abs_tol=1e-4)

msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import doubleml as dml

from ._utils import draw_smpls
from ...tests._utils import draw_smpls
from ._utils_did_cs_manual import fit_did_cs, fit_sensitivity_elements_did_cs
from ._utils_did_manual import boot_did

Expand Down Expand Up @@ -63,7 +63,8 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza

np.random.seed(3141)
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)

all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)

np.random.seed(3141)
Expand Down Expand Up @@ -122,14 +123,14 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza

@pytest.mark.ci
def test_dml_did_cs_coef(dml_did_cs_fixture):
assert math.isclose(dml_did_cs_fixture['coef'],
assert math.isclose(dml_did_cs_fixture['coef'][0],
dml_did_cs_fixture['coef_manual'],
rel_tol=1e-9, abs_tol=1e-4)


@pytest.mark.ci
def test_dml_did_cs_se(dml_did_cs_fixture):
assert math.isclose(dml_did_cs_fixture['se'],
assert math.isclose(dml_did_cs_fixture['se'][0],
dml_did_cs_fixture['se_manual'],
rel_tol=1e-9, abs_tol=1e-4)

Expand Down Expand Up @@ -189,8 +190,8 @@ def test_dml_did_cs_experimental(generate_data_did_cs, in_sample_normalization,
score='experimental',
in_sample_normalization=in_sample_normalization)
dml_did_obj_with_ml_m.fit()
assert math.isclose(dml_did_obj_with_ml_m.coef,
dml_did_obj_without_ml_m.coef,
assert math.isclose(dml_did_obj_with_ml_m.coef[0],
dml_did_obj_without_ml_m.coef[0],
rel_tol=1e-9, abs_tol=1e-4)

msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import doubleml as dml

from ._utils import draw_smpls
from ...tests._utils import draw_smpls
from ._utils_did_manual import boot_did
from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs

Expand Down Expand Up @@ -77,15 +77,22 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
ml_g = clone(learner_g)
ml_m = clone(learner_m)

n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)

np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
dml_did_cs_obj = dml.DoubleMLDIDCS(obj_dml_data,
ml_g, ml_m,
n_folds,
score=score,
in_sample_normalization=in_sample_normalization,
dml_procedure=dml_procedure)
dml_procedure=dml_procedure,
draw_sample_splitting=False)
# synchronize the sample splitting
dml_did_cs_obj.set_sample_splitting(all_smpls=all_smpls)

np.random.seed(3141)
# tune hyperparameters
tune_res = dml_did_cs_obj.tune(par_grid, tune_on_folds=tune_on_folds,
n_folds_tune=n_folds_tune,
Expand All @@ -95,8 +102,6 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
dml_did_cs_obj.fit()

np.random.seed(3141)
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
smpls = all_smpls[0]

if tune_on_folds:
Expand Down Expand Up @@ -152,14 +157,14 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam

@pytest.mark.ci
def test_dml_did_cs_coef(dml_did_cs_fixture):
assert math.isclose(dml_did_cs_fixture['coef'],
assert math.isclose(dml_did_cs_fixture['coef'][0],
dml_did_cs_fixture['coef_manual'],
rel_tol=1e-9, abs_tol=1e-4)


@pytest.mark.ci
def test_dml_did_cs_se(dml_did_cs_fixture):
assert math.isclose(dml_did_cs_fixture['se'],
assert math.isclose(dml_did_cs_fixture['se'][0],
dml_did_cs_fixture['se_manual'],
rel_tol=1e-9, abs_tol=1e-4)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from doubleml import DoubleMLDID
from doubleml.datasets import make_did_SZ2020
from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
from ._utils import draw_smpls
from ...tests._utils import draw_smpls


@pytest.fixture(scope="module", params=["observational", "experimental"])
Expand Down
Loading