Skip to content

Commit e1fedb8

Browse files
brandonwillard authored and twiecki committed
Update tests for new missing values approach
1 parent 74e683b commit e1fedb8

File tree

3 files changed

+44
-65
lines changed

3 files changed

+44
-65
lines changed

pymc3/tests/test_idata_conversion.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66
import pytest
77

8+
from aesara.tensor.subtensor import AdvancedIncSubtensor
89
from arviz import InferenceData
910
from arviz.tests.helpers import check_multiple_attrs
1011
from numpy import ma
@@ -295,27 +296,23 @@ def test_ovewrite_model_coords_dims(self):
295296
assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
296297
assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
297298

298-
@pytest.mark.xfail(reason="Missing data not refactored for v4")
299299
def test_missing_data_model(self):
300300
# source pymc3/pymc3/tests/test_missing.py
301301
data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
302302
model = pm.Model()
303303
with model:
304304
x = pm.Normal("x", 1, 1)
305-
pm.Normal("y", x, 1, observed=data)
305+
y = pm.Normal("y", x, 1, observed=data)
306306
inference_data = pm.sample(100, chains=2, return_inferencedata=True)
307307

308308
# make sure that data is really missing
309-
(y_missing,) = model.missing_values
310-
# TODO: Test values aren't enabled anymore
311-
assert y_missing.tag.test_value.shape == (2,)
309+
assert isinstance(y.owner.op, AdvancedIncSubtensor)
312310

313311
test_dict = {"posterior": ["x"], "observed_data": ["y"], "log_likelihood": ["y"]}
314312
fails = check_multiple_attrs(test_dict, inference_data)
315313
assert not fails
316314

317315
@pytest.mark.xfail(reason="LKJCholeskyCov not refactored for v4")
318-
@pytest.mark.xfail(reason="Missing data not refactored for v4")
319316
def test_mv_missing_data_model(self):
320317
data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)
321318

@@ -324,13 +321,11 @@ def test_mv_missing_data_model(self):
324321
mu = pm.Normal("mu", 0, 1, size=2)
325322
sd_dist = pm.HalfNormal.dist(1.0)
326323
chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
327-
pm.MvNormal("y", mu=mu, chol=chol, observed=data)
324+
y = pm.MvNormal("y", mu=mu, chol=chol, observed=data)
328325
inference_data = pm.sample(100, chains=2, return_inferencedata=True)
329326

330327
# make sure that data is really missing
331-
(y_missing,) = model.missing_values
332-
# TODO: Test values aren't enabled anymore
333-
assert y_missing.tag.test_value.shape == (4,)
328+
assert isinstance(y.owner.op, AdvancedIncSubtensor)
334329

335330
test_dict = {
336331
"posterior": ["mu", "chol_cov"],

pymc3/tests/test_missing.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,63 +16,45 @@
1616
import pandas as pd
1717
import pytest
1818

19+
from aesara.tensor.subtensor import AdvancedIncSubtensor
1920
from numpy import array, ma
2021

2122
from pymc3 import ImputationWarning, Model, Normal, sample, sample_prior_predictive
2223

2324

24-
# @pytest.mark.xfail(reason="Missing values not fully refactored")
25-
def test_missing():
26-
data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
27-
with Model() as model:
28-
x = Normal("x", 1, 1)
29-
with pytest.warns(ImputationWarning):
30-
Normal("y", x, 1, observed=data)
31-
32-
(y_missing,) = model.missing_values
33-
assert y_missing.eval().shape == (2,)
34-
35-
# In v3, the log-likelihoods for these missing points are zero, and the
36-
# missing data point values are the `Distribution`'s "default" values.
37-
test_point = model.initial_point
38-
model.logp(test_point)
25+
@pytest.mark.parametrize(
26+
"data",
27+
[ma.masked_values([1, 2, -1, 4, -1], value=-1), pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan])],
28+
)
29+
def test_missing(data):
3930

40-
with model:
41-
prior_trace = sample_prior_predictive()
42-
assert {"x", "y"} <= set(prior_trace.keys())
43-
44-
45-
@pytest.mark.xfail(reason="Missing values not fully refactored")
46-
def test_missing_pandas():
47-
data = pd.DataFrame([1, 2, numpy.nan, 4, numpy.nan])
4831
with Model() as model:
4932
x = Normal("x", 1, 1)
5033
with pytest.warns(ImputationWarning):
51-
Normal("y", x, 1, observed=data)
34+
y = Normal("y", x, 1, observed=data)
5235

53-
(y_missing,) = model.missing_values
54-
assert y_missing.tag.test_value.shape == (2,)
36+
assert isinstance(y.owner.op, AdvancedIncSubtensor)
5537

56-
model.logp(model.initial_point)
38+
test_point = model.initial_point
39+
assert not numpy.isnan(model.logp(test_point))
5740

5841
with model:
5942
prior_trace = sample_prior_predictive()
6043
assert {"x", "y"} <= set(prior_trace.keys())
6144

6245

63-
@pytest.mark.xfail(reason="Missing values not fully refactored")
6446
def test_missing_with_predictors():
6547
predictors = array([0.5, 1, 0.5, 2, 0.3])
6648
data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
6749
with Model() as model:
6850
x = Normal("x", 1, 1)
6951
with pytest.warns(ImputationWarning):
70-
Normal("y", x * predictors, 1, observed=data)
52+
y = Normal("y", x * predictors, 1, observed=data)
7153

72-
(y_missing,) = model.missing_values
73-
assert y_missing.tag.test_value.shape == (2,)
54+
assert isinstance(y.owner.op, AdvancedIncSubtensor)
7455

75-
model.logp(model.initial_point)
56+
test_point = model.initial_point
57+
assert not numpy.isnan(model.logp(test_point))
7658

7759
with model:
7860
prior_trace = sample_prior_predictive()
@@ -93,9 +75,15 @@ def test_missing_dual_observations():
9375

9476
prior_trace = sample_prior_predictive()
9577
assert {"beta1", "beta2", "theta", "o1", "o2"} <= set(prior_trace.keys())
78+
# TODO: Assert something
9679
sample()
9780

9881

82+
@pytest.mark.skip(
83+
reason="This doesn't make sense in v4, because there are no "
84+
"explicit variables to sample. The missing values are "
85+
"implicit random variables."
86+
)
9987
def test_internal_missing_observations():
10088
with Model() as model:
10189
obs1 = ma.masked_values([1, 2, -1, 4, -1], value=-1)
@@ -107,4 +95,5 @@ def test_internal_missing_observations():
10795

10896
prior_trace = sample_prior_predictive()
10997
assert {"theta1", "theta2"} <= set(prior_trace.keys())
98+
# TODO: Assert something
11099
sample()

pymc3/tests/test_model.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,17 @@
2424
import pytest
2525
import scipy.sparse as sps
2626

27+
from aesara.tensor.random.op import RandomVariable
2728
from aesara.tensor.subtensor import AdvancedIncSubtensor
2829
from aesara.tensor.var import TensorConstant
2930
from numpy.testing import assert_almost_equal
3031

3132
import pymc3 as pm
3233

3334
from pymc3 import Deterministic, Potential
34-
from pymc3.blocking import RaveledVars
35+
from pymc3.blocking import DictToArrayBijection, RaveledVars
3536
from pymc3.distributions import Normal, logpt_sum, transforms
36-
from pymc3.model import ValueGradFunction
37+
from pymc3.model import Point, ValueGradFunction
3738
from pymc3.tests.helpers import SeededTest
3839

3940

@@ -201,20 +202,9 @@ def test_duplicate_vars():
201202
def test_empty_observed():
202203
data = pd.DataFrame(np.ones((2, 3)) / 3)
203204
data.values[:] = np.nan
204-
with pm.Model(aesara_config={"compute_test_value": "raise"}):
205+
with pm.Model():
205206
a = pm.Normal("a", observed=data)
206-
207-
assert isinstance(a.tag.observations.owner.op, AdvancedIncSubtensor)
208-
# The masked observations are replaced by elements of the RV `a`,
209-
# which means that they should all have the same sample test values
210-
a_data = a.tag.observations.owner.inputs[1]
211-
npt.assert_allclose(a.tag.test_value.flatten(), a_data.tag.test_value)
212-
213-
# Let's try this again with another distribution
214-
b = pm.Gamma("b", alpha=1, beta=1, observed=data)
215-
assert isinstance(b.tag.observations.owner.op, AdvancedIncSubtensor)
216-
b_data = b.tag.observations.owner.inputs[1]
217-
npt.assert_allclose(b.tag.test_value.flatten(), b_data.tag.test_value)
207+
assert not hasattr(a.tag, "observations")
218208

219209

220210
class TestValueGradFunction(unittest.TestCase):
@@ -302,8 +292,8 @@ def test_edge_case(self):
302292
assert dlogp.size == 4
303293
npt.assert_allclose(dlogp, 0.0, atol=1e-5)
304294

305-
def test_tensor_type_conversion(self):
306-
# case described in #3122
295+
def test_missing_data(self):
296+
# Originally from a case described in #3122
307297
X = np.random.binomial(1, 0.5, 10)
308298
X[0] = -1 # masked a single value
309299
X = np.ma.masked_values(X, value=-1)
@@ -312,9 +302,16 @@ def test_tensor_type_conversion(self):
312302
x2 = pm.Bernoulli("x2", x1, observed=X)
313303

314304
gf = m.logp_dlogp_function()
305+
gf._extra_are_set = True
315306

316-
# TODO: Assert something.
317-
# assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type
307+
m.default_rng.get_value(borrow=True).seed(102)
308+
309+
# The gradient should have random values as inputs, so its value should
310+
# change every time we evaluate it at the same point
311+
#
312+
# TODO: We could probably use a better test than this.
313+
res = [gf(DictToArrayBijection.map(Point(m.test_point, model=m))) for i in range(20)]
314+
assert np.var(res) > 0.0
318315

319316
def test_aesara_switch_broadcast_edge_cases_1(self):
320317
# Tests against two subtle issues related to a previous bug in Theano
@@ -474,7 +471,7 @@ def test_make_obs_var():
474471

475472
# Ensure that the missing values are appropriately set to None
476473
for func_output in [dense_output, sparse_output]:
477-
assert func_output.tag.missing_values is None
474+
assert isinstance(func_output.owner.op, RandomVariable)
478475

479476
# Ensure that the Aesara variable names are correctly set.
480477
# Note that the output for masked inputs do not have their names set
@@ -488,9 +485,7 @@ def test_make_obs_var():
488485

489486
# Masked output is something weird. Just ensure it has missing values
490487
# self.assertIsInstance(masked_output, TensorConstant)
491-
assert masked_output.tag.missing_values is not None
492-
493-
return None
488+
assert isinstance(masked_output.owner.op, AdvancedIncSubtensor)
494489

495490

496491
def test_initial_point():

0 commit comments

Comments
 (0)