Skip to content

Infer GRW steps from shape #5541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 69 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
63826fa
Start adding GRW
canyon289 Dec 30, 2021
c4f2ae2
Update GRW rv
canyon289 Jan 1, 2022
1ba3fcd
Run black
canyon289 Jan 1, 2022
ca9529d
Run precommit manually
canyon289 Jan 1, 2022
d89bc1b
Fix pylint
canyon289 Jan 1, 2022
7cb138e
Fill out GRW random op and add test
canyon289 Jan 2, 2022
1486856
Actually add black
canyon289 Jan 3, 2022
cd881e7
Update time series distribution
canyon289 Jan 5, 2022
0caf601
Remove extra param from GRW
canyon289 Jan 5, 2022
47d400f
Update test with size keyword argument and expand tolerance
canyon289 Jan 5, 2022
398fcdd
WIP Add logp to GRW
canyon289 Jan 6, 2022
69af800
Add more code
canyon289 Jan 6, 2022
7f21c91
Replace logp
canyon289 Jan 8, 2022
da18584
Update timeseries
canyon289 Jan 14, 2022
11fc57d
Update logp for GRW
canyon289 Jan 14, 2022
bd90071
Add test
canyon289 Jan 14, 2022
e60fc4c
Update logp calc
canyon289 Jan 14, 2022
ff7c56c
Add type hints
canyon289 Jan 14, 2022
bc00e04
Fix indexing in values
canyon289 Jan 14, 2022
a4f9e3a
Simplify type hint
canyon289 Jan 15, 2022
bce4a62
Update logp calculation and tests
canyon289 Jan 17, 2022
11747bf
Update time series docs
canyon289 Jan 17, 2022
58151c0
Update rng
canyon289 Jan 17, 2022
620e369
Replace manual aesara calcs with pm Normal
canyon289 Jan 17, 2022
3759897
Fix logp call
canyon289 Jan 17, 2022
e16fda7
Remove check params
canyon289 Jan 18, 2022
57f3ec4
Clean up test imports
canyon289 Jan 18, 2022
59ac03a
Update shape and length
canyon289 Jan 22, 2022
b8bff4e
Add random draw on sigma
canyon289 Jan 22, 2022
7bacca2
Reenable GRW random shape test
canyon289 Jan 24, 2022
77d2145
Rename length to steps
canyon289 Jan 24, 2022
f9ba7e4
Fix cumulative sum and (size,steps)
canyon289 Jan 24, 2022
91068ed
Add WIP tests for grw size and steps
canyon289 Jan 24, 2022
4a622e5
Add temporary notebook API discussion
canyon289 Jan 24, 2022
b366616
Update pymc/distributions/timeseries.py
canyon289 Jan 27, 2022
21a31f8
Try and subclass shape params
canyon289 Jan 27, 2022
28f4f92
WIP try shape and size
canyon289 Jan 27, 2022
2b77b9f
Refactor rng_fn
canyon289 Jan 28, 2022
dc051b3
Update so size=1 returned (1,n)
canyon289 Jan 30, 2022
39b0147
Update pymc/distributions/timeseries.py
canyon289 Jan 31, 2022
796334f
Remove temporary notebooks
canyon289 Feb 6, 2022
0d2c360
Skip test_lkjcorr test as its failing for unknown reason
canyon289 Feb 6, 2022
d390672
Try and fix up some tests
canyon289 Feb 7, 2022
e5d728f
Add fixes from Ricardos help
canyon289 Feb 19, 2022
40ff7d4
Remove duplicate GRW from rebase
canyon289 Feb 21, 2022
b6da137
Update tests
canyon289 Feb 21, 2022
43577db
Add grw inference test
canyon289 Feb 21, 2022
d38d6dd
Remove docstrings
canyon289 Feb 21, 2022
c523d49
Organize grw tests
canyon289 Feb 21, 2022
11d6eca
Remove outdated grw test
canyon289 Feb 21, 2022
52404bd
Remove unneeded test class and test
canyon289 Feb 21, 2022
3128ba7
Remove pymc top level import
canyon289 Feb 21, 2022
1cfa962
Remove wishart skip
canyon289 Feb 21, 2022
f5c3a3d
Update pymc/distributions/timeseries.py
canyon289 Feb 21, 2022
7cabf6b
Update pymc/distributions/timeseries.py
canyon289 Feb 21, 2022
570131a
Update per ricardos changes
canyon289 Feb 21, 2022
4632371
Add check pymc params match rv op
canyon289 Feb 21, 2022
2315d79
Add initial shape checks
canyon289 Feb 21, 2022
12235ff
Update GRW tests
canyon289 Feb 22, 2022
eb24507
Update distributions
canyon289 Feb 22, 2022
627a527
Remove init from logp calc
canyon289 Mar 1, 2022
c7fb3d7
Update distribution keyword
canyon289 Mar 1, 2022
09d239a
Update GRW
canyon289 Mar 2, 2022
2be8f42
Fix exception capitalization
canyon289 Mar 2, 2022
6929906
Update exception testing string
canyon289 Mar 2, 2022
31a37e6
More string updates
canyon289 Mar 2, 2022
d23458e
Allow init to be a distribution again
ricardoV94 Mar 3, 2022
2171790
Add temporary workaround for bug in `at.diff` in tests
ricardoV94 Mar 3, 2022
47571a7
Infer steps from shape
ricardoV94 Mar 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
313 changes: 190 additions & 123 deletions pymc/distributions/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Tuple, Union

import aesara.tensor as at
import numpy as np

from aesara import scan
from scipy import stats
from aesara.tensor.random.op import RandomVariable

from pymc.distributions import distribution, multivariate
from pymc.aesaraf import change_rv_size, floatX, intX
from pymc.distributions import distribution, logprob, multivariate
from pymc.distributions.continuous import Flat, Normal, get_tau_sigma
from pymc.distributions.shape_utils import to_tuple

Expand All @@ -32,6 +35,191 @@
"MvStudentTRandomWalk",
]

from pymc.util import check_dist_not_registered


class GaussianRandomWalkRV(RandomVariable):
    """
    GaussianRandomWalk Random Variable.

    Generates a Gaussian random walk: a provided initial value followed by the
    cumulative sum of ``steps`` Normal(mu, sigma) innovations.
    """

    name = "GaussianRandomWalk"
    ndim_supp = 1
    ndims_params = [0, 0, 0, 0]  # mu, sigma, init, steps are all scalar parameters
    dtype = "floatX"
    _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}")

    # TODO: Assert steps is a scalar!

    def _shape_from_params(self, dist_params, **kwargs):
        """Support shape of one walk: ``steps`` innovations plus the initial value."""
        steps = dist_params[3]

        # TODO: Ask ricardo why this is correct. Isn't shape different if size is passed?
        return (steps + 1,)

    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        mu: Union[np.ndarray, float],
        sigma: Union[np.ndarray, float],
        init: float,
        steps: int,
        size: Tuple[int],
    ) -> np.ndarray:
        """Gaussian Random Walk generator.

        The walk starts at ``init`` (already drawn upstream, e.g. in
        ``GaussianRandomWalk.dist``) and accumulates ``steps`` Normal(mu, sigma)
        innovations via a cumulative sum along the last axis.

        Parameters
        ----------
        rng: np.random.RandomState
            Numpy random number generator
        mu: array_like
            Random walk mean (drift of the innovations)
        sigma: np.ndarray
            Standard deviation of innovation (sigma > 0)
        init: float
            Initialization value for GaussianRandomWalk
        steps: int
            Number of innovations; must be greater than 0. The returned array has
            ``steps + 1`` entries along the last axis because the first entry is
            the initial value.
        size: tuple of int, optional
            The number of independent random walk time series generated

        Returns
        -------
        ndarray
            ``(steps + 1,)`` when ``size`` is None, otherwise ``(*size, steps + 1)``.

        Raises
        ------
        ValueError
            If ``steps`` is ``None`` or smaller than 1.
        """

        # TODO: Maybe we can remove this constraint?
        # NOTE: steps=None is rejected here too, so the message must not suggest
        # that None is acceptable.
        if steps is None or steps < 1:
            raise ValueError("Steps must be an integer greater than 0")

        # If size is None then the returned series should be (1 + steps,)
        if size is None:
            init_size = 1
            innov_size = steps

        # TODO: Ask Ricardo how size is known to be a tuple? Its int above
        # If size is given then the returned series should be (*size, 1 + steps)
        else:
            init_size = (*size, 1)
            innov_size = (*size, steps)

        # Draw the innovations into a separate name so the `steps` parameter is
        # not shadowed by an array of the same name.
        init = np.reshape(init, init_size)
        innovations = rng.normal(loc=mu, scale=sigma, size=innov_size)

        grw = np.concatenate([init, innovations], axis=-1)

        return np.cumsum(grw, axis=-1)


gaussianrandomwalk = GaussianRandomWalkRV()


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations.

    Notes
    -----
    If no ``init`` distribution is given, the initial value is drawn from a
    Normal distribution with the same ``mu`` and ``sigma`` as the innovations.

    Parameters
    ----------
    mu: tensor_like of float
        innovation drift, defaults to 0.0
    sigma: tensor_like of float, optional
        sigma > 0, innovation standard deviation, defaults to 1.0
    init: Scalar PyMC distribution
        Scalar distribution of the initial value, created with the `.dist()` API. Defaults to
        Normal with same `mu` and `sigma` as the GaussianRandomWalk
    steps: int
        Number of steps in the Gaussian Random Walk (the series itself has
        ``steps + 1`` values, since the initial value is included)
    size: int
        Number of independent Gaussian Random Walks
    """

    rv_op = gaussianrandomwalk

    def __new__(cls, name, mu=0.0, sigma=1.0, init=None, steps=None, **kwargs):
        # A user-supplied `init` must be an unregistered `.dist()` variable,
        # not one already attached to a model.
        check_dist_not_registered(init)
        return super().__new__(cls, name, mu, sigma, init, steps, **kwargs)

    @classmethod
    def dist(
        cls, mu=0.0, sigma=1.0, init=None, steps=None, size=None, shape=None, **kwargs
    ) -> RandomVariable:
        """Create an unregistered GaussianRandomWalk variable.

        When ``steps`` is not given it is inferred from the last dimension of
        ``shape`` (minus one for the initial value).
        """

        mu = at.as_tensor_variable(floatX(mu))
        sigma = at.as_tensor_variable(floatX(sigma))

        if steps is None:
            # We can infer steps from the shape, if it was given
            if shape is not None:
                # Last shape dimension is steps + 1 because the initial value
                # is part of the series.
                steps = to_tuple(shape)[-1] - 1
            else:
                # TODO: Raise ValueError?
                steps = 1

        steps = at.as_tensor_variable(intX(steps))

        if init is None:
            # Default initial value: Normal with the walk's own mu and sigma.
            init = Normal.dist(mu, sigma, size=size)
        else:
            # A custom init must be a scalar (ndim_supp == 0) pure random variable.
            if not (
                isinstance(init, at.TensorVariable)
                and init.owner is not None
                and isinstance(init.owner.op, RandomVariable)
                and init.owner.op.ndim_supp == 0
            ):
                raise TypeError("init must be a scalar distribution variable")
            if size is not None or shape is not None:
                # Resize init to match the requested batch dimensions.
                init = change_rv_size(init, to_tuple(size or shape))
            else:
                # If not explicit, size is determined by the shape of mu and sigma
                mu_ = at.broadcast_arrays(mu, sigma)[0]
                init = change_rv_size(init, mu_.shape)

        return super().dist([mu, sigma, init, steps], size=size, shape=shape, **kwargs)

    def logp(
        value: at.Variable,
        mu: at.Variable,
        sigma: at.Variable,
        init: at.Variable,
        steps: at.Variable,
    ) -> at.TensorVariable:
        """Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        value: at.Variable
            Series at which to evaluate the density; first entry along the last
            axis is the initial value.
        mu: at.Variable
            Innovation drift.
        sigma: at.Variable
            Innovation standard deviation.
        init: at.Variable
            Distribution of the initial value (used for the first entry's logp).
        steps: at.Variable
            Not used (the number of steps is implied by ``value``).

        Returns
        -------
        TensorVariable
        """

        # Calculate initialization logp
        init_logp = logprob.logp(init, value[..., 0])

        # Make time series stationary around the mean value: consecutive
        # differences are the i.i.d. Normal(mu, sigma) innovations.
        stationary_series = at.diff(value, axis=-1)
        series_logp = logprob.logp(Normal.dist(mu, sigma), stationary_series)

        return init_logp + series_logp.sum(axis=-1)


class AR1(distribution.Continuous):
"""
Expand Down Expand Up @@ -176,127 +364,6 @@ def logp(self, value):
return at.sum(innov_like) + at.sum(init_like)


class GaussianRandomWalk(distribution.Continuous):
    r"""Random Walk with Normal innovations

    Note that this is mainly a user-friendly wrapper to enable an easier specification
    of GRW. You are not restricted to use only Normal innovations but can use any
    distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior.

    Parameters
    ----------
    mu : tensor_like of float, default 0
        innovation drift, defaults to 0.0
        For vector valued `mu`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    sigma : tensor_like of float, optional
        `sigma` > 0, innovation standard deviation (only required if `tau` is not specified)
        For vector valued `sigma`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    tau : tensor_like of float, optional
        `tau` > 0, innovation precision (only required if `sigma` is not specified)
        For vector valued `tau`, first dimension must match shape of the random walk, and
        the first element will be discarded (since there is no innovation in the first timestep)
    init : pymc.Distribution, optional
        distribution for initial value (Defaults to Flat())
    """

    def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kwargs):
        kwargs.setdefault("shape", 1)
        super().__init__(*args, **kwargs)
        if sum(self.shape) == 0:
            raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
        # `sd` is an alias for `sigma`; when given, it takes precedence.
        if sd is not None:
            sigma = sd
        tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
        self.tau = at.as_tensor_variable(tau)
        sigma = at.as_tensor_variable(sigma)
        # `sd` is kept as a second attribute name for backward compatibility.
        self.sigma = self.sd = sigma
        self.mu = at.as_tensor_variable(mu)
        self.init = init or Flat.dist()
        self.mean = at.as_tensor_variable(0.0)

    def _mu_and_sigma(self, mu, sigma):
        """Helper to get mu and sigma if they are high dimensional."""
        # For vector-valued parameters, drop the first element: there is no
        # innovation at the first timestep (it belongs to `init`).
        if sigma.ndim > 0:
            sigma = sigma[1:]
        if mu.ndim > 0:
            mu = mu[1:]
        return mu, sigma

    def logp(self, x):
        """
        Calculate log-probability of Gaussian Random Walk distribution at specified value.

        Parameters
        ----------
        x : numeric
            Value for which log-probability is calculated.

        Returns
        -------
        TensorVariable
        """
        if x.ndim > 0:
            # Innovation likelihood: each x[i] ~ Normal(x[i-1] + mu, sigma).
            x_im1 = x[:-1]
            x_i = x[1:]
            mu, sigma = self._mu_and_sigma(self.mu, self.sigma)
            innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i)
            return self.init.logp(x[0]) + at.sum(innov_like)
        # Scalar value: only the initial-value density applies.
        return self.init.logp(x)

    def random(self, point=None, size=None):
        """Draw random values from GaussianRandomWalk.

        NOTE(review): currently a no-op returning ``None`` — the previous
        implementation is kept below as commented-out code.

        Parameters
        ----------
        point : dict or Point, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size)
        # return distribution.generate_samples(
        #     self._random,
        #     sigma=sigma,
        #     mu=mu,
        #     size=size,
        #     dist_shape=self.shape,
        #     not_broadcast_kwargs={"sample_shape": to_tuple(size)},
        # )
        pass

    def _random(self, sigma, mu, size, sample_shape):
        """Implement a Gaussian random walk as a cumulative sum of normals.
        axis = len(size) - 1 denotes the axis along which cumulative sum would be calculated.
        This might need to be corrected in future when issue #4010 is fixed.
        """
        # NOTE(review): this compares an int (size[...]) against a tuple
        # (sample_shape); the branch looks like it can never be taken — confirm
        # the intended condition.
        if size[len(sample_shape)] == sample_shape:
            axis = len(sample_shape)
        else:
            axis = len(size) - 1
        rv = stats.norm(mu, sigma)
        data = rv.rvs(size).cumsum(axis=axis)
        data = np.array(data)

        # the following lines center the random walk to start at the origin.
        if len(data.shape) > 1:
            for i in range(data.shape[0]):
                data[i] = data[i] - data[i][0]
        else:
            data = data - data[0]
        return data

    def _distr_parameters_for_repr(self):
        # Parameters shown in the model's string representation.
        return ["mu", "sigma"]


class GARCH11(distribution.Continuous):
r"""
GARCH(1,1) with Normal innovations. The model is specified by
Expand Down
10 changes: 10 additions & 0 deletions pymc/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2616,6 +2616,16 @@ def test_interpolated_transform(self, transform):
assert not np.isfinite(m.compile_logp()({"x": -1.0}))
assert not np.isfinite(m.compile_logp()({"x": 11.0}))

    @pytest.mark.skip("Unsure how to use this test fixture")
    def test_grw(self):
        """Check GaussianRandomWalk logp against a scipy Normal reference (currently skipped).

        NOTE(review): the reference ``logpdf(...).cumsum().sum()`` looks unlikely to
        match the GRW logp (sum of innovation logps plus init logp) — confirm the
        reference expression before unskipping.
        """
        self.check_logp(
            pm.GaussianRandomWalk,
            R,
            {"mu": R, "sigma": Rplus, "steps": Nat},
            lambda value, mu, sigma: sp.norm.logpdf(value, mu, sigma).cumsum().sum(),
            decimal=select_by_precision(float64=6, float32=1),
        )


class TestBound:
"""Tests for pm.Bound distribution"""
Expand Down
Loading