pymc-devs · jessegrabowski · May 28, 2025 · May 22, 2025 · May 26, 2025 · May 27, 2025
diff --git a/pymc_extras/statespace/core/statespace.py b/pymc_extras/statespace/core/statespace.py
@@ -1,4 +1,5 @@
 import logging
+import warnings
 
 from collections.abc import Callable, Sequence
 from typing import Any, Literal
@@ -14,7 +15,6 @@
 from pymc.model.transform.optimization import freeze_dims_and_data
 from pymc.util import RandomState
 from pytensor import Variable, graph_replace
-from pytensor.compile import get_mode
 from rich.box import SIMPLE_HEAD
 from rich.console import Console
 from rich.table import Table
@@ -99,6 +99,13 @@ class PyMCStateSpace:
         compute the observation errors. If False, these errors are deterministically zero; if True, they are sampled
         from a multivariate normal.
 
+    mode: str or Mode, optional
+        Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
+        ``forecast``. The mode does **not** effect calls to ``pm.sample``.
+
+        Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
+        to all sampling methods.
+
     Notes
     -----
     Based on the statsmodels statespace implementation https://github.com/statsmodels/statsmodels/blob/main/statsmodels/tsa/statespace/representation.py,
@@ -221,8 +228,8 @@ def __init__(
         filter_type: str = "standard",
         verbose: bool = True,
         measurement_error: bool = False,
+        mode: str | None = None,
     ):
-        self._fit_mode: str | None = None
         self._fit_coords: dict[str, Sequence[str]] | None = None
         self._fit_dims: dict[str, Sequence[str]] | None = None
         self._fit_data: pt.TensorVariable | None = None
@@ -237,6 +244,7 @@ def __init__(
         self.k_states = k_states
         self.k_posdef = k_posdef
         self.measurement_error = measurement_error
+        self.mode = mode
 
         # All models contain a state space representation and a Kalman filter
         self.ssm = PytensorRepresentation(k_endog, k_states, k_posdef)
@@ -819,11 +827,11 @@ def build_statespace_graph(
         self,
         data: np.ndarray | pd.DataFrame | pt.TensorVariable,
         register_data: bool = True,
-        mode: str | None = None,
         missing_fill_value: float | None = None,
         cov_jitter: float | None = JITTER_DEFAULT,
         mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
         save_kalman_filter_outputs_in_idata: bool = False,
+        mode: str | None = None,
     ) -> None:
         """
         Given a parameter vector `theta`, constructs the full computational graph describing the state space model and
@@ -877,7 +885,25 @@ def build_statespace_graph(
         save_kalman_filter_outputs_in_idata: bool, optional, default=False
             If True, Kalman Filter outputs will be saved in the model as deterministics. Useful for debugging, but
             should not be necessary for the majority of users.
+
+        mode: str, optional
+            Pytensor mode to use when compiling the graph. This will be saved as a model attribute and used when
+            compiling sampling functions (e.g. ``sample_conditional_prior``).
+
+            .. deprecated:: 0.2.5
+                The `mode` argument is deprecated and will be removed in a future version. Pass ``mode`` to the
+                model constructor, or manually specify ``compile_kwargs`` in sampling functions instead.
+
         """
+        if mode is not None:
+            warnings.warn(
+                "The `mode` argument is deprecated and will be removed in a future version. "
+                "Pass `mode` to the model constructor, or manually specify `compile_kwargs` in sampling functions"
+                " instead.",
+                DeprecationWarning,
+            )
+            self.mode = mode
+
         pm_mod = modelcontext(None)
 
         self._insert_random_variables()
@@ -898,7 +924,6 @@ def build_statespace_graph(
         filter_outputs = self.kalman_filter.build_graph(
             pt.as_tensor_variable(data),
             *self.unpack_statespace(),
-            mode=mode,
             missing_fill_value=missing_fill_value,
             cov_jitter=cov_jitter,
         )
@@ -909,7 +934,7 @@ def build_statespace_graph(
         filtered_covariances, predicted_covariances, observed_covariances = covs
         if save_kalman_filter_outputs_in_idata:
             smooth_states, smooth_covariances = self._build_smoother_graph(
-                filtered_states, filtered_covariances, self.unpack_statespace(), mode=mode
+                filtered_states, filtered_covariances, self.unpack_statespace()
             )
             all_kf_outputs = [*states, smooth_states, *covs, smooth_covariances]
             self._register_kalman_filter_outputs_with_pymc_model(all_kf_outputs)
@@ -929,7 +954,6 @@ def build_statespace_graph(
 
         self._fit_coords = pm_mod.coords.copy()
         self._fit_dims = pm_mod.named_vars_to_dims.copy()
-        self._fit_mode = mode
 
     def _build_smoother_graph(
         self,
@@ -974,7 +998,7 @@ def _build_smoother_graph(
             *_, T, Z, R, H, Q = matrices
 
             smooth_states, smooth_covariances = self.kalman_smoother.build_graph(
-                T, R, Q, filtered_states, filtered_covariances, mode=mode, cov_jitter=cov_jitter
+                T, R, Q, filtered_states, filtered_covariances, cov_jitter=cov_jitter
             )
             smooth_states.name = "smooth_states"
             smooth_covariances.name = "smooth_covariances"
@@ -1092,7 +1116,6 @@ def _kalman_filter_outputs_from_dummy_graph(
             R,
             H,
             Q,
-            mode=self._fit_mode,
         )
 
         filter_outputs.pop(-1)
@@ -1102,7 +1125,7 @@ def _kalman_filter_outputs_from_dummy_graph(
         filtered_covariances, predicted_covariances, _ = covariances
 
         [smoothed_states, smoothed_covariances] = self.kalman_smoother.build_graph(
-            T, R, Q, filtered_states, filtered_covariances, mode=self._fit_mode
+            T, R, Q, filtered_states, filtered_covariances
         )
 
         grouped_outputs = [
@@ -1164,6 +1187,9 @@ def _sample_conditional(
         _verify_group(group)
         group_idata = getattr(idata, group)
 
+        compile_kwargs = kwargs.pop("compile_kwargs", {})
+        compile_kwargs.setdefault("mode", self.mode)
+
         with pm.Model(coords=self._fit_coords) as forward_model:
             (
                 [
@@ -1229,8 +1255,8 @@ def _sample_conditional(
                     for name in FILTER_OUTPUT_TYPES
                     for suffix in ["", "_observed"]
                 ],
-                compile_kwargs={"mode": get_mode(self._fit_mode)},
                 random_seed=random_seed,
+                compile_kwargs=compile_kwargs,
                 **kwargs,
             )
 
@@ -1296,6 +1322,10 @@ def _sample_unconditional(
               the latent state trajectories: `y[t] = Z @ x[t] + nu[t]`, where `nu ~ N(0, H)`.
         """
         _verify_group(group)
+
+        compile_kwargs = kwargs.pop("compile_kwargs", {})
+        compile_kwargs.setdefault("mode", self.mode)
+
         group_idata = getattr(idata, group)
         dims = None
         temp_coords = self._fit_coords.copy()
@@ -1338,7 +1368,6 @@ def _sample_unconditional(
                 *matrices,
                 steps=steps,
                 dims=dims,
-                mode=self._fit_mode,
                 method=mvn_method,
                 sequence_names=self.kalman_filter.seq_names,
                 k_endog=self.k_endog,
@@ -1354,8 +1383,8 @@ def _sample_unconditional(
             idata_unconditional = pm.sample_posterior_predictive(
                 group_idata,
                 var_names=[f"{group}_latent", f"{group}_observed"],
-                compile_kwargs={"mode": self._fit_mode},
                 random_seed=random_seed,
+                compile_kwargs=compile_kwargs,
                 **kwargs,
             )
 
@@ -1583,7 +1612,7 @@ def sample_unconditional_posterior(
         )
 
     def sample_statespace_matrices(
-        self, idata, matrix_names: str | list[str] | None, group: str = "posterior"
+        self, idata, matrix_names: str | list[str] | None, group: str = "posterior", **kwargs
     ):
         """
         Draw samples of requested statespace matrices from provided idata
@@ -1600,12 +1629,18 @@ def sample_statespace_matrices(
         group: str, one of "posterior" or "prior"
             Whether to sample from priors or posteriors
 
+        kwargs:
+            Additional keyword arguments are passed to ``pymc.sample_posterior_predictive``
+
         Returns
         -------
         idata_matrices: az.InterenceData
         """
         _verify_group(group)
 
+        compile_kwargs = kwargs.pop("compile_kwargs", {})
+        compile_kwargs.setdefault("mode", self.mode)
+
         if matrix_names is None:
             matrix_names = MATRIX_NAMES
         elif isinstance(matrix_names, str):
@@ -1636,8 +1671,9 @@ def sample_statespace_matrices(
             matrix_idata = pm.sample_posterior_predictive(
                 idata if group == "posterior" else idata.prior,
                 var_names=matrix_names,
-                compile_kwargs={"mode": self._fit_mode},
                 extend_inferencedata=False,
+                compile_kwargs=compile_kwargs,
+                **kwargs,
             )
 
         return matrix_idata
@@ -2106,6 +2142,10 @@ def forecast(
         filter_time_dim = TIME_DIM
 
         _validate_filter_arg(filter_output)
+
+        compile_kwargs = kwargs.pop("compile_kwargs", {})
+        compile_kwargs.setdefault("mode", self.mode)
+
         time_index = self._get_fit_time_index()
 
         if start is None and verbose:
@@ -2192,7 +2232,6 @@ def forecast(
                 *matrices,
                 steps=len(forecast_index),
                 dims=dims,
-                mode=self._fit_mode,
                 sequence_names=self.kalman_filter.seq_names,
                 k_endog=self.k_endog,
                 append_x0=False,
@@ -2208,8 +2247,8 @@ def forecast(
             idata_forecast = pm.sample_posterior_predictive(
                 idata,
                 var_names=["forecast_latent", "forecast_observed"],
-                compile_kwargs={"mode": self._fit_mode},
                 random_seed=random_seed,
+                compile_kwargs=compile_kwargs,
                 **kwargs,
             )
 
@@ -2297,6 +2336,9 @@ def impulse_response_function(
         n_options = sum(x is not None for x in options)
         Q = None  # No covariance matrix needed if a trajectory is provided. Will be overwritten later if needed.
 
+        compile_kwargs = kwargs.pop("compile_kwargs", {})
+        compile_kwargs.setdefault("mode", self.mode)
+
         if n_options > 1:
             raise ValueError("Specify exactly 0 or 1 of shock_size, shock_cov, or shock_trajectory")
         elif n_options == 1:
@@ -2368,29 +2410,15 @@ def irf_step(shock, x, c, T, R):
                 non_sequences=[c, T, R],
                 n_steps=n_steps,
                 strict=True,
-                mode=self._fit_mode,
             )
 
             pm.Deterministic("irf", irf, dims=[TIME_DIM, ALL_STATE_DIM])
 
-            compile_kwargs = kwargs.get("compile_kwargs", {})
-            if "mode" not in compile_kwargs.keys():
-                compile_kwargs = {"mode": self._fit_mode}
-            else:
-                mode = compile_kwargs.get("mode")
-                if mode is not None and mode != self._fit_mode:
-                    raise ValueError(
-                        f"User provided compile mode ({mode}) does not match the compile mode used to "
-                        f"construct the model ({self._fit_mode})."
-                    )
-
-                compile_kwargs.update({"mode": self._fit_mode})
-
             irf_idata = pm.sample_posterior_predictive(
                 idata,
                 var_names=["irf"],
-                compile_kwargs=compile_kwargs,
                 random_seed=random_seed,
+                compile_kwargs=compile_kwargs,
                 **kwargs,
             )
 

diff --git a/pymc_extras/statespace/filters/distributions.py b/pymc_extras/statespace/filters/distributions.py
@@ -69,7 +69,6 @@ def __new__(
         H,
         Q,
         steps=None,
-        mode=None,
         sequence_names=None,
         append_x0=True,
         method="svd",
@@ -98,7 +97,6 @@ def __new__(
             H,
             Q,
             steps=steps,
-            mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
             method=method,
@@ -118,7 +116,6 @@ def dist(
         H,
         Q,
         steps=None,
-        mode=None,
         sequence_names=None,
         append_x0=True,
         method="svd",
@@ -135,7 +132,6 @@ def dist(
 
         return super().dist(
             [a0, P0, c, d, T, Z, R, H, Q, steps],
-            mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
             method=method,
@@ -156,7 +152,6 @@ def rv_op(
         Q,
         steps,
         size=None,
-        mode=None,
         sequence_names=None,
         append_x0=True,
         method="svd",
@@ -240,7 +235,6 @@ def step_fn(*args):
             sequences=None if len(sequences) == 0 else sequences,
             non_sequences=[*non_sequences, rng],
             n_steps=steps,
-            mode=mode,
             strict=True,
         )
 
@@ -284,7 +278,6 @@ def __new__(
         steps,
         k_endog=None,
         sequence_names=None,
-        mode=None,
         append_x0=True,
         method="svd",
         **kwargs,
@@ -313,7 +306,6 @@ def __new__(
             H,
             Q,
             steps=steps,
-            mode=mode,
             sequence_names=sequence_names,
             append_x0=append_x0,
             method=method,