Match statsmodels implementation

jessegrabowski · jessegrabowski · commit 9f3c480d304b · 2024-09-23T19:43:59.000+08:00
Add direct and transformed parameterizations
diff --git a/pymc_experimental/statespace/models/ETS.py b/pymc_experimental/statespace/models/ETS.py
@@ -67,52 +67,65 @@ class BayesianETS(PyMCStateSpace):
 
             \begin{align}
             y_t &= l_{t-1} + b_{t-1} + \epsilon_t \\
-            l_t &= l_{t-1} + \alpha \epsilon_t \\
-            b_t &= b_{t-1} + \beta \epsilon_t
+            l_t &= l_{t-1} + b_{t-1} + \alpha \epsilon_t \\
+            b_t &= b_{t-1} + \alpha \beta^\star \epsilon_t
             \end{align}
 
+        [1]_ also consider an alternative parameterization with :math:`\beta = \alpha \beta^\star`.
+
     * `ETS(A,N,A)`: Additive seasonal method
 
         .. math::
 
             \begin{align}
             y_t &= l_{t-1} + s_{t-m} + \epsilon_t \\
             l_t &= l_{t-1} + \alpha \epsilon_t \\
-            s_t &= s_{t-m} + \gamma \epsilon_t
+            s_t &= s_{t-m} + (1 - \alpha)\gamma^\star \epsilon_t
             \end{align}
 
+        [1]_ also consider an alternative parameterization with :math:`\gamma = (1 - \alpha) \gamma^\star`.
+
     * `ETS(A,A,A)`: Additive Holt-Winters method
 
         .. math::
 
             \begin{align}
             y_t &= l_{t-1} + b_{t-1} + s_{t-m} + \epsilon_t \\
             l_t &= l_{t-1} + \alpha \epsilon_t \\
-            b_t &= b_{t-1} + \beta \epsilon_t \\
-            s_t &= s_{t-m} + \gamma \epsilon_t
+            b_t &= b_{t-1} + \alpha \beta^\star \epsilon_t \\
+            s_t &= s_{t-m} + (1 - \alpha) \gamma^\star \epsilon_t
             \end{align}
 
+        [1]_ also consider an alternative parameterization with :math:`\beta = \alpha \beta^\star` and
+        :math:`\gamma = (1 - \alpha) \gamma^\star`.
+
     * `ETS(A, Ad, N)`: Dampened trend method
 
         .. math::
 
             \begin{align}
             y_t &= l_{t-1} + b_{t-1} + \epsilon_t \\
             l_t &= l_{t-1} + \alpha \epsilon_t \\
-            b_t &= \phi b_{t-1} + \beta \epsilon_t
+            b_t &= \phi b_{t-1} + \alpha \beta^\star \epsilon_t
             \end{align}
 
+        [1]_ also consider an alternative parameterization with :math:`\beta = \alpha \beta^\star`.
+
     * `ETS(A, Ad, A)`: Dampened trend with seasonal method
 
         .. math::
 
             \begin{align}
             y_t &= l_{t-1} + b_{t-1} + s_{t-m} + \epsilon_t \\
             l_t &= l_{t-1} + \alpha \epsilon_t \\
-            b_t &= \phi b_{t-1} + \beta \epsilon_t \\
-            s_t &= s_{t-m} + \gamma \epsilon_t
+            b_t &= \phi b_{t-1} + \alpha \beta^\star \epsilon_t \\
+            s_t &= s_{t-m} + (1 - \alpha) \gamma^\star \epsilon_t
             \end{align}
 
+        [1]_ also consider an alternative parameterization with :math:`\beta = \alpha \beta^\star` and
+        :math:`\gamma = (1 - \alpha) \gamma^\star`.
+
+
     Parameters
     ----------
     endog: pd.DataFrame
@@ -138,6 +151,17 @@ class BayesianETS(PyMCStateSpace):
         The number of periods in a complete seasonal cycle. Ignored if `seasonal` is `False`.
     measurement_error: bool
         Whether to include a measurement error term in the model. Default is `False`.
+    use_transformed_parameterization: bool, default False
+        If true, use the :math:`\alpha, \beta, \gamma` parameterization, otherwise use the :math:`\alpha, \beta^\star,
+        \gamma^\star` parameterization. This will change the admissible region for the priors.
+
+        - Under the **non-transformed** parameterization, all of :math:`\alpha, \beta^\star, \gamma^\star` should be
+          between 0 and 1.
+        - Under the **transformed** parameterization, :math:`\alpha \in (0, 1)`, :math:`\beta \in (0, \alpha)`, and
+          :math:`\gamma \in (0, 1 - \alpha)`.
+
+        The :meth:`param_info` method will change to reflect the suggested intervals based on the value of this
+        argument.
     filter_type: str, default "standard"
         The type of Kalman Filter to use. Options are "standard", "single", "univariate", "steady_state",
         and "cholesky". See the docs for kalman filters for more details.
@@ -157,6 +181,7 @@ def __init__(
         seasonal: bool = False,
         seasonal_periods: int | None = None,
         measurement_error: bool = False,
+        use_transformed_parameterization: bool = False,
         filter_type: str = "standard",
         verbose: bool = True,
     ):
@@ -184,6 +209,7 @@ def __init__(
         self.damped_trend = damped_trend
         self.seasonal = seasonal
         self.seasonal_periods = seasonal_periods
+        self.use_transformed_parameterization = use_transformed_parameterization
 
         if self.seasonal and self.seasonal_periods is None:
             raise ValueError("If seasonal is True, seasonal_periods must be provided.")
@@ -258,15 +284,19 @@ def param_info(self) -> dict[str, dict[str, Any]]:
             },
             "alpha": {
                 "shape": None,
-                "constraints": "0 < Sum(alpha, beta, gamma) < 1",
+                "constraints": "0 < alpha < 1",
             },
             "beta": {
                 "shape": None,
-                "constraints": "0 < Sum(alpha, beta, gamma) < 1",
+                "constraints": "0 < beta < 1"
+                if not self.use_transformed_parameterization
+                else "0 < beta < alpha",
             },
             "gamma": {
                 "shape": None,
-                "constraints": "0 < Sum(alpha, beta, gamma) < 1",
+                "constraints": "0 < gamma< 1"
+                if not self.use_transformed_parameterization
+                else "0 < gamma < (1 - alpha)",
             },
             "phi": {
                 "shape": None,
@@ -342,11 +372,18 @@ def make_symbolic_graph(self) -> None:
 
         # The shape of R can be pre-allocated, then filled with the required parameters
         R = pt.zeros((self.k_states, self.k_posdef))
-        R = pt.set_subtensor(R[0, :], 1.0)  # We will always have y_t = ... + e_t
 
         alpha = self.make_and_register_variable("alpha", shape=(), dtype=floatX)
         R = pt.set_subtensor(R[1, 0], alpha)  # and l_t = ... + alpha * e_t
 
+        # The R[0, 0] entry needs to be adjusted for a shift in the time indices. Consider the (A, N, N) model:
+        # y_t = l_{t-1} + e_t
+        # l_t = l_{t-1} + alpha * e_t
+        # We want the first equation to be in terms of time t on the RHS, because our observation equation is always
+        # y_t = Z @ x_t. Re-arranging equation 2, we get l_{t-1} = l_t - alpha * e_t --> y_t = l_t + e_t - alpha * e_t
+        # --> y_t = l_t + (1 - alpha) * e_t
+        R = pt.set_subtensor(R[0, :], (1 - alpha))
+
         # Shock and level component always exists, the base case is e_t = e_t and l_t = l_{t-1}
         T_base = pt.as_tensor_variable(np.array([[0.0, 0.0], [0.0, 1.0]]))
 
@@ -357,10 +394,12 @@ def make_symbolic_graph(self) -> None:
             self.ssm["initial_state", 2] = initial_trend
 
             beta = self.make_and_register_variable("beta", shape=(), dtype=floatX)
-            R = pt.set_subtensor(R[2, 0], beta)
+            if self.use_transformed_parameterization:
+                R = pt.set_subtensor(R[2, 0], beta)
+            else:
+                R = pt.set_subtensor(R[2, 0], alpha * beta)
 
             # If a trend is requested, we have the following transition equations (omitting the shocks):
-            # y_t = l_{t-1} + b_{t-1}
             # l_t = l_{t-1} + b_{t-1}
             # b_t = b_{t-1}
             T_base = pt.as_tensor_variable(([0.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]))
@@ -369,7 +408,6 @@ def make_symbolic_graph(self) -> None:
             phi = self.make_and_register_variable("phi", shape=(), dtype=floatX)
             # We are always in the case where we have a trend, so we can add the dampening parameter to T_base defined
             # in that branch. Transition equations become:
-            # y_t = l_{t-1} + phi * b_{t-1}
             # l_t = l_{t-1} + phi * b_{t-1}
             # b_t = phi * b_{t-1}
             T_base = pt.set_subtensor(T_base[1:, 2], phi)
@@ -384,7 +422,21 @@ def make_symbolic_graph(self) -> None:
             self.ssm["initial_state", 2 + int(self.trend) :] = initial_seasonal
 
             gamma = self.make_and_register_variable("gamma", shape=(), dtype=floatX)
-            R = pt.set_subtensor(R[2 + int(self.trend), 0], gamma)
+
+            if self.use_transformed_parameterization:
+                param = gamma
+            else:
+                param = (1 - alpha) * gamma
+
+            R = pt.set_subtensor(R[2 + int(self.trend), 0], param)
+
+            # Additional adjustment to the R[0, 0] position is required. Start from:
+            # y_t = l_{t-1} + s_{t-m} + e_t
+            # l_t = l_{t-1} + alpha * e_t
+            # s_t = s_{t-m} + gamma * e_t
+            # Solve for l_{t-1} and s_{t-m} in terms of l_t and s_t, then substitute into the observation equation:
+            # y_t = l_t + s_t - alpha * e_t - gamma * e_t + e_t --> y_t = l_t + s_t + (1 - alpha - gamma) * e_t
+            R = pt.set_subtensor(R[0, 0], R[0, 0] - param)
 
             # The seasonal component is always going to look like a TimeFrequency structural component, see that
             # docstring for more details
diff --git a/tests/statespace/test_ETS.py b/tests/statespace/test_ETS.py
@@ -104,20 +104,28 @@ def test_param_info(order: tuple[str, str, str], expected_params):
 
 
 @pytest.mark.parametrize("order, expected_params", zip(orders, order_params), ids=order_names)
-def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[str]):
+@pytest.mark.parametrize("use_transformed", [True, False], ids=["transformed", "untransformed"])
+def test_statespace_matrices(
+    rng, order: tuple[str, str, str], expected_params: list[str], use_transformed: bool
+):
     seasonal_periods = np.random.randint(3, 12)
-    mod = BayesianETS(order=order, seasonal_periods=seasonal_periods, measurement_error=True)
+    mod = BayesianETS(
+        order=order,
+        seasonal_periods=seasonal_periods,
+        measurement_error=True,
+        use_transformed_parameterization=use_transformed,
+    )
     expected_states = 2 + int(order[1] != "N") + int(order[2] != "N") * seasonal_periods
 
     test_values = {
-        "alpha": 0.7,
-        "beta": 0.15,
-        "gamma": 0.15,
-        "phi": 0.95,
-        "sigma_state": 0.1,
-        "sigma_obs": 0.1,
-        "initial_level": 3.0,
-        "initial_trend": 1.0,
+        "alpha": rng.beta(1, 1),
+        "beta": rng.beta(1, 1),
+        "gamma": rng.beta(1, 1),
+        "phi": rng.beta(1, 1),
+        "sigma_state": rng.normal() ** 2,
+        "sigma_obs": rng.normal() ** 2,
+        "initial_level": rng.normal() ** 2,
+        "initial_trend": rng.normal() ** 2,
         "initial_seasonal": np.ones(seasonal_periods),
         "initial_state_cov": np.eye(expected_states),
     }
@@ -145,7 +153,7 @@ def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[
     assert_allclose(Q, np.eye(1) * test_values["sigma_state"] ** 2)
 
     R_val = np.zeros((expected_states, 1))
-    R_val[0] = 1.0
+    R_val[0] = 1.0 - test_values["alpha"]
     R_val[1] = test_values["alpha"]
 
     Z_val = np.zeros((1, expected_states))
@@ -159,15 +167,24 @@ def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[
         T_val = np.array([[0.0, 0.0], [0.0, 1.0]])
     else:
         x0_val[2] = test_values["initial_trend"]
-        R_val[2] = test_values["beta"]
+        R_val[2] = (
+            test_values["beta"] if use_transformed else test_values["beta"] * test_values["alpha"]
+        )
         T_val = np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]])
 
     if order[1] == "Ad":
         T_val[1:, -1] *= test_values["phi"]
 
     if order[2] == "A":
         x0_val[2 + int(order[1] != "N") :] = test_values["initial_seasonal"]
-        R_val[2 + int(order[1] != "N")] = test_values["gamma"]
+        gamma = (
+            test_values["gamma"]
+            if use_transformed
+            else (1 - test_values["alpha"]) * test_values["gamma"]
+        )
+        R_val[2 + int(order[1] != "N")] = gamma
+        R_val[0] = R_val[0] - gamma
+
         S = np.eye(seasonal_periods, k=-1)
         S[0, -1] = 1.0
         Z_val[0, 2 + int(order[1] != "N")] = 1.0
@@ -186,7 +203,12 @@ def test_statespace_matrices(order: tuple[str, str, str], expected_params: list[
 def test_statespace_matches_statsmodels(rng, order: tuple[str, str, str], params):
     seasonal_periods = rng.integers(3, 12)
     data = rng.normal(size=(100,))
-    mod = BayesianETS(order=order, seasonal_periods=seasonal_periods, measurement_error=False)
+    mod = BayesianETS(
+        order=order,
+        seasonal_periods=seasonal_periods,
+        measurement_error=False,
+        use_transformed_parameterization=True,
+    )
     sm_mod = sm.tsa.statespace.ExponentialSmoothing(
         data,
         trend=mod.trend,
@@ -232,11 +254,4 @@ def test_statespace_matches_statsmodels(rng, order: tuple[str, str, str], params
     sm_matrices = [sm_mod.ssm[name] for name in LONG_MATRIX_NAMES[2:]]
 
     for matrix, sm_matrix, name in zip(matrices[2:], sm_matrices, LONG_MATRIX_NAMES[2:]):
-        if name == "selection":
-            # statsmodel selection matrix seems to be wrong? They set the first element of the selection matrix to
-            # 1 - sum(alpha, beta, gamma), which doesn't match the equations presented in ffp3
-            assert_allclose(matrix[1:], sm_matrix[1:], err_msg=f"{name} does not match")
-            assert matrix[0] == 1.0
-            assert sm_matrix[0] != 1.0
-        else:
-            assert_allclose(matrix, sm_matrix, err_msg=f"{name} does not match")
+        assert_allclose(matrix, sm_matrix, err_msg=f"{name} does not match")