Add effective sample size (ESS) to SMC; remove multiprocessing and progress bar

aloctavodia · aloctavodia · commit 1da02c41c8f9 · 2020-06-24T08:13:52.000-03:00
diff --git a/pymc3/smc/sample_smc.py b/pymc3/smc/sample_smc.py
@@ -18,19 +18,16 @@
 
 
 def sample_smc(
-    draws=1000,
+    draws=2000,
     kernel="metropolis",
     n_steps=25,
-    parallel=False,
     start=None,
-    cores=None,
     tune_steps=True,
     p_acc_rate=0.99,
     threshold=0.5,
     epsilon=1.0,
     dist_func="gaussian_kernel",
     sum_stat="identity",
-    progressbar=False,
     model=None,
     random_seed=-1,
 ):
@@ -49,15 +46,9 @@ def sample_smc(
         The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used
         for the first stage and for the others it will be determined automatically based on the
         acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
-    parallel: bool
-        Distribute computations across cores if the number of cores is larger than 1.
-        Defaults to False.
     start: dict, or array of dict
         Starting point in parameter space. It should be a list of dict with length `chains`.
         When None (default) the starting point is sampled from the prior distribution. 
-    cores: int
-        The number of chains to run in parallel. If ``None`` (default), it will be automatically
-        set to the number of CPUs in the system.
     tune_steps: bool
         Whether to compute the number of steps automatically or not. Defaults to True
     p_acc_rate: float
@@ -75,8 +66,6 @@ def sample_smc(
     sum_stat: str or callable
         Summary statistics. Available options are ``indentity``, ``sorted``, ``mean``, ``median``.
         If a callable is based it should return a number or a 1d numpy array.
-    progressbar: bool
-        Flag for displaying a progress bar. Defaults to False.
     model: Model (optional if in ``with`` context)).
     random_seed: int
         random seed
@@ -130,16 +119,13 @@ def sample_smc(
         draws=draws,
         kernel=kernel,
         n_steps=n_steps,
-        parallel=parallel,
         start=start,
-        cores=cores,
         tune_steps=tune_steps,
         p_acc_rate=p_acc_rate,
         threshold=threshold,
         epsilon=epsilon,
         dist_func=dist_func,
         sum_stat=sum_stat,
-        progressbar=progressbar,
         model=model,
         random_seed=random_seed,
     )
@@ -159,19 +145,16 @@ def sample_smc(
                 stage, smc.beta, smc.n_steps, smc.acc_rate
             )
         )
-        smc.resample()
         smc.update_proposal()
-        if stage > 0:
+        smc.resample()
+        for _ in range(2):
+            smc.mutate()
             smc.tune()
-        smc.mutate()
         stage += 1
 
-    if smc.parallel and smc.cores > 1:
-        smc.pool.close()
-        smc.pool.join()
-
     trace = smc.posterior_to_trace()
     trace.report._n_draws = smc.draws
     trace.report._n_tune = 0
     trace.report._t_sampling = time.time() - t1
+    trace.report.ess = smc.ess
     return trace
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
@@ -16,19 +16,14 @@
 
 import numpy as np
 from scipy.special import logsumexp
-from fastprogress.fastprogress import progress_bar
-import multiprocessing as mp
 import warnings
 from theano import function as theano_function
+from arviz import psislw
 
 from ..model import modelcontext, Point
 from ..parallel_sampling import _cpu_count
-from ..theanof import inputvars, make_shared_replacements
-from ..vartypes import discrete_types
+from ..theanof import floatX, inputvars, make_shared_replacements, join_nonshared_inputs
 from ..sampling import sample_prior_predictive
-from ..theanof import floatX, join_nonshared_inputs
-from ..step_methods.arraystep import metrop_select
-from ..step_methods.metropolis import MultivariateNormalProposal
 from ..backends.ndarray import NDArray
 from ..backends.base import MultiTrace
 
@@ -41,36 +36,30 @@
 class SMC:
     def __init__(
         self,
-        draws=1000,
+        draws=2000,
         kernel="metropolis",
         n_steps=25,
-        parallel=False,
         start=None,
-        cores=None,
         tune_steps=True,
         p_acc_rate=0.99,
         threshold=0.5,
         epsilon=1.0,
         dist_func="absolute_error",
         sum_stat="Identity",
-        progressbar=False,
         model=None,
         random_seed=-1,
     ):
 
         self.draws = draws
         self.kernel = kernel
         self.n_steps = n_steps
-        self.parallel = parallel
         self.start = start
-        self.cores = cores
         self.tune_steps = tune_steps
         self.p_acc_rate = p_acc_rate
         self.threshold = threshold
         self.epsilon = epsilon
         self.dist_func = dist_func
         self.sum_stat = sum_stat
-        self.progressbar = progressbar
         self.model = model
         self.random_seed = random_seed
 
@@ -79,23 +68,16 @@ def __init__(
         if self.random_seed != -1:
             np.random.seed(self.random_seed)
 
-        if self.cores is None:
-            self.cores = _cpu_count()
-
         self.beta = 0
         self.max_steps = n_steps
         self.proposed = draws * n_steps
         self.acc_rate = 1
         self.acc_per_chain = np.ones(self.draws)
-        self.model.marginal_log_likelihood = 0
+        self.model.log_marginal_likelihood = 0
         self.variables = inputvars(self.model.vars)
         self.dimension = sum(v.dsize for v in self.variables)
-        self.scalings = np.ones(self.draws) * min(1, 2.38 ** 2 / self.dimension)
-        self.discrete = np.concatenate(
-            [[v.dtype in discrete_types] * (v.dsize or 1) for v in self.variables]
-        )
-        self.any_discrete = self.discrete.any()
-        self.all_discrete = self.discrete.all()
+        self.scalings = np.ones(self.draws) * 2.38 / (self.dimension) ** 0.5
+        self.weights = np.ones(self.draws) / self.draws
 
     def initialize_population(self):
         """
@@ -153,17 +135,8 @@ def initialize_logp(self):
         """
         initialize the prior and likelihood log probabilities
         """
-        if self.parallel and self.cores > 1:
-            self.pool = mp.Pool(processes=self.cores)
-            priors = self.pool.starmap(
-                self.prior_logp_func, [(sample,) for sample in self.posterior]
-            )
-            likelihoods = self.pool.starmap(
-                self.likelihood_logp_func, [(sample,) for sample in self.posterior]
-            )
-        else:
-            priors = [self.prior_logp_func(sample) for sample in self.posterior]
-            likelihoods = [self.likelihood_logp_func(sample) for sample in self.posterior]
+        priors = [self.prior_logp_func(sample) for sample in self.posterior]
+        likelihoods = [self.likelihood_logp_func(sample) for sample in self.posterior]
 
         self.prior_logp = np.array(priors).squeeze()
         self.likelihood_logp = np.array(likelihoods).squeeze()
@@ -192,11 +165,9 @@ def update_weights_beta(self):
             new_beta = 1
             log_weights_un = (new_beta - old_beta) * self.likelihood_logp
             log_weights = log_weights_un - logsumexp(log_weights_un)
+            self.ess = np.exp(-logsumexp(log_weights * 2))
 
-        ll_max = np.max(log_weights_un)
-        self.model.marginal_log_likelihood += ll_max + np.log(
-            np.exp(log_weights_un - ll_max).mean()
-        )
+        self.model.log_marginal_likelihood += logsumexp(log_weights_un) - np.log(self.draws)
         self.beta = new_beta
         self.weights = np.exp(log_weights)
 
@@ -218,13 +189,12 @@ def update_proposal(self):
         """
         Update proposal based on the covariance matrix from tempered posterior
         """
-        cov = np.cov(self.posterior, bias=False, rowvar=0)
+        cov = np.cov(self.posterior, ddof=0, aweights=self.weights, rowvar=0)
         cov = np.atleast_2d(cov)
         cov += 1e-6 * np.eye(cov.shape[0])
         if np.isnan(cov).any() or np.isinf(cov).any():
             raise ValueError('Sample covariances not valid! Likely "draws" is too small!')
         self.cov = cov
-        self.proposal = MultivariateNormalProposal(cov)
 
     def tune(self):
         """
@@ -244,8 +214,8 @@ def tune(self):
         self.proposed = self.draws * self.n_steps
 
     def mutate(self):
-
         ac_ = np.empty((self.n_steps, self.draws))
+
         proposals = (
             np.random.multivariate_normal(
                 np.zeros(self.dimension), self.cov, size=(self.n_steps, self.draws)