stan-dev · aleksgorica · Apr 23, 2024 · Jan 30, 2024 · Jan 31, 2024 · Jan 31, 2024
diff --git a/src/stan/analyze/mcmc/compute_potential_scale_reduction.hpp b/src/stan/analyze/mcmc/compute_potential_scale_reduction.hpp
@@ -8,6 +8,7 @@
 #include <boost/accumulators/statistics/stats.hpp>
 #include <boost/accumulators/statistics/mean.hpp>
 #include <boost/accumulators/statistics/variance.hpp>
+#include <boost/math/distributions/normal.hpp>
 #include <algorithm>
 #include <cmath>
 #include <vector>
@@ -16,6 +17,150 @@
 namespace stan {
 namespace analyze {
 
+/**
+ * Computes normalized average ranks for draws and transforms them to normal
+ * scores using the inverse normal transformation with a fractional offset.
+ * Based on the paper https://arxiv.org/abs/1903.08008
+ * @param draws stores chains in columns
+ * @return normal scores for average ranks of draws
+ *
+ */
+
-
-
+Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& draws) {
-Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& draws) {
+inline Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& draws) {
-Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& draws) {
+inline Eigen::MatrixXd rank_transform(const Eigen::MatrixXd& draws) {
+  const Eigen::Index rows = draws.rows();
+  const Eigen::Index cols = draws.cols();
+  const Eigen::Index size = rows * cols;
+
+  std::vector<std::pair<double, int>> value_with_index(size);
+
+  for (Eigen::Index i = 0; i < size; ++i) {
+    value_with_index[i] = {draws(i), i};
+  }
+
+  std::sort(value_with_index.begin(), value_with_index.end());
+
+  Eigen::MatrixXd rankMatrix = Eigen::MatrixXd::Zero(rows, cols);
-  Eigen::MatrixXd rankMatrix = Eigen::MatrixXd::Zero(rows, cols);
+  Eigen::MatrixXd rank_matrix = Eigen::MatrixXd::Zero(rows, cols);
-  Eigen::MatrixXd rankMatrix = Eigen::MatrixXd::Zero(rows, cols);
+  Eigen::MatrixXd rank_matrix = Eigen::MatrixXd::Zero(rows, cols);
+
+  // Assigning average ranks
+  for (Eigen::Index i = 0; i < size; ++i) {
+    // Handle ties by averaging ranks
+    Eigen::Index j = i + 1;
+    double sumRanks = j;
+    Eigen::Index count = 1;
+
+    while (j < size && value_with_index[j].first == value_with_index[i].first) {
+      sumRanks += j + 1;  // Rank starts from 1
+      ++j;
+      ++count;
+    }
+    double avgRank = sumRanks / count;
+    boost::math::normal_distribution<double> dist;
+    for (std::size_t k = i; k < j; ++k) {
+      Eigen::Index index = value_with_index[k].second;
+      double p = (avgRank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
+      rankMatrix(index) = boost::math::quantile(dist, p);
-      Eigen::Index index = value_with_index[k].second;
-      double p = (avgRank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
-      rankMatrix(index) = boost::math::quantile(dist, p);
+      double p = (avgRank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
+      const Eigen::Index index = value_with_index[k].second;
+      rankMatrix(index) = boost::math::quantile(dist, p);
-      Eigen::Index index = value_with_index[k].second;
-      double p = (avgRank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
-      rankMatrix(index) = boost::math::quantile(dist, p);
+      double p = (avgRank - 3.0 / 8.0) / (size - 2.0 * 3.0 / 8.0 + 1.0);
+      const Eigen::Index index = value_with_index[k].second;
+      rankMatrix(index) = boost::math::quantile(dist, p);
+    }
+    i = j - 1;  // Skip over tied elements
+  }
+  return rankMatrix;
+}
+
+/**
+ * Computes the square root of the ratio between the marginal posterior
+ * variance of the estimand, estimated by the weighted average of within-chain
+ * variance W and between-chain variance B, and the within-chain variance W.
+ *
+ * @param draws stores chains in columns
+ * @return square root of (((N-1)/N)W + B/N) / W
+ *
+ */
+
+inline double rhat(const Eigen::MatrixXd& draws) {
-inline double rhat(const Eigen::MatrixXd& draws) {
+inline double rhat(const Eigen::MatrixXd& chains) {
-inline double rhat(const Eigen::MatrixXd& draws) {
+inline double rhat(const Eigen::MatrixXd& chains) {
+  const Eigen::Index num_chains = draws.cols();
+  const Eigen::Index num_draws = draws.rows();
+
+  Eigen::VectorXd chain_mean(num_chains);
+  chain_mean = draws.colwise().mean();
-  Eigen::VectorXd chain_mean(num_chains);
-  chain_mean = draws.colwise().mean();
+  Eigen::VectorXd within_chain_mean  = draws.colwise().mean();
-  Eigen::VectorXd chain_mean(num_chains);
-  chain_mean = draws.colwise().mean();
+  Eigen::VectorXd within_chain_mean  = draws.colwise().mean();
+  double total_mean = chain_mean.mean();
-  double total_mean = chain_mean.mean();
+  double across_chain_mean = chain_mean.mean();
-  double total_mean = chain_mean.mean();
+  double across_chain_mean = chain_mean.mean();
+  double var_between = num_draws
+                       * (chain_mean.array() - total_mean).square().sum()
+                       / (num_chains - 1);
+  double var_sum = 0;
+  for (Eigen::Index col = 0; col < num_chains; ++col) {
+    var_sum += (draws.col(col).array() - chain_mean(col)).square().sum()
+               / (num_draws - 1);
+  }
+  double var_within = var_sum / num_chains;
-  double var_sum = 0;
-  for (Eigen::Index col = 0; col < num_chains; ++col) {
-    var_sum += (draws.col(col).array() - chain_mean(col)).square().sum()
-               / (num_draws - 1);
-  }
-  double var_within = var_sum / num_chains;
+  double within_variance =
+     // Subtract each chain's mean from its draws and get the sum of squares for each chain (getting a vector back)
+      ((chains.array().rowwise() - chain_means.array())
+           .square()
+           .colwise()
+           .sum() /
+       // divide each sum of squares by (num_draws - 1), sum the results, and divide by num chains
+       (num_draws - 1.0))
+          .sum() / num_chains;
+
-  double var_sum = 0;
-  for (Eigen::Index col = 0; col < num_chains; ++col) {
-    var_sum += (draws.col(col).array() - chain_mean(col)).square().sum()
-               / (num_draws - 1);
-  }
-  double var_within = var_sum / num_chains;
+  double within_variance =
+     // Subtract each chain's mean from its draws and get the sum of squares for each chain (getting a vector back)
+      ((chains.array().rowwise() - chain_means.array())
+           .square()
+           .colwise()
+           .sum() /
+       // divide each sum of squares by (num_draws - 1), sum the results, and divide by num chains
+       (num_draws - 1.0))
+          .sum() / num_chains;
+
+  return sqrt((var_between / var_within + num_draws - 1) / num_draws);
+}
+
+/**
+ * Computes the potential scale reduction (Rhat) using rank based diagnostic for
+ * the specified parameter across all kept samples. Based on paper
+ * https://arxiv.org/abs/1903.08008
+ *
+ * Current implementation assumes draws are stored in contiguous
+ * blocks of memory.  Chains are trimmed from the back to match the
+ * length of the shortest chain.
+ *
+ * @param draws stores pointers to arrays of chains
+ * @param sizes stores sizes of chains
+ * @return potential scale reduction for the specified parameter
+ */
+inline double compute_potential_scale_reduction_rank(
+    std::vector<const double*> draws, std::vector<size_t> sizes) {
-    std::vector<const double*> draws, std::vector<size_t> sizes) {
+    std::vector<const double*> chain_begins, std::vector<size_t> chain_sizes) {
-    std::vector<const double*> draws, std::vector<size_t> sizes) {
+    std::vector<const double*> chain_begins, std::vector<size_t> chain_sizes) {
+  int num_chains = sizes.size();
-  int num_chains = sizes.size();
+  std::size_t num_chains = sizes.size();
-  int num_chains = sizes.size();
+  std::size_t num_chains = sizes.size();
+  size_t num_draws = sizes[0];
+  if (num_draws == 0) {
+    return std::numeric_limits<double>::quiet_NaN();
+  }
+  for (int chain = 1; chain < num_chains; ++chain) {
+    num_draws = std::min(num_draws, sizes[chain]);
+  }
+
+  // check if chains are constant; all equal to first draw's value
+  bool are_all_const = false;
+  Eigen::VectorXd init_draw = Eigen::VectorXd::Zero(num_chains);
+
+  for (int chain = 0; chain < num_chains; chain++) {
+    Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draw(
+        draws[chain], sizes[chain]);
-    Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draw(
-        draws[chain], sizes[chain]);
+    Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draws(
+        nonzero_chain_begins[chain], nonzero_chain_sizes[chain]);
-    Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draw(
-        draws[chain], sizes[chain]);
+    Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, 1>> draws(
+        nonzero_chain_begins[chain], nonzero_chain_sizes[chain]);
+
+    for (int n = 0; n < num_draws; n++) {
+      if (!std::isfinite(draw(n))) {
+        return std::numeric_limits<double>::quiet_NaN();
+      }
+    }
+
+    init_draw(chain) = draw(0);
+
+    if (draw.isApproxToConstant(draw(0))) {
+      are_all_const |= true;
+    }
+  }
+
+  if (are_all_const) {
+    // If all chains are constant then return NaN
+    // if they all equal the same constant value
+    if (init_draw.isApproxToConstant(init_draw(0))) {
+      return std::numeric_limits<double>::quiet_NaN();
+    }
+  }
+
+  Eigen::MatrixXd matrix(num_draws, num_chains);
-  Eigen::MatrixXd matrix(num_draws, num_chains);
+  Eigen::MatrixXd draws_matrix(num_draws, num_chains);
-  Eigen::MatrixXd matrix(num_draws, num_chains);
+  Eigen::MatrixXd draws_matrix(num_draws, num_chains);
+
+  for (int col = 0; col < num_chains; ++col) {
+    for (int row = 0; row < num_draws; ++row) {
+      matrix(row, col) = draws[col][row];
+    }
+  }
-  for (int col = 0; col < num_chains; ++col) {
-    for (int row = 0; row < num_draws; ++row) {
-      matrix(row, col) = draws[col][row];
-    }
-  }
-  for (int col = 0; col < num_chains; ++col) {
-    for (int row = 0; row < num_draws; ++row) {
-      matrix(row, col) = draws[col][row];
-    }
-  }
+
+  double rhat_bulk = rhat(rank_transform(matrix));
+  double rhat_tail = rhat(rank_transform(
+      (matrix.array() - math::quantile(matrix.reshaped(), 0.5)).abs()));
+
+  return std::max(rhat_bulk, rhat_tail);
+}
+
 /**
  * Computes the potential scale reduction (Rhat) for the specified
  * parameter across all kept samples.
@@ -31,6 +176,7 @@ namespace analyze {
  * @param sizes stores sizes of chains
  * @return potential scale reduction for the specified parameter
  */
+
 inline double compute_potential_scale_reduction(
     std::vector<const double*> draws, std::vector<size_t> sizes) {
   int num_chains = sizes.size();
@@ -71,34 +217,39 @@ inline double compute_potential_scale_reduction(
     }
   }
 
-  using boost::accumulators::accumulator_set;
-  using boost::accumulators::stats;
-  using boost::accumulators::tag::mean;
-  using boost::accumulators::tag::variance;
+  Eigen::MatrixXd matrix(num_draws, num_chains);
 
-  Eigen::VectorXd chain_mean(num_chains);
-  accumulator_set<double, stats<variance>> acc_chain_mean;
-  Eigen::VectorXd chain_var(num_chains);
-  double unbiased_var_scale = num_draws / (num_draws - 1.0);
-
-  for (int chain = 0; chain < num_chains; ++chain) {
-    accumulator_set<double, stats<mean, variance>> acc_draw;
-    for (int n = 0; n < num_draws; ++n) {
-      acc_draw(draws[chain][n]);
+  for (int col = 0; col < num_chains; ++col) {
+    for (int row = 0; row < num_draws; ++row) {
+      matrix(row, col) = draws[col][row];
     }
-
-    chain_mean(chain) = boost::accumulators::mean(acc_draw);
-    acc_chain_mean(chain_mean(chain));
-    chain_var(chain)
-        = boost::accumulators::variance(acc_draw) * unbiased_var_scale;
   }
 
-  double var_between = num_draws * boost::accumulators::variance(acc_chain_mean)
-                       * num_chains / (num_chains - 1);
-  double var_within = chain_var.mean();
+  return rhat(matrix);
+}
 
-  // rewrote [(n-1)*W/n + B/n]/W as (n-1+ B/W)/n
-  return sqrt((var_between / var_within + num_draws - 1) / num_draws);
+/**
+ * Computes the potential scale reduction (Rhat) using rank based diagnostic for
+ * the specified parameter across all kept samples. Based on paper
+ * https://arxiv.org/abs/1903.08008
+ *
+ * See more details in Stan reference manual section "Potential
+ * Scale Reduction". http://mc-stan.org/users/documentation
+ *
+ * Current implementation assumes draws are stored in contiguous
+ * blocks of memory.  Chains are trimmed from the back to match the
+ * length of the shortest chain.  Argument size will be broadcast to
+ * same length as draws.
+ *
+ * @param draws stores pointers to arrays of chains
+ * @param size stores sizes of chains
+ * @return potential scale reduction for the specified parameter
+ */
+inline double compute_potential_scale_reduction_rank(
+    std::vector<const double*> draws, size_t size) {
+  int num_chains = draws.size();
+  std::vector<size_t> sizes(num_chains, size);
+  return compute_potential_scale_reduction_rank(draws, sizes);
 }
 
 /**
@@ -124,6 +275,40 @@ inline double compute_potential_scale_reduction(
   return compute_potential_scale_reduction(draws, sizes);
 }
 
+/**
+ * Computes the split potential scale reduction (Rhat) using the rank-based
+ * diagnostic for the specified parameter across all kept samples. Based on
+ * the paper https://arxiv.org/abs/1903.08008
+ *
+ * When the number of total draws N is odd, the (N+1)/2th draw is ignored.
+ *
+ * See more details in Stan reference manual section "Potential
+ * Scale Reduction". http://mc-stan.org/users/documentation
+ *
+ * Current implementation assumes draws are stored in contiguous
+ * blocks of memory.  Chains are trimmed from the back to match the
+ * length of the shortest chain.
+ *
+ * @param draws stores pointers to arrays of chains
+ * @param sizes stores sizes of chains
+ * @return potential scale reduction for the specified parameter
+ */
+inline double compute_split_potential_scale_reduction_rank(
+    std::vector<const double*> draws, std::vector<size_t> sizes) {
-inline double compute_split_potential_scale_reduction_rank(
-    std::vector<const double*> draws, std::vector<size_t> sizes) {
+inline double compute_split_potential_scale_reduction_rank(
+    const std::vector<const double*>& draws, const std::vector<size_t>& sizes) {
-inline double compute_split_potential_scale_reduction_rank(
-    std::vector<const double*> draws, std::vector<size_t> sizes) {
+inline double compute_split_potential_scale_reduction_rank(
+    const std::vector<const double*>& draws, const std::vector<size_t>& sizes) {
+  int num_chains = sizes.size();
+  size_t num_draws = sizes[0];
+  for (int chain = 1; chain < num_chains; ++chain) {
+    num_draws = std::min(num_draws, sizes[chain]);
+  }
+
+  std::vector<const double*> split_draws = split_chains(draws, sizes);
+
+  double half = num_draws / 2.0;
+  std::vector<size_t> half_sizes(2 * num_chains, std::floor(half));
-  double half = num_draws / 2.0;
-  std::vector<size_t> half_sizes(2 * num_chains, std::floor(half));
+  std::size_thalf = std::floor(num_draws / 2.0);
+  std::vector<size_t> half_sizes(2 * num_chains, half);
-  double half = num_draws / 2.0;
-  std::vector<size_t> half_sizes(2 * num_chains, std::floor(half));
+  std::size_thalf = std::floor(num_draws / 2.0);
+  std::vector<size_t> half_sizes(2 * num_chains, half);
+
+  return compute_potential_scale_reduction_rank(split_draws, half_sizes);
+}
+
 /**
  * Computes the split potential scale reduction (Rhat) for the
  * specified parameter across all kept samples.  When the number of
@@ -156,6 +341,32 @@ inline double compute_split_potential_scale_reduction(
   return compute_potential_scale_reduction(split_draws, half_sizes);
 }
 
+/**
+ * Computes the split potential scale reduction (Rhat) using the rank-based
+ * diagnostic for the specified parameter across all kept samples. Based on
+ * the paper https://arxiv.org/abs/1903.08008
+ *
+ * When the number of total draws N is odd, the (N+1)/2th draw is ignored.
+ *
+ * See more details in Stan reference manual section "Potential
+ * Scale Reduction". http://mc-stan.org/users/documentation
+ *
+ * Current implementation assumes draws are stored in contiguous
+ * blocks of memory.  Chains are trimmed from the back to match the
+ * length of the shortest chain.  Argument size will be broadcast to
+ * same length as draws.
+ *
+ * @param draws stores pointers to arrays of chains
+ * @param size stores sizes of chains
+ * @return potential scale reduction for the specified parameter
+ */
+inline double compute_split_potential_scale_reduction_rank(
+    std::vector<const double*> draws, size_t size) {
+  int num_chains = draws.size();
+  std::vector<size_t> sizes(num_chains, size);
+  return compute_split_potential_scale_reduction_rank(draws, sizes);
+}
+
 /**
  * Computes the split potential scale reduction (Rhat) for the
  * specified parameter across all kept samples.  When the number of

diff --git a/src/stan/mcmc/chains.hpp b/src/stan/mcmc/chains.hpp
@@ -595,6 +595,21 @@ class chains {
     return split_effective_sample_size(index(name));
   }
 
+  double split_potential_scale_reduction_rank(const int index) const {
+    int n_chains = num_chains();
+    std::vector<const double*> draws(n_chains);
+    std::vector<size_t> sizes(n_chains);
+    int n_kept_samples = 0;
+    for (int chain = 0; chain < n_chains; ++chain) {
+      n_kept_samples = num_kept_samples(chain);
+      draws[chain]
+          = samples_(chain).col(index).bottomRows(n_kept_samples).data();
+      sizes[chain] = n_kept_samples;
+    }
+
+    return analyze::compute_split_potential_scale_reduction_rank(draws, sizes);
+  }
+
   double split_potential_scale_reduction(const int index) const {
     int n_chains = num_chains();
     std::vector<const double*> draws(n_chains);
@@ -610,6 +625,10 @@ class chains {
     return analyze::compute_split_potential_scale_reduction(draws, sizes);
   }
 
+  double split_potential_scale_reduction_rank(const std::string& name) const {
+    return split_potential_scale_reduction_rank(index(name));
+  }
+
   double split_potential_scale_reduction(const std::string& name) const {
     return split_potential_scale_reduction(index(name));
   }