From 3cef4b6d4b3da035399b19a08447f1e425b90267 Mon Sep 17 00:00:00 2001 From: Zhi Ling <1336265834@qq.com> Date: Mon, 7 Oct 2024 11:18:44 +0800 Subject: [PATCH 1/6] add beta_neg_binomial_lccdf --- stan/math/prim/prob.hpp | 1 + .../prim/prob/beta_neg_binomial_lccdf.hpp | 151 ++++++++++++++++++ .../beta_neg_binomial_ccdf_log_test.hpp | 96 +++++++++++ 3 files changed, 248 insertions(+) create mode 100644 stan/math/prim/prob/beta_neg_binomial_lccdf.hpp create mode 100644 test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp index 0760c087a1c..14c80636924 100644 --- a/stan/math/prim/prob.hpp +++ b/stan/math/prim/prob.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp new file mode 100644 index 00000000000..876bbe802b4 --- /dev/null +++ b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp @@ -0,0 +1,151 @@ +#ifndef STAN_MATH_PRIM_PROB_BETA_NEG_BINOMIAL_LCCDF_HPP +#define STAN_MATH_PRIM_PROB_BETA_NEG_BINOMIAL_LCCDF_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** \ingroup prob_dists + * Returns the log CCDF of the Beta-Negative Binomial distribution with given + * number of successes, prior success, and prior failure parameters. + * Given containers of matching sizes, returns the log sum of probabilities. 
+ * + * @tparam T_n type of failure parameter + * @tparam T_r type of number of successes parameter + * @tparam T_alpha type of prior success parameter + * @tparam T_beta type of prior failure parameter + * + * @param n failure parameter + * @param r Number of successes parameter + * @param alpha prior success parameter + * @param beta prior failure parameter + * @return log probability or log sum of probabilities + * @throw std::domain_error if r, alpha, or beta fails to be positive + * @throw std::invalid_argument if container sizes mismatch + */ +template +inline return_type_t beta_neg_binomial_lccdf( + const T_n& n, const T_r& r, const T_alpha& alpha, const T_beta& beta) { + using std::exp; + using std::log; + using T_partials_return = partials_return_t; + using T_r_ref = ref_type_t; + using T_alpha_ref = ref_type_t; + using T_beta_ref = ref_type_t; + static constexpr const char* function = "beta_neg_binomial_lccdf"; + check_consistent_sizes( + function, "Failures variable", n, "Number of successes parameter", r, + "Prior success parameter", alpha, "Prior failure parameter", beta); + if (size_zero(n, r, alpha, beta)) { + return 0; + } + + T_r_ref r_ref = r; + T_alpha_ref alpha_ref = alpha; + T_beta_ref beta_ref = beta; + check_positive_finite(function, "Number of successes parameter", r_ref); + check_positive_finite(function, "Prior success parameter", alpha_ref); + check_positive_finite(function, "Prior failure parameter", beta_ref); + + T_partials_return P(0.0); + auto ops_partials = make_partials_propagator(r_ref, alpha_ref, beta_ref); + + scalar_seq_view n_vec(n); + scalar_seq_view r_vec(r_ref); + scalar_seq_view alpha_vec(alpha_ref); + scalar_seq_view beta_vec(beta_ref); + size_t size_n = stan::math::size(n); + size_t max_size_seq_view = max_size(n, r, alpha, beta); + + // Explicit return for extreme values + // The gradients are technically ill-defined, but treated as zero + for (size_t i = 0; i < size_n; i++) { + if (n_vec.val(i) < 0) { + return 
ops_partials.build(0.0); + } + } + + for (size_t i = 0; i < max_size_seq_view; i++) { + // Explicit return for extreme values + // The gradients are technically ill-defined, but treated as zero + if (n_vec.val(i) == std::numeric_limits::max()) { + return ops_partials.build(negative_infinity()); + } + T_partials_return n_dbl = n_vec.val(i); + T_partials_return r_dbl = r_vec.val(i); + T_partials_return alpha_dbl = alpha_vec.val(i); + T_partials_return beta_dbl = beta_vec.val(i); + T_partials_return b_plus_n = beta_dbl + n_dbl; + T_partials_return r_plus_n = r_dbl + n_dbl; + T_partials_return a_plus_r = alpha_dbl + r_dbl; + T_partials_return one = 1; + T_partials_return precision = 1e-8; // default -6, set -8 to pass all tests + + T_partials_return F + = hypergeometric_3F2({one, b_plus_n + 1, r_plus_n + 1}, + {n_dbl + 2, a_plus_r + b_plus_n + 1}, one); + T_partials_return C = lgamma(r_plus_n + 1) + lbeta(a_plus_r, b_plus_n + 1) + - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) + - lgamma(n_dbl + 2); + T_partials_return ccdf = exp(C) * F; + T_partials_return P_i = log(ccdf); + P += P_i; + + if (!is_constant_all::value) { + T_partials_return digamma_n_r_alpha_beta + = digamma(a_plus_r + b_plus_n + 1); + T_partials_return dF[6]; + grad_F32(dF, one, b_plus_n + 1, r_plus_n + 1, n_dbl + 2, + a_plus_r + b_plus_n + 1, one, precision, 1e5); + + if constexpr (!is_constant::value || !is_constant::value) { + T_partials_return digamma_r_alpha = digamma(a_plus_r); + if constexpr (!is_constant_all::value) { + partials<0>(ops_partials)[i] + += digamma(r_plus_n + 1) + + (digamma_r_alpha - digamma_n_r_alpha_beta) + + (dF[2] + dF[4]) / F - digamma(r_dbl); + } + if constexpr (!is_constant_all::value) { + partials<1>(ops_partials)[i] += digamma_r_alpha + - digamma_n_r_alpha_beta + dF[4] / F + - digamma(alpha_dbl); + } + } + + if constexpr (!is_constant::value + || !is_constant::value) { + T_partials_return digamma_alpha_beta = digamma(alpha_dbl + beta_dbl); + if constexpr 
(!is_constant::value) { + partials<1>(ops_partials)[i] += digamma_alpha_beta; + } + if constexpr (!is_constant::value) { + partials<2>(ops_partials)[i] + += digamma(b_plus_n + 1) - digamma_n_r_alpha_beta + + (dF[1] + dF[4]) / F + - (digamma(beta_dbl) - digamma_alpha_beta); + } + } + } + } + + return ops_partials.build(P); +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp new file mode 100644 index 00000000000..82a3da2eb45 --- /dev/null +++ b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp @@ -0,0 +1,96 @@ +// Arguments: Ints, Doubles, Doubles, Doubles +#include +#include +#include + +using stan::math::var; +using std::numeric_limits; +using std::vector; + +class AgradCcdfLogBetaNegBinomial : public AgradCcdfLogTest { + public: + void valid_values(vector>& parameters, + vector& ccdf_log) { + vector param(4); + + param[0] = 10; // n + param[1] = 5.5; // r + param[2] = 2.5; // alpha + param[3] = 0.5; // beta + parameters.push_back(param); + ccdf_log.push_back(std::log(1.0 - 0.967906252841089)); // expected ccdf_log + } + + void invalid_values(vector& index, vector& value) { + // n + + // r + index.push_back(1U); + value.push_back(0.0); + + index.push_back(1U); + value.push_back(-1.0); + + index.push_back(1U); + value.push_back(std::numeric_limits::infinity()); + + // alpha + index.push_back(2U); + value.push_back(0.0); + + index.push_back(2U); + value.push_back(-1.0); + + index.push_back(2U); + value.push_back(std::numeric_limits::infinity()); + + // beta + index.push_back(3U); + value.push_back(0.0); + + index.push_back(3U); + value.push_back(-1.0); + + index.push_back(3U); + value.push_back(std::numeric_limits::infinity()); + } + + // BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK + bool has_lower_bound() { return false; } + + bool has_upper_bound() { return false; } + + template + 
stan::return_type_t ccdf_log(const T_n& n, + const T_r& r, + const T_size1& alpha, + const T_size2& beta, + const T4&, const T5&) { + return stan::math::beta_neg_binomial_lccdf(n, r, alpha, beta); + } + + template + stan::return_type_t ccdf_log_function( + const T_n& n, const T_r& r, const T_size1& alpha, const T_size2& beta, + const T4&, const T5&) { + using stan::math::lbeta; + using stan::math::lgamma; + using stan::math::log1m; + using stan::math::log_sum_exp; + using std::vector; + + vector> lpmf_values; + + for (int i = 0; i <= n; i++) { + auto lpmf = lbeta(i + r, alpha + beta) - lbeta(r, alpha) + + lgamma(i + beta) - lgamma(i + 1) - lgamma(beta); + lpmf_values.push_back(lpmf); + } + + auto log_cdf = log_sum_exp(lpmf_values); + + return log1m(exp(log_cdf)); + } +}; From c6e8a2bb7ad861731b9a4c703e7a153943051346 Mon Sep 17 00:00:00 2001 From: Zhi Ling <1336265834@qq.com> Date: Wed, 9 Oct 2024 13:43:23 +0800 Subject: [PATCH 2/6] fix issues --- .../prim/prob/beta_neg_binomial_lccdf.hpp | 20 +++++++++++-------- .../beta_neg_binomial_ccdf_log_test.hpp | 7 +++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp index 876bbe802b4..e5dd7d6c760 100644 --- a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp +++ b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp @@ -33,13 +33,16 @@ namespace math { * @param r Number of successes parameter * @param alpha prior success parameter * @param beta prior failure parameter + * @param precision precision for `grad_F32`, default \f$10^{-8}\f$ + * @param max_steps max iteration allowed for `grad_F32`, default \f$10^{6}\f$ * @return log probability or log sum of probabilities * @throw std::domain_error if r, alpha, or beta fails to be positive * @throw std::invalid_argument if container sizes mismatch */ template inline return_type_t beta_neg_binomial_lccdf( - const T_n& n, const T_r& r, const T_alpha& alpha, const T_beta& 
beta) { + const T_n& n, const T_r& r, const T_alpha& alpha, const T_beta& beta, + const double precision = 1e-8, const int max_steps = 1e6) { using std::exp; using std::log; using T_partials_return = partials_return_t; @@ -61,7 +64,7 @@ inline return_type_t beta_neg_binomial_lccdf( check_positive_finite(function, "Prior success parameter", alpha_ref); check_positive_finite(function, "Prior failure parameter", beta_ref); - T_partials_return P(0.0); + T_partials_return log_ccdf(0.0); auto ops_partials = make_partials_propagator(r_ref, alpha_ref, beta_ref); scalar_seq_view n_vec(n); @@ -93,7 +96,8 @@ inline return_type_t beta_neg_binomial_lccdf( T_partials_return r_plus_n = r_dbl + n_dbl; T_partials_return a_plus_r = alpha_dbl + r_dbl; T_partials_return one = 1; - T_partials_return precision = 1e-8; // default -6, set -8 to pass all tests + T_partials_return precision_t + = precision; // default -6, set -8 to pass all tests T_partials_return F = hypergeometric_3F2({one, b_plus_n + 1, r_plus_n + 1}, @@ -102,15 +106,15 @@ inline return_type_t beta_neg_binomial_lccdf( - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) - lgamma(n_dbl + 2); T_partials_return ccdf = exp(C) * F; - T_partials_return P_i = log(ccdf); - P += P_i; + T_partials_return log_ccdf_i = log(ccdf); + log_ccdf += log_ccdf_i; - if (!is_constant_all::value) { + if constexpr (!is_constant_all::value) { T_partials_return digamma_n_r_alpha_beta = digamma(a_plus_r + b_plus_n + 1); T_partials_return dF[6]; grad_F32(dF, one, b_plus_n + 1, r_plus_n + 1, n_dbl + 2, - a_plus_r + b_plus_n + 1, one, precision, 1e5); + a_plus_r + b_plus_n + 1, one, precision_t, max_steps); if constexpr (!is_constant::value || !is_constant::value) { T_partials_return digamma_r_alpha = digamma(a_plus_r); @@ -143,7 +147,7 @@ inline return_type_t beta_neg_binomial_lccdf( } } - return ops_partials.build(P); + return ops_partials.build(log_ccdf); } } // namespace math diff --git a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp 
b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp index 82a3da2eb45..b7379161a3a 100644 --- a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp +++ b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp @@ -19,6 +19,13 @@ class AgradCcdfLogBetaNegBinomial : public AgradCcdfLogTest { param[3] = 0.5; // beta parameters.push_back(param); ccdf_log.push_back(std::log(1.0 - 0.967906252841089)); // expected ccdf_log + + param[0] = 0; // n + param[1] = 0.01; // r + param[2] = 100; // alpha + param[3] = 0.01; // beta + parameters.push_back(param); + ccdf_log.push_back(std::log(1.0 - 0.999998995084973)); // expected ccdf_log } void invalid_values(vector& index, vector& value) { From 7fc9aab80aeca738d75943907f0f0a2ea8967e3f Mon Sep 17 00:00:00 2001 From: Zhi Ling <1336265834@qq.com> Date: Thu, 17 Oct 2024 21:59:42 +0800 Subject: [PATCH 3/6] recall the added test --- .../beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp index b7379161a3a..82a3da2eb45 100644 --- a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp +++ b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp @@ -19,13 +19,6 @@ class AgradCcdfLogBetaNegBinomial : public AgradCcdfLogTest { param[3] = 0.5; // beta parameters.push_back(param); ccdf_log.push_back(std::log(1.0 - 0.967906252841089)); // expected ccdf_log - - param[0] = 0; // n - param[1] = 0.01; // r - param[2] = 100; // alpha - param[3] = 0.01; // beta - parameters.push_back(param); - ccdf_log.push_back(std::log(1.0 - 0.999998995084973)); // expected ccdf_log } void invalid_values(vector& index, vector& value) { From 939f03c5718e318ebfaffe0344f43d6e3f722d99 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Fri, 25 Oct 2024 15:26:10 -0400 Subject: [PATCH 4/6] adds template parameters for grad_f32 for 
only calculating gradients that are needed. Small cleanup for beta_neg_binomial_lccdf --- stan/math/prim/fun/grad_F32.hpp | 123 ++++++++++-------- stan/math/prim/fun/grad_pFq.hpp | 2 +- stan/math/prim/fun/hypergeometric_3F2.hpp | 39 +++--- .../prim/prob/beta_neg_binomial_lccdf.hpp | 67 +++++----- 4 files changed, 119 insertions(+), 112 deletions(-) diff --git a/stan/math/prim/fun/grad_F32.hpp b/stan/math/prim/fun/grad_F32.hpp index e8b1bae0e59..3f511013d06 100644 --- a/stan/math/prim/fun/grad_F32.hpp +++ b/stan/math/prim/fun/grad_F32.hpp @@ -24,7 +24,14 @@ namespace math { * This power-series representation converges for all gradients * under the same conditions as the 3F2 function itself. * - * @tparam T type of arguments and result + * @tparam T1 type of the elements of the gradient array g + * @tparam T2 type of a1 + * @tparam T3 type of a2 + * @tparam T4 type of a3 + * @tparam T5 type of b1 + * @tparam T6 type of b2 + * @tparam T7 type of z + * @tparam T8 type of precision * @param[out] g g pointer to array of six values of type T, result. * @param[in] a1 a1 see generalized hypergeometric function definition. * @param[in] a2 a2 see generalized hypergeometric function definition. 
@@ -35,84 +42,90 @@ namespace math { * @param[in] precision precision of the infinite sum * @param[in] max_steps number of steps to take */ -template -void grad_F32(T* g, const T& a1, const T& a2, const T& a3, const T& b1, - const T& b2, const T& z, const T& precision = 1e-6, +template +void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, + const T6& b2, const T7& z, const T8& precision = 1e-6, int max_steps = 1e5) { check_3F2_converges("grad_F32", a1, a2, a3, b1, b2, z); - using std::exp; - using std::fabs; - using std::log; - for (int i = 0; i < 6; ++i) { g[i] = 0.0; } - T log_g_old[6]; + T1 log_g_old[6]; for (auto& x : log_g_old) { x = NEGATIVE_INFTY; } - T log_t_old = 0.0; - T log_t_new = 0.0; + T1 log_t_old = 0.0; + T1 log_t_new = 0.0; - T log_z = log(z); + T7 log_z = log(z); - double log_t_new_sign = 1.0; - double log_t_old_sign = 1.0; - double log_g_old_sign[6]; + T1 log_t_new_sign = 1.0; + T1 log_t_old_sign = 1.0; + T1 log_g_old_sign[6]; for (int i = 0; i < 6; ++i) { log_g_old_sign[i] = 1.0; } - + std::array term{0}; for (int k = 0; k <= max_steps; ++k) { - T p = (a1 + k) * (a2 + k) * (a3 + k) / ((b1 + k) * (b2 + k) * (1 + k)); + T1 p = (a1 + k) * (a2 + k) * (a3 + k) / ((b1 + k) * (b2 + k) * (1 + k)); if (p == 0) { return; } log_t_new += log(fabs(p)) + log_z; log_t_new_sign = p >= 0.0 ? log_t_new_sign : -log_t_new_sign; + if constexpr (grad_a1) { + term[0] = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) + + inv(a1 + k); + log_g_old[0] = log_t_new + log(fabs(term[0])); + log_g_old_sign[0] = term[0] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[0] += log_g_old_sign[0] * exp(log_g_old[0]); + } + + if constexpr (grad_a2) { + term[1] = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) + + inv(a2 + k); + log_g_old[1] = log_t_new + log(fabs(term[1])); + log_g_old_sign[1] = term[1] >= 0.0 ? 
log_t_new_sign : -log_t_new_sign; + g[1] += log_g_old_sign[1] * exp(log_g_old[1]); + } + + if constexpr (grad_a3) { + term[2] = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) + + inv(a3 + k); + log_g_old[2] = log_t_new + log(fabs(term[2])); + log_g_old_sign[2] = term[2] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[2] += log_g_old_sign[2] * exp(log_g_old[2]); + } + + if constexpr (grad_b1) { + term[3] = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) + - inv(b1 + k); + log_g_old[3] = log_t_new + log(fabs(term[3])); + log_g_old_sign[3] = term[3] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[3] += log_g_old_sign[3] * exp(log_g_old[3]); + } + + if constexpr (grad_b2) { + term[4] = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) + - inv(b2 + k); + log_g_old[4] = log_t_new + log(fabs(term[4])); + log_g_old_sign[4] = term[4] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[4] += log_g_old_sign[4] * exp(log_g_old[4]); + } - // g_old[0] = t_new * (g_old[0] / t_old + 1.0 / (a1 + k)); - T term = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) - + inv(a1 + k); - log_g_old[0] = log_t_new + log(fabs(term)); - log_g_old_sign[0] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[1] = t_new * (g_old[1] / t_old + 1.0 / (a2 + k)); - term = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) - + inv(a2 + k); - log_g_old[1] = log_t_new + log(fabs(term)); - log_g_old_sign[1] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[2] = t_new * (g_old[2] / t_old + 1.0 / (a3 + k)); - term = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) - + inv(a3 + k); - log_g_old[2] = log_t_new + log(fabs(term)); - log_g_old_sign[2] = term >= 0.0 ? 
log_t_new_sign : -log_t_new_sign; - - // g_old[3] = t_new * (g_old[3] / t_old - 1.0 / (b1 + k)); - term = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) - - inv(b1 + k); - log_g_old[3] = log_t_new + log(fabs(term)); - log_g_old_sign[3] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[4] = t_new * (g_old[4] / t_old - 1.0 / (b2 + k)); - term = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) - - inv(b2 + k); - log_g_old[4] = log_t_new + log(fabs(term)); - log_g_old_sign[4] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[5] = t_new * (g_old[5] / t_old + 1.0 / z); - term = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) - + inv(z); - log_g_old[5] = log_t_new + log(fabs(term)); - log_g_old_sign[5] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - for (int i = 0; i < 6; ++i) { - g[i] += log_g_old_sign[i] * exp(log_g_old[i]); + if constexpr (grad_z) { + term[5] = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) + + inv(z); + log_g_old[5] = log_t_new + log(fabs(term[5])); + log_g_old_sign[5] = term[5] >= 0.0 ? 
log_t_new_sign : -log_t_new_sign; + g[5] += log_g_old_sign[5] * exp(log_g_old[5]); } if (log_t_new <= log(precision)) { diff --git a/stan/math/prim/fun/grad_pFq.hpp b/stan/math/prim/fun/grad_pFq.hpp index 2275662a443..bb0b00eee75 100644 --- a/stan/math/prim/fun/grad_pFq.hpp +++ b/stan/math/prim/fun/grad_pFq.hpp @@ -89,7 +89,7 @@ template , typename Ta_Rtn = promote_scalar_t>, typename Tb_Rtn = promote_scalar_t>> -std::tuple grad_pFq(const TpFq& pfq_val, const Ta& a, +inline std::tuple grad_pFq(const TpFq& pfq_val, const Ta& a, const Tb& b, const Tz& z, double precision = 1e-14, int max_steps = 1e6) { diff --git a/stan/math/prim/fun/hypergeometric_3F2.hpp b/stan/math/prim/fun/hypergeometric_3F2.hpp index 130408fd291..d1a4078fd5c 100644 --- a/stan/math/prim/fun/hypergeometric_3F2.hpp +++ b/stan/math/prim/fun/hypergeometric_3F2.hpp @@ -17,36 +17,33 @@ namespace stan { namespace math { namespace internal { template , - typename ArrayAT = Eigen::Array, 3, 1>, - typename ArrayBT = Eigen::Array, 3, 1>, require_all_vector_t* = nullptr, require_stan_scalar_t* = nullptr> -T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, +inline return_type_t hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, double precision = 1e-6, int max_steps = 1e5) { - ArrayAT a_array = as_array_or_scalar(a); - ArrayBT b_array = append_row(as_array_or_scalar(b), 1.0); + using T_return = return_type_t; + Eigen::Array, 3, 1> a_array = as_array_or_scalar(a); + Eigen::Array, 3, 1> b_array = append_row(as_array_or_scalar(b), 1.0); check_3F2_converges("hypergeometric_3F2", a_array[0], a_array[1], a_array[2], b_array[0], b_array[1], z); T_return t_acc = 1.0; T_return log_t = 0.0; - T_return log_z = log(fabs(z)); - Eigen::ArrayXi a_signs = sign(value_of_rec(a_array)); - Eigen::ArrayXi b_signs = sign(value_of_rec(b_array)); - plain_type_t apk = a_array; - plain_type_t bpk = b_array; + auto log_z = log(fabs(z)); + Eigen::Array a_signs = sign(value_of_rec(a_array)); + 
Eigen::Array b_signs = sign(value_of_rec(b_array)); int z_sign = sign(value_of_rec(z)); int t_sign = z_sign * a_signs.prod() * b_signs.prod(); int k = 0; - while (k <= max_steps && log_t >= log(precision)) { + const double log_precision = log(precision); + while (k <= max_steps && log_t >= log_precision) { // Replace zero values with 1 prior to taking the log so that we accumulate // 0.0 rather than -inf - const auto& abs_apk = math::fabs((apk == 0).select(1.0, apk)); - const auto& abs_bpk = math::fabs((bpk == 0).select(1.0, bpk)); - T_return p = sum(log(abs_apk)) - sum(log(abs_bpk)); + const auto& abs_apk = math::fabs((a_array == 0).select(1.0, a_array)); + const auto& abs_bpk = math::fabs((b_array == 0).select(1.0, b_array)); + auto p = sum(log(abs_apk)) - sum(log(abs_bpk)); if (p == NEGATIVE_INFTY) { return t_acc; } @@ -59,10 +56,10 @@ T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, "overflow hypergeometric function did not converge."); } k++; - apk.array() += 1.0; - bpk.array() += 1.0; - a_signs = sign(value_of_rec(apk)); - b_signs = sign(value_of_rec(bpk)); + a_array += 1.0; + b_array += 1.0; + a_signs = sign(value_of_rec(a_array)); + b_signs = sign(value_of_rec(b_array)); t_sign = a_signs.prod() * b_signs.prod() * t_sign; } if (k == max_steps) { @@ -115,7 +112,7 @@ T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, template * = nullptr, require_stan_scalar_t* = nullptr> -auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { +inline auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { check_3F2_converges("hypergeometric_3F2", a[0], a[1], a[2], b[0], b[1], z); // Boost's pFq throws convergence errors in some cases, fallback to naive // infinite-sum approach (tests pass for these) @@ -143,7 +140,7 @@ auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { */ template * = nullptr> -auto hypergeometric_3F2(const std::initializer_list& a, +inline auto hypergeometric_3F2(const 
std::initializer_list& a, const std::initializer_list& b, const Tz& z) { return hypergeometric_3F2(std::vector(a), std::vector(b), z); } diff --git a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp index e5dd7d6c760..83b9d1af953 100644 --- a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp +++ b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp @@ -43,12 +43,6 @@ template inline return_type_t beta_neg_binomial_lccdf( const T_n& n, const T_r& r, const T_alpha& alpha, const T_beta& beta, const double precision = 1e-8, const int max_steps = 1e6) { - using std::exp; - using std::log; - using T_partials_return = partials_return_t; - using T_r_ref = ref_type_t; - using T_alpha_ref = ref_type_t; - using T_beta_ref = ref_type_t; static constexpr const char* function = "beta_neg_binomial_lccdf"; check_consistent_sizes( function, "Failures variable", n, "Number of successes parameter", r, @@ -57,67 +51,70 @@ inline return_type_t beta_neg_binomial_lccdf( return 0; } + using T_r_ref = ref_type_t; T_r_ref r_ref = r; + using T_alpha_ref = ref_type_t; T_alpha_ref alpha_ref = alpha; + using T_beta_ref = ref_type_t; T_beta_ref beta_ref = beta; check_positive_finite(function, "Number of successes parameter", r_ref); check_positive_finite(function, "Prior success parameter", alpha_ref); check_positive_finite(function, "Prior failure parameter", beta_ref); - T_partials_return log_ccdf(0.0); - auto ops_partials = make_partials_propagator(r_ref, alpha_ref, beta_ref); scalar_seq_view n_vec(n); scalar_seq_view r_vec(r_ref); scalar_seq_view alpha_vec(alpha_ref); scalar_seq_view beta_vec(beta_ref); - size_t size_n = stan::math::size(n); + int size_n = stan::math::size(n); size_t max_size_seq_view = max_size(n, r, alpha, beta); // Explicit return for extreme values // The gradients are technically ill-defined, but treated as zero - for (size_t i = 0; i < size_n; i++) { + for (int i = 0; i < size_n; i++) { if (n_vec.val(i) < 0) { - return 
ops_partials.build(0.0); + return 0.0; } } + using T_partials_return = partials_return_t; + T_partials_return log_ccdf(0.0); + auto ops_partials = make_partials_propagator(r_ref, alpha_ref, beta_ref); for (size_t i = 0; i < max_size_seq_view; i++) { // Explicit return for extreme values // The gradients are technically ill-defined, but treated as zero if (n_vec.val(i) == std::numeric_limits::max()) { return ops_partials.build(negative_infinity()); } - T_partials_return n_dbl = n_vec.val(i); - T_partials_return r_dbl = r_vec.val(i); - T_partials_return alpha_dbl = alpha_vec.val(i); - T_partials_return beta_dbl = beta_vec.val(i); - T_partials_return b_plus_n = beta_dbl + n_dbl; - T_partials_return r_plus_n = r_dbl + n_dbl; - T_partials_return a_plus_r = alpha_dbl + r_dbl; - T_partials_return one = 1; - T_partials_return precision_t - = precision; // default -6, set -8 to pass all tests - - T_partials_return F - = hypergeometric_3F2({one, b_plus_n + 1, r_plus_n + 1}, - {n_dbl + 2, a_plus_r + b_plus_n + 1}, one); - T_partials_return C = lgamma(r_plus_n + 1) + lbeta(a_plus_r, b_plus_n + 1) + auto n_dbl = n_vec.val(i); + auto r_dbl = r_vec.val(i); + auto alpha_dbl = alpha_vec.val(i); + auto beta_dbl = beta_vec.val(i); + auto b_plus_n = beta_dbl + n_dbl; + auto r_plus_n = r_dbl + n_dbl; + auto a_plus_r = alpha_dbl + r_dbl; + using a_t = return_type_t; + using b_t = return_type_t; + auto F + = hypergeometric_3F2( + std::initializer_list{1.0, b_plus_n + 1.0, r_plus_n + 1.0}, + std::initializer_list{n_dbl + 2.0, a_plus_r + b_plus_n + 1.0}, 1.0); + auto C = lgamma(r_plus_n + 1.0) + lbeta(a_plus_r, b_plus_n + 1.0) - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) - lgamma(n_dbl + 2); - T_partials_return ccdf = exp(C) * F; - T_partials_return log_ccdf_i = log(ccdf); - log_ccdf += log_ccdf_i; + log_ccdf += C + stan::math::log(F); if constexpr (!is_constant_all::value) { - T_partials_return digamma_n_r_alpha_beta - = digamma(a_plus_r + b_plus_n + 1); + auto digamma_n_r_alpha_beta + = 
digamma(a_plus_r + b_plus_n + 1.0); T_partials_return dF[6]; - grad_F32(dF, one, b_plus_n + 1, r_plus_n + 1, n_dbl + 2, - a_plus_r + b_plus_n + 1, one, precision_t, max_steps); + grad_F32::value, + !is_constant_all::value, false, true, false>(dF, 1.0, + b_plus_n + 1.0, r_plus_n + 1.0, n_dbl + 2.0, + a_plus_r + b_plus_n + 1.0, 1.0, precision, max_steps); if constexpr (!is_constant::value || !is_constant::value) { - T_partials_return digamma_r_alpha = digamma(a_plus_r); + auto digamma_r_alpha = digamma(a_plus_r); if constexpr (!is_constant_all::value) { partials<0>(ops_partials)[i] += digamma(r_plus_n + 1) @@ -133,7 +130,7 @@ inline return_type_t beta_neg_binomial_lccdf( if constexpr (!is_constant::value || !is_constant::value) { - T_partials_return digamma_alpha_beta = digamma(alpha_dbl + beta_dbl); + auto digamma_alpha_beta = digamma(alpha_dbl + beta_dbl); if constexpr (!is_constant::value) { partials<1>(ops_partials)[i] += digamma_alpha_beta; } From dcb7beeb36f1a06edb1fd11da2cbc50b3dab6e07 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Fri, 25 Oct 2024 15:27:54 -0400 Subject: [PATCH 5/6] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- stan/math/prim/fun/grad_F32.hpp | 26 ++++++++++++------- stan/math/prim/fun/grad_pFq.hpp | 9 ++++--- stan/math/prim/fun/hypergeometric_3F2.hpp | 12 +++++---- .../prim/prob/beta_neg_binomial_lccdf.hpp | 26 +++++++++---------- 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/stan/math/prim/fun/grad_F32.hpp b/stan/math/prim/fun/grad_F32.hpp index 3f511013d06..f683cca4d8d 100644 --- a/stan/math/prim/fun/grad_F32.hpp +++ b/stan/math/prim/fun/grad_F32.hpp @@ -43,9 +43,9 @@ namespace math { * @param[in] max_steps number of steps to take */ template + bool grad_b1 = true, bool grad_b2 = true, bool grad_z = true, + typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8 = double> void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, 
const T6& b2, const T7& z, const T8& precision = 1e-6, int max_steps = 1e5) { @@ -81,15 +81,17 @@ void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, log_t_new += log(fabs(p)) + log_z; log_t_new_sign = p >= 0.0 ? log_t_new_sign : -log_t_new_sign; if constexpr (grad_a1) { - term[0] = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) - + inv(a1 + k); + term[0] + = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) + + inv(a1 + k); log_g_old[0] = log_t_new + log(fabs(term[0])); log_g_old_sign[0] = term[0] >= 0.0 ? log_t_new_sign : -log_t_new_sign; g[0] += log_g_old_sign[0] * exp(log_g_old[0]); } if constexpr (grad_a2) { - term[1] = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) + term[1] + = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) + inv(a2 + k); log_g_old[1] = log_t_new + log(fabs(term[1])); log_g_old_sign[1] = term[1] >= 0.0 ? log_t_new_sign : -log_t_new_sign; @@ -97,7 +99,8 @@ void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, } if constexpr (grad_a3) { - term[2] = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) + term[2] + = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) + inv(a3 + k); log_g_old[2] = log_t_new + log(fabs(term[2])); log_g_old_sign[2] = term[2] >= 0.0 ? log_t_new_sign : -log_t_new_sign; @@ -105,7 +108,8 @@ void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, } if constexpr (grad_b1) { - term[3] = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) + term[3] + = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) - inv(b1 + k); log_g_old[3] = log_t_new + log(fabs(term[3])); log_g_old_sign[3] = term[3] >= 0.0 ? 
log_t_new_sign : -log_t_new_sign; @@ -113,7 +117,8 @@ void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, } if constexpr (grad_b2) { - term[4] = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) + term[4] + = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) - inv(b2 + k); log_g_old[4] = log_t_new + log(fabs(term[4])); log_g_old_sign[4] = term[4] >= 0.0 ? log_t_new_sign : -log_t_new_sign; @@ -121,7 +126,8 @@ void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, } if constexpr (grad_z) { - term[5] = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) + term[5] + = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) + inv(z); log_g_old[5] = log_t_new + log(fabs(term[5])); log_g_old_sign[5] = term[5] >= 0.0 ? log_t_new_sign : -log_t_new_sign; diff --git a/stan/math/prim/fun/grad_pFq.hpp b/stan/math/prim/fun/grad_pFq.hpp index bb0b00eee75..8970d3b85f0 100644 --- a/stan/math/prim/fun/grad_pFq.hpp +++ b/stan/math/prim/fun/grad_pFq.hpp @@ -89,10 +89,11 @@ template , typename Ta_Rtn = promote_scalar_t>, typename Tb_Rtn = promote_scalar_t>> -inline std::tuple grad_pFq(const TpFq& pfq_val, const Ta& a, - const Tb& b, const Tz& z, - double precision = 1e-14, - int max_steps = 1e6) { +inline std::tuple grad_pFq(const TpFq& pfq_val, + const Ta& a, const Tb& b, + const Tz& z, + double precision = 1e-14, + int max_steps = 1e6) { using std::max; using Ta_Array = Eigen::Array, -1, 1>; using Tb_Array = Eigen::Array, -1, 1>; diff --git a/stan/math/prim/fun/hypergeometric_3F2.hpp b/stan/math/prim/fun/hypergeometric_3F2.hpp index d1a4078fd5c..68479d1cbcc 100644 --- a/stan/math/prim/fun/hypergeometric_3F2.hpp +++ b/stan/math/prim/fun/hypergeometric_3F2.hpp @@ -19,12 +19,13 @@ namespace internal { template * = nullptr, require_stan_scalar_t* = nullptr> -inline return_type_t hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, - double precision = 1e-6, - int 
max_steps = 1e5) { +inline return_type_t hypergeometric_3F2_infsum( + const Ta& a, const Tb& b, const Tz& z, double precision = 1e-6, + int max_steps = 1e5) { using T_return = return_type_t; Eigen::Array, 3, 1> a_array = as_array_or_scalar(a); - Eigen::Array, 3, 1> b_array = append_row(as_array_or_scalar(b), 1.0); + Eigen::Array, 3, 1> b_array + = append_row(as_array_or_scalar(b), 1.0); check_3F2_converges("hypergeometric_3F2", a_array[0], a_array[1], a_array[2], b_array[0], b_array[1], z); @@ -141,7 +142,8 @@ inline auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { template * = nullptr> inline auto hypergeometric_3F2(const std::initializer_list& a, - const std::initializer_list& b, const Tz& z) { + const std::initializer_list& b, + const Tz& z) { return hypergeometric_3F2(std::vector(a), std::vector(b), z); } diff --git a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp index 83b9d1af953..ebb9834afe2 100644 --- a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp +++ b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp @@ -61,7 +61,6 @@ inline return_type_t beta_neg_binomial_lccdf( check_positive_finite(function, "Prior success parameter", alpha_ref); check_positive_finite(function, "Prior failure parameter", beta_ref); - scalar_seq_view n_vec(n); scalar_seq_view r_vec(r_ref); scalar_seq_view alpha_vec(alpha_ref); @@ -94,24 +93,23 @@ inline return_type_t beta_neg_binomial_lccdf( auto r_plus_n = r_dbl + n_dbl; auto a_plus_r = alpha_dbl + r_dbl; using a_t = return_type_t; - using b_t = return_type_t; - auto F - = hypergeometric_3F2( - std::initializer_list{1.0, b_plus_n + 1.0, r_plus_n + 1.0}, - std::initializer_list{n_dbl + 2.0, a_plus_r + b_plus_n + 1.0}, 1.0); + using b_t = return_type_t; + auto F = hypergeometric_3F2( + std::initializer_list{1.0, b_plus_n + 1.0, r_plus_n + 1.0}, + std::initializer_list{n_dbl + 2.0, a_plus_r + b_plus_n + 1.0}, + 1.0); auto C = lgamma(r_plus_n + 1.0) + lbeta(a_plus_r, 
b_plus_n + 1.0) - - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) - - lgamma(n_dbl + 2); + - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) - lgamma(n_dbl + 2); log_ccdf += C + stan::math::log(F); if constexpr (!is_constant_all::value) { - auto digamma_n_r_alpha_beta - = digamma(a_plus_r + b_plus_n + 1.0); + auto digamma_n_r_alpha_beta = digamma(a_plus_r + b_plus_n + 1.0); T_partials_return dF[6]; - grad_F32::value, - !is_constant_all::value, false, true, false>(dF, 1.0, - b_plus_n + 1.0, r_plus_n + 1.0, n_dbl + 2.0, - a_plus_r + b_plus_n + 1.0, 1.0, precision, max_steps); + grad_F32::value, !is_constant_all::value, + false, true, false>(dF, 1.0, b_plus_n + 1.0, r_plus_n + 1.0, + n_dbl + 2.0, a_plus_r + b_plus_n + 1.0, 1.0, + precision, max_steps); if constexpr (!is_constant::value || !is_constant::value) { auto digamma_r_alpha = digamma(a_plus_r); From 73c406631aa8b79ebdf4d92a6fc37a648f859325 Mon Sep 17 00:00:00 2001 From: Steve Bronder Date: Fri, 25 Oct 2024 15:28:47 -0400 Subject: [PATCH 6/6] add docs for new grad_F32 template parameters --- stan/math/prim/fun/grad_F32.hpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/stan/math/prim/fun/grad_F32.hpp b/stan/math/prim/fun/grad_F32.hpp index 3f511013d06..9de5ebafd1c 100644 --- a/stan/math/prim/fun/grad_F32.hpp +++ b/stan/math/prim/fun/grad_F32.hpp @@ -24,14 +24,20 @@ namespace math { * This power-series representation converges for all gradients * under the same conditions as the 3F2 function itself. 
* - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g - * @tparam T1 type of g + * @tparam grad_a1 boolean indicating if gradient with respect to a1 is required + * @tparam grad_a2 boolean indicating if gradient with respect to a2 is required + * @tparam grad_a3 boolean indicating if gradient with respect to a3 is required + * @tparam grad_b1 boolean indicating if gradient with respect to b1 is required + * @tparam grad_b2 boolean indicating if gradient with respect to b2 is required + * @tparam grad_z boolean indicating if gradient with respect to z is required + * @tparam T1 a scalar type + * @tparam T2 a scalar type + * @tparam T3 a scalar type + * @tparam T4 a scalar type + * @tparam T5 a scalar type + * @tparam T6 a scalar type + * @tparam T7 a scalar type + * @tparam T8 a scalar type * @param[out] g g pointer to array of six values of type T, result. * @param[in] a1 a1 see generalized hypergeometric function definition. * @param[in] a2 a2 see generalized hypergeometric function definition.