diff --git a/stan/math/prim/fun/grad_F32.hpp b/stan/math/prim/fun/grad_F32.hpp index e8b1bae0e59..a9ab44f3d09 100644 --- a/stan/math/prim/fun/grad_F32.hpp +++ b/stan/math/prim/fun/grad_F32.hpp @@ -24,7 +24,20 @@ namespace math { * This power-series representation converges for all gradients * under the same conditions as the 3F2 function itself. * - * @tparam T type of arguments and result + * @tparam grad_a1 boolean indicating if gradient with respect to a1 is required + * @tparam grad_a2 boolean indicating if gradient with respect to a2 is required + * @tparam grad_a3 boolean indicating if gradient with respect to a3 is required + * @tparam grad_b1 boolean indicating if gradient with respect to b1 is required + * @tparam grad_b2 boolean indicating if gradient with respect to b2 is required + * @tparam grad_z boolean indicating if gradient with respect to z is required + * @tparam T1 a scalar type + * @tparam T2 a scalar type + * @tparam T3 a scalar type + * @tparam T4 a scalar type + * @tparam T5 a scalar type + * @tparam T6 a scalar type + * @tparam T7 a scalar type + * @tparam T8 a scalar type * @param[out] g g pointer to array of six values of type T, result. * @param[in] a1 a1 see generalized hypergeometric function definition. * @param[in] a2 a2 see generalized hypergeometric function definition. @@ -35,84 +48,96 @@ namespace math { * @param[in] precision precision of the infinite sum * @param[in] max_steps number of steps to take */ -template -void grad_F32(T* g, const T& a1, const T& a2, const T& a3, const T& b1, - const T& b2, const T& z, const T& precision = 1e-6, +template +void grad_F32(T1* g, const T2& a1, const T3& a2, const T4& a3, const T5& b1, + const T6& b2, const T7& z, const T8& precision = 1e-6, int max_steps = 1e5) { check_3F2_converges("grad_F32", a1, a2, a3, b1, b2, z); - using std::exp; - using std::fabs; - using std::log; - for (int i = 0; i < 6; ++i) { g[i] = 0.0; } - T log_g_old[6]; + T1 log_g_old[6]; for (auto& x : log_g_old) { x = NEGATIVE_INFTY; } - T log_t_old = 0.0; - T log_t_new = 0.0; + T1 log_t_old = 0.0; + T1 log_t_new = 0.0; - T log_z = log(z); + T7 log_z = log(z); - double log_t_new_sign = 1.0; - double log_t_old_sign = 1.0; - double log_g_old_sign[6]; + T1 log_t_new_sign = 1.0; + T1 log_t_old_sign = 1.0; + T1 log_g_old_sign[6]; for (int i = 0; i < 6; ++i) { log_g_old_sign[i] = 1.0; } - + std::array term{0}; for (int k = 0; k <= max_steps; ++k) { - T p = (a1 + k) * (a2 + k) * (a3 + k) / ((b1 + k) * (b2 + k) * (1 + k)); + T1 p = (a1 + k) * (a2 + k) * (a3 + k) / ((b1 + k) * (b2 + k) * (1 + k)); if (p == 0) { return; } log_t_new += log(fabs(p)) + log_z; log_t_new_sign = p >= 0.0 ? log_t_new_sign : -log_t_new_sign; + if constexpr (grad_a1) { + term[0] + = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) + + inv(a1 + k); + log_g_old[0] = log_t_new + log(fabs(term[0])); + log_g_old_sign[0] = term[0] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[0] += log_g_old_sign[0] * exp(log_g_old[0]); + } + + if constexpr (grad_a2) { + term[1] + = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) + + inv(a2 + k); + log_g_old[1] = log_t_new + log(fabs(term[1])); + log_g_old_sign[1] = term[1] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[1] += log_g_old_sign[1] * exp(log_g_old[1]); + } + + if constexpr (grad_a3) { + term[2] + = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) + + inv(a3 + k); + log_g_old[2] = log_t_new + log(fabs(term[2])); + log_g_old_sign[2] = term[2] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[2] += log_g_old_sign[2] * exp(log_g_old[2]); + } + + if constexpr (grad_b1) { + term[3] + = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) + - inv(b1 + k); + log_g_old[3] = log_t_new + log(fabs(term[3])); + log_g_old_sign[3] = term[3] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[3] += log_g_old_sign[3] * exp(log_g_old[3]); + } + + if constexpr (grad_b2) { + term[4] + = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) + - inv(b2 + k); + log_g_old[4] = log_t_new + log(fabs(term[4])); + log_g_old_sign[4] = term[4] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[4] += log_g_old_sign[4] * exp(log_g_old[4]); + } - // g_old[0] = t_new * (g_old[0] / t_old + 1.0 / (a1 + k)); - T term = log_g_old_sign[0] * log_t_old_sign * exp(log_g_old[0] - log_t_old) - + inv(a1 + k); - log_g_old[0] = log_t_new + log(fabs(term)); - log_g_old_sign[0] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[1] = t_new * (g_old[1] / t_old + 1.0 / (a2 + k)); - term = log_g_old_sign[1] * log_t_old_sign * exp(log_g_old[1] - log_t_old) - + inv(a2 + k); - log_g_old[1] = log_t_new + log(fabs(term)); - log_g_old_sign[1] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[2] = t_new * (g_old[2] / t_old + 1.0 / (a3 + k)); - term = log_g_old_sign[2] * log_t_old_sign * exp(log_g_old[2] - log_t_old) - + inv(a3 + k); - log_g_old[2] = log_t_new + log(fabs(term)); - log_g_old_sign[2] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[3] = t_new * (g_old[3] / t_old - 1.0 / (b1 + k)); - term = log_g_old_sign[3] * log_t_old_sign * exp(log_g_old[3] - log_t_old) - - inv(b1 + k); - log_g_old[3] = log_t_new + log(fabs(term)); - log_g_old_sign[3] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[4] = t_new * (g_old[4] / t_old - 1.0 / (b2 + k)); - term = log_g_old_sign[4] * log_t_old_sign * exp(log_g_old[4] - log_t_old) - - inv(b2 + k); - log_g_old[4] = log_t_new + log(fabs(term)); - log_g_old_sign[4] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - // g_old[5] = t_new * (g_old[5] / t_old + 1.0 / z); - term = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) - + inv(z); - log_g_old[5] = log_t_new + log(fabs(term)); - log_g_old_sign[5] = term >= 0.0 ? log_t_new_sign : -log_t_new_sign; - - for (int i = 0; i < 6; ++i) { - g[i] += log_g_old_sign[i] * exp(log_g_old[i]); + if constexpr (grad_z) { + term[5] + = log_g_old_sign[5] * log_t_old_sign * exp(log_g_old[5] - log_t_old) + + inv(z); + log_g_old[5] = log_t_new + log(fabs(term[5])); + log_g_old_sign[5] = term[5] >= 0.0 ? log_t_new_sign : -log_t_new_sign; + g[5] += log_g_old_sign[5] * exp(log_g_old[5]); } if (log_t_new <= log(precision)) { diff --git a/stan/math/prim/fun/grad_pFq.hpp b/stan/math/prim/fun/grad_pFq.hpp index 2275662a443..8970d3b85f0 100644 --- a/stan/math/prim/fun/grad_pFq.hpp +++ b/stan/math/prim/fun/grad_pFq.hpp @@ -89,10 +89,11 @@ template , typename Ta_Rtn = promote_scalar_t>, typename Tb_Rtn = promote_scalar_t>> -std::tuple grad_pFq(const TpFq& pfq_val, const Ta& a, - const Tb& b, const Tz& z, - double precision = 1e-14, - int max_steps = 1e6) { +inline std::tuple grad_pFq(const TpFq& pfq_val, + const Ta& a, const Tb& b, + const Tz& z, + double precision = 1e-14, + int max_steps = 1e6) { using std::max; using Ta_Array = Eigen::Array, -1, 1>; using Tb_Array = Eigen::Array, -1, 1>; diff --git a/stan/math/prim/fun/hypergeometric_3F2.hpp b/stan/math/prim/fun/hypergeometric_3F2.hpp index 130408fd291..68479d1cbcc 100644 --- a/stan/math/prim/fun/hypergeometric_3F2.hpp +++ b/stan/math/prim/fun/hypergeometric_3F2.hpp @@ -17,36 +17,34 @@ namespace stan { namespace math { namespace internal { template , - typename ArrayAT = Eigen::Array, 3, 1>, - typename ArrayBT = Eigen::Array, 3, 1>, require_all_vector_t* = nullptr, require_stan_scalar_t* = nullptr> -T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, - double precision = 1e-6, - int max_steps = 1e5) { - ArrayAT a_array = as_array_or_scalar(a); - ArrayBT b_array = append_row(as_array_or_scalar(b), 1.0); +inline return_type_t hypergeometric_3F2_infsum( + const Ta& a, const Tb& b, const Tz& z, double precision = 1e-6, + int max_steps = 1e5) { + using T_return = return_type_t; + Eigen::Array, 3, 1> a_array = as_array_or_scalar(a); + Eigen::Array, 3, 1> b_array + = append_row(as_array_or_scalar(b), 1.0); check_3F2_converges("hypergeometric_3F2", a_array[0], a_array[1], a_array[2], b_array[0], b_array[1], z); T_return t_acc = 1.0; T_return log_t = 0.0; - T_return log_z = log(fabs(z)); - Eigen::ArrayXi a_signs = sign(value_of_rec(a_array)); - Eigen::ArrayXi b_signs = sign(value_of_rec(b_array)); - plain_type_t apk = a_array; - plain_type_t bpk = b_array; + auto log_z = log(fabs(z)); + Eigen::Array a_signs = sign(value_of_rec(a_array)); + Eigen::Array b_signs = sign(value_of_rec(b_array)); int z_sign = sign(value_of_rec(z)); int t_sign = z_sign * a_signs.prod() * b_signs.prod(); int k = 0; - while (k <= max_steps && log_t >= log(precision)) { + const double log_precision = log(precision); + while (k <= max_steps && log_t >= log_precision) { // Replace zero values with 1 prior to taking the log so that we accumulate // 0.0 rather than -inf - const auto& abs_apk = math::fabs((apk == 0).select(1.0, apk)); - const auto& abs_bpk = math::fabs((bpk == 0).select(1.0, bpk)); - T_return p = sum(log(abs_apk)) - sum(log(abs_bpk)); + const auto& abs_apk = math::fabs((a_array == 0).select(1.0, a_array)); + const auto& abs_bpk = math::fabs((b_array == 0).select(1.0, b_array)); + auto p = sum(log(abs_apk)) - sum(log(abs_bpk)); if (p == NEGATIVE_INFTY) { return t_acc; } @@ -59,10 +57,10 @@ T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, "overflow hypergeometric function did not converge."); } k++; - apk.array() += 1.0; - bpk.array() += 1.0; - a_signs = sign(value_of_rec(apk)); - b_signs = sign(value_of_rec(bpk)); + a_array += 1.0; + b_array += 1.0; + a_signs = sign(value_of_rec(a_array)); + b_signs = sign(value_of_rec(b_array)); t_sign = a_signs.prod() * b_signs.prod() * t_sign; } if (k == max_steps) { @@ -115,7 +113,7 @@ T_return hypergeometric_3F2_infsum(const Ta& a, const Tb& b, const Tz& z, template * = nullptr, require_stan_scalar_t* = nullptr> -auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { +inline auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { check_3F2_converges("hypergeometric_3F2", a[0], a[1], a[2], b[0], b[1], z); // Boost's pFq throws convergence errors in some cases, fallback to naive // infinite-sum approach (tests pass for these) @@ -143,8 +141,9 @@ auto hypergeometric_3F2(const Ta& a, const Tb& b, const Tz& z) { */ template * = nullptr> -auto hypergeometric_3F2(const std::initializer_list& a, - const std::initializer_list& b, const Tz& z) { +inline auto hypergeometric_3F2(const std::initializer_list& a, + const std::initializer_list& b, + const Tz& z) { return hypergeometric_3F2(std::vector(a), std::vector(b), z); } diff --git a/stan/math/prim/prob.hpp b/stan/math/prim/prob.hpp index 0760c087a1c..14c80636924 100644 --- a/stan/math/prim/prob.hpp +++ b/stan/math/prim/prob.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp new file mode 100644 index 00000000000..ebb9834afe2 --- /dev/null +++ b/stan/math/prim/prob/beta_neg_binomial_lccdf.hpp @@ -0,0 +1,150 @@ +#ifndef STAN_MATH_PRIM_PROB_BETA_NEG_BINOMIAL_LCCDF_HPP +#define STAN_MATH_PRIM_PROB_BETA_NEG_BINOMIAL_LCCDF_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** \ingroup prob_dists + * Returns the log CCDF of the Beta-Negative Binomial distribution with given + * number of successes, prior success, and prior failure parameters. + * Given containers of matching sizes, returns the log sum of probabilities. + * + * @tparam T_n type of failure parameter + * @tparam T_r type of number of successes parameter + * @tparam T_alpha type of prior success parameter + * @tparam T_beta type of prior failure parameter + * + * @param n failure parameter + * @param r Number of successes parameter + * @param alpha prior success parameter + * @param beta prior failure parameter + * @param precision precision for `grad_F32`, default \f$10^{-8}\f$ + * @param max_steps max iteration allowed for `grad_F32`, default \f$10^{-8}\f$ + * @return log probability or log sum of probabilities + * @throw std::domain_error if r, alpha, or beta fails to be positive + * @throw std::invalid_argument if container sizes mismatch + */ +template +inline return_type_t beta_neg_binomial_lccdf( + const T_n& n, const T_r& r, const T_alpha& alpha, const T_beta& beta, + const double precision = 1e-8, const int max_steps = 1e6) { + static constexpr const char* function = "beta_neg_binomial_lccdf"; + check_consistent_sizes( + function, "Failures variable", n, "Number of successes parameter", r, + "Prior success parameter", alpha, "Prior failure parameter", beta); + if (size_zero(n, r, alpha, beta)) { + return 0; + } + + using T_r_ref = ref_type_t; + T_r_ref r_ref = r; + using T_alpha_ref = ref_type_t; + T_alpha_ref alpha_ref = alpha; + using T_beta_ref = ref_type_t; + T_beta_ref beta_ref = beta; + check_positive_finite(function, "Number of successes parameter", r_ref); + check_positive_finite(function, "Prior success parameter", alpha_ref); + check_positive_finite(function, "Prior failure parameter", beta_ref); + + scalar_seq_view n_vec(n); + scalar_seq_view r_vec(r_ref); + scalar_seq_view alpha_vec(alpha_ref); + scalar_seq_view beta_vec(beta_ref); + int size_n = stan::math::size(n); + size_t max_size_seq_view = max_size(n, r, alpha, beta); + + // Explicit return for extreme values + // The gradients are technically ill-defined, but treated as zero + for (int i = 0; i < size_n; i++) { + if (n_vec.val(i) < 0) { + return 0.0; + } + } + + using T_partials_return = partials_return_t; + T_partials_return log_ccdf(0.0); + auto ops_partials = make_partials_propagator(r_ref, alpha_ref, beta_ref); + for (size_t i = 0; i < max_size_seq_view; i++) { + // Explicit return for extreme values + // The gradients are technically ill-defined, but treated as zero + if (n_vec.val(i) == std::numeric_limits::max()) { + return ops_partials.build(negative_infinity()); + } + auto n_dbl = n_vec.val(i); + auto r_dbl = r_vec.val(i); + auto alpha_dbl = alpha_vec.val(i); + auto beta_dbl = beta_vec.val(i); + auto b_plus_n = beta_dbl + n_dbl; + auto r_plus_n = r_dbl + n_dbl; + auto a_plus_r = alpha_dbl + r_dbl; + using a_t = return_type_t; + using b_t = return_type_t; + auto F = hypergeometric_3F2( + std::initializer_list{1.0, b_plus_n + 1.0, r_plus_n + 1.0}, + std::initializer_list{n_dbl + 2.0, a_plus_r + b_plus_n + 1.0}, + 1.0); + auto C = lgamma(r_plus_n + 1.0) + lbeta(a_plus_r, b_plus_n + 1.0) + - lgamma(r_dbl) - lbeta(alpha_dbl, beta_dbl) - lgamma(n_dbl + 2); + log_ccdf += C + stan::math::log(F); + + if constexpr (!is_constant_all::value) { + auto digamma_n_r_alpha_beta = digamma(a_plus_r + b_plus_n + 1.0); + T_partials_return dF[6]; + grad_F32::value, !is_constant_all::value, + false, true, false>(dF, 1.0, b_plus_n + 1.0, r_plus_n + 1.0, + n_dbl + 2.0, a_plus_r + b_plus_n + 1.0, 1.0, + precision, max_steps); + + if constexpr (!is_constant::value || !is_constant::value) { + auto digamma_r_alpha = digamma(a_plus_r); + if constexpr (!is_constant_all::value) { + partials<0>(ops_partials)[i] + += digamma(r_plus_n + 1) + + (digamma_r_alpha - digamma_n_r_alpha_beta) + + (dF[2] + dF[4]) / F - digamma(r_dbl); + } + if constexpr (!is_constant_all::value) { + partials<1>(ops_partials)[i] += digamma_r_alpha + - digamma_n_r_alpha_beta + dF[4] / F + - digamma(alpha_dbl); + } + } + + if constexpr (!is_constant::value + || !is_constant::value) { + auto digamma_alpha_beta = digamma(alpha_dbl + beta_dbl); + if constexpr (!is_constant::value) { + partials<1>(ops_partials)[i] += digamma_alpha_beta; + } + if constexpr (!is_constant::value) { + partials<2>(ops_partials)[i] + += digamma(b_plus_n + 1) - digamma_n_r_alpha_beta + + (dF[1] + dF[4]) / F + - (digamma(beta_dbl) - digamma_alpha_beta); + } + } + } + } + + return ops_partials.build(log_ccdf); +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp new file mode 100644 index 00000000000..82a3da2eb45 --- /dev/null +++ b/test/prob/beta_neg_binomial/beta_neg_binomial_ccdf_log_test.hpp @@ -0,0 +1,96 @@ +// Arguments: Ints, Doubles, Doubles, Doubles +#include +#include +#include + +using stan::math::var; +using std::numeric_limits; +using std::vector; + +class AgradCcdfLogBetaNegBinomial : public AgradCcdfLogTest { + public: + void valid_values(vector>& parameters, + vector& ccdf_log) { + vector param(4); + + param[0] = 10; // n + param[1] = 5.5; // r + param[2] = 2.5; // alpha + param[3] = 0.5; // beta + parameters.push_back(param); + ccdf_log.push_back(std::log(1.0 - 0.967906252841089)); // expected ccdf_log + } + + void invalid_values(vector& index, vector& value) { + // n + + // r + index.push_back(1U); + value.push_back(0.0); + + index.push_back(1U); + value.push_back(-1.0); + + index.push_back(1U); + value.push_back(std::numeric_limits::infinity()); + + // alpha + index.push_back(2U); + value.push_back(0.0); + + index.push_back(2U); + value.push_back(-1.0); + + index.push_back(2U); + value.push_back(std::numeric_limits::infinity()); + + // beta + index.push_back(3U); + value.push_back(0.0); + + index.push_back(3U); + value.push_back(-1.0); + + index.push_back(3U); + value.push_back(std::numeric_limits::infinity()); + } + + // BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK + bool has_lower_bound() { return false; } + + bool has_upper_bound() { return false; } + + template + stan::return_type_t ccdf_log(const T_n& n, + const T_r& r, + const T_size1& alpha, + const T_size2& beta, + const T4&, const T5&) { + return stan::math::beta_neg_binomial_lccdf(n, r, alpha, beta); + } + + template + stan::return_type_t ccdf_log_function( + const T_n& n, const T_r& r, const T_size1& alpha, const T_size2& beta, + const T4&, const T5&) { + using stan::math::lbeta; + using stan::math::lgamma; + using stan::math::log1m; + using stan::math::log_sum_exp; + using std::vector; + + vector> lpmf_values; + + for (int i = 0; i <= n; i++) { + auto lpmf = lbeta(i + r, alpha + beta) - lbeta(r, alpha) + + lgamma(i + beta) - lgamma(i + 1) - lgamma(beta); + lpmf_values.push_back(lpmf); + } + + auto log_cdf = log_sum_exp(lpmf_values); + + return log1m(exp(log_cdf)); + } +};