Reapply #113 and prepare release of StatsFuns 1.0.0 (#142)

devmotion · andreasnoack · web-flow · commit c8f50cff5c7c · 2022-04-30T01:24:19.000+02:00
* Use julia implementations for pdfs and some cdf-like functions (#113) * Use Julia implementations for (log)pdfs. Adjust Rmath tests to use testsets to give more informative errors. * Use Julia implementations for most cdf-like functions of the Chisq distribution. * Change Beta distribution to be using native implementations. * Switch binomial cdfs to use native implementations (but not the inverses) * Use native implementations for most cdf like gamma functions * Just use the gamma defitions for the chisq * Use the beta definitions for binomial * Add native implementions for Poisson's cdf-like functions * Rely on SpecialFunctions promotion handling in beta functions * Rely on gamma_inc_inv promotion in gammainv(c)cdf * Apply suggestions from code review * Add compat for HypergeometricFunctions * Handle negative arguments in Poisson (log)(c)cdf * Handle negative x in Gamma (log)(c)cdf * Handle arguments outside support for Beta and Fdist (log)(c)cdf * Fix Fdist quantile functions * Handle arguments out of support in Binomial (log)(c)cdf and apply the floor function to make the results correct for non-integer arguments * Avoid accidental promotion to Float64 in fdist(log)(c)cdf * Fix typo in "x" calculation for fdist(log)(c)cdf * Drop redundant low tolerance tests for Beta(1000, 2). We are now generally more accurate than Rmath so failure are because of Rmath. * Handle some non-finite edge cases in gamma and binomial * Only switch to log-version of betalogcdf when the result is tiny * Handle the degenerate cases in beta(log)(c)cdf when alpha is zero * Handle the degenerate case of the gamma distribution when k==0. Also, only calculate the log(c)cdf based on the hypergeometric function when p is zero or subnormal * Address review comments * Avoid rational return type in gamma(log)(c)cdf * Handle the case when alpha and beta are zero in the betacdf * Fix compat entries * Remove precompile statement * Update version number Co-authored-by: Andreas Noack <andreas@noack.dk>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -26,8 +26,9 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
-          - '1'
+          - '1.3' # oldest supported version
+          - '1.6' # LTS
+          - '1' # latest release
         os:
           - ubuntu-latest
           - macos-latest
diff --git a/Project.toml b/Project.toml
@@ -1,9 +1,10 @@
 name = "StatsFuns"
 uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
-version = "0.9.18"
+version = "1.0.0"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+HypergeometricFunctions = "34004b35-14d8-5ef3-9330-4cdb6864b03a"
 InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
@@ -13,13 +14,14 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 
 [compat]
 ChainRulesCore = "1"
+HypergeometricFunctions = "0.3"
 InverseFunctions = "0.1"
 IrrationalConstants = "0.1"
 LogExpFunctions = "0.3.2"
 Reexport = "1"
-Rmath = "0.4, 0.5, 0.6, 0.7"
-SpecialFunctions = "0.8, 0.9, 0.10, 1.0, 2"
-julia = "1"
+Rmath = "0.6.1, 0.7"
+SpecialFunctions = "1.8.4, 2.1.4"
+julia = "1.3"
 
 [extras]
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
diff --git a/src/StatsFuns.jl b/src/StatsFuns.jl
@@ -1,5 +1,3 @@
-__precompile__(true)
-
 module StatsFuns
 
 using Base: Math.@horner
diff --git a/src/distrs/beta.jl b/src/distrs/beta.jl
@@ -1,14 +1,17 @@
 # functions related to beta distributions
 
+using HypergeometricFunctions: _₂F₁
+
 # R implementations
-# For pdf and logpdf we use the Julia implementation
 using .RFunctions:
-    betacdf,
-    betaccdf,
-    betalogcdf,
-    betalogccdf,
-    betainvcdf,
-    betainvccdf,
+    # betapdf,
+    # betalogpdf,
+    # betacdf,
+    # betaccdf,
+    # betalogcdf,
+    # betalogccdf,
+    # betainvcdf,
+    # betainvccdf,
     betainvlogcdf,
     betainvlogccdf
 
@@ -22,3 +25,60 @@ function betalogpdf(α::T, β::T, x::T) where {T<:Real}
     val = xlogy(α - 1, y) + xlog1py(β - 1, -y) - logbeta(α, β)
     return x < 0 || x > 1 ? oftype(val, -Inf) : val
 end
+
+function betacdf(α::Real, β::Real, x::Real)
+    # Handle a degenerate case
+    if iszero(α) && β > 0
+        return float(last(promote(α, β, x, x >= 0)))
+    end
+
+    return first(beta_inc(α, β, clamp(x, 0, 1)))
+end
+
+function betaccdf(α::Real, β::Real, x::Real)
+    # Handle a degenerate case
+    if iszero(α) && β > 0
+        return float(last(promote(α, β, x, x < 0)))
+    end
+
+    last(beta_inc(α, β, clamp(x, 0, 1)))
+end
+
+# The log version is currently based on non-log version. When the cdf is very small we shift
+# to an implementation based on the hypergeometric function ₂F₁ to avoid underflow.
+function betalogcdf(α::T, β::T, x::T) where {T<:Real}
+    # Handle a degenerate case
+    if iszero(α) && β > 0
+        return log(last(promote(x, x >= 0)))
+    end
+
+    _x = clamp(x, 0, 1)
+    p, q = beta_inc(α, β, _x)
+    if p < floatmin(p)
+        # see https://dlmf.nist.gov/8.17#E7
+        return -log(α) + xlogy(α, _x) + log(_₂F₁(promote(α, 1 - β, α + 1, _x)...)) - logbeta(α, β)
+    elseif p <= 0.7
+        return log(p)
+    else
+        return log1p(-q)
+    end
+end
+betalogcdf(α::Real, β::Real, x::Real) = betalogcdf(promote(α, β, x)...)
+
+function betalogccdf(α::Real, β::Real, x::Real)
+    # Handle a degenerate case
+    if iszero(α) && β > 0
+        return log(last(promote(α, β, x, x < 0)))
+    end
+
+    p, q = beta_inc(α, β, clamp(x, 0, 1))
+    if q < 0.7
+        return log(q)
+    else
+        return log1p(-p)
+    end
+end
+
+betainvcdf(α::Real, β::Real, p::Real) = first(beta_inc_inv(α, β, p))
+
+betainvccdf(α::Real, β::Real, p::Real) = last(beta_inc_inv(β, α, p))
diff --git a/src/distrs/binom.jl b/src/distrs/binom.jl
@@ -1,18 +1,18 @@
 # functions related to binomial distribution
 
 # R implementations
-# For pdf and logpdf we use the Julia implementation
 using .RFunctions:
-    binomcdf,
-    binomccdf,
-    binomlogcdf,
-    binomlogccdf,
+    # binompdf,
+    # binomlogpdf,
+    # binomcdf,
+    # binomccdf,
+    # binomlogcdf,
+    # binomlogccdf,
     binominvcdf,
     binominvccdf,
     binominvlogcdf,
     binominvlogccdf
 
-
 # Julia implementations
 binompdf(n::Real, p::Real, k::Real) = exp(binomlogpdf(n, p, k))
 
@@ -22,3 +22,23 @@ function binomlogpdf(n::T, p::T, k::T) where {T<:Real}
     val = min(0, betalogpdf(m + 1, n - m + 1, p) - log(n + 1))
     return 0 <= k <= n && isinteger(k) ? val : oftype(val, -Inf)
 end
+
+for l in ("", "log"), compl in (false, true)
+    fbinom = Symbol(string("binom", l, ifelse(compl, "c", "" ), "cdf"))
+    fbeta  = Symbol(string("beta" , l, ifelse(compl,  "", "c"), "cdf"))
+    @eval function ($fbinom)(n::Real, p::Real, k::Real)
+        if isnan(k)
+            return last(promote(n, p, k))
+        end
+        res = ($fbeta)(max(0, floor(k) + 1), max(0, n - floor(k)), p)
+
+        # When p == 1 the betaccdf doesn't return the correct result
+        # so these cases have to be special cased
+        if isone(p)
+            newres = oftype(res, $compl ? k < n : k >= n)
+            return $(l === "" ? :newres : :(log(newres)))
+        else
+            return res
+        end
+    end
+end
diff --git a/src/distrs/chisq.jl b/src/distrs/chisq.jl
@@ -1,22 +1,11 @@
 # functions related to chi-square distribution
 
-# R implementations
-# For pdf and logpdf we use the Julia implementation
-using .RFunctions:
-    chisqcdf,
-    chisqccdf,
-    chisqlogcdf,
-    chisqlogccdf,
-    chisqinvcdf,
-    chisqinvccdf,
-    chisqinvlogcdf,
-    chisqinvlogccdf
-
-# Julia implementations
-# promotion ensures that we do forward e.g. `chisqpdf(::Int, ::Float32)` to
-# `gammapdf(::Float32, ::Int, ::Float32)` but not `gammapdf(::Float64, ::Int, ::Float32)`
-chisqpdf(k::Real, x::Real) = chisqpdf(promote(k, x)...)
-chisqpdf(k::T, x::T) where {T<:Real} = gammapdf(k / 2, 2, x)
-
-chisqlogpdf(k::Real, x::Real) = chisqlogpdf(promote(k, x)...)
-chisqlogpdf(k::T, x::T) where {T<:Real} = gammalogpdf(k / 2, 2, x)
+# Just use the Gamma definitions
+for f in ("pdf", "logpdf", "cdf", "ccdf", "logcdf", "logccdf", "invcdf", "invccdf", "invlogcdf", "invlogccdf")
+    _chisqf = Symbol("chisq"*f)
+    _gammaf = Symbol("gamma"*f)
+    @eval begin
+        $(_chisqf)(k::Real, x::Real) = $(_chisqf)(promote(k, x)...)
+        $(_chisqf)(k::T, x::T) where {T<:Real} = $(_gammaf)(k/2, 2, x)
+    end
+end
diff --git a/src/distrs/fdist.jl b/src/distrs/fdist.jl
@@ -1,17 +1,5 @@
 # functions related to F distribution
 
-# R implementations
-# For pdf and logpdf we use the Julia implementation
-using .RFunctions:
-    fdistcdf,
-    fdistccdf,
-    fdistlogcdf,
-    fdistlogccdf,
-    fdistinvcdf,
-    fdistinvccdf,
-    fdistinvlogcdf,
-    fdistinvlogccdf
-
 # Julia implementations
 fdistpdf(ν1::Real, ν2::Real, x::Real) = exp(fdistlogpdf(ν1, ν2, x))
 
@@ -23,3 +11,19 @@ function fdistlogpdf(ν1::T, ν2::T, x::T) where {T<:Real}
     val = (xlogy(ν1, ν1ν2) + xlogy(ν1 - 2, y) - xlogy(ν1 + ν2, 1 + ν1ν2 * y)) / 2 - logbeta(ν1 / 2, ν2 / 2)
     return x < 0 ? oftype(val, -Inf) : val
 end
+
+for f in ("cdf", "ccdf", "logcdf", "logccdf")
+    ff = Symbol("fdist"*f)
+    bf = Symbol("beta"*f)
+    @eval $ff(ν1::T, ν2::T, x::T) where {T<:Real} = $bf(ν1/2, ν2/2, inv(1 + ν2/(ν1*max(0, x))))
+    @eval $ff(ν1::Real, ν2::Real, x::Real) = $ff(promote(ν1, ν2, x)...)
+end
+for f in ("invcdf", "invccdf", "invlogcdf", "invlogccdf")
+    ff = Symbol("fdist"*f)
+    bf = Symbol("beta"*f)
+    @eval function $ff(ν1::T, ν2::T, y::T) where {T<:Real}
+        x = $bf(ν1/2, ν2/2, y)
+        return x/(1 - x)*ν2/ν1
+    end
+    @eval $ff(ν1::Real, ν2::Real, y::Real) = $ff(promote(ν1, ν2, y)...)
+end
diff --git a/src/distrs/gamma.jl b/src/distrs/gamma.jl
@@ -1,14 +1,17 @@
 # functions related to gamma distribution
 
+using HypergeometricFunctions: drummond1F1
+
 # R implementations
-# For pdf and logpdf we use the Julia implementation
 using .RFunctions:
-    gammacdf,
-    gammaccdf,
-    gammalogcdf,
-    gammalogccdf,
-    gammainvcdf,
-    gammainvccdf,
+    # gammapdf,
+    # gammalogpdf,
+    # gammacdf,
+    # gammaccdf,
+    # gammalogcdf,
+    # gammalogccdf,
+    # gammainvcdf,
+    # gammainvccdf,
     gammainvlogcdf,
     gammainvlogccdf
 
@@ -22,3 +25,80 @@ function gammalogpdf(k::T, θ::T, x::T) where {T<:Real}
     val = -loggamma(k) + xlogy(k - 1, xθ) - log(θ) - xθ
     return x < 0 ? oftype(val, -Inf) : val
 end
+
+function gammacdf(k::T, θ::T, x::T) where {T<:Real}
+    # Handle the degenerate case
+    if iszero(k)
+        return float(last(promote(x, x >= 0)))
+    end
+    return first(gamma_inc(k, max(0, x)/θ))
+end
+gammacdf(k::Real, θ::Real, x::Real) = gammacdf(map(float, promote(k, θ, x))...)
+
+function gammaccdf(k::T, θ::T, x::T) where {T<:Real}
+    # Handle the degenerate case
+    if iszero(k)
+        return float(last(promote(x, x < 0)))
+    end
+    return last(gamma_inc(k, max(0, x)/θ))
+end
+gammaccdf(k::Real, θ::Real, x::Real) = gammaccdf(map(float, promote(k, θ, x))...)
+
+gammalogcdf(k::Real, θ::Real, x::Real) = _gammalogcdf(map(float, promote(k, θ, x))...)
+
+# Implemented via the non-log version. For tiny values, we recompute the result with
+# loggamma. In that situation, there is little risk of significant cancellation.
+function _gammalogcdf(k::Float64, θ::Float64, x::Float64)
+    # Handle the degenerate case
+    if iszero(k)
+        return log(x >= 0)
+    end
+
+    xdθ = max(0, x)/θ
+    l, u = gamma_inc(k, xdθ)
+    if l < floatmin(Float64) && isfinite(k) && isfinite(xdθ)
+        return -log(k) + k*log(xdθ) - xdθ + log(drummond1F1(1.0, 1 + k, xdθ)) - loggamma(k)
+    elseif l < 0.7
+        return log(l)
+    else
+        return log1p(-u)
+    end
+end
+function _gammalogcdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}}
+    return T(_gammalogcdf(Float64(k), Float64(θ), Float64(x)))
+end
+
+gammalogccdf(k::Real, θ::Real, x::Real) = _gammalogccdf(map(float, promote(k, θ, x))...)
+
+# Implemented via the non-log version. For tiny values, we recompute the result with
+# loggamma. In that situation, there is little risk of significant cancellation.
+function _gammalogccdf(k::Float64, θ::Float64, x::Float64)
+    # Handle the degenerate case
+    if iszero(k)
+        return log(x < 0)
+    end
+
+    xdθ = max(0, x)/θ
+    l, u = gamma_inc(k, xdθ)
+    if u < floatmin(Float64)
+        return loggamma(k, xdθ) - loggamma(k)
+    elseif u < 0.7
+        return log(u)
+    else
+        return log1p(-l)
+    end
+end
+
+function _gammalogccdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}}
+    return T(_gammalogccdf(Float64(k), Float64(θ), Float64(x)))
+end
+
+function gammainvcdf(k::Real, θ::Real, p::Real)
+    _k, _θ, _p = promote(k, θ, p)
+    return _θ*gamma_inc_inv(_k, _p, 1 - _p)
+end
+
+function gammainvccdf(k::Real, θ::Real, p::Real)
+    _k, _θ, _p = promote(k, θ, p)
+    return _θ*gamma_inc_inv(_k, 1 - _p, _p)
+end
diff --git a/src/distrs/pois.jl b/src/distrs/pois.jl
@@ -1,12 +1,13 @@
 # functions related to Poisson distribution
 
 # R implementations
-# For pdf and logpdf we use the Julia implementation
 using .RFunctions:
-    poiscdf,
-    poisccdf,
-    poislogcdf,
-    poislogccdf,
+    # poispdf,
+    # poislogpdf,
+    # poiscdf,
+    # poisccdf,
+    # poislogcdf,
+    # poislogccdf,
     poisinvcdf,
     poisinvccdf,
     poisinvlogcdf,
@@ -20,3 +21,12 @@ function poislogpdf(λ::T, x::T) where {T <: Real}
     val = xlogy(x, λ) - λ - loggamma(x + 1)
     return x >= 0 && isinteger(x) ? val : oftype(val, -Inf)
 end
+
+# Just use the Gamma definitions
+poiscdf(λ::Real, x::Real) = gammaccdf(max(0, floor(x + 1)), 1, λ)
+
+poisccdf(λ::Real, x::Real) = gammacdf(max(0, floor(x + 1)), 1, λ)
+
+poislogcdf(λ::Real, x::Real) = gammalogccdf(max(0, floor(x + 1)), 1, λ)
+
+poislogccdf(λ::Real, x::Real) = gammalogcdf(max(0, floor(x + 1)), 1, λ)
diff --git a/src/distrs/tdist.jl b/src/distrs/tdist.jl
diff --git a/test/rmath.jl b/test/rmath.jl

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,3 @@`
`1`		`-__precompile__(true)`
`2`		`-`
`3`	`1`	`module StatsFuns`
`4`	`2`
`5`	`3`	`using Base: Math.@horner`