Constrain precision parameter during optimization (#8)

Also adjust some expressions for improved numerical accuracy. The model coefficients are unbounded but the precision must be positive in order for the solution to be feasible. We can impose such a constraint by setting the precision to the maximum of the updated value at the current step and machine epsilon. This bumps Newton back into the feasible region so that optimization can continue in the right direction. Based on ideas and discussion with Jason Manley and Phillip Alday. Both are included as coauthors of this commit. Fixes #6. Co-authored-by: Jason Manley <[email protected]> Co-authored-by: Phillip Alday <[email protected]>
ararslan · Jun 11, 2023 · 24f9a03 · 24f9a03 · ararslan · Jun 11, 2023
1 parent 861626b
commit 24f9a03
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 26 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "BetaRegression"
 uuid = "2339b9c3-daaf-4eaa-90d5-e8471159c344"
-version = "0.1.2"
+version = "0.1.3"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"

diff --git a/src/BetaRegression.jl b/src/BetaRegression.jl
@@ -317,13 +317,14 @@ function StatsAPI.score(b::BetaRegressionModel)
     ∂θ = zero(params(b))
     Tr = copy(η)
     @inbounds for i in eachindex(y, η)
-        ηᵢ = η[i]
-        μᵢ = linkinv(link, ηᵢ)
         yᵢ = y[i]
-        a = digamma((1 - μᵢ) * ϕ)
-        r = logit(yᵢ) - digamma(μᵢ * ϕ) + a
-        ∂θ[end] += μᵢ * r + log(1 - yᵢ) - a + ψϕ
-        Tr[i] = ϕ * r * mueta(link, ηᵢ)
+        μᵢ, omμᵢ, dμdη = inverselink(link, η[i])
+        ψp = digamma(ϕ * μᵢ)
+        ψq = digamma(ϕ * omμᵢ)
+        Δ = logit(yᵢ) - ψp + ψq   # logit(yᵢ) - 𝔼(logit(yᵢ))
+        z = log1p(-yᵢ) - ψq + ψϕ  # log(1 - yᵢ) - 𝔼(log(1 - yᵢ))
+        ∂θ[end] += fma(μᵢ, Δ, z)
+        Tr[i] = ϕ * Δ * dμdη
     end
     mul!(view(∂θ, 1:size(X, 2)), X', Tr)
     return ∂θ
@@ -333,14 +334,12 @@ end
 # Q for observed information (pg 10). `p = μ * ϕ` and `q = (1 - μ) * ϕ` are the beta
 # distribution parameters in the typical parameterization, `ψ′_` is `trigamma(_)`.
 function weightdiag(link, p, q, ψ′p, ψ′q, ϕ, yᵢ, ηᵢ, dμdη, expected)
-    w = ϕ * (ψ′p + ψ′q)
+    w = abs(ϕ) * (ψ′p + ψ′q)
     if expected
-        return sqrt(w) * dμdη
+        return sqrt(w) * abs(dμdη)
     else
         w *= dμdη^2
-        ystar = logit(yᵢ)
-        μstar = digamma(p) - digamma(q)
-        w += (ystar - μstar) * dmueta(link, ηᵢ)
+        w += (logit(yᵢ) - digamma(p) + digamma(q)) * dmueta(link, ηᵢ)
         return sqrt(w)
     end
 end
@@ -363,15 +362,14 @@ function 🐟(b::BetaRegressionModel, expected::Bool, inverse::Bool)
     γ = zero(ϕ)
     for i in eachindex(y, η, w)
         ηᵢ = η[i]
-        μᵢ = linkinv(link, ηᵢ)
+        μᵢ, omμᵢ, dμdη = inverselink(link, ηᵢ)
         p = μᵢ * ϕ
-        q = (1 - μᵢ) * ϕ
+        q = omμᵢ * ϕ
         ψ′p = trigamma(p)
         ψ′q = trigamma(q)
-        dμdη = mueta(link, ηᵢ)
         w[i] = weightdiag(link, p, q, ψ′p, ψ′q, ϕ, y[i], ηᵢ, dμdη, expected)
         Tc[i] = (ψ′p * p - ψ′q * q) * dμdη
-        γ += ψ′p * μᵢ^2 + ψ′q * (1 - μᵢ)^2 - ψ′ϕ
+        γ += ψ′p * μᵢ^2 + ψ′q * omμᵢ^2 - ψ′ϕ
     end
     Xᵀ = copy(adjoint(X))
     XᵀTc = Xᵀ * Tc
@@ -432,14 +430,16 @@ approximately zero. This is determined by `isapprox` using the specified `atol`
 """
 function StatsAPI.fit!(b::BetaRegressionModel; maxiter=100, atol=1e-8, rtol=1e-8)
     initialize!(b)
-    z = zero(params(b))
+    θ = params(b)
+    z = zero(θ)
     for iter in 1:maxiter
         U = score(b)
         checkfinite(U, iter)
         isapprox(U, z; atol, rtol) && return b  # converged!
         K = 🐟(b, true, true)
         checkfinite(K, iter)
-        mul!(params(b), K, U, true, true)
+        mul!(θ, K, U, true, true)
+        θ[end] = max(θ[end], eps(eltype(θ)))  # impose positivity constraint on ϕ
         linearpredictor!(b)
     end
     throw(ConvergenceException(maxiter))

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,4 +1,5 @@
 using BetaRegression
+using Distributions
 using GLM
 using StatsBase
 using Test
@@ -212,13 +213,32 @@ end
     end
 end
 
-@testset "Resetting an invalid initial precision" begin
+@testset "Parameter constraints" begin
+    # Generating distribution and `rand(_, 10)` for several problematic cases. Without
+    # constraints placed on the parameters during optimization, the models built using
+    # these as their respective responses fail to converge. See issue #6.
+    d = [Beta(0.5, 0.5) => [0.9020980693394288, 0.055577211500829754, 0.23132559790498958,
+                            0.5813942170987118, 0.9709116084487788, 0.7754094004739907,
+                            0.05982031817793439, 0.8670342033149658, 0.683216406088941,
+                            0.141451701046685],
+         Beta(0.2, 0.2) => [0.023848292440454045, 0.9355547503109088, 0.9924242111793663,
+                            0.008946868197901494, 0.04010019793873337, 0.16627955105469605,
+                            0.999828498409377, 0.9999768604670911, 0.9930500996079485,
+                            0.9642281316220574],
+         Beta(0.5, 10) => [0.034541975300900515, 0.01939889173586482, 0.24100845407491417,
+                           0.0011108618208461997, 0.00937646060618697, 0.014267329875521517,
+                           0.04085538895184706, 0.016118136919340345, 0.008924027953777908,
+                           0.0760514673345253],
+         Beta(10, 0.3) => [0.9946023297631961, 0.9680165382504563, 0.999142668249286,
+                           0.9917649725155366, 0.9843468826146887, 0.9999997547187489,
+                           0.9943098787910513, 0.9991354297579114, 0.9521187943069395,
+                           0.9805165186163991]]
     X = ones(10, 1)
-    # Generated via `Beta(0.5, 0.5)`
-    y = [0.9020980693394288, 0.055577211500829754, 0.23132559790498958,
-         0.5813942170987118, 0.9709116084487788, 0.7754094004739907,
-         0.05982031817793439, 0.8670342033149658, 0.683216406088941,
-         0.141451701046685]
-    model = fit(BetaRegressionModel, X, y)
-    @test linkinv(Link(model), only(coef(model))) ≈ mean(y) rtol=0.05
+    @testset "Generated from $dist" for (dist, y) in d
+        model = fit(BetaRegressionModel, X, y)
+        @test linkinv(Link(model), only(coef(model))) ≈ mean(y) rtol=0.05
+        μ̂ = mean(fitted(model))
+        ϕ̂ = precision(model)
+        @test μ̂ * (1 - μ̂) / (1 + ϕ̂) ≈ var(y) rtol=0.5
+    end
 end