From 670d2c7e24e841e80cb0944d50c607471debaa1e Mon Sep 17 00:00:00 2001 From: Jose Storopoli Date: Mon, 26 Dec 2022 11:49:39 -0300 Subject: [PATCH] Move images to Makie (#72) * added .JuliaFormatter.toml * makie 02_bayes_stats.jl * makie 03_prob_dist * format 01_why_Julia * format everything * makie conversion * using stuff multiline * makie 04_Turing * remove plots stuff from Project.toml * animations and makie 05_MCMC * fix setrange in 12_Turing_tricks --- .JuliaFormatter.toml | 1 + Project.toml | 4 +- _literate/01_why_Julia.jl | 18 +- _literate/02_bayes_stats.jl | 54 +-- _literate/03_prob_dist.jl | 209 ++++++------ _literate/04_Turing.jl | 53 +-- _literate/05_MCMC.jl | 539 +++++++++++++++++------------- _literate/06_linear_reg.jl | 6 +- _literate/07_logistic_reg.jl | 24 +- _literate/08_ordinal_reg.jl | 49 +-- _literate/09_count_reg.jl | 39 +-- _literate/10_robust_reg.jl | 71 ++-- _literate/11_multilevel_models.jl | 33 +- _literate/12_Turing_tricks.jl | 62 ++-- _literate/13_epi_models.jl | 77 +++-- 15 files changed, 686 insertions(+), 553 deletions(-) create mode 100644 .JuliaFormatter.toml diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000..c7439503 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "blue" \ No newline at end of file diff --git a/Project.toml b/Project.toml index 76801f81..396996c2 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -14,15 +15,12 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Franklin = "713c75ef-9fc9-4b05-94a9-213340da978e" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" -LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" NodeJS = "2bd173c7-0d6d-553b-b6af-13a54713934c" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] diff --git a/_literate/01_why_Julia.jl b/_literate/01_why_Julia.jl index ada9e33c..6ba42a12 100644 --- a/_literate/01_why_Julia.jl +++ b/_literate/01_why_Julia.jl @@ -76,7 +76,11 @@ # Here is Julia: # ```julia -# using Random, StatsBase, DataFrames, BenchmarkTools, Chain +# using Random +# using StatsBase +# using DataFrames +# using BenchmarkTools +# using Chain # Random.seed!(123) # # n = 10_000 @@ -474,12 +478,16 @@ # to define the action that one type `Pet` takes when it meets another `Pet`: abstract type Pet end -struct Dog <: Pet name::String end -struct Cat <: Pet name::String end +struct Dog <: Pet + name::String +end +struct Cat <: Pet + name::String +end function encounter(a::Pet, b::Pet) verb = meets(a, b) - println("$(a.name) meets $(b.name) and $verb") + return println("$(a.name) meets $(b.name) and $verb") end meets(a::Dog, b::Dog) = "sniffs"; @@ -639,7 +647,7 @@ using BenchmarkTools # with a simple column selection. 
We do this by defining a new method of the `*` function (multiplier function) # of `Base` Julia. Additionally we also create a new optimized method of `inner()` for dealing with `OneHotVector`: -import Base:* +import Base: * *(A::AbstractMatrix, v::OneHotVector) = A[:, v.ind] inner(v::OneHotVector, A, w::OneHotVector) = A[v.ind, w.ind] diff --git a/_literate/02_bayes_stats.jl b/_literate/02_bayes_stats.jl index 780c58e4..4b2290ad 100644 --- a/_literate/02_bayes_stats.jl +++ b/_literate/02_bayes_stats.jl @@ -416,24 +416,23 @@ # and the 75% percentile of the probability density of $\theta$. In this example, MLE leads to estimated values that are not # consistent with the actual probability density of the value of $\theta$. -using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions d = LogNormal(0, 2) range_d = 0:0.001:4 q25 = quantile(d, 0.25) q75 = quantile(d, 0.75) -plot((range_d, pdf.(d, range_d)), - leg=false, - xlims=(-0.2, 4.2), - lw=3, - xlabel=L"\theta", - ylabel="Density") -scatter!((mode(d), pdf(d, mode(d))), mc=:green, ms=5) -plot!(range(q25, stop=q75, length=100), - x -> pdf(d, x), - lc=false, fc=:blues, - fill=true, fa=0.5) -savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide +credint = range(q25; stop=q75, length=100) +f, ax, l = lines( + range_d, + pdf.(d, range_d); + linewidth=3, + axis=(; limits=(-0.2, 4.2, nothing, nothing), xlabel=L"\theta", ylabel="Density"), +) +scatter!(ax, mode(d), pdf(d, mode(d)); color=:green, markersize=12) +band!(ax, credint, 0.0, pdf.(d, credint); color=(:steelblue, 0.5)) +save(joinpath(@OUTPUT, "lognormal.svg"), f); # hide # \fig{lognormal} # \center{_**Log-Normal**: Maximum Likelihood Estimate vs Credible Intervals_} \\ @@ -454,19 +453,22 @@ range_d = -2:0.01:14 sim_d = rand(d, 10_000) q25 = quantile(sim_d, 0.25) q75 = quantile(sim_d, 0.75) -plot((range_d, pdf.(d, range_d)), - leg=false, - xlims=(-2, 14), - xticks=[0, 5, 10], - lw=3, - xlabel=L"\theta", - ylabel="Density") -scatter!((mode(d2), pdf(d, mode(d2))), mc=:green, ms=5) -plot!(range(q25, stop=q75, length=100), - x -> pdf(d, x), - lc=false, fc=:blues, - fill=true, fa=0.5) -savefig(joinpath(@OUTPUT, "mixture.svg")); # hide +credint = range(q25; stop=q75, length=100) + +f, ax, l = lines( + range_d, + pdf.(d, range_d); + linewidth=3, + axis=(; + limits=(-2, 14, nothing, nothing), + xticks=[0, 5, 10], + xlabel=L"\theta", + ylabel="Density", + ), +) +scatter!(ax, mode(d2), pdf(d, mode(d2)); color=:green, markersize=12) +band!(ax, credint, 0.0, pdf.(d, credint); color=(:steelblue, 0.5)) +save(joinpath(@OUTPUT, "mixture.svg"), f); # hide # \fig{mixture} # \center{_**Mixture**: Maximum Likelihood Estimate vs Credible Intervals_} \\ diff --git a/_literate/03_prob_dist.jl b/_literate/03_prob_dist.jl index 264d8ba3..8fd815f3 100644 --- a/_literate/03_prob_dist.jl +++ b/_literate/03_prob_dist.jl @@ -41,16 +41,20 @@ # Example: a 6-sided dice. 
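# As a quick sanity check (a minimal snippet using only `Distributions.jl`; `d6` is just an
# illustrative name), every face of a fair six-sided die carries the same mass of
# 1/(6 - 1 + 1) = 1/6 and the expected value is (1 + 6)/2 = 3.5:

using Distributions

d6 = DiscreteUniform(1, 6)
pdf(d6, 3) # ≈ 0.167, the same mass for any face
mean(d6)   # 3.5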
-using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions -plot(DiscreteUniform(1, 6), - label="6-sided Dice", - markershape=:circle, +f, ax, b = barplot( + DiscreteUniform(1, 6); + axis=(; + title="6-sided Dice", xlabel=L"\theta", ylabel="Mass", - ylims=(0, 0.3) - ) -savefig(joinpath(@OUTPUT, "discrete_uniform.svg")); # hide + xticks=1:6, + limits=(nothing, nothing, 0, 0.3), + ), +) +save(joinpath(@OUTPUT, "discrete_uniform.svg"), f); # hide # \fig{discrete_uniform} # \center{*Discrete Uniform between 1 and 6*} \\ @@ -68,19 +72,23 @@ savefig(joinpath(@OUTPUT, "discrete_uniform.svg")); # hide # Example: Whether the patient survived or died or whether the customer completes their purchase or not. -plot(Bernoulli(0.5), - markershape=:circle, - label=L"p=0.5", - alpha=0.5, +f, ax1, b = barplot( + Bernoulli(0.5); + width=0.3, + axis=(; + title=L"p=0.5", xlabel=L"\theta", ylabel="Mass", - ylim=(0, 1) - ) -plot!(Bernoulli(0.2), - markershape=:circle, - label=L"p=0.2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "bernoulli.svg")); # hide + xticks=0:1, + limits=(nothing, nothing, 0, 1), + ), +) +ax2 = Axis( + f[1, 2]; title=L"p=0.2", xlabel=L"\theta", xticks=0:1, limits=(nothing, nothing, 0, 1) +) +barplot!(ax2, Bernoulli(0.2); width=0.3) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "bernoulli.svg"), f); # hide # \fig{bernoulli} # \center{*Bernoulli with $p = \{ 0.5, 0.2 \}$*} \\ @@ -98,19 +106,13 @@ savefig(joinpath(@OUTPUT, "bernoulli.svg")); # hide # Example: number of heads in 5 coin flips. -plot(Binomial(5, 0.5), - markershape=:circle, - label=L"p=0.5", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(Binomial(5, 0.2), - markershape=:circle, - label=L"p=0.2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "binomial.svg")); # hide - +f, ax1, b = barplot( + Binomial(5, 0.5); axis=(; title=L"p=0.5", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"p=0.2", xlabel=L"\theta") +barplot!(ax2, Binomial(5, 0.2)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "binomial.svg"), f); # hide # \fig{binomial} # \center{*Binomial with $n=5$ and $p = \{ 0.5, 0.2 \}$*} \\ @@ -126,27 +128,23 @@ savefig(joinpath(@OUTPUT, "binomial.svg")); # hide # Example: Number of emails you receive daily. Number of holes you find on the street. -plot(Poisson(1), - markershape=:circle, - label=L"\lambda=1", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(Poisson(4), - markershape=:circle, - label=L"\lambda=4", - alpha=0.5) -savefig(joinpath(@OUTPUT, "poisson.svg")); # hide +f, ax1, b = barplot( + Poisson(1); axis=(; title=L"\lambda=1", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"\lambda=4", xlabel=L"\theta") +barplot!(ax2, Poisson(4)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "poisson.svg"), f); # hide # \fig{poisson} # \center{*Poisson with $\lambda = \{ 1, 4 \}$*} \\ # ### Negative Binomial -# The negative binomial distribution describes an event of the number of successes in a sequence of $n$ independent experiment(s), -# each asking a yes-no question with probability $p$ until $k$ success(es) is obtained. Note that it becomes identical to the -# Poisson distribution at the limit of $k \to \infty$. This makes the negative binomial a robust option to replace a Poisson +# The negative binomial distribution describes an event of the number of failures before the $k$th success in a sequence of $n$ independent experiment(s), +# each asking a yes-no question with probability $p$. 
+# Note that it becomes identical to the Poisson distribution at the limit of $k \to \infty$. +# This makes the negative binomial a robust option to replace a Poisson # distribution to model phenomena with a overdispersion* (excess expected variation in data). # The negative binomial distribution has two parameters and its notation is $\text{NB} (k, p)$ or $\text{Negative-Binomial} (k, p)$: @@ -159,18 +157,13 @@ savefig(joinpath(@OUTPUT, "poisson.svg")); # hide # Example: Annual count of tropical cyclones. -plot(NegativeBinomial(1, 0.5), - markershape=:circle, - label=L"k=1", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(NegativeBinomial(2, 0.5), - markershape=:circle, - label=L"k=2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "negbinomial.svg")); # hide +f, ax1, b = barplot( + NegativeBinomial(1, 0.5); axis=(; title=L"k=1", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"k=2", xlabel=L"\theta") +barplot!(ax2, NegativeBinomial(2, 0.5)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "negbinomial.svg"), f); # hide # \fig{negbinomial} # \center{*Negative Binomial with $p=0.5$ and $r = \{ 1, 2 \}$*} \\ @@ -202,16 +195,16 @@ savefig(joinpath(@OUTPUT, "negbinomial.svg")); # hide # Example: Height, Weight, etc. -plot(Normal(0, 1), - label=L"\sigma=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(-4, 4) - ) -plot!(Normal(0, 0.5), label=L"\sigma=0.5", lw=5) -plot!(Normal(0, 2), label=L"\sigma=2", lw=5) -savefig(joinpath(@OUTPUT, "normal.svg")); # hide +f, ax, l = lines( + Normal(0, 1); + label=L"\sigma=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(-4, 4, nothing, nothing)), +) +lines!(ax, Normal(0, 0.5); label=L"\sigma=0.5", linewidth=5) +lines!(ax, Normal(0, 2); label=L"\sigma=2", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "normal.svg"), f); # hide # \fig{normal} # \center{*Normal with $\mu=0$ and $\sigma = \{ 1, 0.5, 2 \}$*} \\ @@ -234,16 +227,16 @@ savefig(joinpath(@OUTPUT, "normal.svg")); # hide # * Mean ($\mu$): natural logarithm of the mean the distribution # * Standard Deviation ($\sigma$): natural logarithm of the variance of the distribution ($\sigma^2$) is a measure of the dispersion of the observations in relation to the mean -plot(LogNormal(0, 1), - label=L"\sigma=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 3) - ) -plot!(LogNormal(0, 0.25), label=L"\sigma=0.25", lw=5) -plot!(LogNormal(0, 0.5), label=L"\sigma=0.5", lw=5) -savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide +f, ax, l = lines( + LogNormal(0, 1); + label=L"\sigma=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 3, nothing, nothing)), +) +lines!(ax, LogNormal(0, 0.25); label=L"\sigma=0.25", linewidth=5) +lines!(ax, LogNormal(0, 0.5); label=L"\sigma=0.5", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "lognormal.svg"), f); # hide # \fig{lognormal} # \center{*Log-Normal with $\mu=0$ and $\sigma = \{ 1, 0.25, 0.5 \}$*} \\ @@ -259,16 +252,16 @@ savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide # Example: How long until the next earthquake. How long until the next bus arrives. 
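# A property worth keeping in mind for waiting-time examples like these is that the
# exponential distribution is *memoryless*: how long you have already waited tells you
# nothing about how much longer you will wait. A quick numerical check of this
# (a toy snippet, with `d_wait` being just an illustrative name):

using Distributions

d_wait = Exponential(1)
# P(X > 3 | X > 1) equals P(X > 2): the elapsed wait does not change the odds
ccdf(d_wait, 3) / ccdf(d_wait, 1) ≈ ccdf(d_wait, 2) # true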
-plot(Exponential(1), - label=L"\lambda=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 4.5) - ) -plot!(Exponential(0.5), label=L"\lambda=0.5", lw=5) -plot!(Exponential(1.5), label=L"\lambda=2", lw=5) -savefig(joinpath(@OUTPUT, "exponential.svg")); # hide +f, ax, l = lines( + Exponential(1); + label=L"\lambda=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 4.5, nothing, nothing)), +) +lines!(ax, Exponential(0.5); label=L"\lambda=0.5", linewidth=5) +lines!(ax, Exponential(1.5); label=L"\lambda=2", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "exponential.svg"), f); # hide # \fig{exponential} # \center{*Exponential with $\lambda = \{ 1, 0.5, 1.5 \}$*} \\ @@ -292,16 +285,16 @@ savefig(joinpath(@OUTPUT, "exponential.svg")); # hide # Example: A database full of outliers. -plot(TDist(2), - label=L"\nu=2", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(-4, 4) - ) -plot!(TDist(8), label=L"\nu=8", lw=5) -plot!(TDist(30), label=L"\nu=30", lw=5) -savefig(joinpath(@OUTPUT, "tdist.svg")); # hide +f, ax, l = lines( + TDist(2); + label=L"\nu=2", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(-4, 4, nothing, nothing)), +) +lines!(ax, TDist(8); label=L"\nu=8", linewidth=5) +lines!(ax, TDist(30); label=L"\nu=30", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "tdist.svg"), f); # hide # \fig{tdist} # \center{*Student-$t$ with $\nu = \{ 2, 8, 30 \}$*} \\ @@ -319,16 +312,16 @@ savefig(joinpath(@OUTPUT, "tdist.svg")); # hide # Example: A basketball player has made already scored 5 free throws while missing 3 in a total of 8 attempts # -- $\text{Beta}(3, 5)$. -plot(Beta(1, 1), - label=L"a=b=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 1) - ) -plot!(Beta(3, 2), label=L"a=3, b=2", lw=5) -plot!(Beta(2, 3), label=L"a=2, b=3", lw=5) -savefig(joinpath(@OUTPUT, "beta.svg")); # hide +f, ax, l = lines( + Beta(1, 1); + label=L"a=b=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 1, nothing, nothing)), +) +lines!(ax, Beta(3, 2); label=L"a=3, b=2", linewidth=5) +lines!(ax, Beta(2, 3); label=L"a=2, b=3", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "beta.svg"), f); # hide # \fig{beta} # \center{*Beta with different values of $a$ and $b$*} \\ diff --git a/_literate/04_Turing.jl b/_literate/04_Turing.jl index 65319d61..34fb4f12 100644 --- a/_literate/04_Turing.jl +++ b/_literate/04_Turing.jl @@ -84,19 +84,18 @@ # Graphically this means: -using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions dice = DiscreteUniform(1, 6) -plot(dice, +f, ax, b = barplot( + dice; label="six-sided Dice", - markershape=:circle, - ms=5, - xlabel=L"\theta", - ylabel="Mass", - ylims=(0, 0.3) + axis=(; xlabel=L"\theta", ylabel="Mass", xticks=1:6, limits=(nothing, nothing, 0, 0.3)), ) -vline!([mean(dice)], lw=5, col=:red, label=L"E(\theta)") -savefig(joinpath(@OUTPUT, "dice.svg")); # hide +vlines!(ax, [mean(dice)]; linewidth=5, color=:red, label=L"E(\theta)") +axislegend(ax) +save(joinpath(@OUTPUT, "dice.svg"), f); # hide # \fig{dice} # \center{*A "fair" six-sided Dice: Discrete Uniform between 1 and 6*} \\ @@ -148,15 +147,15 @@ using Random Random.seed!(123); -data = rand(DiscreteUniform(1, 6), 1_000); +my_data = rand(DiscreteUniform(1, 6), 1_000); -# The vector `data` is a 1,000-length vector of `Int`s ranging from 1 to 6, just like how a regular six-sided dice outcome would be: +# The vector `my_data` is a 1,000-length vector of `Int`s ranging from 1 to 6, just like how a 
regular six-sided dice outcome would be: -first(data, 5) +first(my_data, 5) -# Once the model is specified we instantiate the model with the single parameter `y` as the simulated `data`: +# Once the model is specified we instantiate the model with the single parameter `y` as the simulated `my_data`: -model = dice_throw(data); +model = dice_throw(my_data); # Next, we call Turing's `sample()` function that takes a Turing model as a first argument, along with a # sampler as the second argument, and the third argument is the number of iterations. Here, I will use the `NUTS()` sampler from @@ -207,10 +206,22 @@ sum([idx * i for (i, idx) in enumerate(summaries[:, :mean])]) typeof(chain) -# We can use plotting capabilities of `MCMCChains.jl` with any `Chains` object: - -plot(chain) -savefig(joinpath(@OUTPUT, "chain.svg")); # hide +# Since `Chains` is a [`Tables.jl`-compatible](https://github.com/JuliaData/Tables.jl/blob/main/INTEGRATIONS.md) data structure, +# we can use all of the plotting capabilities from [`AlgebraOfGraphics.jl`](https://aog.makie.org/stable/). + +using AlgebraOfGraphics +using AlgebraOfGraphics: density +#exclude additional information such as log probability +params = names(chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt1 = data(chain) * mapping(:iteration) * chain_mapping * visual(Lines) +plt2 = data(chain) * chain_mapping * density() +f = Figure(; resolution=(800, 600)) +draw!(f[1, 1], plt1) +draw!(f[1, 2], plt2; axis=(; ylabel="density")) +save(joinpath(@OUTPUT, "chain.svg"), f); # hide # \fig{chain} # \center{*Visualization of a MCMC Chain simulation*} \\ @@ -244,12 +255,12 @@ prior_chain = sample(model, Prior(), 2_000); # To draw from the prior and posterior predictive distributions we instantiate a "predictive model", *i.e.* a Turing model but with the observations set to `missing`, and then calling `predict()` on the predictive model and the previously drawn samples. # First let's do the *prior* predictive check: -missing_data = similar(data, Missing) # vector of `missing` +missing_data = similar(my_data, Missing) # vector of `missing` model_missing = dice_throw(missing_data) # instantiate the "predictive model prior_check = predict(model_missing, prior_chain); -# Here we are creating a `missing_data` object which is a `Vector` of the same length as the `data` and populated with type `missing` as values. -# We then instantiate a new `dice_throw` model with the `missing_data` vector as the `data` argument. +# Here we are creating a `missing_data` object which is a `Vector` of the same length as the `my_data` and populated with type `missing` as values. +# We then instantiate a new `dice_throw` model with the `missing_data` vector as the `y` argument. # Finally, we call `predict()` on the predictive model and the previously drawn samples, which in our case are the samples from the prior distribution (`prior_chain`). # Note that `predict()` returns a `Chains` object from `MCMCChains.jl`: diff --git a/_literate/05_MCMC.jl b/_literate/05_MCMC.jl index 8e851be7..d62f7944 100644 --- a/_literate/05_MCMC.jl +++ b/_literate/05_MCMC.jl @@ -118,7 +118,9 @@ # I will do some simulations to illustrate MCMC algorithms and techniques. 
So, here's the initial setup: -using Plots, StatsPlots, Distributions, LaTeXStrings, Random +using CairoMakie +using Distributions +using Random Random.seed!(123); @@ -181,8 +183,6 @@ const Σ = [1 0.8; 0.8 1] const mvnormal = MvNormal(μ, Σ) -data = rand(mvnormal, N)'; - # In the figure below it is possible to see a contour plot of the PDF of a multivariate normal distribution composed of two normal # variables $X$ and $Y$, both with mean 0 and standard deviation 1. # The correlation between $X$ and $Y$ is $\rho = 0.8$: @@ -190,16 +190,22 @@ data = rand(mvnormal, N)'; x = -3:0.01:3 y = -3:0.01:3 dens_mvnormal = [pdf(mvnormal, [i, j]) for i in x, j in y] -contour(x, y, dens_mvnormal, xlabel=L"X", ylabel=L"Y", fill=true) -savefig(joinpath(@OUTPUT, "countour_mvnormal.svg")); # hide +f, ax, c = contourf(x, y, dens_mvnormal; axis=(; xlabel=L"X", ylabel=L"Y")) +Colorbar(f[1, 2], c) +save(joinpath(@OUTPUT, "countour_mvnormal.svg"), f); # hide # \fig{countour_mvnormal} # \center{*Countour Plot of the PDF of a Multivariate Normal Distribution*} \\ # Also a surface plot can be seen below for you to get a 3-D intuition of what is going on: -surface(x, y, dens_mvnormal, xlabel=L"X", ylabel=L"Y", zlabel="PDF") -savefig(joinpath(@OUTPUT, "surface_mvnormal.svg")); # hide +f, ax, s = surface( + x, + y, + dens_mvnormal; + axis=(type=Axis3, xlabel=L"X", ylabel=L"Y", zlabel="PDF", azimuth=pi / 8), +) +save(joinpath(@OUTPUT, "surface_mvnormal.svg"), f); # hide # \fig{surface_mvnormal} # \center{*Surface Plot of the PDF of a Multivariate Normal Distribution*} \\ @@ -339,11 +345,18 @@ savefig(joinpath(@OUTPUT, "surface_mvnormal.svg")); # hide # Here is a simple implementation in Julia: -function metropolis(S::Int64, width::Float64, ρ::Float64; - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function metropolis( + S::Int64, + width::Float64, + ρ::Float64; + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -413,26 +426,65 @@ mean(summarystats(chain_met)[:, :ess]) / S # The animation in figure below shows the first 100 simulations of the Metropolis algorithm used to generate `X_met`. # Note that in several iterations the proposal is rejected and the algorithm samples the parameters $\theta_1$ and $\theta_2$ -# from the previous state (which becomes the current one, since the proposal is refused). The blue-filled ellipsis represents +# from the previous state (which becomes the current one, since the proposal is refused). The blue ellipsis represents # the 90% HPD of our toy example's bivariate normal distribution. # Note: `HPD` stands for *Highest Probability Density* (which in our case the posterior's 90% probability range). -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, - label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") +using LinearAlgebra: eigvals, eigvecs +#source: https://discourse.julialang.org/t/plot-ellipse-in-makie/82814/4 +function getellipsepoints(cx, cy, rx, ry, θ) + t = range(0, 2 * pi; length=100) + ellipse_x_r = @. rx * cos(t) + ellipse_y_r = @. ry * sin(t) + R = [cos(θ) sin(θ); -sin(θ) cos(θ)] + r_ellipse = [ellipse_x_r ellipse_y_r] * R + x = @. cx + r_ellipse[:, 1] + y = @. 
cy + r_ellipse[:, 2] + return (x, y) +end +function getellipsepoints(μ, Σ; confidence=0.95) + quant = sqrt(quantile(Chisq(2), confidence)) + cx = μ[1] + cy = μ[2] + + egvs = eigvals(Σ) + if egvs[1] > egvs[2] + idxmax = 1 + largestegv = egvs[1] + smallesttegv = egvs[2] + else + idxmax = 2 + largestegv = egvs[2] + smallesttegv = egvs[1] + end + + rx = quant * sqrt(largestegv) + ry = quant * sqrt(smallesttegv) -met_anim = @animate for i in 1:100 - scatter!(plt, (X_met[i, 1], X_met[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_met[i:i+1, 1], X_met[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + eigvecmax = eigvecs(Σ)[:, idxmax] + θ = atan(eigvecmax[2] / eigvecmax[1]) + if θ < 0 + θ += 2 * π + end + + return getellipsepoints(cx, cy, rx, ry, θ) +end + +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "met_anim.gif"); framerate=5) do frame + for i in 1:100 + scatter!(ax, (X_met[i, 1], X_met[i, 2]); color=(:red, 0.5)) + linesegments!(X_met[i:(i + 1), 1], X_met[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(met_anim, joinpath(@OUTPUT, "met_anim.gif"), fps=5); # hide # \fig{met_anim} # \center{*Animation of the First 100 Samples Generated from the Metropolis Algorithm*} \\ @@ -441,38 +493,35 @@ gif(met_anim, joinpath(@OUTPUT, "met_anim.gif"), fps=5); # hide const warmup = 1_000 -scatter((X_met[warmup:warmup+1_000, 1], X_met[warmup:warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "met_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_met[warmup:(warmup + 1_000), 1], + X_met[warmup:(warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "met_first1000.svg"), f); # hide # \fig{met_first1000} # \center{*First 1,000 Samples Generated from the Metropolis Algorithm after warm-up*} \\ # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. 
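# Before plotting them, a quick numeric check (assuming the `X_met` matrix and the `warmup`
# constant defined above): the post-warm-up sample moments should land close to the true
# values, μ = [0, 0] and ρ = 0.8:

using Statistics: mean, cor

mean(X_met[warmup:end, :]; dims=1) # both column means should be near 0
cor(X_met[warmup:end, :])          # off-diagonal entries should be near 0.8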
-scatter((X_met[warmup:end, 1], X_met[warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "met_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_met[warmup:end, 1], X_met[warmup:end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "met_all.svg"), f); # hide # \fig{met_all} # \center{*All 9,000 Samples Generated from the Metropolis Algorithm after warm-up*} \\ @@ -558,11 +607,17 @@ savefig(joinpath(@OUTPUT, "met_all.svg")); # hide # \end{aligned} # $$ -function gibbs(S::Int64, ρ::Float64; - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function gibbs( + S::Int64, + ρ::Float64; + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -627,61 +682,57 @@ summarystats(chain_gibbs) # The animation in figure below shows the first 100 simulations of the Gibbs algorithm used to generate `X_gibbs`. # Note that all proposals are accepted now, so the at each iteration we sample new parameters values. -# The blue-filled ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. +# The blue ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -gibbs_anim = @animate for i in 1:200 - scatter!(plt, (X_gibbs[i, 1], X_gibbs[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_gibbs[i:i+1, 1], X_gibbs[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "gibbs_anim.gif"); framerate=5) do frame + for i in 1:200 + scatter!(ax, (X_gibbs[i, 1], X_gibbs[i, 2]); color=(:red, 0.5)) + linesegments!(X_gibbs[i:(i + 1), 1], X_gibbs[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(gibbs_anim, joinpath(@OUTPUT, "gibbs_anim.gif"), fps=5); # hide # \fig{gibbs_anim} # \center{*Animation of the First 100 Samples Generated from the Gibbs Algorithm*} \\ # Now let's take a look how the first 1,000 simulations were, excluding 1,000 initial iterations as warm-up. 
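# Before we inspect those draws, one more small check of the full conditionals that drive
# each Gibbs update (a toy calculation for our bivariate normal with zero means, unit
# variances and ρ = 0.8, using throwaway variable names): conditioning on one coordinate
# being 1.0, the other coordinate is normal with mean ρ · 1.0 and standard deviation √(1 − ρ²):

ρ_check = 0.8
cond_mean = ρ_check * 1.0     # 0.8
cond_sd = sqrt(1 - ρ_check^2) # 0.6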
-scatter((X_gibbs[2*warmup:2*warmup+1_000, 1], X_gibbs[2*warmup:2*warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "gibbs_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_gibbs[(2 * warmup):(2 * warmup + 1_000), 1], + X_gibbs[(2 * warmup):(2 * warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "gibbs_first1000.svg"), f); # hide # \fig{gibbs_first1000} # \center{*First 1,000 Samples Generated from the Gibbs Algorithm after warm-up*} \\ # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. -scatter((X_gibbs[2*warmup:end, 1], X_gibbs[2*warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "gibbs_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_gibbs[(2 * warmup):end, 1], X_gibbs[(2 * warmup):end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "gibbs_all.svg"), f); # hide # \fig{gibbs_all} # \center{*All 9,000 Samples Generated from the Gibbs Algorithm after warm-up*} \\ @@ -697,7 +748,7 @@ savefig(joinpath(@OUTPUT, "gibbs_all.svg")); # hide # First, let's defined 4 different pairs of starting points using a nice Cartesian product # from Julia's `Base.Iterators`: -const starts = Iterators.product((-2.5, 2.5), (2.5, -2.5)) |> collect +const starts = collect(Iterators.product((-2.5, 2.5), (2.5, -2.5))) # Also, I will restrict this simulation to 100 samples: @@ -705,10 +756,18 @@ const S_parallel = 100; # Additionally, note that we are using different `seed`s: -X_met_1 = metropolis(S_parallel, width, ρ, seed=124, start_x=first(starts[1]), start_y=last(starts[1])); -X_met_2 = metropolis(S_parallel, width, ρ, seed=125, start_x=first(starts[2]), start_y=last(starts[2])); -X_met_3 = metropolis(S_parallel, width, ρ, seed=126, start_x=first(starts[3]), start_y=last(starts[3])); -X_met_4 = metropolis(S_parallel, width, ρ, seed=127, start_x=first(starts[4]), start_y=last(starts[4])); +X_met_1 = metropolis( + S_parallel, width, ρ; seed=124, start_x=first(starts[1]), start_y=last(starts[1]) +); +X_met_2 = metropolis( + S_parallel, width, ρ; seed=125, start_x=first(starts[2]), start_y=last(starts[2]) +); +X_met_3 = metropolis( + S_parallel, width, ρ; seed=126, start_x=first(starts[3]), start_y=last(starts[3]) +); +X_met_4 = metropolis( + S_parallel, width, ρ; seed=127, start_x=first(starts[4]), start_y=last(starts[4]) +); # There have been some significant changes in the approval rate for Metropolis proposals. 
All were around 13%-24%, # this is due to the low number of samples (only 100 for each Markov chain), if the samples were larger we would see @@ -718,80 +777,86 @@ X_met_4 = metropolis(S_parallel, width, ρ, seed=127, start_x=first(starts[4]), # Now let's take a look on how those 4 Metropolis Markov chains sample the parameter space starting from different positions. # Each chain will have its own marker and path color, so that we can see their different behavior: -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:grey, - label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - +using Colors const logocolors = Colors.JULIA_LOGO_COLORS; - -parallel_met = Animation() -for i in 1:99 - scatter!(plt, (X_met_1[i, 1], X_met_1[i, 2]), - label=false, mc=logocolors.blue, ma=0.5) - plot!(X_met_1[i:i+1, 1], X_met_1[i:i+1, 2], seriestype=:path, - lc=logocolors.blue, la=0.5, label=false) - scatter!(plt, (X_met_2[i, 1], X_met_2[i, 2]), - label=false, mc=logocolors.red, ma=0.5) - plot!(X_met_2[i:i+1, 1], X_met_2[i:i+1, 2], seriestype=:path, - lc=logocolors.red, la=0.5, label=false) - scatter!(plt, (X_met_3[i, 1], X_met_3[i, 2]), - label=false, mc=logocolors.green, ma=0.5) - plot!(X_met_3[i:i+1, 1], X_met_3[i:i+1, 2], seriestype=:path, - lc=logocolors.green, la=0.5, label=false) - scatter!(plt, (X_met_4[i, 1], X_met_4[i, 2]), - label=false, mc=logocolors.purple, ma=0.5) - plot!(X_met_4[i:i+1, 1], X_met_4[i:i+1, 2], seriestype=:path, - lc=logocolors.purple, la=0.5, label=false) - frame(parallel_met) +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "parallel_met.gif"); framerate=5) do frame + for i in 1:99 + scatter!(ax, (X_met_1[i, 1], X_met_1[i, 2]); color=(logocolors.blue, 0.5)) + linesegments!( + X_met_1[i:(i + 1), 1], X_met_1[i:(i + 1), 2]; color=(logocolors.blue, 0.5) + ) + scatter!(ax, (X_met_2[i, 1], X_met_2[i, 2]); color=(logocolors.red, 0.5)) + linesegments!( + X_met_2[i:(i + 1), 1], X_met_2[i:(i + 1), 2]; color=(logocolors.red, 0.5) + ) + scatter!(ax, (X_met_3[i, 1], X_met_3[i, 2]); color=(logocolors.green, 0.5)) + linesegments!( + X_met_3[i:(i + 1), 1], X_met_3[i:(i + 1), 2]; color=(logocolors.green, 0.5) + ) + scatter!(ax, (X_met_4[i, 1], X_met_4[i, 2]); color=(logocolors.purple, 0.5)) + linesegments!( + X_met_4[i:(i + 1), 1], X_met_4[i:(i + 1), 2]; color=(logocolors.purple, 0.5) + ) + recordframe!(frame) + end end -gif(parallel_met, joinpath(@OUTPUT, "parallel_met.gif"), fps=5); # hide # \fig{parallel_met} # \center{*Animation of 4 Parallel Metropolis Markov Chains*} \\ # Now we'll do the the same for Gibbs, taking care to provide also different `seed`s and starting points: -X_gibbs_1 = gibbs(S_parallel * 2, ρ, seed=124, start_x=first(starts[1]), start_y=last(starts[1])); -X_gibbs_2 = gibbs(S_parallel * 2, ρ, seed=125, start_x=first(starts[2]), start_y=last(starts[2])); -X_gibbs_3 = gibbs(S_parallel * 2, ρ, seed=126, start_x=first(starts[3]), start_y=last(starts[3])); -X_gibbs_4 = gibbs(S_parallel * 2, ρ, seed=127, start_x=first(starts[4]), start_y=last(starts[4])); +X_gibbs_1 = gibbs( + S_parallel * 2, ρ; seed=124, start_x=first(starts[1]), start_y=last(starts[1]) +); +X_gibbs_2 = gibbs( + S_parallel * 2, ρ; seed=125, start_x=first(starts[2]), start_y=last(starts[2]) +); +X_gibbs_3 = gibbs( + S_parallel * 2, ρ; seed=126, start_x=first(starts[3]), 
start_y=last(starts[3]) +); +X_gibbs_4 = gibbs( + S_parallel * 2, ρ; seed=127, start_x=first(starts[4]), start_y=last(starts[4]) +); # Now let's take a look on how those 4 Gibbs Markov chains sample the parameter space starting from different positions. # Each chain will have its own marker and path color, so that we can see their different behavior: -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:grey, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -parallel_gibbs = Animation() -for i in 1:199 - scatter!(plt, (X_gibbs_1[i, 1], X_gibbs_1[i, 2]), - label=false, mc=logocolors.blue, ma=0.5) - plot!(X_gibbs_1[i:i+1, 1], X_gibbs_1[i:i+1, 2], seriestype=:path, - lc=logocolors.blue, la=0.5, label=false) - scatter!(plt, (X_gibbs_2[i, 1], X_gibbs_2[i, 2]), - label=false, mc=logocolors.red, ma=0.5) - plot!(X_gibbs_2[i:i+1, 1], X_gibbs_2[i:i+1, 2], seriestype=:path, - lc=logocolors.red, la=0.5, label=false) - scatter!(plt, (X_gibbs_3[i, 1], X_gibbs_3[i, 2]), - label=false, mc=logocolors.green, ma=0.5) - plot!(X_gibbs_3[i:i+1, 1], X_gibbs_3[i:i+1, 2], seriestype=:path, - lc=logocolors.green, la=0.5, label=false) - scatter!(plt, (X_gibbs_4[i, 1], X_gibbs_4[i, 2]), - label=false, mc=logocolors.purple, ma=0.5) - plot!(X_gibbs_4[i:i+1, 1], X_gibbs_4[i:i+1, 2], seriestype=:path, - lc=logocolors.purple, la=0.5, label=false) - frame(parallel_gibbs) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "parallel_gibbs.gif"); framerate=5) do frame + for i in 1:199 + scatter!(ax, (X_gibbs_1[i, 1], X_gibbs_1[i, 2]); color=(logocolors.blue, 0.5)) + linesegments!( + X_gibbs_1[i:(i + 1), 1], X_gibbs_1[i:(i + 1), 2]; color=(logocolors.blue, 0.5) + ) + scatter!(ax, (X_gibbs_2[i, 1], X_gibbs_2[i, 2]); color=(logocolors.red, 0.5)) + linesegments!( + X_gibbs_2[i:(i + 1), 1], X_gibbs_2[i:(i + 1), 2]; color=(logocolors.red, 0.5) + ) + scatter!(ax, (X_gibbs_3[i, 1], X_gibbs_3[i, 2]); color=(logocolors.green, 0.5)) + linesegments!( + X_gibbs_3[i:(i + 1), 1], X_gibbs_3[i:(i + 1), 2]; color=(logocolors.green, 0.5) + ) + scatter!(ax, (X_gibbs_4[i, 1], X_gibbs_4[i, 2]); color=(logocolors.purple, 0.5)) + linesegments!( + X_gibbs_4[i:(i + 1), 1], X_gibbs_4[i:(i + 1), 2]; color=(logocolors.purple, 0.5) + ) + recordframe!(frame) + end end -gif(parallel_gibbs, joinpath(@OUTPUT, "parallel_gibbs.gif"), fps=5); # hide # \fig{parallel_gibbs} # \center{*Animation of 4 Parallel Gibbs Markov Chains*} \\ @@ -886,18 +951,25 @@ gif(parallel_gibbs, joinpath(@OUTPUT, "parallel_gibbs.gif"), fps=5); # hide # \theta^{t-1} & \text{otherwise} # \end{cases}$$ - # ### HMC -- Implementation # Alright let's code the HMC algorithm for our toy example's bivariate normal distribution: using ForwardDiff: gradient -function hmc(S::Int64, width::Float64, ρ::Float64; - L=40, ϵ=0.001, - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function hmc( + S::Int64, + width::Float64, + ρ::Float64; + L=40, + ϵ=0.001, + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -954,7 +1026,7 @@ gradient(x -> logpdf(mvnormal, x), [1, -1]) # Now let's run our HMC Markov chain. 
# We are going to use $L = 40$ and (don't ask me how I found out) $\epsilon = 0.0856$: -X_hmc = hmc(S, width, ρ, ϵ=0.0856, L=40); +X_hmc = hmc(S, width, ρ; ϵ=0.0856, L=40); # Our acceptance rate is 20.79%. # As before lets' take a quick peek into `X_hmc`, we'll see it's a matrix of $X$ and $Y$ values as columns and the time $t$ as rows: @@ -988,62 +1060,57 @@ mean(summarystats(chain_hmc)[:, :ess]) / S # The animation in figure below shows the first 100 simulations of the HMC algorithm used to generate `X_hmc`. # Note that we have a gradient-guided proposal at each iteration, so the animation would resemble more like # a very lucky random-walk Metropolis [^luckymetropolis]. -# The blue-filled ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. +# The blue ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -hmc_anim = @animate for i in 1:100 - scatter!(plt, (X_hmc[i, 1], X_hmc[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_hmc[i:i+1, 1], X_hmc[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "hmc_anim.gif"); framerate=5) do frame + for i in 1:100 + scatter!(ax, (X_hmc[i, 1], X_hmc[i, 2]); color=(:red, 0.5)) + linesegments!(X_hmc[i:(i + 1), 1], X_hmc[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(hmc_anim, joinpath(@OUTPUT, "hmc_anim.gif"), fps=5); # hide # \fig{hmc_anim} # \center{*Animation of the First 100 Samples Generated from the HMC Algorithm*} \\ # As usual, let's take a look how the first 1,000 simulations were, excluding 1,000 initial iterations as warm-up. -scatter((X_hmc[warmup:warmup+1_000, 1], X_hmc[warmup:warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "hmc_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_hmc[warmup:(warmup + 1_000), 1], + X_hmc[warmup:(warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "hmc_first1000.svg"), f); # hide # \fig{hmc_first1000} # \center{*First 1,000 Samples Generated from the HMC Algorithm after warm-up*} \\ - # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. 
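# As before, a quick numeric sanity check before plotting (again assuming `X_hmc` and
# `warmup` from above): the post-warm-up sample correlation should also approach ρ = 0.8:

using Statistics: cor

cor(X_hmc[warmup:end, :]) # off-diagonal entries should be near 0.8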
-scatter((X_hmc[warmup:end, 1], X_hmc[warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "hmc_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_hmc[warmup:end, 1], X_hmc[warmup:end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "hmc_all.svg"), f); # hide # \fig{hmc_all} # \center{*All 9,000 Samples Generated from the HMC Algorithm after warm-up*} \\ @@ -1085,12 +1152,17 @@ d2 = MvNormal([0, 0], [8.4 2.0; 2.0 1.7]) d = MixtureModel([d1, d2]) -data_mixture = rand(d, 1_000)' +x = -6:0.01:15 +y = -2.5:0.01:4.2 +dens_mixture = [pdf(d, [i, j]) for i in x, j in y] -marginalkde(data_mixture[:, 1], data_mixture[:, 2], - clip=((-1.6, 3), (-3, 3)), - xlabel=L"X", ylabel=L"Y") -savefig(joinpath(@OUTPUT, "bimodal.svg")); # hide +f, ax, s = surface( + x, + y, + dens_mixture; + axis=(type=Axis3, xlabel=L"X", ylabel=L"Y", zlabel="PDF", azimuth=pi / 4), +) +save(joinpath(@OUTPUT, "bimodal.svg"), f); # hide # \fig{bimodal} # \center{*Multivariate Bimodal Normal*} \\ @@ -1104,11 +1176,13 @@ savefig(joinpath(@OUTPUT, "bimodal.svg")); # hide funnel_y = rand(Normal(0, 3), 10_000) funnel_x = rand(Normal(), 10_000) .* exp.(funnel_y / 2) -scatter((funnel_x, funnel_y), - label=false, mc=:steelblue, ma=0.3, - xlabel=L"X", ylabel=L"Y", - xlims=(-100, 100)) -savefig(joinpath(@OUTPUT, "funnel.svg")); # hide +f, ax, s = scatter( + funnel_x, + funnel_y; + color=(:steelblue, 0.3), + axis=(; xlabel=L"X", ylabel=L"Y", limits=(-100, 100, nothing, nothing)), +) +save(joinpath(@OUTPUT, "funnel.svg"), f); # hide # \fig{funnel} # \center{*Neal's Funnel*} \\ @@ -1146,7 +1220,7 @@ setprogress!(false) # hide p ~ Dirichlet(6, 1) #Each outcome of the six-sided dice has a probability p. - y ~ filldist(Categorical(p), length(y)) + return y ~ filldist(Categorical(p), length(y)) end; # Let's once again generate 1,000 throws of a six-sided dice: @@ -1210,16 +1284,29 @@ mean(chain[:acceptance_rate]) # during a good part (or all) of the Markov chain sampling. We can do that with the `MCMCChains.jl`'s function `traceplot()`. 
# Let's look the "good" `chain` first: -traceplot(chain) -savefig(joinpath(@OUTPUT, "traceplot_chain.svg")); # hide +using AlgebraOfGraphics +params = names(chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt = data(chain) * mapping(:iteration) * chain_mapping * visual(Lines) +f = Figure(; resolution=(1200, 900)) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "traceplot_chain.svg"), f); # hide # \fig{traceplot_chain} # \center{*Traceplot for `chain`*} \\ # And now the `bad_chain`: -traceplot(bad_chain) -savefig(joinpath(@OUTPUT, "traceplot_bad_chain.svg")); # hide +params = names(bad_chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt = data(bad_chain) * mapping(:iteration) * chain_mapping * visual(Lines) +f = Figure(; resolution=(1200, 900)) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "traceplot_bad_chain.svg"), f); # hide # \fig{traceplot_bad_chain} # \center{*Traceplot for `bad_chain`*} \\ diff --git a/_literate/06_linear_reg.jl b/_literate/06_linear_reg.jl index 37268706..c52cb04b 100644 --- a/_literate/06_linear_reg.jl +++ b/_literate/06_linear_reg.jl @@ -85,7 +85,7 @@ setprogress!(false) # hide σ ~ Exponential(1) #likelihood - y ~ MvNormal(α .+ X * β, σ^2 * I) + return y ~ MvNormal(α .+ X * β, σ^2 * I) end; # Here I am specifying very weakly informative priors: @@ -114,7 +114,9 @@ end; # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/kidiq.csv" kidiq = CSV.read(HTTP.get(url).body, DataFrame) diff --git a/_literate/07_logistic_reg.jl b/_literate/07_logistic_reg.jl index 791c516f..f24ece7f 100644 --- a/_literate/07_logistic_reg.jl +++ b/_literate/07_logistic_reg.jl @@ -13,15 +13,15 @@ # We use logistic regression when our dependent variable is binary. It has only two distinct values, usually # encoded as $0$ or $1$. See the figure below for a graphical intuition of the logistic function: -using Plots, LaTeXStrings +using CairoMakie function logistic(x) return 1 / (1 + exp(-x)) end -plot(logistic, -10, 10, label=false, - xlabel=L"x", ylabel=L"\mathrm{Logistic}(x)") -savefig(joinpath(@OUTPUT, "logistic.svg")); # hide +f, ax, l = lines(-10 .. 10, logistic; axis=(; xlabel=L"x", ylabel=L"\mathrm{Logistic}(x)")) +f +save(joinpath(@OUTPUT, "logistic.svg"), f); # hide # \fig{logistic} # \center{*Logistic Function*} \\ @@ -141,7 +141,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3), predictors) #likelihood - y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β))) + return y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β))) end; # Here I am specifying very weakly informative priors: @@ -177,7 +177,9 @@ end; # * `educ` -- years of education (head of household). 
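# Before reading the data in, a toy calculation with the `logistic` function defined
# earlier in this tutorial (the numbers below are made up, purely to build intuition for
# the coefficients we are about to estimate): starting from a 50% baseline probability,
# adding 0.5 on the log-odds scale moves it to roughly 62%:

logistic(0.0)       # 0.5 -- log-odds of zero
logistic(0.0 + 0.5) # ≈ 0.62 -- what a coefficient of 0.5 does to that baseline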
# Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/wells.csv" wells = CSV.read(HTTP.get(url).body, DataFrame) @@ -223,10 +225,7 @@ using Chain @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our interpretation of odds is the same as in betting games. Anything below 1 signals a unlikely probability that $y$ will be $1$. @@ -240,10 +239,7 @@ end @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(logodds2prob), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(logodds2prob); renamecols=false) end # There you go, much better now. Let's analyze our results. The intercept `α` is the basal `switch` probability which has diff --git a/_literate/08_ordinal_reg.jl b/_literate/08_ordinal_reg.jl index 9e0031bb..ebe2ac70 100644 --- a/_literate/08_ordinal_reg.jl +++ b/_literate/08_ordinal_reg.jl @@ -122,28 +122,19 @@ let x_pmf = pdf.(dist, x) x_cdf = cdf.(dist, x) x_logodds_cdf = logit.(x_cdf) - df = DataFrame(; - x, - x_pmf, - x_cdf, - x_logodds_cdf) + df = DataFrame(; x, x_pmf, x_cdf, x_logodds_cdf) labels = ["CDF", "Log-cumulative-odds"] - fig = Figure() - plt1 = data(df) * - mapping(:x, :x_pmf) * - visual(BarPlot) - plt2 = data(df) * - mapping(:x, - [:x_cdf, :x_logodds_cdf]; - col=dims(1) => renamer(labels)) * - visual(ScatterLines) + f = Figure() + plt1 = data(df) * mapping(:x, :x_pmf) * visual(BarPlot) + plt2 = + data(df) * + mapping(:x, [:x_cdf, :x_logodds_cdf]; col=dims(1) => renamer(labels)) * + visual(ScatterLines) axis = (; xticks=1:6) - draw!(fig[1, 2:3], plt1; axis) - draw!(fig[2, 1:4], plt2; - axis, - facet=(; linkyaxes=:none)) - fig - save(joinpath(@OUTPUT, "logodds.svg"), fig) # hide + draw!(f[1, 2:3], plt1; axis) + draw!(f[2, 1:4], plt2; axis, facet=(; linkyaxes=:none)) + f + save(joinpath(@OUTPUT, "logodds.svg"), f) # hide end # \fig{logodds} @@ -287,7 +278,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3) * 2.5, predictors) #likelihood - y ~ arraydist([OrderedLogistic(X[i, :] ⋅ β, cutpoints) for i in 1:length(y)]) + return y ~ arraydist([OrderedLogistic(X[i, :] ⋅ β, cutpoints) for i in 1:length(y)]) end; # First, let's deal with the new stuff in our model: the **`Bijectors.ordered`**. @@ -338,7 +329,9 @@ end; # * `ncontrols`: Number of controls # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/esoph.csv" esoph = CSV.read(HTTP.get(url).body, DataFrame) @@ -360,7 +353,7 @@ transform!( x -> categorical(x; levels=["0-39g/day", "40-79", "80-119", "120+"], ordered=true), :tobgp => x -> categorical(x; levels=["0-9g/day", "10-19", "20-29", "30+"], ordered=true); - renamecols=false + renamecols=false, ) transform!(esoph, [:agegp, :alcgp, :tobgp] .=> ByRow(levelcode); renamecols=false) @@ -395,10 +388,7 @@ using Chain @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our interpretation of odds is the same as in betting games. 
Anything below 1 signals a unlikely probability that $y$ will be $1$. @@ -412,10 +402,7 @@ end @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(logodds2prob), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(logodds2prob); renamecols=false) end # There you go, much better now. Let's analyze our results. diff --git a/_literate/09_count_reg.jl b/_literate/09_count_reg.jl index 83b3c00e..d33c33d0 100644 --- a/_literate/09_count_reg.jl +++ b/_literate/09_count_reg.jl @@ -11,11 +11,10 @@ # We use regression with count data when our dependent variable is restricted to positive integers, *i.e.* $y \in \mathbb{Z}^+$. # See the figure below for a graphical intuition of the exponential function: -using Plots, LaTeXStrings +using CairoMakie -plot(exp, -6, 6, label=false, - xlabel=L"x", ylabel=L"e^x") -savefig(joinpath(@OUTPUT, "exponential.svg")); # hide +f, ax, l = lines(-6 .. 6, exp; axis=(xlabel=L"x", ylabel=L"e^x")) +save(joinpath(@OUTPUT, "exponential.svg"), f); # hide # \fig{exponential} # \center{*Exponential Function*} \\ @@ -123,7 +122,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3), predictors) #likelihood - y ~ arraydist(LazyArray(@~ LogPoisson.(α .+ X * β))) + return y ~ arraydist(LazyArray(@~ LogPoisson.(α .+ X * β))) end; # Here I am specifying very weakly informative priors: @@ -167,13 +166,13 @@ end; # Here is what a $\text{Gamma}(0.01, 0.01)$ looks like: -using StatsPlots, Distributions -plot(Gamma(0.01, 0.01), - lw=2, label=false, - xlabel=L"\phi", - ylabel="Density", - xlims=(0, 0.001)) -savefig(joinpath(@OUTPUT, "gamma.svg")); # hide +using Distributions +f, ax, l = lines( + Gamma(0.01, 0.01); + linewidth=2, + axis=(xlabel=L"\phi", ylabel="Density", limits=(0, 0.03, nothing, nothing)), +) +save(joinpath(@OUTPUT, "gamma.svg"), f); # hide # \fig{gamma} # \center{*Gamma Distribution with $\alpha = 0.01$ and $\theta = 0.01$*} \\ @@ -262,7 +261,7 @@ end ϕ = 1 / ϕ⁻ #likelihood - y ~ arraydist(LazyArray(@~ NegativeBinomial2.(exp.(α .+ X * β), ϕ))) + return y ~ arraydist(LazyArray(@~ NegativeBinomial2.(exp.(α .+ X * β), ϕ))) end; # Here I am also specifying very weakly informative priors: @@ -291,7 +290,9 @@ end; # * `exposure2` -- number of days for which the roach traps were used # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/roaches.csv" roaches = CSV.read(HTTP.get(url).body, DataFrame) @@ -326,10 +327,7 @@ using Chain @chain quantile(chain_poisson) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Let's analyze our results. The intercept `α` is the basal number of roaches caught `y` and has @@ -360,10 +358,7 @@ summarystats(chain_negbin) @chain quantile(chain_negbin) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our results show much more uncertainty in the coefficients than in the Poisson regression. diff --git a/_literate/10_robust_reg.jl b/_literate/10_robust_reg.jl index 4597df88..c460ac1c 100644 --- a/_literate/10_robust_reg.jl +++ b/_literate/10_robust_reg.jl @@ -19,13 +19,11 @@ # mean without having to change the mean's position (or location). 
In other words, the bell curve needs to "shift" to be able # to contemplate outliers, thus making the inference unstable. -using StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions -plot(Normal(0, 1), - lw=5, label=false, - xlabel=L"x", - ylabel="Density") -savefig(joinpath(@OUTPUT, "normal.svg")); # hide +f, ax, l = lines(-4 .. 4, Normal(0, 1); linewidth=5, axis=(; xlabel=L"x", ylabel="Density")) +save(joinpath(@OUTPUT, "normal.svg"), f); # hide # \fig{normal} # \center{*Normal with $\mu=0$ and $\sigma = 1$*} \\ @@ -35,12 +33,8 @@ savefig(joinpath(@OUTPUT, "normal.svg")); # hide # far from the average without having to "shift" the mean's position (or location). For that we have the Student-$t$ distribution. # See the figure below to remember its shape. -plot(TDist(2), - lw=5, label=false, - xlabel=L"x", - ylabel="Density", - xlims=(-4, 4)) -savefig(joinpath(@OUTPUT, "tdist.svg")); # hide +f, ax, l = lines(-4 .. 4, TDist(2); linewidth=5, axis=(xlabel=L"x", ylabel="Density")) +save(joinpath(@OUTPUT, "tdist.svg"), f); # hide # \fig{tdist} # \center{*Student-$t$ with $\nu = 2$*} \\ @@ -49,14 +43,16 @@ savefig(joinpath(@OUTPUT, "tdist.svg")); # hide # Take a look at the tails in the comparison below: -plot(Normal(0, 1), - lw=5, label="Normal", - xlabel=L"x", - ylabel="Density", - xlims=(-4, 4)) -plot!(TDist(2), - lw=5, label="Student") -savefig(joinpath(@OUTPUT, "comparison_normal_student.svg")); # hide +f, ax, l = lines( + -4 .. 4, + Normal(0, 1); + linewidth=5, + label="Normal", + axis=(; xlabel=L"x", ylabel="Density"), +) +lines!(ax, -4 .. 4, TDist(2); linewidth=5, label="Student") +axislegend(ax) +save(joinpath(@OUTPUT, "comparison_normal_student.svg"), f); # hide # \fig{comparison_normal_student} # \center{*Comparison between Normal and Student-$t$ Distributions*} \\ @@ -123,14 +119,14 @@ seed!(456) # hide setprogress!(false) # hide @model function robustreg(X, y; predictors=size(X, 2)) - #priors - α ~ LocationScale(median(y), 2.5 * mad(y), TDist(3)) - β ~ filldist(TDist(3), predictors) - σ ~ Exponential(1) - ν ~ LogNormal(2, 1) - - #likelihood - y ~ arraydist(LocationScale.(α .+ X * β, σ, TDist.(ν))) + #priors + α ~ LocationScale(median(y), 2.5 * mad(y), TDist(3)) + β ~ filldist(TDist(3), predictors) + σ ~ Exponential(1) + ν ~ LogNormal(2, 1) + + #likelihood + return y ~ arraydist(LocationScale.(α .+ X * β, σ, TDist.(ν))) end; # Here I am specifying very weakly informative priors: @@ -161,7 +157,9 @@ end; # * `prestige`: percentage of respondents in the survey who classified their occupation as at least "good" with respect to prestige. # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/duncan.csv" duncan = CSV.read(HTTP.get(url).body, DataFrame) @@ -171,17 +169,22 @@ describe(duncan) # as at least "good" with respect to prestige is around 41%. 
But `prestige` variable is very dispersed and actually has a bimodal # distribution: -@df duncan density(:prestige, label=false) -savefig(joinpath(@OUTPUT, "prestige_density.svg")); # hide +f = Figure() +plt = data(duncan) * mapping(:prestige) * AlgebraOfGraphics.density() +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "prestige_density.svg"), f); # hide # \fig{prestige_density} # \center{*Density Plot of `prestige`*} \\ # Besides that, the mean `prestige` per `type` shows us where the source of variation might come from: -gdf_type = groupby(duncan, :type) -@df combine(gdf_type, :prestige => mean) bar(:type, :prestige_mean, label=false) -savefig(joinpath(@OUTPUT, "prestige_per_type.svg")); # hide +gdf = groupby(duncan, :type) +f = Figure() +plt = + data(combine(gdf, :prestige => mean)) * mapping(:type, :prestige_mean) * visual(BarPlot) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "prestige_per_type.svg"), f); # hide # \fig{prestige_per_type} # \center{*Mean `prestige` per `type`*} \\ diff --git a/_literate/11_multilevel_models.jl b/_literate/11_multilevel_models.jl index b76e297b..3e1b2007 100644 --- a/_literate/11_multilevel_models.jl +++ b/_literate/11_multilevel_models.jl @@ -96,7 +96,9 @@ using Random: seed! seed!(123) setprogress!(false) # hide -@model function varying_intercept(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -108,7 +110,7 @@ setprogress!(false) # hide #likelihood ŷ = α .+ X * β .+ αⱼ[idx] - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # ### Random-Slope Model @@ -147,7 +149,7 @@ end; #likelihood ŷ = α .+ X * βⱼ * τ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # ### Random-Intercept-Slope Model @@ -176,7 +178,9 @@ end; # In Turing we can accomplish this as: -@model function varying_intercept_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept_slope( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept σ ~ Exponential(1 / std(y)) # residual SD @@ -189,7 +193,7 @@ end; #likelihood ŷ = α .+ αⱼ[idx] .+ X * βⱼ * τᵦ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # In all of the models, @@ -215,7 +219,9 @@ end; # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/cheese.csv" cheese = CSV.read(HTTP.get(url).body, DataFrame) @@ -234,8 +240,13 @@ for c in unique(cheese[:, :cheese]) end cheese[:, :background_int] = map(cheese[:, :background]) do b - b == "rural" ? 1 : - b == "urban" ? 2 : missing + if b == "rural" + 1 + elseif b == "urban" + 2 + else + missing + end end first(cheese, 5) @@ -253,7 +264,7 @@ idx = cheese[:, :background_int]; model_intercept = varying_intercept(X, idx, y) chain_intercept = sample(model_intercept, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_intercept) |> DataFrame |> println +println(DataFrame(summarystats(chain_intercept))) # Here we can see that the model has a population-level intercept `α` along with population-level coefficients `β`s for each `cheese` # dummy variable. 
But notice that we have also group-level intercepts for each of the groups `αⱼ`s. @@ -263,7 +274,7 @@ summarystats(chain_intercept) |> DataFrame |> println model_slope = varying_slope(X, idx, y) chain_slope = sample(model_slope, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_slope) |> DataFrame |> println +println(DataFrame(summarystats(chain_slope))) # Here we can see that the model has still a population-level intercept `α`. But now our population-level # coefficients `β`s are replaced by group-level coefficients `βⱼ`s along with their standard deviation `τᵦ`s. @@ -274,7 +285,7 @@ summarystats(chain_slope) |> DataFrame |> println model_intercept_slope = varying_intercept_slope(X, idx, y) chain_intercept_slope = sample(model_intercept_slope, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_intercept_slope) |> DataFrame |> println +println(DataFrame(summarystats(chain_intercept_slope))) # Now we have fused the previous model in one. We still have a population-level intercept `α`. But now # we have in the same model group-level intercepts for each of the groups `αⱼ`s and group-level along with their standard diff --git a/_literate/12_Turing_tricks.jl b/_literate/12_Turing_tricks.jl index 57a8699c..2e4bdb37 100644 --- a/_literate/12_Turing_tricks.jl +++ b/_literate/12_Turing_tricks.jl @@ -57,10 +57,12 @@ setprogress!(false) # hide σ ~ Exponential(1) #likelihood - y ~ MvNormal(α .+ X * β, σ^2 * I) + return y ~ MvNormal(α .+ X * β, σ^2 * I) end; -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/kidiq.csv" kidiq = CSV.read(HTTP.get(url).body, DataFrame) @@ -115,8 +117,12 @@ chain_qr = sample(model_qr, NUTS(1_000, 0.65), MCMCThreads(), 1_000, 4) # the regular `linreg`. # Now we have to reconstruct our $\boldsymbol{\beta}$s: -betas = mapslices(x -> R_ast^-1 * x, chain_qr[:, namesingroup(chain_qr, :β), :].value.data, dims=[2]) -chain_beta = setrange(Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:3_000) +betas = mapslices( + x -> R_ast^-1 * x, chain_qr[:, namesingroup(chain_qr, :β), :].value.data; dims=[2] +) +chain_beta = setrange( + Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:2_000 +) chain_qr_reconstructed = hcat(chain_beta, chain_qr) # ## Non-Centered Parametrization @@ -126,15 +132,18 @@ chain_qr_reconstructed = hcat(chain_beta, chain_qr) # change the step size $L$ and the $\epsilon$ factor. This is I've showed one of the most infamous # case in [5. 
**Markov Chain Monte Carlo (MCMC)**](/pages/5_MCMC/): Neal's Funnel (Neal, 2003): -using StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions funnel_y = rand(Normal(0, 3), 10_000) funnel_x = rand(Normal(), 10_000) .* exp.(funnel_y / 2) -scatter((funnel_x, funnel_y), - label=false, mc=:steelblue, ma=0.3, - xlabel=L"X", ylabel=L"Y", - xlims=(-100, 100)) -savefig(joinpath(@OUTPUT, "funnel.svg")); # hide +f, ax, s = scatter( + funnel_x, + funnel_y; + color=(:steelblue, 0.3), + axis=(; xlabel=L"X", ylabel=L"Y", limits=(-100, 100, nothing, nothing)), +) +save(joinpath(@OUTPUT, "funnel.svg"), f); # hide # \fig{funnel} # \center{*Neal's Funnel*} \\ @@ -147,7 +156,7 @@ savefig(joinpath(@OUTPUT, "funnel.svg")); # hide @model function funnel() y ~ Normal(0, 3) - x ~ Normal(0, exp(y / 2)) + return x ~ Normal(0, exp(y / 2)) end chain_funnel = sample(funnel(), NUTS(), MCMCThreads(), 1_000, 4) @@ -167,7 +176,7 @@ chain_funnel = sample(funnel(), NUTS(), MCMCThreads(), 1_000, 4) x̃ ~ Normal() ỹ ~ Normal() y = 3.0 * ỹ # implies y ~ Normal(0, 3) - x = exp(y / 2) * x̃ # implies x ~ Normal(0, exp(y / 2)) + return x = exp(y / 2) * x̃ # implies x ~ Normal(0, exp(y / 2)) end chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) @@ -178,7 +187,9 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) # in [10. **Multilevel Models (a.k.a. Hierarchical Models)**](/pages/10_multilevel_models/). Here was the # approach that we took, also known as Centered Parametrization (CP): -@model function varying_intercept(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -190,12 +201,14 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) #likelihood ŷ = α .+ X * β .+ αⱼ[idx] - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # To perform a Non-Centered Parametrization (NCP) in this model we do as following: -@model function varying_intercept_ncp(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept_ncp( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -208,7 +221,7 @@ end; #likelihood ŷ = α .+ X * β .+ zⱼ[idx] .* τ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # Here we are using a NCP with the `zⱼ`s following a standard normal and we reconstruct the @@ -226,8 +239,13 @@ for c in unique(cheese[:, :cheese]) end cheese[:, :background_int] = map(cheese[:, :background]) do b - b == "rural" ? 1 : - b == "urban" ? 2 : missing + if b == "rural" + 1 + elseif b == "urban" + 2 + else + missing + end end X = Matrix(select(cheese, Between(:cheese_A, :cheese_D))); @@ -247,8 +265,12 @@ chain_ncp = sample(model_ncp, NUTS(), MCMCThreads(), 1_000, 4) # in Turing. Before we conclude, we need to recover our original `αⱼ`s. 
We can do this by multiplying `zⱼ[idx] .* τ`: τ = summarystats(chain_ncp)[:τ, :mean] -αⱼ = mapslices(x -> x * τ, chain_ncp[:, namesingroup(chain_ncp, :zⱼ), :].value.data, dims=[2]) -chain_ncp_reconstructed = hcat(MCMCChains.resetrange(chain_ncp), Chains(αⱼ, ["αⱼ[$i]" for i in 1:length(unique(idx))])) +αⱼ = mapslices( + x -> x * τ, chain_ncp[:, namesingroup(chain_ncp, :zⱼ), :].value.data; dims=[2] +) +chain_ncp_reconstructed = hcat( + MCMCChains.resetrange(chain_ncp), Chains(αⱼ, ["αⱼ[$i]" for i in 1:length(unique(idx))]) +) # ## References diff --git a/_literate/13_epi_models.jl b/_literate/13_epi_models.jl index 6de11ae9..91dc2fde 100644 --- a/_literate/13_epi_models.jl +++ b/_literate/13_epi_models.jl @@ -11,25 +11,40 @@ # For this tutorial I'll be using Brazil's COVID data from the [Media Consortium](https://brasil.io/covid19/). # For reproducibility, we'll restrict the data to the year of 2020: -using Downloads, DataFrames, CSV, Chain, Dates +using Downloads +using DataFrames +using CSV +using Chain +using Dates url = "https://data.brasil.io/dataset/covid19/caso_full.csv.gz" file = Downloads.download(url) -df = CSV.File(file) |> DataFrame +df = DataFrame(CSV.File(file)) br = @chain df begin - filter([:date, :city] => (date, city) -> date < Dates.Date("2021-01-01") && date > Dates.Date("2020-04-01") && ismissing(city), _) + filter( + [:date, :city] => + (date, city) -> + date < Dates.Date("2021-01-01") && + date > Dates.Date("2020-04-01") && + ismissing(city), + _, + ) groupby(:date) combine( - [:estimated_population_2019, + [ + :estimated_population_2019, :last_available_confirmed_per_100k_inhabitants, :last_available_deaths, :new_confirmed, - :new_deaths] .=> sum .=> - [:estimated_population_2019, + :new_deaths, + ] .=> + sum .=> [ + :estimated_population_2019, :last_available_confirmed_per_100k_inhabitants, :last_available_deaths, :new_confirmed, - :new_deaths] + :new_deaths, + ], ) end; @@ -43,13 +58,12 @@ last(br, 5) # Here is a plot of the data: -using Plots, StatsPlots, LaTeXStrings -@df br plot(:date, - :new_confirmed, - xlab=L"t", ylab="infected daily", - yformatter=y -> string(round(Int64, y ÷ 1_000)) * "K", - label=false) -savefig(joinpath(@OUTPUT, "infected.svg")); # hide +using AlgebraOfGraphics +using CairoMakie +f = Figure() +plt = data(br) * mapping(:date => L"t", :new_confirmed => "infected daily") * visual(Lines) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "infected.svg"), f); # hide # \fig{infected} # \center{*Infected in Brazil during COVID in 2020*} \\ @@ -110,7 +124,7 @@ function sir_ode!(du, u, p, t) du[2] = infection - recovery # Infected du[3] = recovery # Recovered end - nothing + return nothing end; # This is what the infection would look with some fixed `β` and `γ` @@ -123,13 +137,17 @@ u = [N - i₀, i₀, 0.0] p = [0.5, 0.05] prob = ODEProblem(sir_ode!, u, (1.0, 100.0), p) sol_ode = solve(prob) -plot(sol_ode, label=[L"S" L"I" L"R"], - lw=3, - xlabel=L"t", - ylabel=L"N", - yformatter=y -> string(round(Int64, y ÷ 1_000_000)) * "mi", - title="SIR Model for 100 days, β = $(p[1]), γ = $(p[2])") -savefig(joinpath(@OUTPUT, "ode_solve.svg")); # hide +f = Figure() +plt = + data(stack(DataFrame(sol_ode), Not(:timestamp))) * + mapping( + :timestamp => L"t", + :value => L"N"; + color=:variable => renamer(["value1" => "S", "value2" => "I", "value3" => "R"]), + ) * + visual(Lines; linewidth=3) +draw!(f[1, 1], plt; axis=(; title="SIR Model for 100 days, β = $(p[1]), γ = $(p[2])")) +save(joinpath(@OUTPUT, "ode_solve.svg"), f); # hide # \fig{ode_solve} # \center{*SIR ODE 
Solution for Brazil's 100 days of COVID in early 2020*} \\ @@ -168,18 +186,17 @@ setprogress!(false) # hide u0 = [N - I, I, r₀] # S,I,R p = [β, γ] tspan = (1.0, float(l)) - prob = ODEProblem(sir_ode!, - u0, - tspan, - p) - sol = solve(prob, - Tsit5(), # similar to Dormand-Prince RK45 in Stan but 20% faster - saveat=1.0) + prob = ODEProblem(sir_ode!, u0, tspan, p) + sol = solve( + prob, + Tsit5(); # similar to Dormand-Prince RK45 in Stan but 20% faster + saveat=1.0, + ) solᵢ = Array(sol)[2, :] # New Infected solᵢ = max.(1e-4, solᵢ) # numerical issues arose #likelihood - infected ~ arraydist(LazyArray(@~ NegativeBinomial2.(solᵢ, ϕ))) + return infected ~ arraydist(LazyArray(@~ NegativeBinomial2.(solᵢ, ϕ))) end; # Now run the model and inspect our parameters estimates.
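For orientation only, here is a minimal sketch of what that sampling step might look like, reusing the same `sample`/`summarystats` pattern the earlier tutorials in this patch follow. Both the constructor name `bayes_sir` and feeding it `br[:, :new_confirmed]` are assumptions for illustration; the actual `@model function` signature lies outside this hunk:

```julia
# Illustrative sketch: `bayes_sir` is an assumed name for the @model defined above,
# and `br[:, :new_confirmed]` is assumed to be the vector of daily infected counts.
infected = br[:, :new_confirmed]                                # daily new cases aggregated earlier
model_sir = bayes_sir(infected)                                 # instantiate the Turing model
chain_sir = sample(model_sir, NUTS(), MCMCThreads(), 1_000, 4)  # 4 parallel chains, 1_000 draws each
summarystats(chain_sir)                                         # posterior summaries of the sampled parameters
```

The `MCMCThreads()` call runs four chains in parallel, matching the sampling calls used in the regression and multilevel tutorials above.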