From 670d2c7e24e841e80cb0944d50c607471debaa1e Mon Sep 17 00:00:00 2001 From: Jose Storopoli Date: Mon, 26 Dec 2022 11:49:39 -0300 Subject: [PATCH] Move images to Makie (#72) * added .JuliaFormatter.toml * makie 02_bayes_stats.jl * makie 03_prob_dist * format 01_why_Julia * format everything * makie conversion * using stuff multiline * makie 04_Turing * remove plots stuff from Project.toml * animations and makie 05_MCMC * fix setrange in 12_Turing_tricks --- .JuliaFormatter.toml | 1 + Project.toml | 4 +- _literate/01_why_Julia.jl | 18 +- _literate/02_bayes_stats.jl | 54 +-- _literate/03_prob_dist.jl | 209 ++++++------ _literate/04_Turing.jl | 53 +-- _literate/05_MCMC.jl | 539 +++++++++++++++++------------- _literate/06_linear_reg.jl | 6 +- _literate/07_logistic_reg.jl | 24 +- _literate/08_ordinal_reg.jl | 49 +-- _literate/09_count_reg.jl | 39 +-- _literate/10_robust_reg.jl | 71 ++-- _literate/11_multilevel_models.jl | 33 +- _literate/12_Turing_tricks.jl | 62 ++-- _literate/13_epi_models.jl | 77 +++-- 15 files changed, 686 insertions(+), 553 deletions(-) create mode 100644 .JuliaFormatter.toml diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000..c7439503 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "blue" \ No newline at end of file diff --git a/Project.toml b/Project.toml index 76801f81..396996c2 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" +Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -14,15 +15,12 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Franklin = "713c75ef-9fc9-4b05-94a9-213340da978e" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" -LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" NodeJS = "2bd173c7-0d6d-553b-b6af-13a54713934c" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] diff --git a/_literate/01_why_Julia.jl b/_literate/01_why_Julia.jl index ada9e33c..6ba42a12 100644 --- a/_literate/01_why_Julia.jl +++ b/_literate/01_why_Julia.jl @@ -76,7 +76,11 @@ # Here is Julia: # ```julia -# using Random, StatsBase, DataFrames, BenchmarkTools, Chain +# using Random +# using StatsBase +# using DataFrames +# using BenchmarkTools +# using Chain # Random.seed!(123) # # n = 10_000 @@ -474,12 +478,16 @@ # to define the action that one type `Pet` takes when it meets another `Pet`: abstract type Pet end -struct Dog <: Pet name::String end -struct Cat <: Pet name::String end +struct Dog <: Pet + name::String +end +struct Cat <: Pet + name::String +end function encounter(a::Pet, b::Pet) verb = meets(a, b) - println("$(a.name) meets $(b.name) and $verb") + return println("$(a.name) meets $(b.name) and $verb") end meets(a::Dog, b::Dog) = "sniffs"; @@ -639,7 +647,7 @@ using BenchmarkTools # with a simple column selection. 
We do this by defining a new method of the `*` function (multiplier function) # of `Base` Julia. Additionally we also create a new optimized method of `inner()` for dealing with `OneHotVector`: -import Base:* +import Base: * *(A::AbstractMatrix, v::OneHotVector) = A[:, v.ind] inner(v::OneHotVector, A, w::OneHotVector) = A[v.ind, w.ind] diff --git a/_literate/02_bayes_stats.jl b/_literate/02_bayes_stats.jl index 780c58e4..4b2290ad 100644 --- a/_literate/02_bayes_stats.jl +++ b/_literate/02_bayes_stats.jl @@ -416,24 +416,23 @@ # and the 75% percentile of the probability density of $\theta$. In this example, MLE leads to estimated values that are not # consistent with the actual probability density of the value of $\theta$. -using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions d = LogNormal(0, 2) range_d = 0:0.001:4 q25 = quantile(d, 0.25) q75 = quantile(d, 0.75) -plot((range_d, pdf.(d, range_d)), - leg=false, - xlims=(-0.2, 4.2), - lw=3, - xlabel=L"\theta", - ylabel="Density") -scatter!((mode(d), pdf(d, mode(d))), mc=:green, ms=5) -plot!(range(q25, stop=q75, length=100), - x -> pdf(d, x), - lc=false, fc=:blues, - fill=true, fa=0.5) -savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide +credint = range(q25; stop=q75, length=100) +f, ax, l = lines( + range_d, + pdf.(d, range_d); + linewidth=3, + axis=(; limits=(-0.2, 4.2, nothing, nothing), xlabel=L"\theta", ylabel="Density"), +) +scatter!(ax, mode(d), pdf(d, mode(d)); color=:green, markersize=12) +band!(ax, credint, 0.0, pdf.(d, credint); color=(:steelblue, 0.5)) +save(joinpath(@OUTPUT, "lognormal.svg"), f); # hide # \fig{lognormal} # \center{_**Log-Normal**: Maximum Likelihood Estimate vs Credible Intervals_} \\ @@ -454,19 +453,22 @@ range_d = -2:0.01:14 sim_d = rand(d, 10_000) q25 = quantile(sim_d, 0.25) q75 = quantile(sim_d, 0.75) -plot((range_d, pdf.(d, range_d)), - leg=false, - xlims=(-2, 14), - xticks=[0, 5, 10], - lw=3, - xlabel=L"\theta", - ylabel="Density") -scatter!((mode(d2), pdf(d, mode(d2))), mc=:green, ms=5) -plot!(range(q25, stop=q75, length=100), - x -> pdf(d, x), - lc=false, fc=:blues, - fill=true, fa=0.5) -savefig(joinpath(@OUTPUT, "mixture.svg")); # hide +credint = range(q25; stop=q75, length=100) + +f, ax, l = lines( + range_d, + pdf.(d, range_d); + linewidth=3, + axis=(; + limits=(-2, 14, nothing, nothing), + xticks=[0, 5, 10], + xlabel=L"\theta", + ylabel="Density", + ), +) +scatter!(ax, mode(d2), pdf(d, mode(d2)); color=:green, markersize=12) +band!(ax, credint, 0.0, pdf.(d, credint); color=(:steelblue, 0.5)) +save(joinpath(@OUTPUT, "mixture.svg"), f); # hide # \fig{mixture} # \center{_**Mixture**: Maximum Likelihood Estimate vs Credible Intervals_} \\ diff --git a/_literate/03_prob_dist.jl b/_literate/03_prob_dist.jl index 264d8ba3..8fd815f3 100644 --- a/_literate/03_prob_dist.jl +++ b/_literate/03_prob_dist.jl @@ -41,16 +41,20 @@ # Example: a 6-sided dice. 
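# As a quick sanity check (a minimal snippet using only `Distributions.jl`; `d6` is just an
# illustrative name), every face of a fair six-sided die carries the same mass of
# 1/(6 - 1 + 1) = 1/6 and the expected value is (1 + 6)/2 = 3.5:

using Distributions

d6 = DiscreteUniform(1, 6)
pdf(d6, 3) # ≈ 0.167, the same mass for any face
mean(d6)   # 3.5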
-using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions -plot(DiscreteUniform(1, 6), - label="6-sided Dice", - markershape=:circle, +f, ax, b = barplot( + DiscreteUniform(1, 6); + axis=(; + title="6-sided Dice", xlabel=L"\theta", ylabel="Mass", - ylims=(0, 0.3) - ) -savefig(joinpath(@OUTPUT, "discrete_uniform.svg")); # hide + xticks=1:6, + limits=(nothing, nothing, 0, 0.3), + ), +) +save(joinpath(@OUTPUT, "discrete_uniform.svg"), f); # hide # \fig{discrete_uniform} # \center{*Discrete Uniform between 1 and 6*} \\ @@ -68,19 +72,23 @@ savefig(joinpath(@OUTPUT, "discrete_uniform.svg")); # hide # Example: Whether the patient survived or died or whether the customer completes their purchase or not. -plot(Bernoulli(0.5), - markershape=:circle, - label=L"p=0.5", - alpha=0.5, +f, ax1, b = barplot( + Bernoulli(0.5); + width=0.3, + axis=(; + title=L"p=0.5", xlabel=L"\theta", ylabel="Mass", - ylim=(0, 1) - ) -plot!(Bernoulli(0.2), - markershape=:circle, - label=L"p=0.2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "bernoulli.svg")); # hide + xticks=0:1, + limits=(nothing, nothing, 0, 1), + ), +) +ax2 = Axis( + f[1, 2]; title=L"p=0.2", xlabel=L"\theta", xticks=0:1, limits=(nothing, nothing, 0, 1) +) +barplot!(ax2, Bernoulli(0.2); width=0.3) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "bernoulli.svg"), f); # hide # \fig{bernoulli} # \center{*Bernoulli with $p = \{ 0.5, 0.2 \}$*} \\ @@ -98,19 +106,13 @@ savefig(joinpath(@OUTPUT, "bernoulli.svg")); # hide # Example: number of heads in 5 coin flips. -plot(Binomial(5, 0.5), - markershape=:circle, - label=L"p=0.5", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(Binomial(5, 0.2), - markershape=:circle, - label=L"p=0.2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "binomial.svg")); # hide - +f, ax1, b = barplot( + Binomial(5, 0.5); axis=(; title=L"p=0.5", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"p=0.2", xlabel=L"\theta") +barplot!(ax2, Binomial(5, 0.2)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "binomial.svg"), f); # hide # \fig{binomial} # \center{*Binomial with $n=5$ and $p = \{ 0.5, 0.2 \}$*} \\ @@ -126,27 +128,23 @@ savefig(joinpath(@OUTPUT, "binomial.svg")); # hide # Example: Number of emails you receive daily. Number of holes you find on the street. -plot(Poisson(1), - markershape=:circle, - label=L"\lambda=1", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(Poisson(4), - markershape=:circle, - label=L"\lambda=4", - alpha=0.5) -savefig(joinpath(@OUTPUT, "poisson.svg")); # hide +f, ax1, b = barplot( + Poisson(1); axis=(; title=L"\lambda=1", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"\lambda=4", xlabel=L"\theta") +barplot!(ax2, Poisson(4)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "poisson.svg"), f); # hide # \fig{poisson} # \center{*Poisson with $\lambda = \{ 1, 4 \}$*} \\ # ### Negative Binomial -# The negative binomial distribution describes an event of the number of successes in a sequence of $n$ independent experiment(s), -# each asking a yes-no question with probability $p$ until $k$ success(es) is obtained. Note that it becomes identical to the -# Poisson distribution at the limit of $k \to \infty$. This makes the negative binomial a robust option to replace a Poisson +# The negative binomial distribution describes an event of the number of failures before the $k$th success in a sequence of $n$ independent experiment(s), +# each asking a yes-no question with probability $p$. 
+# Note that it becomes identical to the Poisson distribution at the limit of $k \to \infty$. +# This makes the negative binomial a robust option to replace a Poisson # distribution to model phenomena with a overdispersion* (excess expected variation in data). # The negative binomial distribution has two parameters and its notation is $\text{NB} (k, p)$ or $\text{Negative-Binomial} (k, p)$: @@ -159,18 +157,13 @@ savefig(joinpath(@OUTPUT, "poisson.svg")); # hide # Example: Annual count of tropical cyclones. -plot(NegativeBinomial(1, 0.5), - markershape=:circle, - label=L"k=1", - alpha=0.5, - xlabel=L"\theta", - ylabel="Mass" - ) -plot!(NegativeBinomial(2, 0.5), - markershape=:circle, - label=L"k=2", - alpha=0.5) -savefig(joinpath(@OUTPUT, "negbinomial.svg")); # hide +f, ax1, b = barplot( + NegativeBinomial(1, 0.5); axis=(; title=L"k=1", xlabel=L"\theta", ylabel="Mass") +) +ax2 = Axis(f[1, 2]; title=L"k=2", xlabel=L"\theta") +barplot!(ax2, NegativeBinomial(2, 0.5)) +linkaxes!(ax1, ax2) +save(joinpath(@OUTPUT, "negbinomial.svg"), f); # hide # \fig{negbinomial} # \center{*Negative Binomial with $p=0.5$ and $r = \{ 1, 2 \}$*} \\ @@ -202,16 +195,16 @@ savefig(joinpath(@OUTPUT, "negbinomial.svg")); # hide # Example: Height, Weight, etc. -plot(Normal(0, 1), - label=L"\sigma=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(-4, 4) - ) -plot!(Normal(0, 0.5), label=L"\sigma=0.5", lw=5) -plot!(Normal(0, 2), label=L"\sigma=2", lw=5) -savefig(joinpath(@OUTPUT, "normal.svg")); # hide +f, ax, l = lines( + Normal(0, 1); + label=L"\sigma=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(-4, 4, nothing, nothing)), +) +lines!(ax, Normal(0, 0.5); label=L"\sigma=0.5", linewidth=5) +lines!(ax, Normal(0, 2); label=L"\sigma=2", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "normal.svg"), f); # hide # \fig{normal} # \center{*Normal with $\mu=0$ and $\sigma = \{ 1, 0.5, 2 \}$*} \\ @@ -234,16 +227,16 @@ savefig(joinpath(@OUTPUT, "normal.svg")); # hide # * Mean ($\mu$): natural logarithm of the mean the distribution # * Standard Deviation ($\sigma$): natural logarithm of the variance of the distribution ($\sigma^2$) is a measure of the dispersion of the observations in relation to the mean -plot(LogNormal(0, 1), - label=L"\sigma=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 3) - ) -plot!(LogNormal(0, 0.25), label=L"\sigma=0.25", lw=5) -plot!(LogNormal(0, 0.5), label=L"\sigma=0.5", lw=5) -savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide +f, ax, l = lines( + LogNormal(0, 1); + label=L"\sigma=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 3, nothing, nothing)), +) +lines!(ax, LogNormal(0, 0.25); label=L"\sigma=0.25", linewidth=5) +lines!(ax, LogNormal(0, 0.5); label=L"\sigma=0.5", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "lognormal.svg"), f); # hide # \fig{lognormal} # \center{*Log-Normal with $\mu=0$ and $\sigma = \{ 1, 0.25, 0.5 \}$*} \\ @@ -259,16 +252,16 @@ savefig(joinpath(@OUTPUT, "lognormal.svg")); # hide # Example: How long until the next earthquake. How long until the next bus arrives. 
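# A property worth keeping in mind for waiting-time examples like these is that the
# exponential distribution is *memoryless*: how long you have already waited tells you
# nothing about how much longer you will wait. A quick numerical check of this
# (a toy snippet, with `d_wait` being just an illustrative name):

using Distributions

d_wait = Exponential(1)
# P(X > 3 | X > 1) equals P(X > 2): the elapsed wait does not change the odds
ccdf(d_wait, 3) / ccdf(d_wait, 1) ≈ ccdf(d_wait, 2) # true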
-plot(Exponential(1), - label=L"\lambda=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 4.5) - ) -plot!(Exponential(0.5), label=L"\lambda=0.5", lw=5) -plot!(Exponential(1.5), label=L"\lambda=2", lw=5) -savefig(joinpath(@OUTPUT, "exponential.svg")); # hide +f, ax, l = lines( + Exponential(1); + label=L"\lambda=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 4.5, nothing, nothing)), +) +lines!(ax, Exponential(0.5); label=L"\lambda=0.5", linewidth=5) +lines!(ax, Exponential(1.5); label=L"\lambda=2", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "exponential.svg"), f); # hide # \fig{exponential} # \center{*Exponential with $\lambda = \{ 1, 0.5, 1.5 \}$*} \\ @@ -292,16 +285,16 @@ savefig(joinpath(@OUTPUT, "exponential.svg")); # hide # Example: A database full of outliers. -plot(TDist(2), - label=L"\nu=2", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(-4, 4) - ) -plot!(TDist(8), label=L"\nu=8", lw=5) -plot!(TDist(30), label=L"\nu=30", lw=5) -savefig(joinpath(@OUTPUT, "tdist.svg")); # hide +f, ax, l = lines( + TDist(2); + label=L"\nu=2", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(-4, 4, nothing, nothing)), +) +lines!(ax, TDist(8); label=L"\nu=8", linewidth=5) +lines!(ax, TDist(30); label=L"\nu=30", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "tdist.svg"), f); # hide # \fig{tdist} # \center{*Student-$t$ with $\nu = \{ 2, 8, 30 \}$*} \\ @@ -319,16 +312,16 @@ savefig(joinpath(@OUTPUT, "tdist.svg")); # hide # Example: A basketball player has made already scored 5 free throws while missing 3 in a total of 8 attempts # -- $\text{Beta}(3, 5)$. -plot(Beta(1, 1), - label=L"a=b=1", - lw=5, - xlabel=L"\theta", - ylabel="Density", - xlims=(0, 1) - ) -plot!(Beta(3, 2), label=L"a=3, b=2", lw=5) -plot!(Beta(2, 3), label=L"a=2, b=3", lw=5) -savefig(joinpath(@OUTPUT, "beta.svg")); # hide +f, ax, l = lines( + Beta(1, 1); + label=L"a=b=1", + linewidth=5, + axis=(; xlabel=L"\theta", ylabel="Density", limits=(0, 1, nothing, nothing)), +) +lines!(ax, Beta(3, 2); label=L"a=3, b=2", linewidth=5) +lines!(ax, Beta(2, 3); label=L"a=2, b=3", linewidth=5) +axislegend(ax) +save(joinpath(@OUTPUT, "beta.svg"), f); # hide # \fig{beta} # \center{*Beta with different values of $a$ and $b$*} \\ diff --git a/_literate/04_Turing.jl b/_literate/04_Turing.jl index 65319d61..34fb4f12 100644 --- a/_literate/04_Turing.jl +++ b/_literate/04_Turing.jl @@ -84,19 +84,18 @@ # Graphically this means: -using Plots, StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions dice = DiscreteUniform(1, 6) -plot(dice, +f, ax, b = barplot( + dice; label="six-sided Dice", - markershape=:circle, - ms=5, - xlabel=L"\theta", - ylabel="Mass", - ylims=(0, 0.3) + axis=(; xlabel=L"\theta", ylabel="Mass", xticks=1:6, limits=(nothing, nothing, 0, 0.3)), ) -vline!([mean(dice)], lw=5, col=:red, label=L"E(\theta)") -savefig(joinpath(@OUTPUT, "dice.svg")); # hide +vlines!(ax, [mean(dice)]; linewidth=5, color=:red, label=L"E(\theta)") +axislegend(ax) +save(joinpath(@OUTPUT, "dice.svg"), f); # hide # \fig{dice} # \center{*A "fair" six-sided Dice: Discrete Uniform between 1 and 6*} \\ @@ -148,15 +147,15 @@ using Random Random.seed!(123); -data = rand(DiscreteUniform(1, 6), 1_000); +my_data = rand(DiscreteUniform(1, 6), 1_000); -# The vector `data` is a 1,000-length vector of `Int`s ranging from 1 to 6, just like how a regular six-sided dice outcome would be: +# The vector `my_data` is a 1,000-length vector of `Int`s ranging from 1 to 6, just like how a 
regular six-sided dice outcome would be: -first(data, 5) +first(my_data, 5) -# Once the model is specified we instantiate the model with the single parameter `y` as the simulated `data`: +# Once the model is specified we instantiate the model with the single parameter `y` as the simulated `my_data`: -model = dice_throw(data); +model = dice_throw(my_data); # Next, we call Turing's `sample()` function that takes a Turing model as a first argument, along with a # sampler as the second argument, and the third argument is the number of iterations. Here, I will use the `NUTS()` sampler from @@ -207,10 +206,22 @@ sum([idx * i for (i, idx) in enumerate(summaries[:, :mean])]) typeof(chain) -# We can use plotting capabilities of `MCMCChains.jl` with any `Chains` object: - -plot(chain) -savefig(joinpath(@OUTPUT, "chain.svg")); # hide +# Since `Chains` is a [`Tables.jl`-compatible](https://github.com/JuliaData/Tables.jl/blob/main/INTEGRATIONS.md) data structure, +# we can use all of the plotting capabilities from [`AlgebraOfGraphics.jl`](https://aog.makie.org/stable/). + +using AlgebraOfGraphics +using AlgebraOfGraphics: density +#exclude additional information such as log probability +params = names(chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt1 = data(chain) * mapping(:iteration) * chain_mapping * visual(Lines) +plt2 = data(chain) * chain_mapping * density() +f = Figure(; resolution=(800, 600)) +draw!(f[1, 1], plt1) +draw!(f[1, 2], plt2; axis=(; ylabel="density")) +save(joinpath(@OUTPUT, "chain.svg"), f); # hide # \fig{chain} # \center{*Visualization of a MCMC Chain simulation*} \\ @@ -244,12 +255,12 @@ prior_chain = sample(model, Prior(), 2_000); # To draw from the prior and posterior predictive distributions we instantiate a "predictive model", *i.e.* a Turing model but with the observations set to `missing`, and then calling `predict()` on the predictive model and the previously drawn samples. # First let's do the *prior* predictive check: -missing_data = similar(data, Missing) # vector of `missing` +missing_data = similar(my_data, Missing) # vector of `missing` model_missing = dice_throw(missing_data) # instantiate the "predictive model prior_check = predict(model_missing, prior_chain); -# Here we are creating a `missing_data` object which is a `Vector` of the same length as the `data` and populated with type `missing` as values. -# We then instantiate a new `dice_throw` model with the `missing_data` vector as the `data` argument. +# Here we are creating a `missing_data` object which is a `Vector` of the same length as the `my_data` and populated with type `missing` as values. +# We then instantiate a new `dice_throw` model with the `missing_data` vector as the `y` argument. # Finally, we call `predict()` on the predictive model and the previously drawn samples, which in our case are the samples from the prior distribution (`prior_chain`). # Note that `predict()` returns a `Chains` object from `MCMCChains.jl`: diff --git a/_literate/05_MCMC.jl b/_literate/05_MCMC.jl index 8e851be7..d62f7944 100644 --- a/_literate/05_MCMC.jl +++ b/_literate/05_MCMC.jl @@ -118,7 +118,9 @@ # I will do some simulations to illustrate MCMC algorithms and techniques. 
So, here's the initial setup: -using Plots, StatsPlots, Distributions, LaTeXStrings, Random +using CairoMakie +using Distributions +using Random Random.seed!(123); @@ -181,8 +183,6 @@ const Σ = [1 0.8; 0.8 1] const mvnormal = MvNormal(μ, Σ) -data = rand(mvnormal, N)'; - # In the figure below it is possible to see a contour plot of the PDF of a multivariate normal distribution composed of two normal # variables $X$ and $Y$, both with mean 0 and standard deviation 1. # The correlation between $X$ and $Y$ is $\rho = 0.8$: @@ -190,16 +190,22 @@ data = rand(mvnormal, N)'; x = -3:0.01:3 y = -3:0.01:3 dens_mvnormal = [pdf(mvnormal, [i, j]) for i in x, j in y] -contour(x, y, dens_mvnormal, xlabel=L"X", ylabel=L"Y", fill=true) -savefig(joinpath(@OUTPUT, "countour_mvnormal.svg")); # hide +f, ax, c = contourf(x, y, dens_mvnormal; axis=(; xlabel=L"X", ylabel=L"Y")) +Colorbar(f[1, 2], c) +save(joinpath(@OUTPUT, "countour_mvnormal.svg"), f); # hide # \fig{countour_mvnormal} # \center{*Countour Plot of the PDF of a Multivariate Normal Distribution*} \\ # Also a surface plot can be seen below for you to get a 3-D intuition of what is going on: -surface(x, y, dens_mvnormal, xlabel=L"X", ylabel=L"Y", zlabel="PDF") -savefig(joinpath(@OUTPUT, "surface_mvnormal.svg")); # hide +f, ax, s = surface( + x, + y, + dens_mvnormal; + axis=(type=Axis3, xlabel=L"X", ylabel=L"Y", zlabel="PDF", azimuth=pi / 8), +) +save(joinpath(@OUTPUT, "surface_mvnormal.svg"), f); # hide # \fig{surface_mvnormal} # \center{*Surface Plot of the PDF of a Multivariate Normal Distribution*} \\ @@ -339,11 +345,18 @@ savefig(joinpath(@OUTPUT, "surface_mvnormal.svg")); # hide # Here is a simple implementation in Julia: -function metropolis(S::Int64, width::Float64, ρ::Float64; - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function metropolis( + S::Int64, + width::Float64, + ρ::Float64; + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -413,26 +426,65 @@ mean(summarystats(chain_met)[:, :ess]) / S # The animation in figure below shows the first 100 simulations of the Metropolis algorithm used to generate `X_met`. # Note that in several iterations the proposal is rejected and the algorithm samples the parameters $\theta_1$ and $\theta_2$ -# from the previous state (which becomes the current one, since the proposal is refused). The blue-filled ellipsis represents +# from the previous state (which becomes the current one, since the proposal is refused). The blue ellipsis represents # the 90% HPD of our toy example's bivariate normal distribution. # Note: `HPD` stands for *Highest Probability Density* (which in our case the posterior's 90% probability range). -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, - label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") +using LinearAlgebra: eigvals, eigvecs +#source: https://discourse.julialang.org/t/plot-ellipse-in-makie/82814/4 +function getellipsepoints(cx, cy, rx, ry, θ) + t = range(0, 2 * pi; length=100) + ellipse_x_r = @. rx * cos(t) + ellipse_y_r = @. ry * sin(t) + R = [cos(θ) sin(θ); -sin(θ) cos(θ)] + r_ellipse = [ellipse_x_r ellipse_y_r] * R + x = @. cx + r_ellipse[:, 1] + y = @. 
cy + r_ellipse[:, 2] + return (x, y) +end +function getellipsepoints(μ, Σ; confidence=0.95) + quant = sqrt(quantile(Chisq(2), confidence)) + cx = μ[1] + cy = μ[2] + + egvs = eigvals(Σ) + if egvs[1] > egvs[2] + idxmax = 1 + largestegv = egvs[1] + smallesttegv = egvs[2] + else + idxmax = 2 + largestegv = egvs[2] + smallesttegv = egvs[1] + end + + rx = quant * sqrt(largestegv) + ry = quant * sqrt(smallesttegv) -met_anim = @animate for i in 1:100 - scatter!(plt, (X_met[i, 1], X_met[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_met[i:i+1, 1], X_met[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + eigvecmax = eigvecs(Σ)[:, idxmax] + θ = atan(eigvecmax[2] / eigvecmax[1]) + if θ < 0 + θ += 2 * π + end + + return getellipsepoints(cx, cy, rx, ry, θ) +end + +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "met_anim.gif"); framerate=5) do frame + for i in 1:100 + scatter!(ax, (X_met[i, 1], X_met[i, 2]); color=(:red, 0.5)) + linesegments!(X_met[i:(i + 1), 1], X_met[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(met_anim, joinpath(@OUTPUT, "met_anim.gif"), fps=5); # hide # \fig{met_anim} # \center{*Animation of the First 100 Samples Generated from the Metropolis Algorithm*} \\ @@ -441,38 +493,35 @@ gif(met_anim, joinpath(@OUTPUT, "met_anim.gif"), fps=5); # hide const warmup = 1_000 -scatter((X_met[warmup:warmup+1_000, 1], X_met[warmup:warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "met_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_met[warmup:(warmup + 1_000), 1], + X_met[warmup:(warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "met_first1000.svg"), f); # hide # \fig{met_first1000} # \center{*First 1,000 Samples Generated from the Metropolis Algorithm after warm-up*} \\ # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. 
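# Before plotting them, a quick numeric check (assuming the `X_met` matrix and the `warmup`
# constant defined above): the post-warm-up sample moments should land close to the true
# values, μ = [0, 0] and ρ = 0.8:

using Statistics: mean, cor

mean(X_met[warmup:end, :]; dims=1) # both column means should be near 0
cor(X_met[warmup:end, :])          # off-diagonal entries should be near 0.8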
-scatter((X_met[warmup:end, 1], X_met[warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "met_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_met[warmup:end, 1], X_met[warmup:end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "met_all.svg"), f); # hide # \fig{met_all} # \center{*All 9,000 Samples Generated from the Metropolis Algorithm after warm-up*} \\ @@ -558,11 +607,17 @@ savefig(joinpath(@OUTPUT, "met_all.svg")); # hide # \end{aligned} # $$ -function gibbs(S::Int64, ρ::Float64; - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function gibbs( + S::Int64, + ρ::Float64; + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -627,61 +682,57 @@ summarystats(chain_gibbs) # The animation in figure below shows the first 100 simulations of the Gibbs algorithm used to generate `X_gibbs`. # Note that all proposals are accepted now, so the at each iteration we sample new parameters values. -# The blue-filled ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. +# The blue ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -gibbs_anim = @animate for i in 1:200 - scatter!(plt, (X_gibbs[i, 1], X_gibbs[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_gibbs[i:i+1, 1], X_gibbs[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "gibbs_anim.gif"); framerate=5) do frame + for i in 1:200 + scatter!(ax, (X_gibbs[i, 1], X_gibbs[i, 2]); color=(:red, 0.5)) + linesegments!(X_gibbs[i:(i + 1), 1], X_gibbs[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(gibbs_anim, joinpath(@OUTPUT, "gibbs_anim.gif"), fps=5); # hide # \fig{gibbs_anim} # \center{*Animation of the First 100 Samples Generated from the Gibbs Algorithm*} \\ # Now let's take a look how the first 1,000 simulations were, excluding 1,000 initial iterations as warm-up. 
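# Before we inspect those draws, one more small check of the full conditionals that drive
# each Gibbs update (a toy calculation for our bivariate normal with zero means, unit
# variances and ρ = 0.8, using throwaway variable names): conditioning on one coordinate
# being 1.0, the other coordinate is normal with mean ρ · 1.0 and standard deviation √(1 − ρ²):

ρ_check = 0.8
cond_mean = ρ_check * 1.0     # 0.8
cond_sd = sqrt(1 - ρ_check^2) # 0.6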
-scatter((X_gibbs[2*warmup:2*warmup+1_000, 1], X_gibbs[2*warmup:2*warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "gibbs_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_gibbs[(2 * warmup):(2 * warmup + 1_000), 1], + X_gibbs[(2 * warmup):(2 * warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "gibbs_first1000.svg"), f); # hide # \fig{gibbs_first1000} # \center{*First 1,000 Samples Generated from the Gibbs Algorithm after warm-up*} \\ # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. -scatter((X_gibbs[2*warmup:end, 1], X_gibbs[2*warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "gibbs_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_gibbs[(2 * warmup):end, 1], X_gibbs[(2 * warmup):end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "gibbs_all.svg"), f); # hide # \fig{gibbs_all} # \center{*All 9,000 Samples Generated from the Gibbs Algorithm after warm-up*} \\ @@ -697,7 +748,7 @@ savefig(joinpath(@OUTPUT, "gibbs_all.svg")); # hide # First, let's defined 4 different pairs of starting points using a nice Cartesian product # from Julia's `Base.Iterators`: -const starts = Iterators.product((-2.5, 2.5), (2.5, -2.5)) |> collect +const starts = collect(Iterators.product((-2.5, 2.5), (2.5, -2.5))) # Also, I will restrict this simulation to 100 samples: @@ -705,10 +756,18 @@ const S_parallel = 100; # Additionally, note that we are using different `seed`s: -X_met_1 = metropolis(S_parallel, width, ρ, seed=124, start_x=first(starts[1]), start_y=last(starts[1])); -X_met_2 = metropolis(S_parallel, width, ρ, seed=125, start_x=first(starts[2]), start_y=last(starts[2])); -X_met_3 = metropolis(S_parallel, width, ρ, seed=126, start_x=first(starts[3]), start_y=last(starts[3])); -X_met_4 = metropolis(S_parallel, width, ρ, seed=127, start_x=first(starts[4]), start_y=last(starts[4])); +X_met_1 = metropolis( + S_parallel, width, ρ; seed=124, start_x=first(starts[1]), start_y=last(starts[1]) +); +X_met_2 = metropolis( + S_parallel, width, ρ; seed=125, start_x=first(starts[2]), start_y=last(starts[2]) +); +X_met_3 = metropolis( + S_parallel, width, ρ; seed=126, start_x=first(starts[3]), start_y=last(starts[3]) +); +X_met_4 = metropolis( + S_parallel, width, ρ; seed=127, start_x=first(starts[4]), start_y=last(starts[4]) +); # There have been some significant changes in the approval rate for Metropolis proposals. 
All were around 13%-24%, # this is due to the low number of samples (only 100 for each Markov chain), if the samples were larger we would see @@ -718,80 +777,86 @@ X_met_4 = metropolis(S_parallel, width, ρ, seed=127, start_x=first(starts[4]), # Now let's take a look on how those 4 Metropolis Markov chains sample the parameter space starting from different positions. # Each chain will have its own marker and path color, so that we can see their different behavior: -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:grey, - label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - +using Colors const logocolors = Colors.JULIA_LOGO_COLORS; - -parallel_met = Animation() -for i in 1:99 - scatter!(plt, (X_met_1[i, 1], X_met_1[i, 2]), - label=false, mc=logocolors.blue, ma=0.5) - plot!(X_met_1[i:i+1, 1], X_met_1[i:i+1, 2], seriestype=:path, - lc=logocolors.blue, la=0.5, label=false) - scatter!(plt, (X_met_2[i, 1], X_met_2[i, 2]), - label=false, mc=logocolors.red, ma=0.5) - plot!(X_met_2[i:i+1, 1], X_met_2[i:i+1, 2], seriestype=:path, - lc=logocolors.red, la=0.5, label=false) - scatter!(plt, (X_met_3[i, 1], X_met_3[i, 2]), - label=false, mc=logocolors.green, ma=0.5) - plot!(X_met_3[i:i+1, 1], X_met_3[i:i+1, 2], seriestype=:path, - lc=logocolors.green, la=0.5, label=false) - scatter!(plt, (X_met_4[i, 1], X_met_4[i, 2]), - label=false, mc=logocolors.purple, ma=0.5) - plot!(X_met_4[i:i+1, 1], X_met_4[i:i+1, 2], seriestype=:path, - lc=logocolors.purple, la=0.5, label=false) - frame(parallel_met) +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "parallel_met.gif"); framerate=5) do frame + for i in 1:99 + scatter!(ax, (X_met_1[i, 1], X_met_1[i, 2]); color=(logocolors.blue, 0.5)) + linesegments!( + X_met_1[i:(i + 1), 1], X_met_1[i:(i + 1), 2]; color=(logocolors.blue, 0.5) + ) + scatter!(ax, (X_met_2[i, 1], X_met_2[i, 2]); color=(logocolors.red, 0.5)) + linesegments!( + X_met_2[i:(i + 1), 1], X_met_2[i:(i + 1), 2]; color=(logocolors.red, 0.5) + ) + scatter!(ax, (X_met_3[i, 1], X_met_3[i, 2]); color=(logocolors.green, 0.5)) + linesegments!( + X_met_3[i:(i + 1), 1], X_met_3[i:(i + 1), 2]; color=(logocolors.green, 0.5) + ) + scatter!(ax, (X_met_4[i, 1], X_met_4[i, 2]); color=(logocolors.purple, 0.5)) + linesegments!( + X_met_4[i:(i + 1), 1], X_met_4[i:(i + 1), 2]; color=(logocolors.purple, 0.5) + ) + recordframe!(frame) + end end -gif(parallel_met, joinpath(@OUTPUT, "parallel_met.gif"), fps=5); # hide # \fig{parallel_met} # \center{*Animation of 4 Parallel Metropolis Markov Chains*} \\ # Now we'll do the the same for Gibbs, taking care to provide also different `seed`s and starting points: -X_gibbs_1 = gibbs(S_parallel * 2, ρ, seed=124, start_x=first(starts[1]), start_y=last(starts[1])); -X_gibbs_2 = gibbs(S_parallel * 2, ρ, seed=125, start_x=first(starts[2]), start_y=last(starts[2])); -X_gibbs_3 = gibbs(S_parallel * 2, ρ, seed=126, start_x=first(starts[3]), start_y=last(starts[3])); -X_gibbs_4 = gibbs(S_parallel * 2, ρ, seed=127, start_x=first(starts[4]), start_y=last(starts[4])); +X_gibbs_1 = gibbs( + S_parallel * 2, ρ; seed=124, start_x=first(starts[1]), start_y=last(starts[1]) +); +X_gibbs_2 = gibbs( + S_parallel * 2, ρ; seed=125, start_x=first(starts[2]), start_y=last(starts[2]) +); +X_gibbs_3 = gibbs( + S_parallel * 2, ρ; seed=126, start_x=first(starts[3]), 
start_y=last(starts[3]) +); +X_gibbs_4 = gibbs( + S_parallel * 2, ρ; seed=127, start_x=first(starts[4]), start_y=last(starts[4]) +); # Now let's take a look on how those 4 Gibbs Markov chains sample the parameter space starting from different positions. # Each chain will have its own marker and path color, so that we can see their different behavior: -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:grey, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -parallel_gibbs = Animation() -for i in 1:199 - scatter!(plt, (X_gibbs_1[i, 1], X_gibbs_1[i, 2]), - label=false, mc=logocolors.blue, ma=0.5) - plot!(X_gibbs_1[i:i+1, 1], X_gibbs_1[i:i+1, 2], seriestype=:path, - lc=logocolors.blue, la=0.5, label=false) - scatter!(plt, (X_gibbs_2[i, 1], X_gibbs_2[i, 2]), - label=false, mc=logocolors.red, ma=0.5) - plot!(X_gibbs_2[i:i+1, 1], X_gibbs_2[i:i+1, 2], seriestype=:path, - lc=logocolors.red, la=0.5, label=false) - scatter!(plt, (X_gibbs_3[i, 1], X_gibbs_3[i, 2]), - label=false, mc=logocolors.green, ma=0.5) - plot!(X_gibbs_3[i:i+1, 1], X_gibbs_3[i:i+1, 2], seriestype=:path, - lc=logocolors.green, la=0.5, label=false) - scatter!(plt, (X_gibbs_4[i, 1], X_gibbs_4[i, 2]), - label=false, mc=logocolors.purple, ma=0.5) - plot!(X_gibbs_4[i:i+1, 1], X_gibbs_4[i:i+1, 2], seriestype=:path, - lc=logocolors.purple, la=0.5, label=false) - frame(parallel_gibbs) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "parallel_gibbs.gif"); framerate=5) do frame + for i in 1:199 + scatter!(ax, (X_gibbs_1[i, 1], X_gibbs_1[i, 2]); color=(logocolors.blue, 0.5)) + linesegments!( + X_gibbs_1[i:(i + 1), 1], X_gibbs_1[i:(i + 1), 2]; color=(logocolors.blue, 0.5) + ) + scatter!(ax, (X_gibbs_2[i, 1], X_gibbs_2[i, 2]); color=(logocolors.red, 0.5)) + linesegments!( + X_gibbs_2[i:(i + 1), 1], X_gibbs_2[i:(i + 1), 2]; color=(logocolors.red, 0.5) + ) + scatter!(ax, (X_gibbs_3[i, 1], X_gibbs_3[i, 2]); color=(logocolors.green, 0.5)) + linesegments!( + X_gibbs_3[i:(i + 1), 1], X_gibbs_3[i:(i + 1), 2]; color=(logocolors.green, 0.5) + ) + scatter!(ax, (X_gibbs_4[i, 1], X_gibbs_4[i, 2]); color=(logocolors.purple, 0.5)) + linesegments!( + X_gibbs_4[i:(i + 1), 1], X_gibbs_4[i:(i + 1), 2]; color=(logocolors.purple, 0.5) + ) + recordframe!(frame) + end end -gif(parallel_gibbs, joinpath(@OUTPUT, "parallel_gibbs.gif"), fps=5); # hide # \fig{parallel_gibbs} # \center{*Animation of 4 Parallel Gibbs Markov Chains*} \\ @@ -886,18 +951,25 @@ gif(parallel_gibbs, joinpath(@OUTPUT, "parallel_gibbs.gif"), fps=5); # hide # \theta^{t-1} & \text{otherwise} # \end{cases}$$ - # ### HMC -- Implementation # Alright let's code the HMC algorithm for our toy example's bivariate normal distribution: using ForwardDiff: gradient -function hmc(S::Int64, width::Float64, ρ::Float64; - L=40, ϵ=0.001, - μ_x::Float64=0.0, μ_y::Float64=0.0, - σ_x::Float64=1.0, σ_y::Float64=1.0, - start_x=-2.5, start_y=2.5, - seed=123) +function hmc( + S::Int64, + width::Float64, + ρ::Float64; + L=40, + ϵ=0.001, + μ_x::Float64=0.0, + μ_y::Float64=0.0, + σ_x::Float64=1.0, + σ_y::Float64=1.0, + start_x=-2.5, + start_y=2.5, + seed=123, +) rgn = MersenneTwister(seed) binormal = MvNormal([μ_x; μ_y], [σ_x ρ; ρ σ_y]) draws = Matrix{Float64}(undef, S, 2) @@ -954,7 +1026,7 @@ gradient(x -> logpdf(mvnormal, x), [1, -1]) # Now let's run our HMC Markov chain. 
# We are going to use $L = 40$ and (don't ask me how I found out) $\epsilon = 0.0856$: -X_hmc = hmc(S, width, ρ, ϵ=0.0856, L=40); +X_hmc = hmc(S, width, ρ; ϵ=0.0856, L=40); # Our acceptance rate is 20.79%. # As before lets' take a quick peek into `X_hmc`, we'll see it's a matrix of $X$ and $Y$ values as columns and the time $t$ as rows: @@ -988,62 +1060,57 @@ mean(summarystats(chain_hmc)[:, :ess]) / S # The animation in figure below shows the first 100 simulations of the HMC algorithm used to generate `X_hmc`. # Note that we have a gradient-guided proposal at each iteration, so the animation would resemble more like # a very lucky random-walk Metropolis [^luckymetropolis]. -# The blue-filled ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. +# The blue ellipsis represents the 90% HPD of our toy example's bivariate normal distribution. -plt = covellipse(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.3, - c=:steelblue, +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; label="90% HPD", - xlabel=L"\theta_1", ylabel=L"\theta_2") - -hmc_anim = @animate for i in 1:100 - scatter!(plt, (X_hmc[i, 1], X_hmc[i, 2]), - label=false, mc=:red, ma=0.5) - plot!(X_hmc[i:i+1, 1], X_hmc[i:i+1, 2], seriestype=:path, - lc=:green, la=0.5, label=false) + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +record(f, joinpath(@OUTPUT, "hmc_anim.gif"); framerate=5) do frame + for i in 1:100 + scatter!(ax, (X_hmc[i, 1], X_hmc[i, 2]); color=(:red, 0.5)) + linesegments!(X_hmc[i:(i + 1), 1], X_hmc[i:(i + 1), 2]; color=(:green, 0.5)) + recordframe!(frame) + end end -gif(hmc_anim, joinpath(@OUTPUT, "hmc_anim.gif"), fps=5); # hide # \fig{hmc_anim} # \center{*Animation of the First 100 Samples Generated from the HMC Algorithm*} \\ # As usual, let's take a look how the first 1,000 simulations were, excluding 1,000 initial iterations as warm-up. -scatter((X_hmc[warmup:warmup+1_000, 1], X_hmc[warmup:warmup+1_000, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") - - -savefig(joinpath(@OUTPUT, "hmc_first1000.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!( + ax, + X_hmc[warmup:(warmup + 1_000), 1], + X_hmc[warmup:(warmup + 1_000), 2]; + color=(:red, 0.3), +) +save(joinpath(@OUTPUT, "hmc_first1000.svg"), f); # hide # \fig{hmc_first1000} # \center{*First 1,000 Samples Generated from the HMC Algorithm after warm-up*} \\ - # And, finally, lets take a look in the all 9,000 samples generated after the warm-up of 1,000 iterations. 
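# As before, a quick numeric sanity check before plotting (again assuming `X_hmc` and
# `warmup` from above): the post-warm-up sample correlation should also approach ρ = 0.8:

using Statistics: cor

cor(X_hmc[warmup:end, :]) # off-diagonal entries should be near 0.8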
-scatter((X_hmc[warmup:end, 1], X_hmc[warmup:end, 2]), - label=false, mc=:red, ma=0.3, - xlims=(-3, 3), ylims=(-3, 3), - xlabel=L"\theta_1", ylabel=L"\theta_2") - -covellipse!(μ, Σ, - n_std=1.64, # 5% - 95% quantiles - xlims=(-3, 3), ylims=(-3, 3), - alpha=0.5, - c=:steelblue, - label="90% HPD") -savefig(joinpath(@OUTPUT, "hmc_all.svg")); # hide +f, ax, l = lines( + getellipsepoints(μ, Σ; confidence=0.9)...; + label="90% HPD", + linewidth=2, + axis=(; limits=(-3, 3, -3, 3), xlabel=L"\theta_1", ylabel=L"\theta_2"), +) +axislegend(ax) +scatter!(ax, X_hmc[warmup:end, 1], X_hmc[warmup:end, 2]; color=(:red, 0.3)) +save(joinpath(@OUTPUT, "hmc_all.svg"), f); # hide # \fig{hmc_all} # \center{*All 9,000 Samples Generated from the HMC Algorithm after warm-up*} \\ @@ -1085,12 +1152,17 @@ d2 = MvNormal([0, 0], [8.4 2.0; 2.0 1.7]) d = MixtureModel([d1, d2]) -data_mixture = rand(d, 1_000)' +x = -6:0.01:15 +y = -2.5:0.01:4.2 +dens_mixture = [pdf(d, [i, j]) for i in x, j in y] -marginalkde(data_mixture[:, 1], data_mixture[:, 2], - clip=((-1.6, 3), (-3, 3)), - xlabel=L"X", ylabel=L"Y") -savefig(joinpath(@OUTPUT, "bimodal.svg")); # hide +f, ax, s = surface( + x, + y, + dens_mixture; + axis=(type=Axis3, xlabel=L"X", ylabel=L"Y", zlabel="PDF", azimuth=pi / 4), +) +save(joinpath(@OUTPUT, "bimodal.svg"), f); # hide # \fig{bimodal} # \center{*Multivariate Bimodal Normal*} \\ @@ -1104,11 +1176,13 @@ savefig(joinpath(@OUTPUT, "bimodal.svg")); # hide funnel_y = rand(Normal(0, 3), 10_000) funnel_x = rand(Normal(), 10_000) .* exp.(funnel_y / 2) -scatter((funnel_x, funnel_y), - label=false, mc=:steelblue, ma=0.3, - xlabel=L"X", ylabel=L"Y", - xlims=(-100, 100)) -savefig(joinpath(@OUTPUT, "funnel.svg")); # hide +f, ax, s = scatter( + funnel_x, + funnel_y; + color=(:steelblue, 0.3), + axis=(; xlabel=L"X", ylabel=L"Y", limits=(-100, 100, nothing, nothing)), +) +save(joinpath(@OUTPUT, "funnel.svg"), f); # hide # \fig{funnel} # \center{*Neal's Funnel*} \\ @@ -1146,7 +1220,7 @@ setprogress!(false) # hide p ~ Dirichlet(6, 1) #Each outcome of the six-sided dice has a probability p. - y ~ filldist(Categorical(p), length(y)) + return y ~ filldist(Categorical(p), length(y)) end; # Let's once again generate 1,000 throws of a six-sided dice: @@ -1210,16 +1284,29 @@ mean(chain[:acceptance_rate]) # during a good part (or all) of the Markov chain sampling. We can do that with the `MCMCChains.jl`'s function `traceplot()`. 
# Let's look the "good" `chain` first: -traceplot(chain) -savefig(joinpath(@OUTPUT, "traceplot_chain.svg")); # hide +using AlgebraOfGraphics +params = names(chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt = data(chain) * mapping(:iteration) * chain_mapping * visual(Lines) +f = Figure(; resolution=(1200, 900)) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "traceplot_chain.svg"), f); # hide # \fig{traceplot_chain} # \center{*Traceplot for `chain`*} \\ # And now the `bad_chain`: -traceplot(bad_chain) -savefig(joinpath(@OUTPUT, "traceplot_bad_chain.svg")); # hide +params = names(bad_chain, :parameters) +chain_mapping = + mapping(params .=> "sample value") * + mapping(; color=:chain => nonnumeric, row=dims(1) => renamer(params)) +plt = data(bad_chain) * mapping(:iteration) * chain_mapping * visual(Lines) +f = Figure(; resolution=(1200, 900)) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "traceplot_bad_chain.svg"), f); # hide # \fig{traceplot_bad_chain} # \center{*Traceplot for `bad_chain`*} \\ diff --git a/_literate/06_linear_reg.jl b/_literate/06_linear_reg.jl index 37268706..c52cb04b 100644 --- a/_literate/06_linear_reg.jl +++ b/_literate/06_linear_reg.jl @@ -85,7 +85,7 @@ setprogress!(false) # hide σ ~ Exponential(1) #likelihood - y ~ MvNormal(α .+ X * β, σ^2 * I) + return y ~ MvNormal(α .+ X * β, σ^2 * I) end; # Here I am specifying very weakly informative priors: @@ -114,7 +114,9 @@ end; # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/kidiq.csv" kidiq = CSV.read(HTTP.get(url).body, DataFrame) diff --git a/_literate/07_logistic_reg.jl b/_literate/07_logistic_reg.jl index 791c516f..f24ece7f 100644 --- a/_literate/07_logistic_reg.jl +++ b/_literate/07_logistic_reg.jl @@ -13,15 +13,15 @@ # We use logistic regression when our dependent variable is binary. It has only two distinct values, usually # encoded as $0$ or $1$. See the figure below for a graphical intuition of the logistic function: -using Plots, LaTeXStrings +using CairoMakie function logistic(x) return 1 / (1 + exp(-x)) end -plot(logistic, -10, 10, label=false, - xlabel=L"x", ylabel=L"\mathrm{Logistic}(x)") -savefig(joinpath(@OUTPUT, "logistic.svg")); # hide +f, ax, l = lines(-10 .. 10, logistic; axis=(; xlabel=L"x", ylabel=L"\mathrm{Logistic}(x)")) +f +save(joinpath(@OUTPUT, "logistic.svg"), f); # hide # \fig{logistic} # \center{*Logistic Function*} \\ @@ -141,7 +141,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3), predictors) #likelihood - y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β))) + return y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β))) end; # Here I am specifying very weakly informative priors: @@ -177,7 +177,9 @@ end; # * `educ` -- years of education (head of household). 
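# Before reading the data in, a toy calculation with the `logistic` function defined
# earlier in this tutorial (the numbers below are made up, purely to build intuition for
# the coefficients we are about to estimate): starting from a 50% baseline probability,
# adding 0.5 on the log-odds scale moves it to roughly 62%:

logistic(0.0)       # 0.5 -- log-odds of zero
logistic(0.0 + 0.5) # ≈ 0.62 -- what a coefficient of 0.5 does to that baseline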
# Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/wells.csv" wells = CSV.read(HTTP.get(url).body, DataFrame) @@ -223,10 +225,7 @@ using Chain @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our interpretation of odds is the same as in betting games. Anything below 1 signals a unlikely probability that $y$ will be $1$. @@ -240,10 +239,7 @@ end @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(logodds2prob), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(logodds2prob); renamecols=false) end # There you go, much better now. Let's analyze our results. The intercept `α` is the basal `switch` probability which has diff --git a/_literate/08_ordinal_reg.jl b/_literate/08_ordinal_reg.jl index 9e0031bb..ebe2ac70 100644 --- a/_literate/08_ordinal_reg.jl +++ b/_literate/08_ordinal_reg.jl @@ -122,28 +122,19 @@ let x_pmf = pdf.(dist, x) x_cdf = cdf.(dist, x) x_logodds_cdf = logit.(x_cdf) - df = DataFrame(; - x, - x_pmf, - x_cdf, - x_logodds_cdf) + df = DataFrame(; x, x_pmf, x_cdf, x_logodds_cdf) labels = ["CDF", "Log-cumulative-odds"] - fig = Figure() - plt1 = data(df) * - mapping(:x, :x_pmf) * - visual(BarPlot) - plt2 = data(df) * - mapping(:x, - [:x_cdf, :x_logodds_cdf]; - col=dims(1) => renamer(labels)) * - visual(ScatterLines) + f = Figure() + plt1 = data(df) * mapping(:x, :x_pmf) * visual(BarPlot) + plt2 = + data(df) * + mapping(:x, [:x_cdf, :x_logodds_cdf]; col=dims(1) => renamer(labels)) * + visual(ScatterLines) axis = (; xticks=1:6) - draw!(fig[1, 2:3], plt1; axis) - draw!(fig[2, 1:4], plt2; - axis, - facet=(; linkyaxes=:none)) - fig - save(joinpath(@OUTPUT, "logodds.svg"), fig) # hide + draw!(f[1, 2:3], plt1; axis) + draw!(f[2, 1:4], plt2; axis, facet=(; linkyaxes=:none)) + f + save(joinpath(@OUTPUT, "logodds.svg"), f) # hide end # \fig{logodds} @@ -287,7 +278,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3) * 2.5, predictors) #likelihood - y ~ arraydist([OrderedLogistic(X[i, :] ⋅ β, cutpoints) for i in 1:length(y)]) + return y ~ arraydist([OrderedLogistic(X[i, :] ⋅ β, cutpoints) for i in 1:length(y)]) end; # First, let's deal with the new stuff in our model: the **`Bijectors.ordered`**. @@ -338,7 +329,9 @@ end; # * `ncontrols`: Number of controls # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/esoph.csv" esoph = CSV.read(HTTP.get(url).body, DataFrame) @@ -360,7 +353,7 @@ transform!( x -> categorical(x; levels=["0-39g/day", "40-79", "80-119", "120+"], ordered=true), :tobgp => x -> categorical(x; levels=["0-9g/day", "10-19", "20-29", "30+"], ordered=true); - renamecols=false + renamecols=false, ) transform!(esoph, [:agegp, :alcgp, :tobgp] .=> ByRow(levelcode); renamecols=false) @@ -395,10 +388,7 @@ using Chain @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our interpretation of odds is the same as in betting games. 
Anything below 1 signals a unlikely probability that $y$ will be $1$. @@ -412,10 +402,7 @@ end @chain quantile(chain) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(logodds2prob), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(logodds2prob); renamecols=false) end # There you go, much better now. Let's analyze our results. diff --git a/_literate/09_count_reg.jl b/_literate/09_count_reg.jl index 83b3c00e..d33c33d0 100644 --- a/_literate/09_count_reg.jl +++ b/_literate/09_count_reg.jl @@ -11,11 +11,10 @@ # We use regression with count data when our dependent variable is restricted to positive integers, *i.e.* $y \in \mathbb{Z}^+$. # See the figure below for a graphical intuition of the exponential function: -using Plots, LaTeXStrings +using CairoMakie -plot(exp, -6, 6, label=false, - xlabel=L"x", ylabel=L"e^x") -savefig(joinpath(@OUTPUT, "exponential.svg")); # hide +f, ax, l = lines(-6 .. 6, exp; axis=(xlabel=L"x", ylabel=L"e^x")) +save(joinpath(@OUTPUT, "exponential.svg"), f); # hide # \fig{exponential} # \center{*Exponential Function*} \\ @@ -123,7 +122,7 @@ setprogress!(false) # hide β ~ filldist(TDist(3), predictors) #likelihood - y ~ arraydist(LazyArray(@~ LogPoisson.(α .+ X * β))) + return y ~ arraydist(LazyArray(@~ LogPoisson.(α .+ X * β))) end; # Here I am specifying very weakly informative priors: @@ -167,13 +166,13 @@ end; # Here is what a $\text{Gamma}(0.01, 0.01)$ looks like: -using StatsPlots, Distributions -plot(Gamma(0.01, 0.01), - lw=2, label=false, - xlabel=L"\phi", - ylabel="Density", - xlims=(0, 0.001)) -savefig(joinpath(@OUTPUT, "gamma.svg")); # hide +using Distributions +f, ax, l = lines( + Gamma(0.01, 0.01); + linewidth=2, + axis=(xlabel=L"\phi", ylabel="Density", limits=(0, 0.03, nothing, nothing)), +) +save(joinpath(@OUTPUT, "gamma.svg"), f); # hide # \fig{gamma} # \center{*Gamma Distribution with $\alpha = 0.01$ and $\theta = 0.01$*} \\ @@ -262,7 +261,7 @@ end ϕ = 1 / ϕ⁻ #likelihood - y ~ arraydist(LazyArray(@~ NegativeBinomial2.(exp.(α .+ X * β), ϕ))) + return y ~ arraydist(LazyArray(@~ NegativeBinomial2.(exp.(α .+ X * β), ϕ))) end; # Here I am also specifying very weakly informative priors: @@ -291,7 +290,9 @@ end; # * `exposure2` -- number of days for which the roach traps were used # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/roaches.csv" roaches = CSV.read(HTTP.get(url).body, DataFrame) @@ -326,10 +327,7 @@ using Chain @chain quantile(chain_poisson) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Let's analyze our results. The intercept `α` is the basal number of roaches caught `y` and has @@ -360,10 +358,7 @@ summarystats(chain_negbin) @chain quantile(chain_negbin) begin DataFrame - select(_, - :parameters, - names(_, r"%") .=> ByRow(exp), - renamecols=false) + select(_, :parameters, names(_, r"%") .=> ByRow(exp); renamecols=false) end # Our results show much more uncertainty in the coefficients than in the Poisson regression. diff --git a/_literate/10_robust_reg.jl b/_literate/10_robust_reg.jl index 4597df88..c460ac1c 100644 --- a/_literate/10_robust_reg.jl +++ b/_literate/10_robust_reg.jl @@ -19,13 +19,11 @@ # mean without having to change the mean's position (or location). 
In other words, the bell curve needs to "shift" to be able # to contemplate outliers, thus making the inference unstable. -using StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions -plot(Normal(0, 1), - lw=5, label=false, - xlabel=L"x", - ylabel="Density") -savefig(joinpath(@OUTPUT, "normal.svg")); # hide +f, ax, l = lines(-4 .. 4, Normal(0, 1); linewidth=5, axis=(; xlabel=L"x", ylabel="Density")) +save(joinpath(@OUTPUT, "normal.svg"), f); # hide # \fig{normal} # \center{*Normal with $\mu=0$ and $\sigma = 1$*} \\ @@ -35,12 +33,8 @@ savefig(joinpath(@OUTPUT, "normal.svg")); # hide # far from the average without having to "shift" the mean's position (or location). For that we have the Student-$t$ distribution. # See the figure below to remember its shape. -plot(TDist(2), - lw=5, label=false, - xlabel=L"x", - ylabel="Density", - xlims=(-4, 4)) -savefig(joinpath(@OUTPUT, "tdist.svg")); # hide +f, ax, l = lines(-4 .. 4, TDist(2); linewidth=5, axis=(xlabel=L"x", ylabel="Density")) +save(joinpath(@OUTPUT, "tdist.svg"), f); # hide # \fig{tdist} # \center{*Student-$t$ with $\nu = 2$*} \\ @@ -49,14 +43,16 @@ savefig(joinpath(@OUTPUT, "tdist.svg")); # hide # Take a look at the tails in the comparison below: -plot(Normal(0, 1), - lw=5, label="Normal", - xlabel=L"x", - ylabel="Density", - xlims=(-4, 4)) -plot!(TDist(2), - lw=5, label="Student") -savefig(joinpath(@OUTPUT, "comparison_normal_student.svg")); # hide +f, ax, l = lines( + -4 .. 4, + Normal(0, 1); + linewidth=5, + label="Normal", + axis=(; xlabel=L"x", ylabel="Density"), +) +lines!(ax, -4 .. 4, TDist(2); linewidth=5, label="Student") +axislegend(ax) +save(joinpath(@OUTPUT, "comparison_normal_student.svg"), f); # hide # \fig{comparison_normal_student} # \center{*Comparison between Normal and Student-$t$ Distributions*} \\ @@ -123,14 +119,14 @@ seed!(456) # hide setprogress!(false) # hide @model function robustreg(X, y; predictors=size(X, 2)) - #priors - α ~ LocationScale(median(y), 2.5 * mad(y), TDist(3)) - β ~ filldist(TDist(3), predictors) - σ ~ Exponential(1) - ν ~ LogNormal(2, 1) - - #likelihood - y ~ arraydist(LocationScale.(α .+ X * β, σ, TDist.(ν))) + #priors + α ~ LocationScale(median(y), 2.5 * mad(y), TDist(3)) + β ~ filldist(TDist(3), predictors) + σ ~ Exponential(1) + ν ~ LogNormal(2, 1) + + #likelihood + return y ~ arraydist(LocationScale.(α .+ X * β, σ, TDist.(ν))) end; # Here I am specifying very weakly informative priors: @@ -161,7 +157,9 @@ end; # * `prestige`: percentage of respondents in the survey who classified their occupation as at least "good" with respect to prestige. # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/duncan.csv" duncan = CSV.read(HTTP.get(url).body, DataFrame) @@ -171,17 +169,22 @@ describe(duncan) # as at least "good" with respect to prestige is around 41%. 
But `prestige` variable is very dispersed and actually has a bimodal # distribution: -@df duncan density(:prestige, label=false) -savefig(joinpath(@OUTPUT, "prestige_density.svg")); # hide +f = Figure() +plt = data(duncan) * mapping(:prestige) * AlgebraOfGraphics.density() +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "prestige_density.svg"), f); # hide # \fig{prestige_density} # \center{*Density Plot of `prestige`*} \\ # Besides that, the mean `prestige` per `type` shows us where the source of variation might come from: -gdf_type = groupby(duncan, :type) -@df combine(gdf_type, :prestige => mean) bar(:type, :prestige_mean, label=false) -savefig(joinpath(@OUTPUT, "prestige_per_type.svg")); # hide +gdf = groupby(duncan, :type) +f = Figure() +plt = + data(combine(gdf, :prestige => mean)) * mapping(:type, :prestige_mean) * visual(BarPlot) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "prestige_per_type.svg"), f); # hide # \fig{prestige_per_type} # \center{*Mean `prestige` per `type`*} \\ diff --git a/_literate/11_multilevel_models.jl b/_literate/11_multilevel_models.jl index b76e297b..3e1b2007 100644 --- a/_literate/11_multilevel_models.jl +++ b/_literate/11_multilevel_models.jl @@ -96,7 +96,9 @@ using Random: seed! seed!(123) setprogress!(false) # hide -@model function varying_intercept(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -108,7 +110,7 @@ setprogress!(false) # hide #likelihood ŷ = α .+ X * β .+ αⱼ[idx] - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # ### Random-Slope Model @@ -147,7 +149,7 @@ end; #likelihood ŷ = α .+ X * βⱼ * τ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # ### Random-Intercept-Slope Model @@ -176,7 +178,9 @@ end; # In Turing we can accomplish this as: -@model function varying_intercept_slope(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept_slope( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept σ ~ Exponential(1 / std(y)) # residual SD @@ -189,7 +193,7 @@ end; #likelihood ŷ = α .+ αⱼ[idx] .+ X * βⱼ * τᵦ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # In all of the models, @@ -215,7 +219,9 @@ end; # Ok let's read our data with `CSV.jl` and output into a `DataFrame` from `DataFrames.jl`: -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/cheese.csv" cheese = CSV.read(HTTP.get(url).body, DataFrame) @@ -234,8 +240,13 @@ for c in unique(cheese[:, :cheese]) end cheese[:, :background_int] = map(cheese[:, :background]) do b - b == "rural" ? 1 : - b == "urban" ? 2 : missing + if b == "rural" + 1 + elseif b == "urban" + 2 + else + missing + end end first(cheese, 5) @@ -253,7 +264,7 @@ idx = cheese[:, :background_int]; model_intercept = varying_intercept(X, idx, y) chain_intercept = sample(model_intercept, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_intercept) |> DataFrame |> println +println(DataFrame(summarystats(chain_intercept))) # Here we can see that the model has a population-level intercept `α` along with population-level coefficients `β`s for each `cheese` # dummy variable. 
But notice that we have also group-level intercepts for each of the groups `αⱼ`s. @@ -263,7 +274,7 @@ summarystats(chain_intercept) |> DataFrame |> println model_slope = varying_slope(X, idx, y) chain_slope = sample(model_slope, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_slope) |> DataFrame |> println +println(DataFrame(summarystats(chain_slope))) # Here we can see that the model has still a population-level intercept `α`. But now our population-level # coefficients `β`s are replaced by group-level coefficients `βⱼ`s along with their standard deviation `τᵦ`s. @@ -274,7 +285,7 @@ summarystats(chain_slope) |> DataFrame |> println model_intercept_slope = varying_intercept_slope(X, idx, y) chain_intercept_slope = sample(model_intercept_slope, NUTS(), MCMCThreads(), 1_000, 4) -summarystats(chain_intercept_slope) |> DataFrame |> println +println(DataFrame(summarystats(chain_intercept_slope))) # Now we have fused the previous model in one. We still have a population-level intercept `α`. But now # we have in the same model group-level intercepts for each of the groups `αⱼ`s and group-level along with their standard diff --git a/_literate/12_Turing_tricks.jl b/_literate/12_Turing_tricks.jl index 57a8699c..2e4bdb37 100644 --- a/_literate/12_Turing_tricks.jl +++ b/_literate/12_Turing_tricks.jl @@ -57,10 +57,12 @@ setprogress!(false) # hide σ ~ Exponential(1) #likelihood - y ~ MvNormal(α .+ X * β, σ^2 * I) + return y ~ MvNormal(α .+ X * β, σ^2 * I) end; -using DataFrames, CSV, HTTP +using DataFrames +using CSV +using HTTP url = "https://raw.githubusercontent.com/storopoli/Bayesian-Julia/master/datasets/kidiq.csv" kidiq = CSV.read(HTTP.get(url).body, DataFrame) @@ -115,8 +117,12 @@ chain_qr = sample(model_qr, NUTS(1_000, 0.65), MCMCThreads(), 1_000, 4) # the regular `linreg`. # Now we have to reconstruct our $\boldsymbol{\beta}$s: -betas = mapslices(x -> R_ast^-1 * x, chain_qr[:, namesingroup(chain_qr, :β), :].value.data, dims=[2]) -chain_beta = setrange(Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:3_000) +betas = mapslices( + x -> R_ast^-1 * x, chain_qr[:, namesingroup(chain_qr, :β), :].value.data; dims=[2] +) +chain_beta = setrange( + Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:2_000 +) chain_qr_reconstructed = hcat(chain_beta, chain_qr) # ## Non-Centered Parametrization @@ -126,15 +132,18 @@ chain_qr_reconstructed = hcat(chain_beta, chain_qr) # change the step size $L$ and the $\epsilon$ factor. This is I've showed one of the most infamous # case in [5. 
**Markov Chain Monte Carlo (MCMC)**](/pages/5_MCMC/): Neal's Funnel (Neal, 2003): -using StatsPlots, Distributions, LaTeXStrings +using CairoMakie +using Distributions funnel_y = rand(Normal(0, 3), 10_000) funnel_x = rand(Normal(), 10_000) .* exp.(funnel_y / 2) -scatter((funnel_x, funnel_y), - label=false, mc=:steelblue, ma=0.3, - xlabel=L"X", ylabel=L"Y", - xlims=(-100, 100)) -savefig(joinpath(@OUTPUT, "funnel.svg")); # hide +f, ax, s = scatter( + funnel_x, + funnel_y; + color=(:steelblue, 0.3), + axis=(; xlabel=L"X", ylabel=L"Y", limits=(-100, 100, nothing, nothing)), +) +save(joinpath(@OUTPUT, "funnel.svg"), f); # hide # \fig{funnel} # \center{*Neal's Funnel*} \\ @@ -147,7 +156,7 @@ savefig(joinpath(@OUTPUT, "funnel.svg")); # hide @model function funnel() y ~ Normal(0, 3) - x ~ Normal(0, exp(y / 2)) + return x ~ Normal(0, exp(y / 2)) end chain_funnel = sample(funnel(), NUTS(), MCMCThreads(), 1_000, 4) @@ -167,7 +176,7 @@ chain_funnel = sample(funnel(), NUTS(), MCMCThreads(), 1_000, 4) x̃ ~ Normal() ỹ ~ Normal() y = 3.0 * ỹ # implies y ~ Normal(0, 3) - x = exp(y / 2) * x̃ # implies x ~ Normal(0, exp(y / 2)) + return x = exp(y / 2) * x̃ # implies x ~ Normal(0, exp(y / 2)) end chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) @@ -178,7 +187,9 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) # in [10. **Multilevel Models (a.k.a. Hierarchical Models)**](/pages/10_multilevel_models/). Here was the # approach that we took, also known as Centered Parametrization (CP): -@model function varying_intercept(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -190,12 +201,14 @@ chain_ncp_funnel = sample(ncp_funnel(), NUTS(), MCMCThreads(), 1_000, 4) #likelihood ŷ = α .+ X * β .+ αⱼ[idx] - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # To perform a Non-Centered Parametrization (NCP) in this model we do as following: -@model function varying_intercept_ncp(X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2)) +@model function varying_intercept_ncp( + X, idx, y; n_gr=length(unique(idx)), predictors=size(X, 2) +) #priors α ~ Normal(mean(y), 2.5 * std(y)) # population-level intercept β ~ filldist(Normal(0, 2), predictors) # population-level coefficients @@ -208,7 +221,7 @@ end; #likelihood ŷ = α .+ X * β .+ zⱼ[idx] .* τ - y ~ MvNormal(ŷ, σ^2 * I) + return y ~ MvNormal(ŷ, σ^2 * I) end; # Here we are using a NCP with the `zⱼ`s following a standard normal and we reconstruct the @@ -226,8 +239,13 @@ for c in unique(cheese[:, :cheese]) end cheese[:, :background_int] = map(cheese[:, :background]) do b - b == "rural" ? 1 : - b == "urban" ? 2 : missing + if b == "rural" + 1 + elseif b == "urban" + 2 + else + missing + end end X = Matrix(select(cheese, Between(:cheese_A, :cheese_D))); @@ -247,8 +265,12 @@ chain_ncp = sample(model_ncp, NUTS(), MCMCThreads(), 1_000, 4) # in Turing. Before we conclude, we need to recover our original `αⱼ`s. 
We can do this by multiplying `zⱼ[idx] .* τ`: τ = summarystats(chain_ncp)[:τ, :mean] -αⱼ = mapslices(x -> x * τ, chain_ncp[:, namesingroup(chain_ncp, :zⱼ), :].value.data, dims=[2]) -chain_ncp_reconstructed = hcat(MCMCChains.resetrange(chain_ncp), Chains(αⱼ, ["αⱼ[$i]" for i in 1:length(unique(idx))])) +αⱼ = mapslices( + x -> x * τ, chain_ncp[:, namesingroup(chain_ncp, :zⱼ), :].value.data; dims=[2] +) +chain_ncp_reconstructed = hcat( + MCMCChains.resetrange(chain_ncp), Chains(αⱼ, ["αⱼ[$i]" for i in 1:length(unique(idx))]) +) # ## References diff --git a/_literate/13_epi_models.jl b/_literate/13_epi_models.jl index 6de11ae9..91dc2fde 100644 --- a/_literate/13_epi_models.jl +++ b/_literate/13_epi_models.jl @@ -11,25 +11,40 @@ # For this tutorial I'll be using Brazil's COVID data from the [Media Consortium](https://brasil.io/covid19/). # For reproducibility, we'll restrict the data to the year of 2020: -using Downloads, DataFrames, CSV, Chain, Dates +using Downloads +using DataFrames +using CSV +using Chain +using Dates url = "https://data.brasil.io/dataset/covid19/caso_full.csv.gz" file = Downloads.download(url) -df = CSV.File(file) |> DataFrame +df = DataFrame(CSV.File(file)) br = @chain df begin - filter([:date, :city] => (date, city) -> date < Dates.Date("2021-01-01") && date > Dates.Date("2020-04-01") && ismissing(city), _) + filter( + [:date, :city] => + (date, city) -> + date < Dates.Date("2021-01-01") && + date > Dates.Date("2020-04-01") && + ismissing(city), + _, + ) groupby(:date) combine( - [:estimated_population_2019, + [ + :estimated_population_2019, :last_available_confirmed_per_100k_inhabitants, :last_available_deaths, :new_confirmed, - :new_deaths] .=> sum .=> - [:estimated_population_2019, + :new_deaths, + ] .=> + sum .=> [ + :estimated_population_2019, :last_available_confirmed_per_100k_inhabitants, :last_available_deaths, :new_confirmed, - :new_deaths] + :new_deaths, + ], ) end; @@ -43,13 +58,12 @@ last(br, 5) # Here is a plot of the data: -using Plots, StatsPlots, LaTeXStrings -@df br plot(:date, - :new_confirmed, - xlab=L"t", ylab="infected daily", - yformatter=y -> string(round(Int64, y ÷ 1_000)) * "K", - label=false) -savefig(joinpath(@OUTPUT, "infected.svg")); # hide +using AlgebraOfGraphics +using CairoMakie +f = Figure() +plt = data(br) * mapping(:date => L"t", :new_confirmed => "infected daily") * visual(Lines) +draw!(f[1, 1], plt) +save(joinpath(@OUTPUT, "infected.svg"), f); # hide # \fig{infected} # \center{*Infected in Brazil during COVID in 2020*} \\ @@ -110,7 +124,7 @@ function sir_ode!(du, u, p, t) du[2] = infection - recovery # Infected du[3] = recovery # Recovered end - nothing + return nothing end; # This is what the infection would look with some fixed `β` and `γ` @@ -123,13 +137,17 @@ u = [N - i₀, i₀, 0.0] p = [0.5, 0.05] prob = ODEProblem(sir_ode!, u, (1.0, 100.0), p) sol_ode = solve(prob) -plot(sol_ode, label=[L"S" L"I" L"R"], - lw=3, - xlabel=L"t", - ylabel=L"N", - yformatter=y -> string(round(Int64, y ÷ 1_000_000)) * "mi", - title="SIR Model for 100 days, β = $(p[1]), γ = $(p[2])") -savefig(joinpath(@OUTPUT, "ode_solve.svg")); # hide +f = Figure() +plt = + data(stack(DataFrame(sol_ode), Not(:timestamp))) * + mapping( + :timestamp => L"t", + :value => L"N"; + color=:variable => renamer(["value1" => "S", "value2" => "I", "value3" => "R"]), + ) * + visual(Lines; linewidth=3) +draw!(f[1, 1], plt; axis=(; title="SIR Model for 100 days, β = $(p[1]), γ = $(p[2])")) +save(joinpath(@OUTPUT, "ode_solve.svg"), f); # hide # \fig{ode_solve} # \center{*SIR ODE 
Solution for Brazil's 100 days of COVID in early 2020*} \\ @@ -168,18 +186,17 @@ setprogress!(false) # hide u0 = [N - I, I, r₀] # S,I,R p = [β, γ] tspan = (1.0, float(l)) - prob = ODEProblem(sir_ode!, - u0, - tspan, - p) - sol = solve(prob, - Tsit5(), # similar to Dormand-Prince RK45 in Stan but 20% faster - saveat=1.0) + prob = ODEProblem(sir_ode!, u0, tspan, p) + sol = solve( + prob, + Tsit5(); # similar to Dormand-Prince RK45 in Stan but 20% faster + saveat=1.0, + ) solᵢ = Array(sol)[2, :] # New Infected solᵢ = max.(1e-4, solᵢ) # numerical issues arose #likelihood - infected ~ arraydist(LazyArray(@~ NegativeBinomial2.(solᵢ, ϕ))) + return infected ~ arraydist(LazyArray(@~ NegativeBinomial2.(solᵢ, ϕ))) end; # Now run the model and inspect our parameters estimates.
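For orientation only, here is a minimal sketch of what that sampling step might look like, reusing the same `sample`/`summarystats` pattern the earlier tutorials in this patch follow. Both the constructor name `bayes_sir` and feeding it `br[:, :new_confirmed]` are assumptions for illustration; the actual `@model function` signature lies outside this hunk:

```julia
# Illustrative sketch: `bayes_sir` is an assumed name for the @model defined above,
# and `br[:, :new_confirmed]` is assumed to be the vector of daily infected counts.
infected = br[:, :new_confirmed]                                # daily new cases aggregated earlier
model_sir = bayes_sir(infected)                                 # instantiate the Turing model
chain_sir = sample(model_sir, NUTS(), MCMCThreads(), 1_000, 4)  # 4 parallel chains, 1_000 draws each
summarystats(chain_sir)                                         # posterior summaries of the sampled parameters
```

The `MCMCThreads()` call runs four chains in parallel, matching the sampling calls used in the regression and multilevel tutorials above.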