From 7ebd6a10bb1d3bb3ecfc22f08a85a3683dd55b1a Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Thu, 31 Oct 2024 11:30:28 +0000 Subject: [PATCH] Update to new versions, minor updates --- .gitignore | 3 +- README.md | 2 + WORDLIST | 1 + _quarto.yml | 1 - arrays.qmd | 8 +-- colophon.qmd | 6 +- common.R | 20 +++++++ data.qmd | 55 +++++++++++------- index.qmd | 31 +++++++++- inference.qmd | 143 ++++++++++++++++++++++++++++++++++++++++++++-- intro.qmd | 11 ---- odin.qmd | 91 ++++++++++++++++++++--------- packaging.qmd | 6 +- stochasticity.qmd | 23 ++++++-- time.qmd | 18 ++++-- utils.R | 6 -- 16 files changed, 326 insertions(+), 99 deletions(-) create mode 100644 common.R delete mode 100644 intro.qmd delete mode 100644 utils.R diff --git a/.gitignore b/.gitignore index 45571f1..e48326f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .Rproj.user _book/ _dust/ -odin_files +*_files/ +*_cache/ diff --git a/README.md b/README.md index cf86891..210c00c 100644 --- a/README.md +++ b/README.md @@ -25,3 +25,5 @@ which will open the site in a browser. Then edit the `.qmd` pages; changes will The site will build when pushed to GitHub, so failures will show on a PR. It will push to `gh-pages` automatically when merged. Documentation for quarto is [here](https://quarto.org/docs/guide/) and for books specifically [here](https://quarto.org/docs/books/) + +We cache models in `_dust` so that it's not painful to edit things. You should remove this directory after substantial changes to any of the packages (you may want to delete `_book` too so that quarto rebuilds everything as expected). diff --git a/WORDLIST b/WORDLIST index fc54238..99410b8 100644 --- a/WORDLIST +++ b/WORDLIST @@ -19,5 +19,6 @@ nondifferentiable nonlinear odin odin's +pMCMC standalone stochasticity diff --git a/_quarto.yml b/_quarto.yml index 60881ba..db398fc 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -9,7 +9,6 @@ book: repo-actions: [edit, issue, source] chapters: - index.qmd - - intro.qmd - part: "Odin" chapters: - odin.qmd diff --git a/arrays.qmd b/arrays.qmd index b506d22..ff35f05 100644 --- a/arrays.qmd +++ b/arrays.qmd @@ -1,12 +1,8 @@ -# Arrays +# Arrays {#sec-arrays} ```{r} #| include: false -source("utils.R") -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) +source("common.R") ``` ::: {.callout-warning} diff --git a/colophon.qmd b/colophon.qmd index 7da40ea..b72643f 100644 --- a/colophon.qmd +++ b/colophon.qmd @@ -2,17 +2,13 @@ ```{r} #| include: false +source("common.R") pkgs <- c("odin2", "dust2", "monty", "pkgload", "pkgbuild", "cpp11", "cli", "rlang", "brio", "decor", "fs", "knitr", "rmarkdown", "usethis") for (p in pkgs) { loadNamespace(p) } -source("utils.R") -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) ``` Key versions: diff --git a/common.R b/common.R new file mode 100644 index 0000000..33e9875 --- /dev/null +++ b/common.R @@ -0,0 +1,20 @@ +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +lang_output <- function(x, lang) { + cat(c(sprintf("```%s", lang), x, "```"), sep = "\n") +} + +cpp_output <- function(x) { + lang_output(x, "cpp") +} + +r_output <- function(x) { + lang_output(x, "r") +} + +plain_output <- function(x) { + lang_output(x, "plain") +} diff --git a/data.qmd b/data.qmd index 745c350..242f08c 100644 --- a/data.qmd +++ b/data.qmd @@ -2,17 +2,14 @@ ```{r} #| include: false -source("utils.R") -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) +source("common.R") ``` Before we start on inference, we need to start thinking about how our models fit 
to data, and how we can get data into the models. This gets very close to the interface that we need to work with [monty](https://mrc-ide.github.io/monty), which is the focus of the next section of the book.

```{r}
#| include: false
+source("common.R")
set.seed(42)
```

@@ -67,7 +64,7 @@ When running this model we'll see incidence produced:

```{r}
pars <- list(beta = 1, gamma = 0.6)
-sys <- dust_system_create(sir(), pars, dt = 0.25)
+sys <- dust_system_create(sir, pars, dt = 0.25)
dust_system_set_state_initial(sys)
t <- seq(0, 20, by = 0.25)
y <- dust_system_simulate(sys, t)
@@ -78,7 +75,7 @@ plot(t, y$incidence, type = "s", xlab = "Time", ylab = "Incidence")

Our step is a quarter of a day, so we see four levels of incidence per day, with the last being the daily peak. Let's run again with a timestep of 1 day, we see:

```{r}
-sys <- dust_system_create(sir(), pars, dt = 0.25)
+sys <- dust_system_create(sir, pars, dt = 1)
dust_system_set_state_initial(sys)
t <- seq(0, 20)
y <- dust_system_simulate(sys, t)
@@ -98,7 +95,7 @@ tail(d)

We have a column for time `time` and one for observed cases `cases` spanning the time range 0 to 20.

-## Comparison to data
+## Comparison to data {#sec-simple-sir-data}

The next step is to tell our model about this data:

@@ -135,10 +132,16 @@ First, `cases <- data()` says that `cases` is a special **data** variable. It w

Second, `cases ~ Poisson(incidence)` describes the per-data-point [likelihood calculation](https://en.wikipedia.org/wiki/Likelihood_function); the syntax may be familiar to you if you have read [Richard McElreath's *Statistical Rethinking*](https://xcelab.net/rm/) or used any number of Bayesian statistical frameworks.

+The generator will advertise that this system can be compared to data:
+
+```{r}
+sir
+```
+
Once we have our new model, we can see how the data comparison works.

```{r}
-sys <- dust_system_create(sir(), list(), n_particles = 10)
+sys <- dust_system_create(sir, list(), n_particles = 10)
dust_system_set_state_initial(sys)
dust_system_run_to_time(sys, d$time[[1]])
dust_system_compare_data(sys, d[1, ])
@@ -178,14 +181,20 @@ As you can see, these are the same; the expression

```
cases ~ Poisson(incidence)
```

-has done the same sort of thing as we can do ourselves by asking "what is the (log) probability of observing `cases` cases if the underlying rate of incidence is `incidence`".
+has done the same sort of thing as we can do ourselves by asking "what is the (log) probability of observing `cases` cases if the underlying rate of incidence is `incidence`". Note that this can be `-Inf` (a probability of 0) in the case where the modelled incidence is zero.
+
+```{r}
+dpois(10, 0, log = TRUE)
+```
+
+This is fine, so long as not all particles are impossible.

## The likelihood filter {#sec-data-filter}

We include a simple bootstrap particle filter in `dust` for estimating the marginal likelihood in this case; the probability of the entire data series conditional on our model, marginalised (averaged) over all stochastic realisations. Because our model is stochastic, this is an **estimate** of the likelihood but the estimate will get less noisy as the number of particles increases.

```{r}
-filter <- dust_filter_create(sir(), time_start = 0, data = d, n_particles = 200)
+filter <- dust_filter_create(sir, time_start = 0, data = d, n_particles = 200)
```

Here, we've created a filter where the time series starts at 0, using our data set `d` and we've picked 200 particles to start with.
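+
+Conceptually, each step of a bootstrap filter advances every particle to the next data point, weights it by the likelihood of the data given that particle's state, and then resamples the particles according to those weights. A minimal sketch of one step in plain R -- this is not the `dust2` implementation, and `advance` and `compare` are hypothetical stand-ins for the system update and the data comparison -- might look like:
+
+```{r}
+filter_step <- function(state, data, advance, compare) {
+  state <- advance(state)       # advance each particle (one per column)
+  logw <- compare(state, data)  # per-particle log-likelihood of the data
+  w <- exp(logw - max(logw))
+  kept <- sample(ncol(state), replace = TRUE, prob = w)
+  list(state = state[, kept, drop = FALSE],
+       # contribution to the marginal log-likelihood: the log mean weight
+       ll = log(mean(w)) + max(logw))
+}
+```
+
+The log mean weight at each step accumulates into the marginal log-likelihood estimate that the filter returns.
+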
We run the filter with `dust_likelihood_run`, and this returns a likelihood:
@@ -211,7 +220,7 @@ var(ll)

If we increase the number of particles, this variance will decrease, at the cost of taking longer to run:

```{r}
-filter2 <- dust_filter_create(sir(), time_start = 0, data = d,
+filter2 <- dust_filter_create(sir, time_start = 0, data = d,
                              n_particles = 2000)
ll2 <- replicate(100, dust_likelihood_run(filter2, list()))
mean(ll2)
@@ -220,37 +229,39 @@ var(ll2)

The log-likelihoods from different numbers of particles are not directly comparable, though.

-You can extract trajectories from the filter after running it: this gives a *tree* showing the ancestry of the particles remaining in the sample. To enable this, you must run with `save_history = TRUE`, as this incurs a runtime cost.
+You can extract trajectories from the filter after running it: this gives a *tree* showing the ancestry of the particles remaining in the sample. To enable this, you must run with `save_trajectories = TRUE`, as this incurs a runtime cost.

```{r}
-ll <- dust_likelihood_run(filter, list(), save_history = TRUE)
-h <- dust_likelihood_last_history(filter)
+ll <- dust_likelihood_run(filter, list(), save_trajectories = TRUE)
+h <- dust_likelihood_last_trajectories(filter)
```

-The history will be an array with 3 dimensions representing (in turn) state, particle and time:
+The trajectories will be an array with 3 dimensions representing (in turn) state, particle and time:

```{r}
dim(h)
```

-We can plot the history of our incidence here:
+We can plot the trajectories of our incidence here:

```{r}
matplot(d$time, t(dust_unpack_state(filter, h)$incidence), type = "l",
-        lty = 1, col = "#00000044", xlab = "Time", ylab = "Incidence")
+        lty = 1, col = "#00000044", ylim = c(0, 75),
+        xlab = "Time", ylab = "Incidence")
points(cases ~ time, d, pch = 19, col = "red")
```

-This model does not fit the data very well! The points don't lie close to the modelled trajectories. We also see everything before time ~10 reduced to a single particle's history, which will have the effect of increasing the variance.
+This model does not fit the data very well! The points don't lie close to the modelled trajectories. We also see everything before time ~10 reduced to a single particle's trajectory, which will have the effect of increasing the variance.

If we found better parameters things would look much better. We just so happen to know that if `beta` is 1 and `gamma` 0.6 then the model will fit better:

```{r}
pars <- list(beta = 1, gamma = 0.6)
-ll <- dust_likelihood_run(filter, pars, save_history = TRUE)
-h <- dust_likelihood_last_history(filter)
+ll <- dust_likelihood_run(filter, pars, save_trajectories = TRUE)
+h <- dust_likelihood_last_trajectories(filter)
matplot(d$time, t(dust_unpack_state(filter, h)$incidence), type = "l",
-        lty = 1, col = "#00000044", xlab = "Time", ylab = "Incidence")
+        lty = 1, col = "#00000044", ylim = c(0, 75),
+        xlab = "Time", ylab = "Incidence")
points(cases ~ time, d, pch = 19, col = "red")
```
diff --git a/index.qmd b/index.qmd
index 2062215..5dde57d 100644
--- a/index.qmd
+++ b/index.qmd
@@ -1,5 +1,32 @@
# Preface {.unnumbered}

-This is a Quarto book.
+```{r}
+#| include: false
+source("common.R")
+```

-To learn more about Quarto books visit .
+This is a book about writing models in the [`odin`](https://mrc-ide.github.io/odin2) "domain specific language" (DSL) and fitting these models to data using [`monty`](https://mrc-ide.github.io/monty).
These tools can be used entirely separately; you can create a dynamical model (or "system") in `odin` and never fit it to data, or you can use `monty` to fit models that are written in plain R code. We'll structure the book to cover the tools fairly separately at first, then describe their intersection. Along the way, you'll also use [`dust`](https://mrc-ide.github.io/dust2), which powers `odin`, so we'll spend some time describing how to use that, and also how to write models directly in `dust` if you need more control than `odin` provides.
+
+## History
+
+Originally, we designed odin around describing and solving systems of differential equations, with a small amount of support for working with discrete-time stochastic systems. Over time, and particularly during the COVID-19 response, we expanded the stochastic support and introduced the ability to fit these models using particle filters and other sequential Monte Carlo algorithms. You may have used `odin.dust` and `mcstate` through this period. As of 2024 we have rationalised our tools into a new set of packages which are currently called `odin2`, `dust2` and `monty`.
+
+## Installation
+
+You can install these packages in one go from our R-universe:
+
+```r
+install.packages(
+  c("monty", "dust2", "odin2"),
+  repos = c("https://mrc-ide.r-universe.dev", "https://cloud.r-project.org"))
+```
+
+You will also need a functioning C++ toolchain; you can test this by running:
+
+```{r}
+pkgbuild::has_build_tools(debug = TRUE)
+```
+
+## About this book
+
+This book exists to supplement the package documentation, and we will try to link extensively to it. It is designed as a fairly friendly walk-through of the tools, and to mirror courses that we run.
diff --git a/inference.qmd b/inference.qmd
index 014a41a..7719ac4 100644
--- a/inference.qmd
+++ b/inference.qmd
@@ -1,10 +1,145 @@
# Inference {#sec-inference}

-Getting started with inference on odin models.
+```{r}
+#| include: false
+source("common.R")
+set.seed(1)
+Sys.setenv(DUST_DEBUG = FALSE)
+```

-We'll cover similar ground to the odin fitting vignette I think in this first chapter of this part. The later chapters will cover:
+In this chapter we get started with inference on odin models: we will fit the simple SIR model from @sec-simple-sir-data; this combines all three packages together and tries to demonstrate some of the flexibility we are aiming for.

-* observing the process
-* multiple parameter sets and parameter packing
+```{r}
+library(odin2)
+library(dust2)
+library(monty)
+```
+
+## Recap
+
+Our basic odin code describes an SIR system. We compute daily incidence using `zero_every` in the initial conditions and compare this to case data using a Poisson likelihood:
+
+```{r}
+sir <- odin({
+  update(S) <- S - n_SI
+  update(I) <- I + n_SI - n_IR
+  update(R) <- R + n_IR
+  update(incidence) <- incidence + n_SI
+
+  p_SI <- 1 - exp(-beta * I / N * dt)
+  p_IR <- 1 - exp(-gamma * dt)
+  n_SI <- Binomial(S, p_SI)
+  n_IR <- Binomial(I, p_IR)
+
+  initial(S) <- N - I0
+  initial(I) <- I0
+  initial(R) <- 0
+  initial(incidence, zero_every = 1) <- 0
+
+  N <- parameter(1000)
+  I0 <- parameter(10)
+  beta <- parameter(0.2)
+  gamma <- parameter(0.1)
+
+  cases <- data()
+  cases ~ Poisson(incidence)
+})
+sir
+```
+
+We are looking to fit this to a small (synthetic) data set [`data/incidence.csv`](data/incidence.csv).
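+
+Data of this shape can be simulated from the model itself, by running the system over the observation times and drawing Poisson observations around the modelled incidence. A sketch of the idea (an assumption about how such a file *could* be generated, with made-up parameter values; it is not a record of how this particular file was made):
+
+```{r}
+sys <- dust_system_create(sir, list(beta = 0.3, gamma = 0.15, I0 = 5))
+dust_system_set_state_initial(sys)
+t <- seq(1, 20)
+incidence <- dust_unpack_state(sys, dust_system_simulate(sys, t))$incidence
+data.frame(time = t, cases = rpois(length(t), incidence))
+```
+
+In any case, we read the actual file in and inspect it: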
+
+```{r}
+d <- read.csv("data/incidence.csv")
+head(d)
+```
+
+We have constructed a particle filter with this system and data, with which we can compute likelihoods:
+
+```{r}
+filter <- dust_filter_create(sir, time_start = 0, data = d, n_particles = 200)
+dust_likelihood_run(filter, list(beta = 0.3, gamma = 0.15, I0 = 50))
+```
+
+## Running an MCMC
+
+Our aim is to fit the parameters `beta`, `gamma` and `I0` using MCMC (because this is an MCMC using a particle filter, we might call this pMCMC).
+
+The first challenge is that our filter takes a **named list** of inputs, but any MCMC we run will work in terms of a vector in parameter space. In this case it seems trivial: we should be able to take a vector of numbers `c(0.3, 0.15, 50)`, stick some names on them and convert to a list with `as.list()`. However, as seen in more complex models (e.g., in @sec-arrays), this won't generally be possible.
+
+Our solution is to use `monty_packer` objects to smooth this transition:
+
+```{r}
+packer <- monty_packer(c("beta", "gamma", "I0"))
+packer
+```
+
+You can use a packer object to fix other inputs to your system. For example, we might write:
+
+```{r}
+packer <- monty_packer(c("beta", "gamma", "I0"), fixed = list(N = 1000))
+```
+
+which fixes `N` to 1000. This is an input to our system, but *not* an input to the statistical process.
+
+We can combine our `filter` and `packer` together to make a `monty_model` object:
+
+```{r}
+likelihood <- dust_likelihood_monty(filter, packer)
+likelihood
+```
+
+At this point, we can "forget" that our likelihood is an SIR model, and instead just note that it is a stochastic estimate of a likelihood.
+
+The other ingredient we need is a **prior**. This we can construct with `monty_dsl` as before:
+
+```{r}
+prior <- monty_dsl({
+  beta ~ Exponential(mean = 0.3)
+  gamma ~ Exponential(mean = 0.1)
+  I0 ~ Uniform(1, 50)
+})
+```
+
+We use broad exponential priors on `beta` and `gamma` but with a higher mean for `beta` (reflecting our prior belief that an epidemic did happen) and a uniform prior for the initial number of infected individuals.
+
+Our posterior is the product of the likelihood and prior, or the sum of their logs:
+
+```{r}
+posterior <- likelihood + prior
+```
+
+Next, we define a sampler; we'll start with a random walk with a fairly arbitrary diagonal proposal matrix:
+
+```{r}
+vcv <- diag(3) * 0.0004
+sampler <- monty_sampler_random_walk(vcv)
+```
+
+We start this off, using explicit initial conditions:
+
+```{r}
+#| cache: true
+samples <- monty_sample(posterior, sampler, 1000, initial = c(0.3, 0.1, 5))
+```
+
+Here is our chain over time, showing a rapid improvement in the posterior probability density followed by what might be reasonable mixing:
+
+```{r}
+plot(samples$density, type = "l",
+     xlab = "Sample", ylab = "Log posterior probability density")
+```
+
+## Next steps
+
+The next steps here:
+
+* continue that chain without the burn-in (needs a monty tweak)
+* add an observer so we can see what is going on
+* run multiple chains and get started with diagnostics
+
+In other chapters, eventually:
+
+* run multiple groups
* restarts (once enabled)
* multistage fits (once enabled)
diff --git a/intro.qmd b/intro.qmd
deleted file mode 100644
index 5e6939c..0000000
--- a/intro.qmd
+++ /dev/null
@@ -1,11 +0,0 @@
-# Introduction
-
-This is a book about writing models in the [`odin`](https://mrc-ide.github.io/odin2) "domain specific language" (DSL) and fitting these models to data using [`monty`](https://mrc-ide.github.io/monty).
These tools can be used entirely separately; you can create a dynamical model (or "system") in `odin` and never fit it to data, or you can use `monty` to fit models that are written in plain R code. We'll structure the book to cover the tools fairly separately at first, then describe their intersection. Along the way, you'll also use [`dust`](https://mrc-ide.github.io/dust2), which powers `odin`, so we'll spend some time describing how to use that, and also how to write models directly in `dust` if you need more control than `odin` provides.
-
-## History
-
-Originally, we designed odin around describing and solving systems of differential equations, with a small amount of support for working with discrete-time stochastic systems. Over time, and particularly during the COVID-19 response, we expanded the stochastic support and introduced the ability to fit these models using particle filters and other sequential Monte Carlo algorithms. You may have used `odin.dust` and `mcstate` through this period. As of 2024 we have rationalised our tools into a new set of packages which are currently called `odin2`, `dust2` and `monty2`
-
-## About this book
-
-This book exists to supplement the package documentation, and we will try to link extensively to it. It is designed as a fairly friendly walk-through of the tools, and to mirror courses that we run.
diff --git a/odin.qmd b/odin.qmd
index 5515960..570a5e7 100644
--- a/odin.qmd
+++ b/odin.qmd
@@ -2,18 +2,13 @@

```{r}
#| include: false
-source("utils.R")
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
+source("common.R")
```

`odin` is a "[Domain Specific Language](https://en.wikipedia.org/wiki/Domain-specific_language)" (DSL), that is a mini-language that is specifically tailored to a domain. By targeting a narrow domain we can have a language that expressively captures a problem and gives an efficient solution, as opposed to a general purpose language where you can write code that expresses *anything*, but your solution might either be quite verbose or have poor performance. You might be familiar with other DSLs such as the `dplyr` or `ggplot2` syntax, or SQL if you have used databases.

```{r}
library(odin2)
-library(dust2)
```

## A simple example {#sec-odin-sir}

@@ -47,30 +42,34 @@ There are a few things to note here:

Above you will see us struggle a bit with terminology, particularly between "system" and "model". There are going to be two places where "model" can be usefully used - a model could be the **generative model** (here an SIR model) or it could be the **statistical model**, more the focus on the second half of the book. In order to make things slightly less ambiguous we will often refer to the generative model as a "system", and this is reflected in the calls to functions in dust.
:::

-The `odin()` function will compile (more strictly, [transpile](https://en.wikipedia.org/wiki/Source-to-source_compiler)) the odin code to C++ and then compile this into machine code and load it into your session.
+The `odin()` function will compile (more strictly, [transpile](https://en.wikipedia.org/wiki/Source-to-source_compiler)) the odin code to C++ and then compile this into machine code and load it into your session. The resulting object is a `dust_system_generator` object:

```{r}
-sir()
+sir
```

-This `sir()` function creates a `dust_system_generator` object. As its name suggests, we can use this object to generate different versions of our system with different configurations.
We pass this object to other functions from `dust2` to create, run and examine our simulations. +As its name suggests, we can use this object to generate different versions of our system with different configurations. We pass this object to other functions from `dust2` to create, run and examine our simulations. ## Running systems using `dust2` +```{r} +library(dust2) +``` + We create a system by using `dust_system_create` and passing in a list of parameters: ```{r} pars <- list() -sys <- dust2::dust_system_create(sir(), pars, n_particles = 1) +sys <- dust_system_create(sir, pars) sys ``` -To interact with a system, you will use functions from dust2, which we'll show below. +To interact with a system, you will use functions from `dust2`, which we'll show below. All systems start off with a state of all zeros: ```{r} -dust2::dust_system_state(sys) +dust_system_state(sys) ``` There are two ways of setting state: @@ -78,18 +77,18 @@ There are two ways of setting state: 1. Provide a new state vector and set it with `dust_system_set_state` 2. Use the initial condition from the model itself (the expressions with `initial()` on the left hand side) -Here, we'll use the latter +Here, we'll use the latter, and use the initial condition defined in the odin code: ```{r} -dust2::dust_system_set_state_initial(sys) -dust2::dust_system_state(sys) +dust_system_set_state_initial(sys) +dust_system_state(sys) ``` In general, the order of the state is arbitrary (though in practice it is fairly stable). To turn this state vector into something more interpretable you can use `dust_unpack_state`: ```{r} -s <- dust2::dust_system_state(sys) -dust2::dust_unpack_state(sys, s) +s <- dust_system_state(sys) +dust_unpack_state(sys, s) ``` This gives a named list of numbers, which means we can work with the output of the model in a more reasonable way. @@ -103,7 +102,7 @@ Here we'll do the latter as it will produce something we can look at! ```{r} t <- seq(0, 150, by = 0.25) -y <- dust2::dust_system_simulate(sys, t) +y <- dust_system_simulate(sys, t) dim(y) ``` @@ -117,39 +116,77 @@ This may take some getting used to at first, but practically some degree of mani We'll explore some other ways of plotting later, but here's the number of individuals in the infectious class over time, as the epidemic proceeds. ```{r} -plot(t, dust2::dust_unpack_state(sys, y)$I, type = "l", - xlab = "Time", ylab = "Total infectious") +plot(t, dust_unpack_state(sys, y)$I, type = "l", + xlab = "Time", ylab = "Infected population") ``` Once a system has been run to a time, you'll need to reset it to run it again. For example, this won't work ```{r} #| error: true -dust2::dust_system_simulate(sys, t) +dust_system_simulate(sys, t) ``` The error here is trying to tell us that the first time in our vector `t`, which is 0, is smaller than the current time in the system, which is `r max(t)`. 
We can query the system to get its current time, too:

```{r}
-dust2::dust_system_time(sys)
+dust_system_time(sys)
```

You can reset the time back to zero (or any other time) using `dust_system_set_time`:

```{r}
-dust2::dust_system_set_time(sys, 0)
+dust_system_set_time(sys, 0)
```

this does not reset the state, however:

```{r}
-dust2::dust_system_state(sys)
+dust_system_state(sys)
```

We can set new parameters, perhaps, and then update the state:

```{r}
-dust2::dust_system_update_pars(sys, list(I0 = 5))
-dust2::dust_system_set_state_initial(sys)
-dust2::dust_system_state(sys)
+dust_system_update_pars(sys, list(I0 = 5))
+dust_system_set_state_initial(sys)
+dust_system_state(sys)
+```
+
+## Going further
+
+These are the lowest-level functions you would typically use, and we expect that you would combine these together yourself to perform specific tasks. For example, suppose you wanted to explore how `beta` affects the epidemic trajectory over this set of times; we might write:
+
+```{r}
+run_with_beta <- function(beta, t) {
+  sys <- dust_system_create(sir, list(beta = beta))
+  dust_system_set_state_initial(sys)
+  idx <- dust_unpack_index(sys)$I
+  drop(dust_system_simulate(sys, t, index_state = idx))
+}
```
+
+Here, we've used `dust_unpack_index` to get the index of the `I` compartment and passed that in as the index to `dust_system_simulate`.
+
+We could then run this over a series of `beta` values:
+
+```{r}
+beta <- seq(0.1, 0.3, by = 0.02)
+y <- vapply(beta, run_with_beta, t, FUN.VALUE = numeric(length(t)))
+matplot(t, y, type = "l", lty = 1, col = "steelblue4",
+        xlab = "Time", ylab = "Infected population")
+```
+
+We could modify this to take a single system object and reset it each time (which might be useful if our model was slow to initialise). Alternatively we could simulate the whole grid of beta values at once:
+
+```{r}
+pars <- lapply(beta, function(b) list(beta = b))
+sys <- dust_system_create(sir, pars, n_groups = length(pars))
+dust_system_set_state_initial(sys)
+idx <- dust_unpack_index(sys)$I
+y <- dust_system_simulate(sys, t, index_state = idx)
+matplot(t, t(y[1, , ]), type = "l", lty = 1, col = "steelblue4",
+        xlab = "Time", ylab = "Infected population")
+```
+
+Or perhaps you would want to combine these approaches and start the simulation from a range of stochastic starting points. Given the unbounded range of things people want to do with dynamical models, the hope is that you can use the functions in `dust2` to build workflows that match your needs, and that these functions should be well documented and efficient.
diff --git a/packaging.qmd b/packaging.qmd
index c2458be..d8ca507 100644
--- a/packaging.qmd
+++ b/packaging.qmd
@@ -2,11 +2,7 @@

```{r}
#| include: false
-source("utils.R")
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
+source("common.R")
```

So far, we have compiled odin code as we have needed it, using the `odin()` function. This works well for many uses, but has its limitations:
diff --git a/stochasticity.qmd b/stochasticity.qmd
index 87ee27c..e10d4e4 100644
--- a/stochasticity.qmd
+++ b/stochasticity.qmd
@@ -1,5 +1,10 @@
# Stochasticity {#sec-stochasticity}

+```{r}
+#| include: false
+source("common.R")
+```
+
[Everybody get random](https://www.youtube.com/watch?v=8MvMRdX2k5g)

One of the key motivations for discrete time models is that they allow a natural mechanism for using stochasticity.
By including stochasticity in our models we can better capture things like demographic effects where random fluctuations when population sizes are small have profound consequences for the longer term dynamics. In a deterministic system, things always happen the same way when you run them multiple times, but in a stochastic system -- especially nonlinear ones -- small changes in how the system arranges itself in its early steps can result in large changes in the dynamics later on, and this can help describe observed dynamics better.
@@ -20,7 +25,7 @@ library(dust2)

## A simple stochastic model {#sec-stochastic-sir}

-Here's a simple SIR model, based on the one in @sec-odin-sir, which is stochastic:
+Here's a simple SIR model, based on the one in @sec-odin-sir, but which is stochastic:

```{r}
sir <- odin({
@@ -46,12 +51,16 @@ sir <- odin({

This is a discrete-time model (using `update()` and not `deriv()`) as stochastic models must run in discrete time. We use `dt` to scale the rates, and adjusting `dt` will change the way that stochasticity affects the dynamics of the system.

+```{r}
+sir
+```
+
-The movement from S to I and from I to R are now drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) with probabilities that are computed from the rates (`beta` and `gamma`) multiplied by the change in time (`dt`).
+The movement from S to I and from I to R are now drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) with probabilities that are computed from the rates (`beta` and `gamma`) multiplied by the change in time (`dt`). Some care is required to make sure that we subtract the same number of individuals from `S` as we add to `I`, so we save this number as `n_SI`.

We can run this model just like before:

```{r}
-sys <- dust_system_create(sir(), list())
+sys <- dust_system_create(sir, list())
dust_system_set_state_initial(sys)
t <- seq(0, 100)
y <- dust_system_simulate(sys, t)
@@ -69,7 +78,7 @@ As in @sec-odin-sir, we see an epidemic start, increase and then decay. Unlike

You can pass an argument `n_particles` when creating a system (the default is 1) to simulate multiple independent realisations at once:

```{r}
-sys <- dust_system_create(sir(), list(), n_particles = 50)
+sys <- dust_system_create(sir, list(), n_particles = 50)
dust_system_set_state_initial(sys)
y <- dust_system_simulate(sys, t)
y <- dust_unpack_state(sys, y)
matplot(t, t(y$I), type = "l", lty = 1, col = "#00000044",
@@ -79,10 +88,14 @@ matplot(t, t(y$I), type = "l", lty = 1, col = "#00000044",

Here, we simulate 50 particles at once, all from the same point (10 individuals infected at time 0) and we see the similarities and differences in the trajectories: all follow *approximately* the same shape, and all rise and fall at *approximately* the same rate, but there is quite a bit of difference in the individual trajectories and the timings of when the epidemic peaks.

+::: {.callout-note}
+The `t()` in plotting here moves time from the last dimension to the first, as is expected by `matplot`. We may write functions to help prepare output for plotting.
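+
+For example, a minimal sketch of such a helper (`plot_ready` is hypothetical, not part of `dust2`):
+
+```{r}
+# Take the raw output of dust_system_simulate() and return a
+# time-by-particle matrix for one named state, ready for matplot().
+plot_ready <- function(sys, y, name) {
+  t(dust_unpack_state(sys, y)[[name]])
+}
+```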
+:::
+
The stochastic effects will be stronger around the critical parameter values where the behaviour of the model diverges, or with smaller initial population sizes:

```{r}
-sys <- dust_system_create(sir(), list(I0 = 5, gamma = 0.15), n_particles = 50)
+sys <- dust_system_create(sir, list(I0 = 5, gamma = 0.15), n_particles = 50)
dust_system_set_state_initial(sys)
y <- dust_system_simulate(sys, t)
y <- dust_unpack_state(sys, y)
diff --git a/time.qmd b/time.qmd
index 81f90a9..1ab27de 100644
--- a/time.qmd
+++ b/time.qmd
@@ -1,6 +1,9 @@
# On the nature of time {#sec-time}

-What is time, really?
+```{r}
+#| include: false
+source("common.R")
+```

There are two main sorts of treatment of time within odin models:

@@ -34,17 +37,20 @@ logistic_ode <- odin({
  x0 <- parameter(0.01)
  r <- parameter(0.1)
}, debug = TRUE)
+logistic_ode
```

+As in @sec-odin-sir, we can run this through time from its initial conditions:
+
```{r}
-sys <- dust_system_create(logistic_ode(), list(), n_particles = 1)
+sys <- dust_system_create(logistic_ode, list())
dust_system_set_state_initial(sys)
t <- seq(0, 100, by = 1)
y <- dust_system_simulate(sys, t)
plot(t, y[1, ], type = "l", xlab = "Time", ylab = "Population")
```

-Now, we write out the same system in discrete time:
+Now, we write out a closely related system which uses discrete time: the "[logistic map](https://en.wikipedia.org/wiki/Logistic_map)":

```{r}
logistic_map <- odin({
@@ -55,8 +61,10 @@ logistic_map <- odin({
}, debug = TRUE)
```

+and run it:
+
```{r}
-sys2 <- dust_system_create(logistic_map(), list(), n_particles = 1)
+sys2 <- dust_system_create(logistic_map, list())
dust_system_set_state_initial(sys2)
y2 <- dust_system_simulate(sys2, t)
plot(t, y[1, ], type = "l", xlab = "Time", ylab = "Population")
@@ -84,6 +92,8 @@ lines(t, run(1.8, sys2, t), col = "red")
lines(t, run(2, sys2, t), col = "red")
```

+The non-zero equilibrium of the discrete-time system is not 1 (as in the continuous-time version) but `(r - 1) / r`.
+
For larger values of `r` we start getting oscillations around the maximum, either decaying (blue lines) or stable (green line):

```{r}
diff --git a/utils.R b/utils.R
deleted file mode 100644
index 16e188d..0000000
--- a/utils.R
+++ /dev/null
@@ -1,6 +0,0 @@
-lang_output <- function(x, lang) {
-  cat(c(sprintf("```%s", lang), x, "```"), sep = "\n")
-}
-cpp_output <- function(x) lang_output(x, "cpp")
-r_output <- function(x) lang_output(x, "r")
-plain_output <- function(x) lang_output(x, "plain")