Merge pull request #17 from ghurault/dev

Release v0.3.0
ghurault · Mar 30, 2022 · 4a500e1 · 4a500e1
2 parents f0c9723 + 66349e1
commit 4a500e1
Show file tree

Hide file tree

Showing 63 changed files with 1,696 additions and 1,240 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: EczemaPred
 Title: Predicting the Evolution of Eczema Severity
-Version: 0.2.0
+Version: 0.3.0
 Authors@R: 
     person(given = "Guillem",
            family = "Hurault",
@@ -9,6 +9,7 @@ Authors@R:
            comment = c(ORCID = "0000-0002-1052-3564"))
 Description: A collection of models to serve as building blocks for predicting eczema severity.
 License: GPL (>= 3)
+Language: en-GB
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
@@ -27,7 +28,7 @@ Imports:
     tidyr,
     ggplot2,
     markovchain,
-    HuraultMisc (>= 1.0.0),
+    HuraultMisc (>= 1.1.1),
     stats,
     scoringRules
 LinkingTo: 
@@ -38,7 +39,8 @@ LinkingTo:
     rstan (>= 2.18.1),
     StanHeaders (>= 2.18.0)
 SystemRequirements: GNU make
-Suggests: 
+Suggests:
+    spelling, 
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,
@@ -48,5 +50,3 @@ Suggests:
     covr
 Config/testthat/edition: 3
 VignetteBuilder: knitr
-Remotes: 
-    github::ghurault/HuraultMisc
diff --git a/DEVNOTES.md b/DEVNOTES.md
@@ -28,3 +28,7 @@ To regenerate the website locally, use `pkgdown::build_site()`.
   and you can call them with e.g., 'rstan::sampling(stanmodels$foo, ...)'
 
 See [Guidelines for developers of R packages interfacing with Stan](https://mc-stan.org/rstantools/articles/developer-guidelines.html) for more information.
+
+## Git
+
+I try to follow the Git flow for managing this repository.
diff --git a/NAMESPACE b/NAMESPACE
@@ -22,6 +22,8 @@ S3method(list_parameters,character)
 S3method(prepare_standata,AR1)
 S3method(prepare_standata,EczemaModel)
 S3method(prepare_standata,MC)
+S3method(prepare_standata,MixedAR1)
+S3method(prepare_standata,OrderedRW)
 S3method(prepare_standata,RW)
 S3method(prepare_standata,Smoothing)
 S3method(print,EczemaModel)
@@ -48,6 +50,8 @@ S3method(validate_prior,Smoothing)
 export("%>%")
 export(EczemaFit)
 export(EczemaModel)
+export(add_broken_pointline)
+export(add_fanchart)
 export(add_historical_pred)
 export(add_metrics1_c)
 export(add_metrics1_d)

diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,23 @@
+# EczemaPred 0.3.0
+
+- Add plotting helpers `add_fanchart()` and `add_broken_pointline()`
+- Allow discrete input to `MixedAR1`
+- Refactor tests
+- Minor bug fixes and refactoring
+
+# EczemaPred 0.2.0
+
+- Reparametrise `OrderedRW` model
+
+# EczemaPred 0.1.1
+
+Post publication release
+
+- Update README, workflows
+- Add citation file
+- Combine `RW`, `Smoothing` and `AR1` into a single Stan model, thus reducing compilation time at the installation and allowing discrete input to `Smoothing` and `AR1`
+- Minor bug fixes
+
+# EczemaPred 0.1.0
+
+- Initial release
diff --git a/R/EczemaPred-package.R b/R/EczemaPred-package.R
@@ -11,6 +11,6 @@
 #' @importFrom rstan sampling
 #'
 #' @references
-#' Stan Development Team (2020). RStan: the R interface to Stan. R package version 2.21.2. https://mc-stan.org
+#' [Stan Development Team (2020). RStan: the R interface to Stan.](https://mc-stan.org) R package version 2.21.2.
 #'
 NULL
diff --git a/R/metrics.R b/R/metrics.R
@@ -49,7 +49,7 @@ extract_RPS <- function(fit) {
 #' for example to add a uniform distribution to the vector of samples to avoid problems at the tail of the distribution.
 #' If `NULL`, the empirical pmf is not changed.
 #' Default to the uniform distribution (i.e. `support`) for [add_metrics2_d()] and `NULL` for [add_metrics2_c()].
-#' @param bw Bandwith, for calculating lpd, see [scoringRules::logs_sample()].
+#' @param bw Bandwidth, for calculating lpd, see [scoringRules::logs_sample()].
 #' Useful to set the "resolution" of the distribution.
 #'
 #' @return Dataframe `df` appended by the columns "lpd", "RPS" (or "CRPS" for [add_metrics1_c()] and [add_metrics2_d()]).

diff --git a/R/missing.R b/R/missing.R
@@ -17,7 +17,7 @@
 generate_MC2_sequence <- function(N, p01 = .5, p10 = .5, t0 = NULL) {
 
   stopifnot(is.numeric(c(N, p01, p10)),
-            all(sapply(list(N, p01, p10), function(x) {length(x) == 1})),
+            all(vapply(list(N, p01, p10), function(x) {length(x) == 1}, logical(1))),
             N == round(N),
             p01 >= 0 & p01 <= 1,
             p10 >= 0 & p10 <= 1)
@@ -44,13 +44,13 @@ generate_MC2_sequence <- function(N, p01 = .5, p10 = .5, t0 = NULL) {
 #' Generate missing values in a times-series
 #'
 #' First and last values are nor missing.
-#' Missing indices can be generated at random (Binomial distribution) or using a Markov Chain (if consecutives missing values are deemed more likely).
+#' Missing indices can be generated at random (Binomial distribution) or using a Markov Chain (if consecutive missing values are deemed more likely).
 #' The markov chain is parametrised in terms of the steady state probability of a value being missing and the probability that the next value is observed when the current value is also observed.
 #'
 #' @param N Length of the time-series
 #' @param type Method to generate the missing values. One of "random" or "markovchain"
-#' @param p_mis Probability of a given value to be missing (steady state probability for type == "markovchain")
-#' @param p_obs_obs Probability of the next value being observed when the current is observed (for type == "markovchain")
+#' @param p_mis Probability of a given value to be missing (steady state probability for `type = "markovchain"`)
+#' @param p_obs_obs Probability of the next value being observed when the current is observed (for `type = "markovchain"`)
 #'
 #' @return Logical vector of length N
 #' @export

diff --git a/R/model_class.R b/R/model_class.R
@@ -15,7 +15,7 @@
 #' - `model_name`: Name of the model
 #' - `stanmodel`: Name of the Stan model.
 #' Used internally to locate the compiled code.
-#' It can also be used to store the filepath of the Stan code.
+#' It can also be used to store the Stan code filepath.
 #' - `discrete`: Whether the model is discrete or not.
 #' - `max_score`: Maximum value that the score can take (when applicable)
 #' - `K`: Number of categories (when applicable)
@@ -41,8 +41,6 @@ EczemaModel <- function(model_name = c("BinRW", "OrderedRW", "BinMC", "RW", "Smo
 
   if (model_name %in% c("BinRW", "OrderedRW", "BinMC", "MC")) {
     discrete <- TRUE
-  } else if (model_name %in% c("MixedAR1")) {
-    discrete <- FALSE
   } else {
     stopifnot(is_scalar(discrete),
               is.logical(discrete))
@@ -53,10 +51,16 @@ EczemaModel <- function(model_name = c("BinRW", "OrderedRW", "BinMC", "RW", "Smo
     if (is.null(max_score)) {
       stop("max_score must be supplied for ", model_name)
     } else {
-      # NB: max_score must be a wholenumber even if discrete=FALSE
-      stopifnot(is_scalar_wholenumber(max_score),
-                max_score > 0,
-                max_score > 1 || model_name != "OrderedRW")
+      stopifnot(is_scalar(max_score),
+                is.numeric(max_score),
+                max_score > 0)
+      if (model_spec$discrete || model_name == "MixedAR1") {
+        # max_score as a real not implemented for MixedAR1
+        stopifnot(is_wholenumber(max_score))
+      }
+      if (model_name == "OrderedRW") {
+        stopifnot(max_score > 1)
+      }
       model_spec$max_score <- max_score
     }
   }
@@ -188,11 +192,11 @@ print_prior <- function(model, ...) {
 #' @param ... Arguments to pass to other methods
 #'
 #' @return Named list of parameters names, grouped into broad categories:
-#' - Population: population parameters (i.e. patient- and time-independent)
-#' - Patient: patient-dependent parameters
-#' - PatientTime: patient- and time-dependent parameters (e.g. latent scores)
-#' - Test: parameters related to the test set
-#' - Misc: other parameters
+#' - `Population`: population parameters (i.e. patient- and time-independent)
+#' - `Patient`: patient-dependent parameters
+#' - `PatientTime`: patient- and time-dependent parameters (e.g. latent scores)
+#' - `Test`: parameters related to the test set
+#' - `Misc`: other parameters
 #'
 #' @details
 #' See [MC], [BinRW], [BinMC], [OrderedRW], [RW], [Smoothing], [AR1] and [MixedAR1] for details about the model-specific parameters.

diff --git a/R/model_doc.R b/R/model_doc.R
@@ -155,7 +155,7 @@ NULL
 #' ## Observation-dependent (patient- and time-dependent) parameters:
 #'
 #' - `p01`: Probability of transitioning from state 0 to state 1
-#' - `lambda`: Mobility of the Markov Chain (eigen value of the transition matrix)
+#' - `lambda`: Mobility of the Markov Chain (eigenvalue of the transition matrix)
 #' - `ss1`: Steady state probability of state 1
 #' - `y_lat`: Latent score (probability)
 #'
@@ -332,13 +332,16 @@ NULL
 
 #' Mixed effect autoregressive model (order 1)
 #'
-#' @param max_score Maximum value that the score can take
+#' @param max_score Maximum value that the score can take.
+#' Note that even if `discrete=FALSE`, `max_score` must be an integer.
+#' @param discrete Whether to use a discrete normal distribution.
+#' This will be used to check whether the data is discrete or not, and for rounding predictions (cf. testing).
 #' @param prior Named list of the model's priors. If `NULL`, uses the default prior for the model (see [default_prior()]).
 #'
 #' @details
 #' - Details of the model are available in the [paper](#).
 #' The model takes as input a continuous score defined between 0 and `max_score`.
-#' - The model is naive as the likelihood distribution is not truncated.
+#' - The model is naive as the likelihood is non-truncated and not discretised (when `discrete = TRUE`).
 #' - Unlike the `AR1` model, the discretisation of predictions is not implemented
 #' - For more details see the [vignette](https://ghurault.github.io/EczemaPred/articles/ContinuousModels.html).
 #'

diff --git a/R/model_prior.R b/R/model_prior.R
@@ -52,9 +52,9 @@ validate_prior.BinRW <- function(model, ...) {
   stopifnot(
     is.list(prior),
     all(c("sigma", "mu_logit_y0", "sigma_logit_y0") %in% names(prior)),
-    all(sapply(prior, is.numeric)),
-    all(sapply(prior, function(x) {length(x) == 2})),
-    all(sapply(prior, function(x) {x[2] > 0}))
+    all(vapply(prior, is.numeric, logical(1))),
+    all(vapply(prior, function(x) {length(x) == 2}, logical(1))),
+    all(vapply(prior, function(x) {x[2] > 0}, logical(1)))
   )
 }
 
@@ -118,9 +118,9 @@ validate_prior.BinMC <- function(model, ...) {
   stopifnot(
     is.list(prior),
     all(c("sigma", "mu_logit_p10", "sigma_logit_p10", "logit_tss1_0") %in% names(prior)),
-    all(sapply(prior, is.numeric)),
-    all(sapply(prior, function(x) {length(x) == 2})),
-    all(sapply(prior, function(x) {x[2] > 0}))
+    all(vapply(prior, is.numeric, logical(1))),
+    all(vapply(prior, function(x) {length(x) == 2}, logical(1))),
+    all(vapply(prior, function(x) {x[2] > 0}, logical(1)))
   )
 }
 
@@ -174,8 +174,8 @@ validate_prior.Smoothing <- function(model, ...) {
   stopifnot(
     is.list(prior),
     all(c("sigma", "tau") %in% names(prior)),
-    all(sapply(prior, is.numeric)),
-    all(sapply(prior, function(x) {length(x) == 2})),
+    all(vapply(prior, is.numeric, logical(1))),
+    all(vapply(prior, function(x) {length(x) == 2}, logical(1))),
     prior$sigma[2] > 0,
     prior$tau[2] > 0
   )
@@ -203,8 +203,8 @@ validate_prior.AR1 <- function(model, ...) {
   stopifnot(
     is.list(prior),
     all(c("sigma", "slope", "y_inf") %in% names(prior)),
-    all(sapply(prior, is.numeric)),
-    all(sapply(prior, function(x) {length(x) == 2})),
+    all(vapply(prior, is.numeric, logical(1))),
+    all(vapply(prior, function(x) {length(x) == 2}, logical(1))),
     prior$sigma[2] > 0,
     prior$y_inf[2] > 0,
     all(prior$slope > 0)
@@ -236,9 +236,9 @@ validate_prior.MixedAR1 <- function(model, ...) {
     is.list(prior),
     all(c("sigma", "mu_logit_slope", "sigma_logit_slope",
           "mu_inf", "sigma_inf") %in% names(prior)),
-    all(sapply(prior, is.numeric)),
-    all(sapply(prior, function(x) {length(x) == 2})),
-    all(sapply(prior, function(x) {x[2] > 0}))
+    all(vapply(prior, is.numeric, logical(1))),
+    all(vapply(prior, function(x) {length(x) == 2}, logical(1))),
+    all(vapply(prior, function(x) {x[2] > 0}, logical(1)))
   )
 }
 

diff --git a/R/parameters.R b/R/parameters.R
@@ -2,7 +2,8 @@
 
 #' @export
 list_parameters.character <- function(model, ...) {
-  structure(list(name = model),
+  structure(list(name = model,
+                 discrete = FALSE),
             class = c(model)) %>%
     list_parameters(...)
 }
@@ -64,35 +65,51 @@ list_parameters.BinMC <- function(model, main = TRUE, ...) {
 
 #' @export
 list_parameters.RW <- function(model, ...) {
-  list(Population = "sigma",
-       PatientTime = "y_rep",
-       Test = c("y_pred", "lpd", "cum_err"),
-       Misc = "y_mis")
+  out <- list(Population = "sigma",
+              PatientTime = "y_rep",
+              Test = c("y_pred", "lpd"),
+              Misc = "y_mis")
+  if (model$discrete) {
+    out$Test <- c(out$Test, "cum_err")
+  }
+  return(out)
 }
 
 #' @export
 list_parameters.Smoothing <- function(model, ...) {
-  list(Population = c("sigma", "tau", "alpha"),
-       PatientTime = c("L", "y_rep"),
-       Test = c("y_pred", "lpd"),
-       Misc = "y_mis")
+  out <- list(Population = c("sigma", "tau", "alpha"),
+              PatientTime = c("L", "y_rep"),
+              Test = c("y_pred", "lpd"),
+              Misc = "y_mis")
+  if (model$discrete) {
+    out$Test <- c(out$Test, "cum_err")
+  }
+  return(out)
 }
 
 #' @export
 list_parameters.AR1 <- function(model, ...) {
-  list(Population = c("sigma", "slope", "intercept", "y_inf"),
-       PatientTime = "y_rep",
-       Test = c("y_pred", "lpd"),
-       Misc = "y_mis")
+  out <- list(Population = c("sigma", "slope", "intercept", "y_inf"),
+              PatientTime = "y_rep",
+              Test = c("y_pred", "lpd"),
+              Misc = "y_mis")
+  if (model$discrete) {
+    out$Test <- c(out$Test, "cum_err")
+  }
+  return(out)
 }
 
 #' @export
 list_parameters.MixedAR1 <- function(model, ...) {
-  list(Population = c("sigma", "mu_logit_slope", "sigma_logit_slope", "mu_inf", "sigma_inf"),
-       Patient = c("slope", "y_inf", "intercept"),
-       PatientTime = c("y_rep"),
-       Test = c("y_pred", "lpd"),
-       Misc = "y_mis")
+  out <- list(Population = c("sigma", "mu_logit_slope", "sigma_logit_slope", "mu_inf", "sigma_inf"),
+              Patient = c("slope", "y_inf", "intercept"),
+              PatientTime = c("y_rep"),
+              Test = c("y_pred", "lpd"),
+              Misc = "y_mis")
+  if (model$discrete) {
+    out$Test <- c(out$Test, "cum_err")
+  }
+  return(out)
 }
 
 #' @export

diff --git a/R/plot_fit.R b/R/plot_fit.R
@@ -5,6 +5,7 @@
 #' @param fit Stanfit object
 #' @param id Dataframe linking index in fit to (Patient, Time) pairs, cf. output from [get_index()]
 #' @param patient_id Patient ID
+#' @param ... Arguments to pass to [add_fanchart()]
 #'
 #' @return Ggplot
 #' - Horizontal lines correspond to the expected cut-offs
@@ -13,7 +14,7 @@
 #' @import ggplot2 dplyr
 #'
 #' @export
-plot_latent_OrderedRW <- function(fit, id, patient_id) {
+plot_latent_OrderedRW <- function(fit, id, patient_id, ...) {
 
   stopifnot(is_stanfit(fit),
             all(c("y_lat", "ct", "sigma_meas") %in% fit@model_pars),
@@ -24,8 +25,9 @@ plot_latent_OrderedRW <- function(fit, id, patient_id) {
 
   # Extract mean latent score and cutpoints
   id1 <- filter(id, .data$Patient == patient_id)
-  df <- mutate(id1, Mean = rstan::extract(fit, pars = paste0("y_lat[", id1[["Index"]], "]")) %>%
-                 sapply(mean))
+  df <- id1 %>%
+    mutate(Mean = rstan::extract(fit, pars = paste0("y_lat[", id1[["Index"]], "]")) %>%
+             vapply(mean, numeric(1)))
   ct <- rstan::extract(fit, pars = "ct")[[1]] %>%
     apply(2, mean)
   sigma_meas <- rstan::extract(fit, pars = "sigma_meas")[[1]] %>%
@@ -49,7 +51,8 @@ plot_latent_OrderedRW <- function(fit, id, patient_id) {
                 }) %>%
     bind_rows()
 
-  p <- plot_fanchart(ssi) +
+  p <- ggplot() +
+    add_fanchart(df = ssi, ...) +
     geom_hline(yintercept = ct) +
     geom_label(data = data.frame(Label = paste0("y = ", 0:max_score), x = 1, y = midpoint),
                aes_string(x = "x", y = "y", label = "Label"), hjust = 0) +