fix: use ID as character and use merge to combine data.frames/matrice…

…s. (#126)
mcanouil · Nov 7, 2024 · 493b87c · 493b87c
1 parent eceaaa8
commit 493b87c
Show file tree

Hide file tree

Showing 15 changed files with 19,920 additions and 711 deletions.
diff --git a/.github/workflows/check-pak.yml b/.github/workflows/check-pak.yml
@@ -25,8 +25,6 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: ubuntu-20.04,    r: '4.3.2'}
-          - {os: ubuntu-22.04,    r: '4.3.2'}
           - {os: ubuntu-latest,   r: '4.3.2'}
 
     env:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: eggla
 Title: Early Growth Genetics Longitudinal Analysis
-Version: 1.0.1
+Version: 1.0.2
 Authors@R:
     c(person(given = "Mickaël",
            family = "Canouil",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,21 @@
+# eggla 1.0.2
+
+## Fixes
+
+- In `R/*_aucs.R`, `R/*_slopes.R`,
+  - fix: use ID as character and use merge to combine data.frames/matrices.
+
+## Chores
+
+- In `R`, `tests/testthat`,
+  - chore: replace `1:` with `seq_len` and `$` with `[[`.
+
+## Tests
+
+- In `tests/testthat`,
+  - test: cleanup duplicated and outdated tests.
+  - test: add snapshot tests.
+
 # eggla 1.0.1
 
 ## Build

diff --git a/R/compute_aucs.R b/R/compute_aucs.R
@@ -45,24 +45,32 @@ compute_aucs <- function(
 ) {
   stopifnot(inherits(fit, "lme"))
   id_var <- names(fit[["groups"]])
-  pred_auc <- matrix(data = NA_real_, nrow = length(unique(fit$data[[id_var]])), ncol = length(period) / 2)
-  colnames(pred_auc) <- paste0(
-    "auc_",
-    sapply(split(
-      x = period,
-      f = rep(
-        x = seq(1, length(period), length(period) %/% 4),
-        each = length(period) %/% 4
+
+  pred_auc <- matrix(
+    data = NA_real_,
+    nrow = length(unique(fit[["data"]][[id_var]])),
+    ncol = length(period) / 2,
+    dimnames = list(
+      as.character(unique(fit[["data"]][[id_var]])),
+      paste0(
+        "auc_",
+        sapply(split(
+          x = period,
+          f = rep(
+            x = seq(1, length(period), length(period) %/% 4),
+            each = length(period) %/% 4
+          )
+        ), paste, collapse = "--")
       )
-    ), paste, collapse = "--")
+    )
   )
 
   fxef <- nlme::fixef(fit)
   fxef <- unname(fxef[grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(fxef))])
   rnef <- nlme::ranef(fit)
   rnef <- rnef[, grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(rnef))]
 
-  out <- switch(
+  switch(
     EXPR = as.character(method),
     "cubic_slope" = {
       y <- function(x, coeff, knots) {
@@ -71,69 +79,67 @@ compute_aucs <- function(
           FUN = function(x) sum(coeff * mapply("^", rep(x, length(coeff)), seq_along(coeff) - 1))
         )
       }
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
+        for (j in seq_len(length(period) / 2)) {
           pred_auc[i, j] <- stats::integrate(
             f = y,
             coeff = coeff,
             lower = period[j * 2 - 1],
             upper = period[j * 2]
-          )$value
+          )[["value"]]
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred_auc)
     },
     "linear_splines" = {
       y <- function(x, coeff, knots) {
         sapply(
           X = x,
           FUN = function(x) {
             x_pos <- findInterval(x, knots, left.open = TRUE)
-            sum(c(coeff * c(1, x - c(0, knots)))[1:(x_pos + 2)])
+            sum(c(coeff * c(1, x - c(0, knots)))[seq_len(x_pos + 2)])
           }
         )
       }
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
+        for (j in seq_len(length(period) / 2)) {
            pred_auc[i, j] <- stats::integrate(
              f = y,
              coeff = coeff,
              knots = knots,
              lower = period[j * 2 - 1],
              upper = period[j * 2]
-            )$value
+            )[["value"]]
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred_auc)
     },
     "cubic_splines" = {
       y <- function(x, coeff, knots) {
         sapply(
           X = x,
           FUN = function(x) {
             y_tmp <- coeff * c(x^0, x^1, x^2, x^3, (x - knots)^3) / c(1, 1, 2, rep(6, 4))
-            sum(y_tmp[1:(4 + findInterval(x, knots, left.open = TRUE))])
+            sum(y_tmp[seq_len(4 + findInterval(x, knots, left.open = TRUE))])
           }
         )
       }
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
-           pred_auc[i, j] <- stats::integrate(
-             f = y,
-             coeff = coeff,
-             knots = knots,
-             lower = period[j * 2 - 1],
-             upper = period[j * 2]
-            )$value
+        for (j in seq_len(length(period) / 2)) {
+          pred_auc[i, j] <- stats::integrate(
+            f = y,
+            coeff = coeff,
+            knots = knots,
+            lower = period[j * 2 - 1],
+            upper = period[j * 2]
+          )[["value"]]
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred_auc)
     }
   )
 
-  names(out)[1] <- id_var
+  out <- cbind.data.frame(Row.names = rownames(pred_auc), pred_auc)
+  names(out)[grepl("Row.names", names(out), fixed = TRUE)] <- id_var
   out
 }
diff --git a/R/compute_slopes.R b/R/compute_slopes.R
@@ -44,31 +44,46 @@ compute_slopes <- function(
 ) {
   stopifnot(inherits(fit, "lme"))
   id_var <- names(fit[["groups"]])
-  slopes <- matrix(data = NA_real_, nrow = length(unique(fit$data[[id_var]])), ncol = length(period) / 2)
-  colnames(slopes) <- paste0(
-    "slope_",
-    sapply(split(
-      x = period,
-      f = rep(
-        x = seq(1, length(period), length(period) %/% 4),
-        each = length(period) %/% 4
+
+  slopes <- matrix(
+    data = NA_real_,
+    nrow = length(unique(fit[["data"]][[id_var]])),
+    ncol = length(period) / 2,
+    dimnames = list(
+      as.character(unique(fit[["data"]][[id_var]])),
+      paste0(
+        "slope_",
+        sapply(split(
+          x = period,
+          f = rep(
+            x = seq(1, length(period), length(period) %/% 4),
+            each = length(period) %/% 4
+          )
+        ), paste, collapse = "--")
       )
-    ), paste, collapse = "--")
+    )
+  )
+  pred <- matrix(
+    data = NA_real_,
+    nrow = length(unique(fit[["data"]][[id_var]])),
+    ncol = length(period),
+    dimnames = list(
+      as.character(unique(fit[["data"]][[id_var]])),
+      paste0("pred_period_", round(period, digits = 1))
+    )
   )
-  pred <- matrix(data = NA_real_, nrow = length(unique(fit$data[[id_var]])), ncol = length(period))
-  colnames(pred) <- paste0("pred_period_", round(period, digits = 1))
 
   fxef <- nlme::fixef(fit)
   fxef <- unname(fxef[grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(fxef))])
   rnef <- nlme::ranef(fit)
   rnef <- rnef[, grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(rnef))]
 
-  out <- switch(
+  switch(
     EXPR = as.character(method),
     "cubic_slope" = {
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
+        for (j in seq_len(length(period) / 2)) {
           x1 <- period[j * 2 - 1]
           y1 <- sum(coeff * mapply("^", rep(x1, length(coeff)), seq_along(coeff) - 1))
 
@@ -80,48 +95,46 @@ compute_slopes <- function(
           slopes[i, j] <- (y2 - y1) / (x2 - x1)
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred, slopes)
     },
     "linear_splines" = {
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
+        for (j in seq_len(length(period) / 2)) {
           x1 <- period[j * 2 - 1]
           x1_pos <- findInterval(x1, knots, left.open = TRUE)
-          y1 <- sum(c(coeff * c(1, x1 - c(0, knots)))[1:(x1_pos + 2)])
+          y1 <- sum(c(coeff * c(1, x1 - c(0, knots)))[seq_len(x1_pos + 2)])
 
           x2 <- period[j * 2]
           x2_pos <- findInterval(x2, knots, left.open = TRUE)
-          y2 <- sum(c(coeff * c(1, x2 - c(0, knots)))[1:(x2_pos + 2)])
+          y2 <- sum(c(coeff * c(1, x2 - c(0, knots)))[seq_len(x2_pos + 2)])
 
           pred[i, j * 2 - 1] <- y1
           pred[i, j * 2] <- y2
           slopes[i, j] <- (y2 - y1) / (x2 - x1)
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred, slopes)
     },
     "cubic_splines" = {
-      for (i in seq_along(unique(fit$data[[id_var]]))) {
+      for (i in as.character(unique(fit[["data"]][[id_var]]))) {
         coeff <- fxef + as.numeric(rnef[i, ]) # this implies fixed = random
-        for (j in 1:(length(period) / 2)) {
+        for (j in seq_len(length(period) / 2)) {
           x1 <- period[j * 2 - 1]
           y1_tmp <- coeff * c(x1^0, x1^1, x1^2, x1^3, (x1 - knots)^3) / c(1, 1, 2, rep(6, 4))
-          y1 <- sum(y1_tmp[1:(4 + findInterval(x1, knots, left.open = TRUE))])
+          y1 <- sum(y1_tmp[seq_len(4 + findInterval(x1, knots, left.open = TRUE))])
 
           x2 <- period[j * 2]
           y2_tmp <- coeff * c(x2^0, x2^1, x2^2, x2^3, (x2 - knots)^3) / c(1, 1, 2, rep(6, 4))
-          y2 <- sum(y2_tmp[1:(4 + findInterval(x2, knots, left.open = TRUE))])
+          y2 <- sum(y2_tmp[seq_len(4 + findInterval(x2, knots, left.open = TRUE))])
 
           pred[i, j * 2 - 1] <- y1
           pred[i, j * 2] <- y2
           slopes[i, j] <- (y2 - y1) / (x2 - x1)
         }
       }
-      cbind.data.frame(ID = unique(fit$data[[id_var]]), pred, slopes)
     }
   )
 
-  names(out)[1] <- id_var
+  out <- merge(x = pred,y = slopes, by = "row.names")
+  names(out)[grepl("Row.names", names(out), fixed = TRUE)] <- id_var
   out
 }
diff --git a/R/egg_aucs.R b/R/egg_aucs.R
@@ -1,7 +1,7 @@
 #' Derived areas under the curve from a cubic splines mixed-effects model by `egg_model()`.
 #'
 #' Derived areas under the curve (AUCs) for differentintervals based
-#' on a fitted cubic splines mixed-effects model from `egg_model()`. 
+#' on a fitted cubic splines mixed-effects model from `egg_model()`.
 #' This function is a specific version of `compute_aucs`
 #' designed to work specifically on `egg_model()`.
 #'
@@ -39,24 +39,25 @@ egg_aucs <- function(
 
   pred_auc <- matrix(
     data = NA_real_,
-    nrow = length(unique(fit$data[[id_var]])),
-    ncol = length(period) / 2
-  )
-  colnames(pred_auc) <- paste0(
-    "auc_",
-    sapply(split(
-      x = period,
-      f = rep(
-        x = seq(1, length(period), length(period) %/% 4),
-        each = length(period) %/% 4
+    nrow = length(unique(fit[["data"]][[id_var]])),
+    ncol = length(period) / 2,
+    dimnames = list(
+      as.character(unique(fit[["data"]][[id_var]])),
+      paste0(
+        "auc_",
+        sapply(split(
+          x = period,
+          f = rep(
+            x = seq(1, length(period), length(period) %/% 4),
+            each = length(period) %/% 4
+          )
+        ), paste, collapse = "--")
       )
-    ), paste, collapse = "--")
+    )
   )
 
   fxef <- nlme::fixef(fit)
-  fxef <- unname(fxef[
-    grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(fxef))
-  ])
+  fxef <- unname(fxef[grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(fxef))])
   rnef <- nlme::ranef(fit)
   rnef <- rnef[, grep("\\(Intercept\\)|gsp\\(.*\\)|poly\\(.*\\)", names(rnef))]
 
@@ -73,26 +74,25 @@ egg_aucs <- function(
     sapply(
       X = x,
       FUN = function(x) {
-        y_tmp <- coeff *
-          c(x^0, x^1, x^2, x^3, (x - knots)^3) /
-          c(1, 1, 2, rep(6, 4))
-        sum(y_tmp[1:(4 + findInterval(x, knots, left.open = TRUE))])
+        y_tmp <- coeff * c(x^0, x^1, x^2, x^3, (x - knots)^3) / c(1, 1, 2, rep(6, 4))
+        sum(y_tmp[seq_len(4 + findInterval(x, knots, left.open = TRUE))])
       }
     )
   }
-  for (i in seq_along(unique(fit$data[[id_var]]))) {
+  for (i in as.character(unique(fit[["data"]][[id_var]]))) {
     coeff <- fxef + as.numeric(rnef[i, ])
-    for (j in 1:(length(period) / 2)) {
+    for (j in seq_len(length(period) / 2)) {
         pred_auc[i, j] <- stats::integrate(
           f = y,
           coeff = coeff,
           knots = knots,
           lower = period[j * 2 - 1],
           upper = period[j * 2]
-        )$value
+        )[["value"]]
     }
   }
-  out <- cbind.data.frame(ID = unique(fit$data[[id_var]]), pred_auc)
-  names(out)[1] <- id_var
+
+  out <- cbind.data.frame(Row.names = rownames(pred_auc), pred_auc)
+  names(out)[grepl("Row.names", names(out), fixed = TRUE)] <- id_var
   out
 }