Skip to content

makepredictcall for degroup() #643

@mattansb

Description

@mattansb

This will allow using demean() or degroup() inside a formula, which can then play nice with brms or marginaleffects etc...

Code
degroup.numeric <- function(x,
                            ...,
                            means = NULL,
                            center = "mean",
                            suffix_demean = "_within",
                            suffix_groupmean = "_between") {
  
  
  dat <- data.frame(x, ...)
  x_name <- insight::safe_deparse(substitute(x))
  colnames(dat)[1] <- x_name
  
  if (is.null(means)) {
    group_cols <- colnames(dat)[-1]
    
    dat_degrouped <- degroup(dat,
                             select = colnames(dat)[1],
                             by = group_cols,
                             center = center,
                             suffix_demean = suffix_demean,
                             suffix_groupmean = suffix_groupmean, 
                             append = TRUE)
    cols <- colnames(dat_degrouped)
    between_cols <- cols[grepl(suffix_groupmean, cols)]
    within_cols <- cols[grepl(suffix_demean, cols)]
    
    table_cols <- c(group_cols, between_cols)
    means <- unique(dat_degrouped[table_cols])
    
    out <- dat_degrouped[c(between_cols, within_cols)]
  } else {
    dat_groupmeans <- data_join(dat, means)
    
    cols <- colnames(dat_groupmeans)
    between_cols <- cols[grepl(suffix_groupmean, cols)]
    
    # # Replace missing with grandmeans?
    # if (anyNA(dat_groupmeans[between_cols])) {
    #   grand_means <- lapply(means[between_cols], mean, na.rm = TRUE)
    #   
    #   for (nm in names(grand_means)) {
    #     dat_groupmeans[is.na(dat_groupmeans[[nm]]),nm] <- grand_means[[nm]]
    #   }
    # }
    
    sum_group_means <- Reduce("+", dat_groupmeans[between_cols])
    
    dat_groupmeans$x_within <- dat_groupmeans[[x_name]] - sum_group_means
    out <- dat_groupmeans[c(between_cols, "x_within")]
    colnames(out)[ncol(out)] <- paste0(x_name, suffix_demean)
    out <- as.matrix(out)
  }
  
  out <- as.matrix(out)
  rownames(out) <- NULL
  attr(out, "means") <- means
  class(out) <- c("dw_degroup", class(out))
  out
}

demean.numeric <- function(x,
                           ...,
                           means = NULL,
                           center = "mean",
                           suffix_demean = "_within",
                           suffix_groupmean = "_between") {
  cl <- match.call()
  cl[[1]] <- quote(datawizard::degroup)
  cl$center <- "mean"
  eval.parent(cl)
}


dat <- data.frame(
  a = c(1, 2, 3, 4, 1, 2, 3, 4),
  x = c(4, 3, 3, 4, 1, 2, 1, 2),
  y = c(1, 2, 1, 2, 4, 3, 2, 1),
  ID = c(1, 2, 3, 1, 2, 3, 1, 2)
)


makepredictcall.dw_degroup <- function(var, call) {
  call$means <- attr(var, "means")
  call
}

Example:

data("egsingle", package = "mlmRev")

mod <- lm(math ~ degroup(grade, childid, schoolid), 
          data = egsingle)

parameters::model_parameters(mod)
#> Parameter                                               | Coefficient |   SE |         95% CI | t(7226) |      p
#> ----------------------------------------------------------------------------------------------------------------
#> (Intercept)                                             |       -3.11 | 0.09 | [-3.29, -2.94] |  -34.60 | < .001
#> degroup(grade, childid, schoolid)grade childid between  |        1.00 | 0.02 | [ 0.95,  1.04] |   42.55 | < .001
#> degroup(grade, childid, schoolid)grade schoolid between |        1.19 | 0.06 | [ 1.08,  1.30] |   21.50 | < .001
#> degroup(grade, childid, schoolid)grade within           |        0.77 | 0.01 | [ 0.75,  0.78] |   76.16 | < .001
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed using a Wald
#>   t-distribution approximation.

predict(
  mod, 
  newdata = data.frame(grade = 3, schoolid = 2020, childid = 273026452)
)
#>         1 
#> 0.6773423 

marginaleffects::avg_slopes(mod, variables = "grade")
#> 
#>  Estimate Std. Error    z Pr(>|z|)   S 2.5 % 97.5 %
#>     0.765       0.01 76.2   <0.001 Inf 0.746  0.785
#> 
#> Term: grade
#> Type: response
#> Comparison: dY/dX
#> 

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions