-
-
Notifications
You must be signed in to change notification settings - Fork 16
Open
Labels
Description
This will allow using demean()
or degroup()
inside a formula, which can then play nice with brms or marginaleffects etc...
Code
degroup.numeric <- function(x,
...,
means = NULL,
center = "mean",
suffix_demean = "_within",
suffix_groupmean = "_between") {
dat <- data.frame(x, ...)
x_name <- insight::safe_deparse(substitute(x))
colnames(dat)[1] <- x_name
if (is.null(means)) {
group_cols <- colnames(dat)[-1]
dat_degrouped <- degroup(dat,
select = colnames(dat)[1],
by = group_cols,
center = center,
suffix_demean = suffix_demean,
suffix_groupmean = suffix_groupmean,
append = TRUE)
cols <- colnames(dat_degrouped)
between_cols <- cols[grepl(suffix_groupmean, cols)]
within_cols <- cols[grepl(suffix_demean, cols)]
table_cols <- c(group_cols, between_cols)
means <- unique(dat_degrouped[table_cols])
out <- dat_degrouped[c(between_cols, within_cols)]
} else {
dat_groupmeans <- data_join(dat, means)
cols <- colnames(dat_groupmeans)
between_cols <- cols[grepl(suffix_groupmean, cols)]
# # Replace missing with grandmeans?
# if (anyNA(dat_groupmeans[between_cols])) {
# grand_means <- lapply(means[between_cols], mean, na.rm = TRUE)
#
# for (nm in names(grand_means)) {
# dat_groupmeans[is.na(dat_groupmeans[[nm]]),nm] <- grand_means[[nm]]
# }
# }
sum_group_means <- Reduce("+", dat_groupmeans[between_cols])
dat_groupmeans$x_within <- dat_groupmeans[[x_name]] - sum_group_means
out <- dat_groupmeans[c(between_cols, "x_within")]
colnames(out)[ncol(out)] <- paste0(x_name, suffix_demean)
out <- as.matrix(out)
}
out <- as.matrix(out)
rownames(out) <- NULL
attr(out, "means") <- means
class(out) <- c("dw_degroup", class(out))
out
}
demean.numeric <- function(x,
...,
means = NULL,
center = "mean",
suffix_demean = "_within",
suffix_groupmean = "_between") {
cl <- match.call()
cl[[1]] <- quote(datawizard::degroup)
cl$center <- "mean"
eval.parent(cl)
}
dat <- data.frame(
a = c(1, 2, 3, 4, 1, 2, 3, 4),
x = c(4, 3, 3, 4, 1, 2, 1, 2),
y = c(1, 2, 1, 2, 4, 3, 2, 1),
ID = c(1, 2, 3, 1, 2, 3, 1, 2)
)
makepredictcall.dw_degroup <- function(var, call) {
call$means <- attr(var, "means")
call
}
Example:
data("egsingle", package = "mlmRev")
mod <- lm(math ~ degroup(grade, childid, schoolid),
data = egsingle)
parameters::model_parameters(mod)
#> Parameter | Coefficient | SE | 95% CI | t(7226) | p
#> ----------------------------------------------------------------------------------------------------------------
#> (Intercept) | -3.11 | 0.09 | [-3.29, -2.94] | -34.60 | < .001
#> degroup(grade, childid, schoolid)grade childid between | 1.00 | 0.02 | [ 0.95, 1.04] | 42.55 | < .001
#> degroup(grade, childid, schoolid)grade schoolid between | 1.19 | 0.06 | [ 1.08, 1.30] | 21.50 | < .001
#> degroup(grade, childid, schoolid)grade within | 0.77 | 0.01 | [ 0.75, 0.78] | 76.16 | < .001
#> Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed using a Wald
#> t-distribution approximation.
predict(
mod,
newdata = data.frame(grade = 3, schoolid = 2020, childid = 273026452)
)
#> 1
#> 0.6773423
marginaleffects::avg_slopes(mod, variables = "grade")
#>
#> Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> 0.765 0.01 76.2 <0.001 Inf 0.746 0.785
#>
#> Term: grade
#> Type: response
#> Comparison: dY/dX
#>