From bf636eec5c90c33b4af00af5ecacc3968f5d2721 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 24 May 2024 18:23:02 +0200 Subject: [PATCH] Rename `group`/`group_by` arguments into `by` (#433) * Rename `group`/`group_by` arguments into `by` * fix report_participants * fix news * use insight remotes * deprecation warning * lintr * ... * fix * update tests * fix * fix * fix * update snapshots * update readme * add remotes --- DESCRIPTION | 3 +- NEWS.md | 9 ++ R/report.lm.R | 16 ++-- R/report_htest_ttest.R | 18 ++-- R/report_participants.R | 86 +++++++++-------- R/report_sample.R | 96 ++++++++++--------- R/report_text.R | 2 +- README.Rmd | 4 +- README.md | 72 +++++++------- man/report_participants.Rd | 9 +- man/report_sample.Rd | 11 ++- .../testthat/_snaps/windows/report_sample.md | 16 ++-- tests/testthat/test-report_participants.R | 4 +- tests/testthat/test-report_sample.R | 26 ++--- 14 files changed, 205 insertions(+), 167 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 04be3e39..7ad0c624 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: report Type: Package Title: Automated Reporting of Results and Statistical Models -Version: 0.5.8.2 +Version: 0.5.8.3 Authors@R: c(person(given = "Dominique", family = "Makowski", @@ -150,3 +150,4 @@ Collate: 'utils_grouped_df.R' 'zzz.R' Roxygen: list(markdown = TRUE) +Remotes: easystats/insight, easystats/datawizard, easystats/parameters, easystats/performance, easystats/modelbased diff --git a/NEWS.md b/NEWS.md index 07728045..1d5f9e21 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,14 @@ # report 0.5.9 +Breaking + +* Arguments named `group`, `at` and `group_by` will be deprecated in future + releases of _easystats_ packages. Please use `by` instead. This affects the + following functions in *report*: + + * `report_participants()` + * `report_sample()` + Minor changes * `report` now supports reporting of Bayesian model comparison with variables of class `brms::loo_compare`. 
diff --git a/R/report.lm.R b/R/report.lm.R index b13449eb..904c8ce1 100644 --- a/R/report.lm.R +++ b/R/report.lm.R @@ -483,7 +483,7 @@ report_info.lm <- function(x, effectsize <- report_effectsize(x, ...) } - text <- .info_effectsize(x, effectsize = effectsize, include_effectsize = include_effectsize) + info_text <- .info_effectsize(x, effectsize = effectsize, include_effectsize = include_effectsize) if (is.null(parameters)) { parameters <- report_parameters(x, ...) @@ -495,7 +495,7 @@ report_info.lm <- function(x, } if ("ci_method" %in% names(att)) { - text <- paste0(text, " ", .info_df( + info_text <- paste0(info_text, " ", .info_df( ci = att$ci, ci_method = att$ci_method, test_statistic = att$test_statistic, @@ -508,7 +508,7 @@ report_info.lm <- function(x, # } - as.report_info(text) + as.report_info(info_text) } @@ -538,25 +538,25 @@ report_text.lm <- function(x, table = NULL, ...) { model, ". ", perf, - ifelse(nchar(perf) > 0, ". ", ""), + ifelse(nzchar(perf, keepNA = TRUE), ". ", ""), intercept, params_text_full, "\n\n", info ) - text <- paste0( + summary_text <- paste0( "We fitted a ", summary(model), ". ", summary(perf), - ifelse(nchar(perf) > 0, ". ", ""), + ifelse(nzchar(perf, keepNA = TRUE), ". ", ""), summary(intercept), params_text ) - as.report_text(text_full, summary = text) + as.report_text(text_full, summary = summary_text) } @@ -569,7 +569,7 @@ report_text.lm <- function(x, table = NULL, ...) 
{ if (!is.null(coefname) && coefname %in% names(table)) { estimate <- attributes(table)$coefficient_name } else { - estimate <- datawizard::data_find(table, candidates, regex = TRUE, verbose = FALSE)[1] + estimate <- datawizard::extract_column_names(table, candidates, regex = TRUE, verbose = FALSE)[1] } estimate } diff --git a/R/report_htest_ttest.R b/R/report_htest_ttest.R index 0d5de125..0fb20463 100644 --- a/R/report_htest_ttest.R +++ b/R/report_htest_ttest.R @@ -33,25 +33,25 @@ .report_table_ttest <- function(table_full, effsize) { table_full <- cbind(table_full, attributes(effsize)$table) - table <- datawizard::data_remove( + table_small <- datawizard::data_remove( table_full, c("Parameter", "Group", "Mean_Group1", "Mean_Group2", "Method", "d_CI_low", "d_CI_high") ) - list(table = table, table_full = table_full) + list(table = table_small, table_full = table_full) } # report_effectsize --------------------- .report_effectsize_ttest <- function(x, table, dot_args, type, rules = "cohen1988") { - args <- c(list(x), dot_args) - table <- do.call(effectsize::effectsize, args) + my_args <- c(list(x), dot_args) + table <- do.call(effectsize::effectsize, my_args) ci <- attributes(table)$ci estimate <- names(table)[1] rules <- ifelse(is.null(dot_args$rules), rules, dot_args$rules) - args <- list(table, rules = rules, dot_args) - interpretation <- do.call(effectsize::interpret, args)$Interpretation + my_args <- list(table, rules = rules, dot_args) + interpretation <- do.call(effectsize::interpret, my_args)$Interpretation rules <- .text_effectsize(attr(attr(interpretation, "rules"), "rule_name")) if (estimate %in% c("d", "Cohens_d")) { @@ -88,7 +88,7 @@ .report_model_ttest <- function(x, table) { # If against mu if (names(x$null.value) == "mean") { - # TODO: @DominiqueMakowski why do we need "table" here? + # TODO: @DominiqueMakowski why do we need "table" here?? 
table$Difference <- x$estimate - x$null.value means <- paste0(" (mean = ", insight::format_value(x$estimate), ")") @@ -106,12 +106,12 @@ vars <- paste0(x$data.name) } - text <- paste0( + final_text <- paste0( trimws(x$method), " testing the difference ", ifelse(grepl(" by ", x$data.name, fixed = TRUE), "of ", "between "), vars_full ) - text + final_text } diff --git a/R/report_participants.R b/R/report_participants.R index 885d1eb1..40e6dfc5 100644 --- a/R/report_participants.R +++ b/R/report_participants.R @@ -21,11 +21,12 @@ #' so countries that represent less than 10% will be combined in the "other" category). #' @param participants The name of the participants' identifier column (for #' instance in the case of repeated measures). -#' @param group A character vector indicating the name(s) of the column(s) used +#' @param by A character vector indicating the name(s) of the column(s) used #' for stratified description. #' @param spell_n Logical, fully spell the sample size (`"Three participants"` #' instead of `"3 participants"`). #' @inheritParams report.numeric +#' @param group Deprecated. Use `by` instead. #' #' @return A character vector with description of the "participants", based on #' the information provided in `data`. @@ -106,7 +107,7 @@ #' sex = "Sex", #' gender = "Gender", #' participants = "Participant", -#' group = "Condition" +#' by = "Condition" #' ) #' #' # Spell sample size @@ -123,14 +124,21 @@ report_participants <- function(data, country = NULL, race = NULL, participants = NULL, - group = NULL, + by = NULL, spell_n = FALSE, digits = 1, threshold = 10, + group = NULL, ...) { + ## TODO: deprecate later + if (!is.null(group)) { + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. 
Please use `by` instead.") # nolint + by <- group + } + # Convert empty strings to NA data_list <- lapply(data, function(x) { - x[which(x == "")] <- NA + x[which(x == "")] <- NA # nolint x }) data <- as.data.frame(data_list, stringsAsFactors = FALSE) @@ -165,8 +173,8 @@ report_participants <- function(data, race <- .find_race_in_data(data) } - if (is.null(group)) { - text <- .report_participants( + if (is.null(by)) { + final_text <- .report_participants( data, age = age, sex = sex, @@ -181,9 +189,9 @@ report_participants <- function(data, ... ) } else { - text <- NULL - data[[group]] <- as.character(data[[group]]) - for (i in split(data, data[group])) { + final_text <- NULL + data[[by]] <- as.character(data[[by]]) + for (i in split(data, data[by])) { current_text <- .report_participants( i, age = age, @@ -200,15 +208,15 @@ report_participants <- function(data, pre_text <- paste0( "the '", - paste0(names(i[group]), " - ", vapply(i[group], unique, "character"), collapse = " and "), + paste0(names(i[by]), " - ", vapply(i[by], unique, "character"), collapse = " and "), "' group: " ) - text <- c(text, paste0(pre_text, current_text)) + final_text <- c(final_text, paste0(pre_text, current_text)) } - text <- paste("For", datawizard::text_concatenate(text, sep = ", for ", last = " and for ")) + final_text <- paste("For", datawizard::text_concatenate(final_text, sep = ", for ", last = " and for ")) } - text + final_text } #' @keywords internal @@ -338,9 +346,7 @@ report_participants <- function(data, ) %in% c("male", "m", "female", "f", NA, "na")]) / nrow(data) * 100, digits = digits), "% other", - if (!insight::format_value(length(data[[sex]][tolower( - data[[sex]] - ) %in% c(NA, "na")]) / nrow(data) * 100) == "0.00") { + if (insight::format_value(length(data[[sex]][tolower(data[[sex]]) %in% c(NA, "na")]) / nrow(data) * 100) != "0.00") { # nolint paste0(", ", insight::format_value(length(data[[sex]][tolower( data[[sex]] ) %in% c(NA, "na")]) / nrow(data) * 100), "% missing") 
@@ -375,9 +381,9 @@ report_participants <- function(data, data[[gender]] ) %in% both_genders]) / nrow(data) * 100), "% non-binary", - if (!insight::format_value(length(data[[gender]][tolower( + if (insight::format_value(length(data[[gender]][tolower( data[[gender]] - ) %in% c(NA, "na")]) / nrow(data) * 100) == "0.00") { + ) %in% c(NA, "na")]) / nrow(data) * 100) != "0.00") { paste0(", ", insight::format_value(length(data[[gender]][tolower( data[[gender]] ) %in% c(NA, "na")]) / nrow(data) * 100), "% missing") @@ -387,31 +393,29 @@ report_participants <- function(data, if (all(is.na(data[[education]]))) { text_education <- "" - } else { - if (is.numeric(data[[education]])) { - text_education <- summary( - report_statistics( - data[[education]], - n = FALSE, - centrality = "mean", - missing_percentage = NULL, - digits = digits, - ... - ) - ) - - text_education <- sub("Mean =", "Mean education =", text_education, fixed = TRUE) - } else { - data[which(data[[education]] %in% c(NA, "NA")), education] <- "missing" - txt <- summary(report_statistics( - as.factor(data[[education]]), - levels_percentage = TRUE, + } else if (is.numeric(data[[education]])) { + text_education <- summary( + report_statistics( + data[[education]], + n = FALSE, + centrality = "mean", + missing_percentage = NULL, digits = digits, ... - )) + ) + ) - text_education <- paste0("Education: ", txt) - } + text_education <- sub("Mean =", "Mean education =", text_education, fixed = TRUE) + } else { + data[which(data[[education]] %in% c(NA, "NA")), education] <- "missing" + txt <- summary(report_statistics( + as.factor(data[[education]]), + levels_percentage = TRUE, + digits = digits, + ... 
+ )) + + text_education <- paste0("Education: ", txt) } text_country <- if (all(is.na(data[[country]]))) { @@ -468,6 +472,7 @@ report_participants <- function(data, text_race <- paste("Race:", value_string) } + # nolint start paste0( size, " participants (", @@ -491,6 +496,7 @@ report_participants <- function(data, ), text_race)), ")" ) + # nolint end } #' @keywords internal diff --git a/R/report_sample.R b/R/report_sample.R index 8518e61b..41d6a8f8 100644 --- a/R/report_sample.R +++ b/R/report_sample.R @@ -3,7 +3,7 @@ #' Create sample description table (also referred to as "Table 1"). #' #' @param data A data frame for which descriptive statistics should be created. -#' @param group_by Character vector, indicating the column(s) for possible grouping +#' @param by Character vector, indicating the column(s) for possible grouping #' of the descriptive table. Note that weighting (see `weights`) does not work #' with more than one grouping column. #' @param centrality Character, indicates the statistics that should be @@ -43,6 +43,7 @@ #' @param digits Number of decimals. #' @param n Logical, actual sample size used in the calculation of the #' reported descriptive statistics (i.e., without the missing values). +#' @param group_by Deprecated. Use `by` instead. 
#' @inheritParams report.data.frame #' #' @return A data frame of class `report_sample` with variable names and @@ -61,8 +62,8 @@ #' #' report_sample(iris[, 1:4]) #' report_sample(iris, select = c("Sepal.Length", "Petal.Length", "Species")) -#' report_sample(iris, group_by = "Species") -#' report_sample(airquality, group_by = "Month", n = TRUE, total = FALSE) +#' report_sample(iris, by = "Species") +#' report_sample(airquality, by = "Month", n = TRUE, total = FALSE) #' #' # confidence intervals for proportions #' set.seed(123) @@ -72,7 +73,7 @@ #' report_sample(d, ci = 0.95, ci_correct = TRUE) # continuity correction #' @export report_sample <- function(data, - group_by = NULL, + by = NULL, centrality = "mean", ci = NULL, ci_method = "wilson", @@ -83,7 +84,14 @@ report_sample <- function(data, total = TRUE, digits = 2, n = FALSE, + group_by = NULL, ...) { + ## TODO: deprecate later + if (!is.null(group_by)) { + insight::format_warning("Argument `group_by` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + by <- group_by + } + # check for correct input type if (!is.data.frame(data)) { data <- tryCatch( @@ -119,16 +127,16 @@ report_sample <- function(data, variables <- setdiff(variables, exclude) } - # for grouped data frames, use groups as group_by argument - if (inherits(data, "grouped_df") && is.null(group_by)) { - group_by <- setdiff(colnames(attributes(data)$groups), ".rows") + # for grouped data frames, use groups as by argument + if (inherits(data, "grouped_df") && is.null(by)) { + by <- setdiff(colnames(attributes(data)$groups), ".rows") } # grouped by? 
- grouping <- !is.null(group_by) && all(group_by %in% colnames(data)) + do_grouping <- !is.null(by) && all(by %in% colnames(data)) # sanity check - weights and grouping - if (!is.null(group_by) && length(group_by) > 1 && !is.null(weights)) { + if (!is.null(by) && length(by) > 1 && !is.null(weights)) { insight::format_error("Cannot apply `weights` when grouping is done by more than one variable.") } @@ -143,12 +151,12 @@ report_sample <- function(data, i }) - # coerce group_by columns to factor - groups <- as.data.frame(lapply(data[group_by], factor)) + # coerce by columns to factor + groups <- as.data.frame(lapply(data[by], factor)) - out <- if (isTRUE(grouping)) { + out <- if (isTRUE(do_grouping)) { result <- lapply(split(data[variables], groups), function(x) { - x[group_by] <- NULL + x[by] <- NULL .generate_descriptive_table( x, centrality, @@ -162,7 +170,7 @@ report_sample <- function(data, }) # for more than one group, fix column names. we don't want "a.b (n=10)", # but rather ""a, b (n=10)"" - if (length(group_by) > 1) { + if (length(by) > 1) { old_names <- datawizard::data_unite( unique(groups), new_column = ".old_names", @@ -179,9 +187,9 @@ report_sample <- function(data, variable <- result[[1]]["Variable"] # number of observation, based on weights if (!is.null(weights)) { - n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[group_by]]))) + n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[by]]))) } else { - n_obs <- as.vector(table(data[group_by])) + n_obs <- as.vector(table(data[by])) } # column names for groups cn <- sprintf("%s (n=%g)", names(result), n_obs) @@ -189,13 +197,13 @@ report_sample <- function(data, summaries <- do.call(cbind, lapply(result, function(i) i["Summary"])) colnames(summaries) <- cn # generate data for total column, but make sure to remove missings - total_data <- data[stats::complete.cases(data[group_by]), unique(c(variables, group_by))] + total_data <- data[stats::complete.cases(data[by]), 
unique(c(variables, by))] # bind all together, including total column final <- cbind( variable, summaries, Total = .generate_descriptive_table( - total_data[setdiff(variables, group_by)], + total_data[setdiff(variables, by)], centrality, weights, digits, @@ -211,9 +219,9 @@ report_sample <- function(data, } # define total N, based on weights if (!is.null(weights)) { - total_n <- round(sum(as.vector(table(data[group_by]))) * mean(data[[weights]], na.rm = TRUE)) + total_n <- round(sum(as.vector(table(data[by]))) * mean(data[[weights]], na.rm = TRUE)) } else { - total_n <- sum(as.vector(table(data[group_by]))) + total_n <- sum(as.vector(table(data[by]))) } # add N to column name colnames(final)[ncol(final)] <- sprintf( @@ -335,36 +343,36 @@ report_sample <- function(data, weights[is.na(x)] <- NA weights <- stats::na.omit(weights) x <- stats::na.omit(x) - proportions <- prop.table(stats::xtabs(weights ~ x)) + table_proportions <- prop.table(stats::xtabs(weights ~ x)) } else { - proportions <- prop.table(table(x)) + table_proportions <- prop.table(table(x)) } # for binary factors, just need one level if (nlevels(x) == 2) { - proportions <- proportions[2] + table_proportions <- table_proportions[2] } # CI for proportions? 
if (!is.null(ci)) { - ci_low_high <- .ci_proportion(x, proportions, weights, ci, ci_method, ci_correct) + ci_low_high <- .ci_proportion(x, table_proportions, weights, ci, ci_method, ci_correct) .summary <- sprintf( "%.1f [%.1f, %.1f]", - 100 * proportions, + 100 * table_proportions, 100 * ci_low_high$ci_low, 100 * ci_low_high$ci_high ) } else { - .summary <- sprintf("%.1f", 100 * proportions) + .summary <- sprintf("%.1f", 100 * table_proportions) } if (isTRUE(n)) { - .summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(proportions))) + .summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(table_proportions))) } n_label <- ifelse(n, ", n", "") data.frame( - Variable = sprintf("%s [%s], %%%s", column, names(proportions), n_label), + Variable = sprintf("%s [%s], %%%s", column, names(table_proportions), n_label), Summary = as.vector(.summary), stringsAsFactors = FALSE ) @@ -377,12 +385,12 @@ report_sample <- function(data, # Standard error for confidence interval of proportions ---- -.ci_proportion <- function(x, proportions, weights, ci, ci_method, ci_correct) { +.ci_proportion <- function(x, table_proportions, weights, ci, ci_method, ci_correct) { ci_method <- match.arg(tolower(ci_method), c("wald", "wilson")) # variables - p <- as.vector(proportions) - q <- 1 - p + p <- as.vector(table_proportions) + quant <- 1 - p n <- length(stats::na.omit(x)) z <- stats::qnorm((1 + ci) / 2) @@ -399,21 +407,21 @@ report_sample <- function(data, if (ci_method == "wilson") { # Wilson CIs ------------------- if (isTRUE(ci_correct)) { - ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * q + 1))) / (2 * (n + z^2)) - ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * q - 1))) / (2 * (n + z^2)) + ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * quant + 1))) / (2 * (n + z^2)) + ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * quant - 1))) / (2 * (n + z^2)) # close to 
0 or 1, then CI is 0 resp. 1 - fix <- p < 0.00001 | ci_low < 0.00001 - if (any(fix)) { - ci_low[fix] <- 0 + fix_ci <- p < 0.00001 | ci_low < 0.00001 + if (any(fix_ci)) { + ci_low[fix_ci] <- 0 } - fix <- p > 0.99999 | ci_high > 0.99999 - if (any(fix)) { - ci_high[fix] <- 1 + fix_ci <- p > 0.99999 | ci_high > 0.99999 + if (any(fix_ci)) { + ci_high[fix_ci] <- 1 } out <- list(ci_low = ci_low, ci_high = ci_high) } else { prop <- (2 * n * p) + z^2 - moe <- z * sqrt(z^2 + 4 * n * p * q) + moe <- z * sqrt(z^2 + 4 * n * p * quant) correction <- 2 * (n + z^2) out <- list( ci_low = (prop - moe) / correction, @@ -422,7 +430,7 @@ report_sample <- function(data, } } else { # Wald CIs ------------------- - moe <- z * suppressWarnings(sqrt(p * q / n)) + moe <- z * suppressWarnings(sqrt(p * quant / n)) if (isTRUE(ci_correct)) { moe <- moe + 1 / (2 * n) } @@ -511,9 +519,9 @@ print_md.report_sample <- function(x, layout = "horizontal", ...) { weights[is.na(x)] <- NA weights <- stats::na.omit(weights) x <- stats::na.omit(x) - order <- order(x) - x <- x[order] - weights <- weights[order] + x_order <- order(x) + x <- x[x_order] + weights <- weights[x_order] rw <- cumsum(weights) / sum(weights) md_values <- min(which(rw >= p)) if (rw[md_values] == p) { diff --git a/R/report_text.R b/R/report_text.R index 7b29e9ba..2c7faa9b 100644 --- a/R/report_text.R +++ b/R/report_text.R @@ -103,6 +103,6 @@ summary.report_text <- function(object, ...) { #' @export print.report_text <- function(x, width = NULL, ...) { - x <- datawizard::format_text(as.character(x), width = width, ...) + x <- datawizard::text_format(as.character(x), width = width, ...) cat(x) } diff --git a/README.Rmd b/README.Rmd index 7bca5c21..58dc8d13 100644 --- a/README.Rmd +++ b/README.Rmd @@ -262,11 +262,11 @@ paste( Report can also help you create a sample description table (also referred to as **Table 1**). 
```{r, eval=FALSE} -report_sample(iris, group_by = "Species") +report_sample(iris, by = "Species") ``` ```{r, echo=FALSE} -knitr::kable(report_sample(iris, group_by = "Species")) +knitr::kable(report_sample(iris, by = "Species")) ``` ### Report system and packages diff --git a/README.md b/README.md index b040f241..5faabf46 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The package documentation can be found ## Report all the things - +All the things meme by Allie Brosh ### General Workflow @@ -262,28 +262,28 @@ report(model) # of 1000 iterations and a warmup of 500) to predict mpg with qsec and wt # (formula: mpg ~ qsec + wt). Priors over parameters were all set as normal (mean # = 0.00, SD = 8.43; mean = 0.00, SD = 15.40) distributions. The model's - # explanatory power is substantial (R2 = 0.81, 95% CI [0.69, 0.89], adj. R2 = - # 0.79). The model's intercept, corresponding to qsec = 0 and wt = 0, is at 19.56 - # (95% CI [9.60, 30.59]). Within this model: + # explanatory power is substantial (R2 = 0.81, 95% CI [0.70, 0.90], adj. R2 = + # 0.79). The model's intercept, corresponding to qsec = 0 and wt = 0, is at 19.80 + # (95% CI [8.93, 29.80]). Within this model: # - # - The effect of qsec (Median = 0.94, 95% CI [0.38, 1.45]) has a 99.90% - # probability of being positive (> 0), 98.80% of being significant (> 0.30), and - # 0.05% of being large (> 1.81). The estimation successfully converged (Rhat = - # 1.001) and the indices are reliable (ESS = 1921) - # - The effect of wt (Median = -5.05, 95% CI [-6.01, -4.05]) has a 100.00% + # - The effect of qsec (Median = 0.93, 95% CI [0.40, 1.49]) has a 100.00% + # probability of being positive (> 0), 99.05% of being significant (> 0.30), and + # 0.25% of being large (> 1.81). 
The estimation successfully converged (Rhat = + # 1.000) and the indices are reliable (ESS = 1864) + # - The effect of wt (Median = -5.04, 95% CI [-5.99, -4.08]) has a 100.00% # probability of being negative (< 0), 100.00% of being significant (< -0.30), # and 100.00% of being large (< -1.81). The estimation successfully converged - # (Rhat = 1.000) and the indices are reliable (ESS = 2020) + # (Rhat = 0.999) and the indices are reliable (ESS = 2424) # # Following the Sequential Effect eXistence and sIgnificance Testing (SEXIT) # framework, we report the median of the posterior distribution and its 95% CI # (Highest Density Interval), along the probability of direction (pd), the # probability of significance and the probability of being large. The thresholds # beyond which the effect is considered as significant (i.e., non-negligible) and - # large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling - # has been assessed using R-hat, which should be below 1.01 (Vehtari et al., - # 2019), and Effective Sample Size (ESS), which should be greater than 1000 - # (Burkner, 2017). + # large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the + # outcome's SD). Convergence and stability of the Bayesian sampling has been + # assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and + # Effective Sample Size (ESS), which should be greater than 1000 (Burkner, 2017). ## Other types of reports @@ -296,10 +296,16 @@ model <- lm(Sepal.Length ~ Species, data = iris) report_model(model) # linear model (estimated using OLS) to predict Sepal.Length with Species (formula: Sepal.Length ~ Species) +``` + +``` r report_performance(model) # The model explains a statistically significant and substantial proportion of # variance (R2 = 0.62, F(2, 147) = 119.26, p < .001, adj. R2 = 0.61) +``` + +``` r report_statistics(model) # beta = 5.01, 95% CI [4.86, 5.15], t(147) = 68.76, p < .001; Std. 
beta = -1.01, 95% CI [-1.18, -0.84] @@ -334,7 +340,7 @@ Report can also help you create a sample description table (also referred to as **Table 1**). ``` r -report_sample(iris, group_by = "Species") +report_sample(iris, by = "Species") ``` | Variable | setosa (n=50) | versicolor (n=50) | virginica (n=50) | Total (n=150) | @@ -353,32 +359,35 @@ analysis paragraph about the tools used. report(sessionInfo()) ``` - # Analyses were conducted using the R Statistical language (version 4.2.2; R Core - # Team, 2022) on macOS Ventura 13.1, using the packages lme4 (version 1.1.32; - # Bates D et al., 2015), Matrix (version 1.5.3; Bates D et al., 2022), Rcpp - # (version 1.0.10; Eddelbuettel D, François R, 2011), rstanarm (version 2.21.3; - # Goodrich B et al., 2022), report (version 0.5.7; Makowski D et al., 2023) and - # dplyr (version 1.1.0; Wickham H et al., 2023). + # Analyses were conducted using the R Statistical language (version 4.4.0; R Core + # Team, 2024) on Windows 11 x64 (build 22631), using the packages lme4 (version + # 1.1.35.3; Bates D et al., 2015), Matrix (version 1.7.0; Bates D et al., 2024), + # Rcpp (version 1.0.12; Eddelbuettel D et al., 2024), rstanarm (version 2.32.1; + # Goodrich B et al., 2024), report (version 0.5.8.3; Makowski D et al., 2023) and + # dplyr (version 1.1.4; Wickham H et al., 2023). # # References # ---------- # - Bates D, Mächler M, Bolker B, Walker S (2015). "Fitting Linear Mixed-Effects # Models Using lme4." _Journal of Statistical Software_, *67*(1), 1-48. # doi:10.18637/jss.v067.i01 . - # - Bates D, Maechler M, Jagan M (2022). _Matrix: Sparse and Dense Matrix Classes - # and Methods_. R package version 1.5-3, + # - Bates D, Maechler M, Jagan M (2024). _Matrix: Sparse and Dense Matrix Classes + # and Methods_. R package version 1.7-0, # . - # - Eddelbuettel D, François R (2011). "Rcpp: Seamless R and C++ Integration." - # _Journal of Statistical Software_, *40*(8), 1-18. 
doi:10.18637/jss.v040.i08 + # - Eddelbuettel D, Francois R, Allaire J, Ushey K, Kou Q, Russell N, Ucar I, + # Bates D, Chambers J (2024). _Rcpp: Seamless R and C++ Integration_. R package + # version 1.0.12, . Eddelbuettel D, + # François R (2011). "Rcpp: Seamless R and C++ Integration." _Journal of + # Statistical Software_, *40*(8), 1-18. doi:10.18637/jss.v040.i08 # . Eddelbuettel D (2013). _Seamless R and # C++ Integration with Rcpp_. Springer, New York. doi:10.1007/978-1-4614-6868-4 # , ISBN 978-1-4614-6867-7. - # Eddelbuettel D, Balamuta JJ (2018). "Extending extitR with extitC++: A Brief - # Introduction to extitRcpp." _The American Statistician_, *72*(1), 28-36. + # Eddelbuettel D, Balamuta J (2018). "Extending R with C++: A Brief Introduction + # to Rcpp." _The American Statistician_, *72*(1), 28-36. # doi:10.1080/00031305.2017.1375990 # . - # - Goodrich B, Gabry J, Ali I, Brilleman S (2022). "rstanarm: Bayesian applied - # regression modeling via Stan." R package version 2.21.3, + # - Goodrich B, Gabry J, Ali I, Brilleman S (2024). "rstanarm: Bayesian applied + # regression modeling via Stan." R package version 2.32.1, # . Brilleman S, Crowther M, Moreno-Betancur M, # Buros Novik J, Wolfe R (2018). "Joint longitudinal and time-to-event models via # Stan." StanCon 2018. 10-12 Jan 2018. Pacific Grove, CA, USA., @@ -387,11 +396,11 @@ report(sessionInfo()) # "Automated Results Reporting as a Practical Tool to Improve Reproducibility and # Methodological Best Practices Adoption." _CRAN_. # . - # - R Core Team (2022). _R: A Language and Environment for Statistical + # - R Core Team (2024). _R: A Language and Environment for Statistical # Computing_. R Foundation for Statistical Computing, Vienna, Austria. # . # - Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A Grammar - # of Data Manipulation_. R package version 1.1.0, + # of Data Manipulation_. R package version 1.1.4, # . 
## Credits @@ -401,7 +410,6 @@ as follows: ``` r citation("report") - To cite in publications use: Makowski, D., Lüdecke, D., Patil, I., Thériault, R., Ben-Shachar, diff --git a/man/report_participants.Rd b/man/report_participants.Rd index 2ce58b14..e1704666 100644 --- a/man/report_participants.Rd +++ b/man/report_participants.Rd @@ -13,10 +13,11 @@ report_participants( country = NULL, race = NULL, participants = NULL, - group = NULL, + by = NULL, spell_n = FALSE, digits = 1, threshold = 10, + group = NULL, ... ) } @@ -44,7 +45,7 @@ individuals in those groups as \code{"Non-Binary"}.} \item{participants}{The name of the participants' identifier column (for instance in the case of repeated measures).} -\item{group}{A character vector indicating the name(s) of the column(s) used +\item{by}{A character vector indicating the name(s) of the column(s) used for stratified description.} \item{spell_n}{Logical, fully spell the sample size (\code{"Three participants"} @@ -55,6 +56,8 @@ instead of \code{"3 participants"}).} \item{threshold}{Percentage after which to combine, e.g., countries (default is 10\%, so countries that represent less than 10\% will be combined in the "other" category).} +\item{group}{Deprecated. Use \code{by} instead.} + \item{...}{Arguments passed to or from other methods.} } \value{ @@ -141,7 +144,7 @@ report_participants(data, sex = "Sex", gender = "Gender", participants = "Participant", - group = "Condition" + by = "Condition" ) # Spell sample size diff --git a/man/report_sample.Rd b/man/report_sample.Rd index 7a66569a..08751462 100644 --- a/man/report_sample.Rd +++ b/man/report_sample.Rd @@ -6,7 +6,7 @@ \usage{ report_sample( data, - group_by = NULL, + by = NULL, centrality = "mean", ci = NULL, ci_method = "wilson", @@ -17,13 +17,14 @@ report_sample( total = TRUE, digits = 2, n = FALSE, + group_by = NULL, ... 
) } \arguments{ \item{data}{A data frame for which descriptive statistics should be created.} -\item{group_by}{Character vector, indicating the column(s) for possible grouping +\item{by}{Character vector, indicating the column(s) for possible grouping of the descriptive table. Note that weighting (see \code{weights}) does not work with more than one grouping column.} @@ -75,6 +76,8 @@ weight-variable. Reported descriptive statistics will be weighted by \item{n}{Logical, actual sample size used in the calculation of the reported descriptive statistics (i.e., without the missing values).} +\item{group_by}{Deprecated. Use \code{by} instead.} + \item{...}{Arguments passed to or from other methods.} } \value{ @@ -89,8 +92,8 @@ library(report) report_sample(iris[, 1:4]) report_sample(iris, select = c("Sepal.Length", "Petal.Length", "Species")) -report_sample(iris, group_by = "Species") -report_sample(airquality, group_by = "Month", n = TRUE, total = FALSE) +report_sample(iris, by = "Species") +report_sample(airquality, by = "Month", n = TRUE, total = FALSE) # confidence intervals for proportions set.seed(123) diff --git a/tests/testthat/_snaps/windows/report_sample.md b/tests/testthat/_snaps/windows/report_sample.md index d9ad1fb8..ffec20c1 100644 --- a/tests/testthat/_snaps/windows/report_sample.md +++ b/tests/testthat/_snaps/windows/report_sample.md @@ -180,10 +180,10 @@ ------------------------- x [1], % | 2.9 [2.0, 4.2] -# report_sample group_by +# report_sample by Code - report_sample(airquality, group_by = "Month") + report_sample(airquality, by = "Month") Output # Descriptive Statistics @@ -198,7 +198,7 @@ --- Code - report_sample(mtcars, group_by = "cyl") + report_sample(mtcars, by = "cyl") Output # Descriptive Statistics @@ -218,7 +218,7 @@ --- Code - report_sample(iris, group_by = "Species") + report_sample(iris, by = "Species") Output # Descriptive Statistics @@ -456,7 +456,7 @@ --- Code - report_sample(airquality, group_by = "Month", total = TRUE) + 
report_sample(airquality, by = "Month", total = TRUE) Output # Descriptive Statistics @@ -471,7 +471,7 @@ --- Code - report_sample(airquality, group_by = "Month", total = FALSE) + report_sample(airquality, by = "Month", total = FALSE) Output # Descriptive Statistics @@ -486,7 +486,7 @@ --- Code - report_sample(airquality, group_by = "Month", total = FALSE, n = TRUE) + report_sample(airquality, by = "Month", total = FALSE, n = TRUE) Output # Descriptive Statistics @@ -501,7 +501,7 @@ --- Code - report_sample(airquality, group_by = "Month", total = TRUE, n = TRUE) + report_sample(airquality, by = "Month", total = TRUE, n = TRUE) Output # Descriptive Statistics diff --git a/tests/testthat/test-report_participants.R b/tests/testthat/test-report_participants.R index 20319ddc..d8231254 100644 --- a/tests/testthat/test-report_participants.R +++ b/tests/testthat/test-report_participants.R @@ -14,7 +14,7 @@ test_that("report_participants, argument gender works", { "Gender: 12.5% women, 37.5% men, 50.00% non-binary)" ) ) - out <- report_participants(data, group = "Condition") + out <- report_participants(data, by = "Condition") expect_identical( out, paste( @@ -26,7 +26,7 @@ test_that("report_participants, argument gender works", { ) ) # works when lowercase - out <- report_participants(data, group = "Condition") + out <- report_participants(data, by = "Condition") expect_identical( out, paste( diff --git a/tests/testthat/test-report_sample.R b/tests/testthat/test-report_sample.R index d37c53dd..a012f969 100644 --- a/tests/testthat/test-report_sample.R +++ b/tests/testthat/test-report_sample.R @@ -6,8 +6,8 @@ test_that("report_sample weights, coorect weighted N", { stringsAsFactors = FALSE ) - out1 <- report_sample(d, select = "x", group_by = "g") - out2 <- report_sample(d, select = "x", group_by = "g", weights = "w") + out1 <- report_sample(d, select = "x", by = "g") + out2 <- report_sample(d, select = "x", by = "g", weights = "w") expect_identical( 
capture.output(print(out1)), c( @@ -41,7 +41,7 @@ test_that("report_sample weights, coorect weighted N", { stringsAsFactors = FALSE ) expect_error( - report_sample(d, select = "x", group_by = c("g1", "g2"), weights = "w"), + report_sample(d, select = "x", by = c("g1", "g2"), weights = "w"), regex = "Cannot apply" ) }) @@ -123,18 +123,18 @@ test_that("report_sample CI", { expect_warning(report_sample(d, ci = 0.95, weights = "w", ci_method = "wald"), regex = "accurate") }) -test_that("report_sample group_by", { +test_that("report_sample by", { expect_snapshot( variant = "windows", - report_sample(airquality, group_by = "Month") + report_sample(airquality, by = "Month") ) expect_snapshot( variant = "windows", - report_sample(mtcars, group_by = "cyl") + report_sample(mtcars, by = "cyl") ) expect_snapshot( variant = "windows", - report_sample(iris, group_by = "Species") + report_sample(iris, by = "Species") ) }) @@ -206,19 +206,19 @@ test_that("report_sample total", { ) expect_snapshot( variant = "windows", - report_sample(airquality, group_by = "Month", total = TRUE) + report_sample(airquality, by = "Month", total = TRUE) ) expect_snapshot( variant = "windows", - report_sample(airquality, group_by = "Month", total = FALSE) + report_sample(airquality, by = "Month", total = FALSE) ) expect_snapshot( variant = "windows", - report_sample(airquality, group_by = "Month", total = FALSE, n = TRUE) + report_sample(airquality, by = "Month", total = FALSE, n = TRUE) ) expect_snapshot( variant = "windows", - report_sample(airquality, group_by = "Month", total = TRUE, n = TRUE) + report_sample(airquality, by = "Month", total = TRUE, n = TRUE) ) }) @@ -248,7 +248,7 @@ test_that("report_sample grouped data frames", { data(mtcars) mtcars_grouped <- datawizard::data_group(mtcars, "gear") out1 <- report_sample(mtcars_grouped, select = c("hp", "mpg")) - out2 <- report_sample(mtcars, group_by = "gear", select = c("hp", "mpg")) + out2 <- report_sample(mtcars, by = "gear", select = c("hp", 
"mpg")) expect_identical(out1, out2) }) @@ -258,7 +258,7 @@ test_that("report_sample, with more than one grouping variable", { iris$grp <- sample(letters[1:3], nrow(iris), TRUE) out <- report_sample( iris, - group_by = c("Species", "grp"), + by = c("Species", "grp"), select = c("Sepal.Length", "Sepal.Width") ) # verified against