From 26ff4471a362d6e8551421b15f727fb2571d9baa Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 19 Nov 2025 16:18:50 -0500 Subject: [PATCH 1/5] Clarify `summarise()`'s `.groups` message Tweak --- NEWS.md | 2 ++ R/summarise.R | 52 ++++++++++++++------------- R/utils.R | 18 ++++++++++ tests/testthat/_snaps/summarise.md | 58 ++++++++++++++++++++++++++---- tests/testthat/test-summarise.R | 55 +++++++++++++++++++++------- 5 files changed, 141 insertions(+), 44 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5ecaf34d22..06c5ad86b7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # dplyr (development version) +* The `.groups` message emitted by `summarise()` is hopefully more clear now (#6986). + * `if_any()` and `if_all()` are now more consistent in all use cases (#7059, #7077, #7746, @jrwinget). In particular: * When called with zero inputs, `if_any()` returns `FALSE` and `if_all()` returns `TRUE`. diff --git a/R/summarise.R b/R/summarise.R index e3d6b9aa65..7fba335b36 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -149,27 +149,20 @@ summarise.grouped_df <- function(.data, ..., .by = NULL, .groups = NULL) { .groups <- "drop_last" } - group_vars <- by$names + old_groups <- by$names if (identical(.groups, "drop_last")) { - n <- length(group_vars) + n <- length(old_groups) if (n > 1) { + new_groups <- old_groups[-n] if (verbose) { - new_groups <- glue_collapse( - paste0("'", group_vars[-n], "'"), - sep = ", " - ) - summarise_inform("has grouped output by {new_groups}") + inform_implicit_drop_last_for_grouped_df(old_groups, new_groups) } - out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data)) + out <- grouped_df(out, new_groups, group_by_drop_default(.data)) } } else if (identical(.groups, "keep")) { - if (verbose) { - new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ") - summarise_inform("has grouped output by {new_groups}") - } - out <- grouped_df(out, group_vars, group_by_drop_default(.data)) + out <- grouped_df(out, old_groups, group_by_drop_default(.data)) } else if (identical(.groups, "rowwise")) { - out <- rowwise_df(out, group_vars) + out <- rowwise_df(out, old_groups) } else if (!identical(.groups, "drop")) { bullets <- c( paste0("`.groups` can't be ", as_label(.groups)), @@ -194,15 +187,14 @@ summarise.rowwise_df <- function(.data, ..., .by = NULL, .groups = NULL) { .groups <- "keep" } - group_vars <- by$names + old_groups <- by$names if (identical(.groups, "keep")) { - if (verbose && length(group_vars)) { - new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ") - summarise_inform("has grouped output by {new_groups}") + if (verbose && length(old_groups) > 0L) { + inform_implicit_keep_for_rowwise_df(old_groups) } - out <- grouped_df(out, group_vars) + out <- grouped_df(out, old_groups) } else if (identical(.groups, "rowwise")) { - out <- rowwise_df(out, group_vars) + out <- rowwise_df(out, old_groups) } else if (!identical(.groups, "drop")) { bullets <- c( paste0("`.groups` can't be ", as_label(.groups)), @@ -475,10 +467,20 @@ summarise_verbose <- function(.groups, .env) { is_reference(topenv(.env), global_env()) } -summarise_inform <- function(..., .env = parent.frame()) { - inform(paste0( - "`summarise()` ", - glue(..., .envir = .env), - '. You can override using the `.groups` argument.' +inform_implicit_drop_last_for_grouped_df <- function(old, new) { + inform(cli_format_each_inline( + "{.fn summarise} has regrouped the output by dropping the last grouping column.", + i = "Summaries were computed grouped by {cli::col_blue(old)}.", + i = "Output is grouped by {cli::col_blue(new)}.", + i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message." + )) +} + +inform_implicit_keep_for_rowwise_df <- function(groups) { + inform(cli_format_each_inline( + "{.fn summarise} has converted the output from a rowwise data frame to a grouped data frame.", + i = "Summaries were computed rowwise.", + i = "Output is grouped by {cli::col_blue(groups)}.", + i = "Use {.code summarise(.groups = \"keep\")} to silence this message." )) } diff --git a/R/utils.R b/R/utils.R index f9ae53c1a1..9593767dac 100644 --- a/R/utils.R +++ b/R/utils.R @@ -134,6 +134,24 @@ cli_collapse <- function(x, last = " and ", sep2 = " and ") { cli::cli_vec(x, style = list("vec-last" = last, "vec-sep2" = sep2)) } +# A version of `cli::format_inline()` that formats each string individually. +# Useful for constructing error bullets ahead of time. +cli_format_each_inline <- function( + ..., + .envir = parent.frame(), + collapse = TRUE, + keep_whitespace = TRUE +) { + map_chr(c(...), function(x) { + cli::format_inline( + x, + .envir = .envir, + collapse = collapse, + keep_whitespace = keep_whitespace + ) + }) +} + with_no_rlang_infix_labeling <- function(expr) { # TODO: Temporary patch for a slowdown seen with `rlang::as_label()` and infix # operators. A real solution likely involves lazy ALTREP vectors (#6681). diff --git a/tests/testthat/_snaps/summarise.md b/tests/testthat/_snaps/summarise.md index 8989d4b279..fd96b1b4bd 100644 --- a/tests/testthat/_snaps/summarise.md +++ b/tests/testthat/_snaps/summarise.md @@ -79,35 +79,79 @@ Error in `summarise()`: ! Can't transform a data frame with missing names. -# summarise() gives meaningful errors +# summarise() messages about implicit `.groups` default Code - summarise(group_by(tibble(x = 1, y = 2), x, y)) + summarise(group_by(df, x)) + Output + # A tibble: 1 x 1 + x + + 1 1 + +--- + + Code + summarise(rowwise(df)) + Output + # A tibble: 1 x 0 + +--- + + Code + summarise(group_by(df, x, y)) Message - `summarise()` has grouped output by 'x'. You can override using the `.groups` argument. + `summarise()` has regrouped the output by dropping the last grouping column. + i Summaries were computed grouped by x and y. + i Output is grouped by x. + i Use `summarise(.groups = "drop_last")` to silence this message. Output # A tibble: 1 x 2 # Groups: x [1] x y 1 1 2 + +--- + Code - summarise(rowwise(tibble(x = 1, y = 2), x, y)) + summarise(rowwise(df, x, y)) Message - `summarise()` has grouped output by 'x', 'y'. You can override using the `.groups` argument. + `summarise()` has converted the output from a rowwise data frame to a grouped data frame. + i Summaries were computed rowwise. + i Output is grouped by x and y. + i Use `summarise(.groups = "keep")` to silence this message. Output # A tibble: 1 x 2 # Groups: x, y [1] x y 1 1 2 + +# summarise() respects `dplyr.summarise.inform = FALSE` + Code - summarise(rowwise(tibble(x = 1, y = 2))) + eval_global(summarise(group_by(tibble(x = 1, y = 2), x, y))) Output - # A tibble: 1 x 0 + # A tibble: 1 x 2 + # Groups: x [1] + x y + + 1 1 2 --- + Code + eval_global(summarise(rowwise(tibble(x = 1, y = 2), x, y))) + Output + # A tibble: 1 x 2 + # Groups: x, y [1] + x y + + 1 1 2 + +# summarise() gives meaningful errors + Code (expect_error(summarise(tibble(x = 1, y = c(1, 2, 2), z = runif(3)), a = rlang::env( a = 1)))) diff --git a/tests/testthat/test-summarise.R b/tests/testthat/test-summarise.R index 6b996bbe35..9a376fe85a 100644 --- a/tests/testthat/test-summarise.R +++ b/tests/testthat/test-summarise.R @@ -429,19 +429,50 @@ test_that("`summarise()` doesn't allow data frames with missing or empty names ( }) }) -test_that("summarise() gives meaningful errors", { - eval( - envir = global_env(), - expr({ - expect_snapshot({ - # Messages about .groups= - tibble(x = 1, y = 2) |> group_by(x, y) |> summarise() - tibble(x = 1, y = 2) |> rowwise(x, y) |> summarise() - tibble(x = 1, y = 2) |> rowwise() |> summarise() - }) - }) - ) +test_that("summarise() messages about implicit `.groups` default", { + # Otherwise it only informs when called from the global env + local_options(dplyr.summarise.inform = TRUE) + + df <- tibble(x = 1, y = 2) + + # Nothing + expect_snapshot({ + df |> group_by(x) |> summarise() + }) + expect_snapshot({ + df |> rowwise() |> summarise() + }) + # Implicit `"drop_last"` + expect_snapshot({ + df |> group_by(x, y) |> summarise() + }) + + # Implicit `"keep"` + expect_snapshot({ + df |> rowwise(x, y) |> summarise() + }) +}) + +test_that("summarise() respects `dplyr.summarise.inform = FALSE`", { + local_options(dplyr.summarise.inform = FALSE) + + # Force evaluation in the global env so we can be very sure we are + # silencing the message. It only ever triggers in the global env. + eval_global <- function(expr) eval(expr, envir = globalenv()) + + # Implicit `"drop_last"` + expect_snapshot({ + eval_global(tibble(x = 1, y = 2) |> group_by(x, y) |> summarise()) + }) + + # Implicit `"keep"` + expect_snapshot({ + eval_global(tibble(x = 1, y = 2) |> rowwise(x, y) |> summarise()) + }) +}) + +test_that("summarise() gives meaningful errors", { eval( envir = global_env(), expr({ From 4a92787aec90e4eaf9093f53d6ffef488dedd13e Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 20 Nov 2025 10:26:39 -0500 Subject: [PATCH 2/5] Invert ordering --- R/summarise.R | 2 +- tests/testthat/_snaps/summarise.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summarise.R b/R/summarise.R index 7fba335b36..1f164dc674 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -469,7 +469,7 @@ summarise_verbose <- function(.groups, .env) { inform_implicit_drop_last_for_grouped_df <- function(old, new) { inform(cli_format_each_inline( - "{.fn summarise} has regrouped the output by dropping the last grouping column.", + "{.fn summarise} has dropped the last grouping column while regrouping the output.", i = "Summaries were computed grouped by {cli::col_blue(old)}.", i = "Output is grouped by {cli::col_blue(new)}.", i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message." diff --git a/tests/testthat/_snaps/summarise.md b/tests/testthat/_snaps/summarise.md index fd96b1b4bd..120a3ca75c 100644 --- a/tests/testthat/_snaps/summarise.md +++ b/tests/testthat/_snaps/summarise.md @@ -101,7 +101,7 @@ Code summarise(group_by(df, x, y)) Message - `summarise()` has regrouped the output by dropping the last grouping column. + `summarise()` has dropped the last grouping column while regrouping the output. i Summaries were computed grouped by x and y. i Output is grouped by x. i Use `summarise(.groups = "drop_last")` to silence this message. From 79cb028c6739b6284db06bf1061e60c9bfff28c5 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 20 Nov 2025 10:37:55 -0500 Subject: [PATCH 3/5] One more revision --- R/summarise.R | 2 +- tests/testthat/_snaps/summarise.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summarise.R b/R/summarise.R index 1f164dc674..775a5b48d5 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -469,7 +469,7 @@ summarise_verbose <- function(.groups, .env) { inform_implicit_drop_last_for_grouped_df <- function(old, new) { inform(cli_format_each_inline( - "{.fn summarise} has dropped the last grouping column while regrouping the output.", + "{.fn summarise} has regrouped the output.", i = "Summaries were computed grouped by {cli::col_blue(old)}.", i = "Output is grouped by {cli::col_blue(new)}.", i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message." diff --git a/tests/testthat/_snaps/summarise.md b/tests/testthat/_snaps/summarise.md index 120a3ca75c..446f49bd3b 100644 --- a/tests/testthat/_snaps/summarise.md +++ b/tests/testthat/_snaps/summarise.md @@ -101,7 +101,7 @@ Code summarise(group_by(df, x, y)) Message - `summarise()` has dropped the last grouping column while regrouping the output. + `summarise()` has regrouped the output. i Summaries were computed grouped by x and y. i Output is grouped by x. i Use `summarise(.groups = "drop_last")` to silence this message. From f566d95b3c0837d682704980bac2e13b68b661f1 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 20 Nov 2025 10:46:27 -0500 Subject: [PATCH 4/5] Nudge towards `.by` --- R/summarise.R | 7 ++++++- tests/testthat/_snaps/summarise.md | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/summarise.R b/R/summarise.R index 775a5b48d5..88a8400eec 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -468,11 +468,16 @@ summarise_verbose <- function(.groups, .env) { } inform_implicit_drop_last_for_grouped_df <- function(old, new) { + # Only going to show this message if `length(old) > 1`, so don't need to + # worry about the length 0 or length 1 cases. + by <- paste0("c(", paste0(old, collapse = ", "), ")") + inform(cli_format_each_inline( "{.fn summarise} has regrouped the output.", i = "Summaries were computed grouped by {cli::col_blue(old)}.", i = "Output is grouped by {cli::col_blue(new)}.", - i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message." + i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message.", + i = "Use {.code summarise(.by = {by})} for per-operation grouping instead." )) } diff --git a/tests/testthat/_snaps/summarise.md b/tests/testthat/_snaps/summarise.md index 446f49bd3b..86da6292a8 100644 --- a/tests/testthat/_snaps/summarise.md +++ b/tests/testthat/_snaps/summarise.md @@ -105,6 +105,7 @@ i Summaries were computed grouped by x and y. i Output is grouped by x. i Use `summarise(.groups = "drop_last")` to silence this message. + i Use `summarise(.by = c(x, y))` for per-operation grouping instead. Output # A tibble: 1 x 2 # Groups: x [1] From 82ac05d751d582e91c2368b0628a85a0660b0f23 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Thu, 20 Nov 2025 10:50:38 -0500 Subject: [PATCH 5/5] Advertise the topic page! --- R/summarise.R | 2 +- tests/testthat/_snaps/summarise.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/summarise.R b/R/summarise.R index 88a8400eec..9b50f6bb59 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -477,7 +477,7 @@ inform_implicit_drop_last_for_grouped_df <- function(old, new) { i = "Summaries were computed grouped by {cli::col_blue(old)}.", i = "Output is grouped by {cli::col_blue(new)}.", i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message.", - i = "Use {.code summarise(.by = {by})} for per-operation grouping instead." + i = "Use {.code summarise(.by = {by})} for {.topic [per-operation grouping](dplyr::dplyr_by)} instead." )) } diff --git a/tests/testthat/_snaps/summarise.md b/tests/testthat/_snaps/summarise.md index 86da6292a8..0024a5931b 100644 --- a/tests/testthat/_snaps/summarise.md +++ b/tests/testthat/_snaps/summarise.md @@ -105,7 +105,7 @@ i Summaries were computed grouped by x and y. i Output is grouped by x. i Use `summarise(.groups = "drop_last")` to silence this message. - i Use `summarise(.by = c(x, y))` for per-operation grouping instead. + i Use `summarise(.by = c(x, y))` for per-operation grouping (`?dplyr::dplyr_by`) instead. Output # A tibble: 1 x 2 # Groups: x [1]