Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dplyr (development version)

* The `.groups` message emitted by `summarise()` is hopefully more clear now (#6986).

* `if_any()` and `if_all()` are now more consistent in all use cases (#7059, #7077, #7746, @jrwinget). In particular:

* When called with zero inputs, `if_any()` returns `FALSE` and `if_all()` returns `TRUE`.
Expand Down
57 changes: 32 additions & 25 deletions R/summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,27 +149,20 @@ summarise.grouped_df <- function(.data, ..., .by = NULL, .groups = NULL) {
.groups <- "drop_last"
}

group_vars <- by$names
old_groups <- by$names
if (identical(.groups, "drop_last")) {
n <- length(group_vars)
n <- length(old_groups)
if (n > 1) {
new_groups <- old_groups[-n]
if (verbose) {
new_groups <- glue_collapse(
paste0("'", group_vars[-n], "'"),
sep = ", "
)
summarise_inform("has grouped output by {new_groups}")
inform_implicit_drop_last_for_grouped_df(old_groups, new_groups)
}
out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data))
out <- grouped_df(out, new_groups, group_by_drop_default(.data))
}
} else if (identical(.groups, "keep")) {
if (verbose) {
new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
summarise_inform("has grouped output by {new_groups}")
}
out <- grouped_df(out, group_vars, group_by_drop_default(.data))
out <- grouped_df(out, old_groups, group_by_drop_default(.data))
} else if (identical(.groups, "rowwise")) {
out <- rowwise_df(out, group_vars)
out <- rowwise_df(out, old_groups)
} else if (!identical(.groups, "drop")) {
bullets <- c(
paste0("`.groups` can't be ", as_label(.groups)),
Expand All @@ -194,15 +187,14 @@ summarise.rowwise_df <- function(.data, ..., .by = NULL, .groups = NULL) {
.groups <- "keep"
}

group_vars <- by$names
old_groups <- by$names
if (identical(.groups, "keep")) {
if (verbose && length(group_vars)) {
new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
summarise_inform("has grouped output by {new_groups}")
if (verbose && length(old_groups) > 0L) {
inform_implicit_keep_for_rowwise_df(old_groups)
}
out <- grouped_df(out, group_vars)
out <- grouped_df(out, old_groups)
} else if (identical(.groups, "rowwise")) {
out <- rowwise_df(out, group_vars)
out <- rowwise_df(out, old_groups)
} else if (!identical(.groups, "drop")) {
bullets <- c(
paste0("`.groups` can't be ", as_label(.groups)),
Expand Down Expand Up @@ -475,10 +467,25 @@ summarise_verbose <- function(.groups, .env) {
is_reference(topenv(.env), global_env())
}

summarise_inform <- function(..., .env = parent.frame()) {
inform(paste0(
"`summarise()` ",
glue(..., .envir = .env),
'. You can override using the `.groups` argument.'
inform_implicit_drop_last_for_grouped_df <- function(old, new) {
# Only going to show this message if `length(old) > 1`, so don't need to
# worry about the length 0 or length 1 cases.
by <- paste0("c(", paste0(old, collapse = ", "), ")")

inform(cli_format_each_inline(
"{.fn summarise} has regrouped the output.",
i = "Summaries were computed grouped by {cli::col_blue(old)}.",
i = "Output is grouped by {cli::col_blue(new)}.",
i = "Use {.code summarise(.groups = \"drop_last\")} to silence this message.",
i = "Use {.code summarise(.by = {by})} for {.topic [per-operation grouping](dplyr::dplyr_by)} instead."
))
}

inform_implicit_keep_for_rowwise_df <- function(groups) {
inform(cli_format_each_inline(
"{.fn summarise} has converted the output from a rowwise data frame to a grouped data frame.",
i = "Summaries were computed rowwise.",
i = "Output is grouped by {cli::col_blue(groups)}.",
i = "Use {.code summarise(.groups = \"keep\")} to silence this message."
))
}
18 changes: 18 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,24 @@ cli_collapse <- function(x, last = " and ", sep2 = " and ") {
cli::cli_vec(x, style = list("vec-last" = last, "vec-sep2" = sep2))
}

# A version of `cli::format_inline()` that formats each string individually.
# Useful for constructing error bullets ahead of time.
cli_format_each_inline <- function(
...,
.envir = parent.frame(),
collapse = TRUE,
keep_whitespace = TRUE
) {
map_chr(c(...), function(x) {
cli::format_inline(
x,
.envir = .envir,
collapse = collapse,
keep_whitespace = keep_whitespace
)
})
}

with_no_rlang_infix_labeling <- function(expr) {
# TODO: Temporary patch for a slowdown seen with `rlang::as_label()` and infix
# operators. A real solution likely involves lazy ALTREP vectors (#6681).
Expand Down
59 changes: 52 additions & 7 deletions tests/testthat/_snaps/summarise.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,35 +79,80 @@
Error in `summarise()`:
! Can't transform a data frame with missing names.

# summarise() gives meaningful errors
# summarise() messages about implicit `.groups` default

Code
summarise(group_by(tibble(x = 1, y = 2), x, y))
summarise(group_by(df, x))
Output
# A tibble: 1 x 1
x
<dbl>
1 1

---

Code
summarise(rowwise(df))
Output
# A tibble: 1 x 0

---

Code
summarise(group_by(df, x, y))
Message
`summarise()` has grouped output by 'x'. You can override using the `.groups` argument.
`summarise()` has regrouped the output.
i Summaries were computed grouped by x and y.
i Output is grouped by x.
i Use `summarise(.groups = "drop_last")` to silence this message.
i Use `summarise(.by = c(x, y))` for per-operation grouping (`?dplyr::dplyr_by`) instead.
Output
# A tibble: 1 x 2
# Groups: x [1]
x y
<dbl> <dbl>
1 1 2

---

Code
summarise(rowwise(tibble(x = 1, y = 2), x, y))
summarise(rowwise(df, x, y))
Message
`summarise()` has grouped output by 'x', 'y'. You can override using the `.groups` argument.
`summarise()` has converted the output from a rowwise data frame to a grouped data frame.
i Summaries were computed rowwise.
i Output is grouped by x and y.
i Use `summarise(.groups = "keep")` to silence this message.
Output
# A tibble: 1 x 2
# Groups: x, y [1]
x y
<dbl> <dbl>
1 1 2

# summarise() respects `dplyr.summarise.inform = FALSE`

Code
summarise(rowwise(tibble(x = 1, y = 2)))
eval_global(summarise(group_by(tibble(x = 1, y = 2), x, y)))
Output
# A tibble: 1 x 0
# A tibble: 1 x 2
# Groups: x [1]
x y
<dbl> <dbl>
1 1 2

---

Code
eval_global(summarise(rowwise(tibble(x = 1, y = 2), x, y)))
Output
# A tibble: 1 x 2
# Groups: x, y [1]
x y
<dbl> <dbl>
1 1 2

# summarise() gives meaningful errors

Code
(expect_error(summarise(tibble(x = 1, y = c(1, 2, 2), z = runif(3)), a = rlang::env(
a = 1))))
Expand Down
55 changes: 43 additions & 12 deletions tests/testthat/test-summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -429,19 +429,50 @@ test_that("`summarise()` doesn't allow data frames with missing or empty names (
})
})

test_that("summarise() gives meaningful errors", {
eval(
envir = global_env(),
expr({
expect_snapshot({
# Messages about .groups=
tibble(x = 1, y = 2) |> group_by(x, y) |> summarise()
tibble(x = 1, y = 2) |> rowwise(x, y) |> summarise()
tibble(x = 1, y = 2) |> rowwise() |> summarise()
})
})
)
test_that("summarise() messages about implicit `.groups` default", {
# Otherwise it only informs when called from the global env
local_options(dplyr.summarise.inform = TRUE)

df <- tibble(x = 1, y = 2)

# Nothing
expect_snapshot({
df |> group_by(x) |> summarise()
})
expect_snapshot({
df |> rowwise() |> summarise()
})

# Implicit `"drop_last"`
expect_snapshot({
df |> group_by(x, y) |> summarise()
})

# Implicit `"keep"`
expect_snapshot({
df |> rowwise(x, y) |> summarise()
})
})

test_that("summarise() respects `dplyr.summarise.inform = FALSE`", {
local_options(dplyr.summarise.inform = FALSE)

# Force evaluation in the global env so we can be very sure we are
# silencing the message. It only ever triggers in the global env.
eval_global <- function(expr) eval(expr, envir = globalenv())

# Implicit `"drop_last"`
expect_snapshot({
eval_global(tibble(x = 1, y = 2) |> group_by(x, y) |> summarise())
})

# Implicit `"keep"`
expect_snapshot({
eval_global(tibble(x = 1, y = 2) |> rowwise(x, y) |> summarise())
})
})

test_that("summarise() gives meaningful errors", {
eval(
envir = global_env(),
expr({
Expand Down