Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Client-side warning on join explosion #325

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,11 @@ rows_check_y_unmatched <- dplyr$rows_check_y_unmatched
rows_df_in_place <- dplyr$rows_df_in_place
rowwise_df <- dplyr$rowwise_df
slice_rows <- dplyr$slice_rows
stop_join <- dplyr$stop_join
summarise_build <- dplyr$summarise_build
summarise_cols <- dplyr$summarise_cols
summarise_deprecate_variable_size <- dplyr$summarise_deprecate_variable_size
the <- dplyr$the
tick_if_needed <- dplyr$tick_if_needed
warn_join <- dplyr$warn_join
warn_join_cross_by <- dplyr$warn_join_cross_by
2 changes: 1 addition & 1 deletion R/full_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
"No implicit cross joins for full_join()" = is_cross_by(by),
"`multiple` not supported" = !identical(multiple, "all"),
{
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, error_call)
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, relationship, error_call)
return(out)
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/inner_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
"`multiple` not supported" = !identical(multiple, "all"),
"`unmatched` not supported" = !identical(unmatched, "drop"),
{
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, error_call)
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, relationship, error_call)
return(out)
}
)
Expand Down
64 changes: 64 additions & 0 deletions R/join.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ rel_join_impl <- function(
na_matches,
suffix = c(".x", ".y"),
keep = NULL,
relationship = NULL,
error_call = caller_env()
) {
mutating <- !(join %in% c("semi", "anti"))
Expand All @@ -25,6 +26,10 @@ rel_join_impl <- function(
by <- as_join_by(by, error_call = error_call)
}

if (mutating) {
check_relationship(relationship, x, y, by, error_call = error_call)
}

x_by <- by$x
y_by <- by$y
x_rel <- duckdb_rel_from_df(x)
Expand Down Expand Up @@ -137,3 +142,62 @@ rel_join_impl <- function(

return(out)
}

check_relationship <- function(relationship, x, y, by, error_call) {
if (is_null(relationship)) {
# FIXME: Determine behavior based on option
if (!is_key(x, by$x) && !is_key(y, by$y)) {
warn_join(
message = c(
"Detected an unexpected many-to-many relationship between `x` and `y`.",
i = paste0(
"If a many-to-many relationship is expected, ",
"set `relationship = \"many-to-many\"` to silence this warning."
)
),
class = "dplyr_warning_join_relationship_many_to_many",
call = error_call
)
}
return()
}

if (relationship %in% c("one-to-many", "one-to-one")) {
if (!is_key(x, by$x)) {
stop_join(
message = c(
glue("Each row in `{x_name}` must match at most 1 row in `{y_name}`."),
),
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
call = error_call
)
}
}

if (relationship %in% c("many-to-one", "one-to-one")) {
if (!is_key(y, by$y)) {
stop_join(
message = c(
glue("Each row in `{y_name}` must match at most 1 row in `{x_name}`."),
),
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
call = error_call
)
}
}
}

is_key <- function(x, cols) {
local_options(duckdb.materialize_message = FALSE)

rows <-
x %>%
# FIXME: Why does this materialize
# as_duckplyr_tibble() %>%
summarize(.by = c(!!!syms(cols)), `___n` = n()) %>%
filter(`___n` > 1L) %>%
head(1L) %>%
nrow()

rows == 0
}
2 changes: 1 addition & 1 deletion R/left_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
"`multiple` not supported" = !identical(multiple, "all"),
"`unmatched` not supported" = !identical(unmatched, "drop"),
{
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, error_call)
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, relationship, error_call)
return(out)
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/right_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
"`multiple` not supported" = !identical(multiple, "all"),
"`unmatched` not supported" = !identical(unmatched, "drop"),
{
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, error_call)
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, relationship, error_call)
return(out)
}
)
Expand Down
6 changes: 2 additions & 4 deletions tests/testthat/_snaps/join-rows.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@
---

Code
left_join(df, df, by = join_by(x))
duckplyr_left_join(df, df, by = join_by(x))
Condition
Warning in `left_join()`:
Warning in `duckplyr_left_join()`:
Detected an unexpected many-to-many relationship between `x` and `y`.
i Row 1 of `x` matches multiple rows in `y`.
i Row 1 of `y` matches multiple rows in `x`.
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
Output
x
Expand Down
9 changes: 9 additions & 0 deletions tests/testthat/_snaps/join.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@
Error:
! `na_matches` must be one of "na" or "never", not "foo".

# mutating joins trigger many-to-many warning

Code
out <- duckplyr_left_join(df, df, join_by(x))
Condition
Warning in `duckplyr_left_join()`:
Detected an unexpected many-to-many relationship between `x` and `y`.
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.

# mutating joins compute common columns

Code
Expand Down
1 change: 0 additions & 1 deletion tests/testthat/test-join-rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ test_that("join_rows() gives meaningful many-to-one errors", {
})

test_that("join_rows() gives meaningful many-to-many warnings", {
skip("TODO duckdb")
expect_snapshot({
join_rows(c(1, 1), c(1, 1))
})
Expand Down
1 change: 0 additions & 1 deletion tests/testthat/test-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ test_that("join_filter() validates arguments", {
})

test_that("mutating joins trigger many-to-many warning", {
skip("TODO duckdb")
df <- tibble(x = c(1, 1))
expect_snapshot(out <- duckplyr_left_join(df, df, join_by(x)))
})
Expand Down
6 changes: 1 addition & 5 deletions tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,10 @@ duckplyr_tests <- head(n = -1, list(
NULL
),
"test-join-rows.R" = c(
"join_rows() gives meaningful many-to-many warnings",
NULL
),
"test-join.R" = c(
"mutating joins trigger multiple match warning",
"mutating joins don't trigger multiple match warning when called indirectly",

"mutating joins trigger many-to-many warning",
# FIXME: How to detect an indirect call?
"mutating joins don't trigger many-to-many warning when called indirectly",
NULL
),
Expand Down
Loading