Skip to content

Commit b3f2115

Browse files
authored
Refresh coalesce() on top of vctrs::vec_case_when() (#7725)
1 parent fbe4a4b commit b3f2115

File tree

4 files changed

+284
-59
lines changed

4 files changed

+284
-59
lines changed

NEWS.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
# dplyr (development version)
22

3-
* `if_else()` no longer allows `condition` to be a logical array. It must be a logical vector with no `dim` attribute (#7723).
3+
* The following vector functions have gotten significantly faster and use much less memory due to a rewrite in C via vctrs (#7723, #7725):
4+
5+
* `if_else()`
6+
* `coalesce()`
47

5-
* `if_else()` has gotten significantly faster and uses much less memory due to a rewrite in C via `vctrs::vec_if_else()` (#7723).
8+
* `if_else()` no longer allows `condition` to be a logical array. It must be a logical vector with no `dim` attribute (#7723).
69

710
* Passing `size` to `if_else()` is now deprecated. The output size is always taken from the `condition` (#7722).
811

R/coalesce.R

Lines changed: 111 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -45,36 +45,125 @@
4545
coalesce <- function(..., .ptype = NULL, .size = NULL) {
4646
args <- list2(...)
4747

48-
if (vec_any_missing(args)) {
49-
# Drop `NULL`s
50-
not_missing <- !vec_detect_missing(args)
51-
args <- vec_slice(args, not_missing)
52-
}
53-
5448
if (length(args) == 0L) {
5549
abort("`...` can't be empty.")
5650
}
51+
if (vec_all_missing(args)) {
52+
abort("`...` must contain at least 1 non-`NULL` value.")
53+
}
54+
55+
# We do vector, type, and size checks up front before dropping any `NULL`
56+
# values or extracting out a `default` to ensure that any errors report
57+
# the correct index
58+
list_check_all_vectors(args, allow_null = TRUE, arg = "")
59+
60+
.ptype <- vec_ptype_common(!!!args, .ptype = .ptype)
61+
args <- vec_cast_common(!!!args, .to = .ptype)
62+
63+
if (is_null(.size)) {
64+
.size <- vec_size_common(!!!args)
65+
} else {
66+
# Check recyclability against `.size`, but delay actual recycling until `convert_from_coalesce_to_case_when()`
67+
list_check_all_recyclable(args, .size, allow_null = TRUE, arg = "")
68+
}
5769

58-
# Recycle early so logical conditions computed below will be the same length,
59-
# as required by `vec_case_when()`
60-
args <- vec_recycle_common(!!!args, .size = .size)
70+
# From this point on we don't expect any errors, since the vector, type, and size checks have already run
6171

62-
# Name early to get correct indexing in `vec_case_when()` error messages
63-
names <- names2(args)
64-
names <- names_as_error_names(names)
65-
args <- set_names(args, names)
72+
args <- convert_from_coalesce_to_case_when(args, .size)
73+
values <- args$values
74+
default <- args$default
6675

67-
conditions <- map(args, function(arg) {
68-
!vec_detect_missing(arg)
76+
# One logical case per value, marking the locations where that value is not missing
cases <- map(values, function(value) {
77+
!vec_detect_missing(value)
6978
})
7079

71-
vec_case_when(
72-
conditions = conditions,
73-
values = args,
74-
conditions_arg = "",
75-
values_arg = "",
80+
vctrs::vec_case_when(
81+
cases = cases,
82+
values = values,
83+
default = default,
7684
ptype = .ptype,
77-
size = .size,
78-
call = current_env()
85+
size = .size
7986
)
8087
}
88+
89+
# Goal is to convert from `...` of `coalesce()` to `values` and `default`
90+
# of `vec_case_when()`
91+
#
92+
# Recognize that these are equivalent:
93+
#
94+
# ```
95+
# coalesce(x, y)
96+
# case_when(!vec_detect_missing(x) ~ x, !vec_detect_missing(y) ~ y)
97+
#
98+
# coalesce(x, y_with_no_missings)
99+
# case_when(!vec_detect_missing(x) ~ x, .default = y_with_no_missings)
100+
#
101+
# coalesce(x, NULL, y, 0)
102+
# case_when(!vec_detect_missing(x) ~ x, !vec_detect_missing(y) ~ y, .default = 0)
103+
# ```
104+
#
105+
# Note how the last element can be used as `default` if it doesn't contain any
106+
# missing values. This is a very nice optimization since `vec_case_when()`
107+
# doesn't need to recycle that value, and efficiently computes its output
108+
# locations!
109+
#
110+
# Note how `NULL`s are dropped during the conversion.
#
# Arguments:
# - `args`: The list of inputs to `coalesce()`, possibly containing `NULL`s.
#   `coalesce()` casts these to a common type and checks their sizes before
#   calling this function.
# - `size`: The size that the remaining `values` are recycled to.
#
# Returns a list with two elements:
# - `values`: The non-`NULL` inputs that were not promoted to `default`,
#   recycled to `size`.
# - `default`: The last non-`NULL` input when it contains no missing values
#   (left unrecycled), otherwise `NULL`.
111+
convert_from_coalesce_to_case_when <- function(args, size) {
112+
if (vec_any_missing(args)) {
113+
# Drop `NULL` elements
114+
args <- vec_slice(args, vec_detect_complete(args))
115+
}
116+
117+
args_size <- length(args)
118+
119+
if (args_size == 0L) {
120+
abort("Checked for at least 1 non-`NULL` value earlier", .internal = TRUE)
121+
}
122+
123+
# Try to promote the `last` element of `args` to `default`
124+
#
125+
# For the 99% case of `coalesce(x, 0)`, this:
126+
# - Avoids recycling `0` to size `size`.
127+
# - Avoids computing `!vec_detect_missing()` on that recycled `0`.
128+
#
129+
# We can only do this if the `last` element doesn't contain missing values,
130+
# because of how names are handled. We don't want to take the name from any `NA`
131+
# element, which is what would happen if we promoted the whole `y` vector here
132+
# to `default`.
133+
#
134+
# ```
135+
# x <- c(a = NA, b = 2)
136+
# y <- c(c = NA, d = 4)
137+
#
138+
# coalesce(x, y)
139+
# # Want c(NA, b = 2)
140+
# # Not c(c = NA, b = 2)
141+
#
142+
# # Compare to
143+
# case_when(!vec_detect_missing(x) ~ x, !vec_detect_missing(y) ~ y)
144+
# case_when(!vec_detect_missing(x) ~ x, .default = y)
145+
# ```
146+
last <- args[[args_size]]
147+
148+
if (vec_any_missing(last)) {
# Can't promote: `last` stays in `args` and is treated like any other value
149+
default <- NULL
150+
} else {
151+
default <- last
152+
args <- args[-args_size]
153+
}
154+
155+
# Most of the time this recycle is a no-op. Two cases where it isn't:
156+
# - `coalesce(x, 0, 1)`, where `1` becomes `default` but we still have a
157+
# scalar `0`.
158+
# - `coalesce(x, NA)`, where `NA` can't be promoted, so we have a scalar `NA`.
159+
args <- vec_recycle_common(!!!args, .size = size)
160+
161+
list(values = args, default = default)
162+
}
163+
164+
# Is every element of `x` missing?
#
# `coalesce()` calls this on the list of inputs captured from `...` to detect
# the case where there is no non-`NULL` value to work with.
#
# Returns a single `TRUE` or `FALSE`.
vec_all_missing <- function(x) {
165+
# If nothing is missing at all, we can answer without counting
if (!vec_any_missing(x)) {
166+
return(FALSE)
167+
}
168+
# All elements are missing when the number of missing elements equals the size
sum(vec_detect_missing(x)) == vec_size(x)
169+
}

tests/testthat/_snaps/coalesce.md

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
# coalesce() gives meaningful error messages
22

33
Code
4-
(expect_error(coalesce(1:2, 1:3)))
5-
Output
6-
<error/vctrs_error_incompatible_size>
4+
coalesce(1:2, 1:3)
5+
Condition
76
Error in `coalesce()`:
87
! Can't recycle `..1` (size 2) to match `..2` (size 3).
8+
9+
---
10+
911
Code
10-
(expect_error(coalesce(1:2, letters[1:2])))
11-
Output
12-
<error/vctrs_error_ptype2>
12+
coalesce(1:2, letters[1:2])
13+
Condition
1314
Error in `coalesce()`:
1415
! Can't combine `..1` <integer> and `..2` <character>.
1516

@@ -21,21 +22,21 @@
2122
Error in `coalesce()`:
2223
! Can't recycle `..2` (size 2) to size 1.
2324

24-
# must have at least one non-`NULL` vector
25+
# can't be empty
2526

2627
Code
2728
coalesce()
2829
Condition
2930
Error in `coalesce()`:
3031
! `...` can't be empty.
3132

32-
---
33+
# must have at least one non-`NULL` vector
3334

3435
Code
3536
coalesce(NULL, NULL)
3637
Condition
3738
Error in `coalesce()`:
38-
! `...` can't be empty.
39+
! `...` must contain at least 1 non-`NULL` value.
3940

4041
# inputs must be vectors
4142

@@ -61,3 +62,51 @@
6162
Error in `coalesce()`:
6263
! Can't combine `..1` <double> and `y` <character>.
6364

65+
---
66+
67+
Code
68+
coalesce(1:2, 1:3)
69+
Condition
70+
Error in `coalesce()`:
71+
! Can't recycle `..1` (size 2) to match `..2` (size 3).
72+
73+
---
74+
75+
Code
76+
coalesce(1:2, y = 1:3)
77+
Condition
78+
Error in `coalesce()`:
79+
! Can't recycle `..1` (size 2) to match `y` (size 3).
80+
81+
---
82+
83+
Code
84+
coalesce(1, NULL, "x")
85+
Condition
86+
Error in `coalesce()`:
87+
! Can't combine `..1` <double> and `..3` <character>.
88+
89+
---
90+
91+
Code
92+
coalesce(1, NULL, y = "x")
93+
Condition
94+
Error in `coalesce()`:
95+
! Can't combine `..1` <double> and `y` <character>.
96+
97+
---
98+
99+
Code
100+
coalesce(1:2, NULL, 1:3)
101+
Condition
102+
Error in `coalesce()`:
103+
! Can't recycle `..1` (size 2) to match `..3` (size 3).
104+
105+
---
106+
107+
Code
108+
coalesce(1:2, NULL, y = 1:3)
109+
Condition
110+
Error in `coalesce()`:
111+
! Can't recycle `..1` (size 2) to match `y` (size 3).
112+

0 commit comments

Comments
 (0)