|
1 | 1 | #' A general vectorised `switch()` |
2 | 2 | #' |
3 | 3 | #' @description |
| 4 | +#' `r lifecycle::badge("superseded")` |
| 5 | +#' |
| 6 | +#' `case_match()` is superseded by [recode_values()] and [replace_values()], |
| 7 | +#' which are more powerful, have more intuitive names, and are safer to use. |
| 8 | +#' In addition to the familiar two-sided formula interface, these functions also |
| 9 | +#' have `from` and `to` arguments which allow you to incorporate a lookup table |
| 10 | +#' into the recoding process. |
| 11 | +#' |
4 | 12 | #' This function allows you to vectorise multiple [switch()] statements. Each |
5 | 13 | #' case is evaluated sequentially and the first match for each element |
6 | 14 | #' determines the corresponding value in the output vector. If no cases match, |
7 | 15 | #' the `.default` is used. |
8 | 16 | #' |
9 | | -#' `case_match()` is an R equivalent of the SQL "simple" `CASE WHEN` statement. |
10 | | -#' |
11 | | -#' ## Connection to `case_when()` |
12 | | -#' |
13 | | -#' While [case_when()] uses logical expressions on the left-hand side of the |
14 | | -#' formula, `case_match()` uses values to match against `.x` with. The following |
15 | | -#' two statements are roughly equivalent: |
16 | | -#' |
17 | | -#' ``` |
18 | | -#' case_when( |
19 | | -#' x %in% c("a", "b") ~ 1, |
20 | | -#' x %in% "c" ~ 2, |
21 | | -#' x %in% c("d", "e") ~ 3 |
22 | | -#' ) |
23 | | -#' |
24 | | -#' case_match( |
25 | | -#' x, |
26 | | -#' c("a", "b") ~ 1, |
27 | | -#' "c" ~ 2, |
28 | | -#' c("d", "e") ~ 3 |
29 | | -#' ) |
30 | | -#' ``` |
31 | | -#' |
32 | 17 | #' @param .x A vector to match against. |
33 | 18 | #' |
34 | 19 | #' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of two-sided |
|
58 | 43 | #' A vector with the same size as `.x` and the same type as the common type of |
59 | 44 | #' the RHS inputs and `.default` (if not overridden by `.ptype`). |
60 | 45 | #' |
61 | | -#' @seealso [case_when()] |
62 | | -#' |
63 | 46 | #' @export |
64 | 47 | #' @examples |
| 48 | +#' # `case_match()` has been superseded by `recode_values()` and |
| 49 | +#' # `replace_values()` |
| 50 | +#' |
65 | 51 | #' x <- c("a", "b", "a", "d", "b", NA, "c", "e") |
66 | 52 | #' |
67 | | -#' # `case_match()` acts like a vectorized `switch()`. |
68 | | -#' # Unmatched values "fall through" as a missing value. |
| 53 | +#' # `recode_values()` is a 1:1 replacement for `case_match()` |
69 | 54 | #' case_match( |
70 | 55 | #' x, |
71 | 56 | #' "a" ~ 1, |
72 | 57 | #' "b" ~ 2, |
73 | 58 | #' "c" ~ 3, |
74 | 59 | #' "d" ~ 4 |
75 | 60 | #' ) |
76 | | -#' |
77 | | -#' # Missing values can be matched exactly, and `.default` can be used to |
78 | | -#' # control the value used for unmatched values of `.x` |
79 | | -#' case_match( |
| 61 | +#' recode_values( |
80 | 62 | #' x, |
81 | 63 | #' "a" ~ 1, |
82 | 64 | #' "b" ~ 2, |
83 | 65 | #' "c" ~ 3, |
84 | | -#' "d" ~ 4, |
85 | | -#' NA ~ 0, |
86 | | -#' .default = 100 |
| 66 | +#' "d" ~ 4 |
87 | 67 | #' ) |
88 | 68 | #' |
89 | | -#' # Input values can be grouped into the same expression to map them to the |
90 | | -#' # same output value |
91 | | -#' case_match( |
| 69 | +#' # `recode_values()` has an additional `unmatched` argument to help you catch |
| 70 | +#' # missed mappings |
| 71 | +#' try(recode_values( |
92 | 72 | #' x, |
93 | | -#' c("a", "b") ~ "low", |
94 | | -#' c("c", "d", "e") ~ "high" |
| 73 | +#' "a" ~ 1, |
| 74 | +#' "b" ~ 2, |
| 75 | +#' "c" ~ 3, |
| 76 | +#' "d" ~ 4, |
| 77 | +#' unmatched = "error" |
| 78 | +#' )) |
| 79 | +#' |
| 80 | +#' # `recode_values()` also has additional `from` and `to` arguments, which are |
| 81 | +#' # useful when your lookup table is defined elsewhere (for example, it could |
| 82 | +#' # be read in from a CSV file). This is very difficult to do with |
| 83 | +#' # `case_match()`! |
| 84 | +#' lookup <- tribble( |
| 85 | +#' ~from, ~to, |
| 86 | +#' "a", 1, |
| 87 | +#' "b", 2, |
| 88 | +#' "c", 3, |
| 89 | +#' "d", 4 |
95 | 90 | #' ) |
96 | 91 | #' |
97 | | -#' # `case_match()` isn't limited to character input: |
98 | | -#' y <- c(1, 2, 1, 3, 1, NA, 2, 4) |
| 92 | +#' recode_values(x, from = lookup$from, to = lookup$to) |
| 93 | +#' |
| 94 | +#' # Both `case_match()` and `recode_values()` work with more than just |
| 95 | +#' # character inputs: |
| 96 | +#' y <- as.integer(c(1, 2, 1, 3, 1, NA, 2, 4)) |
99 | 97 | #' |
100 | 98 | #' case_match( |
101 | 99 | #' y, |
102 | 100 | #' c(1, 3) ~ "odd", |
103 | 101 | #' c(2, 4) ~ "even", |
104 | 102 | #' .default = "missing" |
105 | 103 | #' ) |
| 104 | +#' recode_values( |
| 105 | +#' y, |
| 106 | +#' c(1, 3) ~ "odd", |
| 107 | +#' c(2, 4) ~ "even", |
| 108 | +#' default = "missing" |
| 109 | +#' ) |
| 110 | +#' |
| 111 | +#' # Or with a lookup table |
| 112 | +#' lookup <- tribble( |
| 113 | +#' ~from, ~to, |
| 114 | +#' c(1, 3), "odd", |
| 115 | +#' c(2, 4), "even" |
| 116 | +#' ) |
| 117 | +#' recode_values(y, from = lookup$from, to = lookup$to, default = "missing") |
106 | 118 | #' |
107 | | -#' # Setting `.default` to the original vector is a useful way to replace |
108 | | -#' # selected values, leaving everything else as is |
| 119 | +#' # `replace_values()` is a convenient way to replace selected values, leaving |
| 120 | +#' # everything else as is. It's similar to `case_match(y, .default = y)`. |
| 121 | +#' replace_values(y, NA ~ 0) |
109 | 122 | #' case_match(y, NA ~ 0, .default = y) |
110 | 123 | #' |
| 124 | +#' # Notably, `replace_values()` is type stable, which means that `y` can't |
| 125 | +#' # change types out from under you, unlike with `case_match()`! |
| 126 | +#' typeof(y) |
| 127 | +#' typeof(replace_values(y, NA ~ 0)) |
| 128 | +#' typeof(case_match(y, NA ~ 0, .default = y)) |
| 129 | +#' |
| 130 | +#' # We believe that `replace_values()` better expresses intent when doing a |
| 131 | +#' # partial replacement. Compare these two `mutate()` calls, each with the |
| 132 | +#' # goals of: |
| 133 | +#' # - Replacing missing values in `hair_color` |
| 134 | +#' # - Replacing some of the `species` |
111 | 135 | #' starwars |> |
112 | 136 | #' mutate( |
113 | | -#' # Replace missings, but leave everything else alone |
114 | 137 | #' hair_color = case_match(hair_color, NA ~ "unknown", .default = hair_color), |
115 | | -#' # Replace some, but not all, of the species |
116 | 138 | #' species = case_match( |
117 | 139 | #' species, |
118 | 140 | #' "Human" ~ "Humanoid", |
|
122 | 144 | #' ), |
123 | 145 | #' .keep = "used" |
124 | 146 | #' ) |
| 147 | +#' |
| 148 | +#' updates <- tribble( |
| 149 | +#' ~from, ~to, |
| 150 | +#' "Human", "Humanoid", |
| 151 | +#' "Droid", "Robot", |
| 152 | +#' c("Wookiee", "Ewok"), "Hairy" |
| 153 | +#' ) |
| 154 | +#' |
| 155 | +#' starwars |> |
| 156 | +#' mutate( |
| 157 | +#' hair_color = replace_values(hair_color, NA ~ "unknown"), |
| 158 | +#' species = replace_values(species, from = updates$from, to = updates$to), |
| 159 | +#' .keep = "used" |
| 160 | +#' ) |
125 | 161 | case_match <- function(.x, ..., .default = NULL, .ptype = NULL) { |
| 162 | + # Superseded in dplyr 1.2.0 |
| 163 | + lifecycle::signal_stage("superseded", "case_match()", "recode_values()") |
| 164 | + |
126 | 165 | # Matching historical behavior of `case_match()`, which was to work like |
127 | 166 | # `case_when()` and not allow empty `...`. Newer `replace_when()` and |
128 | 167 | # `replace_values()` are a no-op for this case, but we superseded |
|
0 commit comments