
Commit 30e1bd2

use cli functions
1 parent 60999a6 commit 30e1bd2

24 files changed (+84 -104 lines)
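
Every change below follows the same pattern: error and warning messages
previously assembled with rlang::abort() plus glue() become single
cli::cli_abort() / cli::cli_warn() calls, which interpolate R expressions
glue-style and support inline markup such as {.arg}, {.val}, {.fn}, and
{.url}. A minimal before/after sketch of the pattern (illustrative only, not
code from this commit; assumes the rlang, glue, and cli packages are
installed):

    # Before: build the message by hand, then signal the error.
    check_n_old <- function(n) {
      if (!is.numeric(n)) {
        rlang::abort(glue::glue("`n` must be numeric, not {class(n)[1]}."))
      }
    }

    # After: cli interpolates and styles the message in one call.
    check_n_new <- function(n) {
      if (!is.numeric(n)) {
        cli::cli_abort("{.arg n} must be numeric, not {.cls {class(n)[1]}}.")
      }
    }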

DESCRIPTION (+1 -1)

@@ -64,5 +64,5 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 SystemRequirements: "GNU make"

R/aaa.R (+1 -10)

@@ -12,16 +12,7 @@ factor_to_text <- function(data, names) {
 
 check_possible_tokenizers <- function(x, dict, call = caller_env(2)) {
   if (!(x %in% dict)) {
-    possible_tokenizers <- glue::glue_collapse(
-      dict,
-      sep = ", ", last = ", or "
-    )
-    rlang::abort(
-      glue(
-        "token should be one of the supported: {possible_tokenizers}"
-      ),
-      call = call
-    )
+    cli::cli_abort("Token should be one of {dict}.", call = call)
   }
 }
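
A note on the one-liner above: cli interpolates vectors and collapses them
automatically, so the manual glue_collapse() is no longer needed. The default
collapse joins with "and"; the removed code joined with "or", which cli can
reproduce with the {.or} style. A small illustration (assumes cli; output
approximate):

    dict <- c("words", "characters", "ngrams")
    cli::cli_text("Token should be one of {dict}.")
    #> Token should be one of words, characters, and ngrams.
    cli::cli_text("Token should be one of {.or {dict}}.")
    #> Token should be one of words, characters, or ngrams.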

R/lda.R (+4 -5)

@@ -267,12 +267,11 @@ check_lda_character <- function(dat) {
   all_good <- character_ind | factor_ind
 
   if (any(all_good)) {
-    rlang::abort(
-      glue(
+    cli::cli_abort(
+      c(
         "All columns selected for this step should be tokenlists.",
-        "\n",
-        "See https://github.com/tidymodels/textrecipes#breaking-changes",
-        " for more information."
+        "i" = "See {.url https://github.com/tidymodels/textrecipes#breaking-changes}
+        for more information."
       )
     )
   }
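
Here the glue() string gives way to cli's bullet syntax: a character vector
whose names select bullet types, with "i" rendering as an info bullet on its
own line. A sketch of the rendering (illustrative; assumes cli):

    cli::cli_abort(c(
      "All columns selected for this step should be tokenlists.",
      "i" = "See the linked page for more information."
    ))
    #> Error:
    #> ! All columns selected for this step should be tokenlists.
    #> i See the linked page for more information.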

R/lemma.R (+4 -6)

@@ -115,12 +115,10 @@ bake.step_lemma <- function(object, new_data, ...) {
   variable <- new_data[[col_name]]
 
   if (is.null(maybe_get_lemma(variable))) {
-    rlang::abort(
-      glue(
-        "`{col_name}` doesn't have a lemma attribute. ",
-        "Make sure the tokenization step includes lemmatization."
-      )
-    )
+    cli::cli_abort(c(
+      "{.code {col_name}} doesn't have a lemma attribute.",
+      "i" = "Make sure the tokenization step includes lemmatization."
+    ))
   } else {
     lemma_variable <- tokenlist_lemma(variable)
   }

R/pos_filter.R (+4 -5)

@@ -121,11 +121,10 @@ bake.step_pos_filter <- function(object, new_data, ...) {
   variable <- new_data[[col_name]]
 
   if (is.null(maybe_get_pos(variable))) {
-    rlang::abort(
-      glue(
-        "`{col_name}` doesn't have a pos attribute. ",
-        "Make sure the tokenization step includes ",
-        "part of speech tagging."
+    cli::cli_abort(
+      c(
+        "{.arg {col_name}} doesn't have a pos attribute.",
+        "i" = "Make sure the tokenization step includes part of speech tagging."
       )
     )
   }

R/sequence_onehot.R (+2 -2)

@@ -85,11 +85,11 @@ step_sequence_onehot <-
            skip = FALSE,
            id = rand_id("sequence_onehot")) {
     if (length(padding) != 1 || !(padding %in% c("pre", "post"))) {
-      rlang::abort("`padding` should be one of: 'pre', 'post'")
+      cli::cli_abort("{.arg padding} should be one of: {.val pre}, {.val post}")
     }
 
     if (length(truncating) != 1 || !(truncating %in% c("pre", "post"))) {
-      rlang::abort("`truncating` should be one of: 'pre', 'post'")
+      cli::cli_abort("{.code truncating} should be {.val pre} or {.val post}.")
     }
 
     add_step(
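
For reference, {.arg} marks an argument name and {.val} quotes its contents
as a value, so in a plain terminal the first message above renders roughly as
(illustrative; assumes cli):

    cli::cli_text("{.arg padding} should be one of: {.val pre}, {.val post}")
    #> `padding` should be one of: "pre", "post"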

R/text_normalization.R (+5 -7)

@@ -127,15 +127,13 @@ bake.step_text_normalization <- function(object, new_data, ...) {
     nfkd = stringi::stri_trans_nfkd,
     nfkc = stringi::stri_trans_nfkc,
     nfkc_casefold = stringi::stri_trans_nfkc_casefold,
-    rlang::abort(
-      glue(
-        "'normalization_form' must be one of",
-        "'nfc', 'nfd', 'nfkd', 'nfkc', or 'nfkc_casefold'",
-        "but was {object$normalization_form}."
-      )
+    cli::cli_abort(
+      "{.arg normalization_form} must be one of {.val nfc}, {.val nfd},
+      {.val nfkd}, {.val nfkc}, or {.val nfkc_casefold} but was
+      {.val {object$normalization_form}}."
     )
   )
-
+
   for (col_name in col_names) {
     new_data[[col_name]] <- normalization_fun(new_data[[col_name]])
     new_data[[col_name]] <- factor(new_data[[col_name]])
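
The replacement string spans three source lines; cli re-wraps message text to
the terminal width, so the embedded line breaks and indentation do not appear
in the rendered error. A compressed sketch of the same behavior (illustrative;
assumes cli):

    f <- function(form = "bogus") {
      cli::cli_abort(
        "{.arg normalization_form} must be one of {.val nfc} or {.val nfd}
        but was {.val {form}}."
      )
    }
    f()
    #> Error in `f()`:
    #> ! `normalization_form` must be one of "nfc" or "nfd" but was "bogus".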

R/textfeature.R (+5 -5)

@@ -208,14 +208,14 @@ validate_string2num <- function(fun) {
 
   out <- fun(string)
   if (!(is.numeric(out) | is.logical(out))) {
-    rlang::abort(paste0(deparse(substitute(fun)), " must return a numeric."))
+    cli::cli_abort("Function {.fn {fun}} must return a numeric.")
   }
 
   if (length(string) != length(out)) {
-    rlang::abort(paste0(
-      deparse(substitute(fun)),
-      " must return the same length output as its input."
-    ))
+    cli::cli_abort(
+      "{.fn {deparse(substitute(fun))}} must return the same length output as
+      its input."
+    )
   }
 }

R/tfidf.R (+2 -2)

@@ -271,10 +271,10 @@ dtm_to_tfidf <- function(dtm, idf_weights, smooth_idf, norm, sublinear_tf) {
     dtm@x <- 1 + log(dtm@x)
   }
   if (is.character(idf_weights)) {
-    rlang::warn(
+    cli::cli_warn(
       c(
         "Please retrain this recipe with version 0.5.1 or higher.",
-        "A data leakage bug has been fixed for `step_tfidf()`."
+        "i" = "A data leakage bug has been fixed for {.fn step_tfidf}."
       )
     )
     idf_weights <- log(smooth_idf + nrow(dtm) / Matrix::colSums(dtm > 0))
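
cli::cli_warn() takes the same message syntax as cli::cli_abort() but signals
a warning instead of an error, and {.fn} formats its contents as a function
name. A quick illustration (assumes cli):

    cli::cli_text("A data leakage bug has been fixed for {.fn step_tfidf}.")
    #> A data leakage bug has been fixed for `step_tfidf()`.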

R/tokenfilter.R (+6 -8)

@@ -102,11 +102,11 @@ step_tokenfilter <-
            id = rand_id("tokenfilter")) {
     if (percentage && (max_times > 1 | max_times < 0 |
       min_times > 1 | min_times < 0)) {
-      rlang::abort(
-        "`max_times` and `min_times` should be in the interval [0, 1]."
+      cli::cli_abort(
+        "{.arg max_times} and {.arg min_times} should be in the interval [0, 1]."
       )
     }
-
+
     add_step(
       recipe,
       step_tokenfilter_new(

@@ -258,11 +258,9 @@ tokenfilter_fun <- function(data, max_times, min_times, max_tokens,
       names(sort(tf[ids], decreasing = TRUE))
     } else {
       if (max_tokens > sum(ids)) {
-        rlang::warn(
-          glue(
-            "max_tokens was set to '{max_tokens}', ",
-            "but only {sum(ids)} was available and selected."
-          )
+        cli::cli_warn(
+          "max_tokens was set to {.val {max_tokens}}, but only {sum(ids)} was
+          available and selected."
        )
        max_tokens <- sum(ids)
      }
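
Because cli evaluates brace expressions glue-style, {sum(ids)} is computed
when the warning is signalled. cli can also pluralize around such counts,
though this commit does not use that feature; a hypothetical variant (assumes
cli):

    n <- 3
    cli::cli_text("only {n} token{?s} {?was/were} available and selected.")
    #> only 3 tokens were available and selected.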

R/tokenize.R (+1 -1)

@@ -454,7 +454,7 @@ tokenizer_switch <- function(name, object, data, call = caller_env()) {
     return(res)
   }
 
-  rlang::abort("`engine` argument is not valid.", call = call)
+  cli::cli_abort("The {.arg engine} argument is not valid.", call = call)
 }
 
 #' @rdname required_pkgs.step

R/tokenize_bpe.R (+6 -7)

@@ -121,8 +121,9 @@ prep.step_tokenize_bpe <- function(x, training, info = NULL, ...) {
 
   bpe_options <- x$options
   if (!is.null(bpe_options$vocab_size)) {
-    rlang::abort(
-      "Please supply the vocabulary size using the `vocabulary_size` argument."
+    cli::cli_abort(
+      "Please supply the vocabulary size using the {.arg vocabulary_size}
+      argument."
     )
   }
   bpe_options$vocab_size <- x$vocabulary_size

@@ -158,11 +159,9 @@ check_bpe_vocab_size <- function(text,
   text_count <- length(text_count)
 
   if (vocabulary_size < text_count) {
-    rlang::abort(
-      glue(
-        "`vocabulary_size` of {vocabulary_size} is too small for column ",
-        "`{column}` which has a unique character count of {text_count}"
-      ),
+    cli::cli_abort(
+      "{.arg vocabulary_size} of {vocabulary_size} is too small for column
+      {.arg {column}} which has a unique character count of {text_count}",
       call = call
     )
   }

R/tokenize_sentencepiece.R (+7 -8)

@@ -120,8 +120,9 @@ prep.step_tokenize_sentencepiece <- function(x, training, info = NULL, ...) {
 
   sentencepiece_options <- x$options
   if (!is.null(sentencepiece_options$vocab_size)) {
-    rlang::abort(
-      "Please supply the vocabulary size using the `vocabulary_size` argument."
+    cli::cli_abort(
+      "Please supply the vocabulary size using the {.arg vocabulary_size}
+      argument."
     )
   }
   sentencepiece_options$vocab_size <- x$vocabulary_size

@@ -160,12 +161,10 @@ check_sentencepiece_vocab_size <- function(text,
   text_count <- length(text_count)
 
   if (vocabulary_size < text_count) {
-    rlang::abort(
-      glue(
-        "`vocabulary_size` of {vocabulary_size} is too small for column ",
-        "`{column}` which has a unique character count of {text_count}."
-      ),
-      call = call
+    cli::cli_abort(
+      "The {.arg vocabulary_size} of {vocabulary_size} is too small for column {.arg {column}}
+      which has a unique character count of {text_count}.",
+      call = call
     )
   }
 }

R/tokenlist.R (+11 -11)

@@ -44,10 +44,10 @@ new_tokenlist <- function(tokens = list(), lemma = NULL, pos = NULL,
                           unique_tokens = character()) {
   vec_assert(tokens, list())
   if (!(is.null(lemma) | is.list(lemma))) {
-    rlang::abort("`lemma` must be NULL or a list.")
+    cli::cli_abort("{.arg lemma} must be NULL or a list.")
   }
   if (!(is.null(pos) | is.list(pos))) {
-    rlang::abort("`pos` must be NULL or a list.")
+    cli::cli_abort("{.arg pos} must be NULL or a list.")
   }
   vec_assert(unique_tokens, character())
 
@@ -141,7 +141,7 @@ obj_print_footer.textrecipes_tokenlist <- function(x, ...) {
 # or removes (for keep = FALSE) the words
 tokenlist_filter <- function(x, dict, keep = FALSE) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a tokenlist.")
   }
 
   if (!keep) {
@@ -180,7 +180,7 @@ tokenlist_filter <- function(x, dict, keep = FALSE) {
 
 tokenlist_filter_function <- function(x, fn) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a {.cls tokenlist}.")
   }
 
   tokens <- get_tokens(x)
@@ -210,7 +210,7 @@ tokenlist_filter_function <- function(x, fn) {
 
 tokenlist_apply <- function(x, fun, arguments = NULL) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be {.cls tokenlist} object.")
   }
 
   tokens <- get_tokens(x)
@@ -226,7 +226,7 @@ tokenlist_apply <- function(x, fun, arguments = NULL) {
 # Takes a [tokenlist] and calculate the token count matrix
 tokenlist_to_dtm <- function(x, dict) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a tokenlist.")
   }
 
   tokens <- get_tokens(x)
@@ -246,23 +246,23 @@ tokenlist_to_dtm <- function(x, dict) {
 
 tokenlist_lemma <- function(x) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a tokenlist.")
   }
 
   if (is.null(maybe_get_lemma(x))) {
-    rlang::abort("`lemma` attribute not avaliable.")
+    cli::cli_abort("The {.code lemma} attribute is not available.")
   }
 
   tokenlist(maybe_get_lemma(x), pos = maybe_get_pos(x))
 }
 
 tokenlist_pos_filter <- function(x, pos_tags) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a tokenlist.")
   }
 
   if (is.null(maybe_get_pos(x))) {
-    rlang::abort("pos attribute not avaliable.")
+    cli::cli_abort("{.arg pos} attribute not available.")
   }
 
   tokens <- get_tokens(x)
@@ -292,7 +292,7 @@ tokenlist_pos_filter <- function(x, pos_tags) {
 
 tokenlist_ngram <- function(x, n, n_min, delim) {
   if (!is_tokenlist(x)) {
-    rlang::abort("Input must be a tokenlist.")
+    cli::cli_abort("Input must be a tokenlist.")
   }
 
   tokenlist(cpp11_ngram(get_tokens(x), n, n_min, delim))
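
Two of the rewritten messages use {.cls}, which renders a class name in angle
brackets, e.g. (illustrative; assumes cli):

    cli::cli_text("Input must be a {.cls tokenlist}.")
    #> Input must be a <tokenlist>.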

R/word_embeddings.R (+4 -7)

@@ -110,16 +110,13 @@ step_word_embeddings <- function(recipe,
       ncol(embeddings) == 1 ||
       !all(map_lgl(embeddings[, 2:ncol(embeddings)], is.numeric))
   ) {
-    embeddings_message <- glue(
-      "embeddings should be a tibble with 1 character or factor column and ",
-      "additional numeric columns."
-    )
-    rlang::abort(
-      embeddings_message,
+    cli::cli_abort(
+      "embeddings should be a tibble with {.code 1} character or factor column
+      and additional numeric columns.",
       class = "bad_embeddings"
     )
   }
-
+
   aggregation <- match.arg(aggregation)
 
   add_step(
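
Note that cli::cli_abort() forwards extra arguments such as class to
rlang::abort(), so the custom condition class survives the rewrite and
existing handlers still match. A minimal check (illustrative; assumes cli):

    tryCatch(
      cli::cli_abort("bad embeddings input", class = "bad_embeddings"),
      bad_embeddings = function(cnd) message("caught a bad_embeddings error")
    )
    #> caught a bad_embeddings error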

src/ngram.cpp (+1 -1)

@@ -101,4 +101,4 @@ cpp11_ngram(cpp11::list_of<cpp11::strings> x,
   }
 
   return(out);
-}
+}

tests/testthat/_snaps/lemma.md (+2 -1)

@@ -5,7 +5,8 @@
     Condition
       Error in `step_lemma()`:
       Caused by error in `bake()`:
-      ! `text` doesn't have a lemma attribute. Make sure the tokenization step includes lemmatization.
+      ! `text` doesn't have a lemma attribute.
+      i Make sure the tokenization step includes lemmatization.
 
 # bake method errors when needed non-standard role columns are missing

tests/testthat/_snaps/pos_filter.md (+2 -1)

@@ -5,7 +5,8 @@
     Condition
      Error in `step_pos_filter()`:
      Caused by error in `bake()`:
-      ! `text` doesn't have a pos attribute. Make sure the tokenization step includes part of speech tagging.
+      ! `text` doesn't have a pos attribute.
+      i Make sure the tokenization step includes part of speech tagging.
 
 # bake method errors when needed non-standard role columns are missing
