tidymodels
diff --git a/‎DESCRIPTION
Lines changed: 3 additions & 1 deletion b/‎DESCRIPTION
Lines changed: 3 additions & 1 deletion
diff --git a/‎tests/testthat/_snaps/R4.4/tokenize_bpe.md
Lines changed: 10 additions & 0 deletions b/‎tests/testthat/_snaps/R4.4/tokenize_bpe.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/R4.4/tokenizer-tokenizersbpe.md
Lines changed: 10 additions & 0 deletions b/‎tests/testthat/_snaps/R4.4/tokenizer-tokenizersbpe.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/clean_levels.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/clean_levels.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/clean_names.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/clean_names.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/dummy_hash.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/dummy_hash.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/lda.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/lda.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/lemma.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/lemma.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/ngram.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/ngram.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/pos_filter.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/pos_filter.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/sequence_onehot.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/sequence_onehot.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/stem.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/stem.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/stopwords.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/stopwords.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/text_normalization.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/text_normalization.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/textfeature.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/textfeature.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/texthash.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/texthash.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tf.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tf.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tfidf.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tfidf.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenfilter.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenfilter.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenize.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenize.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenize_bpe.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenize_bpe.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenize_sentencepiece.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenize_sentencepiece.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenize_wordpiece.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenize_wordpiece.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/tokenmerge.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/tokenmerge.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/untokenize.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/untokenize.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/_snaps/word_embeddings.md
Lines changed: 8 additions & 0 deletions b/‎tests/testthat/_snaps/word_embeddings.md
Lines changed: 8 additions & 0 deletions
diff --git a/‎tests/testthat/test-clean_levels.R
Lines changed: 4 additions & 4 deletions b/‎tests/testthat/test-clean_levels.R
Lines changed: 4 additions & 4 deletions
@@ -20,7 +20,7 @@ URL: https://github.com/tidymodels/textrecipes,
 BugReports: https://github.com/tidymodels/textrecipes/issues
 Depends: 
     R (>= 3.6),
-    recipes (>= 1.0.7)
+    recipes (>= 1.1.0.9000)
 Imports: 
     lifecycle,
     dplyr,
@@ -53,6 +53,8 @@ Suggests:
     tokenizers.bpe,
     udpipe,
     wordpiece
+Remotes:
+    tidymodels/recipes
 LinkingTo: 
     cpp11
 VignetteBuilder: 
 
@@ -0,0 +1,10 @@
+# Errors if vocabulary size is set to low.
+
+    Code
+      recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>%
+        prep()
+    Condition
+      Error in `step_tokenize_bpe()`:
+      Caused by error in `prep()`:
+      ! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23
+
@@ -0,0 +1,10 @@
+# Errors if vocabulary size is set to low.
+
+    Code
+      recipe(~text, data = tibble(text = "hello")) %>% step_tokenize(text, engine = "tokenizers.bpe",
+        training_options = list(vocab_size = 2)) %>% prep()
+    Condition
+      Error in `step_tokenize()`:
+      Caused by error in `prep()`:
+      ! `vocabulary_size` of 2 is too small for column `text` which has a unique character count of 4
+
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = smith_tr[, -1])
+    Condition
+      Error in `step_clean_levels()`:
+      ! The following required column is missing from `new_data`: name.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = mtcars[, -3])
+    Condition
+      Error in `step_clean_names()`:
+      ! The following required column is missing from `new_data`: disp.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `dummyhash_text_01`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -2])
+    Condition
+      Error in `step_dummy_hash()`:
+      ! The following required column is missing from `new_data`: sponsor_code.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `lda_text_1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_lda()`:
+      ! The following required column is missing from `new_data`: medium.
+
 # empty printing
 
     Code
 
@@ -7,6 +7,14 @@
       Caused by error in `bake()`:
       ! `text` doesn't have a lemma attribute. Make sure the tokenization step includes lemmatization.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_lemma()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -14,6 +14,14 @@
       Error:
       ! n must be a positive integer.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_ngram()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -7,6 +7,14 @@
       Caused by error in `bake()`:
       ! `text` doesn't have a pos attribute. Make sure the tokenization step includes part of speech tagging.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_pos_filter()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -41,6 +41,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `seq1hot_text_1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_sequence_onehot()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_stem()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_stopwords()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = ex_dat[, -1])
+    Condition
+      Error in `step_text_normalization()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -29,6 +29,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `textfeature_text_n_words`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -1])
+    Condition
+      Error in `step_textfeature()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `texthash_text_0001`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_texthash()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `tf_text_i`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_tf()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -28,6 +28,14 @@
       Please retrain this recipe with version 0.5.1 or higher.
       * A data leakage bug has been fixed for `step_tfidf()`.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_tfidf()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -36,6 +36,14 @@
       * Tokenization for: text | Trained
       * Text filtering for: text | Trained
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_tokenfilter()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -16,6 +16,14 @@
       Caused by error in `prep()`:
       ! `engine` argument is not valid.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -1])
+    Condition
+      Error in `step_tokenize()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -1])
+    Condition
+      Error in `step_tokenize_bpe()`:
+      ! The following required column is missing from `new_data`: text1.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       Caused by error in `prep()`:
       ! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23.
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -1])
+    Condition
+      Error in `step_tokenize_sentencepiece()`:
+      ! The following required column is missing from `new_data`: text1.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = test_data[, -1])
+    Condition
+      Error in `step_tokenize_wordpiece()`:
+      ! The following required column is missing from `new_data`: text1.
+
 # empty printing
 
     Code
 
@@ -18,6 +18,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `tokenmerge`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_tokenmerge()`:
+      ! The following required column is missing from `new_data`: text1.
+
 # empty printing
 
     Code
 
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_untokenize()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `wordembed_text_d1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(trained, new_data = tokenized_test_data[, -1])
+    Condition
+      Error in `step_word_embeddings()`:
+      ! The following required column is missing from `new_data`: text.
+
 # empty printing
 
     Code
 
@@ -77,9 +77,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
 
   trained <- prep(rec, training = smith_tr, verbose = FALSE)
 
-  expect_error(
-    bake(trained, new_data = smith_tr[, -1]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(trained, new_data = smith_tr[, -1])
   )
 })
 
@@ -126,4 +126,4 @@ test_that("printing", {
 
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
+})