From f97d68c31e2aa4081f7a95340ea81f12721bc7f8 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:17:42 -0800 Subject: [PATCH 1/7] add hard check --- .github/workflows/R-CMD-check-hard.yaml | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .github/workflows/R-CMD-check-hard.yaml diff --git a/.github/workflows/R-CMD-check-hard.yaml b/.github/workflows/R-CMD-check-hard.yaml new file mode 100644 index 00000000..ac3bc0fd --- /dev/null +++ b/.github/workflows/R-CMD-check-hard.yaml @@ -0,0 +1,59 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, +# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never +# installed, with the exception of testthat, knitr, and rmarkdown. The cache is +# never used to avoid accidentally restoring a cache containing a suggested +# dependency. +on: + push: + branches: [main, master] + pull_request: + +name: R-CMD-check-hard.yaml + +permissions: read-all + +jobs: + check-no-suggests: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + cache: false + extra-packages: | + any::rcmdcheck + any::testthat + any::knitr + any::rmarkdown + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' From f50d36b4547bd1b0f699453bf0672733de179bd6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:47:27 -0800 Subject: [PATCH 2/7] add stop on vignette --- ...ookbook---using-more-complex-recipes-involving-text.Rmd | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd index b14b2c5b..95c4d43e 100644 --- a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd +++ b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd @@ -10,7 +10,14 @@ vignette: > --- ```{r setup, include = FALSE} +if (rlang::is_installed(c("modeldata")) { + run <- TRUE +} else { + run <- FALSE +} + knitr::opts_chunk$set( + eval = run, collapse = TRUE, comment = "#>" ) From a25fb654839f6628fe51546153435a693d085062 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:59:48 -0800 Subject: [PATCH 3/7] missing paran --- .../cookbook---using-more-complex-recipes-involving-text.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd index 95c4d43e..3a64042d 100644 --- a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd +++ b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd @@ -10,7 +10,7 @@ vignette: > --- ```{r setup, include = FALSE} -if 
(rlang::is_installed(c("modeldata")) { +if (rlang::is_installed(c("modeldata"))) { run <- TRUE } else { run <- FALSE From fad67ec69486a973462a01d423ae71fd1109a1dc Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:29:37 -0800 Subject: [PATCH 4/7] add lots of skip if not installed --- .../testthat/_snaps/R4.4/tokenize_bpe.new.md | 16 ++++ tests/testthat/_snaps/dummy_hash.md | 10 +-- tests/testthat/_snaps/lda.md | 17 +++-- tests/testthat/test-clean_levels.R | 26 ++++--- tests/testthat/test-clean_names.R | 19 ++--- tests/testthat/test-dummy_hash.R | 76 ++++++++++++++++--- tests/testthat/test-lda.R | 50 ++++++++---- tests/testthat/test-lemma.R | 4 + tests/testthat/test-pos_filter.R | 6 ++ tests/testthat/test-stopwords.R | 8 ++ tests/testthat/test-texthash.R | 4 + tests/testthat/test-tokenize.R | 1 + tests/testthat/test-tokenize_bpe.R | 14 ++++ tests/testthat/test-tokenize_sentencepiece.R | 12 +++ tests/testthat/test-tokenize_wordpiece.R | 17 ++++- tests/testthat/test-tokenizer-spacyr.R | 1 + tests/testthat/test-tokenizer-tokenizersbpe.R | 6 ++ 17 files changed, 226 insertions(+), 61 deletions(-) create mode 100644 tests/testthat/_snaps/R4.4/tokenize_bpe.new.md diff --git a/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md new file mode 100644 index 00000000..0f4258f8 --- /dev/null +++ b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md @@ -0,0 +1,16 @@ +# Errors if vocabulary size is set to low. + + Code + recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>% + prep() + Condition + Warning in `read.dcf()`: + cannot open compressed file '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/tokenizers.bpe/DESCRIPTION', probable reason 'No such file or directory' + Message + 1 package (tokenizers.bpe) is needed for this step but is not installed. + To install run: `install.packages("tokenizers.bpe")` + Condition + Error in `step_tokenize_bpe()`: + Caused by error in `prep()`: + ! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23 + diff --git a/tests/testthat/_snaps/dummy_hash.md b/tests/testthat/_snaps/dummy_hash.md index 916a3164..12a1d82e 100644 --- a/tests/testthat/_snaps/dummy_hash.md +++ b/tests/testthat/_snaps/dummy_hash.md @@ -70,10 +70,10 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Feature hashing with: sponsor_code + * Feature hashing with: Species --- @@ -85,11 +85,11 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 20 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Feature hashing with: sponsor_code | Trained + * Feature hashing with: Species | Trained diff --git a/tests/testthat/_snaps/lda.md b/tests/testthat/_snaps/lda.md index 39bb3bd2..7c293622 100644 --- a/tests/testthat/_snaps/lda.md +++ b/tests/testthat/_snaps/lda.md @@ -70,28 +70,31 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Tokenization for: medium - * Text feature extraction for: medium + * Tokenization for: Species + * Text feature extraction for: Species --- Code prep(rec) + Condition + Warning in `get_dtm()`: + dtm has 0 rows. Empty iterator? 
Message -- Recipe ---------------------------------------------------------------------- -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 100 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Tokenization for: medium | Trained - * Text feature extraction for: medium | Trained + * Tokenization for: Species | Trained + * Text feature extraction for: Species | Trained diff --git a/tests/testthat/test-clean_levels.R b/tests/testthat/test-clean_levels.R index f7ad597e..b9fd822c 100644 --- a/tests/testthat/test-clean_levels.R +++ b/tests/testthat/test-clean_levels.R @@ -1,15 +1,13 @@ -library(testthat) -library(textrecipes) -library(modeldata) -data("Smithsonian") -smith_tr <- Smithsonian[1:15, ] -smith_te <- Smithsonian[16:20, ] - -rec <- recipe(~., data = smith_tr) - test_that("character input", { skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_levels(name, id = "") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + smith_te <- Smithsonian[16:20, ] + + cleaned <- recipe(~., data = smith_tr) %>% + step_clean_levels(name, id = "") tidy_exp_un <- tibble( terms = c("name"), @@ -50,6 +48,9 @@ test_that("character input", { test_that("factor input", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") smith_tr <- Smithsonian[1:15, ] smith_tr$name <- as.factor(smith_tr$name) smith_te <- Smithsonian[16:20, ] @@ -71,6 +72,11 @@ test_that("factor input", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + rec <- recipe(~name, data = smith_tr) %>% step_clean_levels(name) %>% update_role(name, new_role = "potato") %>% diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index b7b80310..31d70a69 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -1,15 +1,14 @@ -library(testthat) -library(textrecipes) -data(airquality) +test_that("can clean names", { + skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") -air_tr <- airquality[1:20, ] -air_te <- airquality[101:110, ] + data("airquality", package = "modeldata") -rec <- recipe(~., data = air_tr) + air_tr <- airquality[1:20, ] + air_te <- airquality[101:110, ] -test_that("can clean names", { - skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_names(all_predictors(), id = "") + cleaned <- recipe(~., data = air_tr) %>% + step_clean_names(all_predictors(), id = "") tidy_exp_un <- tibble( terms = c("all_predictors()"), @@ -35,6 +34,8 @@ test_that("can clean names", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("janitor") + rec <- recipe(mtcars) %>% step_clean_names(disp) %>% update_role(disp, new_role = "potato") %>% diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index 112c8f2b..87d18138 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -1,18 +1,19 @@ library(textrecipes) library(recipes) -data(grants, package = "modeldata") -test_data <- grants_test[1:20, c("contract_value_band", 
"sponsor_code")] -test_data <- tibble::as_tibble(test_data) - -rec <- recipe(~., data = test_data) test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code) obj <- rec %>% @@ -32,9 +33,16 @@ test_that("hashing gives double outputs", { test_that("hashing multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>% prep() %>% bake(new_data = NULL) @@ -46,9 +54,16 @@ test_that("hashing multiple factors", { test_that("hashing collapsed multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>% prep() %>% bake(new_data = NULL) @@ -60,9 +75,15 @@ test_that("hashing collapsed multiple factors", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code, num_terms = 256) %>% prep() @@ -77,7 +98,13 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) signed <- recipe(~., data = test_data) %>% step_dummy_hash(all_predictors(), num_terms = 2) %>% @@ -98,8 +125,14 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + dat <- test_data dat$text <- dat$sponsor_code 
dat$dummyhash_text_01 <- dat$sponsor_code @@ -131,6 +164,15 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") + data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) %>% update_role(sponsor_code, new_role = "potato") %>% @@ -190,8 +232,14 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + new_names <- paste0("dummyhash_sponsor_code_", 1:5) rec <- recipe(~ sponsor_code, data = test_data) %>% @@ -220,8 +268,14 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~ sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) @@ -242,8 +296,8 @@ test_that("printing", { skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_dummy_hash(sponsor_code) + rec <- recipe(~., data = iris) %>% + step_dummy_hash(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lda.R b/tests/testthat/test-lda.R index 40ec2dba..81c1b27b 100644 --- a/tests/testthat/test-lda.R +++ b/tests/testthat/test-lda.R @@ -1,19 +1,15 @@ -set.seed(1234) -library(recipes) -library(textrecipes) -library(modeldata) -data(tate_text) - -n_rows <- 100 -rec <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) - test_that("step_lda works as intended", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 10 - rec1 <- rec %>% + + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -29,10 +25,14 @@ test_that("step_lda works as intended", { test_that("step_lda works with num_topics argument", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 100 - rec1 <- rec %>% + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -45,8 +45,11 @@ test_that("step_lda works with num_topics 
argument", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + dat <- tate_text[seq_len(100), ] dat$text <- dat$medium dat$lda_text_1 <- dat$text @@ -66,9 +69,15 @@ test_that("check_name() is used", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - tokenized_test_data <- rec %>% + data("tate_text", package = "modeldata") + + n_rows <- 100 + + tokenized_test_data <- recipe(~ medium + artist, + data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% prep() %>% bake(new_data = NULL) @@ -131,10 +140,15 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + new_names <- paste0("lda_medium_", 1:10) + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = FALSE) @@ -163,8 +177,13 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = TRUE) @@ -180,15 +199,14 @@ test_that("keep_original_cols - can prep recipes with it missing", { ) }) - test_that("printing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_tokenize(medium) %>% - step_lda(medium) + rec <- recipe(~., data = iris) %>% + step_tokenize(Species) %>% + step_lda(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lemma.R b/tests/testthat/test-lemma.R index a7956383..dc598cd2 100644 --- a/tests/testthat/test-lemma.R +++ b/tests/testthat/test-lemma.R @@ -11,6 +11,7 @@ text <- tibble(text = c( test_that("lemmatization works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -53,6 +54,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -112,7 +114,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_lemma(all_predictors()) diff --git a/tests/testthat/test-pos_filter.R b/tests/testthat/test-pos_filter.R index 71722648..22c44213 100644 --- 
a/tests/testthat/test-pos_filter.R +++ b/tests/testthat/test-pos_filter.R @@ -11,6 +11,7 @@ text <- tibble(text = c( test_that("part of speech filtering works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -36,6 +37,7 @@ test_that("part of speech filtering works", { test_that("part of speech filtering removes everything", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -61,6 +63,7 @@ test_that("part of speech filtering removes everything", { test_that("part of speech filtering works with multiple tags", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -99,6 +102,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -158,7 +162,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_pos_filter(all_predictors()) diff --git a/tests/testthat/test-stopwords.R b/tests/testthat/test-stopwords.R index 575f918f..1171aa63 100644 --- a/tests/testthat/test-stopwords.R +++ b/tests/testthat/test-stopwords.R @@ -79,6 +79,8 @@ test_that("custom stopwords are supported", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("stopwords") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -99,6 +101,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) @@ -110,6 +114,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("stopwords") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_stopwords(rec1) @@ -123,6 +129,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index 2b57339d..e55ae152 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -111,6 +111,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("text2vec") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -205,6 +207,8 @@ test_that("keep_original_cols works", { }) test_that("keep_original_cols - can prep recipes with it missing", { + skip_if_not_installed("text2vec") + rec <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% step_texthash(text) diff --git a/tests/testthat/test-tokenize.R b/tests/testthat/test-tokenize.R index 2d7699e2..359f8b33 100644 --- a/tests/testthat/test-tokenize.R +++ 
b/tests/testthat/test-tokenize.R @@ -102,6 +102,7 @@ test_that("tokenization errors with wrong engines", { test_that("tokenization includes lemma attribute when avaliable", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() expect_type( diff --git a/tests/testthat/test-tokenize_bpe.R b/tests/testthat/test-tokenize_bpe.R index 52afd52b..70d1a4ad 100644 --- a/tests/testthat/test-tokenize_bpe.R +++ b/tests/testthat/test-tokenize_bpe.R @@ -62,6 +62,8 @@ text2_out <- list( ) test_that("output is list when length is 1 or 0", { + skip_if_not_installed("tokenizers.bpe") + data <- tibble(a = rep(c("a", ""), 20)) data_rec <- recipe(~., data = data) %>% @@ -72,6 +74,8 @@ test_that("output is list when length is 1 or 0", { }) test_that("step_tokenize_bpe works", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% prep() %>% @@ -84,6 +88,8 @@ test_that("step_tokenize_bpe works", { }) test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize_bpe(all_predictors()) %>% prep() %>% @@ -101,6 +107,8 @@ test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 60) %>% prep() %>% @@ -123,6 +131,8 @@ test_that("arguments are passed to tokenizers.bpe", { }) test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("tokenizers.bpe") + expect_snapshot( error = TRUE, variant = r_version(), @@ -151,6 +161,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% update_role(text1, new_role = "potato") %>% @@ -202,6 +214,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~., data = test_data) %>% step_tokenize_bpe(text1) diff --git a/tests/testthat/test-tokenize_sentencepiece.R b/tests/testthat/test-tokenize_sentencepiece.R index 1d6fd52e..2a1f8ad6 100644 --- a/tests/testthat/test-tokenize_sentencepiece.R +++ b/tests/testthat/test-tokenize_sentencepiece.R @@ -60,6 +60,8 @@ text2_out <- list( ) test_that("step_tokenize_sentencepiece works", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 80) %>% prep() %>% @@ -72,6 +74,8 @@ test_that("step_tokenize_sentencepiece works", { }) test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and multiple colunms", { + skip_if_not_installed("sentencepiece") + res <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(all_predictors(), vocabulary_size = 80) %>% prep() %>% @@ -89,6 +93,8 @@ test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and m }) test_that("arguments are passed to tokenizers.sentencepiece", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 60) %>% prep() %>% @@ -111,6 +117,8 @@ test_that("arguments are passed to tokenizers.sentencepiece", { }) 
test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("sentencepiece") + expect_snapshot( error = TRUE, recipe(~text1, data = test_data) %>% @@ -122,6 +130,8 @@ test_that("Errors if vocabulary size is set to low.", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) %>% update_role(text1, new_role = "potato") %>% @@ -173,6 +183,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) diff --git a/tests/testthat/test-tokenize_wordpiece.R b/tests/testthat/test-tokenize_wordpiece.R index aa351753..3989c38e 100644 --- a/tests/testthat/test-tokenize_wordpiece.R +++ b/tests/testthat/test-tokenize_wordpiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -32,6 +29,8 @@ text2_out <- list( ) test_that("step_tokenize_wordpiece works", { + skip_if_not_installed("wordpiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_wordpiece(text1) %>% prep() %>% @@ -44,6 +43,8 @@ test_that("step_tokenize_wordpiece works", { }) test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple colunms", { + skip_if_not_installed("wordpiece") + res <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(all_predictors()) %>% prep() %>% @@ -63,6 +64,8 @@ test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~ text1 + text2, data = test_data) %>% step_tokenize_wordpiece(text1, text2) %>% update_role(text1, new_role = "potato") %>% @@ -77,6 +80,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -88,6 +93,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("wordpiece") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_tokenize_wordpiece(rec1) @@ -101,6 +108,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -114,6 +123,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(text1) diff --git a/tests/testthat/test-tokenizer-spacyr.R b/tests/testthat/test-tokenizer-spacyr.R index a0365e5e..9c04c4e8 100644 --- a/tests/testthat/test-tokenizer-spacyr.R +++ b/tests/testthat/test-tokenizer-spacyr.R @@ -10,6 +10,7 @@ text <- c( test_that("tokenizer works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() out <- spacyr_tokenizer_words(text) diff --git a/tests/testthat/test-tokenizer-tokenizersbpe.R b/tests/testthat/test-tokenizer-tokenizersbpe.R index 
afe28153..2137fdc9 100644 --- a/tests/testthat/test-tokenizer-tokenizersbpe.R +++ b/tests/testthat/test-tokenizer-tokenizersbpe.R @@ -97,6 +97,8 @@ test_that("tokenizer works", { }) test_that("step_tokenize works with tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe") %>% prep() %>% @@ -109,6 +111,8 @@ test_that("step_tokenize works with tokenizers.bpe", { }) test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize(all_predictors(), engine = "tokenizers.bpe") %>% prep() %>% @@ -126,6 +130,8 @@ test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe", From f6e105535e035273fb661bb21951dad224e80cbf Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:39:13 -0800 Subject: [PATCH 5/7] remove library calls in tests --- tests/embeddings/embeddings-references.R | 3 --- tests/testthat/test-dummy_hash.R | 4 --- tests/testthat/test-lemma.R | 4 --- tests/testthat/test-ngram.R | 3 --- tests/testthat/test-pos_filter.R | 4 --- tests/testthat/test-s3-methods.R | 3 --- tests/testthat/test-sequence_onehot.R | 4 --- tests/testthat/test-stem.R | 3 --- tests/testthat/test-stopwords.R | 3 --- tests/testthat/test-text_normalization.R | 4 --- tests/testthat/test-textfeature.R | 3 --- tests/testthat/test-texthash.R | 3 --- tests/testthat/test-tf.R | 3 --- tests/testthat/test-tfidf.R | 3 --- tests/testthat/test-tokenfilter.R | 3 --- tests/testthat/test-tokenize.R | 3 --- tests/testthat/test-tokenize_bpe.R | 3 --- tests/testthat/test-tokenize_sentencepiece.R | 3 --- tests/testthat/test-tokenizer-spacyr.R | 3 --- tests/testthat/test-tokenizer-tokenizersbpe.R | 3 --- tests/testthat/test-tokenlist.R | 27 +++++++++---------- tests/testthat/test-tokenmerge.R | 3 --- tests/testthat/test-untokenize.R | 3 --- tests/testthat/test-word_embeddings.R | 2 -- 24 files changed, 12 insertions(+), 88 deletions(-) diff --git a/tests/embeddings/embeddings-references.R b/tests/embeddings/embeddings-references.R index 7709f178..b0e089cf 100644 --- a/tests/embeddings/embeddings-references.R +++ b/tests/embeddings/embeddings-references.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(testthat) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index 87d18138..c4cbb775 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) - - test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") diff --git a/tests/testthat/test-lemma.R b/tests/testthat/test-lemma.R index dc598cd2..ddcd59cf 100644 --- a/tests/testthat/test-lemma.R +++ b/tests/testthat/test-lemma.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-ngram.R b/tests/testthat/test-ngram.R index e396a35e..4fc4ac31 100644 --- a/tests/testthat/test-ngram.R +++ b/tests/testthat/test-ngram.R @@ -135,9 +135,6 @@ test_that("ngram 
returns length zero vectors when length(x) < n", { ) }) -library(recipes) -library(textrecipes) - test_tibble <- tibble(text = c( "not eat them here or there.", "not eat them anywhere." diff --git a/tests/testthat/test-pos_filter.R b/tests/testthat/test-pos_filter.R index 22c44213..9d5e826f 100644 --- a/tests/testthat/test-pos_filter.R +++ b/tests/testthat/test-pos_filter.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-s3-methods.R b/tests/testthat/test-s3-methods.R index f00c1f20..9c0ce5a3 100644 --- a/tests/testthat/test-s3-methods.R +++ b/tests/testthat/test-s3-methods.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-sequence_onehot.R b/tests/testthat/test-sequence_onehot.R index de1d7236..3c55a1d2 100644 --- a/tests/testthat/test-sequence_onehot.R +++ b/tests/testthat/test-sequence_onehot.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stem.R b/tests/testthat/test-stem.R index 8eaca0ec..96069f7c 100644 --- a/tests/testthat/test-stem.R +++ b/tests/testthat/test-stem.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stopwords.R b/tests/testthat/test-stopwords.R index 1171aa63..bfdd3028 100644 --- a/tests/testthat/test-stopwords.R +++ b/tests/testthat/test-stopwords.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-text_normalization.R b/tests/testthat/test-text_normalization.R index d95e2322..25774994 100644 --- a/tests/testthat/test-text_normalization.R +++ b/tests/testthat/test-text_normalization.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(tibble) - ex_dat <- tibble(text = c("sch\U00f6n", "scho\U0308n")) test_that("simple sqrt trans", { diff --git a/tests/testthat/test-textfeature.R b/tests/testthat/test-textfeature.R index cfd4990f..0c755464 100644 --- a/tests/testthat/test-textfeature.R +++ b/tests/testthat/test-textfeature.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index e55ae152..e36d24f4 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tf.R b/tests/testthat/test-tf.R index 69ab9b9f..0affe2e1 100644 --- a/tests/testthat/test-tf.R +++ b/tests/testthat/test-tf.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tfidf.R b/tests/testthat/test-tfidf.R index e0b2e37a..c9a846d1 100644 --- a/tests/testthat/test-tfidf.R +++ b/tests/testthat/test-tfidf.R @@ -1,6 
+1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenfilter.R b/tests/testthat/test-tokenfilter.R index 39295d99..e2bb0af9 100644 --- a/tests/testthat/test-tokenfilter.R +++ b/tests/testthat/test-tokenfilter.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenize.R b/tests/testthat/test-tokenize.R index 359f8b33..c6691b86 100644 --- a/tests/testthat/test-tokenize.R +++ b/tests/testthat/test-tokenize.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenize_bpe.R b/tests/testthat/test-tokenize_bpe.R index 70d1a4ad..8ab94853 100644 --- a/tests/testthat/test-tokenize_bpe.R +++ b/tests/testthat/test-tokenize_bpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( diff --git a/tests/testthat/test-tokenize_sentencepiece.R b/tests/testthat/test-tokenize_sentencepiece.R index 2a1f8ad6..5a5da763 100644 --- a/tests/testthat/test-tokenize_sentencepiece.R +++ b/tests/testthat/test-tokenize_sentencepiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenizer-spacyr.R b/tests/testthat/test-tokenizer-spacyr.R index 9c04c4e8..0b281c3d 100644 --- a/tests/testthat/test-tokenizer-spacyr.R +++ b/tests/testthat/test-tokenizer-spacyr.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text <- c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenizer-tokenizersbpe.R b/tests/testthat/test-tokenizer-tokenizersbpe.R index 2137fdc9..0a56b73e 100644 --- a/tests/testthat/test-tokenizer-tokenizersbpe.R +++ b/tests/testthat/test-tokenizer-tokenizersbpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( diff --git a/tests/testthat/test-tokenlist.R b/tests/testthat/test-tokenlist.R index 94058803..d4c3e193 100644 --- a/tests/testthat/test-tokenlist.R +++ b/tests/testthat/test-tokenlist.R @@ -1,6 +1,3 @@ -library(testthat) -library(vctrs) - ## Creation ------------------------------------------------------------------- test_that("tokenlist creation works", { @@ -10,12 +7,12 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), "tokens" ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) @@ -30,17 +27,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(LETTERS, LETTERS) ) @@ -55,17 +52,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "pos") ) expect_equal( - 
field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) @@ -83,22 +80,22 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma", "pos") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(letters, LETTERS) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) diff --git a/tests/testthat/test-tokenmerge.R b/tests/testthat/test-tokenmerge.R index 30596df9..f2509115 100644 --- a/tests/testthat/test-tokenmerge.R +++ b/tests/testthat/test-tokenmerge.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble( text1 = c( "I would not eat them here or there.", diff --git a/tests/testthat/test-untokenize.R b/tests/testthat/test-untokenize.R index 4a71461f..0c993900 100644 --- a/tests/testthat/test-untokenize.R +++ b/tests/testthat/test-untokenize.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-word_embeddings.R b/tests/testthat/test-word_embeddings.R index 06330a86..64509d5f 100644 --- a/tests/testthat/test-word_embeddings.R +++ b/tests/testthat/test-word_embeddings.R @@ -1,5 +1,3 @@ -library(recipes) - embeddings <- readRDS(test_path("emb-data", "embeddings.rds")) sentence_embeddings_long <- readRDS(test_path("emb-data", "long.rds")) From 02bb9b94e375f661b738a82122d5341124893765 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:44:14 -0800 Subject: [PATCH 6/7] fix more issues --- R/clean_levels.R | 2 +- tests/testthat/_snaps/clean_levels.md | 10 +++++----- tests/testthat/_snaps/clean_names.md | 8 ++++---- tests/testthat/test-clean_levels.R | 3 ++- tests/testthat/test-clean_names.R | 6 +++--- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R/clean_levels.R b/R/clean_levels.R index c6f8683e..be6afaee 100644 --- a/R/clean_levels.R +++ b/R/clean_levels.R @@ -139,7 +139,7 @@ bake.step_clean_levels <- function(object, new_data, ...) { new_data[[col_name]] <- janitor::make_clean_names(new_data[[col_name]]) } - } +} new_data } diff --git a/tests/testthat/_snaps/clean_levels.md b/tests/testthat/_snaps/clean_levels.md index fe29e220..a3077070 100644 --- a/tests/testthat/_snaps/clean_levels.md +++ b/tests/testthat/_snaps/clean_levels.md @@ -51,10 +51,10 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Operations - * Cleaning factor levels for: name + * Cleaning factor levels for: Species --- @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Training information - Training data contained 15 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. 
-- Operations - * Cleaning factor levels for: name | Trained + * Cleaning factor levels for: Species | Trained diff --git a/tests/testthat/_snaps/clean_names.md b/tests/testthat/_snaps/clean_names.md index e6426902..3db2bb79 100644 --- a/tests/testthat/_snaps/clean_names.md +++ b/tests/testthat/_snaps/clean_names.md @@ -51,7 +51,7 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Operations * Cleaning variable names for: all_predictors() @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Training information - Training data contained 20 data points and 4 incomplete rows. + Training data contained 32 data points and no incomplete rows. -- Operations - * Cleaning variable names for: Ozone, Solar.R, Wind, Temp, ... | Trained + * Cleaning variable names for: mpg, cyl, disp, hp, drat, wt, ... | Trained diff --git a/tests/testthat/test-clean_levels.R b/tests/testthat/test-clean_levels.R index b9fd822c..8016fa63 100644 --- a/tests/testthat/test-clean_levels.R +++ b/tests/testthat/test-clean_levels.R @@ -129,7 +129,8 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_levels(name) + rec <- recipe(~., data = iris) %>% + step_clean_levels(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index 31d70a69..475e0d5c 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -2,8 +2,6 @@ test_that("can clean names", { skip_if_not_installed("janitor") skip_if_not_installed("modeldata") - data("airquality", package = "modeldata") - air_tr <- airquality[1:20, ] air_te <- airquality[101:110, ] @@ -88,7 +86,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_names(all_predictors()) + + rec <- recipe(~., data = mtcars) %>% + step_clean_names(all_predictors()) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) From 827d61f538918286df889ec5ffa08ad176e938e2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Sat, 9 Nov 2024 10:21:47 -0800 Subject: [PATCH 7/7] update examplesIf --- R/clean_levels.R | 2 +- R/dummy_hash.R | 2 +- R/lda.R | 2 +- R/ngram.R | 2 +- R/sequence_onehot.R | 2 +- R/show_tokens.R | 2 +- R/stem.R | 2 +- R/stopwords.R | 2 +- R/textfeature.R | 2 +- R/texthash.R | 2 +- R/tf.R | 2 +- R/tfidf.R | 2 +- R/tokenfilter.R | 2 +- R/tokenize.R | 2 +- R/tokenize_bpe.R | 2 +- R/tokenize_sentencepiece.R | 2 +- R/tokenize_wordpiece.R | 2 +- R/tokenlist.R | 2 +- R/tokenmerge.R | 2 +- R/untokenize.R | 2 +- man/show_tokens.Rd | 2 ++ man/step_clean_levels.Rd | 2 +- man/step_dummy_hash.Rd | 2 +- man/step_lda.Rd | 2 +- man/step_ngram.Rd | 2 ++ man/step_sequence_onehot.Rd | 2 ++ man/step_stem.Rd | 2 ++ man/step_stopwords.Rd | 2 +- man/step_textfeature.Rd | 2 ++ man/step_texthash.Rd | 2 +- man/step_tf.Rd | 3 ++- man/step_tfidf.Rd | 3 ++- man/step_tokenfilter.Rd | 2 ++ man/step_tokenize.Rd | 2 ++ man/step_tokenize_bpe.Rd | 2 +- man/step_tokenize_sentencepiece.Rd | 2 +- man/step_tokenize_wordpiece.Rd | 2 +- man/step_tokenmerge.Rd | 2 ++ man/step_untokenize.Rd | 2 ++ man/tokenlist.Rd | 2 ++ 40 files changed, 52 insertions(+), 30 deletions(-) diff --git a/R/clean_levels.R b/R/clean_levels.R index be6afaee..8ab1d8a7 100644 --- a/R/clean_levels.R +++ b/R/clean_levels.R @@ -41,7 +41,7 @@ #' [recipes::step_unknown()], [recipes::step_novel()], 
[recipes::step_other()] #' @family Steps for Text Cleaning #' -#' @examplesIf rlang::is_installed("janitor") +#' @examplesIf rlang::is_installed(c("modeldata", "janitor")) #' library(recipes) #' library(modeldata) #' data(Smithsonian) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 3cfc8aad..bedd246b 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -72,7 +72,7 @@ #' @seealso [recipes::step_dummy()] #' @family Steps for Numeric Variables From Characters #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/R/lda.R b/R/lda.R index 2bd2c334..c767f75b 100644 --- a/R/lda.R +++ b/R/lda.R @@ -38,7 +38,7 @@ #' #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/ngram.R b/R/ngram.R index 6fa8d1c0..eabfef85 100644 --- a/R/ngram.R +++ b/R/ngram.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/sequence_onehot.R b/R/sequence_onehot.R index 223f852c..b38f0af8 100644 --- a/R/sequence_onehot.R +++ b/R/sequence_onehot.R @@ -52,7 +52,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/show_tokens.R b/R/show_tokens.R index bcbfb6d8..49e302d5 100644 --- a/R/show_tokens.R +++ b/R/show_tokens.R @@ -12,7 +12,7 @@ #' @return A list of character vectors #' @export #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' text_tibble <- tibble(text = c("This is words", "They are nice!")) #' #' recipe(~text, data = text_tibble) %>% diff --git a/R/stem.R b/R/stem.R index c49b4bde..a3c285a6 100644 --- a/R/stem.R +++ b/R/stem.R @@ -42,7 +42,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/stopwords.R b/R/stopwords.R index a00b4eb8..ae8ca7cb 100644 --- a/R/stopwords.R +++ b/R/stopwords.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examplesIf rlang::is_installed("stopwords") +#' @examplesIf rlang::is_installed(c("modeldata", "stopwords")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/textfeature.R b/R/textfeature.R index 57e2b313..02857b95 100644 --- a/R/textfeature.R +++ b/R/textfeature.R @@ -42,7 +42,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/texthash.R b/R/texthash.R index 48a549fa..8178dbcf 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -62,7 
+62,7 @@ #' [step_text_normalization()] to perform text normalization. #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/tf.R b/R/tf.R index f1502632..f8098bb2 100644 --- a/R/tf.R +++ b/R/tf.R @@ -74,7 +74,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tfidf.R b/R/tfidf.R index 73ad9a72..437a4606 100644 --- a/R/tfidf.R +++ b/R/tfidf.R @@ -68,7 +68,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tokenfilter.R b/R/tokenfilter.R index 669a325e..b131aa5f 100644 --- a/R/tokenfilter.R +++ b/R/tokenfilter.R @@ -64,7 +64,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize.R b/R/tokenize.R index c2e60611..a39fe757 100644 --- a/R/tokenize.R +++ b/R/tokenize.R @@ -202,7 +202,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_bpe.R b/R/tokenize_bpe.R index 57a2f351..f8e79752 100644 --- a/R/tokenize_bpe.R +++ b/R/tokenize_bpe.R @@ -42,7 +42,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("tokenizers.bpe") +#' @examplesIf rlang::is_installed(c("modeldata", "tokenizers.bpe")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_sentencepiece.R b/R/tokenize_sentencepiece.R index 88590c74..783dba0c 100644 --- a/R/tokenize_sentencepiece.R +++ b/R/tokenize_sentencepiece.R @@ -41,7 +41,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("sentencepiece") +#' @examplesIf rlang::is_installed(c("modeldata", "sentencepiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_wordpiece.R b/R/tokenize_wordpiece.R index 48d8996f..13f57296 100644 --- a/R/tokenize_wordpiece.R +++ b/R/tokenize_wordpiece.R @@ -35,7 +35,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("wordpiece") +#' @examplesIf rlang::is_installed(c("modeldata", "wordpiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenlist.R b/R/tokenlist.R index 81d262ae..bb267160 100644 --- a/R/tokenlist.R +++ b/R/tokenlist.R @@ -9,7 +9,7 @@ #' #' @return a [tokenlist] object. 
#' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' abc <- list(letters, LETTERS) #' tokenlist(abc) #' diff --git a/R/tokenmerge.R b/R/tokenmerge.R index 2f4738ad..81292f75 100644 --- a/R/tokenmerge.R +++ b/R/tokenmerge.R @@ -33,7 +33,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/untokenize.R b/R/untokenize.R index 024c440d..074783b4 100644 --- a/R/untokenize.R +++ b/R/untokenize.R @@ -37,7 +37,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Un-Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/man/show_tokens.Rd b/man/show_tokens.Rd index 4a6d3177..1bb178d1 100644 --- a/man/show_tokens.Rd +++ b/man/show_tokens.Rd @@ -23,6 +23,7 @@ used in final recipe steps. Note that this function will both prep() and bake() the recipe it is used on. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} text_tibble <- tibble(text = c("This is words", "They are nice!")) recipe(~text, data = text_tibble) \%>\% @@ -35,4 +36,5 @@ data(tate_text) recipe(~., data = tate_text) \%>\% step_tokenize(medium) \%>\% show_tokens(medium) +\dontshow{\}) # examplesIf} } diff --git a/man/step_clean_levels.Rd b/man/step_clean_levels.Rd index 04ec607c..eff12e58 100644 --- a/man/step_clean_levels.Rd +++ b/man/step_clean_levels.Rd @@ -74,7 +74,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("janitor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "janitor"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(Smithsonian) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 09d42a53..8e3c3345 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -122,7 +122,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/man/step_lda.Rd b/man/step_lda.Rd index 6447c422..a4a710e4 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -87,7 +87,7 @@ The underlying operation does not allow for case weights. 
} \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_ngram.Rd b/man/step_ngram.Rd index 08c443af..8d20d9d2 100644 --- a/man/step_ngram.Rd +++ b/man/step_ngram.Rd @@ -93,6 +93,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -113,6 +114,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_sequence_onehot.Rd b/man/step_sequence_onehot.Rd index 0259b0cf..dcbfb7d4 100644 --- a/man/step_sequence_onehot.Rd +++ b/man/step_sequence_onehot.Rd @@ -104,6 +104,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -120,6 +121,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 3) tidy(tate_obj, number = 3) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_stem.Rd b/man/step_stem.Rd index 8b753583..4a22b496 100644 --- a/man/step_stem.Rd +++ b/man/step_stem.Rd @@ -83,6 +83,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -121,6 +122,7 @@ bake(tate_obj, new_data = NULL, medium) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_stopwords.Rd b/man/step_stopwords.Rd index afc6d9b6..7ded7972 100644 --- a/man/step_stopwords.Rd +++ b/man/step_stopwords.Rd @@ -94,7 +94,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("stopwords")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "stopwords"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_textfeature.Rd b/man/step_textfeature.Rd index a216f54d..d7997c42 100644 --- a/man/step_textfeature.Rd +++ b/man/step_textfeature.Rd @@ -88,6 +88,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -116,6 +117,7 @@ recipe(~., data = tate_text) \%>\% ) \%>\% prep() \%>\% bake(new_data = NULL) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index a9a7ea8c..ee9189ab 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -114,7 +114,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_tf.Rd b/man/step_tf.Rd index c0a21373..07e4cfa4 100644 --- a/man/step_tf.Rd +++ b/man/step_tf.Rd @@ -132,6 +132,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -149,7 +150,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tfidf.Rd b/man/step_tfidf.Rd index fe8208cf..297993c1 100644 --- a/man/step_tfidf.Rd +++ b/man/step_tfidf.Rd @@ -128,6 +128,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -145,7 +146,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenfilter.Rd b/man/step_tokenfilter.Rd index 48d82350..42f0b858 100644 --- a/man/step_tokenfilter.Rd +++ b/man/step_tokenfilter.Rd @@ -116,6 +116,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -136,6 +137,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenize.Rd b/man/step_tokenize.Rd index 0da6c165..0515caa5 100644 --- a/man/step_tokenize.Rd +++ b/man/step_tokenize.Rd @@ -269,6 +269,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -296,6 +297,7 @@ tate_obj_chars <- recipe(~., data = tate_text) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_untokenize]{step_untokenize()}} to untokenize. diff --git a/man/step_tokenize_bpe.Rd b/man/step_tokenize_bpe.Rd index cd6d1d07..cedfe951 100644 --- a/man/step_tokenize_bpe.Rd +++ b/man/step_tokenize_bpe.Rd @@ -85,7 +85,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("tokenizers.bpe")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "tokenizers.bpe"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_sentencepiece.Rd b/man/step_tokenize_sentencepiece.Rd index ead33b8d..5c48788b 100644 --- a/man/step_tokenize_sentencepiece.Rd +++ b/man/step_tokenize_sentencepiece.Rd @@ -84,7 +84,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("sentencepiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "sentencepiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_wordpiece.Rd b/man/step_tokenize_wordpiece.Rd index 72cefe6a..72295135 100644 --- a/man/step_tokenize_wordpiece.Rd +++ b/man/step_tokenize_wordpiece.Rd @@ -77,7 +77,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("wordpiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "wordpiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenmerge.Rd b/man/step_tokenmerge.Rd index 383dee36..f39cb657 100644 --- a/man/step_tokenmerge.Rd +++ b/man/step_tokenmerge.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -91,6 +92,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_untokenize.Rd b/man/step_untokenize.Rd index 070f3ca4..8d03841c 100644 --- a/man/step_untokenize.Rd +++ b/man/step_untokenize.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -96,6 +97,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/tokenlist.Rd b/man/tokenlist.Rd index 102bb0bb..af8429ae 100644 --- a/man/tokenlist.Rd +++ b/man/tokenlist.Rd @@ -21,6 +21,7 @@ A \link{tokenlist} object is a thin wrapper around a list of character vectors, with a few attributes. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} abc <- list(letters, LETTERS) tokenlist(abc) @@ -34,4 +35,5 @@ data(tate_text) tokens <- tokenize_words(as.character(tate_text$medium)) tokenlist(tokens) +\dontshow{\}) # examplesIf} }
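For context, a minimal sketch of the pattern these hunks apply across R/ and man/: the roxygen `@examplesIf` tag takes an R expression, and roxygen2 renders it in the generated .Rd file as the `\dontshow{if (...) ...}` guard seen above, so the example is skipped when a suggested package such as modeldata is not installed. The step and data below are taken from the surrounding diff context and are illustrative only, not an additional change in this patch.

#' @examplesIf rlang::is_installed("modeldata")
#' library(recipes)
#' library(modeldata)
#' data(tate_text)
#'
#' # tokenize the `medium` column, then prep and bake the recipe
#' recipe(~., data = tate_text) %>%
#'   step_tokenize(medium) %>%
#'   prep() %>%
#'   bake(new_data = NULL)

When more than one suggested package is required, the condition is written with a character vector, e.g. `rlang::is_installed(c("modeldata", "tokenizers.bpe"))`, which is TRUE only if every listed package is available.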