From f97d68c31e2aa4081f7a95340ea81f12721bc7f8 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:17:42 -0800 Subject: [PATCH 1/7] add hard check --- .github/workflows/R-CMD-check-hard.yaml | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .github/workflows/R-CMD-check-hard.yaml diff --git a/.github/workflows/R-CMD-check-hard.yaml b/.github/workflows/R-CMD-check-hard.yaml new file mode 100644 index 00000000..ac3bc0fd --- /dev/null +++ b/.github/workflows/R-CMD-check-hard.yaml @@ -0,0 +1,59 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, +# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never +# installed, with the exception of testthat, knitr, and rmarkdown. The cache is +# never used to avoid accidentally restoring a cache containing a suggested +# dependency. +on: + push: + branches: [main, master] + pull_request: + +name: R-CMD-check-hard.yaml + +permissions: read-all + +jobs: + check-no-suggests: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + cache: false + extra-packages: | + any::rcmdcheck + any::testthat + any::knitr + any::rmarkdown + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' From f50d36b4547bd1b0f699453bf0672733de179bd6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:47:27 -0800 Subject: [PATCH 2/7] add stop on vignette --- ...ookbook---using-more-complex-recipes-involving-text.Rmd | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd index b14b2c5b..95c4d43e 100644 --- a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd +++ b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd @@ -10,7 +10,14 @@ vignette: > --- ```{r setup, include = FALSE} +if (rlang::is_installed(c("modeldata")) { + run <- TRUE +} else { + run <- FALSE +} + knitr::opts_chunk$set( + eval = run, collapse = TRUE, comment = "#>" ) From a25fb654839f6628fe51546153435a693d085062 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 13:59:48 -0800 Subject: [PATCH 3/7] missing paran --- .../cookbook---using-more-complex-recipes-involving-text.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd index 95c4d43e..3a64042d 100644 --- a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd +++ b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd @@ -10,7 +10,7 @@ vignette: > --- ```{r setup, include = FALSE} -if 
(rlang::is_installed(c("modeldata")) { +if (rlang::is_installed(c("modeldata"))) { run <- TRUE } else { run <- FALSE From fad67ec69486a973462a01d423ae71fd1109a1dc Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:29:37 -0800 Subject: [PATCH 4/7] add lots of skip if not installed --- .../testthat/_snaps/R4.4/tokenize_bpe.new.md | 16 ++++ tests/testthat/_snaps/dummy_hash.md | 10 +-- tests/testthat/_snaps/lda.md | 17 +++-- tests/testthat/test-clean_levels.R | 26 ++++--- tests/testthat/test-clean_names.R | 19 ++--- tests/testthat/test-dummy_hash.R | 76 ++++++++++++++++--- tests/testthat/test-lda.R | 50 ++++++++---- tests/testthat/test-lemma.R | 4 + tests/testthat/test-pos_filter.R | 6 ++ tests/testthat/test-stopwords.R | 8 ++ tests/testthat/test-texthash.R | 4 + tests/testthat/test-tokenize.R | 1 + tests/testthat/test-tokenize_bpe.R | 14 ++++ tests/testthat/test-tokenize_sentencepiece.R | 12 +++ tests/testthat/test-tokenize_wordpiece.R | 17 ++++- tests/testthat/test-tokenizer-spacyr.R | 1 + tests/testthat/test-tokenizer-tokenizersbpe.R | 6 ++ 17 files changed, 226 insertions(+), 61 deletions(-) create mode 100644 tests/testthat/_snaps/R4.4/tokenize_bpe.new.md diff --git a/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md new file mode 100644 index 00000000..0f4258f8 --- /dev/null +++ b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md @@ -0,0 +1,16 @@ +# Errors if vocabulary size is set to low. + + Code + recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>% + prep() + Condition + Warning in `read.dcf()`: + cannot open compressed file '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/tokenizers.bpe/DESCRIPTION', probable reason 'No such file or directory' + Message + 1 package (tokenizers.bpe) is needed for this step but is not installed. + To install run: `install.packages("tokenizers.bpe")` + Condition + Error in `step_tokenize_bpe()`: + Caused by error in `prep()`: + ! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23 + diff --git a/tests/testthat/_snaps/dummy_hash.md b/tests/testthat/_snaps/dummy_hash.md index 916a3164..12a1d82e 100644 --- a/tests/testthat/_snaps/dummy_hash.md +++ b/tests/testthat/_snaps/dummy_hash.md @@ -70,10 +70,10 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Feature hashing with: sponsor_code + * Feature hashing with: Species --- @@ -85,11 +85,11 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 20 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Feature hashing with: sponsor_code | Trained + * Feature hashing with: Species | Trained diff --git a/tests/testthat/_snaps/lda.md b/tests/testthat/_snaps/lda.md index 39bb3bd2..7c293622 100644 --- a/tests/testthat/_snaps/lda.md +++ b/tests/testthat/_snaps/lda.md @@ -70,28 +70,31 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Tokenization for: medium - * Text feature extraction for: medium + * Tokenization for: Species + * Text feature extraction for: Species --- Code prep(rec) + Condition + Warning in `get_dtm()`: + dtm has 0 rows. Empty iterator? 
Message -- Recipe ---------------------------------------------------------------------- -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 100 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Tokenization for: medium | Trained - * Text feature extraction for: medium | Trained + * Tokenization for: Species | Trained + * Text feature extraction for: Species | Trained diff --git a/tests/testthat/test-clean_levels.R b/tests/testthat/test-clean_levels.R index f7ad597e..b9fd822c 100644 --- a/tests/testthat/test-clean_levels.R +++ b/tests/testthat/test-clean_levels.R @@ -1,15 +1,13 @@ -library(testthat) -library(textrecipes) -library(modeldata) -data("Smithsonian") -smith_tr <- Smithsonian[1:15, ] -smith_te <- Smithsonian[16:20, ] - -rec <- recipe(~., data = smith_tr) - test_that("character input", { skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_levels(name, id = "") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + smith_te <- Smithsonian[16:20, ] + + cleaned <- recipe(~., data = smith_tr) %>% + step_clean_levels(name, id = "") tidy_exp_un <- tibble( terms = c("name"), @@ -50,6 +48,9 @@ test_that("character input", { test_that("factor input", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") smith_tr <- Smithsonian[1:15, ] smith_tr$name <- as.factor(smith_tr$name) smith_te <- Smithsonian[16:20, ] @@ -71,6 +72,11 @@ test_that("factor input", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + rec <- recipe(~name, data = smith_tr) %>% step_clean_levels(name) %>% update_role(name, new_role = "potato") %>% diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index b7b80310..31d70a69 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -1,15 +1,14 @@ -library(testthat) -library(textrecipes) -data(airquality) +test_that("can clean names", { + skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") -air_tr <- airquality[1:20, ] -air_te <- airquality[101:110, ] + data("airquality", package = "modeldata") -rec <- recipe(~., data = air_tr) + air_tr <- airquality[1:20, ] + air_te <- airquality[101:110, ] -test_that("can clean names", { - skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_names(all_predictors(), id = "") + cleaned <- recipe(~., data = air_tr) %>% + step_clean_names(all_predictors(), id = "") tidy_exp_un <- tibble( terms = c("all_predictors()"), @@ -35,6 +34,8 @@ test_that("can clean names", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("janitor") + rec <- recipe(mtcars) %>% step_clean_names(disp) %>% update_role(disp, new_role = "potato") %>% diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index 112c8f2b..87d18138 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -1,18 +1,19 @@ library(textrecipes) library(recipes) -data(grants, package = "modeldata") -test_data <- grants_test[1:20, c("contract_value_band", 
"sponsor_code")] -test_data <- tibble::as_tibble(test_data) - -rec <- recipe(~., data = test_data) test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code) obj <- rec %>% @@ -32,9 +33,16 @@ test_that("hashing gives double outputs", { test_that("hashing multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>% prep() %>% bake(new_data = NULL) @@ -46,9 +54,16 @@ test_that("hashing multiple factors", { test_that("hashing collapsed multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>% prep() %>% bake(new_data = NULL) @@ -60,9 +75,15 @@ test_that("hashing collapsed multiple factors", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code, num_terms = 256) %>% prep() @@ -77,7 +98,13 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) signed <- recipe(~., data = test_data) %>% step_dummy_hash(all_predictors(), num_terms = 2) %>% @@ -98,8 +125,14 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + dat <- test_data dat$text <- dat$sponsor_code 
dat$dummyhash_text_01 <- dat$sponsor_code @@ -131,6 +164,15 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") + data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) %>% update_role(sponsor_code, new_role = "potato") %>% @@ -190,8 +232,14 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + new_names <- paste0("dummyhash_sponsor_code_", 1:5) rec <- recipe(~ sponsor_code, data = test_data) %>% @@ -220,8 +268,14 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~ sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) @@ -242,8 +296,8 @@ test_that("printing", { skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_dummy_hash(sponsor_code) + rec <- recipe(~., data = iris) %>% + step_dummy_hash(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lda.R b/tests/testthat/test-lda.R index 40ec2dba..81c1b27b 100644 --- a/tests/testthat/test-lda.R +++ b/tests/testthat/test-lda.R @@ -1,19 +1,15 @@ -set.seed(1234) -library(recipes) -library(textrecipes) -library(modeldata) -data(tate_text) - -n_rows <- 100 -rec <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) - test_that("step_lda works as intended", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 10 - rec1 <- rec %>% + + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -29,10 +25,14 @@ test_that("step_lda works as intended", { test_that("step_lda works with num_topics argument", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 100 - rec1 <- rec %>% + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -45,8 +45,11 @@ test_that("step_lda works with num_topics 
argument", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + dat <- tate_text[seq_len(100), ] dat$text <- dat$medium dat$lda_text_1 <- dat$text @@ -66,9 +69,15 @@ test_that("check_name() is used", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - tokenized_test_data <- rec %>% + data("tate_text", package = "modeldata") + + n_rows <- 100 + + tokenized_test_data <- recipe(~ medium + artist, + data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% prep() %>% bake(new_data = NULL) @@ -131,10 +140,15 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + new_names <- paste0("lda_medium_", 1:10) + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = FALSE) @@ -163,8 +177,13 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = TRUE) @@ -180,15 +199,14 @@ test_that("keep_original_cols - can prep recipes with it missing", { ) }) - test_that("printing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_tokenize(medium) %>% - step_lda(medium) + rec <- recipe(~., data = iris) %>% + step_tokenize(Species) %>% + step_lda(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lemma.R b/tests/testthat/test-lemma.R index a7956383..dc598cd2 100644 --- a/tests/testthat/test-lemma.R +++ b/tests/testthat/test-lemma.R @@ -11,6 +11,7 @@ text <- tibble(text = c( test_that("lemmatization works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -53,6 +54,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -112,7 +114,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_lemma(all_predictors()) diff --git a/tests/testthat/test-pos_filter.R b/tests/testthat/test-pos_filter.R index 71722648..22c44213 100644 --- 
a/tests/testthat/test-pos_filter.R +++ b/tests/testthat/test-pos_filter.R @@ -11,6 +11,7 @@ text <- tibble(text = c( test_that("part of speech filtering works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -36,6 +37,7 @@ test_that("part of speech filtering works", { test_that("part of speech filtering removes everything", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -61,6 +63,7 @@ test_that("part of speech filtering removes everything", { test_that("part of speech filtering works with multiple tags", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -99,6 +102,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -158,7 +162,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_pos_filter(all_predictors()) diff --git a/tests/testthat/test-stopwords.R b/tests/testthat/test-stopwords.R index 575f918f..1171aa63 100644 --- a/tests/testthat/test-stopwords.R +++ b/tests/testthat/test-stopwords.R @@ -79,6 +79,8 @@ test_that("custom stopwords are supported", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("stopwords") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -99,6 +101,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) @@ -110,6 +114,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("stopwords") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_stopwords(rec1) @@ -123,6 +129,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index 2b57339d..e55ae152 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -111,6 +111,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("text2vec") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -205,6 +207,8 @@ test_that("keep_original_cols works", { }) test_that("keep_original_cols - can prep recipes with it missing", { + skip_if_not_installed("text2vec") + rec <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% step_texthash(text) diff --git a/tests/testthat/test-tokenize.R b/tests/testthat/test-tokenize.R index 2d7699e2..359f8b33 100644 --- a/tests/testthat/test-tokenize.R +++ 
b/tests/testthat/test-tokenize.R @@ -102,6 +102,7 @@ test_that("tokenization errors with wrong engines", { test_that("tokenization includes lemma attribute when avaliable", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() expect_type( diff --git a/tests/testthat/test-tokenize_bpe.R b/tests/testthat/test-tokenize_bpe.R index 52afd52b..70d1a4ad 100644 --- a/tests/testthat/test-tokenize_bpe.R +++ b/tests/testthat/test-tokenize_bpe.R @@ -62,6 +62,8 @@ text2_out <- list( ) test_that("output is list when length is 1 or 0", { + skip_if_not_installed("tokenizers.bpe") + data <- tibble(a = rep(c("a", ""), 20)) data_rec <- recipe(~., data = data) %>% @@ -72,6 +74,8 @@ test_that("output is list when length is 1 or 0", { }) test_that("step_tokenize_bpe works", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% prep() %>% @@ -84,6 +88,8 @@ test_that("step_tokenize_bpe works", { }) test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize_bpe(all_predictors()) %>% prep() %>% @@ -101,6 +107,8 @@ test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 60) %>% prep() %>% @@ -123,6 +131,8 @@ test_that("arguments are passed to tokenizers.bpe", { }) test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("tokenizers.bpe") + expect_snapshot( error = TRUE, variant = r_version(), @@ -151,6 +161,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% update_role(text1, new_role = "potato") %>% @@ -202,6 +214,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~., data = test_data) %>% step_tokenize_bpe(text1) diff --git a/tests/testthat/test-tokenize_sentencepiece.R b/tests/testthat/test-tokenize_sentencepiece.R index 1d6fd52e..2a1f8ad6 100644 --- a/tests/testthat/test-tokenize_sentencepiece.R +++ b/tests/testthat/test-tokenize_sentencepiece.R @@ -60,6 +60,8 @@ text2_out <- list( ) test_that("step_tokenize_sentencepiece works", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 80) %>% prep() %>% @@ -72,6 +74,8 @@ test_that("step_tokenize_sentencepiece works", { }) test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and multiple colunms", { + skip_if_not_installed("sentencepiece") + res <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(all_predictors(), vocabulary_size = 80) %>% prep() %>% @@ -89,6 +93,8 @@ test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and m }) test_that("arguments are passed to tokenizers.sentencepiece", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 60) %>% prep() %>% @@ -111,6 +117,8 @@ test_that("arguments are passed to tokenizers.sentencepiece", { }) 
test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("sentencepiece") + expect_snapshot( error = TRUE, recipe(~text1, data = test_data) %>% @@ -122,6 +130,8 @@ test_that("Errors if vocabulary size is set to low.", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) %>% update_role(text1, new_role = "potato") %>% @@ -173,6 +183,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) diff --git a/tests/testthat/test-tokenize_wordpiece.R b/tests/testthat/test-tokenize_wordpiece.R index aa351753..3989c38e 100644 --- a/tests/testthat/test-tokenize_wordpiece.R +++ b/tests/testthat/test-tokenize_wordpiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -32,6 +29,8 @@ text2_out <- list( ) test_that("step_tokenize_wordpiece works", { + skip_if_not_installed("wordpiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_wordpiece(text1) %>% prep() %>% @@ -44,6 +43,8 @@ test_that("step_tokenize_wordpiece works", { }) test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple colunms", { + skip_if_not_installed("wordpiece") + res <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(all_predictors()) %>% prep() %>% @@ -63,6 +64,8 @@ test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~ text1 + text2, data = test_data) %>% step_tokenize_wordpiece(text1, text2) %>% update_role(text1, new_role = "potato") %>% @@ -77,6 +80,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -88,6 +93,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("wordpiece") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_tokenize_wordpiece(rec1) @@ -101,6 +108,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -114,6 +123,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(text1) diff --git a/tests/testthat/test-tokenizer-spacyr.R b/tests/testthat/test-tokenizer-spacyr.R index a0365e5e..9c04c4e8 100644 --- a/tests/testthat/test-tokenizer-spacyr.R +++ b/tests/testthat/test-tokenizer-spacyr.R @@ -10,6 +10,7 @@ text <- c( test_that("tokenizer works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() out <- spacyr_tokenizer_words(text) diff --git a/tests/testthat/test-tokenizer-tokenizersbpe.R b/tests/testthat/test-tokenizer-tokenizersbpe.R index 
afe28153..2137fdc9 100644 --- a/tests/testthat/test-tokenizer-tokenizersbpe.R +++ b/tests/testthat/test-tokenizer-tokenizersbpe.R @@ -97,6 +97,8 @@ test_that("tokenizer works", { }) test_that("step_tokenize works with tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe") %>% prep() %>% @@ -109,6 +111,8 @@ test_that("step_tokenize works with tokenizers.bpe", { }) test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize(all_predictors(), engine = "tokenizers.bpe") %>% prep() %>% @@ -126,6 +130,8 @@ test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe", From f6e105535e035273fb661bb21951dad224e80cbf Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:39:13 -0800 Subject: [PATCH 5/7] remove library calls in tests --- tests/embeddings/embeddings-references.R | 3 --- tests/testthat/test-dummy_hash.R | 4 --- tests/testthat/test-lemma.R | 4 --- tests/testthat/test-ngram.R | 3 --- tests/testthat/test-pos_filter.R | 4 --- tests/testthat/test-s3-methods.R | 3 --- tests/testthat/test-sequence_onehot.R | 4 --- tests/testthat/test-stem.R | 3 --- tests/testthat/test-stopwords.R | 3 --- tests/testthat/test-text_normalization.R | 4 --- tests/testthat/test-textfeature.R | 3 --- tests/testthat/test-texthash.R | 3 --- tests/testthat/test-tf.R | 3 --- tests/testthat/test-tfidf.R | 3 --- tests/testthat/test-tokenfilter.R | 3 --- tests/testthat/test-tokenize.R | 3 --- tests/testthat/test-tokenize_bpe.R | 3 --- tests/testthat/test-tokenize_sentencepiece.R | 3 --- tests/testthat/test-tokenizer-spacyr.R | 3 --- tests/testthat/test-tokenizer-tokenizersbpe.R | 3 --- tests/testthat/test-tokenlist.R | 27 +++++++++---------- tests/testthat/test-tokenmerge.R | 3 --- tests/testthat/test-untokenize.R | 3 --- tests/testthat/test-word_embeddings.R | 2 -- 24 files changed, 12 insertions(+), 88 deletions(-) diff --git a/tests/embeddings/embeddings-references.R b/tests/embeddings/embeddings-references.R index 7709f178..b0e089cf 100644 --- a/tests/embeddings/embeddings-references.R +++ b/tests/embeddings/embeddings-references.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(testthat) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index 87d18138..c4cbb775 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) - - test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") diff --git a/tests/testthat/test-lemma.R b/tests/testthat/test-lemma.R index dc598cd2..ddcd59cf 100644 --- a/tests/testthat/test-lemma.R +++ b/tests/testthat/test-lemma.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-ngram.R b/tests/testthat/test-ngram.R index e396a35e..4fc4ac31 100644 --- a/tests/testthat/test-ngram.R +++ b/tests/testthat/test-ngram.R @@ -135,9 +135,6 @@ test_that("ngram 
returns length zero vectors when length(x) < n", { ) }) -library(recipes) -library(textrecipes) - test_tibble <- tibble(text = c( "not eat them here or there.", "not eat them anywhere." diff --git a/tests/testthat/test-pos_filter.R b/tests/testthat/test-pos_filter.R index 22c44213..9d5e826f 100644 --- a/tests/testthat/test-pos_filter.R +++ b/tests/testthat/test-pos_filter.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-s3-methods.R b/tests/testthat/test-s3-methods.R index f00c1f20..9c0ce5a3 100644 --- a/tests/testthat/test-s3-methods.R +++ b/tests/testthat/test-s3-methods.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-sequence_onehot.R b/tests/testthat/test-sequence_onehot.R index de1d7236..3c55a1d2 100644 --- a/tests/testthat/test-sequence_onehot.R +++ b/tests/testthat/test-sequence_onehot.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stem.R b/tests/testthat/test-stem.R index 8eaca0ec..96069f7c 100644 --- a/tests/testthat/test-stem.R +++ b/tests/testthat/test-stem.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stopwords.R b/tests/testthat/test-stopwords.R index 1171aa63..bfdd3028 100644 --- a/tests/testthat/test-stopwords.R +++ b/tests/testthat/test-stopwords.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-text_normalization.R b/tests/testthat/test-text_normalization.R index d95e2322..25774994 100644 --- a/tests/testthat/test-text_normalization.R +++ b/tests/testthat/test-text_normalization.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(tibble) - ex_dat <- tibble(text = c("sch\U00f6n", "scho\U0308n")) test_that("simple sqrt trans", { diff --git a/tests/testthat/test-textfeature.R b/tests/testthat/test-textfeature.R index cfd4990f..0c755464 100644 --- a/tests/testthat/test-textfeature.R +++ b/tests/testthat/test-textfeature.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index e55ae152..e36d24f4 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tf.R b/tests/testthat/test-tf.R index 69ab9b9f..0affe2e1 100644 --- a/tests/testthat/test-tf.R +++ b/tests/testthat/test-tf.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tfidf.R b/tests/testthat/test-tfidf.R index e0b2e37a..c9a846d1 100644 --- a/tests/testthat/test-tfidf.R +++ b/tests/testthat/test-tfidf.R @@ -1,6 
+1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenfilter.R b/tests/testthat/test-tokenfilter.R index 39295d99..e2bb0af9 100644 --- a/tests/testthat/test-tokenfilter.R +++ b/tests/testthat/test-tokenfilter.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenize.R b/tests/testthat/test-tokenize.R index 359f8b33..c6691b86 100644 --- a/tests/testthat/test-tokenize.R +++ b/tests/testthat/test-tokenize.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenize_bpe.R b/tests/testthat/test-tokenize_bpe.R index 70d1a4ad..8ab94853 100644 --- a/tests/testthat/test-tokenize_bpe.R +++ b/tests/testthat/test-tokenize_bpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( diff --git a/tests/testthat/test-tokenize_sentencepiece.R b/tests/testthat/test-tokenize_sentencepiece.R index 2a1f8ad6..5a5da763 100644 --- a/tests/testthat/test-tokenize_sentencepiece.R +++ b/tests/testthat/test-tokenize_sentencepiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenizer-spacyr.R b/tests/testthat/test-tokenizer-spacyr.R index 9c04c4e8..0b281c3d 100644 --- a/tests/testthat/test-tokenizer-spacyr.R +++ b/tests/testthat/test-tokenizer-spacyr.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text <- c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenizer-tokenizersbpe.R b/tests/testthat/test-tokenizer-tokenizersbpe.R index 2137fdc9..0a56b73e 100644 --- a/tests/testthat/test-tokenizer-tokenizersbpe.R +++ b/tests/testthat/test-tokenizer-tokenizersbpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( diff --git a/tests/testthat/test-tokenlist.R b/tests/testthat/test-tokenlist.R index 94058803..d4c3e193 100644 --- a/tests/testthat/test-tokenlist.R +++ b/tests/testthat/test-tokenlist.R @@ -1,6 +1,3 @@ -library(testthat) -library(vctrs) - ## Creation ------------------------------------------------------------------- test_that("tokenlist creation works", { @@ -10,12 +7,12 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), "tokens" ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) @@ -30,17 +27,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(LETTERS, LETTERS) ) @@ -55,17 +52,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "pos") ) expect_equal( - 
field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) @@ -83,22 +80,22 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma", "pos") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(letters, LETTERS) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) diff --git a/tests/testthat/test-tokenmerge.R b/tests/testthat/test-tokenmerge.R index 30596df9..f2509115 100644 --- a/tests/testthat/test-tokenmerge.R +++ b/tests/testthat/test-tokenmerge.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble( text1 = c( "I would not eat them here or there.", diff --git a/tests/testthat/test-untokenize.R b/tests/testthat/test-untokenize.R index 4a71461f..0c993900 100644 --- a/tests/testthat/test-untokenize.R +++ b/tests/testthat/test-untokenize.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-word_embeddings.R b/tests/testthat/test-word_embeddings.R index 06330a86..64509d5f 100644 --- a/tests/testthat/test-word_embeddings.R +++ b/tests/testthat/test-word_embeddings.R @@ -1,5 +1,3 @@ -library(recipes) - embeddings <- readRDS(test_path("emb-data", "embeddings.rds")) sentence_embeddings_long <- readRDS(test_path("emb-data", "long.rds")) From 02bb9b94e375f661b738a82122d5341124893765 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 15:44:14 -0800 Subject: [PATCH 6/7] fix more issues --- R/clean_levels.R | 2 +- tests/testthat/_snaps/clean_levels.md | 10 +++++----- tests/testthat/_snaps/clean_names.md | 8 ++++---- tests/testthat/test-clean_levels.R | 3 ++- tests/testthat/test-clean_names.R | 6 +++--- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R/clean_levels.R b/R/clean_levels.R index c6f8683e..be6afaee 100644 --- a/R/clean_levels.R +++ b/R/clean_levels.R @@ -139,7 +139,7 @@ bake.step_clean_levels <- function(object, new_data, ...) { new_data[[col_name]] <- janitor::make_clean_names(new_data[[col_name]]) } - } +} new_data } diff --git a/tests/testthat/_snaps/clean_levels.md b/tests/testthat/_snaps/clean_levels.md index fe29e220..a3077070 100644 --- a/tests/testthat/_snaps/clean_levels.md +++ b/tests/testthat/_snaps/clean_levels.md @@ -51,10 +51,10 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Operations - * Cleaning factor levels for: name + * Cleaning factor levels for: Species --- @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Training information - Training data contained 15 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. 
-- Operations - * Cleaning factor levels for: name | Trained + * Cleaning factor levels for: Species | Trained diff --git a/tests/testthat/_snaps/clean_names.md b/tests/testthat/_snaps/clean_names.md index e6426902..3db2bb79 100644 --- a/tests/testthat/_snaps/clean_names.md +++ b/tests/testthat/_snaps/clean_names.md @@ -51,7 +51,7 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Operations * Cleaning variable names for: all_predictors() @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Training information - Training data contained 20 data points and 4 incomplete rows. + Training data contained 32 data points and no incomplete rows. -- Operations - * Cleaning variable names for: Ozone, Solar.R, Wind, Temp, ... | Trained + * Cleaning variable names for: mpg, cyl, disp, hp, drat, wt, ... | Trained diff --git a/tests/testthat/test-clean_levels.R b/tests/testthat/test-clean_levels.R index b9fd822c..8016fa63 100644 --- a/tests/testthat/test-clean_levels.R +++ b/tests/testthat/test-clean_levels.R @@ -129,7 +129,8 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_levels(name) + rec <- recipe(~., data = iris) %>% + step_clean_levels(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index 31d70a69..475e0d5c 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -2,8 +2,6 @@ test_that("can clean names", { skip_if_not_installed("janitor") skip_if_not_installed("modeldata") - data("airquality", package = "modeldata") - air_tr <- airquality[1:20, ] air_te <- airquality[101:110, ] @@ -88,7 +86,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_names(all_predictors()) + + rec <- recipe(~., data = mtcars) %>% + step_clean_names(all_predictors()) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) From 827d61f538918286df889ec5ffa08ad176e938e2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Sat, 9 Nov 2024 10:21:47 -0800 Subject: [PATCH 7/7] update examplesIf --- R/clean_levels.R | 2 +- R/dummy_hash.R | 2 +- R/lda.R | 2 +- R/ngram.R | 2 +- R/sequence_onehot.R | 2 +- R/show_tokens.R | 2 +- R/stem.R | 2 +- R/stopwords.R | 2 +- R/textfeature.R | 2 +- R/texthash.R | 2 +- R/tf.R | 2 +- R/tfidf.R | 2 +- R/tokenfilter.R | 2 +- R/tokenize.R | 2 +- R/tokenize_bpe.R | 2 +- R/tokenize_sentencepiece.R | 2 +- R/tokenize_wordpiece.R | 2 +- R/tokenlist.R | 2 +- R/tokenmerge.R | 2 +- R/untokenize.R | 2 +- man/show_tokens.Rd | 2 ++ man/step_clean_levels.Rd | 2 +- man/step_dummy_hash.Rd | 2 +- man/step_lda.Rd | 2 +- man/step_ngram.Rd | 2 ++ man/step_sequence_onehot.Rd | 2 ++ man/step_stem.Rd | 2 ++ man/step_stopwords.Rd | 2 +- man/step_textfeature.Rd | 2 ++ man/step_texthash.Rd | 2 +- man/step_tf.Rd | 3 ++- man/step_tfidf.Rd | 3 ++- man/step_tokenfilter.Rd | 2 ++ man/step_tokenize.Rd | 2 ++ man/step_tokenize_bpe.Rd | 2 +- man/step_tokenize_sentencepiece.Rd | 2 +- man/step_tokenize_wordpiece.Rd | 2 +- man/step_tokenmerge.Rd | 2 ++ man/step_untokenize.Rd | 2 ++ man/tokenlist.Rd | 2 ++ 40 files changed, 52 insertions(+), 30 deletions(-) diff --git a/R/clean_levels.R b/R/clean_levels.R index be6afaee..8ab1d8a7 100644 --- a/R/clean_levels.R +++ b/R/clean_levels.R @@ -41,7 +41,7 @@ #' [recipes::step_unknown()], [recipes::step_novel()], 
[recipes::step_other()] #' @family Steps for Text Cleaning #' -#' @examplesIf rlang::is_installed("janitor") +#' @examplesIf rlang::is_installed(c("modeldata", "janitor")) #' library(recipes) #' library(modeldata) #' data(Smithsonian) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 3cfc8aad..bedd246b 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -72,7 +72,7 @@ #' @seealso [recipes::step_dummy()] #' @family Steps for Numeric Variables From Characters #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/R/lda.R b/R/lda.R index 2bd2c334..c767f75b 100644 --- a/R/lda.R +++ b/R/lda.R @@ -38,7 +38,7 @@ #' #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/ngram.R b/R/ngram.R index 6fa8d1c0..eabfef85 100644 --- a/R/ngram.R +++ b/R/ngram.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/sequence_onehot.R b/R/sequence_onehot.R index 223f852c..b38f0af8 100644 --- a/R/sequence_onehot.R +++ b/R/sequence_onehot.R @@ -52,7 +52,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/show_tokens.R b/R/show_tokens.R index bcbfb6d8..49e302d5 100644 --- a/R/show_tokens.R +++ b/R/show_tokens.R @@ -12,7 +12,7 @@ #' @return A list of character vectors #' @export #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' text_tibble <- tibble(text = c("This is words", "They are nice!")) #' #' recipe(~text, data = text_tibble) %>% diff --git a/R/stem.R b/R/stem.R index c49b4bde..a3c285a6 100644 --- a/R/stem.R +++ b/R/stem.R @@ -42,7 +42,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/stopwords.R b/R/stopwords.R index a00b4eb8..ae8ca7cb 100644 --- a/R/stopwords.R +++ b/R/stopwords.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examplesIf rlang::is_installed("stopwords") +#' @examplesIf rlang::is_installed(c("modeldata", "stopwords")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/textfeature.R b/R/textfeature.R index 57e2b313..02857b95 100644 --- a/R/textfeature.R +++ b/R/textfeature.R @@ -42,7 +42,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/texthash.R b/R/texthash.R index 48a549fa..8178dbcf 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -62,7 
+62,7 @@ #' [step_text_normalization()] to perform text normalization. #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/tf.R b/R/tf.R index f1502632..f8098bb2 100644 --- a/R/tf.R +++ b/R/tf.R @@ -74,7 +74,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tfidf.R b/R/tfidf.R index 73ad9a72..437a4606 100644 --- a/R/tfidf.R +++ b/R/tfidf.R @@ -68,7 +68,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tokenfilter.R b/R/tokenfilter.R index 669a325e..b131aa5f 100644 --- a/R/tokenfilter.R +++ b/R/tokenfilter.R @@ -64,7 +64,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize.R b/R/tokenize.R index c2e60611..a39fe757 100644 --- a/R/tokenize.R +++ b/R/tokenize.R @@ -202,7 +202,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_bpe.R b/R/tokenize_bpe.R index 57a2f351..f8e79752 100644 --- a/R/tokenize_bpe.R +++ b/R/tokenize_bpe.R @@ -42,7 +42,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("tokenizers.bpe") +#' @examplesIf rlang::is_installed(c("modeldata", "tokenizers.bpe")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_sentencepiece.R b/R/tokenize_sentencepiece.R index 88590c74..783dba0c 100644 --- a/R/tokenize_sentencepiece.R +++ b/R/tokenize_sentencepiece.R @@ -41,7 +41,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("sentencepiece") +#' @examplesIf rlang::is_installed(c("modeldata", "sentencepiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_wordpiece.R b/R/tokenize_wordpiece.R index 48d8996f..13f57296 100644 --- a/R/tokenize_wordpiece.R +++ b/R/tokenize_wordpiece.R @@ -35,7 +35,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("wordpiece") +#' @examplesIf rlang::is_installed(c("modeldata", "wordpiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenlist.R b/R/tokenlist.R index 81d262ae..bb267160 100644 --- a/R/tokenlist.R +++ b/R/tokenlist.R @@ -9,7 +9,7 @@ #' #' @return a [tokenlist] object. 
#' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' abc <- list(letters, LETTERS) #' tokenlist(abc) #' diff --git a/R/tokenmerge.R b/R/tokenmerge.R index 2f4738ad..81292f75 100644 --- a/R/tokenmerge.R +++ b/R/tokenmerge.R @@ -33,7 +33,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/untokenize.R b/R/untokenize.R index 024c440d..074783b4 100644 --- a/R/untokenize.R +++ b/R/untokenize.R @@ -37,7 +37,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Un-Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/man/show_tokens.Rd b/man/show_tokens.Rd index 4a6d3177..1bb178d1 100644 --- a/man/show_tokens.Rd +++ b/man/show_tokens.Rd @@ -23,6 +23,7 @@ used in final recipe steps. Note that this function will both prep() and bake() the recipe it is used on. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} text_tibble <- tibble(text = c("This is words", "They are nice!")) recipe(~text, data = text_tibble) \%>\% @@ -35,4 +36,5 @@ data(tate_text) recipe(~., data = tate_text) \%>\% step_tokenize(medium) \%>\% show_tokens(medium) +\dontshow{\}) # examplesIf} } diff --git a/man/step_clean_levels.Rd b/man/step_clean_levels.Rd index 04ec607c..eff12e58 100644 --- a/man/step_clean_levels.Rd +++ b/man/step_clean_levels.Rd @@ -74,7 +74,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("janitor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "janitor"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(Smithsonian) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 09d42a53..8e3c3345 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -122,7 +122,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/man/step_lda.Rd b/man/step_lda.Rd index 6447c422..a4a710e4 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -87,7 +87,7 @@ The underlying operation does not allow for case weights. 
} \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_ngram.Rd b/man/step_ngram.Rd index 08c443af..8d20d9d2 100644 --- a/man/step_ngram.Rd +++ b/man/step_ngram.Rd @@ -93,6 +93,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -113,6 +114,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_sequence_onehot.Rd b/man/step_sequence_onehot.Rd index 0259b0cf..dcbfb7d4 100644 --- a/man/step_sequence_onehot.Rd +++ b/man/step_sequence_onehot.Rd @@ -104,6 +104,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -120,6 +121,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 3) tidy(tate_obj, number = 3) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_stem.Rd b/man/step_stem.Rd index 8b753583..4a22b496 100644 --- a/man/step_stem.Rd +++ b/man/step_stem.Rd @@ -83,6 +83,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -121,6 +122,7 @@ bake(tate_obj, new_data = NULL, medium) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_stopwords.Rd b/man/step_stopwords.Rd index afc6d9b6..7ded7972 100644 --- a/man/step_stopwords.Rd +++ b/man/step_stopwords.Rd @@ -94,7 +94,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("stopwords")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "stopwords"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_textfeature.Rd b/man/step_textfeature.Rd index a216f54d..d7997c42 100644 --- a/man/step_textfeature.Rd +++ b/man/step_textfeature.Rd @@ -88,6 +88,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -116,6 +117,7 @@ recipe(~., data = tate_text) \%>\% ) \%>\% prep() \%>\% bake(new_data = NULL) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index a9a7ea8c..ee9189ab 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -114,7 +114,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_tf.Rd b/man/step_tf.Rd index c0a21373..07e4cfa4 100644 --- a/man/step_tf.Rd +++ b/man/step_tf.Rd @@ -132,6 +132,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -149,7 +150,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tfidf.Rd b/man/step_tfidf.Rd index fe8208cf..297993c1 100644 --- a/man/step_tfidf.Rd +++ b/man/step_tfidf.Rd @@ -128,6 +128,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -145,7 +146,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenfilter.Rd b/man/step_tokenfilter.Rd index 48d82350..42f0b858 100644 --- a/man/step_tokenfilter.Rd +++ b/man/step_tokenfilter.Rd @@ -116,6 +116,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -136,6 +137,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenize.Rd b/man/step_tokenize.Rd index 0da6c165..0515caa5 100644 --- a/man/step_tokenize.Rd +++ b/man/step_tokenize.Rd @@ -269,6 +269,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -296,6 +297,7 @@ tate_obj_chars <- recipe(~., data = tate_text) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_untokenize]{step_untokenize()}} to untokenize. diff --git a/man/step_tokenize_bpe.Rd b/man/step_tokenize_bpe.Rd index cd6d1d07..cedfe951 100644 --- a/man/step_tokenize_bpe.Rd +++ b/man/step_tokenize_bpe.Rd @@ -85,7 +85,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("tokenizers.bpe")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "tokenizers.bpe"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_sentencepiece.Rd b/man/step_tokenize_sentencepiece.Rd index ead33b8d..5c48788b 100644 --- a/man/step_tokenize_sentencepiece.Rd +++ b/man/step_tokenize_sentencepiece.Rd @@ -84,7 +84,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("sentencepiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "sentencepiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_wordpiece.Rd b/man/step_tokenize_wordpiece.Rd index 72cefe6a..72295135 100644 --- a/man/step_tokenize_wordpiece.Rd +++ b/man/step_tokenize_wordpiece.Rd @@ -77,7 +77,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("wordpiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "wordpiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenmerge.Rd b/man/step_tokenmerge.Rd index 383dee36..f39cb657 100644 --- a/man/step_tokenmerge.Rd +++ b/man/step_tokenmerge.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -91,6 +92,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_untokenize.Rd b/man/step_untokenize.Rd index 070f3ca4..8d03841c 100644 --- a/man/step_untokenize.Rd +++ b/man/step_untokenize.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. 
} \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -96,6 +97,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/tokenlist.Rd b/man/tokenlist.Rd index 102bb0bb..af8429ae 100644 --- a/man/tokenlist.Rd +++ b/man/tokenlist.Rd @@ -21,6 +21,7 @@ A \link{tokenlist} object is a thin wrapper around a list of character vectors, with a few attributes. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} abc <- list(letters, LETTERS) tokenlist(abc) @@ -34,4 +35,5 @@ data(tate_text) tokens <- tokenize_words(as.character(tate_text$medium)) tokenlist(tokens) +\dontshow{\}) # examplesIf} }
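For context, a minimal sketch of the pattern these hunks apply across R/ and man/: the roxygen `@examplesIf` tag takes an R expression, and roxygen2 renders it in the generated .Rd file as the `\dontshow{if (...) ...}` guard seen above, so the example is skipped when a suggested package such as modeldata is not installed. The step and data below are taken from the surrounding diff context and are illustrative only, not an additional change in this patch.

#' @examplesIf rlang::is_installed("modeldata")
#' library(recipes)
#' library(modeldata)
#' data(tate_text)
#'
#' # tokenize the `medium` column, then prep and bake the recipe
#' recipe(~., data = tate_text) %>%
#'   step_tokenize(medium) %>%
#'   prep() %>%
#'   bake(new_data = NULL)

When more than one suggested package is required, the condition is written with a character vector, e.g. `rlang::is_installed(c("modeldata", "tokenizers.bpe"))`, which is TRUE only if every listed package is available.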