Skip to content

Commit 60999a6

Browse files
Merge pull request #271 from tidymodels/more-snapshots
More snapshots
2 parents c7f2a31 + 2ee21c2 commit 60999a6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+322
-126
lines changed

DESCRIPTION

+3-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ URL: https://github.com/tidymodels/textrecipes,
2020
BugReports: https://github.com/tidymodels/textrecipes/issues
2121
Depends:
2222
R (>= 3.6),
23-
recipes (>= 1.0.7)
23+
recipes (>= 1.1.0.9000)
2424
Imports:
2525
lifecycle,
2626
dplyr,
@@ -53,6 +53,8 @@ Suggests:
5353
tokenizers.bpe,
5454
udpipe,
5555
wordpiece
56+
Remotes:
57+
tidymodels/recipes
5658
LinkingTo:
5759
cpp11
5860
VignetteBuilder:
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Errors if vocabulary size is set too low.
2+
3+
Code
4+
recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>%
5+
prep()
6+
Condition
7+
Error in `step_tokenize_bpe()`:
8+
Caused by error in `prep()`:
9+
! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23
10+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Errors if vocabulary size is set too low.
2+
3+
Code
4+
recipe(~text, data = tibble(text = "hello")) %>% step_tokenize(text, engine = "tokenizers.bpe",
5+
training_options = list(vocab_size = 2)) %>% prep()
6+
Condition
7+
Error in `step_tokenize()`:
8+
Caused by error in `prep()`:
9+
! `vocabulary_size` of 2 is too small for column `text` which has a unique character count of 4
10+

tests/testthat/_snaps/clean_levels.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = smith_tr[, -1])
5+
Condition
6+
Error in `step_clean_levels()`:
7+
! The following required column is missing from `new_data`: name.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/clean_names.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = mtcars[, -3])
5+
Condition
6+
Error in `step_clean_names()`:
7+
! The following required column is missing from `new_data`: disp.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/dummy_hash.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
! Name collision occurred. The following variable names already exist:
99
* `dummyhash_text_01`
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = test_data[, -2])
15+
Condition
16+
Error in `step_dummy_hash()`:
17+
! The following required column is missing from `new_data`: sponsor_code.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/_snaps/lda.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
! Name collision occurred. The following variable names already exist:
99
* `lda_text_1`
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = tokenized_test_data[, -1])
15+
Condition
16+
Error in `step_lda()`:
17+
! The following required column is missing from `new_data`: medium.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/_snaps/lemma.md

+8
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77
Caused by error in `bake()`:
88
! `text` doesn't have a lemma attribute. Make sure the tokenization step includes lemmatization.
99

10+
# bake method errors when needed non-standard role columns are missing
11+
12+
Code
13+
bake(trained, new_data = tokenized_test_data[, -1])
14+
Condition
15+
Error in `step_lemma()`:
16+
! The following required column is missing from `new_data`: text.
17+
1018
# empty printing
1119

1220
Code

tests/testthat/_snaps/ngram.md

+8
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@
1414
Error:
1515
! n must be a positive integer.
1616

17+
# bake method errors when needed non-standard role columns are missing
18+
19+
Code
20+
bake(trained, new_data = tokenized_test_data[, -1])
21+
Condition
22+
Error in `step_ngram()`:
23+
! The following required column is missing from `new_data`: text.
24+
1725
# empty printing
1826

1927
Code

tests/testthat/_snaps/pos_filter.md

+8
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77
Caused by error in `bake()`:
88
! `text` doesn't have a pos attribute. Make sure the tokenization step includes part of speech tagging.
99

10+
# bake method errors when needed non-standard role columns are missing
11+
12+
Code
13+
bake(trained, new_data = tokenized_test_data[, -1])
14+
Condition
15+
Error in `step_pos_filter()`:
16+
! The following required column is missing from `new_data`: text.
17+
1018
# empty printing
1119

1220
Code

tests/testthat/_snaps/sequence_onehot.md

+8
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@
4141
! Name collision occurred. The following variable names already exist:
4242
* `seq1hot_text_1`
4343

44+
# bake method errors when needed non-standard role columns are missing
45+
46+
Code
47+
bake(trained, new_data = tokenized_test_data[, -1])
48+
Condition
49+
Error in `step_sequence_onehot()`:
50+
! The following required column is missing from `new_data`: text.
51+
4452
# empty printing
4553

4654
Code

tests/testthat/_snaps/stem.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = tokenized_test_data[, -1])
5+
Condition
6+
Error in `step_stem()`:
7+
! The following required column is missing from `new_data`: text.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/stopwords.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = tokenized_test_data[, -1])
5+
Condition
6+
Error in `step_stopwords()`:
7+
! The following required column is missing from `new_data`: text.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/text_normalization.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = ex_dat[, -1])
5+
Condition
6+
Error in `step_text_normalization()`:
7+
! The following required column is missing from `new_data`: text.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/textfeature.md

+8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@
2929
! Name collision occurred. The following variable names already exist:
3030
* `textfeature_text_n_words`
3131

32+
# bake method errors when needed non-standard role columns are missing
33+
34+
Code
35+
bake(trained, new_data = test_data[, -1])
36+
Condition
37+
Error in `step_textfeature()`:
38+
! The following required column is missing from `new_data`: text.
39+
3240
# empty printing
3341

3442
Code

tests/testthat/_snaps/texthash.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
! Name collision occurred. The following variable names already exist:
99
* `texthash_text_0001`
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = tokenized_test_data[, -1])
15+
Condition
16+
Error in `step_texthash()`:
17+
! The following required column is missing from `new_data`: text.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/_snaps/tf.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
! Name collision occurred. The following variable names already exist:
99
* `tf_text_i`
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = tokenized_test_data[, -1])
15+
Condition
16+
Error in `step_tf()`:
17+
! The following required column is missing from `new_data`: text.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/_snaps/tfidf.md

+8
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
Please retrain this recipe with version 0.5.1 or higher.
2929
* A data leakage bug has been fixed for `step_tfidf()`.
3030

31+
# bake method errors when needed non-standard role columns are missing
32+
33+
Code
34+
bake(trained, new_data = tokenized_test_data[, -1])
35+
Condition
36+
Error in `step_tfidf()`:
37+
! The following required column is missing from `new_data`: text.
38+
3139
# empty printing
3240

3341
Code

tests/testthat/_snaps/tokenfilter.md

+8
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@
3636
* Tokenization for: text | Trained
3737
* Text filtering for: text | Trained
3838

39+
# bake method errors when needed non-standard role columns are missing
40+
41+
Code
42+
bake(trained, new_data = tokenized_test_data[, -1])
43+
Condition
44+
Error in `step_tokenfilter()`:
45+
! The following required column is missing from `new_data`: text.
46+
3947
# empty printing
4048

4149
Code

tests/testthat/_snaps/tokenize.md

+8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@
1616
Caused by error in `prep()`:
1717
! `engine` argument is not valid.
1818

19+
# bake method errors when needed non-standard role columns are missing
20+
21+
Code
22+
bake(trained, new_data = test_data[, -1])
23+
Condition
24+
Error in `step_tokenize()`:
25+
! The following required column is missing from `new_data`: text.
26+
1927
# empty printing
2028

2129
Code

tests/testthat/_snaps/tokenize_bpe.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = test_data[, -1])
5+
Condition
6+
Error in `step_tokenize_bpe()`:
7+
! The following required column is missing from `new_data`: text1.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/tokenize_sentencepiece.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
Caused by error in `prep()`:
99
! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23.
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = test_data[, -1])
15+
Condition
16+
Error in `step_tokenize_sentencepiece()`:
17+
! The following required column is missing from `new_data`: text1.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/_snaps/tokenize_wordpiece.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = test_data[, -1])
5+
Condition
6+
Error in `step_tokenize_wordpiece()`:
7+
! The following required column is missing from `new_data`: text1.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/tokenmerge.md

+8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@
1818
! Name collision occurred. The following variable names already exist:
1919
* `tokenmerge`
2020

21+
# bake method errors when needed non-standard role columns are missing
22+
23+
Code
24+
bake(trained, new_data = tokenized_test_data[, -1])
25+
Condition
26+
Error in `step_tokenmerge()`:
27+
! The following required column is missing from `new_data`: text1.
28+
2129
# empty printing
2230

2331
Code

tests/testthat/_snaps/untokenize.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bake method errors when needed non-standard role columns are missing
2+
3+
Code
4+
bake(trained, new_data = tokenized_test_data[, -1])
5+
Condition
6+
Error in `step_untokenize()`:
7+
! The following required column is missing from `new_data`: text.
8+
19
# empty printing
210

311
Code

tests/testthat/_snaps/word_embeddings.md

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88
! Name collision occurred. The following variable names already exist:
99
* `wordembed_text_d1`
1010

11+
# bake method errors when needed non-standard role columns are missing
12+
13+
Code
14+
bake(trained, new_data = tokenized_test_data[, -1])
15+
Condition
16+
Error in `step_word_embeddings()`:
17+
! The following required column is missing from `new_data`: text.
18+
1119
# empty printing
1220

1321
Code

tests/testthat/test-clean_levels.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
7777

7878
trained <- prep(rec, training = smith_tr, verbose = FALSE)
7979

80-
expect_error(
81-
bake(trained, new_data = smith_tr[, -1]),
82-
class = "new_data_missing_column"
80+
expect_snapshot(
81+
error = TRUE,
82+
bake(trained, new_data = smith_tr[, -1])
8383
)
8484
})
8585

@@ -126,4 +126,4 @@ test_that("printing", {
126126

127127
expect_snapshot(print(rec))
128128
expect_snapshot(prep(rec))
129-
})
129+
})

0 commit comments

Comments
 (0)