diff --git a/dev/search.json b/dev/search.json
index 51be5251..74008602 100644
--- a/dev/search.json
+++ b/dev/search.json
@@ -1 +1 @@
-[{"path":[]},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://rsample.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://rsample.tidymodels.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to tidymodels","title":"Contributing to tidymodels","text":"detailed information contributing tidymodels packages, see development contributing guide.","code":""},{"path":"https://rsample.tidymodels.org/dev/CONTRIBUTING.html","id":"documentation","dir":"","previous_headings":"","what":"Documentation","title":"Contributing to tidymodels","text":"Typos grammatical errors documentation may edited directly using GitHub web interface, long changes made source file. YES ✅: edit roxygen comment .R file R/ directory. 🚫: edit .Rd file man/ directory. use roxygen2, Markdown syntax, documentation.","code":""},{"path":"https://rsample.tidymodels.org/dev/CONTRIBUTING.html","id":"code","dir":"","previous_headings":"","what":"Code","title":"Contributing to tidymodels","text":"submit 🎯 pull request tidymodels package, always file issue confirm tidymodels team agrees idea happy basic proposal. tidymodels packages work together. package contains unit tests, integration tests tests using packages contained extratests. pull requests, recommend create fork repo usethis::create_from_github(), initiate new branch usethis::pr_init(). Look build status making changes. README contains badges continuous integration services used package. New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. user-facing changes, add bullet top NEWS.md current development version header describing changes made followed GitHub username, links relevant issue(s)/PR(s). use testthat. Contributions test cases included easier accept. contribution spans use one package, consider building extratests changes check breakages /adding new tests . Let us know PR ran extra tests.","code":""},{"path":"https://rsample.tidymodels.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"Code","what":"Code of Conduct","title":"Contributing to tidymodels","text":"project released Contributor Code Conduct. contributing project, agree abide terms.","code":""},{"path":"https://rsample.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2021 rsample authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"a-nonlinear-regression-example","dir":"Articles > Applications","previous_headings":"","what":"A nonlinear regression example","title":"Bootstrap confidence intervals","text":"demonstrate computations different types intervals, ’ll use nonlinear regression example Baty et al (2015). showed data monitored oxygen uptake patient rest exercise phases (data frame O2K). authors fit segmented regression model transition point known (time exercise commenced). model : broom::tidy() returns analysis object standardized way. column names shown used types objects allows us use results easily. rsample, ’ll rely tidy() method work bootstrap estimates need confidence intervals. ’s example end univariate statistic isn’t automatically formatted tidy(). run model different bootstraps, ’ll write function uses split object input produces tidy data frame: First, let’s create set resamples fit separate models . options apparent = TRUE set. creates final resample copy original (unsampled) data set. required interval methods. Let’s look data see outliers aberrant results: Now let’s create scatterplot matrix: One potential outlier right VO2peak ’ll leave . univariate distributions :","code":"library(tidymodels) library(nlstools) library(GGally) data(O2K) ggplot(O2K, aes(x = t, y = VO2)) + geom_point() nonlin_form <- as.formula( VO2 ~ (t <= 5.883) * VO2rest + (t > 5.883) * (VO2rest + (VO2peak - VO2rest) * (1 - exp(-(t - 5.883) / mu))) ) # Starting values from visual inspection start_vals <- list(VO2rest = 400, VO2peak = 1600, mu = 1) res <- nls(nonlin_form, start = start_vals, data = O2K) tidy(res) ## # A tibble: 3 × 5 ## term estimate std.error statistic p.value ##
## 1 VO2rest 357. 11.4 31.3 4.27e-26 ## 2 VO2peak 1631. 21.5 75.9 1.29e-38 ## 3 mu 1.19 0.0766 15.5 1.08e-16 # Will be used to fit the models to different bootstrap data sets: fit_fun <- function(split, ...) { # We could check for convergence, make new parameters, etc. nls(nonlin_form, data = analysis(split), ...) %>% tidy() } set.seed(462) nlin_bt <- bootstraps(O2K, times = 2000, apparent = TRUE) %>% mutate(models = map(splits, ~ fit_fun(.x, start = start_vals))) nlin_bt ## # Bootstrap sampling with apparent sample ## # A tibble: 2,001 × 3 ## splits id models ## ## 1 Bootstrap0001 ## 2 Bootstrap0002 ## 3 Bootstrap0003 ## 4 Bootstrap0004 ## 5 Bootstrap0005 ## 6 Bootstrap0006 ## 7 Bootstrap0007 ## 8 Bootstrap0008 ## 9 Bootstrap0009 ## 10 Bootstrap0010 ## # ℹ 1,991 more rows nlin_bt$models[[1]] ## # A tibble: 3 × 5 ## term estimate std.error statistic p.value ## ## 1 VO2rest 359. 10.7 33.5 4.59e-27 ## 2 VO2peak 1656. 31.1 53.3 1.39e-33 ## 3 mu 1.23 0.113 10.9 2.01e-12 library(tidyr) nls_coef <- nlin_bt %>% dplyr::select(-splits) %>% # Turn it into a tibble by stacking the `models` col unnest(cols = models) %>% # Get rid of unneeded columns dplyr::select(id, term, estimate) head(nls_coef) ## # A tibble: 6 × 3 ## id term estimate ## ## 1 Bootstrap0001 VO2rest 359. ## 2 Bootstrap0001 VO2peak 1656. ## 3 Bootstrap0001 mu 1.23 ## 4 Bootstrap0002 VO2rest 358. ## 5 Bootstrap0002 VO2peak 1662. ## 6 Bootstrap0002 mu 1.26 nls_coef %>% # Put different parameters in columns tidyr::pivot_wider(names_from = term, values_from = estimate) %>% # Keep only numeric columns dplyr::select(-id) %>% ggscatmat(alpha = .25) nls_coef %>% ggplot(aes(x = estimate)) + geom_histogram(bins = 20, col = \"white\") + facet_wrap(~ term, scales = \"free_x\")"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"percentile-intervals","dir":"Articles > Applications","previous_headings":"A nonlinear regression example","what":"Percentile intervals","title":"Bootstrap confidence intervals","text":"basic type interval uses percentiles resampling distribution. get percentile intervals, rset object passed first argument second argument list column tidy results: overlaid univariate distributions: intervals compare parametric asymptotic values? percentile intervals wider parametric intervals (assume asymptotic normality). estimates appear normally distributed? can look quantile-quantile plots:","code":"p_ints <- int_pctl(nlin_bt, models) p_ints ## # A tibble: 3 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2peak 1576. 1632. 1694. 0.05 percentile ## 2 VO2rest 344. 357. 370. 0.05 percentile ## 3 mu 1.00 1.18 1.35 0.05 percentile nls_coef %>% ggplot(aes(x = estimate)) + geom_histogram(bins = 20, col = \"white\") + facet_wrap(~ term, scales = \"free_x\") + geom_vline(data = p_ints, aes(xintercept = .lower), col = \"red\") + geom_vline(data = p_ints, aes(xintercept = .upper), col = \"red\") parametric <- tidy(res, conf.int = TRUE) %>% dplyr::select( term, .lower = conf.low, .estimate = estimate, .upper = conf.high ) %>% mutate( .alpha = 0.05, .method = \"parametric\" ) intervals <- bind_rows(parametric, p_ints) %>% arrange(term, .method) intervals %>% split(intervals$term) ## $mu ## # A tibble: 2 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 mu 1.05 1.19 1.34 0.05 parametric ## 2 mu 1.00 1.18 1.35 0.05 percentile ## ## $VO2peak ## # A tibble: 2 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2peak 1590. 1631. 1675. 0.05 parametric ## 2 VO2peak 1576. 1632. 1694. 0.05 percentile ## ## $VO2rest ## # A tibble: 2 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2rest 334. 357. 380. 0.05 parametric ## 2 VO2rest 344. 357. 370. 0.05 percentile nls_coef %>% ggplot(aes(sample = estimate)) + stat_qq() + stat_qq_line(alpha = .25) + facet_wrap(~ term, scales = \"free\")"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"t-intervals","dir":"Articles > Applications","previous_headings":"A nonlinear regression example","what":"t-intervals","title":"Bootstrap confidence intervals","text":"Bootstrap t-intervals estimated computing intermediate statistics t-like structure. use , require estimated variance individual resampled estimate. example, comes along fitted model object. can extract standard errors parameters. Luckily, tidy() methods provide column named std.error. arguments intervals :","code":"t_stats <- int_t(nlin_bt, models) intervals <- bind_rows(intervals, t_stats) %>% arrange(term, .method) intervals %>% split(intervals$term) ## $mu ## # A tibble: 3 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 mu 1.05 1.19 1.34 0.05 parametric ## 2 mu 1.00 1.18 1.35 0.05 percentile ## 3 mu 1.00 1.18 1.35 0.05 student-t ## ## $VO2peak ## # A tibble: 3 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2peak 1590. 1631. 1675. 0.05 parametric ## 2 VO2peak 1576. 1632. 1694. 0.05 percentile ## 3 VO2peak 1568. 1632. 1691. 0.05 student-t ## ## $VO2rest ## # A tibble: 3 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2rest 334. 357. 380. 0.05 parametric ## 2 VO2rest 344. 357. 370. 0.05 percentile ## 3 VO2rest 342. 357. 370. 0.05 student-t"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"bias-corrected-and-accelerated-intervals","dir":"Articles > Applications","previous_headings":"A nonlinear regression example","what":"Bias-corrected and accelerated intervals","title":"Bootstrap confidence intervals","text":"bias-corrected accelerated (BCa) intervals, additional argument required. .fn argument function computes statistic interest. first argument rsplit object arguments can passed using ellipses. intervals use internal leave-one-resample compute Jackknife statistic recompute statistic every bootstrap resample. statistic expensive compute, may take time. calculations, use furrr package can computed parallel set parallel processing plan (see ?future::plan). user-facing function takes argument function ellipses.","code":"bias_corr <- int_bca(nlin_bt, models, .fn = fit_fun, start = start_vals) intervals <- bind_rows(intervals, bias_corr) %>% arrange(term, .method) intervals %>% split(intervals$term) ## $mu ## # A tibble: 4 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 mu 0.996 1.18 1.34 0.05 BCa ## 2 mu 1.05 1.19 1.34 0.05 parametric ## 3 mu 1.00 1.18 1.35 0.05 percentile ## 4 mu 1.00 1.18 1.35 0.05 student-t ## ## $VO2peak ## # A tibble: 4 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2peak 1561. 1632. 1680. 0.05 BCa ## 2 VO2peak 1590. 1631. 1675. 0.05 parametric ## 3 VO2peak 1576. 1632. 1694. 0.05 percentile ## 4 VO2peak 1568. 1632. 1691. 0.05 student-t ## ## $VO2rest ## # A tibble: 4 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 VO2rest 343. 357. 368. 0.05 BCa ## 2 VO2rest 334. 357. 380. 0.05 parametric ## 3 VO2rest 344. 357. 370. 0.05 percentile ## 4 VO2rest 342. 357. 370. 0.05 student-t"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"no-existing-tidy-method","dir":"Articles > Applications","previous_headings":"","what":"No existing tidy method","title":"Bootstrap confidence intervals","text":"case, function can emulate minimum results: character column called term, numeric column called estimate, , optionally, numeric column called std.error. last column needed int_t(). Suppose just want estimate fold-increase outcome 90th 10th percentiles course experiment. function might look like: Everything else works :","code":"fold_incr <- function(split, ...) { dat <- analysis(split) quants <- quantile(dat$VO2, probs = c(.1, .9)) tibble( term = \"fold increase\", estimate = unname(quants[2]/quants[1]), # We don't know the analytical formula for this std.error = NA_real_ ) } nlin_bt <- nlin_bt %>% mutate(folds = map(splits, fold_incr)) int_pctl(nlin_bt, folds) ## # A tibble: 1 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 fold increase 4.42 4.76 5.05 0.05 percentile int_bca(nlin_bt, folds, .fn = fold_incr) ## # A tibble: 1 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 fold increase 4.53 4.76 5.36 0.05 BCa"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Intervals.html","id":"intervals-for-linearish-parametric-intervals","dir":"Articles > Applications","previous_headings":"","what":"Intervals for linear(ish) parametric intervals","title":"Bootstrap confidence intervals","text":"rsample also contains reg_intervals() function can used linear regression (via lm()), generalized linear models (glm()), log-linear survival models (survival::survreg() survival::coxph()). function makes easier get intervals models. simple example logistic regression using dementia data modeldata package: Let’s fit model predictors: Let’s use model student-t intervals: can also save resamples plotting: Now can unnest data use ggplot:","code":"data(ad_data, package = \"modeldata\") lr_mod <- glm(Class ~ male + age + Ab_42 + tau, data = ad_data, family = binomial) glance(lr_mod) ## # A tibble: 1 × 8 ## null.deviance df.null logLik AIC BIC deviance df.residual nobs ## ## 1 391. 332 -140. 289. 308. 279. 328 333 tidy(lr_mod) ## # A tibble: 5 × 5 ## term estimate std.error statistic p.value ## ## 1 (Intercept) 129. 112. 1.15 0.250 ## 2 male -0.744 0.307 -2.43 0.0152 ## 3 age -125. 114. -1.10 0.272 ## 4 Ab_42 0.534 0.104 5.14 0.000000282 ## 5 tau -1.78 0.309 -5.77 0.00000000807 set.seed(29832) lr_int <- reg_intervals(Class ~ male + age + Ab_42 + tau, data = ad_data, model_fn = \"glm\", family = binomial) lr_int ## # A tibble: 4 × 6 ## term .lower .estimate .upper .alpha .method ## ## 1 Ab_42 0.316 0.548 0.765 0.05 student-t ## 2 age -332. -133. 85.7 0.05 student-t ## 3 male -1.35 -0.755 -0.133 0.05 student-t ## 4 tau -2.38 -1.83 -1.17 0.05 student-t set.seed(29832) lr_int <- reg_intervals(Class ~ male + age + Ab_42 + tau, data = ad_data, keep_reps = TRUE, model_fn = \"glm\", family = binomial) lr_int ## # A tibble: 4 × 7 ## term .lower .estimate .upper .alpha .method .replicates ## > ## 1 Ab_42 0.316 0.548 0.765 0.05 student-t [1,001 × 2] ## 2 age -332. -133. 85.7 0.05 student-t [1,001 × 2] ## 3 male -1.35 -0.755 -0.133 0.05 student-t [1,001 × 2] ## 4 tau -2.38 -1.83 -1.17 0.05 student-t [1,001 × 2] lr_int %>% select(term, .replicates) %>% unnest(cols = .replicates) %>% ggplot(aes(x = estimate)) + geom_histogram(bins = 30) + facet_wrap(~ term, scales = \"free_x\") + geom_vline(data = lr_int, aes(xintercept = .lower), col = \"red\") + geom_vline(data = lr_int, aes(xintercept = .upper), col = \"red\") + geom_vline(xintercept = 0, col = \"green\")"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Recipes_and_rsample.html","id":"an-example-recipe","dir":"Articles > Applications","previous_headings":"","what":"An Example Recipe","title":"Recipes with rsample","text":"illustration, Ames housing data used. sale prices homes along various descriptors property: Suppose fit simple regression model formula: distribution lot size right-skewed: might benefit model estimate transformation data using Box-Cox procedure. Also, note frequencies neighborhoods can vary: resampled, neighborhoods included test set result column dummy variables zero entries. true House_Style variable. might want collapse rarely occurring values “” categories. define design matrix, initial recipe created: recreates work formula method traditionally uses additional steps. original data object ames used call, used define variables characteristics single recipe valid across resampled versions data. recipe can estimated analysis component resample. execute recipe entire data set: get values data, bake function can used: Note fewer dummy variables Neighborhood House_Style data. Also, code using prep() benefits default argument retain = TRUE, keeps processed version data set don’t reapply steps extract processed values. data used train recipe, used: next section explore recipes bootstrap resampling modeling:","code":"data(ames, package = \"modeldata\") log10(Sale_Price) ~ Neighborhood + House_Style + Year_Sold + Lot_Area library(ggplot2) theme_set(theme_bw()) ggplot(ames, aes(x = Lot_Area)) + geom_histogram(binwidth = 5000, col = \"red\", fill =\"red\", alpha = .5) ggplot(ames, aes(x = Neighborhood)) + geom_bar() + coord_flip() + xlab(\"\") library(recipes) # Apply log10 transformation outside the recipe # https://www.tmwr.org/recipes.html#skip-equals-true ames <- ames %>% mutate(Sale_Price = log10(Sale_Price)) rec <- recipe(Sale_Price ~ Neighborhood + House_Style + Year_Sold + Lot_Area, data = ames) %>% # Collapse rarely occurring jobs into \"other\" step_other(Neighborhood, House_Style, threshold = 0.05) %>% # Dummy variables on the qualitative predictors step_dummy(all_nominal()) %>% # Unskew a predictor step_BoxCox(Lot_Area) %>% # Normalize step_center(all_predictors()) %>% step_scale(all_predictors()) rec rec_training_set <- prep(rec, training = ames) rec_training_set ## ## ── Recipe ──────────────────────────────────────────────────────────────── ## ## ── Inputs ## Number of variables by role ## outcome: 1 ## predictor: 4 ## ## ── Training information ## Training data contained 2930 data points and no incomplete rows. ## ## ── Operations ## • Collapsing factor levels for: Neighborhood and House_Style | Trained ## • Dummy variables from: Neighborhood and House_Style | Trained ## • Box-Cox transformation on: Lot_Area | Trained ## • Centering for: Year_Sold and Lot_Area, ... | Trained ## • Scaling for: Year_Sold and Lot_Area, ... | Trained # By default, the selector `everything()` is used to # return all the variables. Other selectors can be used too. bake(rec_training_set, new_data = head(ames)) ## # A tibble: 6 × 14 ## Year_Sold Lot_Area Sale_Price Neighborhood_College_Creek ## ## 1 1.68 2.70 5.33 -0.317 ## 2 1.68 0.506 5.02 -0.317 ## 3 1.68 0.930 5.24 -0.317 ## 4 1.68 0.423 5.39 -0.317 ## 5 1.68 0.865 5.28 -0.317 ## 6 1.68 0.197 5.29 -0.317 ## # ℹ 10 more variables: Neighborhood_Old_Town , ## # Neighborhood_Edwards , Neighborhood_Somerset , ## # Neighborhood_Northridge_Heights , Neighborhood_Gilbert , ## # Neighborhood_Sawyer , Neighborhood_other , ## # House_Style_One_Story , House_Style_Two_Story , ## # House_Style_other bake(rec_training_set, new_data = NULL) %>% head ## # A tibble: 6 × 14 ## Year_Sold Lot_Area Sale_Price Neighborhood_College_Creek ## ## 1 1.68 2.70 5.33 -0.317 ## 2 1.68 0.506 5.02 -0.317 ## 3 1.68 0.930 5.24 -0.317 ## 4 1.68 0.423 5.39 -0.317 ## 5 1.68 0.865 5.28 -0.317 ## 6 1.68 0.197 5.29 -0.317 ## # ℹ 10 more variables: Neighborhood_Old_Town , ## # Neighborhood_Edwards , Neighborhood_Somerset , ## # Neighborhood_Northridge_Heights , Neighborhood_Gilbert , ## # Neighborhood_Sawyer , Neighborhood_other , ## # House_Style_One_Story , House_Style_Two_Story , ## # House_Style_other library(rsample) set.seed(7712) bt_samples <- bootstraps(ames) bt_samples ## # Bootstrap sampling ## # A tibble: 25 × 2 ## splits id ## ## 1 Bootstrap01 ## 2 Bootstrap02 ## 3 Bootstrap03 ## 4 Bootstrap04 ## 5 Bootstrap05 ## 6 Bootstrap06 ## 7 Bootstrap07 ## 8 Bootstrap08 ## 9 Bootstrap09 ## 10 Bootstrap10 ## # ℹ 15 more rows bt_samples$splits[[1]] ## ## <2930/1095/2930>"},{"path":"https://rsample.tidymodels.org/dev/articles/Applications/Recipes_and_rsample.html","id":"working-with-resamples","dir":"Articles > Applications","previous_headings":"","what":"Working with Resamples","title":"Recipes with rsample","text":"can add recipe column tibble. recipes convenience function called prepper() can used call prep() split object first argument (easier purrring): Now, fit model, fit function needs recipe input. code implicitly used retain = TRUE option prep(). Otherwise, split objects also needed bake() recipe (prediction function ). get predictions, function needs three arguments: splits (get assessment data), recipe (process ), model. iterate , function purrr::pmap() used: Calculating RMSE:","code":"library(purrr) bt_samples$recipes <- map(bt_samples$splits, prepper, recipe = rec) bt_samples ## # Bootstrap sampling ## # A tibble: 25 × 3 ## splits id recipes ## ## 1 Bootstrap01 ## 2 Bootstrap02 ## 3 Bootstrap03 ## 4 Bootstrap04 ## 5 Bootstrap05 ## 6 Bootstrap06 ## 7 Bootstrap07 ## 8 Bootstrap08 ## 9 Bootstrap09 ## 10 Bootstrap10 ## # ℹ 15 more rows bt_samples$recipes[[1]] ## ## ── Recipe ──────────────────────────────────────────────────────────────── ## ## ── Inputs ## Number of variables by role ## outcome: 1 ## predictor: 4 ## ## ── Training information ## Training data contained 2930 data points and no incomplete rows. ## ## ── Operations ## • Collapsing factor levels for: Neighborhood and House_Style | Trained ## • Dummy variables from: Neighborhood and House_Style | Trained ## • Box-Cox transformation on: Lot_Area | Trained ## • Centering for: Year_Sold and Lot_Area, ... | Trained ## • Scaling for: Year_Sold and Lot_Area, ... | Trained fit_lm <- function(rec_obj, ...) lm(..., data = bake(rec_obj, new_data = NULL, everything())) bt_samples$lm_mod <- map( bt_samples$recipes, fit_lm, Sale_Price ~ . ) bt_samples ## # Bootstrap sampling ## # A tibble: 25 × 4 ## splits id recipes lm_mod ## ## 1 Bootstrap01 ## 2 Bootstrap02 ## 3 Bootstrap03 ## 4 Bootstrap04 ## 5 Bootstrap05 ## 6 Bootstrap06 ## 7 Bootstrap07 ## 8 Bootstrap08 ## 9 Bootstrap09 ## 10 Bootstrap10 ## # ℹ 15 more rows pred_lm <- function(split_obj, rec_obj, model_obj, ...) { mod_data <- bake( rec_obj, new_data = assessment(split_obj), all_predictors(), all_outcomes() ) out <- mod_data %>% select(Sale_Price) out$predicted <- predict(model_obj, newdata = mod_data %>% select(-Sale_Price)) out } bt_samples$pred <- pmap( lst( split_obj = bt_samples$splits, rec_obj = bt_samples$recipes, model_obj = bt_samples$lm_mod ), pred_lm ) bt_samples ## # Bootstrap sampling ## # A tibble: 25 × 5 ## splits id recipes lm_mod pred ## ## 1 Bootstrap01 ## 2 Bootstrap02 ## 3 Bootstrap03 ## 4 Bootstrap04