# db_31_solutions.R
# attach relevant packages
library(tidyverse)
library(DBI)
### Extract, Transform, Load ###################################################

# Extract: raw film table bundled with the pixarfilms package ------------------
pixar_films_raw <- pixarfilms::pixar_films

# Transform: split the title, fix column types, number franchise openers -------
pixar_films_clean <-
  pixar_films_raw |>
  # A trailing " <digits>" in `film` becomes `sequel`; the text before it
  # becomes `franchise`. Titles without a trailing number get `sequel = NA`,
  # and the original `film` column is kept.
  separate(
    col = film,
    into = c("franchise", "sequel"),
    sep = " (?=[0-9]+$)",
    remove = FALSE,
    fill = "right"
  ) |>
  mutate(
    number = as.integer(number),
    sequel = as.integer(sequel)
  ) |>
  # Within a franchise that has more than one film, an entry without a number
  # gets `sequel = 1`; single-film franchises keep `sequel = NA`.
  mutate(
    sequel = if (n() > 1) replace_na(sequel, 1L) else sequel,
    .by = franchise
  )
# Exercises --------------------------------------------------------------------

# 1. Adapt the ETL workflow to convert the `run_time` column to a duration.
#    - Hint: Use `mutate()` with `hms::hms(minutes = ...)`.
#
# The code treats `run_time` as a number of minutes. `hms::hms()` only accepts
# numeric (or all-NA) input, so the conversion is skipped when the column
# already is an hms duration. This step overwrites `pixar_films_clean` in
# place; without the guard, running it a second time would fail.
pixar_films_clean <-
  pixar_films_clean |>
  mutate(
    run_time = if (hms::is_hms(run_time)) {
      run_time
    } else {
      hms::hms(minutes = run_time)
    }
  )

# Print the result to inspect the converted column.
pixar_films_clean

# 2. Re-run the workflow.