-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdb_33.R
139 lines (104 loc) · 3.16 KB
/
db_33.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# attach relevant packages
library(tidyverse)
library(dm)
# display chosen presentation (it might take a few seconds to appear)
slide_viewer <- function(path) {
tmp <- tempfile(fileext = ".html")
file.copy(path, tmp)
rstudioapi::viewer(tmp)
}
# slide_viewer("materials/databases.html")
### Data models ################################################################
# Data model objects -----
pixar_dm <- dm_pixarfilms()
pixar_dm
pixar_dm |>
dm_draw()
names(pixar_dm)
pixar_dm$pixar_films
pixar_dm$academy
pixar_dm |>
dm_get_tables()
# Showcase: wrapping all tables in a data model:
pixar_films_wrapped <-
pixar_dm |>
dm_wrap_tbl(pixar_films) |>
pull_tbl(pixar_films)
pixar_films_wrapped
pixar_films_wrapped$academy[1:2]
### Keys, constraints, normalization ###########################################
# Data model object ------
pixar_dm <- dm_pixarfilms()
# Primary keys ----
any(duplicated(pixar_dm$pixar_films$film))
check_key(pixar_dm$pixar_films, film)
any(duplicated(pixar_dm$academy[c("film", "award_type")]))
check_key(pixar_dm$academy, film, award_type)
try(
check_key(pixar_dm$academy, film)
)
# Foreign keys ----
all(pixar_dm$academy$film %in% pixar_dm$pixar_films$film)
check_subset(pixar_dm$academy, film, pixar_dm$pixar_films, film)
try(
check_subset(pixar_dm$pixar_films, film, pixar_dm$academy, film)
)
# Constraints ----
pixar_dm |>
dm_examine_constraints()
dm_pixarfilms(consistent = TRUE) |>
dm_examine_constraints()
dm_nycflights13() |>
dm_examine_constraints()
# Joins ----
pixar_dm |>
dm_zoom_to(academy)
# With zooming:
pixar_dm |>
dm_zoom_to(academy) |>
left_join(pixar_films, select = c(film, release_date))
# With flattening:
pixar_dm |>
dm_flatten_to_tbl(academy)
dm_nycflights13() |>
dm_select(weather, -year, -month, -day, -hour) |>
dm_flatten_to_tbl(flights)
# Joining is easy, leave the tables separate for as long as possible!
# Exercises --------------------------------------------------------------------
venue <- tibble(
venue_id = character(),
floor = character(),
capacity = integer(),
)
event <- tibble(
event_id = character(),
event_name = character(),
event_type = character(),
venue_id = character(),
date_start = vctrs::new_datetime(),
date_end = vctrs::new_datetime(),
)
attendee <- tibble(
attendee_name = character(),
favorite_package = character(),
)
speaker <- tibble(
speaker_name = character(),
event_id = character(),
)
event_attendee <- tibble(
event_id = character(),
attendee_name = character(),
)
# 1. Explore <https://dm.cynkra.com> and the built-in data models
# `dm_nycflights13()` and `dm_pixarfilms()`
# 2. Given the table structure above, create a dm object setting suitable
# PK and FK relationships and unique keys.
# Each speaker is an attendee, each event has a venue and exactly one speaker.
# The helper table event_attendees matches attendees to events.
# - Hint: Use the `dm()` function to create a dm object from scratch
# - Hint: Use a unique key on `speakers$event_name`
# 3. Draw the dm object
# 4. Colour the tables
# - Hint: Review colors at <https://rpubs.com/krlmlr/colors>
# 5. Deploy the data model to a DuckDB database