forked from tidyverse/dbplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsrc_dbi.R
156 lines (147 loc) · 5.11 KB
/
src_dbi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#' Use dplyr verbs with a remote database table
#'
#' All data manipulation on SQL tbls are lazy: they will not actually
#' run the query or retrieve the data unless you ask for it: they all return
#' a new `tbl_dbi` object. Use [compute()] to run the query and save the
#' results in a temporary table in the database, or use [collect()] to retrieve the
#' results to R. You can see the query with [show_query()].
#'
#' @details
#' For best performance, the database should have an index on the variables
#' that you are grouping by. Use [explain()] to check that the database is using
#' the indexes that you expect.
#'
#' There is one verb that is not lazy: [do()] is eager because it must pull
#' the data into R.
#'
#' @param src A `DBIConnection` object produced by `DBI::dbConnect()`.
#' @param from Either a table identifier or a literal [sql()] string.
#'
#' Use a string to identify a table in the current schema/catalog. We
#' recommend using `I()` to identify a table outside the default catalog or
#' schema, e.g. `I("schema.table")` or `I("catalog.schema.table")`. You can
#' also use [in_schema()]/[in_catalog()] or [DBI::Id()].
#' @param ... Passed on to [tbl_sql()]
#' @export
#' @examples
#' library(dplyr)
#'
#' # Connect to a temporary in-memory SQLite database
#' con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
#'
#' # Add some data
#' copy_to(con, mtcars)
#' DBI::dbListTables(con)
#'
#' # To retrieve a single table from a source, use `tbl()`
#' con %>% tbl("mtcars")
#'
#' # Use `I()` for qualified table names
#' con %>% tbl(I("temp.mtcars")) %>% head(1)
#'
#' # You can also use pass raw SQL if you want a more sophisticated query
#' con %>% tbl(sql("SELECT * FROM mtcars WHERE cyl = 8"))
#'
#' # If you just want a temporary in-memory database, use src_memdb()
#' src2 <- src_memdb()
#'
#' # To show off the full features of dplyr's database integration,
#' # we'll use the Lahman database. lahman_sqlite() takes care of
#' # creating the database.
#'
#' if (requireNamespace("Lahman", quietly = TRUE)) {
#' batting <- copy_to(con, Lahman::Batting)
#' batting
#'
#' # Basic data manipulation verbs work in the same way as with a tibble
#' batting %>% filter(yearID > 2005, G > 130)
#' batting %>% select(playerID:lgID)
#' batting %>% arrange(playerID, desc(yearID))
#' batting %>% summarise(G = mean(G), n = n())
#'
#' # There are a few exceptions. For example, databases give integer results
#' # when dividing one integer by another. Multiply by 1 to fix the problem
#' batting %>%
#' select(playerID:lgID, AB, R, G) %>%
#' mutate(
#' R_per_game1 = R / G,
#' R_per_game2 = R * 1.0 / G
#' )
#'
#' # All operations are lazy: they don't do anything until you request the
#' # data, either by `print()`ing it (which shows the first ten rows),
#' # or by `collect()`ing the results locally.
#' system.time(recent <- filter(batting, yearID > 2010))
#' system.time(collect(recent))
#'
#' # You can see the query that dplyr creates with show_query()
#' batting %>%
#' filter(G > 0) %>%
#' group_by(playerID) %>%
#' summarise(n = n()) %>%
#' show_query()
#' }
#' @importFrom dplyr tbl
#' @aliases tbl_dbi
tbl.src_dbi <- function(src, from, ...) {
subclass <- class(src$con)[[1]] # prefix added by dplyr::make_tbl
tbl_sql(c(subclass, "dbi"), src = src, from = from, ...)
}
# Internal calls to `tbl()` should be avoided in favor of tbl_src_dbi().
# The former may query the database for column names if `vars` is omitted,
# the latter always requires `vars`.
tbl_src_dbi <- function(src, from, vars) {
force(vars)
tbl(src, from, vars = vars)
}
#' Database src
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' Since can generate a `tbl()` directly from a DBI connection we no longer
#' recommend using `src_dbi()`.
#'
#' @param con An object that inherits from [DBI::DBIConnection-class],
#' typically generated by [DBI::dbConnect]
#' @param auto_disconnect Should the connection be automatically closed when
#' the src is deleted? Set to `TRUE` if you initialize the connection
#' the call to `src_dbi()`. Pass `NA` to auto-disconnect but print a message
#' when this happens.
#' @return An S3 object with class `src_dbi`, `src_sql`, `src`.
#' @keywords internal
#' @export
src_dbi <- function(con, auto_disconnect = FALSE) {
# Avoid registering disconnector if con can't be evaluated
force(con)
# stopifnot(is(con, "DBIConnection"))
if (is_false(auto_disconnect)) {
disco <- NULL
} else {
disco <- db_disconnector(con, quiet = is_true(auto_disconnect)) # nocov
}
structure(
list(
con = con,
disco = disco
),
class = connection_s3_class(con)
)
}
connection_s3_class <- function(con) {
subclass <- setdiff(methods::is(con), methods::extends("DBIConnection"))
c(paste0("src_", subclass), "src_dbi", "src_sql", "src")
}
methods::setOldClass(c("src_dbi", "src_sql", "src"))
# nocov start
# Creates an environment that disconnects the database when it's GC'd
db_disconnector <- function(con, quiet = FALSE) {
reg.finalizer(environment(), function(...) {
if (!quiet) {
message("Auto-disconnecting ", class(con)[[1]])
}
dbDisconnect(con)
})
environment()
}
# nocov end