Skip to content

Commit eb35907

Browse files
committed
refactor duckdb to use internal function for writing to file
1 parent 00eae56 commit eb35907

File tree

16 files changed

+142
-67
lines changed

16 files changed

+142
-67
lines changed

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ Authors@R: c(
1616
role = "aut",
1717
comment = c(ORCID = "0000-0003-1444-9135")),
1818
person("Rich", "FitzJohn", role = "aut"),
19-
person("Jeroen", "Ooms", role = "aut")
19+
person("Jeroen", "Ooms", role = "aut"),
20+
person("Ivan", "Tarbakou", role = "cph",
21+
comment = "mongo-to-sql-converter library")
2022
)
2123
License: MIT + file LICENSE
2224
LazyData: true

NEWS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# nodbi 0.11.0.9000
22

3-
## Bug fixes
3+
## Improvements
4+
* Use `duckdb` internal function for writing NDJSON to file
45
* Remove message about RSQLite handling NDJSON file name as value
56

67
# nodbi 0.11.0

R/get.R

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ docdb_get <- function(src, key, limit = NULL, ...) {
3434
if (length(params[["fields"]]) ||
3535
length(params[["query"]]) ||
3636
length(params[["listfields"]])) stop(
37-
"Use docdb_query() to specify fields or query parameters.")
37+
"Use docdb_query() to specify fields or query parameters.")
3838

3939
# dispatch
4040
UseMethod("docdb_get", src)
@@ -167,29 +167,52 @@ sqlGet <- function(src, key, limit = NULL, getFunction, ...) {
167167
# canonical sorting in nodbi
168168
"ORDER BY _id ASC;")
169169

170-
# temporary file for streaming
171-
tfname <- tempfile()
172-
tfnameCon <- file(description = tfname, open = "wt")
173-
# register to remove file after used for streaming
174-
on.exit(try(close(tfnameCon), silent = TRUE), add = TRUE)
175-
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
176-
177-
# get data, write to file in ndjson format
178-
writeLines(
179-
stringi::stri_replace_all_fixed(
180-
str = paste0(
181-
"", # protect against empty query result
182-
stats::na.omit( # eliminate rows without json
183-
DBI::dbGetQuery(
184-
conn = src$con,
185-
statement = statement,
186-
n = n)[["json"]])),
187-
pattern = "\n",
188-
replacement = "\\n"),
189-
con = tfnameCon,
190-
sep = "\n",
191-
useBytes = TRUE)
192-
close(tfnameCon)
170+
# use duckdb internal function
171+
if (inherits(src, "src_duckdb")) {
172+
173+
# temporary file for streaming
174+
tfname <- tempfile()
175+
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
176+
177+
# modify statement to export as file
178+
statement <- paste0(
179+
"COPY (", sub(";$", "", statement),
180+
ifelse(n == -1L, "", paste0(" LIMIT ", n)),
181+
") TO '", tfname, "' (HEADER false, QUOTE '');"
182+
)
183+
184+
# write ndjson
185+
DBI::dbExecute(
186+
conn = src$con,
187+
statement = statement)
188+
189+
} else {
190+
191+
# temporary file for streaming
192+
tfname <- tempfile()
193+
tfnameCon <- file(description = tfname, open = "wt")
194+
# register to remove file after used for streaming
195+
on.exit(try(close(tfnameCon), silent = TRUE), add = TRUE)
196+
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
197+
198+
# get data, write to file in ndjson format
199+
writeLines(
200+
stringi::stri_replace_all_fixed(
201+
str = paste0(
202+
"", # protect against empty query result
203+
stats::na.omit( # eliminate rows without json
204+
DBI::dbGetQuery(
205+
conn = src$con,
206+
statement = statement,
207+
n = n)[["json"]])),
208+
pattern = "\n",
209+
replacement = "\\n"),
210+
con = tfnameCon,
211+
sep = "\n",
212+
useBytes = TRUE)
213+
close(tfnameCon)
214+
215+
}
193216

194217
# stream in ndjson records
195218
return(jsonlite::stream_in(file(tfname), verbose = FALSE))

R/query.R

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,14 +1464,43 @@ processDbGetQuery <- function(
14641464
eval.parent(parse(text = getData))),
14651465
verbose = FALSE))
14661466

1467-
# temporary file and connection
1468-
tfname <- tempfile()
1469-
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
1467+
# TODO
1468+
1469+
# use duckdb internal function
1470+
if (inherits(eval.parent(parse(text = "src")), "src_duckdb")) {
1471+
1472+
# get data
1473+
statement <- eval.parent(parse(text = "statement"))
1474+
n <- eval.parent(parse(text = "n"))
1475+
1476+
# temporary file for streaming
1477+
tfname <- tempfile()
1478+
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
1479+
1480+
# modify statement to export as file
1481+
statement <- paste0(
1482+
"COPY (", sub(";$", "", statement),
1483+
ifelse(n == -1L, "", paste0(" LIMIT ", n)),
1484+
") TO '", tfname, "' (HEADER false, QUOTE '');"
1485+
)
1486+
1487+
# write ndjson
1488+
DBI::dbExecute(
1489+
conn = eval.parent(parse(text = "src"))$con,
1490+
statement = statement)
1491+
1492+
} else {
14701493

1471-
# write out
1472-
writeLines(
1473-
eval.parent(parse(text = getData)),
1474-
con = tfname)
1494+
# temporary file and connection
1495+
tfname <- tempfile()
1496+
on.exit(try(unlink(tfname), silent = TRUE), add = TRUE)
1497+
1498+
# write out
1499+
writeLines(
1500+
eval.parent(parse(text = getData)),
1501+
con = tfname)
1502+
1503+
}
14751504

14761505
# early exit
14771506
if (file.size(tfname) <= 2L) return(NULL)

README.Rmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -325,14 +325,14 @@ result <- rbenchmark::benchmark(
325325
)
326326
327327
# 2025-03-29 with M3 hardware, databases via homebrew
328-
result[rev(order(result$elapsed)), c("test", "replications", "elapsed")]
328+
# result[rev(order(result$elapsed)), c("test", "replications", "elapsed")]
329329
# test replications elapsed
330-
# 4 CouchDB 3 69.69
331-
# 3 Elastic 3 26.79
332-
# 1 MongoDB 3 1.61
330+
# 4 CouchDB 3 61.22
331+
# 3 Elastic 3 27.79
333332
# 5 PostgreSQL 3 1.52
334-
# 6 DuckDB 3 1.12
335-
# 2 SQLite 3 0.67
333+
# 1 MongoDB 3 1.51
334+
# 6 DuckDB 3 1.01
335+
# 2 SQLite 3 0.65
336336
337337
message(R.version$version.string)
338338
# R Under development (unstable) (2025-03-10 r87922)

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -358,14 +358,14 @@ result <- rbenchmark::benchmark(
358358
)
359359

360360
# 2025-03-29 with M3 hardware, databases via homebrew
361-
result[rev(order(result$elapsed)), c("test", "replications", "elapsed")]
361+
# result[rev(order(result$elapsed)), c("test", "replications", "elapsed")]
362362
# test replications elapsed
363-
# 4 CouchDB 3 69.69
364-
# 3 Elastic 3 26.79
365-
# 1 MongoDB 3 1.61
363+
# 4 CouchDB 3 61.22
364+
# 3 Elastic 3 27.79
366365
# 5 PostgreSQL 3 1.52
367-
# 6 DuckDB 3 1.12
368-
# 2 SQLite 3 0.67
366+
# 1 MongoDB 3 1.51
367+
# 6 DuckDB 3 1.01
368+
# 2 SQLite 3 0.65
369369

370370
message(R.version$version.string)
371371
# R Under development (unstable) (2025-03-10 r87922)

codemeta.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@
4646
"familyName": "Ooms"
4747
}
4848
],
49+
"copyrightHolder": [
50+
{
51+
"@type": "Person",
52+
"givenName": "Ivan",
53+
"familyName": "Tarbakou"
54+
}
55+
],
4956
"maintainer": [
5057
{
5158
"@type": "Person",
@@ -309,7 +316,7 @@
309316
"applicationCategory": "Databases",
310317
"isPartOf": "https://ropensci.org",
311318
"keywords": ["database", "MongoDB", "Elasticsearch", "CouchDB", "SQLite", "PostgreSQL", "DuckDB", "NoSQL", "JSON", "documents", "rstats", "nosql", "couchdb", "mongodb", "elasticsearch", "r", "r-package", "sqlite", "postgresql"],
312-
"fileSize": "665.808KB",
319+
"fileSize": "667.226KB",
313320
"releaseNotes": "https://github.com/ropensci/nodbi/blob/master/NEWS.md",
314321
"readme": "https://github.com/ropensci/nodbi/blob/master/README.md",
315322
"contIntegration": ["https://github.com/ropensci/nodbi/actions?query=workflow%3AR-CMD-check", "https://app.codecov.io/gh/rfhb/nodbi"],

docs/articles/nodbi-overview.html

Lines changed: 17 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/authors.html

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/index.html

Lines changed: 7 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/news/index.html

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ pkgdown: 2.1.1
33
pkgdown_sha: ~
44
articles:
55
nodbi-overview: nodbi-overview.html
6-
last_built: 2025-03-29T16:07Z
6+
last_built: 2025-03-29T18:05Z
77
urls:
88
reference: https://docs.ropensci.org/nodbi/reference
99
article: https://docs.ropensci.org/nodbi/articles

docs/reference/src_postgres.html

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/search.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

inst/js/update.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# git diff > ../update.patch
88
# rm -rf mongo-to-sql-converter
99
#
10+
# Ivan Tarbakou, MIT License
1011
module="mongo-to-sql-converter"
1112
cd inst/js
1213
npm install $module

man/src_postgres.Rd

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)