From fee8c82bddab839dc0fd27f2b928354168c588ac Mon Sep 17 00:00:00 2001 From: Vries Date: Mon, 11 Apr 2022 17:41:25 +0200 Subject: [PATCH 1/6] Further restricted the "full" list_ecotox_fields in order to avoid failure of building simple search queries. --- DESCRIPTION | 13 ++++++----- NAMESPACE | 17 ++++++++++++++ R/database_access.r | 8 ++++--- R/init.r | 45 ++++++++++++++++++++++++++++--------- R/wrappers.r | 2 +- man/build_ecotox_sqlite.Rd | 5 ++++- man/download_ecotox_data.Rd | 13 +++++++---- man/list_ecotox_fields.Rd | 8 ++++--- man/search_ecotox.Rd | 2 +- tests/testthat/test_that.r | 2 +- 10 files changed, 85 insertions(+), 30 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e39cc6c..1b916a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ECOTOXr Type: Package Title: Download and Extract Data from US EPA's ECOTOX Database -Version: 0.1.1 -Date: 2021-10-04 +Version: 0.1.2 +Date: TODO Authors@R: c(person("Pepijn", "de Vries", role = c("aut", "cre", "dtc"), email = "pepijn.devries@outlook.com")) Author: @@ -19,15 +19,16 @@ Depends: RSQLite Imports: crayon, - dplyr, rappdirs, readr, rvest, stringr, + tibble, utils -Suggests: - testthat (>= 3.0.0), - webchem +Suggests: + DBI, + webchem, + testthat (>= 3.0.0) URL: BugReports: https://github.com/pepijn-devries/ECOTOXr/issues License: GPL (>= 3) diff --git a/NAMESPACE b/NAMESPACE index cd80a46..827aa5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,20 @@ # Generated by roxygen2: do not edit by hand +S3method("[",cas) +S3method("[<-",cas) +S3method("[[",cas) +S3method("[[<-",cas) +S3method(as.character,cas) +S3method(as.data.frame,cas) +S3method(as.double,cas) +S3method(as.integer,cas) +S3method(as.list,cas) +S3method(c,cas) +S3method(format,cas) +S3method(print,cas) +export(as.cas) export(build_ecotox_sqlite) +export(cas) export(check_ecotox_availability) export(cite_ecotox) export(dbConnectEcotox) @@ -9,9 +23,12 @@ export(download_ecotox_data) export(get_ecotox_info) export(get_ecotox_path) export(get_ecotox_sqlite_file) +export(get_ecotox_url) +export(is.cas) export(list_ecotox_fields) export(search_ecotox) export(search_query_ecotox) +export(show.cas) importFrom(RSQLite,dbConnect) importFrom(RSQLite,dbDisconnect) importFrom(RSQLite,dbExecute) diff --git a/R/database_access.r b/R/database_access.r index 8426127..166de97 100644 --- a/R/database_access.r +++ b/R/database_access.r @@ -143,7 +143,8 @@ get_ecotox_info <- function(path = get_ecotox_path(), version) { #' are available from the database, for searching and output. #' @param which A \code{character} string that specifies which fields to return. Can be any of: #' '\code{default}': returns default output field names; '\code{all}': returns all fields; or -#' '\code{full}': returns all except fields from table 'dose_response_details'. +#' '\code{full}': returns all except fields from table 'chemical_carriers', 'media_characteristics', 'doses', 'dose_responses', +#' 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'. #' @param include_table A \code{logical} value indicating whether the table name should be included #' as prefix. Default is \code{TRUE}. #' @return Returns a \code{vector} of type \code{character} containing the field names from the ECOTOX database. 
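[Editor's sketch] The narrower 'full' selection introduced by this commit exists because the excluded dose-response tables cannot be joined into a simple search query (the failure the commit subject refers to). A hedged usage sketch follows; the field name 'latin_name' and the terms/method structure follow the package's own search examples, and it is assumed here that search_ecotox() accepts the same output_fields argument as search_query_ecotox():

## Sketch: request the restricted 'full' field set as search output.
## Requires a locally built copy of the ECOTOX database.
if (check_ecotox_availability()) {
  fields <- list_ecotox_fields("full", include_table = TRUE)
  hits   <- search_ecotox(
    list(latin_name = list(terms = "Daphnia magna", method = "exact")),
    output_fields = fields
  )
}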
@@ -156,7 +157,8 @@ get_ecotox_info <- function(path = get_ecotox_path(), version) { #' ## All fields that are available from the ECOTOX database: #' list_ecotox_fields("all") #' -#' ## All except fields from the table 'dose_response_details' +#' ## All except fields from the tables 'chemical_carriers', 'media_characteristics', 'doses', +#' ## 'dose_responses', 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes' #' ## that are available from the ECOTOX database: #' list_ecotox_fields("full") #' @author Pepijn de Vries @@ -166,6 +168,6 @@ list_ecotox_fields <- function(which = c("default", "full", "all"), include_tabl result <- .db_specs$field_name if (include_table) result <- paste(.db_specs$table, result, sep = ".") if (which == "default") result <- result[.db_specs$default_output] - if (which == "full") result <- result[.db_specs$table != "dose_response_details"] + if (which == "full") result <- result[!(.db_specs$table %in% c("chemical_carriers", "media_characteristics", "doses", "dose_response_details", "dose_response_links", "dose_stat_method_codes"))] return(result) } diff --git a/R/init.r b/R/init.r index c17330a..21be37b 100644 --- a/R/init.r +++ b/R/init.r @@ -1,3 +1,32 @@ +#' Get ECOTOX download URL from EPA website +#' +#' This function downloads the webpage at \url{https://cfpub.epa.gov/ecotox/index.cfm}. It then searches for the +#' download link for the complete ECOTOX database and extract its URL. +#' +#' This function is called by \code{\link{download_ecotox_data}} which tries to download the file from the resulting +#' URL. On some machines this fails due to issues with the SSL certificate. The user can try to download the file +#' by using this URL in a different browser (or on a different machine). +#' @return Returns a \code{character} string containing the download URL of the latest version of the EPA ECOTOX +#' database. +#' @rdname get_ecotox_url +#' @name get_ecotox_url +#' @examples +#' \dontrun{ +#' get_ecotox_url() +#' } +#' @author Pepijn de Vries +#' @export +get_ecotox_url <- function() { + con <- url("https://cfpub.epa.gov/ecotox/index.cfm") + link <- rvest::read_html(con) + link <- rvest::html_nodes(link, "a.ascii-link") + link <- rvest::html_attr(link, "href") + link <- link[!is.na(link) & endsWith(link, ".zip")] + closeAllConnections() + if (length(link) == 0) stop("Could not find ASCII download link...") + return(link) +} + #' Check whether a ECOTOX database exists locally #' #' Tests whether a local copy of the US EPA ECOTOX database exists in \code{\link{get_ecotox_path}}. @@ -69,17 +98,19 @@ get_ecotox_path <- function() { #' In order for this package to fully function, a local copy of the ECOTOX database needs to be build. #' This function will download the required data and build the database. #' -#' This function will attempt to find the latest download url for the ECOTOX database from the EPA website. +#' This function will attempt to find the latest download url for the ECOTOX database from the +#' \href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} (see \code{\link{get_ecotox_url}()}). #' When found it will attempt to download the zipped archive containing all required data. This data is than #' extracted and a local copy of the database is build. #' #' Use '\code{\link{suppressMessages}}' to suppress the progress report. 
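[Editor's sketch] The new get_ecotox_url() above reduces to one CSS selector plus a file-extension filter. A self-contained illustration of that logic on a made-up page; the HTML string and file name below are purely illustrative:

## Scraping logic used by get_ecotox_url(): select anchors with class
## 'ascii-link' and keep only links pointing to zip archives.
library(rvest)
dummy_page <- "<html><body>
  <a class='ascii-link' href='ecotox_ascii_example.zip'>ASCII data</a>
  <a class='ascii-link' href='release_notes.txt'>Notes</a>
</body></html>"
link <- read_html(dummy_page)
link <- html_nodes(link, "a.ascii-link")
link <- html_attr(link, "href")
link[!is.na(link) & endsWith(link, ".zip")]   # "ecotox_ascii_example.zip"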
#' @section Known issues: -#' On some machines this function fails to connect to the database download URL from the EPA website due to missing +#' On some machines this function fails to connect to the database download URL from the +#' \href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} due to missing #' SSL certificates. Unfortunately, there is no easy fix for this in this package. A work around is to download and #' unzip the file manually using a different machine or browser that is less strict with SSL certificates. You can #' then call \code{\link{build_ecotox_sqlite}()} and point the \code{source} location to the manually extracted zip -#' archive. +#' archive. For this purpose \code{\link{get_ecotox_url}()} can be used. #' #' @param target Target directory where the files will be downloaded and the database compiled. Default is #' \code{\link{get_ecotox_path}()}. @@ -112,14 +143,8 @@ download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, a if (!dir.exists(target)) dir.create(target, recursive = T) ## Obtain download link from EPA website: message(crayon::white("Obtaining download link from EPA website... ")) - con <- url("https://cfpub.epa.gov/ecotox/index.cfm") - link <- rvest::read_html(con) - link <- rvest::html_nodes(link, "a.ascii-link") - link <- rvest::html_attr(link, "href") - link <- link[!is.na(link) & endsWith(link, ".zip")] + link <- get_ecotox_url() dest_path <- file.path(target, utils::tail(unlist(strsplit(link, "/")), 1)) - closeAllConnections() - if (length(link) == 0) stop("Could not find ASCII download link...") message(crayon::green("Done\n")) proceed.download <- T if (file.exists(dest_path) && ask) { diff --git a/R/wrappers.r b/R/wrappers.r index fdb0e23..5fedf2f 100644 --- a/R/wrappers.r +++ b/R/wrappers.r @@ -199,7 +199,7 @@ search_query_ecotox <- function(search, output_fields = list_ecotox_fields("defa repeat { i <- which(search.tables$table == tab) links <- subset(db_links, (db_links$table == tab & db_links$foreign_table != "") | db_links$foreign_table == tab) - exclude <- c("chemical_carriers", "doses", "dose_responses", "dose_response_details") + exclude <- c("chemical_carriers", "doses", "dose_responses", "dose_response_details", "dose_response_links", "dose_stat_method_codes") exclude <- exclude[!(exclude %in% output_fields$table)] links <- subset(links, !links$table %in% exclude) inverselink <- subset(links, links$table == tab & links$field_name %in% c("test_id", "result_id")) diff --git a/man/build_ecotox_sqlite.Rd b/man/build_ecotox_sqlite.Rd index 5f61cf0..e1908e5 100644 --- a/man/build_ecotox_sqlite.Rd +++ b/man/build_ecotox_sqlite.Rd @@ -43,6 +43,8 @@ This problem only seems to occur for characters that are listed as 'control char consequences for reproducibility, but only if you build search queries that look for such special characters. It is therefore advised to stick to common (non-accented) alpha-numerical characters in your searches, for the sake of reproducibility. + +Use '\code{\link{suppressMessages}}' to suppress the progress report. 
} \examples{ \dontrun{ @@ -55,7 +57,8 @@ if (test) { dir <- gsub(".sqlite", "", files$database, fixed = T) path <- files$path if (dir.exists(file.path(path, dir))) { - build_ecotox_sqlite(source = file.path(path, dir), destination = get_ecotox_path()) + ## This will build the database in your temp directory: + build_ecotox_sqlite(source = file.path(path, dir), destination = tempdir()) } } } diff --git a/man/download_ecotox_data.Rd b/man/download_ecotox_data.Rd index 2ad2d4f..e1e847d 100644 --- a/man/download_ecotox_data.Rd +++ b/man/download_ecotox_data.Rd @@ -25,22 +25,27 @@ In order for this package to fully function, a local copy of the ECOTOX database This function will download the required data and build the database. } \details{ -This function will attempt to find the latest download url for the ECOTOX database from the EPA website. +This function will attempt to find the latest download url for the ECOTOX database from the +\href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} (see \code{\link{get_ecotox_url}()}). When found it will attempt to download the zipped archive containing all required data. This data is than extracted and a local copy of the database is build. + +Use '\code{\link{suppressMessages}}' to suppress the progress report. } \section{Known issues}{ -On some machines this function fails to connect to the database download URL from the EPA website due to missing +On some machines this function fails to connect to the database download URL from the +\href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} due to missing SSL certificates. Unfortunately, there is no easy fix for this in this package. A work around is to download and unzip the file manually using a different machine or browser that is less strict with SSL certificates. You can then call \code{\link{build_ecotox_sqlite}()} and point the \code{source} location to the manually extracted zip -archive. +archive. For this purpose \code{\link{get_ecotox_url}()} can be used. } \examples{ \dontrun{ -download_ecotox_data() +## This will download and build the database in your temp dir: +download_ecotox_data(tempdir()) } } \author{ diff --git a/man/list_ecotox_fields.Rd b/man/list_ecotox_fields.Rd index 05a65b4..acf9857 100644 --- a/man/list_ecotox_fields.Rd +++ b/man/list_ecotox_fields.Rd @@ -9,7 +9,8 @@ list_ecotox_fields(which = c("default", "full", "all"), include_table = TRUE) \arguments{ \item{which}{A \code{character} string that specifies which fields to return. Can be any of: '\code{default}': returns default output field names; '\code{all}': returns all fields; or -'\code{full}': returns all except fields from table 'dose_response_details'.} +'\code{full}': returns all except fields from table 'chemical_carriers', 'media_characteristics', 'doses', 'dose_responses', + 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'.} \item{include_table}{A \code{logical} value indicating whether the table name should be included as prefix. 
Default is \code{TRUE}.} @@ -31,9 +32,10 @@ list_ecotox_fields("default") ## All fields that are available from the ECOTOX database: list_ecotox_fields("all") -## All except fields from the table 'dose_response_details' +## All except fields from the tables 'chemical_carriers', 'media_characteristics', 'doses', +## 'dose_responses', 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes' ## that are available from the ECOTOX database: -list_ecotox_fields("all") +list_ecotox_fields("full") } \author{ Pepijn de Vries diff --git a/man/search_ecotox.Rd b/man/search_ecotox.Rd index 9749a83..baaebac 100644 --- a/man/search_ecotox.Rd +++ b/man/search_ecotox.Rd @@ -102,7 +102,7 @@ CRAN and document the package and database version that are used to generate spe \examples{ \dontrun{ ## let's find the ids of all ecotox tests on species -## where latin names contain either of 2 specific genus names and +## where Latin names contain either of 2 specific genus names and ## where they were exposed to the chemical benzene if (check_ecotox_availability()) { search <- diff --git a/tests/testthat/test_that.r b/tests/testthat/test_that.r index 767f475..ddc5304 100644 --- a/tests/testthat/test_that.r +++ b/tests/testthat/test_that.r @@ -176,7 +176,7 @@ test_that("A simple search results in unique result ids", { ################################# ################################# -test_that("A simple when there is a reference number there is a publication year.", { +test_that("In a simple search test that there is a publication year when there is a reference number.", { check_db() expect_false({ any(is.na(simple_search1$publication_year) & !is.na(simple_search1$reference_number)) From 663301cf5611d5d079755d6392df26bb791bc28e Mon Sep 17 00:00:00 2001 From: Vries Date: Thu, 5 May 2022 16:44:16 +0200 Subject: [PATCH 2/6] Fix for issues with SSL download certicifcate --- DESCRIPTION | 1 + R/ECOTOXr.r | 3 ++- R/init.r | 42 ++++++++++++++++++------------------- man/ECOTOXr.Rd | 3 ++- man/download_ecotox_data.Rd | 11 +++++++++- 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b916a8..fe3de5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Depends: RSQLite Imports: crayon, + httr, rappdirs, readr, rvest, diff --git a/R/ECOTOXr.r b/R/ECOTOXr.r index 2808b41..c828603 100644 --- a/R/ECOTOXr.r +++ b/R/ECOTOXr.r @@ -18,7 +18,8 @@ #' First download a copy of the complete EPA database. This can be done by calling \code{\link{download_ecotox_data}}. #' This may not always work on all machines as R does not always accept the website SSL certificate from the EPA. #' In those cases the zipped archive with the database files can be downloaded manually with a different (more -#' forgiving) browser. The files from the zip archive can be extracted to a location of choice. +#' forgiving) browser. The files from the zip archive can be extracted to a location of choice. Alternatively, +#' the user could try to use \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. #' } #' \item{ #' Next, an SQLite database needs to be build from the downloaded files. This will be done automatically when diff --git a/R/init.r b/R/init.r index 21be37b..77f9e1d 100644 --- a/R/init.r +++ b/R/init.r @@ -5,7 +5,9 @@ #' #' This function is called by \code{\link{download_ecotox_data}} which tries to download the file from the resulting #' URL. On some machines this fails due to issues with the SSL certificate. 
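[Editor's sketch] A hedged illustration of the workaround this patch introduces; ssl_verifypeer is a curl option that both functions forward to httr::GET(), and it should only be disabled when the download URL is trusted:

## Assumed workaround sketch, based on the documentation added in this patch:
download_ecotox_data(target = tempdir(), ssl_verifypeer = 0L)
## or passed explicitly through an httr configuration object:
httr::GET(get_ecotox_url(), httr::config(ssl_verifypeer = 0L))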
The user can try to download the file -#' by using this URL in a different browser (or on a different machine). +#' by using this URL in a different browser (or on a different machine). Alternatively, the user could try to use +#' \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. +#' @param ... arguments passed on to \code{\link[httr]{GET}} #' @return Returns a \code{character} string containing the download URL of the latest version of the EPA ECOTOX #' database. #' @rdname get_ecotox_url @@ -16,13 +18,12 @@ #' } #' @author Pepijn de Vries #' @export -get_ecotox_url <- function() { - con <- url("https://cfpub.epa.gov/ecotox/index.cfm") - link <- rvest::read_html(con) - link <- rvest::html_nodes(link, "a.ascii-link") +get_ecotox_url <- function(...) { + link <- httr::GET("https://cfpub.epa.gov/ecotox/index.cfm", ...) + link <- rvest::read_html(link) + link <- rvest::html_elements(link, "a.ascii-link") link <- rvest::html_attr(link, "href") link <- link[!is.na(link) & endsWith(link, ".zip")] - closeAllConnections() if (length(link) == 0) stop("Could not find ASCII download link...") return(link) } @@ -120,6 +121,9 @@ get_ecotox_path <- function() { #' @param ask There are several steps in which files are (potentially) overwritten or deleted. In those cases #' the user is asked on the command line what to do in those cases. Set this parameter to \code{FALSE} in order #' to continue without warning and asking. +#' @param ... Arguments passed on to \code{\link[httr]{GET}}. When this function fails with the error: "Peer +#' certificate cannot be authenticated with given CA certificates", you could try to rerun the function with +#' the option \code{ssl_verifypeer = 0L}. Only do so when you trust the indicated URL. #' @return Returns \code{NULL} invisibly. #' @rdname download_ecotox_data #' @name download_ecotox_data @@ -130,7 +134,7 @@ get_ecotox_path <- function() { #' } #' @author Pepijn de Vries #' @export -download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, ask = TRUE) { +download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, ask = TRUE, ...) { avail <- check_ecotox_availability() if (avail && ask) { cat(sprintf("A local database already exists (%s).", paste(attributes(avail)$file$database, collapse = ", "))) @@ -153,20 +157,14 @@ download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, a } if (proceed.download) { message(crayon::white(sprintf("Start downloading ECOTOX data from %s...\n", link))) - con <- url(link, "rb") - dest <- file(gsub(".zip", ".incomplete.download", dest_path, fixed = T), "wb") - mb <- 0 - repeat { - read <- readBin(con, "raw", 1024*1024) ## download in 1MB chunks. 
- writeBin(read, dest) - mb <- mb + 1 - message(crayon::white(sprintf("\r%i MB downloaded...", mb)), appendLF = F) - if (length(read) == 0) break - } - closeAllConnections() + httr::GET(link, httr::config( + noprogress = 0L, + progressfunction = function(down, up) { + message(crayon::white(sprintf("\r%0.1f MB downloaded...", down[2]/(1024*1024))), appendLF = F) + TRUE + }), httr::write_disk(dest_path, overwrite = TRUE)) message(crayon::green(" Done\n")) } - file.rename(gsub(".zip", ".incomplete.download", dest_path, fixed = T), dest_path) ## create bib-file for later reference con <- file(gsub(".zip", "_cit.txt", dest_path), "w+") @@ -185,7 +183,7 @@ download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, a if (dir.exists(extr.path)) { test.files <- list.files(extr.path) if (length(test.files) >= 12 && any(test.files == "chemical_carriers.txt") && ask) { - cat("EXtracted zip files already appear to exist.\n") + cat("Extracted zip files already appear to exist.\n") prompt <- readline(prompt = "Continue unzipping and overwriting these files (y/n)? ") proceed.unzip <- startsWith("Y", toupper(prompt)) } @@ -302,7 +300,7 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l lines.read <- 1 ## Copy tables in 50000 line fragments to database, to avoid memory issues frag.size <- 50000 - message(crayon::white(sprintf("\r 0 lines (incl. header) added of '%s' added to database", tab$table[[1]])), + message(crayon::white(sprintf("\r 0 lines (incl. header) of '%s' added to database", tab$table[[1]])), appendLF = F) repeat { if (is.null(head)) { @@ -330,7 +328,7 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l stringsAsFactors = F, strip.white = F) RSQLite::dbWriteTable(dbcon, tab$table[[1]], table.frag, append = T) - message(crayon::white(sprintf("\r %i lines (incl. header) added of '%s' added to database", lines.read, tab$table[[1]])), + message(crayon::white(sprintf("\r %i lines (incl. header) of '%s' added to database", lines.read, tab$table[[1]])), appendLF = F) if (length(body) < testsize) break } diff --git a/man/ECOTOXr.Rd b/man/ECOTOXr.Rd index 1601020..8d1117d 100644 --- a/man/ECOTOXr.Rd +++ b/man/ECOTOXr.Rd @@ -25,7 +25,8 @@ in order to use the package to its full capacity. First download a copy of the complete EPA database. This can be done by calling \code{\link{download_ecotox_data}}. This may not always work on all machines as R does not always accept the website SSL certificate from the EPA. In those cases the zipped archive with the database files can be downloaded manually with a different (more -forgiving) browser. The files from the zip archive can be extracted to a location of choice. +forgiving) browser. The files from the zip archive can be extracted to a location of choice. Alternatively, +the user could try to use \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. } \item{ Next, an SQLite database needs to be build from the downloaded files. This will be done automatically when diff --git a/man/download_ecotox_data.Rd b/man/download_ecotox_data.Rd index e1e847d..d062868 100644 --- a/man/download_ecotox_data.Rd +++ b/man/download_ecotox_data.Rd @@ -4,7 +4,12 @@ \alias{download_ecotox_data} \title{Download and extract ECOTOX database files and compose database} \usage{ -download_ecotox_data(target = get_ecotox_path(), write_log = TRUE, ask = TRUE) +download_ecotox_data( + target = get_ecotox_path(), + write_log = TRUE, + ask = TRUE, + ... 
+) } \arguments{ \item{target}{Target directory where the files will be downloaded and the database compiled. Default is @@ -16,6 +21,10 @@ download_ecotox_data(target = get_ecotox_path(), write_log = TRUE, ask = TRUE) \item{ask}{There are several steps in which files are (potentially) overwritten or deleted. In those cases the user is asked on the command line what to do in those cases. Set this parameter to \code{FALSE} in order to continue without warning and asking.} + +\item{...}{Arguments passed on to \code{\link[httr]{GET}}. When this function fails with the error: "Peer +certificate cannot be authenticated with given CA certificates", you could try to rerun the function with +the option \code{ssl_verifypeer = 0L}. Only do so when you trust the indicated URL.} } \value{ Returns \code{NULL} invisibly. From 1665b4d3b24e3133fe747c2464bc3f4bbb8aa0e0 Mon Sep 17 00:00:00 2001 From: pepijn-devries Date: Thu, 22 Sep 2022 09:02:21 +0200 Subject: [PATCH 3/6] Continue build with unexpected fields When encountering unexpected columns in downloaded ecotox tables, try to continue with the build and ignore the columns --- R/init.r | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/init.r b/R/init.r index 77f9e1d..32b9b59 100644 --- a/R/init.r +++ b/R/init.r @@ -327,7 +327,16 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l sep = "|", header = T, quote = "", comment.char = "", stringsAsFactors = F, strip.white = F) - RSQLite::dbWriteTable(dbcon, tab$table[[1]], table.frag, append = T) + missing_cols <- tab$field_name[!tab$field_name %in% colnames(table.frag)] + unexpected_cols <- colnames(table.frag)[!colnames(table.frag) %in% tab$field_name] + if (length(unexpected_cols) > 0) + message(sprintf("\r Ignoring unexpected column(s) '%s' in '%s'", paste(unexpected_cols, collapse = "', '"), + tab$table[[1]])) + if (length(missing_cols) > 0) + message(sprintf("\r Missing column(s) '%s' in '%s'", paste(missing_cols, collapse = "', '"), tab$table[[1]])) + RSQLite::dbWriteTable(dbcon, tab$table[[1]], + table.frag[,setdiff(tab$field_name, missing_cols), drop = F], append = T) + message(crayon::white(sprintf("\r %i lines (incl. header) of '%s' added to database", lines.read, tab$table[[1]])), appendLF = F) if (length(body) < testsize) break From dfbdcae7cdeedbaf92e1b194f28a5be68d60ef56 Mon Sep 17 00:00:00 2001 From: pepijn-devries Date: Thu, 22 Sep 2022 10:10:02 +0200 Subject: [PATCH 4/6] Unzipping target directory was wrong Use the zip file name to create extraction directory --- R/init.r | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/init.r b/R/init.r index 32b9b59..6121152 100644 --- a/R/init.r +++ b/R/init.r @@ -190,7 +190,8 @@ download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, a } if (proceed.unzip) { message(crayon::white("Extracting downloaded zip file... ")) - utils::unzip(file.path(target, utils::tail(unlist(strsplit(link, "/")), 1)), exdir = target) + utils::unzip(file.path(target, utils::tail(unlist(strsplit(link, "/")), 1)), + exdir = file.path(target, gsub(".zip", "", basename(link)))) message(crayon::green("Done\n")) if (ask && startsWith("Y", toupper(readline(prompt = "Done extracting zip file, remove it to save disk space (y/n)? 
")))) { From 33c533696a73eec23c3defb07c99ebe8045dabe9 Mon Sep 17 00:00:00 2001 From: pepijn-devries Date: Thu, 22 Sep 2022 13:21:57 +0200 Subject: [PATCH 5/6] cas_handlers was missing in original commit --- R/cas_handlers.r | 279 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 R/cas_handlers.r diff --git a/R/cas_handlers.r b/R/cas_handlers.r new file mode 100644 index 0000000..9ca881a --- /dev/null +++ b/R/cas_handlers.r @@ -0,0 +1,279 @@ +#' Functions for handling chemical abstract service (CAS) registry numbers +#' +#' Functions for handling chemical abstract service (CAS) registry numbers +#' +#' In the database \href{https://en.wikipedia.org/wiki/Chemical_Abstracts_Service}{CAS registry} numbers are stored +#' as text (type \code{character}). As CAS numbers can consist of a maximum of 10 digits (plus two hyphens) this means +#' that each CAS number can consume up to 12 bytes of memory or disk space. By storing the data numerically, only +#' 5 bytes are required. These functions provide the means to handle CAS registry numbers and coerce from and to +#' different formats and types. +#' @param x Object from which data needs to be extracted or replaced, or needs to be coerced into a specific +#' format. For nearly all of the functions documented here, this needs to be an object of the S3 class 'cas', +#' which can be created with \code{as.cas}. For \code{as.cas}, \code{x} can be a \code{character} (CAS registry number +#' with or without hyphenation) or a \code{numeric} value. Note that \code{as.cas} will only accept correctly +#' formatted and valid CAS registry numbers. +#' @param i Index specifying element(s) to extract or replace. See also \code{\link[base:Extract]{Extract}}. +#' @param value A replacement value, can be anything that can be converted into an S3 cas-class object with \code{as.cas}. +#' @param length A non-negative \code{integer} specifying the desired length. Double values will be coerced to +#' \code{integer}: supplying an argument of length other than one is an error. +#' @param hyphenate A \code{logical} value indicating whether the formatted CAS number needs to be hyphenated. +#' Default is \code{TRUE}. +#' @param ... Arguments passed to other functions +#' @return Functions \code{cas}, \code{c} and \code{as.cas} return S3 class 'cas' objects. Coercion functions +#' (starting with 'as') return the object as specified by their respective function names (i.e., \code{integer}, +#' \code{double}, \code{character}, \code{list} and \code{data.frame}). The \code{show.cas} and \code{print} functions +#' also return formatted \code{charater}s. The function \code{is.cas} will return a single \code{logical} value, +#' indicating whether \code{x} is a valid S3 cas-class object. The square brackets return the selected index/indices, +#' or the \code{vector} of cas objects where the selected elements are replaced by \code{value}. 
+#' @rdname cas +#' @name cas +#' @examples +#' ## This will generate a vector of cas objects containing 10 +#' ## fictive (0-00-0), but valid registry numbers: +#' cas(10) +#' +#' ## This is a cas-object: +#' is.cas(cas(0L)) +#' +#' ## This is not a cas-object: +#' is.cas(0L) +#' +#' ## Three different ways of creating a cas object from +#' ## Benzene's CAS registry number (the result is the same) +#' as.cas("71-43-2") +#' as.cas("71432") +#' as.cas(71432L) +#' +#' ## This is one way of creating a vector with multiple CAS registry numbers: +#' cas_data <- as.cas(c("64175", "71432", "58082")) +#' +#' ## This is how you select a specific element(s) from the vector: +#' cas_data[2:3] +#' cas_data[[2]] +#' +#' ## You can also replace specific elements in the vector: +#' cas_data[1] <- "7440-23-5" +#' cas_data[[2]] <- "129-00-0" +#' +#' ## You can format CAS numbers with or without hyphens: +#' format(cas_data, TRUE) +#' format(cas_data, FALSE) +#' +#' ## The same can be achieved using as.character +#' as.character(cas_data, TRUE) +#' as.character(cas_data, FALSE) +#' +#' ## There are also show and print methods available: +#' show(cas_data) +#' print(cas_data) +#' +#' ## Numeric values can be obtained from CAS using as.numeric, as.double or as.integer +#' as.numeric(cas_data) +#' +#' ## Be careful, however. Some CAS numbers cannot be represented by R's 32 bit integers +#' ## and will produce NA's. This will work OK: +#' huge_cas <- as.cas("9999999-99-5") +#' +#' \dontrun{ +#' ## This will not: +#' as.integer(huge_cas) +#' } +#' +#' ## The trick applied by this package is that the final +#' ## validation digit is stored separately as attribute: +#' unclass(huge_cas) +#' +#' ## This is how cas objects can be concatenated: +#' cas_data <- c(huge_cas, cas_data) +#' +#' ## This will create a data.frame +#' as.data.frame(cas_data) +#' +#' ## This will create a list: +#' as.list(cas_data) +#' @author Pepijn de Vries +#' @export +cas <- function(length = 0L) { + structure ( + integer(length), ## The registry number is stored as integer without the final digit (=checksum) + checksum = raw(length), ## last digit of CAS number, which serves as a checksum, stored as raw value + class = "cas" + ) +} + +#' @rdname cas +#' @name is.cas +#' @export +is.cas <- function(x) { + if (!(class(x) %in% "cas")) return(F) + checksums <- attributes(x)$checksum + if (length(checksums) != length(x)) stop("Each CAS registry in the vector needs a checksum") + validate <- outer(unclass(x), 0:9, function(x, y) { + floor(x/(10^y)) %% 10 + }) + validate <- apply(validate, 1, function(x) { + x <- sum(seq_along(x)*x) %% 10 + }) + return(all(validate == as.numeric(checksums))) +} + +#' @rdname cas +#' @name as.cas +#' @export +as.cas <- function(x) { + if (is.cas(x)) return(x) + x <- as.character(x) + is_hyphenated <- stringr::str_sub(x, -2, -2) == "-" & stringr::str_sub(x, -5, -5) == "-" + x[is_hyphenated] <- paste0( + stringr::str_sub(x[is_hyphenated], 1, -6), + stringr::str_sub(x[is_hyphenated], -4, -3), + stringr::str_sub(x[is_hyphenated], -1, -1) + ) + if (any(!grepl("^[0-9]+$", x))) stop("CAS numbers can only contain hyphens at correct positions and numeric characters otherwise...") + registry <- as.integer(stringr::str_sub(x, 1, -2)) + registry[is.na(registry)] <- 0L + attributes(registry)$checksum <- as.raw(as.integer(stringr::str_sub(x, -1, -1))) + class(registry) <- "cas" + if (!is.cas(registry)) stop("Input contains invalid CAS numbers") + registry +} + +#' @rdname cas +#' @name [[.cas +#' @export +`[[.cas` <- 
function(x, i) { + attribs <- attributes(x) + attribs$checksum <- attribs$checksum[[i]] + attribs$names <- attribs$names[[i]] + x <- unclass(x) + x <- x[[i]] + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [.cas +#' @export +`[.cas` <- function(x, i) { + attribs <- attributes(x) + attribs$checksum <- attribs$checksum[i] + attribs$names <- attribs$names[i] + x <- unclass(x) + x <- x[i] + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [[<-.cas +#' @export +`[[<-.cas` <- function(x, i, value) { + value <- as.cas(value) + attribs <- attributes(x) + attribs$checksum[[i]] <- attributes(value)$checksum + attribs$names[[i]] <- attributes(value)$names + x <- unclass(x) + x[[i]] <- unclass(value) + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [<-.cas +#' @export +`[<-.cas` <- function(x, i, value) { + value <- as.cas(value) + attribs <- attributes(x) + attribs$checksum[i] <- attributes(value)$checksum + attribs$names[i] <- attributes(value)$names + x <- unclass(x) + x[i] <- unclass(value) + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name format.cas +#' @export +format.cas <- function(x, hyphenate = TRUE, ...) { + checksums <- attributes(x)$checksum + x <- unclass(x) + repp <- x + repp[repp == 0] <- 1 + repp <- ceiling(2 - log10(repp)) + repp[repp < 0] <- 0 + x <- paste0(strrep("0", repp), x) + sprintf("%s%s%s%s%01i", + stringr::str_sub(x, 1, -3), + ifelse(hyphenate, "-", ""), + stringr::str_sub(x, -2, -1), + ifelse(hyphenate, "-", ""), + as.numeric(checksums) + ) +} + +#' @rdname cas +#' @name as.character.cas +#' @export +as.character.cas <- function(x, ...) { + format(x, ...) +} + +#' @rdname cas +#' @name show.cas +#' @export +show.cas <- function(x, ...) { + format(x, ...) +} + +#' @rdname cas +#' @name print.cas +#' @export +print.cas <- function(x, ...) { + if (length(x) == 0) + cat("cas(0)\n") else print(format.cas(x), ...) +} + +#' @rdname cas +#' @name as.list.cas +#' @export +as.list.cas <- function(x, ...) { + lapply(seq_along(x), function(i) x[i]) +} + +#' @rdname cas +#' @name as.double.cas +#' @export +as.double.cas <- function(x, ...) { + as.double(as.integer.cas(x, ...), ...) +} + +#' @rdname cas +#' @name as.integer.cas +#' @export +as.integer.cas <- function(x, ...) { + checksums <- as.integer(attributes(x)$checksum, ...) + x <- 10L*unclass(x) + attributes(x) <- NULL + x + checksums +} + +#' @rdname cas +#' @name c.cas +#' @export +c.cas <- function(...) { + result <- list(...) + result <- lapply(result, as.cas) + checksums <- do.call(c, lapply(result, function(x) attributes(x)$checksum)) + result <- do.call(c, lapply(result, function(x) unclass(x))) + class(result) <- "cas" + attributes(result)$checksum <- checksums + result +} + +#' @rdname cas +#' @name as.data.frame.cas +#' @export +as.data.frame.cas <- function(...) 
{ + as.data.frame(tibble::tibble(...)) +} From c97b2b4260be7eb274fe6b49e5d3a43d3696cedb Mon Sep 17 00:00:00 2001 From: pepijn-devries Date: Thu, 17 Nov 2022 10:59:21 +0100 Subject: [PATCH 6/6] merge dbplyr into main work-in-progress branch (#7) * Initial implementation of dplyr verbs * Corrected the datatype for NCBI_TAXID field * minor corrections * Final fixes before submitting to CRAN Co-authored-by: Vries --- DESCRIPTION | 20 +- NAMESPACE | 80 +++--- NEWS | 16 ++ R/ECOTOXr.r | 2 +- R/cas_handlers.r | 558 ++++++++++++++++++------------------ R/database_access.r | 19 +- R/helpers.r | 377 ++++++++++++++++++++++++ R/imports.r | 15 + R/init.r | 37 ++- R/sysdata.rda | Bin 8174 -> 8188 bytes R/wrappers.r | 241 ++++------------ man/ECOTOXr.Rd | 2 +- man/cas.Rd | 158 ++++++++++ man/download_ecotox_data.Rd | 2 +- man/get_ecotox_url.Rd | 33 +++ man/get_path.Rd | 3 +- man/list_ecotox_fields.Rd | 13 +- man/reexports.Rd | 13 + man/search_ecotox.Rd | 27 +- tests/testthat/test_cas.r | 56 ++++ tests/testthat/test_that.r | 33 ++- 21 files changed, 1150 insertions(+), 555 deletions(-) create mode 100644 man/cas.Rd create mode 100644 man/get_ecotox_url.Rd create mode 100644 man/reexports.Rd create mode 100644 tests/testthat/test_cas.r diff --git a/DESCRIPTION b/DESCRIPTION index fe3de5c..cd49460 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: ECOTOXr Type: Package Title: Download and Extract Data from US EPA's ECOTOX Database -Version: 0.1.2 -Date: TODO +Version: 0.2.0 +Date: 2022-11-17 Authors@R: c(person("Pepijn", "de Vries", role = c("aut", "cre", "dtc"), - email = "pepijn.devries@outlook.com")) + email = "pepijn.devries@outlook.com", + comment = list(ORCID = "0000-0002-7961-6646"))) Author: - Pepijn de Vries [aut, cre, dtc] + Pepijn de Vries [aut, cre, dtc] (0000-0002-7961-6646) Maintainer: Pepijn de Vries Description: The US EPA ECOTOX database is a freely available database with a treasure of aquatic and terrestrial ecotoxicological data. 
@@ -19,17 +20,24 @@ Depends: RSQLite Imports: crayon, + dbplyr, + dplyr, httr, + purrr, rappdirs, readr, + rlang, rvest, stringr, tibble, + tidyr, + tidyselect, utils Suggests: DBI, - webchem, - testthat (>= 3.0.0) + standartox, + testthat (>= 3.0.0), + webchem URL: BugReports: https://github.com/pepijn-devries/ECOTOXr/issues License: GPL (>= 3) diff --git a/NAMESPACE b/NAMESPACE index 827aa5d..e860a07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,35 +1,45 @@ -# Generated by roxygen2: do not edit by hand - -S3method("[",cas) -S3method("[<-",cas) -S3method("[[",cas) -S3method("[[<-",cas) -S3method(as.character,cas) -S3method(as.data.frame,cas) -S3method(as.double,cas) -S3method(as.integer,cas) -S3method(as.list,cas) -S3method(c,cas) -S3method(format,cas) -S3method(print,cas) -export(as.cas) -export(build_ecotox_sqlite) -export(cas) -export(check_ecotox_availability) -export(cite_ecotox) -export(dbConnectEcotox) -export(dbDisconnectEcotox) -export(download_ecotox_data) -export(get_ecotox_info) -export(get_ecotox_path) -export(get_ecotox_sqlite_file) -export(get_ecotox_url) -export(is.cas) -export(list_ecotox_fields) -export(search_ecotox) -export(search_query_ecotox) -export(show.cas) -importFrom(RSQLite,dbConnect) -importFrom(RSQLite,dbDisconnect) -importFrom(RSQLite,dbExecute) -importFrom(RSQLite,dbWriteTable) +# Generated by roxygen2: do not edit by hand + +S3method("[",cas) +S3method("[<-",cas) +S3method("[[",cas) +S3method("[[<-",cas) +S3method(as.character,cas) +S3method(as.data.frame,cas) +S3method(as.double,cas) +S3method(as.integer,cas) +S3method(as.list,cas) +S3method(c,cas) +S3method(format,cas) +S3method(print,cas) +export("%>%") +export(as.cas) +export(build_ecotox_sqlite) +export(cas) +export(check_ecotox_availability) +export(cite_ecotox) +export(dbConnectEcotox) +export(dbDisconnectEcotox) +export(download_ecotox_data) +export(get_ecotox_info) +export(get_ecotox_path) +export(get_ecotox_sqlite_file) +export(get_ecotox_url) +export(is.cas) +export(list_ecotox_fields) +export(search_ecotox) +export(search_ecotox_lazy) +export(search_query_ecotox) +export(show.cas) +importFrom(RSQLite,dbConnect) +importFrom(RSQLite,dbDisconnect) +importFrom(RSQLite,dbExecute) +importFrom(RSQLite,dbWriteTable) +importFrom(dplyr,"%>%") +importFrom(dplyr,collect) +importFrom(dplyr,inner_join) +importFrom(dplyr,left_join) +importFrom(dplyr,select) +importFrom(dplyr,sql) +importFrom(dplyr,tbl) +importFrom(rlang,":=") diff --git a/NEWS b/NEWS index 445c55a..845b7db 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,19 @@ +ECOTOXr v0.2.0 (Release date: ***********TODO) +============= + + * Major changes: + + * Modified searching routines to make advantage of + the sql parser and optimisers implemented in + the package 'dplyr'. Performance (i.e. speed) of the + search routines have improved considerably. + * Added support for handling Chemical Abstracts + Service (CAS) numbers. + + * Several minor adjustments and corrections to code + and manual. These include fixes to address notes + from CRAN checks. + ECOTOXr v0.1.1 (Release date: 2021-10-04) ============= diff --git a/R/ECOTOXr.r b/R/ECOTOXr.r index c828603..7d31fd2 100644 --- a/R/ECOTOXr.r +++ b/R/ECOTOXr.r @@ -19,7 +19,7 @@ #' This may not always work on all machines as R does not always accept the website SSL certificate from the EPA. #' In those cases the zipped archive with the database files can be downloaded manually with a different (more #' forgiving) browser. The files from the zip archive can be extracted to a location of choice. 
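[Editor's sketch] A hedged sketch of that manual route; the extraction path is a placeholder for wherever the archive was unzipped:

get_ecotox_url()                          # paste the reported URL into a browser
extracted <- "~/Downloads/ecotox_ascii"   # placeholder: your unzipped folder
build_ecotox_sqlite(source = extracted, destination = get_ecotox_path())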
Alternatively, -#' the user could try to use \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. +#' the user could try to use \code{\link{download_ecotox_data}(ssl_verifypeer = 0L)} when the download URL is trusted. #' } #' \item{ #' Next, an SQLite database needs to be build from the downloaded files. This will be done automatically when diff --git a/R/cas_handlers.r b/R/cas_handlers.r index 9ca881a..2682964 100644 --- a/R/cas_handlers.r +++ b/R/cas_handlers.r @@ -1,279 +1,279 @@ -#' Functions for handling chemical abstract service (CAS) registry numbers -#' -#' Functions for handling chemical abstract service (CAS) registry numbers -#' -#' In the database \href{https://en.wikipedia.org/wiki/Chemical_Abstracts_Service}{CAS registry} numbers are stored -#' as text (type \code{character}). As CAS numbers can consist of a maximum of 10 digits (plus two hyphens) this means -#' that each CAS number can consume up to 12 bytes of memory or disk space. By storing the data numerically, only -#' 5 bytes are required. These functions provide the means to handle CAS registry numbers and coerce from and to -#' different formats and types. -#' @param x Object from which data needs to be extracted or replaced, or needs to be coerced into a specific -#' format. For nearly all of the functions documented here, this needs to be an object of the S3 class 'cas', -#' which can be created with \code{as.cas}. For \code{as.cas}, \code{x} can be a \code{character} (CAS registry number -#' with or without hyphenation) or a \code{numeric} value. Note that \code{as.cas} will only accept correctly -#' formatted and valid CAS registry numbers. -#' @param i Index specifying element(s) to extract or replace. See also \code{\link[base:Extract]{Extract}}. -#' @param value A replacement value, can be anything that can be converted into an S3 cas-class object with \code{as.cas}. -#' @param length A non-negative \code{integer} specifying the desired length. Double values will be coerced to -#' \code{integer}: supplying an argument of length other than one is an error. -#' @param hyphenate A \code{logical} value indicating whether the formatted CAS number needs to be hyphenated. -#' Default is \code{TRUE}. -#' @param ... Arguments passed to other functions -#' @return Functions \code{cas}, \code{c} and \code{as.cas} return S3 class 'cas' objects. Coercion functions -#' (starting with 'as') return the object as specified by their respective function names (i.e., \code{integer}, -#' \code{double}, \code{character}, \code{list} and \code{data.frame}). The \code{show.cas} and \code{print} functions -#' also return formatted \code{charater}s. The function \code{is.cas} will return a single \code{logical} value, -#' indicating whether \code{x} is a valid S3 cas-class object. The square brackets return the selected index/indices, -#' or the \code{vector} of cas objects where the selected elements are replaced by \code{value}. 
-#' @rdname cas -#' @name cas -#' @examples -#' ## This will generate a vector of cas objects containing 10 -#' ## fictive (0-00-0), but valid registry numbers: -#' cas(10) -#' -#' ## This is a cas-object: -#' is.cas(cas(0L)) -#' -#' ## This is not a cas-object: -#' is.cas(0L) -#' -#' ## Three different ways of creating a cas object from -#' ## Benzene's CAS registry number (the result is the same) -#' as.cas("71-43-2") -#' as.cas("71432") -#' as.cas(71432L) -#' -#' ## This is one way of creating a vector with multiple CAS registry numbers: -#' cas_data <- as.cas(c("64175", "71432", "58082")) -#' -#' ## This is how you select a specific element(s) from the vector: -#' cas_data[2:3] -#' cas_data[[2]] -#' -#' ## You can also replace specific elements in the vector: -#' cas_data[1] <- "7440-23-5" -#' cas_data[[2]] <- "129-00-0" -#' -#' ## You can format CAS numbers with or without hyphens: -#' format(cas_data, TRUE) -#' format(cas_data, FALSE) -#' -#' ## The same can be achieved using as.character -#' as.character(cas_data, TRUE) -#' as.character(cas_data, FALSE) -#' -#' ## There are also show and print methods available: -#' show(cas_data) -#' print(cas_data) -#' -#' ## Numeric values can be obtained from CAS using as.numeric, as.double or as.integer -#' as.numeric(cas_data) -#' -#' ## Be careful, however. Some CAS numbers cannot be represented by R's 32 bit integers -#' ## and will produce NA's. This will work OK: -#' huge_cas <- as.cas("9999999-99-5") -#' -#' \dontrun{ -#' ## This will not: -#' as.integer(huge_cas) -#' } -#' -#' ## The trick applied by this package is that the final -#' ## validation digit is stored separately as attribute: -#' unclass(huge_cas) -#' -#' ## This is how cas objects can be concatenated: -#' cas_data <- c(huge_cas, cas_data) -#' -#' ## This will create a data.frame -#' as.data.frame(cas_data) -#' -#' ## This will create a list: -#' as.list(cas_data) -#' @author Pepijn de Vries -#' @export -cas <- function(length = 0L) { - structure ( - integer(length), ## The registry number is stored as integer without the final digit (=checksum) - checksum = raw(length), ## last digit of CAS number, which serves as a checksum, stored as raw value - class = "cas" - ) -} - -#' @rdname cas -#' @name is.cas -#' @export -is.cas <- function(x) { - if (!(class(x) %in% "cas")) return(F) - checksums <- attributes(x)$checksum - if (length(checksums) != length(x)) stop("Each CAS registry in the vector needs a checksum") - validate <- outer(unclass(x), 0:9, function(x, y) { - floor(x/(10^y)) %% 10 - }) - validate <- apply(validate, 1, function(x) { - x <- sum(seq_along(x)*x) %% 10 - }) - return(all(validate == as.numeric(checksums))) -} - -#' @rdname cas -#' @name as.cas -#' @export -as.cas <- function(x) { - if (is.cas(x)) return(x) - x <- as.character(x) - is_hyphenated <- stringr::str_sub(x, -2, -2) == "-" & stringr::str_sub(x, -5, -5) == "-" - x[is_hyphenated] <- paste0( - stringr::str_sub(x[is_hyphenated], 1, -6), - stringr::str_sub(x[is_hyphenated], -4, -3), - stringr::str_sub(x[is_hyphenated], -1, -1) - ) - if (any(!grepl("^[0-9]+$", x))) stop("CAS numbers can only contain hyphens at correct positions and numeric characters otherwise...") - registry <- as.integer(stringr::str_sub(x, 1, -2)) - registry[is.na(registry)] <- 0L - attributes(registry)$checksum <- as.raw(as.integer(stringr::str_sub(x, -1, -1))) - class(registry) <- "cas" - if (!is.cas(registry)) stop("Input contains invalid CAS numbers") - registry -} - -#' @rdname cas -#' @name [[.cas -#' @export -`[[.cas` <- 
function(x, i) { - attribs <- attributes(x) - attribs$checksum <- attribs$checksum[[i]] - attribs$names <- attribs$names[[i]] - x <- unclass(x) - x <- x[[i]] - attributes(x) <- attribs - x -} - -#' @rdname cas -#' @name [.cas -#' @export -`[.cas` <- function(x, i) { - attribs <- attributes(x) - attribs$checksum <- attribs$checksum[i] - attribs$names <- attribs$names[i] - x <- unclass(x) - x <- x[i] - attributes(x) <- attribs - x -} - -#' @rdname cas -#' @name [[<-.cas -#' @export -`[[<-.cas` <- function(x, i, value) { - value <- as.cas(value) - attribs <- attributes(x) - attribs$checksum[[i]] <- attributes(value)$checksum - attribs$names[[i]] <- attributes(value)$names - x <- unclass(x) - x[[i]] <- unclass(value) - attributes(x) <- attribs - x -} - -#' @rdname cas -#' @name [<-.cas -#' @export -`[<-.cas` <- function(x, i, value) { - value <- as.cas(value) - attribs <- attributes(x) - attribs$checksum[i] <- attributes(value)$checksum - attribs$names[i] <- attributes(value)$names - x <- unclass(x) - x[i] <- unclass(value) - attributes(x) <- attribs - x -} - -#' @rdname cas -#' @name format.cas -#' @export -format.cas <- function(x, hyphenate = TRUE, ...) { - checksums <- attributes(x)$checksum - x <- unclass(x) - repp <- x - repp[repp == 0] <- 1 - repp <- ceiling(2 - log10(repp)) - repp[repp < 0] <- 0 - x <- paste0(strrep("0", repp), x) - sprintf("%s%s%s%s%01i", - stringr::str_sub(x, 1, -3), - ifelse(hyphenate, "-", ""), - stringr::str_sub(x, -2, -1), - ifelse(hyphenate, "-", ""), - as.numeric(checksums) - ) -} - -#' @rdname cas -#' @name as.character.cas -#' @export -as.character.cas <- function(x, ...) { - format(x, ...) -} - -#' @rdname cas -#' @name show.cas -#' @export -show.cas <- function(x, ...) { - format(x, ...) -} - -#' @rdname cas -#' @name print.cas -#' @export -print.cas <- function(x, ...) { - if (length(x) == 0) - cat("cas(0)\n") else print(format.cas(x), ...) -} - -#' @rdname cas -#' @name as.list.cas -#' @export -as.list.cas <- function(x, ...) { - lapply(seq_along(x), function(i) x[i]) -} - -#' @rdname cas -#' @name as.double.cas -#' @export -as.double.cas <- function(x, ...) { - as.double(as.integer.cas(x, ...), ...) -} - -#' @rdname cas -#' @name as.integer.cas -#' @export -as.integer.cas <- function(x, ...) { - checksums <- as.integer(attributes(x)$checksum, ...) - x <- 10L*unclass(x) - attributes(x) <- NULL - x + checksums -} - -#' @rdname cas -#' @name c.cas -#' @export -c.cas <- function(...) { - result <- list(...) - result <- lapply(result, as.cas) - checksums <- do.call(c, lapply(result, function(x) attributes(x)$checksum)) - result <- do.call(c, lapply(result, function(x) unclass(x))) - class(result) <- "cas" - attributes(result)$checksum <- checksums - result -} - -#' @rdname cas -#' @name as.data.frame.cas -#' @export -as.data.frame.cas <- function(...) { - as.data.frame(tibble::tibble(...)) -} +#' Functions for handling chemical abstract service (CAS) registry numbers +#' +#' Functions for handling chemical abstract service (CAS) registry numbers +#' +#' In the database \href{https://en.wikipedia.org/wiki/Chemical_Abstracts_Service}{CAS registry} numbers are stored +#' as text (type \code{character}). As CAS numbers can consist of a maximum of 10 digits (plus two hyphens) this means +#' that each CAS number can consume up to 12 bytes of memory or disk space. By storing the data numerically, only +#' 5 bytes are required. These functions provide the means to handle CAS registry numbers and coerce from and to +#' different formats and types. 
+#' @param x Object from which data needs to be extracted or replaced, or needs to be coerced into a specific +#' format. For nearly all of the functions documented here, this needs to be an object of the S3 class 'cas', +#' which can be created with \code{as.cas}. For \code{as.cas}, \code{x} can be a \code{character} (CAS registry number +#' with or without hyphenation) or a \code{numeric} value. Note that \code{as.cas} will only accept correctly +#' formatted and valid CAS registry numbers. +#' @param i Index specifying element(s) to extract or replace. See also \code{\link[base:Extract]{Extract}}. +#' @param value A replacement value, can be anything that can be converted into an S3 cas-class object with \code{as.cas}. +#' @param length A non-negative \code{integer} specifying the desired length. Double values will be coerced to +#' \code{integer}: supplying an argument of length other than one is an error. +#' @param hyphenate A \code{logical} value indicating whether the formatted CAS number needs to be hyphenated. +#' Default is \code{TRUE}. +#' @param ... Arguments passed to other functions +#' @return Functions \code{cas}, \code{c} and \code{as.cas} return S3 class 'cas' objects. Coercion functions +#' (starting with 'as') return the object as specified by their respective function names (i.e., \code{integer}, +#' \code{double}, \code{character}, \code{list} and \code{data.frame}). The \code{show.cas} and \code{print} functions +#' also return formatted \code{charater}s. The function \code{is.cas} will return a single \code{logical} value, +#' indicating whether \code{x} is a valid S3 cas-class object. The square brackets return the selected index/indices, +#' or the \code{vector} of cas objects where the selected elements are replaced by \code{value}. +#' @rdname cas +#' @name cas +#' @examples +#' ## This will generate a vector of cas objects containing 10 +#' ## fictive (0-00-0), but valid registry numbers: +#' cas(10) +#' +#' ## This is a cas-object: +#' is.cas(cas(0L)) +#' +#' ## This is not a cas-object: +#' is.cas(0L) +#' +#' ## Three different ways of creating a cas object from +#' ## Benzene's CAS registry number (the result is the same) +#' as.cas("71-43-2") +#' as.cas("71432") +#' as.cas(71432L) +#' +#' ## This is one way of creating a vector with multiple CAS registry numbers: +#' cas_data <- as.cas(c("64175", "71432", "58082")) +#' +#' ## This is how you select a specific element(s) from the vector: +#' cas_data[2:3] +#' cas_data[[2]] +#' +#' ## You can also replace specific elements in the vector: +#' cas_data[1] <- "7440-23-5" +#' cas_data[[2]] <- "129-00-0" +#' +#' ## You can format CAS numbers with or without hyphens: +#' format(cas_data, TRUE) +#' format(cas_data, FALSE) +#' +#' ## The same can be achieved using as.character +#' as.character(cas_data, TRUE) +#' as.character(cas_data, FALSE) +#' +#' ## There are also show and print methods available: +#' show(cas_data) +#' print(cas_data) +#' +#' ## Numeric values can be obtained from CAS using as.numeric, as.double or as.integer +#' as.numeric(cas_data) +#' +#' ## Be careful, however. Some CAS numbers cannot be represented by R's 32 bit integers +#' ## and will produce NA's. 
This will work OK: +#' huge_cas <- as.cas("9999999-99-5") +#' +#' \dontrun{ +#' ## This will not: +#' as.integer(huge_cas) +#' } +#' +#' ## The trick applied by this package is that the final +#' ## validation digit is stored separately as attribute: +#' unclass(huge_cas) +#' +#' ## This is how cas objects can be concatenated: +#' cas_data <- c(huge_cas, cas_data) +#' +#' ## This will create a data.frame +#' as.data.frame(cas_data) +#' +#' ## This will create a list: +#' as.list(cas_data) +#' @author Pepijn de Vries +#' @export +cas <- function(length = 0L) { + structure ( + integer(length), ## The registry number is stored as integer without the final digit (=checksum) + checksum = raw(length), ## last digit of CAS number, which serves as a checksum, stored as raw value + class = "cas" + ) +} + +#' @rdname cas +#' @name is.cas +#' @export +is.cas <- function(x) { + if (!(class(x) %in% "cas")) return(F) + checksums <- attributes(x)$checksum + if (length(checksums) != length(x)) stop("Each CAS registry in the vector needs a checksum") + validate <- outer(unclass(x), 0:9, function(x, y) { + floor(x/(10^y)) %% 10 + }) + validate <- apply(validate, 1, function(x) { + x <- sum(seq_along(x)*x) %% 10 + }) + return(all(validate == as.numeric(checksums))) +} + +#' @rdname cas +#' @name as.cas +#' @export +as.cas <- function(x) { + if (is.cas(x)) return(x) + x <- as.character(x) + is_hyphenated <- stringr::str_sub(x, -2, -2) == "-" & stringr::str_sub(x, -5, -5) == "-" + x[is_hyphenated] <- paste0( + stringr::str_sub(x[is_hyphenated], 1, -6), + stringr::str_sub(x[is_hyphenated], -4, -3), + stringr::str_sub(x[is_hyphenated], -1, -1) + ) + if (any(!grepl("^[0-9]+$", x))) stop("CAS numbers can only contain hyphens at correct positions and numeric characters otherwise...") + registry <- as.integer(stringr::str_sub(x, 1, -2)) + registry[is.na(registry)] <- 0L + attributes(registry)$checksum <- as.raw(as.integer(stringr::str_sub(x, -1, -1))) + class(registry) <- "cas" + if (!is.cas(registry)) stop("Input contains invalid CAS numbers") + registry +} + +#' @rdname cas +#' @name [[.cas +#' @export +`[[.cas` <- function(x, i) { + attribs <- attributes(x) + attribs$checksum <- attribs$checksum[[i]] + attribs$names <- attribs$names[[i]] + x <- unclass(x) + x <- x[[i]] + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [.cas +#' @export +`[.cas` <- function(x, i) { + attribs <- attributes(x) + attribs$checksum <- attribs$checksum[i] + attribs$names <- attribs$names[i] + x <- unclass(x) + x <- x[i] + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [[<-.cas +#' @export +`[[<-.cas` <- function(x, i, value) { + value <- as.cas(value) + attribs <- attributes(x) + attribs$checksum[[i]] <- attributes(value)$checksum + attribs$names[[i]] <- attributes(value)$names + x <- unclass(x) + x[[i]] <- unclass(value) + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name [<-.cas +#' @export +`[<-.cas` <- function(x, i, value) { + value <- as.cas(value) + attribs <- attributes(x) + attribs$checksum[i] <- attributes(value)$checksum + attribs$names[i] <- attributes(value)$names + x <- unclass(x) + x[i] <- unclass(value) + attributes(x) <- attribs + x +} + +#' @rdname cas +#' @name format.cas +#' @export +format.cas <- function(x, hyphenate = TRUE, ...) 
{ + checksums <- attributes(x)$checksum + x <- unclass(x) + repp <- x + repp[repp == 0] <- 1 + repp <- ceiling(2 - log10(repp)) + repp[repp < 0] <- 0 + x <- paste0(strrep("0", repp), x) + sprintf("%s%s%s%s%01i", + stringr::str_sub(x, 1, -3), + ifelse(hyphenate, "-", ""), + stringr::str_sub(x, -2, -1), + ifelse(hyphenate, "-", ""), + as.numeric(checksums) + ) +} + +#' @rdname cas +#' @name as.character.cas +#' @export +as.character.cas <- function(x, ...) { + format(x, ...) +} + +#' @rdname cas +#' @name show.cas +#' @export +show.cas <- function(x, ...) { + format(x, ...) +} + +#' @rdname cas +#' @name print.cas +#' @export +print.cas <- function(x, ...) { + if (length(x) == 0) + cat("cas(0)\n") else print(format.cas(x), ...) +} + +#' @rdname cas +#' @name as.list.cas +#' @export +as.list.cas <- function(x, ...) { + lapply(seq_along(x), function(i) x[i]) +} + +#' @rdname cas +#' @name as.double.cas +#' @export +as.double.cas <- function(x, ...) { + as.double(as.integer.cas(x, ...), ...) +} + +#' @rdname cas +#' @name as.integer.cas +#' @export +as.integer.cas <- function(x, ...) { + checksums <- as.integer(attributes(x)$checksum, ...) + x <- 10L*unclass(x) + attributes(x) <- NULL + x + checksums +} + +#' @rdname cas +#' @name c.cas +#' @export +c.cas <- function(...) { + result <- list(...) + result <- lapply(result, as.cas) + checksums <- do.call(c, lapply(result, function(x) attributes(x)$checksum)) + result <- do.call(c, lapply(result, function(x) unclass(x))) + class(result) <- "cas" + attributes(result)$checksum <- checksums + result +} + +#' @rdname cas +#' @name as.data.frame.cas +#' @export +as.data.frame.cas <- function(...) { + as.data.frame(tibble::tibble(...)) +} diff --git a/R/database_access.r b/R/database_access.r index 166de97..dc756a2 100644 --- a/R/database_access.r +++ b/R/database_access.r @@ -142,9 +142,11 @@ get_ecotox_info <- function(path = get_ecotox_path(), version) { #' This can be useful when specifying a \code{\link{search_ecotox}}, to identify which fields #' are available from the database, for searching and output. #' @param which A \code{character} string that specifies which fields to return. Can be any of: -#' '\code{default}': returns default output field names; '\code{all}': returns all fields; or -#' '\code{full}': returns all except fields from table 'chemical_carriers', 'media_characteristics', 'doses', 'dose_responses', -#' 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'. +#' '\code{default}': returns default output field names; '\code{all}': returns all fields; +#' '\code{extended}': returns all fields of the default tables; or +#' '\code{full}': returns all fields except those from tables 'chemical_carriers', +#' 'media_characteristics', 'doses', 'dose_responses', +#' 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'. #' @param include_table A \code{logical} value indicating whether the table name should be included #' as prefix. Default is \code{TRUE}. #' @return Returns a \code{vector} of type \code{character} containing the field names from the ECOTOX database. 
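As a side note on the cas handlers introduced above: is.cas() validates the trailing check digit with the standard CAS rule (multiply the remaining digits from right to left by 1, 2, 3, ... and take the sum modulo 10). A minimal sketch of that arithmetic, using benzene (71-43-2) as an example:

## Check-digit arithmetic behind is.cas(), illustrated for benzene (71-43-2):
digits <- c(3, 4, 1, 7)                  # registry part "7143", read from right to left
sum(seq_along(digits) * digits) %% 10    # 1*3 + 2*4 + 3*1 + 4*7 = 42 -> check digit 2
as.cas("71-43-2")                        # accepted, because the stored check digit matches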
@@ -163,11 +165,14 @@ get_ecotox_info <- function(path = get_ecotox_path(), version) { #' list_ecotox_fields("full") #' @author Pepijn de Vries #' @export -list_ecotox_fields <- function(which = c("default", "full", "all"), include_table = TRUE) { +list_ecotox_fields <- function(which = c("default", "extended", "full", "all"), include_table = TRUE) { which <- match.arg(which) result <- .db_specs$field_name - if (include_table) result <- paste(.db_specs$table, result, sep = ".") - if (which == "default") result <- result[.db_specs$default_output] - if (which == "full") result <- result[!(.db_specs$table %in% c("chemical_carriers", "media_characteristics", "doses", "dose_response_details", "dose_response_links", "dose_stat_method_codes"))] + if (include_table) result <- paste(.db_specs$table, result, sep = ".") + if (which == "default") result <- result[.db_specs$default_output] + if (which == "extended") result <- result[.db_specs$table %in% unique(.db_specs$table[.db_specs$default_output])] + if (which == "full") result <- result[ + !(.db_specs$table %in% c("chemical_carriers", "media_characteristics", "doses", "dose_response_details", + "dose_response_links", "dose_stat_method_codes"))] return(result) } diff --git a/R/helpers.r b/R/helpers.r index 141c07b..27164d8 100644 --- a/R/helpers.r +++ b/R/helpers.r @@ -12,3 +12,380 @@ stop("No local database located. Download data first by calling 'download_ecotox_data()'") } else return(test) } + +.search_ecotox_lazy_get_result_ids <- function(search, dbcon) { + # Declare variables to pass CRAN checks + .data <- field <- terms <- table_mod <- NULL + + search_result <- + lapply(search, as.data.frame, strings.as.factors = F) %>% + dplyr::bind_rows(.id = "field") %>% + dplyr::mutate( + table = lapply(field, function(x) { + switch( + x, + result_id = "results", + test_id = "tests", + .db_specs$table[.db_specs$field_name %in% x]) + }) + ) %>% + tidyr::unnest("table") %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c("table", "field", "method")))) %>% + dplyr::summarise( + where = { + method <- tryCatch({.data$method[[1]]}, error = function(e) "contains") + if (method == "exact") { + sprintf("`%s` COLLATE NOCASE IN ('%s')", field[[1]], paste(.data$terms, collapse = "', '")) + } else if(method == "contains"){ + paste(sprintf("`%s` LIKE '%%%s%%'", field, terms), collapse = " OR ") + } else { + stop("Sorry, specified search method is not implemented.") + } + }) %>% + dplyr::group_by(table) %>% + dplyr::group_map(~{ + dplyr::tibble( + tbl = list(tbl(dbcon, .x$table[[1]]) %>% dplyr::filter(!!sql(paste(sprintf("(%s)", .x$where), collapse = " AND ")))), + table = .x$table[[1]] + )}, .keep = T) %>% + dplyr::bind_rows() %>% + dplyr::group_by(table_mod = gsub("_synonyms", "", table)) %>% + dplyr::summarise( + table = table_mod[[1]], + tbl = + if (length(tbl) > 1) { + tbl %>% + purrr::map(~{select(., dplyr::any_of("species_number"))}) %>% + purrr::reduce(~{dplyr::union(.x, .y)}) %>% + list() + } else { + tbl + }) %>% + dplyr::group_by(dplyr::across(dplyr::any_of("table"))) %>% + dplyr::group_map(~{ + #keep linking tables until result_ids are obtained + my_tab <- ..1$tbl[[1]] + count <- 0 + repeat { + count <- count + 1 + if (count > 5) stop("Sorry could not build a query for the search you have specified.") + cur_fields <- colnames(my_tab) + if (any(c("result_id", "test_id") %in% cur_fields)) break + referring <- .db_specs[.db_specs$table %in% c("results", "tests") & + .db_specs$foreign_key %in% sprintf("%s(%s)", ..2$table, cur_fields),,drop = F] + if 
(nrow(referring) == 1) { + foreign <- unlist(regmatches(referring$foreign_key, gregexpr("(?<=\\().+(?=\\))", referring$foreign_key, perl = T))) + my_tab <- + my_tab %>% + left_join(tbl(dbcon, referring$table), by = structure(referring$field_name, names = foreign)) + } else { + stop("Sorry could not build a query for the search you have specified.") + } + } + dplyr::tibble(tbl = list(my_tab %>% select(dplyr::any_of(c("result_id", "test_id"))))) + }) %>% + dplyr::bind_rows() %>% + dplyr::group_by(result_available = tbl %>% + purrr::map(~ paste(sort(match(colnames(.), c("test_id", "result_id"))), collapse = "_")) %>% + unlist()) %>% + dplyr::group_map(~{ + switch( + .y$result_available, + `1` = { + purrr::reduce(.x$tbl, ~{inner_join(.x, .y, by = "test_id")}) %>% + left_join(tbl(dbcon, "results"), by = "test_id") %>% + select(dplyr::all_of(c("test_id", "result_id"))) + }, + `2` = { + purrr::reduce(.x$tbl, ~{inner_join(.x, .y, by = "result_id")}) %>% + left_join(tbl(dbcon, "results"), by = "result_id") %>% + select(dplyr::all_of(c("test_id", "result_id"))) + }, + `1_2` = { + purrr::reduce(.x$tbl, ~{inner_join(.x, .y, by = c("test_id", "result_id"))}) + }) + }) %>% + purrr::reduce(~{inner_join(.x, .y, by = c("test_id", "result_id"))}) + + return(search_result) +} + +.search_ecotox_lazy_append_fields <- function(dbcon, search_result, output_fields, compute, ...) { + con <- search_result[["src"]]$con + output_fields <- as.data.frame(do.call(rbind, strsplit(output_fields, ".", fixed = T)), stringsAsFactors = F) + names(output_fields) <- c("table", "field") + ## Duplicated field names need to be renamed to avoid ambiguous results, keep track of those in 'new_field' + output_fields$new_field <- output_fields$field + output_fields_tables <- unique(output_fields$table) + ## Find out if there are any field names duplicated which are not used as joining keys: + duplicated_fields <- unique(sort(.db_specs$field_name[duplicated(.db_specs$field_name)])) + duplicated_fields <- duplicated_fields[ + duplicated_fields %in% + .db_specs$field_name[.db_specs$primary_key == "" & .db_specs$foreign_key == ""] + ] + + .process_lookups <- function(result, parent_tab, prefix = "") { + REPLACE <- NULL # This is used in dplyr routines below but are not globally bound + join_params <- list() + + foreigns <- gsub("\\(code\\)", "", + unique(.db_specs$foreign_key[.db_specs$table == parent_tab & endsWith(.db_specs$foreign_key, "(code)")])) + tabs <- .db_specs[ + (.db_specs$table == parent_tab & .db_specs$field_name %in% output_fields$field & + endsWith(.db_specs$foreign_key, "(code)")) | + .db_specs$table == parent_tab & + grepl(paste0(foreigns[foreigns %in% output_fields$table], "\\(",collapse="|"), .db_specs$foreign_key),, + drop = F] + tabs$foreign_tab <- gsub("\\(code\\)", "", tabs$foreign_key) + + .db_specs[ + .db_specs$table == parent_tab & + startsWith(.db_specs$foreign_key, paste0(foreigns[foreigns %in% output_fields$table], "(")),,drop=F] + if (nrow(tabs) > 0) { + for (tab in unique(tabs$foreign_tab)) { + flds <- tabs$field_name[tabs$foreign_tab == tab] + for (fld in flds) { + get_idx <- function(f) {output_fields$table == tab & output_fields$field == f} + if (any(gsub(prefix, "", output_fields$new_field[get_idx("code")]) != output_fields$field[get_idx("code")])) { + output_fields <<- dplyr::bind_rows( + output_fields, + data.frame(field = "code", table = tab, new_field = paste0(prefix, fld)) + ) + } else { + output_fields$new_field[get_idx("code")] <<- paste0(prefix, fld) + } + + descr <- paste0(fld, "_description_") 
+ if (any(gsub(prefix, "", output_fields$new_field[get_idx("description")]) != + output_fields$field[get_idx("description")])) { + output_fields <<- dplyr::bind_rows( + output_fields, + data.frame(field = "description", table = tab, new_field = paste0(prefix, descr)) + ) + } else { + output_fields$new_field[get_idx("description")] <<- paste0(prefix, descr) + } + + if (nrow(output_fields[output_fields$table == tab,]) > 0) { + join_params[[length(join_params) + 1]] <- + list( + left_by = paste0(prefix, fld, "_temp"), + right_table = tab, + right_by = "code", + right_select = structure("description", names = paste0(prefix, descr)) + ) + } + } + } + } + if (length(join_params) > 0) { + ## Create temp fields where forward slash is removed for joining with lookup values + lefties <- join_params %>% purrr::map_chr(~.$left_by) + lefties_repl <- lefties %>% purrr::map(~rlang::expr(REPLACE(!!rlang::sym(gsub("_temp$", "", .)), "/", ""))) + names(lefties_repl) <- lefties + result <- result %>% dplyr::mutate(!!!lefties_repl) + + for (i in seq_along(join_params)) { + jp <- join_params[[i]] + result <- + result %>% + left_join(tbl(dbcon, jp$right_table) %>% dplyr::rename(!!!jp$right_select), + by = structure(jp$right_by, names = jp$left_by)) + } + result <- result %>% select(-dplyr::any_of(lefties)) + if (compute) result <- result %>% dplyr::compute() + } + return(result) + } + + result <- search_result + if (compute) result <- result %>% dplyr::compute() + ## start with results table as this is needed for obtaining dose link ids + + results_output <- union(output_fields$field[output_fields$table == "results"], "result_id") + dose_tables <- c("dose_responses", "dose_response_details", "doses", "dose_response_links", "dose_stat_method_codes") + if (any(dose_tables %in% output_fields_tables)) { + + out_rename <- output_fields$table %in% dose_tables & !output_fields$field %in% c("test_id", "result_id") + output_fields$new_field[out_rename] <- paste0("dose_link_", output_fields$field[out_rename]) + result <- + result %>% + left_join(tbl(con, "dose_response_links"), "result_id") %>% + left_join(tbl(con, "dose_responses"), c("dose_resp_id", "test_id")) %>% + left_join(tbl(con, "dose_response_details"), "dose_resp_id") %>% + left_join(tbl(con, "doses"), c("dose_id", "test_id")) %>% + dplyr::rename_with(~paste0("dose_link_", .), !dplyr::any_of(c("test_id", "result_id"))) %>% + .process_lookups("dose_response_links") %>% + .process_lookups("doses", "dose_link_") + } + + if (any(c("media_characteristics", "organic_matter_type_codes") %in% output_fields_tables)) { + foreigns <- .db_specs$field_name[.db_specs$table == "media_characteristics" & .db_specs$foreign_key != ""] + result <- + result %>% + left_join(tbl(con, "media_characteristics"), "result_id") %>% + select(dplyr::any_of(union( + colnames(result), + union( + foreigns, + output_fields$field[output_fields$table == "media_characteristics"] + ) + ))) %>% + .process_lookups("media_characteristics") + } + + result_foreigns <- data.frame( + table = gsub("\\(.*?\\)", "", + .db_specs$foreign_key[.db_specs$foreign_key != "" & .db_specs$table == "results"]), + field = .db_specs$field_name[.db_specs$foreign_key != "" & .db_specs$table == "results"] + ) %>% dplyr::filter(!table %in% c("tests", "results")) + + results_output <- union(results_output, c("result_id", result_foreigns$field)) + result <- result %>% + left_join(tbl(con, "results") %>% + select(dplyr::any_of(union(c("result_id", "test_id"), results_output))) %>% + dplyr::rename_with(function(x) 
paste0("result_", x), + dplyr::any_of(c("created_date", "modified_date", "additional_comments"))), + c("result_id", "test_id")) + output_fields$new_field[output_fields$table == "results" & output_fields$field == "created_date"] <- + "result_created_date" + output_fields$new_field[output_fields$table == "results" & output_fields$field == "modified_date"] <- + "result_modified_date" + output_fields$new_field[output_fields$table == "results" & output_fields$field == "additional_comments"] <- + "result_additional_comments" + + if (compute) result <- result %>% dplyr::compute() + + result <- result %>% .process_lookups("results") + + ## continue with linking all requested tables to the test data + + if ("chemical_carriers" %in% output_fields_tables) { + car_sel <- output_fields$table == "chemical_carriers" & !output_fields$field %in% c("carrier_id", "test_id") + output_fields$new_field[car_sel] <- paste0("carrier_", output_fields$new_field[car_sel]) + result <- + result %>% + left_join(tbl(con, "chemical_carriers") %>% + dplyr::rename_with(~paste0("carrier_", .), !dplyr::any_of(c("carrier_id", "test_id"))), + "test_id") %>% + select(dplyr::any_of(union( + colnames(result), + output_fields$new_field[output_fields$table == "chemical_carriers"] + ))) + } + + test_foreigns <- data.frame( + table = gsub("\\(.*?\\)", "", + .db_specs$foreign_key[.db_specs$foreign_key != "" & .db_specs$table == "tests"]), + field = .db_specs$field_name[.db_specs$foreign_key != "" & .db_specs$table == "tests"] + ) %>% dplyr::filter(!table %in% c("tests", "results")) + + test_foreigns <- test_foreigns[test_foreigns$table %in% output_fields$table,,drop = F] + test_foreigns$foreign <- .db_specs$field_name[.db_specs$primary_key != ""][ + match(test_foreigns$table, .db_specs$table[.db_specs$primary_key != ""])] + + required_test_fields <- + union(output_fields$field[output_fields$table == "tests"], test_foreigns$field) + + result <- result %>% + left_join(tbl(con, "tests") %>% + select(dplyr::any_of(union("test_id", required_test_fields))) %>% + dplyr::rename_with(function(x) paste0("test_", x), + dplyr::any_of(c("created_date", "modified_date", "additional_comments"))), + "test_id") + output_fields$new_field[output_fields$table == "tests" & output_fields$field == "created_date"] <- + "test_created_date" + output_fields$new_field[output_fields$table == "tests" & output_fields$field == "modified_date"] <- + "test_modified_date" + output_fields$new_field[output_fields$table == "tests" & output_fields$field == "additional_comments"] <- + "test_additional_comments" + + if (any(c("species", "species_synonyms") %in% output_fields_tables)) { + species_fields <- output_fields$field[output_fields$table == "species"] + species_syn_fields <- output_fields$field[output_fields$table == "species_synonyms"] + result <- result %>% + left_join( + tbl(con, "species") %>% select(union(species_fields, "species_number")), + "species_number") + if (length(species_syn_fields) > 0) { + output_fields$new_field[output_fields$table == "species_synonyms" & output_fields$field == "latin_name"] <- + "species_synonyms" + spec_syns <- + tbl(con, "species_synonyms") %>% + select(union(species_syn_fields, "species_number")) + if ("species_synonyms" %in% output_fields$table) { + spec_syns <- spec_syns %>% dplyr::rename(species_synonyms = "latin_name") + } + result <- result %>% left_join(spec_syns, "species_number") + } + } + + if (!is.null(list(...)$group_by_results) && list(...)$group_by_results && "species_synonyms" %in% colnames(result)) { + result <- 
result %>% + dplyr::group_by(dplyr::across(!dplyr::any_of("species_synonyms"))) %>% + dplyr::summarise(dplyr::across(dplyr::any_of("species_synonyms"), ~GROUP_CONCAT(., "|")), .groups = "keep") %>% + dplyr::ungroup() + } + + if ("chemicals" %in% output_fields$table) { + output_fields$new_field[output_fields$table == "chemicals" & output_fields$field == "cas_number"] <- + "test_cas" + output_fields$new_field[output_fields$table == "chemicals" & output_fields$field == "chemical_name"] <- + "test_chemical" + output_fields$new_field[output_fields$table == "chemicals" & output_fields$field == "ecotox_group"] <- + "test_chemical_group" + result <- result %>% + left_join( + tbl(con, "chemicals") %>% + select(union("cas_number", output_fields$field[output_fields$table == "chemicals"])) %>% + dplyr::rename_with( + function(x){ + c("test_cas", "test_chemical", "test_chemical_group")[match(x, c("cas_number", "chemical_name", "ecotox_group"))] + }, dplyr::any_of(c("cas_number", "chemical_name", "ecotox_group"))), + "test_cas") + } + + if ("references" %in% output_fields$table) { + result <- result %>% + left_join( + tbl(con, "references") %>% + select(union("reference_number", output_fields$field[output_fields$table == "references"])), + "reference_number" + ) + } + + ## join remaining lookup values + result <- result %>% .process_lookups("tests") + + renamed <- output_fields$field != output_fields$new_field + message( + crayon::white( + paste( + sprintf("'%s.%s' was renamed '%s'", + output_fields$table[renamed], + output_fields$field[renamed], + output_fields$new_field[renamed]), + collapse = "\n") + ) + ) + + result <- result %>% select(dplyr::any_of(union(unique(output_fields$new_field), "result_id"))) + return(result) +} + +.group_nest_results <- function(search_result) { + if (nrow(search_result) > 0) { + group_by <- + search_result %>% + dplyr::group_by(dplyr::across(dplyr::any_of("result_id"))) %>% + dplyr::summarise_all(~all(is.na(.)) || all(.[[1]] == .)) %>% + dplyr::ungroup() %>% + select(tidyselect::vars_select_helpers$where(~ all(.))) %>% + colnames() + if (all(colnames(search_result) %in% group_by)) return (search_result) else{ + search_result <- + search_result %>% + dplyr::group_nest(dplyr::across(dplyr::any_of(group_by)), .key = "nested_data") + } + } else search_result +} diff --git a/R/imports.r b/R/imports.r index 7a9f950..9f7114a 100644 --- a/R/imports.r +++ b/R/imports.r @@ -9,4 +9,19 @@ } #' @importFrom RSQLite dbExecute dbConnect dbDisconnect dbWriteTable +#' @importFrom dplyr collect left_join inner_join select sql tbl +#' @importFrom rlang := +NULL + +#' Objects exported from other packages +#' +#' Objects imported and exported from other packages. See original documentation for more details. +#' +#' \describe{ +#' \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} +#' } +#' @importFrom dplyr %>% +#' @export %>% +#' @name %>% +#' @rdname reexports NULL diff --git a/R/init.r b/R/init.r index 6121152..6eb1c1a 100644 --- a/R/init.r +++ b/R/init.r @@ -6,7 +6,7 @@ #' This function is called by \code{\link{download_ecotox_data}} which tries to download the file from the resulting #' URL. On some machines this fails due to issues with the SSL certificate. The user can try to download the file #' by using this URL in a different browser (or on a different machine). Alternatively, the user could try to use -#' \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. 
+#' \code{\link{download_ecotox_data}(ssl_verifypeer = 0L)} when the download URL is trusted. #' @param ... arguments passed on to \code{\link[httr]{GET}} #' @return Returns a \code{character} string containing the download URL of the latest version of the EPA ECOTOX #' database. @@ -69,7 +69,8 @@ check_ecotox_availability <- function(target = get_ecotox_path()) { #' Obtain the local path to where the ECOTOX database is (or will be) placed. #' #' It can be useful to know where the database is located on your disk. This function -#' returns the location as provided by \code{\link[rappdirs]{app_dir}}. +#' returns the location as provided by \code{\link[rappdirs]{app_dir}}, or as +#' specified by you using \code{options(ECOTOXr_path = "mypath")}. #' #' @param path When you have a copy of the database somewhere other than the default #' directory (\code{\link{get_ecotox_path}()}), you can provide the path here. @@ -91,7 +92,7 @@ check_ecotox_availability <- function(target = get_ecotox_path()) { #' @author Pepijn de Vries #' @export get_ecotox_path <- function() { - rappdirs::app_dir("ECOTOXr")$cache() + getOption("ECOTOXr_path", rappdirs::app_dir("ECOTOXr")$cache()) } #' Download and extract ECOTOX database files and compose database @@ -101,7 +102,7 @@ get_ecotox_path <- function() { #' #' This function will attempt to find the latest download url for the ECOTOX database from the #' \href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} (see \code{\link{get_ecotox_url}()}). -#' When found it will attempt to download the zipped archive containing all required data. This data is than +#' When found it will attempt to download the zipped archive containing all required data. This data is then #' extracted and a local copy of the database is build. #' #' Use '\code{\link{suppressMessages}}' to suppress the progress report. @@ -147,7 +148,7 @@ download_ecotox_data <- function(target = get_ecotox_path(), write_log = TRUE, a if (!dir.exists(target)) dir.create(target, recursive = T) ## Obtain download link from EPA website: message(crayon::white("Obtaining download link from EPA website... ")) - link <- get_ecotox_url() + link <- get_ecotox_url(...) dest_path <- file.path(target, utils::tail(unlist(strsplit(link, "/")), 1)) message(crayon::green("Done\n")) proceed.download <- T @@ -271,6 +272,7 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l ## Loop the text file tables and add them to the sqlite database 1 by 1 i <- 0 + by(.db_specs, .db_specs$table, function(tab) { i <<- i + 1 message(crayon::white(sprintf("Adding '%s' table (%i/%i) to database:\n", @@ -313,15 +315,19 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l ## Replace pipe-characters with dashes when they are between brackets "("and ")", ## These should not be interpreted as table separators and will mess up the table.read call body <- stringr::str_replace_all(body, "(?<=\\().+?(?=\\))", function(x){ - if (grepl("[\\(/]", x)) return(x) ## there should not be another opening bracket or forward slash! in that case leave as is + ## there should not be another opening bracket, double pipe or forward slash! 
in that case leave as is + if (grepl("[\\(/]", x) || grepl("||", x, fixed = T)) return(x) gsub("[|]", "-", x) }) lines.read <- lines.read + length(body) - table.frag <- utils::read.table(text = c(head, body[1:1]), - sep = "|", header = T, quote = "", comment.char = "", - stringsAsFactors = F, strip.white = F) + ## Join lines when number of pipes is to small (probably caused by unintended linefeed) + count_pipes <- unlist(lapply(regmatches(body, gregexpr("[|]", body)), length)) + join_lines <- which(count_pipes < length(regmatches(head, gregexpr("[|]", head))[[1]])) + if (length(join_lines) > 0) { + body <- c(body[-join_lines], paste(body[join_lines], collapse = " ")) + } ## strip.white is set to F, as they occur in primary keys! table.frag <- utils::read.table(text = c(head, body), @@ -331,10 +337,17 @@ build_ecotox_sqlite <- function(source, destination = get_ecotox_path(), write_l missing_cols <- tab$field_name[!tab$field_name %in% colnames(table.frag)] unexpected_cols <- colnames(table.frag)[!colnames(table.frag) %in% tab$field_name] if (length(unexpected_cols) > 0) - message(sprintf("\r Ignoring unexpected column(s) '%s' in '%s'", paste(unexpected_cols, collapse = "', '"), - tab$table[[1]])) + message(stringr::str_pad( + sprintf("\r Ignoring unexpected column(s) '%s' in '%s'", paste(unexpected_cols, collapse = "', '"), + tab$table[[1]]), + width = 80, "right") + ) if (length(missing_cols) > 0) - message(sprintf("\r Missing column(s) '%s' in '%s'", paste(missing_cols, collapse = "', '"), tab$table[[1]])) + message(stringr::str_pad( + sprintf("\r Missing column(s) '%s' in '%s'", paste(missing_cols, collapse = "', '"), + tab$table[[1]]), + width = 80, "right") + ) RSQLite::dbWriteTable(dbcon, tab$table[[1]], table.frag[,setdiff(tab$field_name, missing_cols), drop = F], append = T) diff --git a/R/sysdata.rda b/R/sysdata.rda index 3c891c8c2af1d3f79302f2bf83b3d73e941f6d5f..a76e0d5bdae5b80175c253e6491fa5cb8d1d7ddd 100644 GIT binary patch delta 7915 zcmY+I1yEc+*zR$6r#Qvk9m?YFE=zHDiqpm27k7uE#fy7!Slk_o6?ZF_|J-|LzHc(m zFLTc1Ig`9^l1a|_m)9?eB%rLXjia^Kyz11mt#SMtrHo^L42tkHRtMMa&8ggI3K5VI zpd5b$98*As#U7e5-$>TZ&;p4k@QI>J8UZ<5ncUV^Q#pd}&}v(cZQf6Z%>WK}Q!4_X_0lThWC5Z9wDUeNG*t0&>vdux z;ysmvr70dTY`fyrX!+6&``Qb2f#kvo2e5IPh->5I|MPo&f0L!T3yLQI>$a2abq1mE z>=y!00W`XeIQ*}X;M%zWUQ(W6@o-;Y1tIprdS?Q^A{oTy@2@;rrpj5)3o>Q2LK2$n zJ$X3dIF_w^m)qm2g2<#Di~a@cOD#KBgrTY^wxqIvuso`~CcUEP?XMBQCP zaR$pIyHwR7LiCIC6M_%1_DZ*))mRvQBozb^()Z}d0#Q={rQsGTO!!(P{L!cULn5|DQDbiZ<2IOt{rJmS`K&?SUL3R87D^wv% zf#4){p6EYvHsg!aMQ}sD*I35<{8DPEv9*L$#6R$X*p(_E9f3&hHA9+J?G#u2sX%+9 z;8|MZ)DwUuZf-?E>9>ElPnr~L>>jutXdaAP%|cQCbATN` zo1;51l-U7e0#3RDBU3~7c$Bi%*eQ5qQRS2F-l{3JW4I5r(1Df5Np54VAVIHFbkS^! 
zYcte~K6E0bU7*F(Jr>j&u%$0pt76`k%D zu$11i60L_M+}N2TdRs%m4+(ix4UqNmOw&!1tk-OIMh;!(eoC`E<~z-A-SclNLk|?4 zDlRK)i%Z1GZ3d(s&bY{_XO={3bVCEE@OU$EbA}N6!eq)aRV!hMtJv=VaesM+{s42Z zX&WL=lwhYWdA(qV&1?boQQ2%TlRBU3i3xcZh5mgrrb1mX?Nf47_YkDmuq?S4S^M6k zL$|uqI-!n{zmVo4C}S(usg&v7KoZ%@{#=*s6V9K#v4AG+^@=(HTv^vm(Xh)B&6pE+ zaMVd27eo+c(06pok@zJMf^G37VNuTFvzK!iZoYvZ52sV?OYBk&ETKBWg zh`plKxM)Vv(G~5T6tro^MSItAvD~)nvUs=Iz#EP7PPW{CrQ)JRf|ebK4=UyvY&Kr_ zYh`WPLDBmCN!NwFdM+=^hd)`qSa*+Cd4BPhp<{RG%(VFau{Wm$FTf9~-dxi@*h=+N zvRC>cNp0JsxKX8y`LOnU`dHDF(nZQXK*Br5&(pkB9YB4<4c8BwpR@8_{S1R&7=35- z^*{V61(H4CtOoR>s)lKx`;;Ds$aGk6XM&MXLOp##Xi(N7F<^Gg%r>bq+Ux^=El-2M z3f&;npo-vcL(MHR`Th;#@2)AuuP=z{pafc_iv7?!EnMsp9X0RD2Av<)Haw(W52V!@ zUe&HnVCW3X0K=?~V2V6=w)PX~;maVREi9=au>8kXDLuIKYX%5dj$m1a4gD9Hjh-#v zRA@?Ub@nn4-X9}UL2O@FnBqR9gHXWe^n@a$xLO9Ab!)s_7D}I^Pp5O!Y?L@H9CI}< z)SVjQiV!FjwJkKL&00y3-d#9F@U7x>$twk(EtF{r;lWYKQ`3!^x6a2#)=f%Xet1n- zf%yKCQ$;jSr1Jo?Y-_9DY)_fZ9P^i>Zkg+fl{EPRQM}|b^#RnyTOFhy>X6WAxY7ICat{XDWZ?GUF~dYNW>s(Wl-2q$JVY z?!_%G4;4?%Rc6cN8wv0A_M_rq0%k{o_b=(}lh1X?<>tT-NxSv#3+IsBiZou~iSuj< zFq{2Q1O(ghXp6yOA*Cv zTxX|j`x_pT$-UMLzxFl!*&;u$f;0iFe?Pd@rW!_;qssUdq1!0$_QUFbFv?$w_V1U} zxn7?yaKzfB`0j__n3b((p2&@2KS#^f8Co74&yWE4cQNE0O`!<`0w;a>W~Knztbw<5 zcrGy@d3HpG*a^H@BIq8`4yKAur+WeEr6qohOy}MmkX48`LXIl0@ediyTGXX(i>DCy zc@DAZ7*vl4Kf(JJ#y`DxOU6gcPx!v8i@S1vAUw4(Bl_S%*9G^v(wyb|>&BKd=KjMP z%vaSs`FHL0?^>&33ytRA5Sva1IF0lC6PKPMR%07~zWuo#A4}vDLapa=j71-&Sy{bm zl3B@TN3)y_pw6FUOUlE`TA1&~)H!Kz2}ua#Ny9w(kT^&{4SLgKs<%g6QqAh!GWYI2 zo1ucAvq;Ko(JuRx2=opucbxRA@zZ!#mp1!4$Hv{HFgEKpRzKw-j<%hDA}t}+8yoNM ziPD6$Ku?2}V=FpzqDHrIV?{`5!qt;Q?}w=jPfO{UiH6yTW%$Er#v{NK1n*Ef*sE#8 zurTlyOm4WUHE2d%IqoC@y2DB5XEypuV)e8wuL7i;T65znI_|MQF$C88^Ra&N$@0Z~ z@+bCaezt1|ODfX1Ag~E5ok{%eT)MF7mvD9mpd!%4>vltS4gWPe(!7>1P`v4IS)m>B ziiC8+swo#WX@#}Bhi$s=SfzLNY7Cc;wO97iwX`dIV>vtX+)1214J>m!A<8ZiM!)=o zCy*xX7udV1z{;q&PVy%Ud7qSkPQ{^7b)tsuf%;nzwWJ;5Q$}E?Ojt;VWVl`ALwZ!& zu&NU$s}p;qk{HpnB5~K$jw*hAx!y#%rbtBw(vJhY`f{C#a!HY4y$mD;A}41teGpRg z9Z%3c9?uRbjsDC##>J6F@B%$;Ny;$*sbMVV*)}OZh{<5q!ctR^712p{&QwsZN}AoR zBf{vjbFA6)CK!!N)^`w!geZ@ey70AV`wMVhF<*Vpa(DYT)N|=i4^p2J^;IShDjq$$8aiG|-`i-`2t?d!$Tr z$}h%*1QH#zMTf1?K%wuP+q&*kNti5~{amsyNJ-iaqdHDdqMo@ zQ_tlKR(C!pmP-rSRSSW-)6+8+d+P{yKXjix9O9T+~9Z z?{WsFcg&6lE*h22&moO(a$h&MvX60Ey4w&?(c~$9etGxNKPsGo@!?pB1%Bhtj`Da1`=W zN1I<%lTW$xDdR_U4z>=-E90x>gaa`IU;=uuiq(&V#77lOiQTK*WWEj$%_Vut{PB^i zvilDq4458cB#XZ_p@##*2MiGhdnq%cLtGVkF_l?kHNEvpMZHmyBrHt5+nB<3vG+$H8#MHU>o zC;Gx!)vqMAq+|%F8W1?{B3F{6^UzS|LQ>`Ct(rV0&n6N~hDk&Ltl*up3TSOWGh!nXvOX=M)HD1(6Yt;)Yj3665F7MlN~Y;= zG`@mavx3UBU!3Na=%~g$t@Q3J76=tLZU{@*vkK;Rf>HtHr%xxe>RT=YTPnKu1`0Tn zk7!xXIhsS#>9UM>>_>CJV9KfHY03S`s;Vts63M_db2IaudthFYccVQ{>#!J5Wm22@ z7XW3BPse#Do=1*+{xycmma^xz^JIT_5}If|wSBArzD>X=e_&hkELjW5cp?DpbQ`5@ z7_6e!9;%61M(j%AGpL!5C zKXFxNgv`rOw2stB5%yaR8IiU1ufc-xXde6j3MEq1n=Vo7mqru&sA{hj6PR0u&2BWV zvfbcwbdtbp(d030R<-6j&!xpx7@_L3s!+%_pvEX?k;{{>QvAbFYsl5YjqHZ!bJNEO zSmMg@Abu?RhiYdOvRwTI(q%W{dylFw6Y}GSpC4M$zq(8PMlrazn)mwl0j;(JLwG!< zBW1U1A^@>3m-%-;trz@)-Tvc5|Ay75t|ci~HFAkTwbzn}(HwPx9hL35O53LcF*i2> zL;x87C7HhaV}@P(6MMa~%_9ClJ&Fm{*zpC0qUV^IVJHRbs7)e0&XMrh_Hwr6eIa*)|CX-1%c#{@|A1 zdPlmNYe{+meIA=%<9|9tNmzr%=YG1L1VFI~cKR>KtQYmq*|Uc3nQ1#*ii>N(m{4V( z-k*CCJk^-%u^zW4ICPR~d~HKGqrL@qfM~jek||I3WnrY2=UIdlt|B7tRB=^kPOrVe zh$GQBK4VJk% collect() + dbDisconnect(dbcon) + ## group by result_id if requested + + if (group_by_results) search_result <- .group_nest_results(search_result) + + ## remove temporary fields + if (!is.null(temp_field)) + search_result <- + search_result %>% + select(!dplyr::any_of(gsub("^.*?[.]", "", temp_field))) + if (as_data_frame) 
search_result <- search_result %>% as.data.frame() + return(.add_tags(search_result, database_file)) } #' @rdname search_ecotox -#' @name search_query_ecotox +#' @name search_ecotox_lazy #' @export -search_query_ecotox <- function(search, output_fields = list_ecotox_fields("default"), group_by_results = TRUE) { +search_ecotox_lazy <- function(search, output_fields = list_ecotox_fields("default"), + compute = FALSE, ...) { ignored_fields <- !(output_fields %in% list_ecotox_fields("all")) if (any(ignored_fields)) warning(sprintf("The following fields are unknown and ignored: %s.", paste(output_fields[ignored_fields], collapse =", "))) output_fields <- output_fields[!ignored_fields] - if (!any(grepl("^results.", output_fields))) { - warning("Output fields should contain at least 1 field from table 'results'. Adding 'test_id'.") - output_fields <- c("results.test_id") - } - - ## identify key fields that are required for joining tables - db_links <- cbind(.db_specs, - do.call(rbind, lapply(strsplit(.db_specs$foreign_key, "\\(|\\)"), function(x) { - if (length(x) < 2) return(data.frame(foreign_table = "", foreign_field = "")) else - return(data.frame(foreign_table = x[1], foreign_field = x[2])) - }))) - db_links$is_key <- db_links$primary_key == "PRIMARY KEY" | db_links$foreign_key != "" | db_links$foreign_table != "" - key_output_fields <- .db_specs[db_links$is_key | paste(.db_specs$table, .db_specs$field_name, sep = ".") %in% output_fields,,drop = F] - output_fields <- .db_specs[paste(.db_specs$table, .db_specs$field_name, sep = ".") %in% output_fields,,drop = F] - - if (!is.list(search)) stop("Parameter 'search' needs to be a list!") - if (!all(unlist(lapply(search, is.list)))) stop("Each element of parameter 'search' should contain a list") - if (any(duplicated(names(search)))) stop("You have used duplicated search fields. 
Use each field only once in your search!") - search.tables <- do.call(rbind, lapply(names(search), function(fn) { - tables <- unique(.db_specs$table[.db_specs$field_name %in% fn]) - if (length(tables) == 0) stop(sprintf("Unknown search field: %s", fn)) - if (fn == "test_id") tables <- "tests" - x <- search[[fn]] - if (!all(names(x) %in% c("terms", "method"))) stop("Each search field can only contain two elements: 'terms' and 'method'.") - method <- match.arg(x[["method"]], c("exact", "contains")) - wildcard <- ifelse(method == "contains", "%", "") - collapse <- ifelse(method == "contains", " OR ", ", ") - prefix <- ifelse(method == "contains", sprintf("\"%s\" LIKE ", fn), "") - if (typeof(x$terms) != "character" || length(x$terms) == 0) stop("Provide at least 1 search term (type 'character')") - - terms <- paste(sprintf("%s\"%s%s%s\"", - prefix, - wildcard, - x$terms, - wildcard), - collapse = collapse) - if (method == "exact") { - terms <- sprintf("\"%s\" COLLATE NOCASE IN (%s)", fn, terms) - } - return (data.frame(table = tables, - terms = terms, - method = method)) - })) - search.tables <- rbind(search.tables, - data.frame(table = unique(c("results", "tests", with(output_fields, table[!(table %in% search.tables$table)]))), - terms = "", method = "")) - search.tables$id <- sprintf("search%03i", seq_len(nrow(search.tables))) - search.tables$select <- unlist(lapply(seq_len(nrow(search.tables)), function(i) { - out <- key_output_fields[key_output_fields$table == search.tables$table[i],,drop = F] - paste(paste(search.tables$id[i], sprintf("\"%s\"", out$field_name), sep = "."), collapse = ", ") - })) - search.tables$query <- - with(search.tables, - sprintf("SELECT %s FROM \"%s\" AS %s%s", - select, - table, - id, - ifelse(terms != "", sprintf(" WHERE %s", terms), "") - ) - ) - ## species and species_synonyms need to be combined before we can continue - if (any(search.tables$table == "species_synonyms")) { - sp_id <- search.tables$table == "species" - ss_id <- search.tables$table == "species_synonyms" - sp <- search.tables[sp_id,] - select <- gsub(sp$id, "syns", sp$select) - q <- search.tables$query[ss_id] - q <- sprintf("SELECT %s FROM species AS syns INNER JOIN (%s) USING(species_number)", - select, - q) - search.tables$query[sp_id] <- sprintf("SELECT * FROM (%s UNION ALL %s) AS spec", search.tables$query[sp_id], q) - search.tables$id[sp_id] <- "syns" - search.tables$select[sp_id] <- select - search.tables <- search.tables[!ss_id,] - } - search.tables$linked_to <- "" - search.tables$linked_by <- "" - search.tables$linked_from <- "" - j <- 1 - for (tab in search.tables$table[!(search.tables$table %in% c("results", "tests"))]) { - repeat { - i <- which(search.tables$table == tab) - links <- subset(db_links, (db_links$table == tab & db_links$foreign_table != "") | db_links$foreign_table == tab) - exclude <- c("chemical_carriers", "doses", "dose_responses", "dose_response_details", "dose_response_links", "dose_stat_method_codes") - exclude <- exclude[!(exclude %in% output_fields$table)] - links <- subset(links, !links$table %in% exclude) - inverselink <- subset(links, links$table == tab & links$field_name %in% c("test_id", "result_id")) - if (nrow(inverselink) > 0) { - search.tables$linked_to[i] <- inverselink$foreign_table - search.tables$linked_by[i] <- inverselink$foreign_field - search.tables$linked_from[i] <- inverselink$field_name - break - } else { - links <- links[1,] - if (links$table %in% c("results", "tests")) { - search.tables$linked_to[i] <- links$table - search.tables$linked_by[i] 
<- links$field_name - search.tables$linked_from[i] <- links$foreign_field - break - } - temp_sel <- db_links$field_name[db_links$table == links$table] - search.tables$select[i] <- gsub(search.tables$id[i], sprintf("target%03i", j), search.tables$select[i]) - search.tables$id[i] <- sprintf("target%03i", j) - search.tables$select[i] <- paste(c(search.tables$select[i], - sprintf("%s.\"%s\"", sprintf("source%03i", j), temp_sel)), collapse = ", ") - search.tables$query[i] <- - sprintf("SELECT %s FROM \"%s\" AS source%03i\nLEFT JOIN (%s) target%03i ON source%03i.%s = target%03i.\"%s\"", - search.tables$select[i], - links$table, - j, - search.tables$q[i], - j, j, - links$field_name, - j, - links$foreign_field) - if (tab == links$table) stop("Can't build an SQL query using these parameters.") - tab <- links$table - } - } - j <- j + 1 - } - tests.query.tabs <- subset(search.tables, search.tables$linked_to == "tests") - tests.query.withs <- paste0("WITH ", paste(sprintf("%s AS (\n%s\n)", tests.query.tabs$id, tests.query.tabs$query), collapse = ",\n")) - tests.query.select <- unique(sprintf("\"%s\"", key_output_fields$field_name[key_output_fields$table == "tests"])) - - results.query.tabs <- subset(search.tables, search.tables$table == "results") - results.query.where <- results.query.tabs$terms[results.query.tabs$terms != ""] - tests_from_results <- sprintf("tests.test_id IN (SELECT DISTINCT test_id FROM results WHERE %s)", results.query.where) - - tests.query.where <- subset(search.tables, search.tables$table == "tests" & search.tables$terms != "") - tests.query.where <- paste( - sprintf("(%s)", - c(if (length(tests_from_results) > 0) tests_from_results else NULL, - if (length(tests.query.where$terms) > 0) tests.query.where$terms else NULL, - with(subset(tests.query.tabs, tests.query.tabs$terms != ""), - sprintf("tests.\"%s\" IN (SELECT \"%s\" FROM \"%s\")", - linked_by, linked_from, id)))), - collapse = " AND ") - tests.query.tabs$extrawhere <- rep("", nrow(tests.query.tabs)) - tests.query.tabs$extrawhere <- sprintf(" WHERE \"%s\".\"%s\" IN (SELECT DISTINCT tests_agg.\"%s\" FROM tests_agg)", - tests.query.tabs$table, - tests.query.tabs$linked_from, - tests.query.tabs$linked_by) - tests.query <- sprintf("%s\nSELECT %s FROM tests%s%s\n", - tests.query.withs, - paste(tests.query.select, collapse = ", "), - ifelse(tests.query.where == "", "", " WHERE "), - tests.query.where) - - tests.query <- paste0("WITH tests_agg AS (", tests.query, - ")\nSELECT * FROM tests_agg\n", - paste(sprintf("LEFT JOIN (SELECT * FROM \"%s\"%s) AS %s ON tests_agg.\"%s\" = %s.\"%s\"", - tests.query.tabs$table, - tests.query.tabs$extrawhere, - tests.query.tabs$id, - tests.query.tabs$linked_by, - tests.query.tabs$id, - tests.query.tabs$linked_from), collapse = "\n")) - results.query.where <- paste( - c(sprintf("(%s)", results.query.where), - "results.test_id IN (SELECT DISTINCT test_id FROM tests_agg)"), - collapse = " AND ") - results.query <- sprintf(paste0("WITH tests_agg AS (%s)\n", - "SELECT * FROM (SELECT DISTINCT * FROM results WHERE %s)\n", - "INNER JOIN (SELECT * FROM tests_agg) USING(test_id)%s"), - tests.query, - results.query.where, - ifelse(group_by_results, "GROUP BY result_id", "") - ) - - results.query <- sprintf("SELECT %s FROM\n(%s)", - paste(sprintf("\"%s\"", unique(output_fields$field_name)), collapse = ", "), - results.query - ) - return(.add_tags(results.query)) + ## Note that the database connection is opened here, but not closed. 
It's the end-users responsibility + ## to close the connection when no longer required. + dbcon <- dbConnectEcotox(...) + search_result <- .search_ecotox_lazy_get_result_ids(search, dbcon) + search_result <- .search_ecotox_lazy_append_fields(dbcon, search_result, output_fields, compute, ...) + return(.add_tags(search_result, attributes(dbcon)$database_file)) } + +#' @rdname search_ecotox +#' @name search_query_ecotox +#' @export +search_query_ecotox <- function(search, output_fields = list_ecotox_fields("default"), ...) { + search_result <- search_ecotox_lazy(search, output_fields, ...) + database_file <- attributes(search_result)$database_file + search_result <- search_result %>% dbplyr::sql_render() + return(.add_tags(search_result, database_file)) +} \ No newline at end of file diff --git a/man/ECOTOXr.Rd b/man/ECOTOXr.Rd index 8d1117d..37f9338 100644 --- a/man/ECOTOXr.Rd +++ b/man/ECOTOXr.Rd @@ -26,7 +26,7 @@ First download a copy of the complete EPA database. This can be done by calling This may not always work on all machines as R does not always accept the website SSL certificate from the EPA. In those cases the zipped archive with the database files can be downloaded manually with a different (more forgiving) browser. The files from the zip archive can be extracted to a location of choice. Alternatively, -the user could try to use \code{\link{download_ecotox_data(ssl_verifypeer = 0L)}} when the download URL is trusted. +the user could try to use \code{\link{download_ecotox_data}(ssl_verifypeer = 0L)} when the download URL is trusted. } \item{ Next, an SQLite database needs to be build from the downloaded files. This will be done automatically when diff --git a/man/cas.Rd b/man/cas.Rd new file mode 100644 index 0000000..c53dad6 --- /dev/null +++ b/man/cas.Rd @@ -0,0 +1,158 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cas_handlers.r +\name{cas} +\alias{cas} +\alias{is.cas} +\alias{as.cas} +\alias{[[.cas} +\alias{[.cas} +\alias{[[<-.cas} +\alias{[<-.cas} +\alias{format.cas} +\alias{as.character.cas} +\alias{show.cas} +\alias{print.cas} +\alias{as.list.cas} +\alias{as.double.cas} +\alias{as.integer.cas} +\alias{c.cas} +\alias{as.data.frame.cas} +\title{Functions for handling chemical abstract service (CAS) registry numbers} +\usage{ +cas(length = 0L) + +is.cas(x) + +as.cas(x) + +\method{[[}{cas}(x, i) + +\method{[}{cas}(x, i) + +\method{[[}{cas}(x, i) <- value + +\method{[}{cas}(x, i) <- value + +\method{format}{cas}(x, hyphenate = TRUE, ...) + +\method{as.character}{cas}(x, ...) + +show.cas(x, ...) + +\method{print}{cas}(x, ...) + +\method{as.list}{cas}(x, ...) + +\method{as.double}{cas}(x, ...) + +\method{as.integer}{cas}(x, ...) + +\method{c}{cas}(...) + +\method{as.data.frame}{cas}(...) +} +\arguments{ +\item{length}{A non-negative \code{integer} specifying the desired length. Double values will be coerced to +\code{integer}: supplying an argument of length other than one is an error.} + +\item{x}{Object from which data needs to be extracted or replaced, or needs to be coerced into a specific +format. For nearly all of the functions documented here, this needs to be an object of the S3 class 'cas', +which can be created with \code{as.cas}. For \code{as.cas}, \code{x} can be a \code{character} (CAS registry number +with or without hyphenation) or a \code{numeric} value. Note that \code{as.cas} will only accept correctly +formatted and valid CAS registry numbers.} + +\item{i}{Index specifying element(s) to extract or replace. 
See also \code{\link[base:Extract]{Extract}}.}
+
+\item{value}{A replacement value, which can be anything that can be converted into an S3 cas-class object with \code{as.cas}.}
+
+\item{hyphenate}{A \code{logical} value indicating whether the formatted CAS number needs to be hyphenated.
+Default is \code{TRUE}.}
+
+\item{...}{Arguments passed to other functions}
+}
+\value{
+Functions \code{cas}, \code{c} and \code{as.cas} return S3 class 'cas' objects. Coercion functions
+(starting with 'as') return the object as specified by their respective function names (i.e., \code{integer},
+\code{double}, \code{character}, \code{list} and \code{data.frame}). The \code{show.cas} and \code{print} functions
+also return formatted \code{character}s. The function \code{is.cas} will return a single \code{logical} value,
+indicating whether \code{x} is a valid S3 cas-class object. The square brackets return the selected element(s),
+or the \code{vector} of cas objects where the selected elements are replaced by \code{value}.
+}
+\description{
+Functions for handling chemical abstract service (CAS) registry numbers
+}
+\details{
+In the database \href{https://en.wikipedia.org/wiki/Chemical_Abstracts_Service}{CAS registry} numbers are stored
+as text (type \code{character}). As CAS numbers can consist of a maximum of 10 digits (plus two hyphens) this means
+that each CAS number can consume up to 12 bytes of memory or disk space. By storing the data numerically, only
+5 bytes are required. These functions provide the means to handle CAS registry numbers and coerce from and to
+different formats and types.
+}
+\examples{
+## This will generate a vector of cas objects containing 10
+## fictitious (0-00-0), but valid registry numbers:
+cas(10)
+
+## This is a cas-object:
+is.cas(cas(0L))
+
+## This is not a cas-object:
+is.cas(0L)
+
+## Three different ways of creating a cas object from
+## Benzene's CAS registry number (the result is the same)
+as.cas("71-43-2")
+as.cas("71432")
+as.cas(71432L)
+
+## This is one way of creating a vector with multiple CAS registry numbers:
+cas_data <- as.cas(c("64175", "71432", "58082"))
+
+## This is how you select specific element(s) from the vector:
+cas_data[2:3]
+cas_data[[2]]
+
+## You can also replace specific elements in the vector:
+cas_data[1] <- "7440-23-5"
+cas_data[[2]] <- "129-00-0"
+
+## You can format CAS numbers with or without hyphens:
+format(cas_data, TRUE)
+format(cas_data, FALSE)
+
+## The same can be achieved using as.character
+as.character(cas_data, TRUE)
+as.character(cas_data, FALSE)
+
+## There are also show and print methods available:
+show(cas_data)
+print(cas_data)
+
+## Numeric values can be obtained from CAS using as.numeric, as.double or as.integer
+as.numeric(cas_data)
+
+## Be careful, however. Some CAS numbers cannot be represented by R's 32-bit integers
+## and will produce NA's.
This will work OK: +huge_cas <- as.cas("9999999-99-5") + +\dontrun{ +## This will not: +as.integer(huge_cas) +} + +## The trick applied by this package is that the final +## validation digit is stored separately as attribute: +unclass(huge_cas) + +## This is how cas objects can be concatenated: +cas_data <- c(huge_cas, cas_data) + +## This will create a data.frame +as.data.frame(cas_data) + +## This will create a list: +as.list(cas_data) +} +\author{ +Pepijn de Vries +} diff --git a/man/download_ecotox_data.Rd b/man/download_ecotox_data.Rd index d062868..5b55dcd 100644 --- a/man/download_ecotox_data.Rd +++ b/man/download_ecotox_data.Rd @@ -36,7 +36,7 @@ This function will download the required data and build the database. \details{ This function will attempt to find the latest download url for the ECOTOX database from the \href{https://cfpub.epa.gov/ecotox/index.cfm}{EPA website} (see \code{\link{get_ecotox_url}()}). -When found it will attempt to download the zipped archive containing all required data. This data is than +When found it will attempt to download the zipped archive containing all required data. This data is then extracted and a local copy of the database is build. Use '\code{\link{suppressMessages}}' to suppress the progress report. diff --git a/man/get_ecotox_url.Rd b/man/get_ecotox_url.Rd new file mode 100644 index 0000000..2f4c88e --- /dev/null +++ b/man/get_ecotox_url.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/init.r +\name{get_ecotox_url} +\alias{get_ecotox_url} +\title{Get ECOTOX download URL from EPA website} +\usage{ +get_ecotox_url(...) +} +\arguments{ +\item{...}{arguments passed on to \code{\link[httr]{GET}}} +} +\value{ +Returns a \code{character} string containing the download URL of the latest version of the EPA ECOTOX +database. +} +\description{ +This function downloads the webpage at \url{https://cfpub.epa.gov/ecotox/index.cfm}. It then searches for the +download link for the complete ECOTOX database and extract its URL. +} +\details{ +This function is called by \code{\link{download_ecotox_data}} which tries to download the file from the resulting +URL. On some machines this fails due to issues with the SSL certificate. The user can try to download the file +by using this URL in a different browser (or on a different machine). Alternatively, the user could try to use +\code{\link{download_ecotox_data}(ssl_verifypeer = 0L)} when the download URL is trusted. +} +\examples{ +\dontrun{ +get_ecotox_url() +} +} +\author{ +Pepijn de Vries +} diff --git a/man/get_path.Rd b/man/get_path.Rd index 407857c..f5bd4bb 100644 --- a/man/get_path.Rd +++ b/man/get_path.Rd @@ -27,7 +27,8 @@ Obtain the local path to where the ECOTOX database is (or will be) placed. } \details{ It can be useful to know where the database is located on your disk. This function -returns the location as provided by \code{\link[rappdirs]{app_dir}}. +returns the location as provided by \code{\link[rappdirs]{app_dir}}, or as +specified by you using \code{options(ECOTOXr_path = "mypath")}. 
} \examples{ get_ecotox_path() diff --git a/man/list_ecotox_fields.Rd b/man/list_ecotox_fields.Rd index acf9857..034fded 100644 --- a/man/list_ecotox_fields.Rd +++ b/man/list_ecotox_fields.Rd @@ -4,13 +4,18 @@ \alias{list_ecotox_fields} \title{List the field names that are available from the ECOTOX database} \usage{ -list_ecotox_fields(which = c("default", "full", "all"), include_table = TRUE) +list_ecotox_fields( + which = c("default", "extended", "full", "all"), + include_table = TRUE +) } \arguments{ \item{which}{A \code{character} string that specifies which fields to return. Can be any of: -'\code{default}': returns default output field names; '\code{all}': returns all fields; or -'\code{full}': returns all except fields from table 'chemical_carriers', 'media_characteristics', 'doses', 'dose_responses', - 'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'.} +'\code{default}': returns default output field names; '\code{all}': returns all fields; +'\code{extended}': returns all fields of the default tables; or +'\code{full}': returns all fields except those from tables 'chemical_carriers', +'media_characteristics', 'doses', 'dose_responses', +'dose_response_details', 'dose_response_links' and 'dose_stat_method_codes'.} \item{include_table}{A \code{logical} value indicating whether the table name should be included as prefix. Default is \code{TRUE}.} diff --git a/man/reexports.Rd b/man/reexports.Rd new file mode 100644 index 0000000..a8f8239 --- /dev/null +++ b/man/reexports.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/imports.r +\name{\%>\%} +\alias{\%>\%} +\title{Objects exported from other packages} +\description{ +Objects imported and exported from other packages. See original documentation for more details. +} +\details{ +\describe{ +\item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} +} +} diff --git a/man/search_ecotox.Rd b/man/search_ecotox.Rd index baaebac..8ef94b8 100644 --- a/man/search_ecotox.Rd +++ b/man/search_ecotox.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/wrappers.r \name{search_ecotox} \alias{search_ecotox} +\alias{search_ecotox_lazy} \alias{search_query_ecotox} \title{Search and retrieve toxicity records from the database} \usage{ @@ -9,14 +10,19 @@ search_ecotox( search, output_fields = list_ecotox_fields("default"), group_by_results = TRUE, + compute = FALSE, + as_data_frame = TRUE, ... ) -search_query_ecotox( +search_ecotox_lazy( search, output_fields = list_ecotox_fields("default"), - group_by_results = TRUE + compute = FALSE, + ... ) + +search_query_ecotox(search, output_fields = list_ecotox_fields("default"), ...) } \arguments{ \item{search}{A named \code{list} containing the search terms. The names of the elements should refer to @@ -25,7 +31,7 @@ obtain a list of available field names. Each element in that list should contain another list with at least one element named 'terms'. This should contain a \code{vector} of \code{character} strings with search terms. Optionally, a second element -named 'method' can be provided which should be set to either '\code{contain}' (default, when missing) or +named 'method' can be provided which should be set to either '\code{contains}' (default, when missing) or '\code{exact}'. In the first case the query will match any record in the indicated field that contains the search term. In case of '\code{exact}' it will only return exact matches. Note that searches are not case sensitive, but are picky with special (accented) characters. 
While building the local database
@@ -58,9 +64,18 @@ similarly, multiple doses can also be linked to a single test result.
 
 By default the search results are grouped by test results. As a result not all doses or chemical carriers
 may be displayed in the output. Set the \code{group_by_results} parameter to \code{FALSE} in order to force SQLite
-to output all data (all carriers and doses). But beware that test results may be duplicated in those cases.}
+to output all data (e.g., all carriers). But beware that test results may be duplicated in those cases.}
+
+\item{compute}{The ECOTOXr package tries to construct database queries as lazily as possible, meaning that R
+moves as much of the heavy lifting as possible to the database. When your search becomes complicated (e.g., when
+including many output fields), you may run into trouble and hit the SQL parser limits. In those cases you can set
+this parameter to \code{TRUE}. Database queries are then computed in the process of joining tables. This is generally
+slower. Alternatively, you could try to include fewer output fields in order to simplify the query.}
+
+\item{as_data_frame}{A \code{logical} value indicating whether the result should be converted into a \code{data.frame}
+(default is \code{TRUE}). When set to \code{FALSE} the data will be returned as a \code{\link[dplyr:tibble]{tbl_df}}.}
 
-\item{...}{Arguments passed to \code{\link{dbConnectEcotox}}. You can use this when the database
+\item{...}{Arguments passed to \code{\link{dbConnectEcotox}} and other functions. You can use this when the database
 is not located at the default path (\code{\link{get_ecotox_path}()}).}
 }
 \value{
@@ -116,7 +131,7 @@ if (check_ecotox_availability()) {
     method = "exact"
   )
 )
-  ## numbers in result each represent a unique test id from the database
+  ## rows in result each represent a unique test id from the database
   result <- search_ecotox(search)
   query <- search_query_ecotox(search)
   cat(query)
diff --git a/tests/testthat/test_cas.r b/tests/testthat/test_cas.r
new file mode 100644
index 0000000..42cac05
--- /dev/null
+++ b/tests/testthat/test_cas.r
@@ -0,0 +1,56 @@
+test_that("Function cas works and generates the correct length", {
+  expect_length(cas(10), 10)
+})
+
+test_that("Converting either a numerical or a character to cas results in the same object", {
+  expect_identical(as.cas(71432L), as.cas("71432"))
+})
+
+test_that("Hyphenation can be omitted in CAS", {
+  expect_identical(as.cas("71-43-2"), as.cas("71432"))
+})
+
+test_that("Concatenating cas objects works correctly", {
+  expect_equal(length(c(as.cas(71432L), "71432")), 2)
+})
+
+test_that("is.cas doesn't throw errors", {
+  expect_true(is.cas(c(as.cas(71432L), "71432")))
+})
+
+test_that("Extract and replace methods don't throw errors for cas objects", {
+  expect_true({
+    cas_data <- cas(100)
+    cas_data[4:10]
+    cas_data[[50]]
+    cas_data[3:4] <- as.cas(rep("71432", 2))
+    cas_data[[50]] <- as.cas(11)
+    TRUE
+  })
+})
+
+test_that("as.data.frame.cas doesn't throw errors", {
+  expect_true({
+    "data.frame" %in% class(as.data.frame(c(cas(10), as.cas("71432"))))
+  })
+})
+
+test_that("as.integer.cas returns an integer", {
+  expect_type(as.integer(as.cas("71432")), "integer")
+})
+
+test_that("as.double.cas returns a double", {
+  expect_type(as.double(as.cas("71432")), "double")
+})
+
+test_that("as.character.cas returns a character", {
+  expect_type(as.character(as.cas("71432")), "character")
+})
+
+test_that("as.list.cas returns a list", {
+  expect_type(as.list(as.cas(rep("71432", 4))), "list")
+})
+
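+## A small extra sketch, assuming format() accepts the hyphenate flag documented in
+## man/cas.Rd: hyphenated and plain notations should round-trip to the same cas object.
+test_that("Formatted CAS numbers round-trip through as.cas", {
+  expect_identical(as.cas(format(as.cas("71432"), TRUE)),
+                   as.cas(format(as.cas("71432"), FALSE)))
+})
+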
+test_that("as.cas returns a cas", { + expect_s3_class(as.cas("71432"), "cas") +}) \ No newline at end of file diff --git a/tests/testthat/test_that.r b/tests/testthat/test_that.r index ddc5304..328b192 100644 --- a/tests/testthat/test_that.r +++ b/tests/testthat/test_that.r @@ -5,22 +5,28 @@ check_db <- function() { } simple_search1 <- if (check_ecotox_availability()) { - suppressWarnings(search_ecotox( + suppressMessages(suppressWarnings(search_ecotox( list(latin_name = list(terms = "Daphnia magna"), chemical_name = list(terms = "benzene")), - c(list_ecotox_fields(), "results.result_id", "results.test_id", "tests.reference_number"))) + c(list_ecotox_fields(), "results.result_id", "results.test_id", "tests.reference_number"), + compute = TRUE))) } else NULL simple_search2 <- if (check_ecotox_availability()) { - suppressWarnings({search_ecotox(list(test_id = list(terms = "1")))}) + suppressMessages(suppressWarnings({search_ecotox(list(test_id = list(terms = "1", method = "exact")))})) } else NULL simple_search3 <- if (check_ecotox_availability()) { - suppressWarnings({search_ecotox(list(latin_name = list(terms = "perdix perdix"), test_cas = list(terms="1336363")), - c(list_ecotox_fields(), "results.result_id", "results.test_id", "tests.reference_number"))}) + suppressMessages(suppressWarnings({search_ecotox(list(latin_name = list(terms = "perdix perdix"), + test_cas = list(terms = "1336363")), + c(list_ecotox_fields(), "results.result_id", "results.test_id", "tests.reference_number"))})) +} else NULL + +search_q <- if (check_ecotox_availability()) { + suppressMessages(suppressWarnings({search_query_ecotox(list(test_id = list(terms = "1", method = "exact")))})) } else NULL throws_errors <- function(expression) { - result <- F + result <- FALSE tryCatch(expression, error = function(e) {result <<- T}, warning = function(w) {invisible(NULL)}) result } @@ -142,12 +148,11 @@ test_that("A simple search results in expected table", { expect_true({ ## Compare result with anticipated ids: all( - simple_search1$test_id %in% - c("1020021", "1020022", "1020023", "1022155", "1031085", "1031086", "1031087", "1031088", "1031196", "1031197", - "1064409", "1064410", "1064411", "1072942", "1072943", "1072944", "1083684", "1083685", "1083686", "1098939", - "1098940", "1098941", "1098942", "1098943", "1098944", "1098945", "1098946", "1098947", "1098948", "1098949", - "1098950", "1125798", "1136665", "1136666", "1142641", "1152541", "1185661", "1185662", "1185663", "1187783", - "1189253", "1237724", "2113979", "2114101", "2194929") + c("1020021", "1020022", "1020023", "1022155", "1031085", "1031086", "1031087", "1031088", "1031196", "1031197", + "1064409", "1072942", "1072943", "1072944", "1083684", "1083685", "1083686", "1098939", "1098940", "1098941", + "1098942", "1098943", "1098944", "1098945", "1098946", "1098947", "1098948", "1098949", "1098950", "1125798", + "1136665", "1136666", "1142641", "1152541", "1185661", "1185662", "1185663", "1187783", "1189253", "1237724", + "2113979", "2114101", "2194929") %in% simple_search1$test_id ) }) }) @@ -230,8 +235,7 @@ test_that("Default field names are fewer than all field names", { test_that("A simple search query returns a single element of type character", { check_db() expect_true({ - search <- search_query_ecotox(list(test_id = list(terms = "1"))) - length(search == 1) && typeof(search) == "character" + length(search_q) == 1 && typeof(search_q) == "character" }) }) @@ -291,3 +295,4 @@ test_that("When multiple doses are linked to a result, no duplicates are returne 
test_that("get_ecotox_info doesn't throw an error.", { expect_false({ throws_errors(get_ecotox_info()) }) }) +