diff --git a/.github/workflows/R-CMD-check-latest.yaml b/.github/workflows/R-CMD-check-latest.yaml index df7c1649..081d0802 100644 --- a/.github/workflows/R-CMD-check-latest.yaml +++ b/.github/workflows/R-CMD-check-latest.yaml @@ -23,10 +23,8 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: 'release'} - - {os: ubuntu-20.04, r: 'devel'} - - {os: ubuntu-18.04, r: 'release'} - - {os: ubuntu-18.04, r: 'devel'} + - {os: ubuntu-22.04, r: 'release'} + - {os: ubuntu-22.04, r: 'devel'} - {os: windows-latest, r: 'release'} - {os: macOS-latest, r: 'release'} @@ -35,61 +33,29 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore (or define new) R package cache - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ matrix.config.os }}-${{ secrets.CACHE_VERSION }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: false - - name: Install system dependencies (Linux) + - name: Add ubuntugis-unstable PPA (Linux) if: runner.os == 'Linux' run: | - sudo add-apt-repository -y ppa:ubuntugis/ubuntugis-unstable - sudo apt-get update - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'release <- system("lsb_release -rs", intern = TRUE); writeLines(remotes::system_requirements("ubuntu", release))') + # Add ubuntugis-unstable PPA + sudo mkdir -p /root/.gnupg + sudo chmod 700 /root/.gnupg + sudo gpg --no-default-keyring --keyring /etc/apt/keyrings/ubuntugis-unstable-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 6B827C12C2D425E227EDCA75089EBE08314DF160 + sudo sh -c 'echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/ubuntugis-unstable-archive-keyring.gpg] http://ppa.launchpad.net/ubuntugis/ubuntugis-unstable/ubuntu `lsb_release -c -s` main" > /etc/apt/sources.list.d/ubuntugis-unstable.list' - - name: Install system dependencies (macOS) - if: runner.os == 'macOS' - run: | - brew install pkg-config - brew install udunits - brew install gdal - - - name: Install package dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - options(rmarkdown.html_vignette.check_title = FALSE) - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/setup-r-dependencies@v2 with: - name: ${{ matrix.config.os }}-r${{ matrix.config.r }}-results - path: check + cache-version: ${{ secrets.CACHE_VERSION }} + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 621d707f..6ad57a29 100644 --- a/.github/workflows/R-CMD-check.yaml +++ 
b/.github/workflows/R-CMD-check.yaml @@ -17,8 +17,7 @@ jobs: fail-fast: false matrix: config: - - {os: ubuntu-20.04, r: 'release'} - - {os: ubuntu-18.04, r: 'release'} + - {os: ubuntu-22.04, r: 'release'} - {os: windows-latest, r: 'release'} - {os: macOS-latest, r: 'release'} @@ -28,7 +27,7 @@ jobs: R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/.github/workflows/site-deploy.yaml b/.github/workflows/site-deploy.yaml index 03d53dbf..8a749c99 100644 --- a/.github/workflows/site-deploy.yaml +++ b/.github/workflows/site-deploy.yaml @@ -8,11 +8,11 @@ name: site-deploy jobs: site-deploy: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 @@ -23,14 +23,9 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: ${{ secrets.CACHE_VERSION }} - extra-packages: local::. + extra-packages: local::., pkgdown@1.6.1 needs: website - - name: Install pkgdown version - run: | - remotes::install_version("pkgdown", "1.6.1") - shell: Rscript {0} - - name: Deploy package run: | git config --local user.email "actions@github.com" diff --git a/.github/workflows/site-devel.yaml b/.github/workflows/site-devel.yaml index a1c13941..8df634a1 100644 --- a/.github/workflows/site-devel.yaml +++ b/.github/workflows/site-devel.yaml @@ -8,11 +8,11 @@ name: site-devel jobs: site-devel: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: r-lib/actions/setup-pandoc@v2 @@ -23,20 +23,15 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: ${{ secrets.CACHE_VERSION }} - extra-packages: local::. 
+ extra-packages: local::., pkgdown@1.6.1 needs: website - - name: Install pkgdown version - run: | - remotes::install_version("pkgdown", "1.6.1") - shell: Rscript {0} - - name: Build site run: | Rscript -e 'options(rmarkdown.html_vignette.check_title = FALSE); pkgdown::build_site()' - name: Upload pkgdown-site as artifact - uses: actions/upload-artifact@main + uses: actions/upload-artifact@v3 with: name: pkgdown-site path: docs diff --git a/.zenodo.json b/.zenodo.json index 26ad7a26..19b6fd20 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -4,7 +4,7 @@ "license": "GPL-3.0", "upload_type": "software", "access_right": "open", - "version": "0.8.0", + "version": "0.9.0", "creators": [ { "name": "Vanderhaeghe, Floris", diff --git a/DESCRIPTION b/DESCRIPTION index 7e30d9c6..f14853a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: n2khab Title: Providing Preprocessed Reference Data for Flemish Natura 2000 Habitat Analyses -Version: 0.8.0 +Version: 0.9.0 Authors@R: c( person("Floris", "Vanderhaeghe", email = "floris.vanderhaeghe@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6378-6229")), person("Toon", "Westra", email = "toon.westra@inbo.be", role = c("aut"), comment = c(ORCID = "0000-0003-2478-9459")), @@ -14,6 +14,7 @@ Authors@R: c( person("Luc", "Denys", email = "luc.denys@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0002-1841-6579")), person("An", "Leyssen", email = "an.leyssen@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0003-3537-286X")), person("Patrik", "Oosterlynck", email = "patrik.oosterlynck@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0002-5712-0770")), + person("Jeroen", "Vanden Borre", email = "jeroen.vandenborre@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0002-0153-7243")), person("Nathalie", "Cools", email = "nathalie.cools@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0002-7059-2318")), person("Bruno", "De Vos", email = "bruno.devos@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0001-9523-3453")), person("Suzanna", "Lettens", email = "suzanna.lettens@inbo.be", role = c("ctb"), comment = c(ORCID = "0000-0001-5032-495X")), @@ -30,38 +31,38 @@ Depends: R (>= 3.5.0) Imports: assertthat, + curl, dplyr, forcats, git2rdata (>= 0.4.0), magrittr, plyr, purrr, + remotes, rlang, rprojroot, sf, stringr, - tibble, tidyr (>= 1.0.0), - withr Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Suggests: bib2df, - curl, digest, googledrive, jsonlite, knitr, mapview, openssl, - parallel, - raster (>= 3.3-16), + raster (>= 3.6-3), readxl, - remotes, rmarkdown, + testthat (>= 3.0.0), tidyverse, tools, units, - utils + utils, + withr VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 69470902..bd954866 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ importFrom(assertthat,assert_that) importFrom(assertthat,is.flag) importFrom(assertthat,is.string) importFrom(assertthat,noNA) +importFrom(curl,nslookup) importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,anti_join) @@ -85,6 +86,8 @@ importFrom(sf,"st_crs<-") importFrom(sf,read_sf) importFrom(sf,st_centroid) importFrom(sf,st_drop_geometry) +importFrom(sf,st_is_valid) +importFrom(sf,st_make_valid) importFrom(sf,st_transform) importFrom(stats,setNames) importFrom(stringr,fixed) @@ -102,5 +105,5 @@ importFrom(tidyr,gather) importFrom(tidyr,nest) importFrom(tidyr,spread) importFrom(tidyr,unnest) +importFrom(utils,packageDescription) importFrom(utils,packageVersion) 
-importFrom(withr,with_options) diff --git a/NEWS.md b/NEWS.md index b56c8a79..f15b1f15 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +# n2khab 0.9.0 (2023-11-22) + +- `download_zenodo()` has been improved (#169): + - fixes to work with the renewed Zenodo website; + - speed up the initiation of a multi-file download; + - set concurrent (parallel) downloading as the default for multi-file records; + - add unit tests to regularly check that the function still works. +- `read_watersurfaces()` gained extra capabilities (#168): + - data source version `watersurfaces_v1.2` is now supported; + - a new `fix_geom` argument allows fixing invalid or corrupt geometries on the fly. + This behaviour is turned off by default (see #60). +- Package startup messages have been updated, including a check on whether the latest release is being used (#170, [502c3a2](https://github.com/inbo/n2khab/commit/502c3a2)). +- Internally, an update has been applied ([9691ec2](https://github.com/inbo/n2khab/commit/9691ec2)) to googlesheets authentication, used in updating the built-in textual data sources. +These data sources have not changed since the previous release. + # n2khab 0.8.0 (2022-11-18) - Reference list `namelist` has been updated with improved type names (#163; thanks @jeroenvdborre). diff --git a/R/GRTSmh.R b/R/GRTSmh.R index ab356d32..002e394a 100644 --- a/R/GRTSmh.R +++ b/R/GRTSmh.R @@ -44,28 +44,30 @@ #' #' @export convert_dec_to_base4frac <- - function(x) { - - sapply(x, - - function(x) { - ifelse(is.na(x), NA, - as.double( - ifelse(x > 0,{ - d <- floor(log(x, 4) + 1) - paste(c("0", "1", "2", "3")[ - as.integer(abs(diff(x %% 4 ^ seq(d, 0))) %/% - 4 ^ seq(d - 1, 0) + 1)], - collapse = "") - }, - '0' - )) / 10 ^ 13 - ) - } - - ) - - } + function(x) { + sapply( + x, + function(x) { + ifelse(is.na(x), NA, + as.double( + ifelse(x > 0, + { + d <- floor(log(x, 4) + 1) + paste( + c("0", "1", "2", "3")[ + as.integer(abs(diff(x %% 4^seq(d, 0))) %/% + 4^seq(d - 1, 0) + 1) + ], + collapse = "" + ) + }, + "0" + ) + ) / 10^13 + ) + } + ) + } @@ -147,48 +149,50 @@ convert_dec_to_base4frac <- #' # vector, level 5: #' convert_base4frac_to_dec(c(NA, 0.1010101010101), level = 5) #' # same vector, all sensible levels computed: -#' sapply(0:12, function(i) convert_base4frac_to_dec(c(NA, 0.1010101010101), -#' level = i) -#' ) +#' sapply(0:12, function(i) { +#' convert_base4frac_to_dec(c(NA, 0.1010101010101), +#' level = i +#' ) +#' }) #' options(oldoption) #' #' @export #' @importFrom dplyr %>% -#' @importFrom withr with_options #' @importFrom stringr str_sub str_pad str_split convert_base4frac_to_dec <- - function(x, level) { - - with_options( - c(scipen = 999, - digits = 15), { - - multipliers <- as.matrix(4 ^ ((13 - level - 1):0)) - - sapply(x, - - function(x, level2 = level) { - ifelse(is.na(x), NA, { - a <- x * 10 ^ level2 - a <- round(a - floor(a), 13 - level2) - a <- a %>% - as.character %>% - str_sub(start = 3) %>% - str_pad(width = 13 - level2, - side = "right", - pad = "0") %>% - str_split("", simplify = TRUE) %>% - as.numeric - t(a) %*% multipliers - } - ) - } - + function(x, level) { + require_pkgs("withr") + withr::with_options( + c( + scipen = 999, + digits = 15 + ), + { + multipliers <- as.matrix(4^((13 - level - 1):0)) + + sapply( + x, + function(x, level2 = level) { + ifelse(is.na(x), NA, { + a <- x * 10^level2 + a <- round(a - floor(a), 13 - level2) + a <- a %>% + as.character() %>% + str_sub(start = 3) %>% + str_pad( + width = 13 - level2, + side = "right", + pad = "0" + ) %>% + str_split("", simplify = TRUE)
%>% + as.numeric() + t(a) %*% multipliers + }) + } ) - - }) - - } + } + ) + } @@ -311,30 +315,35 @@ convert_base4frac_to_dec <- #' @export #' @importFrom stringr str_c read_GRTSmh <- - function(file = file.path(fileman_up("n2khab_data"), - c("10_raw/GRTSmaster_habitats/GRTSmaster_habitats.tif", - "20_processed/GRTSmh_brick/GRTSmh_brick.tif")), - brick = FALSE) { - - require_pkgs("raster") - - if (brick) { - if (missing(file)) { - b <- raster::brick(file[2])} else { - b <- raster::brick(file) - } - names(b) <- str_c("level", 0:(raster::nlayers(b) - 1)) - result <- b - } else { - if (missing(file)) { - r <- raster::raster(file[1])} else { - r <- raster::raster(file) - } - result <- r - } - raster::crs(result) <- "EPSG:31370" - return(result) + function(file = file.path( + fileman_up("n2khab_data"), + c( + "10_raw/GRTSmaster_habitats/GRTSmaster_habitats.tif", + "20_processed/GRTSmh_brick/GRTSmh_brick.tif" + ) + ), + brick = FALSE) { + require_pkgs("raster") + + if (brick) { + if (missing(file)) { + b <- raster::brick(file[2]) + } else { + b <- raster::brick(file) + } + names(b) <- str_c("level", 0:(raster::nlayers(b) - 1)) + result <- b + } else { + if (missing(file)) { + r <- raster::raster(file[1]) + } else { + r <- raster::raster(file) + } + result <- r } + raster::crs(result) <- "EPSG:31370" + return(result) + } @@ -423,15 +432,16 @@ read_GRTSmh <- #' #' @export read_GRTSmh_base4frac <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/GRTSmh_base4frac/GRTSmh_base4frac.tif")) { + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/GRTSmh_base4frac/GRTSmh_base4frac.tif" + )) { + require_pkgs("raster") - require_pkgs("raster") - - r <- raster::raster(file) - raster::crs(r) <- "EPSG:31370" - return(r) - } + r <- raster::raster(file) + raster::crs(r) <- "EPSG:31370" + return(r) + } @@ -574,56 +584,39 @@ read_GRTSmh_base4frac <- #' read_sf #' st_crs<- read_GRTSmh_diffres <- - function(dir = file.path(fileman_up("n2khab_data"), "20_processed/GRTSmh_diffres"), - level, - polygon = FALSE) { - - if (!(level %in% 1:9 & level %% 1 == 0)) { - stop("level must be an integer in the range 1 to 9.") - } - - if (polygon) { - - if (!(level %in% 4:9)) { - stop("When polygon = TRUE, level must be an integer in the range 4 to 9.") - } - - p <- read_sf(file.path(dir, - "GRTSmh_diffres.gpkg"), - layer = str_c("GRTSmh_polygonized_level", level)) - suppressWarnings(st_crs(p) <- 31370) - p - - } else { - - require_pkgs("raster") - - r <- raster::raster(file.path(dir, - str_c("GRTSmh_diffres.", - level, ".tif"))) - names(r) <- str_c("level", level) - raster::crs(r) <- "EPSG:31370" - r - - } + function(dir = file.path(fileman_up("n2khab_data"), "20_processed/GRTSmh_diffres"), + level, + polygon = FALSE) { + if (!(level %in% 1:9 & level %% 1 == 0)) { + stop("level must be an integer in the range 1 to 9.") } - - - - - - - - - - - - - - - - - - - + if (polygon) { + if (!(level %in% 4:9)) { + stop("When polygon = TRUE, level must be an integer in the range 4 to 9.") + } + + p <- read_sf( + file.path( + dir, + "GRTSmh_diffres.gpkg" + ), + layer = str_c("GRTSmh_polygonized_level", level) + ) + suppressWarnings(st_crs(p) <- 31370) + p + } else { + require_pkgs("raster") + + r <- raster::raster(file.path( + dir, + str_c( + "GRTSmh_diffres.", + level, ".tif" + ) + )) + names(r) <- str_c("level", level) + raster::crs(r) <- "EPSG:31370" + r + } + } diff --git a/R/datawrangling.R b/R/datawrangling.R index cad19a27..c46826ec 100644 --- a/R/datawrangling.R +++ b/R/datawrangling.R @@ -73,30 
+73,30 @@ #' @examples #' library(dplyr) #' x <- -#' read_scheme_types() %>% -#' filter(scheme == "GW_05.1_terr") +#' read_scheme_types() %>% +#' filter(scheme == "GW_05.1_terr") #' expand_types(x) #' expand_types(x, strict = FALSE) #' #' x <- -#' read_scheme_types() %>% -#' filter(scheme == "GW_05.1_terr") %>% -#' group_by(typegroup) +#' read_scheme_types() %>% +#' filter(scheme == "GW_05.1_terr") %>% +#' group_by(typegroup) #' expand_types(x) #' expand_types(x, use_grouping = FALSE) # equals above example #' #' x <- -#' tribble( -#' ~mycode, ~obs, -#' "2130", 5, -#' "2190", 45, -#' "2330_bu", 8, -#' "2330_dw", 8, -#' "5130_hei", 7, -#' "6410_mo", 78, -#' "6410_ve", 4, -#' "91E0_vn", 10 -#' ) +#' tribble( +#' ~mycode, ~obs, +#' "2130", 5, +#' "2190", 45, +#' "2330_bu", 8, +#' "2330_dw", 8, +#' "5130_hei", 7, +#' "6410_mo", 78, +#' "6410_ve", 4, +#' "91E0_vn", 10 +#' ) #' expand_types(x, type_var = "mycode") #' expand_types(x, type_var = "mycode", strict = FALSE) #' @@ -123,37 +123,34 @@ expand_types <- function(x, type_var = "type", use_grouping = TRUE, strict = TRUE) { - - assert_that(inherits(x, "data.frame")) - assert_that(is.string(type_var)) - assert_that(type_var %in% colnames(x), - msg = "type_var must be a variable name in x.") - assert_that(is.flag(use_grouping), noNA(use_grouping)) - assert_that(is.flag(strict), noNA(strict)) - - if (!use_grouping) { - - expand_types_plain(x = x, - type_var = type_var, - strict = strict) - - } else { - + assert_that(inherits(x, "data.frame")) + assert_that(is.string(type_var)) + assert_that(type_var %in% colnames(x), + msg = "type_var must be a variable name in x." + ) + assert_that(is.flag(use_grouping), noNA(use_grouping)) + assert_that(is.flag(strict), noNA(strict)) + + if (!use_grouping) { + expand_types_plain( + x = x, + type_var = type_var, + strict = strict + ) + } else { x %>% - nest(data = -!!(group_vars(x))) %>% - ungroup %>% - mutate(newdata = map(.data$data, - expand_types_plain, - type_var = type_var, - strict = strict) - ) %>% - select(-.data$data) %>% - unnest(cols = .data$newdata) %>% - group_by_at(x %>% group_vars()) %>% - select(colnames(x)) - - } - + nest(data = -!!(group_vars(x))) %>% + ungroup() %>% + mutate(newdata = map(.data$data, + expand_types_plain, + type_var = type_var, + strict = strict + )) %>% + select(-.data$data) %>% + unnest(cols = .data$newdata) %>% + group_by_at(x %>% group_vars()) %>% + select(colnames(x)) + } } @@ -189,85 +186,96 @@ expand_types <- function(x, #' @importFrom rlang .data #' @keywords internal expand_types_plain <- function(x, - type_var = "type", - strict = TRUE) { - types <- - read_types() %>% - select(1:3) - - subtypes <- - types %>% - filter(.data$typelevel == "subtype") %>% - select(1, 3) - - orig_types <- - x[, type_var] %>% - rename(orig_abcd = type_var) - - if (!all(unique(orig_types$orig_abcd) %in% types$type)) { - warning("The dataframe contains type codes which are not standard.") - } - - # main types to add: - suppressWarnings( + type_var = "type", + strict = TRUE) { + types <- + read_types() %>% + select(1:3) + + subtypes <- + types %>% + filter(.data$typelevel == "subtype") %>% + select(1, 3) + + orig_types <- + x[, type_var] %>% + rename(orig_abcd = type_var) + + if (!all(unique(orig_types$orig_abcd) %in% types$type)) { + warning("The dataframe contains type codes which are not standard.") + } + + # main types to add: + suppressWarnings( join_main_types <- - subtypes %>% - filter(.data$main_type == "2330" | - .data$type %in% c("6230_ha", "6230_hmo", "6230_hnk", - "5130_hei", - 
"91E0_va", "91E0_vm", "91E0_vn")) %>% - left_join(orig_types %>% - mutate(present = 1), - by = c("type" = "orig_abcd")) %>% - group_by(.data$main_type) %>% - summarise(add = if (strict) all(!is.na(.data$present)) else { - any(!is.na(.data$present)) - } - ) %>% - filter(.data$add) %>% - # only adding codes absent from original dataframe: - anti_join(orig_types, by = c("main_type" = "orig_abcd")) %>% - pull(.data$main_type) - ) - - # expanding main types to their subtypes and adding the latter: - suppressWarnings( + subtypes %>% + filter(.data$main_type == "2330" | + .data$type %in% c( + "6230_ha", "6230_hmo", "6230_hnk", + "5130_hei", + "91E0_va", "91E0_vm", "91E0_vn" + )) %>% + left_join( + orig_types %>% + mutate(present = 1), + by = c("type" = "orig_abcd") + ) %>% + group_by(.data$main_type) %>% + summarise(add = if (strict) { + all(!is.na(.data$present)) + } else { + any(!is.na(.data$present)) + }) %>% + filter(.data$add) %>% + # only adding codes absent from original dataframe: + anti_join(orig_types, by = c("main_type" = "orig_abcd")) %>% + pull(.data$main_type) + ) + + # expanding main types to their subtypes and adding the latter: + suppressWarnings( x_expanded <- + x %>% + rename(orig_abcd = type_var) %>% + inner_join(subtypes %>% rename(type_abcd = .data$type), + by = c("orig_abcd" = "main_type") + ) %>% + mutate(orig_abcd = .data$type_abcd) %>% + select(-.data$type_abcd) %>% + anti_join( x %>% - rename(orig_abcd = type_var) %>% - inner_join(subtypes %>% rename(type_abcd = .data$type), - by = c("orig_abcd" = "main_type")) %>% - mutate(orig_abcd = .data$type_abcd) %>% - select(-.data$type_abcd) %>% - anti_join(x %>% - rename(orig_abcd = type_var), - by = "orig_abcd") %>% - set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>% - bind_rows(x, .) - ) - - # adding main_types: - suppressWarnings( + rename(orig_abcd = type_var), + by = "orig_abcd" + ) %>% + set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>% + bind_rows(x, .) + ) + + # adding main_types: + suppressWarnings( x_expanded <- - x %>% - rename(orig_abcd = type_var) %>% - inner_join(subtypes %>% - rename(main_type_abcd = .data$main_type), - by = c("orig_abcd" = "type")) %>% - filter(.data$main_type_abcd %in% join_main_types) %>% - mutate(orig_abcd = if (is.factor(.data$orig_abcd)) { - factor(.data$main_type_abcd, - levels = levels(.data$orig_abcd)) - } else .data$main_type_abcd - ) %>% - select(-.data$main_type_abcd) %>% - distinct %>% - set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>% - bind_rows(x_expanded, .) - ) - - return(x_expanded) - + x %>% + rename(orig_abcd = type_var) %>% + inner_join( + subtypes %>% + rename(main_type_abcd = .data$main_type), + by = c("orig_abcd" = "type") + ) %>% + filter(.data$main_type_abcd %in% join_main_types) %>% + mutate(orig_abcd = if (is.factor(.data$orig_abcd)) { + factor(.data$main_type_abcd, + levels = levels(.data$orig_abcd) + ) + } else { + .data$main_type_abcd + }) %>% + select(-.data$main_type_abcd) %>% + distinct() %>% + set_colnames(gsub("orig_abcd", type_var, colnames(.))) %>% + bind_rows(x_expanded, .) 
+ ) + + return(x_expanded) } @@ -311,44 +319,54 @@ convertdf_enc <- function(x, to = "UTF-8", sub = NA, colnames = FALSE) { - - assert_that(inherits(x, "data.frame")) - assert_that(is.string(to), - is.string(from), - is.string(sub) | is.na(sub)) - assert_that(is.flag(colnames), noNA(colnames)) - - - is_chfact <- function(vec) { - if (is.factor(vec)) { - is.character(levels(vec)) - } else FALSE + assert_that(inherits(x, "data.frame")) + assert_that( + is.string(to), + is.string(from), + is.string(sub) | is.na(sub) + ) + assert_that(is.flag(colnames), noNA(colnames)) + + + is_chfact <- function(vec) { + if (is.factor(vec)) { + is.character(levels(vec)) + } else { + FALSE } + } - conv_levels <- function(fact, from, to, sub) { - levels(fact) <- iconv(levels(fact), - from = from, - to = to, - sub = sub) - return(fact) + conv_levels <- function(fact, from, to, sub) { + levels(fact) <- iconv(levels(fact), + from = from, + to = to, + sub = sub + ) + return(fact) + } + + x %>% + mutate_if(is.character, + iconv, + from = from, + to = to, + sub = sub + ) %>% + mutate_if(is_chfact, + conv_levels, + from = from, + to = to, + sub = sub + ) %>% + { + if (colnames) { + `colnames<-`(., iconv(colnames(.), + from = from, + to = to, + sub = sub + )) + } else { + . + } } - - x %>% - mutate_if(is.character, - iconv, - from = from, - to = to, - sub = sub) %>% - mutate_if(is_chfact, - conv_levels, - from = from, - to = to, - sub = sub) %>% - {if (colnames) { - `colnames<-`(., iconv(colnames(.), - from = from, - to = to, - sub = sub)) - } else .} } - diff --git a/R/filemanagement.R b/R/filemanagement.R index 9e6e1b0f..ffe5db72 100644 --- a/R/filemanagement.R +++ b/R/filemanagement.R @@ -31,55 +31,55 @@ #' #' @examples #' \dontrun{ -#'fileman_folders() -#'datapath <- fileman_folders(root = "git") +#' fileman_folders() +#' datapath <- fileman_folders(root = "git") #' } #' fileman_folders <- function(root = c("rproj", "git"), path = NA) { - # directory setup - if (!is.na(path)) { - if (dir.exists(path)) { - datapath <- normalizePath(file.path(path, "n2khab_data")) - } else { - stop("The specified path does not exist.") - } + # directory setup + if (!is.na(path)) { + if (dir.exists(path)) { + datapath <- normalizePath(file.path(path, "n2khab_data")) } else { - root <- tolower(root) - root <- match.arg(root) - - if (root == "git") { - root <- find_root(is_git_root) - } + stop("The specified path does not exist.") + } + } else { + root <- tolower(root) + root <- match.arg(root) - if (root == "rproj") { - root <- find_root(is_rstudio_project) - } + if (root == "git") { + root <- find_root(is_git_root) + } - datapath <- file.path(root, "n2khab_data") + if (root == "rproj") { + root <- find_root(is_rstudio_project) } + datapath <- file.path(root, "n2khab_data") + } - # check for existence of the folder - if (!dir.exists(datapath)) { - # create a new directory - dir.create(file.path(datapath)) - message(paste0("Created ", datapath)) - } else { - message(paste0("The path to ", datapath, " already exists")) - } - # create subfolders - subfolders <- c("10_raw", "20_processed") - for (subfolder in subfolders) { - if (!dir.exists(file.path(datapath, subfolder))) { - dir.create(file.path(datapath, subfolder)) - message(paste0("Created subfolder ", subfolder)) - } else { - message(paste0("The subfolder ", subfolder, " already exists")) - } + # check for existence of the folder + if (!dir.exists(datapath)) { + # create a new directory + dir.create(file.path(datapath)) + message(paste0("Created ", datapath)) + } else { + 
message(paste0("The path to ", datapath, " already exists")) + } + + # create subfolders + subfolders <- c("10_raw", "20_processed") + for (subfolder in subfolders) { + if (!dir.exists(file.path(datapath, subfolder))) { + dir.create(file.path(datapath, subfolder)) + message(paste0("Created subfolder ", subfolder)) + } else { + message(paste0("The subfolder ", subfolder, " already exists")) } - datapath + } + datapath } @@ -98,11 +98,9 @@ fileman_folders <- function(root = c("rproj", "git"), path = NA) { #' @param path Path where the data must be downloaded. #' Defaults to the working directory. #' @param doi a doi pointer to the Zenodo archive starting with '10.5281/zenodo.'. See examples. -#' @param parallel Logical (\code{FALSE} by default). -#' If \code{TRUE}, will run a number of parallel processes, each downloading -#' another file. -#' This is useful when multiple large files are present in the Zenodo -#' record, which otherwise would be downloaded sequentially. +#' @param parallel Logical. +#' If \code{TRUE} (the default), files will be +#' downloaded concurrently for multi-file records. #' Of course, the operation is limited by bandwidth and traffic limitations. #' @param quiet Logical (\code{FALSE} by default). #' Do you want to suppress informative messages (not warnings)? @@ -135,120 +133,100 @@ fileman_folders <- function(root = c("rproj", "git"), path = NA) { #' } download_zenodo <- function(doi, path = ".", - parallel = FALSE, + parallel = TRUE, quiet = FALSE) { + assert_that(is.string(doi), is.string(path)) + assert_that(is.flag(parallel), noNA(parallel), is.flag(quiet), noNA(quiet)) + + require_pkgs(c("jsonlite", "curl", "tools")) + + # check for existence of the folder + stopifnot(dir.exists(path)) + + record <- str_remove(doi, fixed("10.5281/zenodo.")) + + # Retrieve file name by records call + base_url <- "https://zenodo.org/api/records/" + req <- curl::curl_fetch_memory(paste0(base_url, record)) + content <- jsonlite::fromJSON(rawToChar(req$content)) + + # Calculate total file size + totalsize <- sum(content$files$size) %>% + human_filesize() + + # extract individual file names and urls + file_urls <- content$files$links$self + filenames <- basename(content$files$key) + destfiles <- file.path(path, filenames) + + # extract check-sum(s) + file_md5 <- content$files$checksum + + # download files + if (!quiet) { + message( + "Will download ", + (nrfiles <- length(filenames)), + " file", + ifelse(nrfiles > 1, "s", ""), + " (total size: ", + totalsize, + ") from https://doi.org/", + doi, + " (", + content$metadata$title, + "; version: ", + ifelse(!is.null(content$metadata$version), + content$metadata$version, + content$metadata$relations$version[1, 1] + ), + ")\n" + ) + } - assert_that(is.string(doi), is.string(path)) - assert_that(is.flag(parallel), noNA(parallel), is.flag(quiet), noNA(quiet)) - - require_pkgs(c("jsonlite", "curl", "tools")) - - # check for existence of the folder - stopifnot(dir.exists(path)) - - record <- str_remove(doi, fixed("10.5281/zenodo.")) - - # Retrieve file name by records call - base_url <- 'https://zenodo.org/api/records/' - req <- curl::curl_fetch_memory(paste0(base_url, record)) - content <- jsonlite::fromJSON(rawToChar(req$content)) - - # Calculate total file size - totalsize <- sum(content$files$size) %>% - human_filesize() - - # extract individual file names and urls - file_urls <- content$files$links$self - filenames <- str_match(file_urls, ".+/([^/]+)")[,2] - destfiles <- file.path(path, filenames) - - # extract check-sum(s) - file_md5 <- 
content$files$checksum - - # download files - if (!quiet) { - message("Will download ", - (nrfiles <- length(filenames)), - " file", - ifelse(nrfiles > 1, "s", ""), - " (total size: ", - totalsize, - ") from https://doi.org/", - doi, - " (", - content$metadata$title, - "; version: ", - ifelse(!is.null(content$metadata$version), - content$metadata$version, - content$metadata$relations$version[1, 1] - ), - ")\n" + if (length(file_urls) > 1 && parallel) { + curl::multi_download( + urls = file_urls, + destfiles = destfiles, + progress = !quiet + ) + } else { + mapply(curl::curl_download, + file_urls, + destfiles, + MoreArgs = list(quiet = quiet) + ) + } + + # check each of the files + + if (!quiet) message("\nVerifying file integrity...\n") + + for (i in seq_along(file_urls)) { + filename <- filenames[i] + destfile <- destfiles[i] + md5 <- unname(tools::md5sum(destfile)) + zenodo_md5 <- str_split(file_md5[i], ":")[[1]][2] + if (identical(md5, zenodo_md5)) { + if (!quiet) { + message( + filename, + " was downloaded and its integrity verified (md5sum: ", + md5, + ")" ) - } - - if (parallel) { - - require_pkgs("parallel") - - nr_nodes <- min(10, length(file_urls)) - - if (!quiet) message("Initializing parallel download on ", - nr_nodes, - " R session nodes...\n") - - clus <- parallel::makeCluster(nr_nodes) - - if (!quiet) { - message("Starting parallel downloads. ", - "This may take a while (and I can't show you the overall progress).\n", - "Be patient...\n") - } - - parallel::clusterMap(clus, - function(src, dest) { - curl::curl_download(url = src, - destfile = dest, - quiet = quiet) - }, - file_urls, - destfiles) - - parallel::stopCluster(clus) - - if (!quiet) message("Ended parallel downloads.") - + } } else { - - mapply(curl::curl_download, - file_urls, - destfiles, - MoreArgs = list(quiet = quiet)) - - } - - # check each of the files - - if (!quiet) message("\nVerifying file integrity...\n") - - for (i in seq_along(file_urls)) { - filename <- filenames[i] - destfile <- destfiles[i] - md5 <- unname(tools::md5sum(destfile)) - zenodo_md5 <- str_split(file_md5[i], ":")[[1]][2] - if (all.equal(md5, zenodo_md5)) { - if (!quiet) message(filename, - " was downloaded and its integrity verified (md5sum: ", - md5, - ")") - } else { - warning("Incorrect download! md5sum ", - md5, - " for file", - filename, - " does not match the Zenodo archived md5sum ", - zenodo_md5) - } + warning( + "Incorrect download! 
md5sum ", + md5, + " for file", + filename, + " does not match the Zenodo archived md5sum ", + zenodo_md5 + ) } + } } @@ -275,27 +253,28 @@ download_zenodo <- function(doi, #' @importFrom dplyr #' %>% human_filesize <- function(x) { - assert_that(is.numeric(x)) - assert_that(all(x %% 1 == 0 & x >= 0)) - magnitude <- - log(x, base = 1024) %>% - floor %>% - pmin(8) - unit <- factor(magnitude, - levels = 0:8, - labels = c( - "B", - "KiB", - "MiB", - "GiB", - "TiB", - "PiB", - "EiB", - "ZiB", - "YiB") + assert_that(is.numeric(x)) + assert_that(all(x %% 1 == 0 & x >= 0)) + magnitude <- + log(x, base = 1024) %>% + floor() %>% + pmin(8) + unit <- factor(magnitude, + levels = 0:8, + labels = c( + "B", + "KiB", + "MiB", + "GiB", + "TiB", + "PiB", + "EiB", + "ZiB", + "YiB" ) - size <- (x / 1024^magnitude) %>% round(1) - return(paste(size, unit)) + ) + size <- (x / 1024^magnitude) %>% round(1) + return(paste(size, unit)) } @@ -343,30 +322,31 @@ human_filesize <- function(x) { fileman_up <- function(name, start = ".", levels = 10) { - - assert_that(is.string(name)) - assert_that(dir.exists(start), - msg = "The start directory does not exist.") - assert_that(levels %% 1 == 0 & levels >= 0, - msg = "levels must be a positive integer value.") - - path <- start - - for (i in 0:levels) { - ff <- list.files(path, - all.files = TRUE, - include.dirs = TRUE) - if (name %in% ff) break - path <- file.path(path, "..") - } - - if (name %in% ff) { - file.path(path, name) %>% - normalizePath() - } else { - stop(name, " was not found. Searched up to ", normalizePath(path)) - } - + assert_that(is.string(name)) + assert_that(dir.exists(start), + msg = "The start directory does not exist." + ) + assert_that(levels %% 1 == 0 & levels >= 0, + msg = "levels must be a positive integer value." + ) + + path <- start + + for (i in 0:levels) { + ff <- list.files(path, + all.files = TRUE, + include.dirs = TRUE + ) + if (name %in% ff) break + path <- file.path(path, "..") + } + + if (name %in% ff) { + file.path(path, name) %>% + normalizePath() + } else { + stop(name, " was not found. 
Searched up to ", normalizePath(path)) + } } @@ -439,24 +419,26 @@ fileman_up <- function(name, #' @export checksum <- function(files, hash_fun = c("xxh64", "md5", "sha256")) { - - assert_that_allfiles_exist(files) - hash_fun <- match.arg(hash_fun) - - if (str_detect(hash_fun, "^xxh")) { - require_pkgs("digest") - checksums <- map_chr(files, - ~digest::digest(., - algo = "xxhash64", - file = TRUE)) - } else { - require_pkgs("openssl") - fun <- eval(str2lang(paste0("openssl::", hash_fun))) - checksums <- map_chr(files, ~paste(fun(file(.)))) - } - - names(checksums) <- basename(files) - return(checksums) + assert_that_allfiles_exist(files) + hash_fun <- match.arg(hash_fun) + + if (str_detect(hash_fun, "^xxh")) { + require_pkgs("digest") + checksums <- map_chr( + files, + ~ digest::digest(., + algo = "xxhash64", + file = TRUE + ) + ) + } else { + require_pkgs("openssl") + fun <- eval(str2lang(paste0("openssl::", hash_fun))) + checksums <- map_chr(files, ~ paste(fun(file(.)))) + } + + names(checksums) <- basename(files) + return(checksums) } #' @rdname checksum @@ -476,16 +458,19 @@ sha256sum <- function(files) checksum(files, hash_fun = "sha256") #' assert_that #' @keywords internal assert_that_allfiles_exist <- function(x) { - exist <- file.exists(x) - assert_that(all(exist), - msg = paste0("The following path(s) do not exist:\n", - paste0(x[!exist], collapse = "\n"))) - isdir <- dir.exists(x) - assert_that(!any(isdir), - msg = paste0("Only files are accepted; ", - "the following path(s) are directories:\n", - paste0(x[isdir], collapse = "\n"))) + exist <- file.exists(x) + assert_that(all(exist), + msg = paste0( + "The following path(s) do not exist:\n", + paste0(x[!exist], collapse = "\n") + ) + ) + isdir <- dir.exists(x) + assert_that(!any(isdir), + msg = paste0( + "Only files are accepted; ", + "the following path(s) are directories:\n", + paste0(x[isdir], collapse = "\n") + ) + ) } - - - diff --git a/R/misc.R b/R/misc.R index a3f4520e..13a84ba7 100644 --- a/R/misc.R +++ b/R/misc.R @@ -18,20 +18,19 @@ #' assert_that #' @keywords internal require_pkgs <- function(pkgs) { - assert_that(is.character(pkgs)) - available <- map_lgl(pkgs, ~requireNamespace(., quietly = TRUE)) - if (!all(available)) { - multiple <- sum(!available) > 1 - stop(ifelse(multiple, "Multiple", "A"), - " package", - ifelse(multiple, "s", ""), - " needed for this function ", - ifelse(multiple, "are", "is"), - " missing.\nPlease install as follows: install.packages(", - deparse(pkgs[!available]), - ")", - call. = FALSE) - } + assert_that(is.character(pkgs)) + available <- map_lgl(pkgs, ~ requireNamespace(., quietly = TRUE)) + if (!all(available)) { + multiple <- sum(!available) > 1 + stop(ifelse(multiple, "Multiple", "A"), + " package", + ifelse(multiple, "s", ""), + " needed for this function ", + ifelse(multiple, "are", "is"), + " missing.\nPlease install as follows: install.packages(", + deparse(pkgs[!available]), + ")", + call. = FALSE + ) + } } - - diff --git a/R/n2khab.R b/R/n2khab.R index 1803dd11..d0937959 100644 --- a/R/n2khab.R +++ b/R/n2khab.R @@ -10,20 +10,44 @@ utils::globalVariables(c(".")) -#' @importFrom utils packageVersion +#' @importFrom utils packageVersion packageDescription +#' @importFrom curl nslookup .onAttach <- function(libname, pkgname) { - packageStartupMessage("Attaching n2khab version ", - packageVersion("n2khab"), ".\n") + packageStartupMessage( + "Attaching n2khab ", + packageVersion("n2khab"), + "." 
+ ) + packageStartupMessage("Will use sf ", packageDescription("sf")$Version, ".") + if ( + length(find.package("raster", quiet = TRUE) > 0) && + packageVersion("raster") >= package_version("3.6-3") && + length(find.package("terra", quiet = TRUE) > 0) + ) { packageStartupMessage( - "When working with raster objects returned by n2khab, you can safely\n", - "mute proj4string degradation warnings:", - "\n\noptions(rgdal_show_exportToProj4_warnings = \"none\")\n\n", - "You must do this before using the n2khab functions ", - "depending on \nrgdal or raster, and before loading ", - "the latter (or sp).\n\n", - "Do note that those warnings are applicable: in the returned raster\n", - "objects, the proj4string is effectively degraded and should not ", - "be used.\n", - "See https://inbo.github.io/n2khab/#suppressing-rgdal-warnings-about-", - "proj4string-degradation\nfor more information.") + "Will use terra ", + packageDescription("terra")$Version, + " through raster." + ) + } + if (!is.null(nslookup("api.github.com", error = FALSE))) { + ref <- remotes::github_remote( + "inbo/n2khab", + ref = remotes::github_release() + )$ref + release <- package_version(gsub("\\p{L}*", "", ref, perl = TRUE)) + if (packageVersion("n2khab") < release) { + packageStartupMessage( + "\n", + rep("=", getOption("width")), + "\nIt is advised to upgrade n2khab to its current version ", + release, + ". Run:\n", + 'install.packages("n2khab", repos = c(inbo = "https://inbo.r-universe.dev", + CRAN = "https://cloud.r-project.org"))', + "\n", + rep("=", getOption("width")) + ) + } + } } diff --git a/R/read_admin_areas.R b/R/read_admin_areas.R index 6fd5ed4c..acca4b04 100644 --- a/R/read_admin_areas.R +++ b/R/read_admin_areas.R @@ -56,65 +56,85 @@ #' @importFrom rlang .data #' @export read_admin_areas <- - function(file = file.path(fileman_up("n2khab_data"), - c("10_raw/flanders", - "10_raw/provinces", - "10_raw/sac")), - dsn = c("flanders", "provinces", "sac")) { + function(file = file.path( + fileman_up("n2khab_data"), + c( + "10_raw/flanders", + "10_raw/provinces", + "10_raw/sac" + ) + ), + dsn = c("flanders", "provinces", "sac")) { + dsn <- match.arg(dsn) - dsn <- match.arg(dsn) - - if (missing(file)) { - file <- file[str_detect(file, dsn)][1] - } - - assert_that(file.exists(file)) - - suppressWarnings( - requested <- read_sf(file, - crs = 31370) - ) - - requested <- - switch(dsn, - - "flanders" = requested %>% - select(name = .data$NAAM) %>% - mutate(name = factor(.data$name)), - - "provinces" = requested %>% - select(name = .data$NAAM, - territory_id = .data$TERRID, - code_nis = .data$NISCODE, - code_nuts2 = .data$NUTS2) %>% - mutate(name = fct_reorder(.data$name, - .data$territory_id), - code_nis = fct_reorder(.data$code_nis, - .data$territory_id), - code_nuts2 = fct_reorder(.data$code_nuts2, - .data$territory_id), - territory_id = factor(.data$territory_id)), + if (missing(file)) { + file <- file[str_detect(file, dsn)][1] + } - "sac" = requested %>% - select(sac_code = .data$GEBCODE, - sac_name = .data$NAAM, - subsac_code = .data$DEELGEBIED, - polygon_id = .data$POLY_ID) %>% - arrange(.data$sac_code, - .data$subsac_code, - .data$polygon_id) %>% - mutate(id = 1:n(), - sac_code = fct_reorder(.data$sac_code, - .data$id), - sac_name = fct_reorder(.data$sac_name, - .data$id), - subsac_code = fct_reorder(.data$subsac_code, - .data$id) - ) %>% - select(-.data$id) + assert_that(file.exists(file)) - ) + suppressWarnings( + requested <- read_sf(file, + crs = 31370 + ) + ) - return(requested) + requested <- + switch(dsn, + 
"flanders" = requested %>% + select(name = .data$NAAM) %>% + mutate(name = factor(.data$name)), + "provinces" = requested %>% + select( + name = .data$NAAM, + territory_id = .data$TERRID, + code_nis = .data$NISCODE, + code_nuts2 = .data$NUTS2 + ) %>% + mutate( + name = fct_reorder( + .data$name, + .data$territory_id + ), + code_nis = fct_reorder( + .data$code_nis, + .data$territory_id + ), + code_nuts2 = fct_reorder( + .data$code_nuts2, + .data$territory_id + ), + territory_id = factor(.data$territory_id) + ), + "sac" = requested %>% + select( + sac_code = .data$GEBCODE, + sac_name = .data$NAAM, + subsac_code = .data$DEELGEBIED, + polygon_id = .data$POLY_ID + ) %>% + arrange( + .data$sac_code, + .data$subsac_code, + .data$polygon_id + ) %>% + mutate( + id = 1:n(), + sac_code = fct_reorder( + .data$sac_code, + .data$id + ), + sac_name = fct_reorder( + .data$sac_name, + .data$id + ), + subsac_code = fct_reorder( + .data$subsac_code, + .data$id + ) + ) %>% + select(-.data$id) + ) - } + return(requested) + } diff --git a/R/read_ecoregions.R b/R/read_ecoregions.R index 0a30372a..19337378 100644 --- a/R/read_ecoregions.R +++ b/R/read_ecoregions.R @@ -39,36 +39,42 @@ #' @importFrom rlang .data #' @export read_ecoregions <- - function(file = file.path(fileman_up("n2khab_data"), "10_raw/ecoregions")) { + function(file = file.path(fileman_up("n2khab_data"), "10_raw/ecoregions")) { + suppressWarnings( + ecoregions <- read_sf(file, + crs = 31370 + ) + ) - suppressWarnings( - ecoregions <- read_sf(file, - crs = 31370) - ) + ecoregions <- + ecoregions %>% + select( + polygon_code = .data$CODE, + polygon_id = .data$NR, + region_name = .data$REGIO, + district_name = .data$DISTRICT + ) %>% + arrange(.data$polygon_code, .data$polygon_id) - ecoregions <- - ecoregions %>% - select(polygon_code = .data$CODE, - polygon_id = .data$NR, - region_name = .data$REGIO, - district_name = .data$DISTRICT) %>% - arrange(.data$polygon_code, .data$polygon_id) + er_levels <- + ecoregions %>% + st_drop_geometry() %>% + select(-.data$district_name) - er_levels <- - ecoregions %>% - st_drop_geometry %>% - select(-.data$district_name) + ecoregions <- + ecoregions %>% + mutate( + polygon_code = factor(.data$polygon_code, + levels = er_levels$polygon_code + ), + polygon_id = factor(.data$polygon_id, + levels = er_levels$polygon_id + ), + region_name = factor(.data$region_name, + levels = unique(er_levels$region_name) + ), + district_name = factor(.data$district_name) + ) - ecoregions <- - ecoregions %>% - mutate(polygon_code = factor(.data$polygon_code, - levels = er_levels$polygon_code), - polygon_id = factor(.data$polygon_id, - levels = er_levels$polygon_id), - region_name = factor(.data$region_name, - levels = unique(er_levels$region_name)), - district_name = factor(.data$district_name)) - - return(ecoregions) - - } + return(ecoregions) + } diff --git a/R/read_habitatdata.R b/R/read_habitatdata.R index d48bdf4d..b9367b18 100644 --- a/R/read_habitatdata.R +++ b/R/read_habitatdata.R @@ -155,69 +155,81 @@ #' is.string #' read_habitatmap_stdized <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/habitatmap_stdized/habitatmap_stdized.gpkg"), - version = c("habitatmap_stdized_2020_v1", - "habitatmap_stdized_2018_v2", - "habitatmap_stdized_2018_v1")){ - - version <- match.arg(version) - - habmap_polygons <- read_sf(file, - "habitatmap_polygons") - - habmap_polygons <- habmap_polygons %>% - mutate(polygon_id = factor(.data$polygon_id)) - - suppressWarnings(st_crs(habmap_polygons) <- 31370) - - if (version == 
"habitatmap_stdized_2018_v1") { - habmap_types <- suppressWarnings( - read_sf(file, - "habitatmap_patches") - ) - } else { - habmap_types <- suppressWarnings( - read_sf(file, - "habitatmap_types") - ) - } - - types <- suppressWarnings(read_types()) - - habmap_types <- habmap_types %>% - mutate( polygon_id = factor(.data$polygon_id, - levels = levels(habmap_polygons$polygon_id)), - certain = .data$certain == 1, - type = factor(.data$type, - levels = levels(types$type) - ) - ) - - if (grepl("2018", version)) { - habmap_types <- - habmap_types %>% - relocate(.data$polygon_id, - .data$type, - .data$certain) - } - - if (version == "habitatmap_stdized_2018_v1") { - - result <- list(habitatmap_polygons = habmap_polygons, - habitatmap_patches = habmap_types) - - } else { - - result <- list(habitatmap_polygons = habmap_polygons, - habitatmap_types = habmap_types) - - } + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/habitatmap_stdized/habitatmap_stdized.gpkg" + ), + version = c( + "habitatmap_stdized_2020_v1", + "habitatmap_stdized_2018_v2", + "habitatmap_stdized_2018_v1" + )) { + version <- match.arg(version) + + habmap_polygons <- read_sf( + file, + "habitatmap_polygons" + ) + + habmap_polygons <- habmap_polygons %>% + mutate(polygon_id = factor(.data$polygon_id)) + + suppressWarnings(st_crs(habmap_polygons) <- 31370) + + if (version == "habitatmap_stdized_2018_v1") { + habmap_types <- suppressWarnings( + read_sf( + file, + "habitatmap_patches" + ) + ) + } else { + habmap_types <- suppressWarnings( + read_sf( + file, + "habitatmap_types" + ) + ) + } - return(result) + types <- suppressWarnings(read_types()) + + habmap_types <- habmap_types %>% + mutate( + polygon_id = factor(.data$polygon_id, + levels = levels(habmap_polygons$polygon_id) + ), + certain = .data$certain == 1, + type = factor(.data$type, + levels = levels(types$type) + ) + ) + + if (grepl("2018", version)) { + habmap_types <- + habmap_types %>% + relocate( + .data$polygon_id, + .data$type, + .data$certain + ) + } + if (version == "habitatmap_stdized_2018_v1") { + result <- list( + habitatmap_polygons = habmap_polygons, + habitatmap_patches = habmap_types + ) + } else { + result <- list( + habitatmap_polygons = habmap_polygons, + habitatmap_types = habmap_types + ) } + return(result) + } + @@ -248,16 +260,16 @@ read_habitatmap_stdized <- #' \item \code{watersurfaces_hab_types}: a table in which every row corresponds with a combination of polygon and type. #' } #' -#'The polygons with 2190_a habitat (dune slack ponds) are generated by selecting all watersurface polygons that -#'overlap with dune habitat polygons (21xx) of the standardized habitat map. +#' The polygons with 2190_a habitat (dune slack ponds) are generated by selecting all watersurface polygons that +#' overlap with dune habitat polygons (21xx) of the standardized habitat map. #' -#'For each of the other considered habitat types (31xx and rbbah) we select the watersurface polygons that -#'overlap with the selected habitat type polygons of the standardized habitat map. We also select polygons of the -#'standardized habitat map that contain standing water types but do not overlap with any watersurface polygon of the -#'watersurface map. +#' For each of the other considered habitat types (31xx and rbbah) we select the watersurface polygons that +#' overlap with the selected habitat type polygons of the standardized habitat map. 
We also select polygons of the +#' standardized habitat map that contain standing water types but do not overlap with any watersurface polygon of the +#' watersurface map. #' -#'The R-code for creating the \code{watersurfaces_hab} data source can be found in the \href{https://github.com/inbo/n2khab-preprocessing}{n2khab-preprocessing} -#'repository. +#' The R-code for creating the \code{watersurfaces_hab} data source can be found in the \href{https://github.com/inbo/n2khab-preprocessing}{n2khab-preprocessing} +#' repository. #' #' #' @param interpreted If \code{TRUE}, the interpreted subtype is provided when the subtype is missing. This only @@ -341,71 +353,84 @@ read_habitatmap_stdized <- #' is.string #' read_watersurfaces_hab <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/watersurfaces_hab/watersurfaces_hab.gpkg"), - interpreted = FALSE, - version = c("watersurfaces_hab_v4", - "watersurfaces_hab_v3", - "watersurfaces_hab_v2", - "watersurfaces_hab_v1")){ - - version <- match.arg(version) - - watersurfaces_polygons <- read_sf(file, - "watersurfaces_hab_polygons") - - watersurfaces_polygons <- watersurfaces_polygons %>% - mutate_at(.vars = vars(starts_with("polygon_id")), - .funs = factor) - - suppressWarnings(st_crs(watersurfaces_polygons) <- 31370) - - if (version %in% c("watersurfaces_hab_v1", "watersurfaces_hab_v2")) { - watersurfaces_types <- suppressWarnings( - read_sf(file, - "watersurfaces_hab_patches") - ) - } else { - watersurfaces_types <- suppressWarnings( - read_sf(file, - "watersurfaces_hab_types") - ) - } - - if (interpreted){ - watersurfaces_types <- watersurfaces_types %>% - mutate(type = ifelse(.data$type == "3130", "3130_aom", .data$type)) - } - - types <- suppressWarnings(read_types()) - - watersurfaces_types <- watersurfaces_types %>% - mutate( polygon_id = factor(.data$polygon_id, levels = levels(watersurfaces_polygons$polygon_id)), - certain = .data$certain == 1, - type = factor(.data$type, - levels = levels(types$type) - ) - ) %>% - relocate(.data$polygon_id, - .data$type, - .data$certain) - - if (version %in% c("watersurfaces_hab_v1", "watersurfaces_hab_v2")) { - - result <- list(watersurfaces_polygons = watersurfaces_polygons, - watersurfaces_patches = watersurfaces_types) - - } else { - - result <- list(watersurfaces_polygons = watersurfaces_polygons, - watersurfaces_types = watersurfaces_types) - - } + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/watersurfaces_hab/watersurfaces_hab.gpkg" + ), + interpreted = FALSE, + version = c( + "watersurfaces_hab_v4", + "watersurfaces_hab_v3", + "watersurfaces_hab_v2", + "watersurfaces_hab_v1" + )) { + version <- match.arg(version) + + watersurfaces_polygons <- read_sf( + file, + "watersurfaces_hab_polygons" + ) + + watersurfaces_polygons <- watersurfaces_polygons %>% + mutate_at( + .vars = vars(starts_with("polygon_id")), + .funs = factor + ) + + suppressWarnings(st_crs(watersurfaces_polygons) <- 31370) + + if (version %in% c("watersurfaces_hab_v1", "watersurfaces_hab_v2")) { + watersurfaces_types <- suppressWarnings( + read_sf( + file, + "watersurfaces_hab_patches" + ) + ) + } else { + watersurfaces_types <- suppressWarnings( + read_sf( + file, + "watersurfaces_hab_types" + ) + ) + } - return(result) + if (interpreted) { + watersurfaces_types <- watersurfaces_types %>% + mutate(type = ifelse(.data$type == "3130", "3130_aom", .data$type)) + } + types <- suppressWarnings(read_types()) + + watersurfaces_types <- watersurfaces_types %>% + mutate( + polygon_id = 
factor(.data$polygon_id, levels = levels(watersurfaces_polygons$polygon_id)), + certain = .data$certain == 1, + type = factor(.data$type, + levels = levels(types$type) + ) + ) %>% + relocate( + .data$polygon_id, + .data$type, + .data$certain + ) + + if (version %in% c("watersurfaces_hab_v1", "watersurfaces_hab_v2")) { + result <- list( + watersurfaces_polygons = watersurfaces_polygons, + watersurfaces_patches = watersurfaces_types + ) + } else { + result <- list( + watersurfaces_polygons = watersurfaces_polygons, + watersurfaces_types = watersurfaces_types + ) } + return(result) + } + @@ -422,7 +447,7 @@ read_watersurfaces_hab <- #' Return the data source \code{watersurfaces} as an \code{sf} polygon layer #' -#' Returns the raw data source \code{watersurfaces} (Leyssen et al., 2020) +#' Returns the raw data source \code{watersurfaces} (Scheers et al., 2022) #' as a standardized \code{sf} polygon layer #' (tidyverse-styled, internationalized) in the Belgian Lambert 72 CRS #' (EPSG-code \href{https://epsg.io/31370}{31370}). @@ -434,9 +459,9 @@ read_watersurfaces_hab <- #' data storage, you can specify your own \code{file}. #' In both cases: always make sure to specify the correct \code{version}, that #' is the version corresponding to the \code{file} (note that the \code{version} -#' defaults to the latest version, that is \code{watersurfaces_v1.1}). +#' defaults to the latest version). #' -#' See Leyssen et al. (2020) for an elaborate explanation of the data source +#' See Scheers et al. (2022) for an elaborate explanation of the data source #' and its attributes. #' #' @param file Optional string. An absolute or relative file path of @@ -453,6 +478,14 @@ read_watersurfaces_hab <- #' if \code{TRUE}, the variables \code{wfd_type_name} and #' \code{connectivity_name} are added. #' Defaults to \code{FALSE}. +#' @param fix_geom Logical. +#' Should invalid or corrupt geometries be fixed in the resulting \code{sf} +#' object in order to make them valid? +#' This prevents potential problems in geospatial operations, but beware that +#' fixed geometries are different from the original ones. +#' \code{\link[sf:st_make_valid]{sf::st_make_valid()}} is used to fix +#' geometries (with GEOS as backend). +#' Defaults to \code{FALSE}. #' #' @inheritParams read_habitatmap_stdized #' @@ -475,7 +508,9 @@ read_watersurfaces_hab <- #' Is there high confidence about the \code{wfd_type} determination? #' \item \code{depth_class}: class of water depth; #' \item \code{connectivity}: connectivity class; -#' \item \code{usage}: usage class. +#' \item \code{usage}: usage class; +#' \item \code{water_level_management}: water-level management class (not in +#' older versions). #' } #' #' @family functions involved in processing the 'watersurfaces' data source @@ -488,12 +523,10 @@ read_watersurfaces_hab <- #' wateren in Vlaanderen. #' Rapporten van het Instituut voor Natuur- en Bosonderzoek INBO.R.2009.34. #' Instituut voor Natuur- en Bosonderzoek, Brussel. -#' \item Leyssen A., Scheers K., Smeekens V., Wils C., Packet J., De Knijf G. & -#' Denys L. (2020). -#' Watervlakken versie 1.1: polygonenkaart van stilstaand water in Vlaanderen. -#' Uitgave 2020. Rapporten van het Instituut voor Natuur- en Bosonderzoek 2020 -#' (40). Instituut voor Natuur en Bosonderzoek, Brussel. -#' \doi{10.21436/inbor.19088385}. +#' \item Scheers K., Smeekens V., Wils C., Packet J., Leyssen A. & Denys L. +#' (2022). Watervlakken versie 1.2: Polygonenkaart van stilstaand water in +#' Vlaanderen. Uitgave 2022. 
Instituut voor Natuur- en Bosonderzoek. +#' \doi{10.21436/inbor.87014272}. #' } #' #' @examples @@ -508,12 +541,20 @@ read_watersurfaces_hab <- #' ws #' summary(ws) #' +#' ws_valid <- read_watersurfaces(fix_geom = TRUE) +#' ws_valid +#' +#' all(sf::st_is_valid(ws)) +#' all(sf::st_is_valid(ws_valid)) +#' #' ws2 <- read_watersurfaces(extended = TRUE) #' ws2 #' } #' #' @importFrom sf #' read_sf +#' st_is_valid +#' st_make_valid #' @importFrom plyr #' mapvalues #' @importFrom rlang @@ -537,196 +578,269 @@ read_watersurfaces_hab <- #' str_replace #' @export read_watersurfaces <- - function(file = NULL, - extended = FALSE, - version = c("watersurfaces_v1.1", "watersurfaces_v1.0")) { - - version <- match.arg(version) - - if (missing(file)) { - - if (version == "watersurfaces_v1.1") { - file <- file.path(fileman_up("n2khab_data"), - "10_raw/watersurfaces/watersurfaces.gpkg") - } else { - file <- file.path(fileman_up("n2khab_data"), - "10_raw/watersurfaces/watersurfaces.shp") - } - - assert_that(file.exists(file), - msg = paste("Path", file, "does not exist. Control the", - "path and specify the corresponding version", - "if you do not use", version)) - } else { - - assert_that(file.exists(file)) - - if (version == "watersurfaces_v1.1") { - if (substr(file, nchar(file) - 4, nchar(file)) != ".gpkg") { - stop(paste(version, "should be a GeoPackage (.gpkg).", - "Control the version and the path.")) - } - } - } - - if (version == "watersurfaces_v1.1") { - - suppressWarnings( - watersurfaces <- read_sf(file, - layer = "Watervlakken", - crs = 31370)) - - wfd_typetransl <- read_sf(file, layer = "LktKRWTYPE") %>% - mutate_if(., is.character, - .funs = function(x){return(`Encoding<-`(x, "UTF-8"))}) %>% - mutate(across(c(.data$Code), as.factor)) %>% - dplyr::rename(wfd_type = .data$Code, - wfd_type_name = .data$Omschrijving) - - } else { - - suppressWarnings( - watersurfaces <- read_sf(file, - crs = 31370) - ) - - wfd_typetransl <- - tribble(~wfd_type, ~wfd_type_name, - "B", "sterk brak", - "Bzl", "zeer licht brak", - "Ad", "alkalisch duinwater", - "Ai", "ondiep, alkalisch, ionenrijk", - "Ami", "ondiep, alkalisch, matig ionenrijk", - "Ami-e", "ondiep, alkalisch, matig ionenrijk, eutroof", - "Ami-om", "ondiep, alkalisch, matig ionenrijk, oligo-mesotroof", - "Aw", "groot-diep, alkalisch", - "Aw-e", "groot-diep, alkalisch, eutroof", - "Aw-om", "groot-diep, alkalisch, oligo-mesotroof", - "C", "circumneutraal", - "Cb", "circumneutraal, sterk gebufferd", - "CbFe", "circumneutraal, sterk gebufferd, ijzerrijk", - "Czb", "circumneutraal, zwak gebufferd", - "Z", "zuur", - "Zm", "zwak zuur", - "Zs", "sterk zuur" - ) %>% - mutate( - wfd_type = factor(.data$wfd_type, - levels = .$wfd_type) - ) - + function(file = NULL, + extended = FALSE, + fix_geom = FALSE, + version = c("watersurfaces_v1.2", "watersurfaces_v1.1", "watersurfaces_v1.0")) { + version <- match.arg(version) + assert_that(is.flag(extended), noNA(extended)) + assert_that(is.flag(fix_geom), noNA(fix_geom)) + + if (missing(file)) { + if (version != "watersurfaces_v1.0") { + file <- file.path( + fileman_up("n2khab_data"), + "10_raw/watersurfaces/watersurfaces.gpkg" + ) + } else { + file <- file.path( + fileman_up("n2khab_data"), + "10_raw/watersurfaces/watersurfaces.shp" + ) + } + + assert_that(file.exists(file), + msg = paste( + "Path", file, "does not exist. 
Control the", + "path and specify the corresponding version", + "if you do not use", version + ) + ) + } else { + assert_that(file.exists(file)) + + if (version != "watersurfaces_v1.0") { + if (substr(file, nchar(file) - 4, nchar(file)) != ".gpkg") { + stop(paste( + version, "should be a GeoPackage (.gpkg).", + "Control the version and the path." + )) } + } + } + if (version == "watersurfaces_v1.1") { + suppressWarnings( + watersurfaces <- read_sf(file, + layer = "Watervlakken", + crs = 31370 + ) + ) + + wfd_typetransl <- read_sf(file, layer = "LktKRWTYPE") %>% + mutate_if(., is.character, + .funs = function(x) { + return(`Encoding<-`(x, "UTF-8")) + } + ) %>% + mutate(across(c(.data$Code), as.factor)) %>% + rename( + wfd_type = .data$Code, + wfd_type_name = .data$Omschrijving + ) + } else { + suppressWarnings( + watersurfaces <- read_sf(file, + crs = 31370 + ) + ) + + wfd_typetransl <- + tribble( + ~wfd_type, ~wfd_type_name, + "B", "sterk brak", + "Bzl", "zeer licht brak", + "Ad", "alkalisch duinwater", + "Ai", "ondiep, alkalisch, ionenrijk", + "Ami", "ondiep, alkalisch, matig ionenrijk", + "Ami-e", "ondiep, alkalisch, matig ionenrijk, eutroof", + "Ami-om", "ondiep, alkalisch, matig ionenrijk, oligo-mesotroof", + "Aw", "groot-diep, alkalisch", + "Aw-e", "groot-diep, alkalisch, eutroof", + "Aw-om", "groot-diep, alkalisch, oligo-mesotroof", + "C", "circumneutraal", + "Cb", "circumneutraal, sterk gebufferd", + "CbFe", "circumneutraal, sterk gebufferd, ijzerrijk", + "Czb", "circumneutraal, zwak gebufferd", + "Z", "zuur", + "Zm", "zwak zuur", + "Zs", "sterk zuur" + ) %>% + mutate( + wfd_type = factor(.data$wfd_type, + levels = .$wfd_type + ) + ) + } + if (fix_geom) { + n_invalid <- sum( + !st_is_valid(watersurfaces) | is.na(st_is_valid(watersurfaces)) + ) + if (n_invalid > 0) { + watersurfaces <- st_make_valid(watersurfaces) + message("Fixed ", n_invalid, " invalid or corrupt geometries.") + } + } - watersurfaces <- - watersurfaces %>% - select(polygon_id = .data$WVLC, - wfd_code = .data$WTRLICHC, - hyla_code = .data$HYLAC, - name = .data$NAAM, - area_name = .data$GEBIED, - wfd_type = .data$KRWTYPE, - wfd_type_certain = .data$KRWTYPES, - depth_class = .data$DIEPKL, - connectivity = .data$CONNECT, - usage = .data$FUNCTIE) %>% - mutate(depth_class = str_replace(string = .data$depth_class, - pattern = "\u2265", - replacement = ">=")) %>% - mutate(across(c(.data$area_name, - .data$depth_class, - .data$connectivity, - .data$usage), - as.factor)) %>% - mutate(wfd_type = .data$wfd_type %>% - factor(levels = - levels(wfd_typetransl$wfd_type)), - hyla_code = ifelse(.data$hyla_code == 0, - NA, - .data$hyla_code) - ) %>% - arrange(.data$polygon_id) - - if (version == "watersurfaces_v1.0") { - watersurfaces <- - watersurfaces %>% - mutate_at(.vars = c("wfd_code", "name"), - .funs = function(x) { - ifelse(x == "", NA, x) - }) %>% - mutate(wfd_type_certain = ifelse(is.na(.data$wfd_type_certain), - na_lgl, - .data$wfd_type_certain %in% - c("zeker", - "definitief"))) + watersurfaces <- + watersurfaces %>% + { + if (version == "watersurfaces_v1.2") { + rename(., water_level_management = .data$PEILBEHEER) } else { - watersurfaces <- - watersurfaces %>% - mutate(wfd_type_certain = ifelse(is.na(.data$wfd_type_certain), - na_lgl, - .data$wfd_type_certain == - "definitief")) + . 
} + } %>% + select( + polygon_id = .data$WVLC, + wfd_code = .data$WTRLICHC, + hyla_code = .data$HYLAC, + name = .data$NAAM, + area_name = .data$GEBIED, + wfd_type = .data$KRWTYPE, + wfd_type_certain = .data$KRWTYPES, + depth_class = .data$DIEPKL, + connectivity = .data$CONNECT, + usage = .data$FUNCTIE, + matches("^water_level_management$") + ) %>% + mutate(depth_class = str_replace( + string = .data$depth_class, + pattern = "\u2265", + replacement = ">=" + )) %>% + mutate(across( + c( + .data$area_name, + .data$depth_class, + .data$connectivity, + .data$usage, + matches("^water_level_management$") + ), + as.factor + )) %>% + mutate( + wfd_type = .data$wfd_type %>% + factor( + levels = + levels(wfd_typetransl$wfd_type) + ), + hyla_code = ifelse(.data$hyla_code == 0, + NA, + .data$hyla_code + ) + ) %>% + arrange(.data$polygon_id) + + if (version == "watersurfaces_v1.0") { + watersurfaces <- + watersurfaces %>% + mutate_at( + .vars = c("wfd_code", "name"), + .funs = function(x) { + ifelse(x == "", NA, x) + } + ) %>% + mutate(wfd_type_certain = ifelse(is.na(.data$wfd_type_certain), + na_lgl, + .data$wfd_type_certain %in% + c( + "zeker", + "definitief" + ) + )) + } else { + watersurfaces <- + watersurfaces %>% + { + if (version != "watersurfaces_v1.2") { + . + } else { + mutate(., area_name = ifelse(.data$area_name == "", + NA, + .data$area_name + )) + } + } %>% + mutate(wfd_type_certain = ifelse(is.na(.data$wfd_type_certain), + na_lgl, + .data$wfd_type_certain == + "definitief" + )) + } - if (extended) { - - if (version == "watersurfaces_v1.1") { - - connectivitytransl <- read_sf(file, layer = "LktCONNECT") %>% - mutate_if(., is.character, - .funs = function(x){return(`Encoding<-`(x, "UTF-8"))}) %>% - mutate(across(c(.data$Code), as.factor)) %>% - rename(connectivity = .data$Code, - connectivity_name = .data$Omschr) - - } else { - - connectivitytransl <- - tribble(~connectivity, ~connectivity_name, - paste0("ge","\u00EF","soleerd"), - "niet verbonden met een waterloop", - "periodiek", - paste0("tijdelijk (door peilbeheer of droogte) ", - "in verbinding met minstens ","\u00E9", - "\u00E9","n waterloop"), - "permanent", - paste0("permanent in verbinding met minstens ", - "\u00E9","\u00E9","n waterloop") - ) %>% - mutate( - connectivity = factor(.data$connectivity, - levels = .$connectivity) - ) + if (extended) { + if (version == "watersurfaces_v1.1") { + connectivitytransl <- read_sf(file, layer = "LktCONNECT") %>% + mutate_if(., is.character, + .funs = function(x) { + return(`Encoding<-`(x, "UTF-8")) } - - watersurfaces <- - watersurfaces %>% - left_join(wfd_typetransl, by = "wfd_type") %>% - mutate( - wfd_type_name = - .data$wfd_type %>% - mapvalues(from = wfd_typetransl$wfd_type, - to = wfd_typetransl$wfd_type_name) - ) %>% - left_join(connectivitytransl, by = "connectivity") %>% - mutate( - connectivity_name = - .data$connectivity %>% - mapvalues(from = connectivitytransl$connectivity, - to = connectivitytransl$connectivity_name) - ) %>% - select(1:6, - .data$wfd_type_name, - 7:9, - .data$connectivity_name, - everything()) - } - - return(watersurfaces) - + ) %>% + mutate(across(c(.data$Code), as.factor)) %>% + rename( + connectivity = .data$Code, + connectivity_name = .data$Omschr + ) + } else { + connectivitytransl <- + tribble( + ~connectivity, ~connectivity_name, + paste0("ge", "\u00EF", "soleerd"), + "niet verbonden met een waterloop", + "periodiek", + paste0( + "tijdelijk (door peilbeheer of droogte) ", + "in verbinding met minstens ", "\u00E9", + "\u00E9", "n waterloop" + ), + 
"permanent", + paste0( + "permanent in verbinding met minstens ", + "\u00E9", "\u00E9", "n waterloop" + ) + ) %>% + mutate( + connectivity = factor(.data$connectivity, + levels = .$connectivity + ) + ) + } + + watersurfaces <- + watersurfaces %>% + left_join(wfd_typetransl, by = "wfd_type") %>% + mutate( + wfd_type_name = + .data$wfd_type %>% + mapvalues( + from = wfd_typetransl$wfd_type, + to = wfd_typetransl$wfd_type_name + ) + ) %>% + # following match is only partial in case of v1.2 + left_join(connectivitytransl, by = "connectivity") %>% + mutate( + connectivity_name = + .data$connectivity %>% + mapvalues( + from = connectivitytransl$connectivity, + to = connectivitytransl$connectivity_name + ) + ) %>% + select( + 1:6, + .data$wfd_type_name, + 7:9, + .data$connectivity_name, + everything() + ) } + return(watersurfaces) + } + @@ -821,97 +935,102 @@ read_watersurfaces <- #' starts_with #' read_habitatmap <- - function(file = file.path(fileman_up("n2khab_data"), "10_raw/habitatmap"), - filter_hab = FALSE, - version = c("habitatmap_2020", - "habitatmap_2018")){ - - assert_that(file.exists(file)) - assert_that(is.flag(filter_hab), noNA(filter_hab)) - version <- match.arg(version) - - if (filter_hab) { - # version control: version habitatmap == version habitatmap_stdized - xxh64sum_habitatmap_stdized_present <- xxh64sum(file.path( - fileman_up("n2khab_data"), - "20_processed/habitatmap_stdized/habitatmap_stdized.gpkg")) - - if (version == "habitatmap_2020") { - xxh64sum_habitatmap_stdized_expected <- "3109c26f0a27a0f3" - } else { - xxh64sum_habitatmap_stdized_expected <- c("b80f469f33636c8b","8e9c4e09f5f67c3e") - } - - if (!(xxh64sum_habitatmap_stdized_present %in% - xxh64sum_habitatmap_stdized_expected)) { - stop("You are trying to use habitatmap version '", version,"' ", - "with another version of habitatmap_stdized. ", - "Specify the correct version as argument (version =) ", - "and add the corresponding files under ", - "'n2khab_data/10_raw/habitatmap' and ", - "'n2khab_data/20_processed/habitatmap_stdized'.", - call. = FALSE) - } - } - - habitatmap <- read_sf(file, - "habitatmap") - - colnames(habitatmap) <- tolower(colnames(habitatmap)) - - habitatmap <- habitatmap %>% - select(polygon_id = .data$tag, - .data$eval, - starts_with("eenh"), - .data$v1, - .data$v2, - .data$v3, - source = .data$herk, - .data$info, - bwk_label = .data$bwklabel, - .data$hab1, - .data$phab1, - .data$hab2, - .data$phab2, - .data$hab3, - .data$phab3, - .data$hab4, - .data$phab4, - .data$hab5, - .data$phab5, - source_hab = .data$herkhab, - source_phab = .data$herkphab, - hab_legend = .data$hablegende, - area_m2 = .data$oppervl) - - habitatmap <- habitatmap %>% - mutate(eval = factor(.data$eval), - hab_legend = factor(.data$hab_legend) - ) - - if (filter_hab) { - - # we only select polygons with habitat or RIB, i.e. 
polygons in habitatmap_stdized data source - if (xxh64sum_habitatmap_stdized_present == "8e9c4e09f5f67c3e") { - # 2018_v1 - hab_stdized <- read_habitatmap_stdized(version = "habitatmap_stdized_2018_v1") - } else { - hab_stdized <- read_habitatmap_stdized() - } - - hab_stdized <- hab_stdized$habitatmap_polygons - - habitatmap <- habitatmap %>% - filter(.data$polygon_id %in% hab_stdized$polygon_id) %>% - mutate(polygon_id = factor(.data$polygon_id, levels = hab_stdized$polygon_id)) - - } + function(file = file.path(fileman_up("n2khab_data"), "10_raw/habitatmap"), + filter_hab = FALSE, + version = c( + "habitatmap_2020", + "habitatmap_2018" + )) { + assert_that(file.exists(file)) + assert_that(is.flag(filter_hab), noNA(filter_hab)) + version <- match.arg(version) + + if (filter_hab) { + # version control: version habitatmap == version habitatmap_stdized + xxh64sum_habitatmap_stdized_present <- xxh64sum(file.path( + fileman_up("n2khab_data"), + "20_processed/habitatmap_stdized/habitatmap_stdized.gpkg" + )) + + if (version == "habitatmap_2020") { + xxh64sum_habitatmap_stdized_expected <- "3109c26f0a27a0f3" + } else { + xxh64sum_habitatmap_stdized_expected <- c("b80f469f33636c8b", "8e9c4e09f5f67c3e") + } + + if (!(xxh64sum_habitatmap_stdized_present %in% + xxh64sum_habitatmap_stdized_expected)) { + stop("You are trying to use habitatmap version '", version, "' ", + "with another version of habitatmap_stdized. ", + "Specify the correct version as argument (version =) ", + "and add the corresponding files under ", + "'n2khab_data/10_raw/habitatmap' and ", + "'n2khab_data/20_processed/habitatmap_stdized'.", + call. = FALSE + ) + } + } - suppressWarnings(st_crs(habitatmap) <- 31370) + habitatmap <- read_sf( + file, + "habitatmap" + ) + + colnames(habitatmap) <- tolower(colnames(habitatmap)) + + habitatmap <- habitatmap %>% + select( + polygon_id = .data$tag, + .data$eval, + starts_with("eenh"), + .data$v1, + .data$v2, + .data$v3, + source = .data$herk, + .data$info, + bwk_label = .data$bwklabel, + .data$hab1, + .data$phab1, + .data$hab2, + .data$phab2, + .data$hab3, + .data$phab3, + .data$hab4, + .data$phab4, + .data$hab5, + .data$phab5, + source_hab = .data$herkhab, + source_phab = .data$herkphab, + hab_legend = .data$hablegende, + area_m2 = .data$oppervl + ) + + habitatmap <- habitatmap %>% + mutate( + eval = factor(.data$eval), + hab_legend = factor(.data$hab_legend) + ) + + if (filter_hab) { + # we only select polygons with habitat or RIB, i.e. 
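# Sketch of the filter_hab workflow, assuming that both the raw habitatmap and
# a habitatmap_stdized GeoPackage of the matching map version sit under
# n2khab_data; with mismatched versions the checksum test above raises an error.
library(n2khab)
hm_all <- read_habitatmap()
hm_hab <- read_habitatmap(filter_hab = TRUE)
nrow(hm_hab) <= nrow(hm_all)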
polygons in habitatmap_stdized data source + if (xxh64sum_habitatmap_stdized_present == "8e9c4e09f5f67c3e") { + # 2018_v1 + hab_stdized <- read_habitatmap_stdized(version = "habitatmap_stdized_2018_v1") + } else { + hab_stdized <- read_habitatmap_stdized() + } + + hab_stdized <- hab_stdized$habitatmap_polygons + + habitatmap <- habitatmap %>% + filter(.data$polygon_id %in% hab_stdized$polygon_id) %>% + mutate(polygon_id = factor(.data$polygon_id, levels = hab_stdized$polygon_id)) + } - return(habitatmap) + suppressWarnings(st_crs(habitatmap) <- 31370) - } + return(habitatmap) + } @@ -1085,95 +1204,113 @@ read_habitatmap <- #' filter #' relocate read_habitatmap_terr <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/habitatmap_terr/habitatmap_terr.gpkg"), - keep_aq_types = TRUE, - drop_7220 = TRUE, - version = c("habitatmap_terr_2020_v1", - "habitatmap_terr_2018_v2", - "habitatmap_terr_2018_v1")){ - - assert_that(is.flag(keep_aq_types), noNA(keep_aq_types)) - assert_that(is.flag(drop_7220), noNA(drop_7220)) - version <- match.arg(version) - - habmap_terr_polygons <- read_sf(file, - "habitatmap_terr_polygons") - - habmap_terr_polygons <- habmap_terr_polygons %>% - mutate(polygon_id = factor(.data$polygon_id), - source = factor(.data$source)) - - suppressWarnings(st_crs(habmap_terr_polygons) <- 31370) - - if (version == "habitatmap_terr_2018_v1") { - habmap_terr_types <- suppressWarnings( - read_sf(file, - "habitatmap_terr_patches") - ) - } else { - habmap_terr_types <- suppressWarnings( - read_sf(file, - "habitatmap_terr_types") - ) - } - - types <- read_types() - - habmap_terr_types <- habmap_terr_types %>% - mutate(polygon_id = factor(.data$polygon_id, - levels = levels(habmap_terr_polygons$polygon_id)), - certain = .data$certain == 1, - type = factor(.data$type, - levels = levels(types$type) - ), - source = factor(.data$source) - ) - - if (!keep_aq_types) { - habmap_terr_types <- - habmap_terr_types %>% - filter(!(.data$type %in% (types %>% - filter(.data$hydr_class == "HC3") %>% - .$type) - )) - # The below step is unneeded (and takes several seconds), - # because polygons with _no_ terrestrial types were already - # excluded in the data source. 
- # - # habmap_terr_polygons %>% - # dplyr::semi_join(habmap_terr_types, - # by = "polygon_id") - } - - if (drop_7220) { - habmap_terr_types <- - habmap_terr_types %>% - filter(.data$type != "7220") - # note that no polygons need to be discarded: 7220 never occurred - # alone - } + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/habitatmap_terr/habitatmap_terr.gpkg" + ), + keep_aq_types = TRUE, + drop_7220 = TRUE, + version = c( + "habitatmap_terr_2020_v1", + "habitatmap_terr_2018_v2", + "habitatmap_terr_2018_v1" + )) { + assert_that(is.flag(keep_aq_types), noNA(keep_aq_types)) + assert_that(is.flag(drop_7220), noNA(drop_7220)) + version <- match.arg(version) + + habmap_terr_polygons <- read_sf( + file, + "habitatmap_terr_polygons" + ) + + habmap_terr_polygons <- habmap_terr_polygons %>% + mutate( + polygon_id = factor(.data$polygon_id), + source = factor(.data$source) + ) + + suppressWarnings(st_crs(habmap_terr_polygons) <- 31370) + + if (version == "habitatmap_terr_2018_v1") { + habmap_terr_types <- suppressWarnings( + read_sf( + file, + "habitatmap_terr_patches" + ) + ) + } else { + habmap_terr_types <- suppressWarnings( + read_sf( + file, + "habitatmap_terr_types" + ) + ) + } - if (grepl("2018", version)) { - habmap_terr_types <- - habmap_terr_types %>% - relocate(.data$polygon_id, - .data$type, - .data$certain) - } + types <- read_types() + + habmap_terr_types <- habmap_terr_types %>% + mutate( + polygon_id = factor(.data$polygon_id, + levels = levels(habmap_terr_polygons$polygon_id) + ), + certain = .data$certain == 1, + type = factor(.data$type, + levels = levels(types$type) + ), + source = factor(.data$source) + ) + + if (!keep_aq_types) { + habmap_terr_types <- + habmap_terr_types %>% + filter(!(.data$type %in% (types %>% + filter(.data$hydr_class == "HC3") %>% + .$type) + )) + # The below step is unneeded (and takes several seconds), + # because polygons with _no_ terrestrial types were already + # excluded in the data source. 
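# Caller-side sketch of keep_aq_types and drop_7220, assuming the
# habitatmap_terr GeoPackage is available under n2khab_data. Only the types
# element shrinks; the polygon layer itself is not filtered here.
library(n2khab)
ht <- read_habitatmap_terr()
ht_terr <- read_habitatmap_terr(keep_aq_types = FALSE)
nrow(ht_terr$habitatmap_terr_types) <= nrow(ht$habitatmap_terr_types)
"7220" %in% ht$habitatmap_terr_types$type # FALSE under the default drop_7220 = TRUE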
+ # + # habmap_terr_polygons %>% + # dplyr::semi_join(habmap_terr_types, + # by = "polygon_id") + } - if (version == "habitatmap_terr_2018_v1") { - result <- list(habitatmap_terr_polygons = habmap_terr_polygons, - habitatmap_terr_patches = habmap_terr_types) - } else { - result <- list(habitatmap_terr_polygons = habmap_terr_polygons, - habitatmap_terr_types = habmap_terr_types) - } + if (drop_7220) { + habmap_terr_types <- + habmap_terr_types %>% + filter(.data$type != "7220") + # note that no polygons need to be discarded: 7220 never occurred + # alone + } - return(result) + if (grepl("2018", version)) { + habmap_terr_types <- + habmap_terr_types %>% + relocate( + .data$polygon_id, + .data$type, + .data$certain + ) + } + if (version == "habitatmap_terr_2018_v1") { + result <- list( + habitatmap_terr_polygons = habmap_terr_polygons, + habitatmap_terr_patches = habmap_terr_types + ) + } else { + result <- list( + habitatmap_terr_polygons = habmap_terr_polygons, + habitatmap_terr_types = habmap_terr_types + ) } + return(result) + } + @@ -1234,8 +1371,10 @@ read_habitatmap_terr <- #' hs #' hs2 <- read_habitatstreams(source_text = TRUE) #' hs2 -#' all.equal(hs %>% st_drop_geometry, -#' hs2$lines %>% st_drop_geometry) +#' all.equal( +#' hs %>% st_drop_geometry(), +#' hs2$lines %>% st_drop_geometry() +#' ) #' } #' #' @importFrom assertthat @@ -1259,64 +1398,84 @@ read_habitatmap_terr <- #' str_to_title #' @export read_habitatstreams <- - function(file = file.path(fileman_up("n2khab_data"), - "10_raw/habitatstreams"), - source_text = FALSE){ - - assert_that(file.exists(file)) - - assert_that(is.flag(source_text), noNA(source_text)) - - habitatstreams <- - suppressWarnings( - read_sf(file, - crs = 31370) - ) - - lines <- - habitatstreams %>% - select(river_name = .data$NAAM, - source_id = .data$BRON) %>% - mutate(river_name = factor( - gsub(pattern = "(^|[[:punct:]])([[:alpha:]])", - replacement = "\\1\\U\\2", - str_replace(str_to_title( - str_squish(.data$river_name)), - pattern = "Ij", - replacement = "IJ"), - perl = TRUE)), - source_id = factor(.data$source_id), - type = "3260" %>% - factor(levels = read_types() %>% - .$type %>% - levels)) %>% - select(.data$river_name, - .data$source_id, - .data$type) - - if (source_text) { - - sources <- - habitatstreams %>% - st_drop_geometry %>% - distinct(source_id = .data$BRON, - source_text = .data$OMSCHR) %>% - mutate(source_id = factor(.data$source_id, - levels = lines %>% .$source_id %>% - levels), - source_text = fct_reorder(.data$source_text, - as.numeric(.data$source_id))) - - result <- list(lines = lines, - sources = sources) - - } else { - result <- lines - } - - return(result) + function(file = file.path( + fileman_up("n2khab_data"), + "10_raw/habitatstreams" + ), + source_text = FALSE) { + assert_that(file.exists(file)) + + assert_that(is.flag(source_text), noNA(source_text)) + + habitatstreams <- + suppressWarnings( + read_sf(file, + crs = 31370 + ) + ) + + lines <- + habitatstreams %>% + select( + river_name = .data$NAAM, + source_id = .data$BRON + ) %>% + mutate( + river_name = factor( + gsub( + pattern = "(^|[[:punct:]])([[:alpha:]])", + replacement = "\\1\\U\\2", + str_replace( + str_to_title( + str_squish(.data$river_name) + ), + pattern = "Ij", + replacement = "IJ" + ), + perl = TRUE + ) + ), + source_id = factor(.data$source_id), + type = "3260" %>% + factor(levels = read_types() %>% + .$type %>% + levels()) + ) %>% + select( + .data$river_name, + .data$source_id, + .data$type + ) + + if (source_text) { + sources <- + habitatstreams %>% 
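# Stand-alone illustration of the river_name cleaning applied above: squish
# whitespace, title-case, restore the Dutch "IJ" digraph and re-capitalize
# letters that follow punctuation (toy input, not taken from the data source).
library(stringr)
x <- "  oude  ijzer-arm "
gsub("(^|[[:punct:]])([[:alpha:]])", "\\1\\U\\2",
  str_replace(str_to_title(str_squish(x)), "Ij", "IJ"),
  perl = TRUE
)
# expected result: "Oude IJzer-Arm"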
+ st_drop_geometry() %>% + distinct( + source_id = .data$BRON, + source_text = .data$OMSCHR + ) %>% + mutate( + source_id = factor(.data$source_id, + levels = lines %>% .$source_id %>% + levels() + ), + source_text = fct_reorder( + .data$source_text, + as.numeric(.data$source_id) + ) + ) + + result <- list( + lines = lines, + sources = sources + ) + } else { + result <- lines + } - } + return(result) + } @@ -1439,93 +1598,118 @@ read_habitatstreams <- #' vars #' @export read_habitatsprings <- - function(file = file.path(fileman_up("n2khab_data"), - "10_raw/habitatsprings/habitatsprings.geojson"), - filter_hab = FALSE, - units_7220 = FALSE, - version = "habitatsprings_2020v2"){ - - assert_that(file.exists(file)) - assert_that(is.flag(filter_hab), noNA(filter_hab)) - assert_that(is.flag(units_7220), noNA(units_7220)) - assert_that(is.string(version)) - - typelevels <- - read_types() %>% - .$type %>% - levels + function(file = file.path( + fileman_up("n2khab_data"), + "10_raw/habitatsprings/habitatsprings.geojson" + ), + filter_hab = FALSE, + units_7220 = FALSE, + version = "habitatsprings_2020v2") { + assert_that(file.exists(file)) + assert_that(is.flag(filter_hab), noNA(filter_hab)) + assert_that(is.flag(units_7220), noNA(units_7220)) + assert_that(is.string(version)) + + typelevels <- + read_types() %>% + .$type %>% + levels() + + habitatsprings <- + read_sf(file) %>% + st_transform(31370) %>% + mutate( + area_m2 = ifelse(.data$area_m2 > 0, + .data$area_m2, + NA + ), + year = ifelse(.data$year > 0, + .data$year, + NA + ), + in_sac = (.data$sbz == 1), + type = str_sub(.data$habitattype, end = 4) %>% + factor(levels = typelevels), + certain = (.data$validity_status == "gecontroleerd") + ) %>% + { + if (filter_hab) filter(., !is.na(.$type)) else . + } %>% + select( + point_id = .data$id, + .data$name, + code_orig = .data$habitattype, + .data$type, + .data$certain, + .data$area_m2, + .data$year, + .data$in_sac, + everything(), + -.data$validity_status, + -.data$sbz + ) + + if (version != "habitatsprings_2019v1") { + habitatsprings <- + habitatsprings %>% + mutate(system_type = factor(.data$system_type)) %>% + select( + 1:2, + .data$system_type, + 3:5, + .data$unit_id, + everything() + ) + } + if (units_7220) { + assert_that(version != "habitatsprings_2019v1", + msg = paste( + "'units_7220 = TRUE' is not supported for", + "version habitatsprings_2019v1." 
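# Usage sketch for units_7220 = TRUE, assuming habitatsprings.geojson is in
# place: the 7220 points are collapsed to one centroid per unit_id, carrying
# the summed area and a point count.
library(n2khab)
hs_units <- read_habitatsprings(units_7220 = TRUE)
hs_units$nr_of_points
anyDuplicated(hs_units$unit_id) == 0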
+ ) + ) + suppressWarnings( habitatsprings <- - read_sf(file) %>% - st_transform(31370) %>% - mutate( - area_m2 = ifelse(.data$area_m2 > 0, - .data$area_m2, - NA), - year = ifelse(.data$year > 0, - .data$year, - NA), - in_sac = (.data$sbz == 1), - type = str_sub(.data$habitattype, end = 4) %>% - factor(levels = typelevels), - certain = (.data$validity_status == "gecontroleerd") - ) %>% - {if (filter_hab) filter(., !is.na(.$type)) else .} %>% - select(point_id = .data$id, - .data$name, - code_orig = .data$habitattype, - .data$type, - .data$certain, - .data$area_m2, - .data$year, - .data$in_sac, - everything(), - -.data$validity_status, - -.data$sbz) - - if (version != "habitatsprings_2019v1") { - habitatsprings <- - habitatsprings %>% - mutate(system_type = factor(.data$system_type)) %>% - select(1:2, - .data$system_type, - 3:5, - .data$unit_id, - everything()) - } - - if (units_7220) { - assert_that(version != "habitatsprings_2019v1", - msg = paste("'units_7220 = TRUE' is not supported for", - "version habitatsprings_2019v1.")) - suppressWarnings( - habitatsprings <- - habitatsprings %>% - filter(.data$type == "7220") %>% - select(-.data$point_id) %>% - group_by(.data$unit_id) %>% - mutate(area_m2 = sum(.data$area_m2), - system_type = as.character(.data$system_type), - type = as.character(.data$type), - nr_of_points = n()) %>% - summarise_if(function(x) {!inherits(x, "sfc")}, - max) %>% - mutate(type = .data$type %>% factor(levels = typelevels), - system_type = factor(.data$system_type)) %>% - mutate_at(vars(.data$certain, - .data$in_sac), - as.logical) %>% - st_centroid() %>% - select(.data$unit_id, - .data$nr_of_points, - everything()) - ) - } - - return(habitatsprings) - + habitatsprings %>% + filter(.data$type == "7220") %>% + select(-.data$point_id) %>% + group_by(.data$unit_id) %>% + mutate( + area_m2 = sum(.data$area_m2), + system_type = as.character(.data$system_type), + type = as.character(.data$type), + nr_of_points = n() + ) %>% + summarise_if( + function(x) { + !inherits(x, "sfc") + }, + max + ) %>% + mutate( + type = .data$type %>% factor(levels = typelevels), + system_type = factor(.data$system_type) + ) %>% + mutate_at( + vars( + .data$certain, + .data$in_sac + ), + as.logical + ) %>% + st_centroid() %>% + select( + .data$unit_id, + .data$nr_of_points, + everything() + ) + ) } + return(habitatsprings) + } + @@ -1607,7 +1791,8 @@ read_habitatsprings <- #' hq2 #' hq3 <- read_habitatquarries(references = TRUE) #' hq3 -#' read_habitatquarries(bibtex = TRUE)} +#' read_habitatquarries(bibtex = TRUE) +#' } #' #' @importFrom assertthat #' assert_that @@ -1628,77 +1813,89 @@ read_habitatsprings <- #' filter #' @export read_habitatquarries <- - function(file = file.path(fileman_up("n2khab_data"), - "10_raw/habitatquarries/habitatquarries.gpkg"), - filter_hab = FALSE, - references = FALSE, - bibtex = FALSE, - version = "habitatquarries_2020v1"){ - - assert_that(file.exists(file)) - assert_that(is.flag(filter_hab), noNA(filter_hab)) - assert_that(is.flag(references), noNA(references)) - assert_that(is.flag(bibtex), noNA(bibtex)) - assert_that(is.string(version)) - - if ((references | filter_hab) & bibtex) { - warning("Will not read spatial layer when bibtex = TRUE. 
", - "Ignoring other argument(s) that are set to TRUE.") - } + function(file = file.path( + fileman_up("n2khab_data"), + "10_raw/habitatquarries/habitatquarries.gpkg" + ), + filter_hab = FALSE, + references = FALSE, + bibtex = FALSE, + version = "habitatquarries_2020v1") { + assert_that(file.exists(file)) + assert_that(is.flag(filter_hab), noNA(filter_hab)) + assert_that(is.flag(references), noNA(references)) + assert_that(is.flag(bibtex), noNA(bibtex)) + assert_that(is.string(version)) + + if ((references | filter_hab) & bibtex) { + warning( + "Will not read spatial layer when bibtex = TRUE. ", + "Ignoring other argument(s) that are set to TRUE." + ) + } - if (references | bibtex) { - extra_references <- - read_sf(file, - layer = "extra_references") - if (bibtex) - { - if (!requireNamespace("bib2df", quietly = TRUE)) { - stop("Package \"bib2df\" is needed when bibtex = TRUE. ", - "Please install it from GitHub with: ", - "remotes::install_github(\"ropensci/bib2df\")", - call. = FALSE) - } - message("You can copy below output into a *.bib file ", - "for further use.\n") - extra_references %>% - mutate(author = str_split(.data$author, " and ")) %>% - set_colnames(toupper(colnames(.))) %>% - bib2df::df2bib() - return(invisible(NULL)) - } + if (references | bibtex) { + extra_references <- + read_sf(file, + layer = "extra_references" + ) + if (bibtex) { + if (!requireNamespace("bib2df", quietly = TRUE)) { + stop("Package \"bib2df\" is needed when bibtex = TRUE. ", + "Please install it from GitHub with: ", + "remotes::install_github(\"ropensci/bib2df\")", + call. = FALSE + ) } + message( + "You can copy below output into a *.bib file ", + "for further use.\n" + ) + extra_references %>% + mutate(author = str_split(.data$author, " and ")) %>% + set_colnames(toupper(colnames(.))) %>% + bib2df::df2bib() + return(invisible(NULL)) + } + } - typelevels <- - read_types() %>% - .$type %>% - levels - - habitatquarries <- - suppressWarnings( - read_sf(file, - layer = "habitatquarries", - crs = 31370) - ) %>% - mutate( - type = ifelse(.data$habitattype == "8310", - "8310", - NA_character_) %>% - factor(levels = typelevels), - extra_reference = factor(.data$extra_reference) - ) %>% - {if (filter_hab) filter(., !is.na(.$type)) else .} %>% - select(.data$polygon_id, - .data$unit_id, - .data$name, - code_orig = .data$habitattype, - .data$type, - .data$extra_reference) - -if (references) { - habitatquarries <- list(habitatquarries = habitatquarries, - extra_references = extra_references) -} - - return(habitatquarries) - + typelevels <- + read_types() %>% + .$type %>% + levels() + + habitatquarries <- + suppressWarnings( + read_sf(file, + layer = "habitatquarries", + crs = 31370 + ) + ) %>% + mutate( + type = ifelse(.data$habitattype == "8310", + "8310", + NA_character_ + ) %>% + factor(levels = typelevels), + extra_reference = factor(.data$extra_reference) + ) %>% + { + if (filter_hab) filter(., !is.na(.$type)) else . 
+ } %>% + select(.data$polygon_id, + .data$unit_id, + .data$name, + code_orig = .data$habitattype, + .data$type, + .data$extra_reference + ) + + if (references) { + habitatquarries <- list( + habitatquarries = habitatquarries, + extra_references = extra_references + ) } + + return(habitatquarries) + } diff --git a/R/read_raster_runif.R b/R/read_raster_runif.R index 649e96e9..cbdabcdc 100644 --- a/R/read_raster_runif.R +++ b/R/read_raster_runif.R @@ -41,16 +41,17 @@ #' assert_that read_raster_runif <- - function(file = file.path(fileman_up("n2khab_data"), - "10_raw/raster_runif/raster_runif.tif"), - version = "raster_runif_v1") { + function(file = file.path( + fileman_up("n2khab_data"), + "10_raw/raster_runif/raster_runif.tif" + ), + version = "raster_runif_v1") { + assert_that(file.exists(file)) - assert_that(file.exists(file)) + require_pkgs("raster") - require_pkgs("raster") + r <- raster::raster(file) + raster::crs(r) <- "EPSG:31370" - r <- raster::raster(file) - raster::crs(r) <- "EPSG:31370" - - return(r) - } + return(r) + } diff --git a/R/read_shallowgroundwater.R b/R/read_shallowgroundwater.R index d4e0e589..a8c63852 100644 --- a/R/read_shallowgroundwater.R +++ b/R/read_shallowgroundwater.R @@ -193,17 +193,18 @@ #' read_sf #' @export read_shallowgroundwater <- - function(file = file.path(fileman_up("n2khab_data"), - "10_raw/shallowgroundwater/shallowgroundwater.gpkg")){ + function(file = file.path( + fileman_up("n2khab_data"), + "10_raw/shallowgroundwater/shallowgroundwater.gpkg" + )) { + assert_that(file.exists(file)) - assert_that(file.exists(file)) + shallowgroundwater <- + suppressWarnings( + read_sf(file, + crs = 31370 + ) + ) - shallowgroundwater <- - suppressWarnings( - read_sf(file, - crs = 31370) - ) - - return(shallowgroundwater) - - } + return(shallowgroundwater) + } diff --git a/R/read_soilmap.R b/R/read_soilmap.R index ad6f6208..86221ed0 100644 --- a/R/read_soilmap.R +++ b/R/read_soilmap.R @@ -233,10 +233,10 @@ #' soilmap_simple #' soilmap_simple %>% #' filter(!is.na(bsm_mo_substr)) %>% -#' glimpse +#' glimpse() #' soilmap_simple %>% #' filter(bsm_converted) %>% -#' glimpse +#' glimpse() #' } #' #' @importFrom assertthat @@ -269,292 +269,361 @@ #' @importFrom rlang .data #' @export read_soilmap <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/soilmap_simple/soilmap_simple.gpkg"), - file_raw = file.path(fileman_up("n2khab_data"), "10_raw/soilmap"), - use_processed = TRUE, - version_processed = "soilmap_simple_v2", - standardize_coastalplain = FALSE, - simplify = FALSE, - explan = FALSE) { + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/soilmap_simple/soilmap_simple.gpkg" + ), + file_raw = file.path(fileman_up("n2khab_data"), "10_raw/soilmap"), + use_processed = TRUE, + version_processed = "soilmap_simple_v2", + standardize_coastalplain = FALSE, + simplify = FALSE, + explan = FALSE) { + assert_that(is.flag(simplify), noNA(simplify)) + assert_that( + is.flag(standardize_coastalplain), + noNA(standardize_coastalplain) + ) + assert_that(is.flag(use_processed), noNA(use_processed)) + assert_that(is.flag(explan), noNA(explan)) + assert_that(is.string(version_processed)) - assert_that(is.flag(simplify), noNA(simplify)) - assert_that(is.flag(standardize_coastalplain), - noNA(standardize_coastalplain)) - assert_that(is.flag(use_processed), noNA(use_processed)) - assert_that(is.flag(explan), noNA(explan)) - assert_that(is.string(version_processed)) + ####### 1. 
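# Sketch for the two small readers changed above, assuming the corresponding
# files exist under n2khab_data/10_raw: raster_runif is returned as a
# RasterLayer in CRS EPSG:31370 (the raster package must be installed),
# shallowgroundwater as an sf layer.
library(n2khab)
r <- read_raster_runif()
raster::crs(r)
sgw <- read_shallowgroundwater()
sf::st_crs(sgw)$epsg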
Reading soilmap_simple #### + ###################################### - ####### 1. Reading soilmap_simple #### - ###################################### + if (use_processed) { + soilmap_simple_path <- file + assert_that(file.exists(soilmap_simple_path)) - if (use_processed) { + soilmap_simple <- + read_sf( + soilmap_simple_path, + "soilmap_simple" + ) %>% + mutate_if(is.character, factor) - soilmap_simple_path <- file - assert_that(file.exists(soilmap_simple_path)) - - soilmap_simple <- - read_sf(soilmap_simple_path, - "soilmap_simple") %>% - mutate_if(is.character, factor) - - if (explan & version_processed == "soilmap_simple_v1") { - warning("Version soilmap_simple_v1 is not supported for ", - "adding explanatory variables.") - } + if (explan & version_processed == "soilmap_simple_v1") { + warning( + "Version soilmap_simple_v1 is not supported for ", + "adding explanatory variables." + ) + } - if (explan & version_processed != "soilmap_simple_v1") { + if (explan & version_processed != "soilmap_simple_v1") { + suppressWarnings( + explanations <- + read_sf( + soilmap_simple_path, + "explanations" + ) %>% + split(.$subject) %>% + lapply(function(df) { + select(df, -.data$subject) + }) + ) - suppressWarnings( - explanations <- - read_sf(soilmap_simple_path, - "explanations") %>% - split(.$subject) %>% - lapply(function(df) { - select(df, -.data$subject) - }) - ) + soilmap_simple <- + soilmap_simple %>% + mutate( + bsm_mo_substr_explan = + namelist_factor(.data$bsm_mo_substr, + codelist = explanations[["bsm_mo_substr"]] + ), + bsm_mo_tex_explan = + namelist_factor(.data$bsm_mo_tex, + codelist = explanations[["bsm_mo_tex"]] + ), + bsm_mo_drain_explan = + namelist_factor(.data$bsm_mo_drain, + codelist = explanations[["bsm_mo_drain"]] + ), + bsm_mo_prof_explan = + namelist_factor(.data$bsm_mo_prof, + codelist = explanations[["bsm_mo_prof"]] + ), + bsm_mo_parentmat_explan = + namelist_factor(.data$bsm_mo_parentmat, + codelist = explanations[["bsm_mo_parentmat"]] + ), + bsm_mo_profvar_explan = + namelist_factor(.data$bsm_mo_profvar, + codelist = explanations[["bsm_mo_profvar"]] + ) + ) %>% + select( + .data$bsm_poly_id:.data$bsm_mo_soilunitype, + !!(c(5, 12) + rep(0:5, each = 2)) + ) + } - soilmap_simple <- - soilmap_simple %>% - mutate(bsm_mo_substr_explan = - namelist_factor(.data$bsm_mo_substr, - codelist = explanations[["bsm_mo_substr"]]), - bsm_mo_tex_explan = - namelist_factor(.data$bsm_mo_tex, - codelist = explanations[["bsm_mo_tex"]]), - bsm_mo_drain_explan = - namelist_factor(.data$bsm_mo_drain, - codelist = explanations[["bsm_mo_drain"]]), - bsm_mo_prof_explan = - namelist_factor(.data$bsm_mo_prof, - codelist = explanations[["bsm_mo_prof"]]), - bsm_mo_parentmat_explan = - namelist_factor(.data$bsm_mo_parentmat, - codelist = explanations[["bsm_mo_parentmat"]]), - bsm_mo_profvar_explan = - namelist_factor(.data$bsm_mo_profvar, - codelist = explanations[["bsm_mo_profvar"]]) - ) %>% - select(.data$bsm_poly_id:.data$bsm_mo_soilunitype, - !!(c(5, 12) + rep(0:5, each = 2))) + if (version_processed == "soilmap_simple_v1") { + soilmap_simple <- + select( + soilmap_simple, + -.data$bsm_ge_coastalplain + ) + suppressWarnings(st_crs(soilmap_simple) <- 31370) + } - } + return(soilmap_simple) + } else { + ####### 2. 
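# Default (processed) path sketch: soilmap_simple with readable explanations
# joined in from its "explanations" layer, assuming soilmap_simple.gpkg sits
# under n2khab_data/20_processed.
library(n2khab)
library(dplyr)
ss <- read_soilmap(explan = TRUE)
ss %>%
  sf::st_drop_geometry() %>%
  select(bsm_mo_tex, bsm_mo_tex_explan) %>%
  head()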
Reading soilmap #### + ###################################### - if (version_processed == "soilmap_simple_v1") { - soilmap_simple <- - select(soilmap_simple, - -.data$bsm_ge_coastalplain) - suppressWarnings(st_crs(soilmap_simple) <- 31370) - } + soilmap_path <- file_raw + assert_that(file.exists(soilmap_path)) - return(soilmap_simple) + suppressWarnings( + soilmap <- read_sf(soilmap_path, + crs = 31370 + ) + ) - } else { + soilmap <- + soilmap %>% + convertdf_enc(from = "latin1", to = "UTF-8") %>% + select( + bsm_poly_id = .data$gid, + bsm_map_id = .data$Kaartbldnr, + bsm_region = .data$Streek, + bsm_ge_region = .data$Streek_c, + bsm_legend = .data$Grove_leg, + bsm_legend_title = .data$Uitleg_tit, + bsm_legend_explan = .data$Uitleg, + bsm_soiltype_id = .data$codeid, + bsm_soiltype = .data$Bodemtype, + bsm_ge_typology = .data$Type_class, + bsm_soiltype_region = .data$Bodtypstr, + bsm_soilseries = .data$Bodemser_c, + bsm_soilseries_explan = .data$Bodemserie, + bsm_mo_soilunitype = .data$Unitype, + bsm_mo_substr = .data$Substr_V_c, + bsm_mo_substr_explan = .data$SubstraatV, + bsm_mo_tex = .data$Textuur_c, + bsm_mo_tex_explan = .data$Textuur, + bsm_mo_drain = .data$Drainage_c, + bsm_mo_drain_explan = .data$Drainage, + bsm_mo_prof = .data$Profontw_c, + bsm_mo_prof_explan = .data$Profontw, + bsm_mo_parentmat = .data$Varimoma_c, + bsm_mo_parentmat_explan = .data$Varimoma, + bsm_mo_profvar = .data$Variprof_c, + bsm_mo_profvar_explan = .data$Variprof, + bsm_mo_phase = .data$Fase_c, + bsm_ge_substr = .data$Substr_p_c, + bsm_ge_substr_explan = .data$Substr_pol, + bsm_ge_series = .data$Serie_c, + bsm_ge_series_explan = .data$Serie, + bsm_ge_subseries = .data$Subserie_c, + bsm_ge_subseries_explan = .data$Subserie, + bsm_map_url = .data$Scan_kbl, + bsm_book_url = .data$Scan_boek, + bsm_detailmap_url = .data$Scan_5000, + bsm_profloc_url = .data$Scan_stip + ) %>% + mutate(bsm_ge_typology = .data$bsm_ge_typology == "Zeepolders") %>% + mutate_at( + .vars = vars( + -.data$bsm_poly_id, + -.data$bsm_soiltype_id, + -.data$bsm_ge_typology, + -.data$bsm_soiltype_id, + -.data$geometry, + -matches(".+_mo_.+_explan") + ), + .funs = factor + ) - ####### 2. 
Reading soilmap #### - ###################################### + # setting factor levels of mo_*_explan variables in the same + # order as mo_* - soilmap_path <- file_raw - assert_that(file.exists(soilmap_path)) + keyvars <- c( + "bsm_mo_substr", + "bsm_mo_tex", + "bsm_mo_drain", + "bsm_mo_prof", + "bsm_mo_parentmat", + "bsm_mo_profvar" + ) + keys <- list() + soilmap_df <- + soilmap %>% + st_drop_geometry() + for (i in keyvars) { + temp_df <- + soilmap_df %>% + select(matches(str_c(i, ".*"))) %>% + select(1:2) %>% + filter_at(1, function(x) !is.na(x)) %>% + distinct() + keys[[i]] <- + setNames( + temp_df %>% pull(2), + temp_df %>% pull(1) + ) + } - suppressWarnings( - soilmap <- read_sf(soilmap_path, - crs = 31370) + soilmap <- + soilmap %>% + mutate( + bsm_mo_substr_explan = recode( + .data$bsm_mo_substr, + !!!keys[["bsm_mo_substr"]] + ), + bsm_mo_tex_explan = recode( + .data$bsm_mo_tex, + !!!keys[["bsm_mo_tex"]] + ), + bsm_mo_drain_explan = recode( + .data$bsm_mo_drain, + !!!keys[["bsm_mo_drain"]] + ), + bsm_mo_prof_explan = recode( + .data$bsm_mo_prof, + !!!keys[["bsm_mo_prof"]] + ), + bsm_mo_parentmat_explan = recode( + .data$bsm_mo_parentmat, + !!!keys[["bsm_mo_parentmat"]] + ), + bsm_mo_profvar_explan = recode( + .data$bsm_mo_profvar, + !!!keys[["bsm_mo_profvar"]] + ), ) - soilmap <- - soilmap %>% - convertdf_enc(from = "latin1", to = "UTF-8") %>% - select(bsm_poly_id = .data$gid, - bsm_map_id = .data$Kaartbldnr, - bsm_region = .data$Streek, - bsm_ge_region = .data$Streek_c, - bsm_legend = .data$Grove_leg, - bsm_legend_title = .data$Uitleg_tit, - bsm_legend_explan = .data$Uitleg, - bsm_soiltype_id = .data$codeid, - bsm_soiltype = .data$Bodemtype, - bsm_ge_typology = .data$Type_class, - bsm_soiltype_region = .data$Bodtypstr, - bsm_soilseries = .data$Bodemser_c, - bsm_soilseries_explan = .data$Bodemserie, - bsm_mo_soilunitype = .data$Unitype, - bsm_mo_substr = .data$Substr_V_c, - bsm_mo_substr_explan = .data$SubstraatV, - bsm_mo_tex = .data$Textuur_c, - bsm_mo_tex_explan = .data$Textuur, - bsm_mo_drain = .data$Drainage_c, - bsm_mo_drain_explan = .data$Drainage, - bsm_mo_prof = .data$Profontw_c, - bsm_mo_prof_explan = .data$Profontw, - bsm_mo_parentmat = .data$Varimoma_c, - bsm_mo_parentmat_explan = .data$Varimoma, - bsm_mo_profvar = .data$Variprof_c, - bsm_mo_profvar_explan = .data$Variprof, - bsm_mo_phase = .data$Fase_c, - bsm_ge_substr = .data$Substr_p_c, - bsm_ge_substr_explan = .data$Substr_pol, - bsm_ge_series = .data$Serie_c, - bsm_ge_series_explan = .data$Serie, - bsm_ge_subseries = .data$Subserie_c, - bsm_ge_subseries_explan = .data$Subserie, - bsm_map_url = .data$Scan_kbl, - bsm_book_url = .data$Scan_boek, - bsm_detailmap_url = .data$Scan_5000, - bsm_profloc_url = .data$Scan_stip - ) %>% - mutate(bsm_ge_typology = .data$bsm_ge_typology == "Zeepolders") %>% - mutate_at(.vars = vars(-.data$bsm_poly_id, - -.data$bsm_soiltype_id, - -.data$bsm_ge_typology, - -.data$bsm_soiltype_id, - -.data$geometry, - -matches(".+_mo_.+_explan")), - .funs = factor) + ####### STANDARDIZATION FOR COASTAL PLAIN FEATURES ################ - # setting factor levels of mo_*_explan variables in the same - # order as mo_* - - keyvars <- c("bsm_mo_substr", - "bsm_mo_tex", - "bsm_mo_drain", - "bsm_mo_prof", - "bsm_mo_parentmat", - "bsm_mo_profvar") - keys <- list() - soilmap_df <- - soilmap %>% - st_drop_geometry - for (i in keyvars) { - temp_df <- - soilmap_df %>% - select(matches(str_c(i, ".*"))) %>% - select(1:2) %>% - filter_at(1, function(x) !is.na(x)) %>% - distinct - keys[[i]] <- - setNames(temp_df %>% 
pull(2), - temp_df %>% pull(1)) - } + if (standardize_coastalplain) { + transl <- read_vc( + file = "soil_translation_coastalplain", + root = pkgdatasource_path( + "textdata/soil_translation_coastalplain", ".yml" + ) + ) %>% + mutate(soiltype_orig = factor(.data$soiltype_orig, + levels = + levels(soilmap$bsm_soiltype) + )) %>% + filter(!is.na(.data$texture_transl)) %>% + mutate( + tex_explan_transl = recode( + .data$texture_transl, + !!!keys[["bsm_mo_tex"]] + ), + drain_explan_transl = recode( + .data$drainage_transl, + !!!keys[["bsm_mo_drain"]] + ), + bsm_converted = TRUE + ) + stand_vars <- c( + "bsm_mo_substr", + "bsm_mo_tex", + "bsm_mo_drain" + ) soilmap <- - soilmap %>% - mutate( - bsm_mo_substr_explan = recode(.data$bsm_mo_substr, - !!!keys[["bsm_mo_substr"]]), - bsm_mo_tex_explan = recode(.data$bsm_mo_tex, - !!!keys[["bsm_mo_tex"]]), - bsm_mo_drain_explan = recode(.data$bsm_mo_drain, - !!!keys[["bsm_mo_drain"]]), - bsm_mo_prof_explan = recode(.data$bsm_mo_prof, - !!!keys[["bsm_mo_prof"]]), - bsm_mo_parentmat_explan = recode(.data$bsm_mo_parentmat, - !!!keys[["bsm_mo_parentmat"]]), - bsm_mo_profvar_explan = recode(.data$bsm_mo_profvar, - !!!keys[["bsm_mo_profvar"]]), + soilmap %>% + left_join(transl, by = c("bsm_soiltype" = "soiltype_orig")) %>% + mutate_at( + c(stand_vars, paste0(stand_vars, "_explan")), + as.character + ) %>% + mutate( + bsm_mo_substr = ifelse(is.na(.data$bsm_mo_substr) & + !is.na(.data$bsm_ge_substr), + as.character(.data$bsm_ge_substr), + .data$bsm_mo_substr + ) %>% + factor(levels = levels(soilmap$bsm_mo_substr)), + bsm_mo_substr_explan = ifelse(is.na(.data$bsm_mo_substr_explan) & + !is.na(.data$bsm_ge_substr_explan), + as.character(.data$bsm_ge_substr_explan), + .data$bsm_mo_substr_explan + ) %>% + factor(levels = levels(soilmap$bsm_mo_substr_explan)), + bsm_mo_tex = ifelse(is.na(.data$bsm_mo_tex) & + !is.na(.data$texture_transl), + .data$texture_transl, + .data$bsm_mo_tex + ) %>% + factor(levels = levels(soilmap$bsm_mo_tex)), + bsm_mo_tex_explan = ifelse(is.na(.data$bsm_mo_tex_explan) & + !is.na(.data$tex_explan_transl), + .data$tex_explan_transl, + .data$bsm_mo_tex_explan + ) %>% + factor(levels = levels(soilmap$bsm_mo_tex_explan)), + bsm_mo_drain = ifelse(is.na(.data$bsm_mo_drain) & + !is.na(.data$drainage_transl), + .data$drainage_transl, + .data$bsm_mo_drain + ) %>% + factor(levels = levels(soilmap$bsm_mo_drain)), + bsm_mo_drain_explan = ifelse(is.na(.data$bsm_mo_drain_explan) & + !is.na(.data$drain_explan_transl), + .data$drain_explan_transl, + .data$bsm_mo_drain_explan + ) %>% + factor(levels = levels(soilmap$bsm_mo_drain_explan)), + bsm_converted = ifelse(is.na(.data$bsm_converted), + FALSE, + .data$bsm_converted ) + ) %>% + select(-contains("transl")) %>% + select( + .data$bsm_poly_id:.data$bsm_soilseries_explan, + .data$bsm_converted, + everything() + ) + } - ####### STANDARDIZATION FOR COASTAL PLAIN FEATURES ################ - - if (standardize_coastalplain) { - transl <- read_vc(file = "soil_translation_coastalplain", - root = pkgdatasource_path( - "textdata/soil_translation_coastalplain", ".yml")) %>% - mutate(soiltype_orig = factor(.data$soiltype_orig, - levels = - levels(soilmap$bsm_soiltype)) - ) %>% - filter(!is.na(.data$texture_transl)) %>% - mutate(tex_explan_transl = recode(.data$texture_transl, - !!!keys[["bsm_mo_tex"]]), - drain_explan_transl = recode(.data$drainage_transl, - !!!keys[["bsm_mo_drain"]]), - bsm_converted = TRUE) - - stand_vars <- c("bsm_mo_substr", - "bsm_mo_tex", - "bsm_mo_drain") - soilmap <- - soilmap %>% - 
left_join(transl, by = c("bsm_soiltype" = "soiltype_orig")) %>% - mutate_at(c(stand_vars, paste0(stand_vars, "_explan")), - as.character) %>% - mutate(bsm_mo_substr = ifelse(is.na(.data$bsm_mo_substr) & - !is.na(.data$bsm_ge_substr), - as.character(.data$bsm_ge_substr), - .data$bsm_mo_substr) %>% - factor(levels = levels(soilmap$bsm_mo_substr)), - bsm_mo_substr_explan = ifelse(is.na(.data$bsm_mo_substr_explan) & - !is.na(.data$bsm_ge_substr_explan), - as.character(.data$bsm_ge_substr_explan), - .data$bsm_mo_substr_explan) %>% - factor(levels = levels(soilmap$bsm_mo_substr_explan)), - bsm_mo_tex = ifelse(is.na(.data$bsm_mo_tex) & - !is.na(.data$texture_transl), - .data$texture_transl, - .data$bsm_mo_tex) %>% - factor(levels = levels(soilmap$bsm_mo_tex)), - bsm_mo_tex_explan = ifelse(is.na(.data$bsm_mo_tex_explan) & - !is.na(.data$tex_explan_transl), - .data$tex_explan_transl, - .data$bsm_mo_tex_explan) %>% - factor(levels = levels(soilmap$bsm_mo_tex_explan)), - bsm_mo_drain = ifelse(is.na(.data$bsm_mo_drain) & - !is.na(.data$drainage_transl), - .data$drainage_transl, - .data$bsm_mo_drain) %>% - factor(levels = levels(soilmap$bsm_mo_drain)), - bsm_mo_drain_explan = ifelse(is.na(.data$bsm_mo_drain_explan) & - !is.na(.data$drain_explan_transl), - .data$drain_explan_transl, - .data$bsm_mo_drain_explan) %>% - factor(levels = levels(soilmap$bsm_mo_drain_explan)), - bsm_converted = ifelse(is.na(.data$bsm_converted), - FALSE, - .data$bsm_converted) - ) %>% - select(-contains("transl")) %>% - select(.data$bsm_poly_id:.data$bsm_soilseries_explan, - .data$bsm_converted, - everything()) - - } - - ########## SIMPLIFICATION ############ + ########## SIMPLIFICATION ############ - if (simplify) { - soilmap <- - soilmap %>% - {if (standardize_coastalplain) . else { - mutate(., - bsm_converted = NA) - }} %>% - select(.data$bsm_poly_id, - .data$bsm_region, - .data$bsm_converted, - .data$bsm_mo_soilunitype, - .data$bsm_mo_substr, - .data$bsm_mo_substr_explan, - .data$bsm_mo_tex, - .data$bsm_mo_tex_explan, - .data$bsm_mo_drain, - .data$bsm_mo_drain_explan, - .data$bsm_mo_prof, - .data$bsm_mo_prof_explan, - .data$bsm_mo_parentmat, - .data$bsm_mo_parentmat_explan, - .data$bsm_mo_profvar, - .data$bsm_mo_profvar_explan - ) %>% - {if (explan) . else select(., -matches("_explan"))} %>% - {if (standardize_coastalplain) . else { - select(., -.data$bsm_converted) - }} - } - - return(soilmap) - - } + if (simplify) { + soilmap <- + soilmap %>% + { + if (standardize_coastalplain) { + . + } else { + mutate(., + bsm_converted = NA + ) + } + } %>% + select( + .data$bsm_poly_id, + .data$bsm_region, + .data$bsm_converted, + .data$bsm_mo_soilunitype, + .data$bsm_mo_substr, + .data$bsm_mo_substr_explan, + .data$bsm_mo_tex, + .data$bsm_mo_tex_explan, + .data$bsm_mo_drain, + .data$bsm_mo_drain_explan, + .data$bsm_mo_prof, + .data$bsm_mo_prof_explan, + .data$bsm_mo_parentmat, + .data$bsm_mo_parentmat_explan, + .data$bsm_mo_profvar, + .data$bsm_mo_profvar_explan + ) %>% + { + if (explan) . else select(., -matches("_explan")) + } %>% + { + if (standardize_coastalplain) { + . 
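# Caller-side sketch of the raw-map options, assuming the raw soilmap data are
# present under n2khab_data/10_raw/soilmap (reading the raw map is slow).
library(n2khab)
sm <- read_soilmap(
  use_processed = FALSE,
  standardize_coastalplain = TRUE,
  simplify = TRUE,
  explan = TRUE
)
table(sm$bsm_converted) # TRUE marks coastal plain soil types translated above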
+ } else { + select(., -.data$bsm_converted) + } + } + } + return(soilmap) } + } diff --git a/R/read_textdata.R b/R/read_textdata.R index 4e420360..ace39557 100644 --- a/R/read_textdata.R +++ b/R/read_textdata.R @@ -54,26 +54,25 @@ #' is.string #' @importFrom dplyr %>% filter as_tibble read_namelist <- - function(path = pkgdatasource_path("textdata/namelist", ".yml"), - file = "namelist", - lang = "en") { - - assert_that(is.string(lang)) + function(path = pkgdatasource_path("textdata/namelist", ".yml"), + file = "namelist", + lang = "en") { + assert_that(is.string(lang)) - if (lang == "all") { - result <- - read_vc(file = file, root = path) - } else { - result <- - read_vc(file = file, root = path) %>% - filter(lang == !!lang) - } + if (lang == "all") { + result <- + read_vc(file = file, root = path) + } else { + result <- + read_vc(file = file, root = path) %>% + filter(lang == !!lang) + } - attr(result, "source") <- NULL + attr(result, "source") <- NULL - result %>% - as_tibble - } + result %>% + as_tibble() + } @@ -99,12 +98,12 @@ read_namelist <- #' @importFrom stringr str_c #' @keywords internal pkgdatasource_path <- - function(file, extension = "") { - system.file(str_c(file, extension), - package = "n2khab" - ) %>% - dirname - } + function(file, extension = "") { + system.file(str_c(file, extension), + package = "n2khab" + ) %>% + dirname() + } @@ -131,21 +130,22 @@ pkgdatasource_path <- #' @importFrom plyr mapvalues #' @keywords internal namelist_factor <- - function(x, pick = "name", codelist) { - - suppressWarnings( - mapped_levels <- - data.frame(code = levels(x)) %>% - left_join(codelist, - by = "code") %>% - select(.data$code, !!pick) - ) - - x %>% - mapvalues(from = mapped_levels$code, - to = mapped_levels[,2]) + function(x, pick = "name", codelist) { + suppressWarnings( + mapped_levels <- + data.frame(code = levels(x)) %>% + left_join(codelist, + by = "code" + ) %>% + select(.data$code, !!pick) + ) - } + x %>% + mapvalues( + from = mapped_levels$code, + to = mapped_levels[, 2] + ) + } @@ -267,115 +267,158 @@ namelist_factor <- #' @importFrom plyr mapvalues #' @importFrom rlang .data read_types <- - function(path = pkgdatasource_path("textdata/types", ".yml"), - file = "types", - file_namelist = "namelist", - lang = "en") { - - assert_that(is.string(lang)) - - langs <- - read_namelist(path = path, - file = file_namelist, - lang = "all") %>% - distinct(.data$lang) %>% - pull(lang) - - assert_that(any(lang %in% langs), - msg = "Your setting of lang is not supported.") - - namelist <- - read_namelist(path = path, - file = file_namelist, - lang = lang) %>% - select(.data$code, - .data$name, - .data$shortname) - - types_base <- - read_vc(file = file, root = path) - - suppressMessages(suppressWarnings({ - type_levels <- - tibble(codelevel = types_base$type %>% levels) %>% - left_join(namelist, - by = c("codelevel" = "code")) %>% - rename(namelevel = .data$name, - shortnamelevel = .data$shortname) - - typeclass_levels <- - tibble(codelevel = types_base$typeclass %>% levels) %>% - left_join(namelist %>% select(-.data$shortname), - by = c("codelevel" = "code")) %>% - rename(namelevel = .data$name) - - types_base %>% - left_join(namelist, by = c("type" = "code")) %>% - rename(type_name = .data$name, - type_shortname = .data$shortname) %>% - mutate(type = factor(.data$type, - levels = types_base$type %>% - levels), - type_name = - .data$type %>% - mapvalues(from = type_levels$codelevel, - to = type_levels$namelevel), - type_shortname = - .data$type %>% - mapvalues(from = 
type_levels$codelevel, - to = type_levels$shortnamelevel), - typeclass_name = - .data$typeclass %>% - mapvalues(from = typeclass_levels$codelevel, - to = typeclass_levels$namelevel), - hydr_class_name = - .data$hydr_class %>% - mapvalues(from = namelist$code, - to = namelist$name), - hydr_class_shortname = - .data$hydr_class %>% - mapvalues(from = namelist$code, - to = namelist$shortname), - groundw_dep_name = - .data$groundw_dep %>% - mapvalues(from = namelist$code, - to = namelist$name), - groundw_dep_shortname = - .data$groundw_dep %>% - mapvalues(from = namelist$code, - to = namelist$shortname), - flood_dep_name = - .data$flood_dep %>% - mapvalues(from = namelist$code, - to = namelist$name), - flood_dep_shortname = - .data$flood_dep %>% - mapvalues(from = namelist$code, - to = namelist$shortname) - ) %>% - left_join(namelist, - by = c("tag_1" = "code")) %>% - rename(tag_1_name = .data$name, - tag_1_shortname = .data$shortname) %>% - left_join(namelist, - by = c("tag_2" = "code")) %>% - rename(tag_2_name = .data$name, - tag_2_shortname = .data$shortname) %>% - left_join(namelist, - by = c("tag_3" = "code")) %>% - rename(tag_3_name = .data$name, - tag_3_shortname = .data$shortname) %>% - select(1:3, 11:12, - 4, 13, - 5, 14:15, - 6, 16:17, - 7, 18:19, - 8, 20:21, - 9, 22:23, - 10, 24:25) %>% - as_tibble - })) - } + function(path = pkgdatasource_path("textdata/types", ".yml"), + file = "types", + file_namelist = "namelist", + lang = "en") { + assert_that(is.string(lang)) + + langs <- + read_namelist( + path = path, + file = file_namelist, + lang = "all" + ) %>% + distinct(.data$lang) %>% + pull(lang) + + assert_that(any(lang %in% langs), + msg = "Your setting of lang is not supported." + ) + + namelist <- + read_namelist( + path = path, + file = file_namelist, + lang = lang + ) %>% + select( + .data$code, + .data$name, + .data$shortname + ) + + types_base <- + read_vc(file = file, root = path) + + suppressMessages(suppressWarnings({ + type_levels <- + tibble(codelevel = types_base$type %>% levels()) %>% + left_join(namelist, + by = c("codelevel" = "code") + ) %>% + rename( + namelevel = .data$name, + shortnamelevel = .data$shortname + ) + + typeclass_levels <- + tibble(codelevel = types_base$typeclass %>% levels()) %>% + left_join(namelist %>% select(-.data$shortname), + by = c("codelevel" = "code") + ) %>% + rename(namelevel = .data$name) + + types_base %>% + left_join(namelist, by = c("type" = "code")) %>% + rename( + type_name = .data$name, + type_shortname = .data$shortname + ) %>% + mutate( + type = factor(.data$type, + levels = types_base$type %>% + levels() + ), + type_name = + .data$type %>% + mapvalues( + from = type_levels$codelevel, + to = type_levels$namelevel + ), + type_shortname = + .data$type %>% + mapvalues( + from = type_levels$codelevel, + to = type_levels$shortnamelevel + ), + typeclass_name = + .data$typeclass %>% + mapvalues( + from = typeclass_levels$codelevel, + to = typeclass_levels$namelevel + ), + hydr_class_name = + .data$hydr_class %>% + mapvalues( + from = namelist$code, + to = namelist$name + ), + hydr_class_shortname = + .data$hydr_class %>% + mapvalues( + from = namelist$code, + to = namelist$shortname + ), + groundw_dep_name = + .data$groundw_dep %>% + mapvalues( + from = namelist$code, + to = namelist$name + ), + groundw_dep_shortname = + .data$groundw_dep %>% + mapvalues( + from = namelist$code, + to = namelist$shortname + ), + flood_dep_name = + .data$flood_dep %>% + mapvalues( + from = namelist$code, + to = namelist$name + ), + flood_dep_shortname = + 
.data$flood_dep %>% + mapvalues( + from = namelist$code, + to = namelist$shortname + ) + ) %>% + left_join(namelist, + by = c("tag_1" = "code") + ) %>% + rename( + tag_1_name = .data$name, + tag_1_shortname = .data$shortname + ) %>% + left_join(namelist, + by = c("tag_2" = "code") + ) %>% + rename( + tag_2_name = .data$name, + tag_2_shortname = .data$shortname + ) %>% + left_join(namelist, + by = c("tag_3" = "code") + ) %>% + rename( + tag_3_name = .data$name, + tag_3_shortname = .data$shortname + ) %>% + select( + 1:3, 11:12, + 4, 13, + 5, 14:15, + 6, 16:17, + 7, 18:19, + 8, 20:21, + 9, 22:23, + 10, 24:25 + ) %>% + as_tibble() + })) + } @@ -488,85 +531,102 @@ read_types <- #' pull #' @importFrom rlang .data read_env_pressures <- - function(path = pkgdatasource_path("textdata/env_pressures", ".yml"), - file = "env_pressures", - file_namelist = "namelist", - lang = "en") { - - assert_that(is.string(lang)) - - langs <- - read_namelist(path = path, - file = file_namelist, - lang = "all") %>% - distinct(.data$lang) %>% - pull(lang) - - assert_that(any(lang %in% langs), - msg = "Your setting of lang is not supported.") - - namelist <- - read_namelist(path = path, - file = file_namelist, - lang = lang) %>% - select(.data$code, - .data$name, - .data$shortname) - - env_pressures_base <- - read_vc(file = file, root = path) - - suppressWarnings( - env_pressures_base2 <- - read_vc(file = file, root = path) %>% - left_join(namelist, by = c("ep_code" = "code")) %>% - rename(ep_name = .data$name, - ep_abbrev = .data$shortname) %>% - mutate(ep_code = .data$ep_code %>% - factor(levels = env_pressures_base$ep_code %>% levels) - ) - ) + function(path = pkgdatasource_path("textdata/env_pressures", ".yml"), + file = "env_pressures", + file_namelist = "namelist", + lang = "en") { + assert_that(is.string(lang)) - ep_levels <- - env_pressures_base2 %>% - distinct(.data$ep_code, - .data$ep_name, - .data$ep_abbrev) %>% - arrange(.data$ep_code) - - ep_class_levels <- - tibble(codelevel = env_pressures_base$ep_class %>% levels) %>% - left_join(namelist %>% select(-.data$shortname), - by = c("codelevel" = "code")) %>% - rename(namelevel = .data$name) - - env_pressures_base2 %>% - mutate(ep_name = .data$ep_name %>% - factor(levels = ep_levels$ep_name), - ep_abbrev = .data$ep_abbrev %>% - factor(levels = ep_levels$ep_abbrev), - ep_class_name = - .data$ep_class %>% - mapvalues(from = ep_class_levels$codelevel, - to = ep_class_levels$namelevel) - ) %>% - left_join(namelist, - by = c("explanation" = "code")) %>% - select(-.data$explanation) %>% - rename(explanation = .data$name, - remarks = .data$shortname) %>% - mutate(ep_code = .data$ep_code %>% - factor(levels = env_pressures_base$ep_code %>% levels) - ) %>% - select(.data$ep_code, - .data$ep_abbrev, - .data$ep_name, - .data$ep_class, - .data$ep_class_name, - .data$explanation, - .data$remarks) %>% - as_tibble - } + langs <- + read_namelist( + path = path, + file = file_namelist, + lang = "all" + ) %>% + distinct(.data$lang) %>% + pull(lang) + + assert_that(any(lang %in% langs), + msg = "Your setting of lang is not supported." 
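# Language handling sketch: a lang code that is absent from the namelist fails
# the assertion above; "nl" is assumed here to be among the available codes.
library(n2khab)
ep_nl <- read_env_pressures(lang = "nl")
head(ep_nl$ep_name)
# read_env_pressures(lang = "xx") # would stop: "Your setting of lang is not supported."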
+ ) + + namelist <- + read_namelist( + path = path, + file = file_namelist, + lang = lang + ) %>% + select( + .data$code, + .data$name, + .data$shortname + ) + + env_pressures_base <- + read_vc(file = file, root = path) + + suppressWarnings( + env_pressures_base2 <- + read_vc(file = file, root = path) %>% + left_join(namelist, by = c("ep_code" = "code")) %>% + rename( + ep_name = .data$name, + ep_abbrev = .data$shortname + ) %>% + mutate(ep_code = .data$ep_code %>% + factor(levels = env_pressures_base$ep_code %>% levels())) + ) + + ep_levels <- + env_pressures_base2 %>% + distinct( + .data$ep_code, + .data$ep_name, + .data$ep_abbrev + ) %>% + arrange(.data$ep_code) + + ep_class_levels <- + tibble(codelevel = env_pressures_base$ep_class %>% levels()) %>% + left_join(namelist %>% select(-.data$shortname), + by = c("codelevel" = "code") + ) %>% + rename(namelevel = .data$name) + + env_pressures_base2 %>% + mutate( + ep_name = .data$ep_name %>% + factor(levels = ep_levels$ep_name), + ep_abbrev = .data$ep_abbrev %>% + factor(levels = ep_levels$ep_abbrev), + ep_class_name = + .data$ep_class %>% + mapvalues( + from = ep_class_levels$codelevel, + to = ep_class_levels$namelevel + ) + ) %>% + left_join(namelist, + by = c("explanation" = "code") + ) %>% + select(-.data$explanation) %>% + rename( + explanation = .data$name, + remarks = .data$shortname + ) %>% + mutate(ep_code = .data$ep_code %>% + factor(levels = env_pressures_base$ep_code %>% levels())) %>% + select( + .data$ep_code, + .data$ep_abbrev, + .data$ep_name, + .data$ep_class, + .data$ep_class_name, + .data$explanation, + .data$remarks + ) %>% + as_tibble() + } @@ -679,79 +739,104 @@ read_env_pressures <- #' pull #' @importFrom rlang .data read_schemes <- - function(path = pkgdatasource_path("textdata/schemes", ".yml"), - file = "schemes", - file_namelist = "namelist", - lang = "en") { - - assert_that(is.string(lang)) - - langs <- - read_namelist(path = path, - file = file_namelist, - lang = "all") %>% - distinct(.data$lang) %>% - pull(lang) - - assert_that(any(lang %in% langs), - msg = "Your setting of lang is not supported.") - - namelist <- - read_namelist(path = path, - file = file_namelist, - lang = lang) %>% - select(.data$code, - .data$name, - .data$shortname) - - suppressWarnings( - read_vc(file = file, root = path) %>% - mutate( - scheme_name = namelist_factor(.data$scheme, - codelist = namelist), - scheme_shortname = namelist_factor(.data$scheme, - "shortname", - codelist = namelist), - programme_name = namelist_factor(.data$programme, - codelist = namelist), - attribute_1_name = namelist_factor(.data$attribute_1, - codelist = namelist), - attribute_1_shortname = namelist_factor(.data$attribute_1, - "shortname", - codelist = namelist), - attribute_2_name = namelist_factor(.data$attribute_2, - codelist = namelist), - attribute_2_shortname = namelist_factor(.data$attribute_2, - "shortname", - codelist = namelist), - attribute_3_name = namelist_factor(.data$attribute_3, - codelist = namelist), - attribute_3_shortname = namelist_factor(.data$attribute_3, - "shortname", - codelist = namelist) - ) %>% - left_join(namelist, - by = c("tag_1" = "code")) %>% - rename(tag_1_name = .data$name, - tag_1_shortname = .data$shortname) %>% - left_join(namelist, - by = c("tag_2" = "code")) %>% - rename(tag_2_name = .data$name, - tag_2_shortname = .data$shortname) %>% - left_join(namelist, - by = c("tag_3" = "code")) %>% - rename(tag_3_name = .data$name, - tag_3_shortname = .data$shortname) %>% - select(contains("scheme"), - contains("programme"), - 
contains("attribute"), - .data$spatial_restriction, - .data$notes, - contains("tag") - ) %>% - as_tibble) + function(path = pkgdatasource_path("textdata/schemes", ".yml"), + file = "schemes", + file_namelist = "namelist", + lang = "en") { + assert_that(is.string(lang)) - } + langs <- + read_namelist( + path = path, + file = file_namelist, + lang = "all" + ) %>% + distinct(.data$lang) %>% + pull(lang) + + assert_that(any(lang %in% langs), + msg = "Your setting of lang is not supported." + ) + + namelist <- + read_namelist( + path = path, + file = file_namelist, + lang = lang + ) %>% + select( + .data$code, + .data$name, + .data$shortname + ) + + suppressWarnings( + read_vc(file = file, root = path) %>% + mutate( + scheme_name = namelist_factor(.data$scheme, + codelist = namelist + ), + scheme_shortname = namelist_factor(.data$scheme, + "shortname", + codelist = namelist + ), + programme_name = namelist_factor(.data$programme, + codelist = namelist + ), + attribute_1_name = namelist_factor(.data$attribute_1, + codelist = namelist + ), + attribute_1_shortname = namelist_factor(.data$attribute_1, + "shortname", + codelist = namelist + ), + attribute_2_name = namelist_factor(.data$attribute_2, + codelist = namelist + ), + attribute_2_shortname = namelist_factor(.data$attribute_2, + "shortname", + codelist = namelist + ), + attribute_3_name = namelist_factor(.data$attribute_3, + codelist = namelist + ), + attribute_3_shortname = namelist_factor(.data$attribute_3, + "shortname", + codelist = namelist + ) + ) %>% + left_join(namelist, + by = c("tag_1" = "code") + ) %>% + rename( + tag_1_name = .data$name, + tag_1_shortname = .data$shortname + ) %>% + left_join(namelist, + by = c("tag_2" = "code") + ) %>% + rename( + tag_2_name = .data$name, + tag_2_shortname = .data$shortname + ) %>% + left_join(namelist, + by = c("tag_3" = "code") + ) %>% + rename( + tag_3_name = .data$name, + tag_3_shortname = .data$shortname + ) %>% + select( + contains("scheme"), + contains("programme"), + contains("attribute"), + .data$spatial_restriction, + .data$notes, + contains("tag") + ) %>% + as_tibble() + ) + } @@ -897,87 +982,101 @@ read_scheme_types <- function(path = pkgdatasource_path("textdata/scheme_types", file_namelist = "namelist", lang = "en", extended = FALSE) { - - assert_that(is.string(lang)) - - langs <- - read_namelist(path = path, - file = file_namelist, - lang = "all") %>% - distinct(.data$lang) %>% - pull(lang) - - assert_that(any(lang %in% langs), - msg = "Your setting of lang is not supported.") - - namelist <- - read_namelist(path = path, - file = file_namelist, - lang = lang) %>% - select(.data$code, - .data$name, - .data$shortname) - - scheme_types <- read_vc(file = file, root = path) - - if (extended) { - + assert_that(is.string(lang)) + + langs <- + read_namelist( + path = path, + file = file_namelist, + lang = "all" + ) %>% + distinct(.data$lang) %>% + pull(lang) + + assert_that(any(lang %in% langs), + msg = "Your setting of lang is not supported." 
+ ) + + namelist <- + read_namelist( + path = path, + file = file_namelist, + lang = lang + ) %>% + select( + .data$code, + .data$name, + .data$shortname + ) + + scheme_types <- read_vc(file = file, root = path) + + if (extended) { schemes <- - read_schemes(path = path, - file = "schemes", - file_namelist = file_namelist, - lang = lang) %>% - gather(key = "key", - value = "value", - contains("tag")) %>% - mutate(key = str_c("scheme", .data$key)) %>% - spread(key = .data$key, value = .data$value) + read_schemes( + path = path, + file = "schemes", + file_namelist = file_namelist, + lang = lang + ) %>% + gather( + key = "key", + value = "value", + contains("tag") + ) %>% + mutate(key = str_c("scheme", .data$key)) %>% + spread(key = .data$key, value = .data$value) types <- - read_types(path = path, - file = "types", - file_namelist = file_namelist, - lang = lang) %>% - gather(key = "key", - value = "value", - contains("tag")) %>% - mutate(key = str_c("type", .data$key)) %>% - spread(key = .data$key, value = .data$value) + read_types( + path = path, + file = "types", + file_namelist = file_namelist, + lang = lang + ) %>% + gather( + key = "key", + value = "value", + contains("tag") + ) %>% + mutate(key = str_c("type", .data$key)) %>% + spread(key = .data$key, value = .data$value) scheme_types %>% - mutate(typegroup_name = namelist_factor(.data$typegroup, - codelist = namelist), - typegroup_shortname = namelist_factor(.data$typegroup, - "shortname", - codelist = namelist)) %>% - left_join(schemes, - by = "scheme") %>% - left_join(types, - by = "type") %>% - mutate(type = .data$type %>% - factor(levels = - read_vc(file = file, root = path) %>% - pull(.data$type) %>% - levels - )) %>% - as_tibble - - } else { - - scheme_types %>% - mutate(typegroup_name = namelist_factor(.data$typegroup, - codelist = namelist), - typegroup_shortname = namelist_factor(.data$typegroup, - "shortname", - codelist = namelist)) %>% - as_tibble - - } - + mutate( + typegroup_name = namelist_factor(.data$typegroup, + codelist = namelist + ), + typegroup_shortname = namelist_factor(.data$typegroup, + "shortname", + codelist = namelist + ) + ) %>% + left_join(schemes, + by = "scheme" + ) %>% + left_join(types, + by = "type" + ) %>% + mutate(type = .data$type %>% + factor( + levels = + read_vc(file = file, root = path) %>% + pull(.data$type) %>% + levels() + )) %>% + as_tibble() + } else { + scheme_types %>% + mutate( + typegroup_name = namelist_factor(.data$typegroup, + codelist = namelist + ), + typegroup_shortname = namelist_factor(.data$typegroup, + "shortname", + codelist = namelist + ) + ) %>% + as_tibble() + } } - - - - - - diff --git a/R/read_watercourses.R b/R/read_watercourses.R index cdb8f1d5..0ffa5f7b 100644 --- a/R/read_watercourses.R +++ b/R/read_watercourses.R @@ -100,29 +100,38 @@ #' @importFrom sf #' read_sf read_watercourse_100mseg <- - function(file = file.path(fileman_up("n2khab_data"), - "20_processed/watercourse_100mseg/watercourse_100mseg.gpkg"), - element = NULL, - version = "watercourse_100mseg_20200807v1"){ + function(file = file.path( + fileman_up("n2khab_data"), + "20_processed/watercourse_100mseg/watercourse_100mseg.gpkg" + ), + element = NULL, + version = "watercourse_100mseg_20200807v1") { + assert_that(is.string(version)) - assert_that(is.string(version)) - - if (!missing(element)) { - assert_that(is.string(element), - element %in% c("lines", "points")) - res <- - switch(element, - "lines" = read_sf(file, - layer = "watercourse_100mseg_lines"), - "points" = read_sf(file, - layer = 
"watercourse_100mseg_points") - ) - } else { - res <- - list("lines" = read_sf(file, - layer = "watercourse_100mseg_lines"), - "points" = read_sf(file, - layer = "watercourse_100mseg_points")) - } - return(res) + if (!missing(element)) { + assert_that( + is.string(element), + element %in% c("lines", "points") + ) + res <- + switch(element, + "lines" = read_sf(file, + layer = "watercourse_100mseg_lines" + ), + "points" = read_sf(file, + layer = "watercourse_100mseg_points" + ) + ) + } else { + res <- + list( + "lines" = read_sf(file, + layer = "watercourse_100mseg_lines" + ), + "points" = read_sf(file, + layer = "watercourse_100mseg_points" + ) + ) } + return(res) + } diff --git a/R/textdata.R b/R/textdata.R index 31d19bf3..fab55132 100644 --- a/R/textdata.R +++ b/R/textdata.R @@ -431,12 +431,3 @@ NULL #' #' @name scheme_types NULL - - - - - - - - - diff --git a/README.md b/README.md index 1f3c0735..6f174073 100644 --- a/README.md +++ b/README.md @@ -57,24 +57,14 @@ There is a major distinction between: - **raw data** ([Zenodo-link](https://zenodo.org/communities/n2khab-data-raw)), to be stored in a folder `n2khab_data/10_raw`; - **processed data** ([Zenodo-link](https://zenodo.org/communities/n2khab-data-processed)), to be stored in a folder `n2khab_data/20_processed`. -### Suppressing `rgdal` warnings about proj4string degradation +### Note: don't use proj4strings to define coordinate reference systems -Setting coordinate reference systems (CRS) of geospatial R objects is taken care of by the package, in a way that is compatible with older and current versions of PROJ and GDAL backend libraries. -This is done by gratefully implementing such features from the `sf` and `sp` packages. -The functions never specify a CRS by means of a proj4string, which is an aged format not supported by the current backend versions. +If you use `RasterLayer` or `RasterBrick` objects returned by **n2khab** functions, please make sure to not use the proj4string to represent its coordinate reference system (CRS), even while printing those objects will show a degraded (!) proj4string. +The proj4string is an aged format which has lost most of its ability to represent a geodetic datum. -Please note that nonetheless you will see warnings about degraded proj4strings when using certain `n2khab` functions or when converting resulting `sf` objects to `sp` objects. -This is normal! -It is the current default behaviour of `rgdal` to yield these warnings. -However in the case of `n2khab` functions and resulting objects these warnings are trivial and unnecessary. -You can suppress them in your R scripts by inserting below statement _before_ loading geospatial packages: - -```r -options(rgdal_show_exportToProj4_warnings = "none") -``` +The proper CRS representation is the WKT string, which is effectively returned by `raster::wkt()` and by `raster::crs()`. See [this](https://inbo.github.io/tutorials/tutorials/spatial_crs_coding/) tutorial if you would like to learn more about this. -In short: _don't_ use proj4strings to define CRSs! ## You are welcome to contribute! 
diff --git a/inst/CITATION b/inst/CITATION index d8232b0c..0d8bd950 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -4,10 +4,10 @@ citEntry( entry = "Manual", title = "R package n2khab: providing preprocessed reference data for Flemish Natura 2000 habitat analyses", author = "Floris Vanderhaeghe and Toon Westra and Cécile Herr and Hans Van Calster", - year = "2021", + year = "2023", url = "https://inbo.github.io/n2khab", - textVersion = paste("Vanderhaeghe F, Westra T, Herr C, Van Calster H (2021).", + textVersion = paste("Vanderhaeghe F, Westra T, Herr C, Van Calster H (2023).", "R package n2khab: providing preprocessed reference data for Flemish Natura 2000 habitat analyses.", "https://inbo.github.io/n2khab." ) diff --git a/man/convert_base4frac_to_dec.Rd b/man/convert_base4frac_to_dec.Rd index 587444e0..1c34528f 100644 --- a/man/convert_base4frac_to_dec.Rd +++ b/man/convert_base4frac_to_dec.Rd @@ -55,9 +55,11 @@ convert_base4frac_to_dec(c(NA, 0.1010101010101), level = 0) # vector, level 5: convert_base4frac_to_dec(c(NA, 0.1010101010101), level = 5) # same vector, all sensible levels computed: -sapply(0:12, function(i) convert_base4frac_to_dec(c(NA, 0.1010101010101), - level = i) - ) +sapply(0:12, function(i) { + convert_base4frac_to_dec(c(NA, 0.1010101010101), + level = i + ) +}) options(oldoption) } diff --git a/man/convertdf_enc.Rd b/man/convertdf_enc.Rd index bded3795..4a6b7440 100644 --- a/man/convertdf_enc.Rd +++ b/man/convertdf_enc.Rd @@ -10,9 +10,9 @@ convertdf_enc(x, from = "", to = "UTF-8", sub = NA, colnames = FALSE) \item{x}{An object with the \code{data.frame} class (such as \code{data.frame} or \code{sf})} -\item{from}{A character string describing the current encoding.} +\item{from}{a character string describing the current encoding.} -\item{to}{A character string describing the target encoding.} +\item{to}{a character string describing the target encoding.} \item{sub}{character string. If not \code{NA} it is used to replace any non-convertible bytes in the input. (This would normally be a diff --git a/man/download_zenodo.Rd b/man/download_zenodo.Rd index a0c3d2d6..b39cd00f 100644 --- a/man/download_zenodo.Rd +++ b/man/download_zenodo.Rd @@ -4,7 +4,7 @@ \alias{download_zenodo} \title{Get data from a Zenodo archive} \usage{ -download_zenodo(doi, path = ".", parallel = FALSE, quiet = FALSE) +download_zenodo(doi, path = ".", parallel = TRUE, quiet = FALSE) } \arguments{ \item{doi}{a doi pointer to the Zenodo archive starting with '10.5281/zenodo.'. See examples.} @@ -12,11 +12,9 @@ download_zenodo(doi, path = ".", parallel = FALSE, quiet = FALSE) \item{path}{Path where the data must be downloaded. Defaults to the working directory.} -\item{parallel}{Logical (\code{FALSE} by default). -If \code{TRUE}, will run a number of parallel processes, each downloading -another file. -This is useful when multiple large files are present in the Zenodo -record, which otherwise would be downloaded sequentially. +\item{parallel}{Logical. +If \code{TRUE} (the default), files will be +downloaded concurrently for multi-file records. Of course, the operation is limited by bandwidth and traffic limitations.} \item{quiet}{Logical (\code{FALSE} by default). diff --git a/man/expand_types.Rd b/man/expand_types.Rd index 8c47c062..a259bbca 100644 --- a/man/expand_types.Rd +++ b/man/expand_types.Rd @@ -78,30 +78,30 @@ main type but to an non-defined subtype with no specific code). 
\examples{ library(dplyr) x <- - read_scheme_types() \%>\% - filter(scheme == "GW_05.1_terr") + read_scheme_types() \%>\% + filter(scheme == "GW_05.1_terr") expand_types(x) expand_types(x, strict = FALSE) x <- - read_scheme_types() \%>\% - filter(scheme == "GW_05.1_terr") \%>\% - group_by(typegroup) + read_scheme_types() \%>\% + filter(scheme == "GW_05.1_terr") \%>\% + group_by(typegroup) expand_types(x) expand_types(x, use_grouping = FALSE) # equals above example x <- - tribble( - ~mycode, ~obs, - "2130", 5, - "2190", 45, - "2330_bu", 8, - "2330_dw", 8, - "5130_hei", 7, - "6410_mo", 78, - "6410_ve", 4, - "91E0_vn", 10 - ) + tribble( + ~mycode, ~obs, + "2130", 5, + "2190", 45, + "2330_bu", 8, + "2330_dw", 8, + "5130_hei", 7, + "6410_mo", 78, + "6410_ve", 4, + "91E0_vn", 10 + ) expand_types(x, type_var = "mycode") expand_types(x, type_var = "mycode", strict = FALSE) diff --git a/man/n2khab-package.Rd b/man/n2khab-package.Rd index a152e7de..60478cbb 100644 --- a/man/n2khab-package.Rd +++ b/man/n2khab-package.Rd @@ -44,6 +44,7 @@ Other contributors: \item Luc Denys \email{luc.denys@inbo.be} (\href{https://orcid.org/0000-0002-1841-6579}{ORCID}) [contributor] \item An Leyssen \email{an.leyssen@inbo.be} (\href{https://orcid.org/0000-0003-3537-286X}{ORCID}) [contributor] \item Patrik Oosterlynck \email{patrik.oosterlynck@inbo.be} (\href{https://orcid.org/0000-0002-5712-0770}{ORCID}) [contributor] + \item Jeroen Vanden Borre \email{jeroen.vandenborre@inbo.be} (\href{https://orcid.org/0000-0002-0153-7243}{ORCID}) [contributor] \item Nathalie Cools \email{nathalie.cools@inbo.be} (\href{https://orcid.org/0000-0002-7059-2318}{ORCID}) [contributor] \item Bruno De Vos \email{bruno.devos@inbo.be} (\href{https://orcid.org/0000-0001-9523-3453}{ORCID}) [contributor] \item Suzanna Lettens \email{suzanna.lettens@inbo.be} (\href{https://orcid.org/0000-0001-5032-495X}{ORCID}) [contributor] diff --git a/man/read_habitatquarries.Rd b/man/read_habitatquarries.Rd index d74d383b..33a6d96b 100644 --- a/man/read_habitatquarries.Rd +++ b/man/read_habitatquarries.Rd @@ -97,6 +97,7 @@ hq2 <- read_habitatquarries(filter_hab = TRUE) hq2 hq3 <- read_habitatquarries(references = TRUE) hq3 -read_habitatquarries(bibtex = TRUE)} +read_habitatquarries(bibtex = TRUE) +} } diff --git a/man/read_habitatstreams.Rd b/man/read_habitatstreams.Rd index ce2d13b2..1357ce74 100644 --- a/man/read_habitatstreams.Rd +++ b/man/read_habitatstreams.Rd @@ -55,8 +55,10 @@ hs <- read_habitatstreams() hs hs2 <- read_habitatstreams(source_text = TRUE) hs2 -all.equal(hs \%>\% st_drop_geometry, - hs2$lines \%>\% st_drop_geometry) +all.equal( + hs \%>\% st_drop_geometry(), + hs2$lines \%>\% st_drop_geometry() +) } } diff --git a/man/read_soilmap.Rd b/man/read_soilmap.Rd index bdfb2060..48474c13 100644 --- a/man/read_soilmap.Rd +++ b/man/read_soilmap.Rd @@ -256,10 +256,10 @@ soilmap_simple <- read_soilmap() soilmap_simple soilmap_simple \%>\% filter(!is.na(bsm_mo_substr)) \%>\% - glimpse + glimpse() soilmap_simple \%>\% filter(bsm_converted) \%>\% - glimpse + glimpse() } } diff --git a/man/read_watersurfaces.Rd b/man/read_watersurfaces.Rd index 93e7359d..10065bc8 100644 --- a/man/read_watersurfaces.Rd +++ b/man/read_watersurfaces.Rd @@ -7,7 +7,8 @@ read_watersurfaces( file = NULL, extended = FALSE, - version = c("watersurfaces_v1.1", "watersurfaces_v1.0") + fix_geom = FALSE, + version = c("watersurfaces_v1.2", "watersurfaces_v1.1", "watersurfaces_v1.0") ) } \arguments{ @@ -27,6 +28,15 @@ if \code{TRUE}, the variables \code{wfd_type_name} and 
\code{connectivity_name} are added. Defaults to \code{FALSE}.} +\item{fix_geom}{Logical. +Should invalid or corrupt geometries be fixed in the resulting \code{sf} +object in order to make them valid? +This prevents potential problems in geospatial operations, but beware that +fixed geometries are different from the original ones. +\code{\link[sf:st_make_valid]{sf::st_make_valid()}} is used to fix +geometries (with GEOS as backend). +Defaults to \code{FALSE}.} + \item{version}{Version ID of the data source. Defaults to the latest available version defined by the package.} } @@ -49,11 +59,13 @@ variables (not mentioning extra 'name' variables for Is there high confidence about the \code{wfd_type} determination? \item \code{depth_class}: class of water depth; \item \code{connectivity}: connectivity class; - \item \code{usage}: usage class. + \item \code{usage}: usage class; + \item \code{water_level_management}: water-level management class (not in + older versions). } } \description{ -Returns the raw data source \code{watersurfaces} (Leyssen et al., 2020) +Returns the raw data source \code{watersurfaces} (Scheers et al., 2022) as a standardized \code{sf} polygon layer (tidyverse-styled, internationalized) in the Belgian Lambert 72 CRS (EPSG-code \href{https://epsg.io/31370}{31370}). @@ -66,9 +78,9 @@ If you want to use another file or file structure than the default data storage, you can specify your own \code{file}. In both cases: always make sure to specify the correct \code{version}, that is the version corresponding to the \code{file} (note that the \code{version} -defaults to the latest version, that is \code{watersurfaces_v1.1}). +defaults to the latest version). -See Leyssen et al. (2020) for an elaborate explanation of the data source +See Scheers et al. (2022) for an elaborate explanation of the data source and its attributes. } \examples{ @@ -83,6 +95,12 @@ ws <- read_watersurfaces() ws summary(ws) +ws_valid <- read_watersurfaces(fix_geom = TRUE) +ws_valid + +all(sf::st_is_valid(ws)) +all(sf::st_is_valid(ws_valid)) + ws2 <- read_watersurfaces(extended = TRUE) ws2 } @@ -94,12 +112,10 @@ ws2 wateren in Vlaanderen. Rapporten van het Instituut voor Natuur- en Bosonderzoek INBO.R.2009.34. Instituut voor Natuur- en Bosonderzoek, Brussel. -\item Leyssen A., Scheers K., Smeekens V., Wils C., Packet J., De Knijf G. & -Denys L. (2020). -Watervlakken versie 1.1: polygonenkaart van stilstaand water in Vlaanderen. -Uitgave 2020. Rapporten van het Instituut voor Natuur- en Bosonderzoek 2020 -(40). Instituut voor Natuur en Bosonderzoek, Brussel. -\doi{10.21436/inbor.19088385}. +\item Scheers K., Smeekens V., Wils C., Packet J., Leyssen A. & Denys L. +(2022). Watervlakken versie 1.2: Polygonenkaart van stilstaand water in +Vlaanderen. Uitgave 2022. Instituut voor Natuur- en Bosonderzoek. +\doi{10.21436/inbor.87014272}. } } \seealso{ diff --git a/misc/generate_textdata/index.Rmd b/misc/generate_textdata/index.Rmd index 61a2525b..bcacc9b3 100644 --- a/misc/generate_textdata/index.Rmd +++ b/misc/generate_textdata/index.Rmd @@ -39,6 +39,15 @@ output: ```{r setup, include=FALSE} renv::restore() options(stringsAsFactors = FALSE) +# Setup for googlesheets4 authentication. 
Set the appropriate env vars in +# .Renviron and make sure you ran gs4_auth() interactively with these settings +# for the first run (or to renew an expired Oauth token) +if (Sys.getenv("GARGLE_OAUTH_EMAIL") != "") { + options(gargle_oauth_email = Sys.getenv("GARGLE_OAUTH_EMAIL")) +} +if (Sys.getenv("GARGLE_OAUTH_CACHE") != "") { + options(gargle_oauth_cache = Sys.getenv("GARGLE_OAUTH_CACHE")) +} library(tidyverse) library(stringr) library(knitr) diff --git a/misc/generate_textdata/renv.lock b/misc/generate_textdata/renv.lock index ca9bd4de..052d04cf 100644 --- a/misc/generate_textdata/renv.lock +++ b/misc/generate_textdata/renv.lock @@ -166,10 +166,10 @@ }, "cli": { "Package": "cli", - "Version": "3.1.1", + "Version": "3.6.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "da5160e769a652e3ec7111d63883f9bc" + "Hash": "89e6d8219950eac806ae0c489052048a" }, "clipr": { "Package": "clipr", @@ -215,10 +215,10 @@ }, "curl": { "Package": "curl", - "Version": "4.3.2", + "Version": "5.1.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "022c42d49c28e95d69ca60446dbabf88" + "Hash": "9123f3ef96a2c1a93927d828b2fe7d4c" }, "data.table": { "Package": "data.table", @@ -327,10 +327,10 @@ }, "gargle": { "Package": "gargle", - "Version": "1.2.0", + "Version": "1.5.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "9d234e6a87a6f8181792de6dc4a00e39" + "Hash": "fc0b272e5847c58cd5da9b20eedbd026" }, "generics": { "Package": "generics", @@ -390,17 +390,17 @@ }, "googledrive": { "Package": "googledrive", - "Version": "2.0.0", + "Version": "2.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "c3a25adbbfbb03f12e6f88c5fb1f3024" + "Hash": "e99641edef03e2a5e87f0a0b1fcc97f4" }, "googlesheets4": { "Package": "googlesheets4", - "Version": "1.0.0", + "Version": "1.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "9a6564184dc4a81daea4f1d7ce357c6a" + "Hash": "d6db1667059d027da730decdc214b959" }, "gtable": { "Package": "gtable", @@ -439,10 +439,10 @@ }, "httr": { "Package": "httr", - "Version": "1.4.2", + "Version": "1.4.7", "Source": "Repository", "Repository": "CRAN", - "Hash": "a525aba14184fec243f9eaec62fbed43" + "Hash": "ac107251d9d9fd72f0ca8049988f1d7f" }, "ids": { "Package": "ids", @@ -502,10 +502,10 @@ }, "lifecycle": { "Package": "lifecycle", - "Version": "1.0.1", + "Version": "1.0.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "a6b6d352e3ed897373ab19d8395c98d0" + "Hash": "b8552d117e1b808b09a832f589b79035" }, "lubridate": { "Package": "lubridate", @@ -565,10 +565,10 @@ }, "openssl": { "Package": "openssl", - "Version": "1.4.6", + "Version": "2.1.1", "Source": "Repository", "Repository": "CRAN", - "Hash": "69fdf291af288f32fd4cd93315084ea8" + "Hash": "2a0dc8c6adfb6f032e4d4af82d258ab5" }, "pander": { "Package": "pander", @@ -579,10 +579,10 @@ }, "pillar": { "Package": "pillar", - "Version": "1.7.0", + "Version": "1.9.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "51dfc97e1b7069e9f7e6f83f3589c22e" + "Hash": "15da5a8412f317beeee6175fbc76f4bb" }, "pkgbuild": { "Package": "pkgbuild", @@ -649,10 +649,10 @@ }, "purrr": { "Package": "purrr", - "Version": "0.3.4", + "Version": "1.0.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "97def703420c8ab10d8f0e6c72101e02" + "Hash": "1cba04a4e9414bdefc9dcaa99649a8dc" }, "rappdirs": { "Package": "rappdirs", @@ -719,10 +719,10 @@ }, "rlang": { "Package": "rlang", - "Version": "1.0.1", + "Version": "1.1.2", "Source": "Repository", "Repository": "CRAN", - "Hash": "3bf0219f19d9f5b3c682acbb3546a151" + 
"Hash": "50a6dbdc522936ca35afc5e2082ea91b" }, "rmarkdown": { "Package": "rmarkdown", @@ -887,10 +887,10 @@ }, "vctrs": { "Package": "vctrs", - "Version": "0.3.8", + "Version": "0.6.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "ecf749a1b39ea72bd9b51b76292261f1" + "Hash": "266c1ca411266ba8f365fcc726444b87" }, "viridisLite": { "Package": "viridisLite", diff --git a/n2khab.Rproj b/n2khab.Rproj index 414ccc2a..6754fbd2 100644 --- a/n2khab.Rproj +++ b/n2khab.Rproj @@ -6,7 +6,7 @@ AlwaysSaveHistory: Default EnableCodeIndexing: Yes UseSpacesForTab: Yes -NumSpacesForTab: 4 +NumSpacesForTab: 2 Encoding: UTF-8 RnwWeave: knitr diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..dc0196a4 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(n2khab) + +test_check("n2khab") diff --git a/tests/testthat/_snaps/zenodo.md b/tests/testthat/_snaps/zenodo.md new file mode 100644 index 00000000..df1de470 --- /dev/null +++ b/tests/testthat/_snaps/zenodo.md @@ -0,0 +1,54 @@ +# download_zenodo() works for a single-file record + + Code + download_zenodo(doi = "10.5281/zenodo.3784149", path = zenodo_dir) + Message + Will download 1 file (total size: 32.5 KiB) from https://doi.org/10.5281/zenodo.3784149 (Distribution of the Natura 2000 habitat type 7220 (Cratoneurion) in Flanders and Brussels Capital Region, Belgium (version 2020); version: habitatsprings_2020v2) + + + Verifying file integrity... + + habitatsprings.geojson was downloaded and its integrity verified (md5sum: 64c3db07d17274da047b3962aab28e80) + +# download_zenodo() works for a GitHub code record + + Code + download_zenodo(doi = "10.5281/zenodo.7335805", path = zenodo_dir) + Message + Will download 1 file (total size: 236.7 KiB) from https://doi.org/10.5281/zenodo.7335805 (R package n2khab: providing preprocessed reference data for Flemish Natura 2000 habitat analyses; version: 0.8.0) + + + Verifying file integrity... + + n2khab-v0.8.0.zip was downloaded and its integrity verified (md5sum: 25fb33360d257c085bce567da8f6a2cb) + +# download_zenodo() works for a multi-file record + + Code + download_zenodo(doi = "10.5281/zenodo.4420858", path = zenodo_dir) + Message + Will download 4 files (total size: 534.5 KiB) from https://doi.org/10.5281/zenodo.4420858 (Redistribution of the Natura 2000 habitat map of Flanders, partim habitat type 3260 (version 1.7); version: habitatstreams_v1.7) + + + Verifying file integrity... 
+ + habitatstreams.dbf was downloaded and its integrity verified (md5sum: f66ddddacc9511133cc02d8c1960a917) + habitatstreams.shx was downloaded and its integrity verified (md5sum: e7725c8267ed671f3e5f09c5fcc68bff) + habitatstreams.shp was downloaded and its integrity verified (md5sum: 5c94b58c9dc7809c4eeeaf660aa3323c) + habitatstreams.prj was downloaded and its integrity verified (md5sum: f881f61a6c07741b58cb618d8bbb0b99) + +# download_zenodo() can work sequentially for a multi-file record + + Code + download_zenodo(doi = "10.5281/zenodo.4420858", path = zenodo_dir, parallel = FALSE) + Message + Will download 4 files (total size: 534.5 KiB) from https://doi.org/10.5281/zenodo.4420858 (Redistribution of the Natura 2000 habitat map of Flanders, partim habitat type 3260 (version 1.7); version: habitatstreams_v1.7) + + + Verifying file integrity... + + habitatstreams.dbf was downloaded and its integrity verified (md5sum: f66ddddacc9511133cc02d8c1960a917) + habitatstreams.shx was downloaded and its integrity verified (md5sum: e7725c8267ed671f3e5f09c5fcc68bff) + habitatstreams.shp was downloaded and its integrity verified (md5sum: 5c94b58c9dc7809c4eeeaf660aa3323c) + habitatstreams.prj was downloaded and its integrity verified (md5sum: f881f61a6c07741b58cb618d8bbb0b99) + diff --git a/tests/testthat/test-zenodo.R b/tests/testthat/test-zenodo.R new file mode 100644 index 00000000..63b86048 --- /dev/null +++ b/tests/testthat/test-zenodo.R @@ -0,0 +1,46 @@ +test_that("download_zenodo() works for a single-file record", { + skip_if_offline() + zenodo_dir <- tempfile() + withr::local_file(zenodo_dir) + dir.create(zenodo_dir) + expect_snapshot( + download_zenodo(doi = "10.5281/zenodo.3784149", path = zenodo_dir) + ) +}) + +test_that("download_zenodo() works for a GitHub code record", { + skip_if_offline() + zenodo_dir <- tempfile() + withr::local_file(zenodo_dir) + dir.create(zenodo_dir) + expect_snapshot( + download_zenodo(doi = "10.5281/zenodo.7335805", path = zenodo_dir) + ) +}) + +test_that("download_zenodo() works for a multi-file record", { + skip_if_offline() + zenodo_dir <- tempfile() + withr::local_file(zenodo_dir) + dir.create(zenodo_dir) + expect_snapshot( + download_zenodo( + doi = "10.5281/zenodo.4420858", + path = zenodo_dir + ) + ) +}) + +test_that("download_zenodo() can work sequentially for a multi-file record", { + skip_if_offline() + zenodo_dir <- tempfile() + withr::local_file(zenodo_dir) + dir.create(zenodo_dir) + expect_snapshot( + download_zenodo( + doi = "10.5281/zenodo.4420858", + path = zenodo_dir, + parallel = FALSE + ) + ) +}) diff --git a/vignettes/v022_example.Rmd b/vignettes/v022_example.Rmd index f31ef273..3621be29 100644 --- a/vignettes/v022_example.Rmd +++ b/vignettes/v022_example.Rmd @@ -37,8 +37,9 @@ file.copy(source, ".") Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS = "true") library(remotes) install_github("inbo/n2khab", - build_vignettes = TRUE, - upgrade = TRUE) + build_vignettes = TRUE, + upgrade = TRUE +) ``` ```{r eval=TRUE} @@ -72,8 +73,10 @@ It belongs to the collection of processed data ([Zenodo-link](https://zenodo.org ```{r} soilmap_simple_path <- file.path(n2khab_data_path, "20_processed/soilmap_simple") dir.create(soilmap_simple_path) -download_zenodo(doi = "10.5281/zenodo.3732903", - path = soilmap_simple_path) +download_zenodo( + doi = "10.5281/zenodo.3732903", + path = soilmap_simple_path +) ``` At some future time, the download will be performed automatically by `read_soilmap()` (if the `soilmap_simple` data source is missing). 
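
The vignette chunk above fetches a data source with `download_zenodo()`, which now defaults to `parallel = TRUE` (see the documentation change earlier in this diff). A minimal, purely illustrative sketch of switching back to sequential downloads; the DOI is the multi-file record used in the package's new test suite:

```r
library(n2khab)

zenodo_dir <- tempfile()
dir.create(zenodo_dir)

# Default behaviour: files of a multi-file record are downloaded concurrently
download_zenodo(doi = "10.5281/zenodo.4420858", path = zenodo_dir)

# Force sequential downloads, e.g. on a bandwidth-limited connection
download_zenodo(
  doi = "10.5281/zenodo.4420858",
  path = zenodo_dir,
  parallel = FALSE
)
```
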
@@ -106,17 +109,17 @@ sm_simple #> CRS: 31370 #> # A tibble: 270,550 x 11 #> bsm_poly_id bsm_region bsm_converted bsm_mo_soilunit… bsm_mo_substr -#> * -#> 1 165740 Kunstmati… FALSE OB -#> 2 176046 Kunstmati… FALSE OB -#> 3 185239 Zandleems… FALSE Ldc -#> 4 162400 Kunstmati… FALSE OB -#> 5 173971 Kunstmati… FALSE OB -#> 6 173087 Zandleems… FALSE Ldp -#> 7 199453 Zandleems… FALSE Lep -#> 8 176922 Zandleems… FALSE Ldc -#> 9 227861 Zandleems… FALSE Abp(c) -#> 10 185390 Zandleems… FALSE Lca +#> * +#> 1 165740 Kunstmati… FALSE OB +#> 2 176046 Kunstmati… FALSE OB +#> 3 185239 Zandleems… FALSE Ldc +#> 4 162400 Kunstmati… FALSE OB +#> 5 173971 Kunstmati… FALSE OB +#> 6 173087 Zandleems… FALSE Ldp +#> 7 199453 Zandleems… FALSE Lep +#> 8 176922 Zandleems… FALSE Ldc +#> 9 227861 Zandleems… FALSE Abp(c) +#> 10 185390 Zandleems… FALSE Lca #> # … with 270,540 more rows, and 6 more variables: bsm_mo_tex , #> # bsm_mo_drain , bsm_mo_prof , bsm_mo_parentmat , #> # bsm_mo_profvar , geom @@ -168,24 +171,26 @@ glimpse(sm_simple) ```{r paged.print=FALSE} -tibble(drain_levels = levels(sm_simple$bsm_mo_drain), - drain_levels_explained = levels(sm_simple$bsm_mo_drain_explan)) +tibble( + drain_levels = levels(sm_simple$bsm_mo_drain), + drain_levels_explained = levels(sm_simple$bsm_mo_drain_explan) +) #> # A tibble: 15 x 2 -#> drain_levels drain_levels_explained -#> -#> 1 a zeer droog, niet gleyig -#> 2 a-b complex van zeer droog, niet gleyig tot droog, niet gleyig -#> 3 a-d complex van zeer droog, niet gleyig tot matig nat, matig gleyig -#> 4 b droog, niet gleyig -#> 5 c matig droog, zwak gleyig -#> 6 c-d complex van droog, zwak gleyig tot matig droog, matig gleyig -#> 7 d matig nat, matig gleyig -#> 8 e nat, sterk gleyig met reductiehorizont +#> drain_levels drain_levels_explained +#> +#> 1 a zeer droog, niet gleyig +#> 2 a-b complex van zeer droog, niet gleyig tot droog, niet gleyig +#> 3 a-d complex van zeer droog, niet gleyig tot matig nat, matig gleyig +#> 4 b droog, niet gleyig +#> 5 c matig droog, zwak gleyig +#> 6 c-d complex van droog, zwak gleyig tot matig droog, matig gleyig +#> 7 d matig nat, matig gleyig +#> 8 e nat, sterk gleyig met reductiehorizont #> 9 e-f complex van nat, matig gleyig tot zeer nat, zeer sterk gleyig m… #> 10 e-i complex nat, sterk gleyig met reductiehorizont tot zeer nat met… -#> 11 f zeer nat, zeer sterk gleyig met reductiehorizont -#> 12 g uiterst nat, gereduceerd -#> 13 h nat met relatief hoge ligging, sterk gleyig +#> 11 f zeer nat, zeer sterk gleyig met reductiehorizont +#> 12 g uiterst nat, gereduceerd +#> 13 h nat met relatief hoge ligging, sterk gleyig #> 14 h-i complex van nat met relatief hoge ligging, sterk gleyig tot zee… #> 15 i zeer nat met relatief hoge ligging, zeer sterk gleyig ``` @@ -196,28 +201,28 @@ How many polygons are available per region? ```{r} sm_simple %>% - st_drop_geometry %>% - count(bsm_region) + st_drop_geometry() %>% + count(bsm_region) ``` Wat is the average polygon area per region? 
```{r} sm_simple %>% - mutate(area = st_area(.)) %>% - st_drop_geometry %>% - group_by(bsm_region) %>% - summarise(mean_area = mean(area)) + mutate(area = st_area(.)) %>% + st_drop_geometry() %>% + group_by(bsm_region) %>% + summarise(mean_area = mean(area)) ``` Plot polygons of the 'Zwin' region: ```{r} zwin_map <- - sm_simple %>% - filter(bsm_region == "Zwin") %>% - ggplot(aes(fill = bsm_mo_tex)) + - geom_sf() + sm_simple %>% + filter(bsm_region == "Zwin") %>% + ggplot(aes(fill = bsm_mo_tex)) + + geom_sf() ``` - with WGS84 graticule (though plotted in Belgian Lambert 72 = CRS 31370): @@ -236,11 +241,13 @@ zwin_map + coord_sf(datum = st_crs(31370)) ```{r} sm_simple %>% - filter(bsm_region == "Zwin") %>% - mutate(bsm_mo_tex = as.character(bsm_mo_tex)) %>% - mapview(zcol = "bsm_mo_tex", - alpha.region = 0.2, - map.types = c("OpenStreetMap", "OpenTopoMap")) + filter(bsm_region == "Zwin") %>% + mutate(bsm_mo_tex = as.character(bsm_mo_tex)) %>% + mapview( + zcol = "bsm_mo_tex", + alpha.region = 0.2, + map.types = c("OpenStreetMap", "OpenTopoMap") + ) ``` Clicking a feature on the above generated map reveals all attributes. @@ -261,9 +268,11 @@ The `soilmap` data source belongs to the raw data collection ([Zenodo-link](http ```{r} soilmap_path <- file.path(n2khab_data_path, "10_raw/soilmap") dir.create(soilmap_path) -download_zenodo(doi = "10.5281/zenodo.3387008", - path = soilmap_path, - parallel = TRUE) +download_zenodo( + doi = "10.5281/zenodo.3387008", + path = soilmap_path, + parallel = TRUE +) ``` At some time in future, the download will be performed automatically by `read_soilmap()` (if the `soilmap` data source is missing). @@ -282,12 +291,12 @@ sm #> CRS: EPSG:31370 #> # A tibble: 270,550 x 38 #> bsm_poly_id bsm_map_id bsm_region bsm_ge_region bsm_legend bsm_legend_title -#> * -#> 1 165740 61E Kunstmati… Antropoge… bodemserie OB -#> 2 176046 78W Kunstmati… Antropoge… bodemserie OB +#> * +#> 1 165740 61E Kunstmati… Antropoge… bodemserie OB +#> 2 176046 78W Kunstmati… Antropoge… bodemserie OB #> 3 185239 95W Zandleems… Vochtig z… bodemseries Lda… -#> 4 162400 75E Kunstmati… Antropoge… bodemserie OB -#> 5 173971 63W Kunstmati… Antropoge… bodemserie OB +#> 4 162400 75E Kunstmati… Antropoge… bodemserie OB +#> 5 173971 63W Kunstmati… Antropoge… bodemserie OB #> 6 173087 64W Zandleems… Vochtig z… bodemserie Ldp … #> 7 199453 98E Zandleems… Nat zandl… bodemserie Lep … #> 8 176922 81W Zandleems… Vochtig z… bodemseries Lda… @@ -316,8 +325,8 @@ Extract features that belong to the 'Middellandpolders' region: ```{r} sm_mp <- - sm %>% - filter(bsm_region == "Middellandpolders") + sm %>% + filter(bsm_region == "Middellandpolders") dim(sm_mp) #> [1] 3991 38 ``` @@ -326,24 +335,28 @@ dim(sm_mp) ```{r} sm_mp %>% - mutate(bsm_ge_series = as.character(bsm_ge_series)) %>% - mapview(zcol = "bsm_ge_series", - alpha.region = 0.2, - map.types = c("Wikimedia", "CartoDB.Positron"), - alpha = 0) + mutate(bsm_ge_series = as.character(bsm_ge_series)) %>% + mapview( + zcol = "bsm_ge_series", + alpha.region = 0.2, + map.types = c("Wikimedia", "CartoDB.Positron"), + alpha = 0 + ) ``` - calculate surface area (ha) and polygon count per `bsm_ge_series`: ```{r results='asis'} sm_mp %>% - mutate(area = st_area(.) %>% set_units("ha")) %>% - st_drop_geometry %>% - group_by(bsm_ge_series, bsm_ge_series_explan) %>% - summarise(area = sum(area) %>% round(2), - nr_polygons = n()) %>% - arrange(desc(area)) %>% - kable + mutate(area = st_area(.) 
%>% set_units("ha")) %>% + st_drop_geometry() %>% + group_by(bsm_ge_series, bsm_ge_series_explan) %>% + summarise( + area = sum(area) %>% round(2), + nr_polygons = n() + ) %>% + arrange(desc(area)) %>% + kable() ``` |bsm_ge_series |bsm_ge_series_explan | area| nr_polygons| @@ -361,14 +374,16 @@ sm_mp %>% ```{r results='asis'} sm_mp %>% - filter(bsm_ge_series == "D") %>% - mutate(area = st_area(.) %>% set_units("ha")) %>% - st_drop_geometry %>% - group_by(bsm_soilseries, bsm_soilseries_explan) %>% - summarise(area = sum(area) %>% round(2), - nr_polygons = n()) %>% - arrange(desc(area)) %>% - kable + filter(bsm_ge_series == "D") %>% + mutate(area = st_area(.) %>% set_units("ha")) %>% + st_drop_geometry() %>% + group_by(bsm_soilseries, bsm_soilseries_explan) %>% + summarise( + area = sum(area) %>% round(2), + nr_polygons = n() + ) %>% + arrange(desc(area)) %>% + kable() ``` |bsm_soilseries |bsm_soilseries_explan | area| nr_polygons| @@ -394,9 +409,11 @@ sm_mp %>% ```{r} sm_mp %>% - filter(bsm_soilseries == "m.D5") %>% - mapview(color = "red", - alpha = 1, - alpha.region = 0) + filter(bsm_soilseries == "m.D5") %>% + mapview( + color = "red", + alpha = 1, + alpha.region = 0 + ) ``` diff --git a/vignettes/v025_geospatial_hab.Rmd b/vignettes/v025_geospatial_hab.Rmd index 899495ac..34ddd2ba 100644 --- a/vignettes/v025_geospatial_hab.Rmd +++ b/vignettes/v025_geospatial_hab.Rmd @@ -102,18 +102,18 @@ habitatmap #> Bounding box: xmin: 21991.38 ymin: 153058.3 xmax: 258871.8 ymax: 244027.3 #> Projected CRS: Belge 1972 / Belgian Lambert 72 #> # A tibble: 646,589 x 31 -#> polygon_id eval eenh1 eenh2 eenh3 eenh4 eenh5 eenh6 eenh7 eenh8 v1 v2 +#> polygon_id eval eenh1 eenh2 eenh3 eenh4 eenh5 eenh6 eenh7 eenh8 v1 v2 #> * -#> 1 000098_v20… m b -#> 2 000132_v20… m bl -#> 3 000135_v20… m bl -#> 4 000136_v20… m bl -#> 5 000142_v20… m bl -#> 6 000150_v20… m bl -#> 7 000297_v20… m bl -#> 8 000991_v20… m bl -#> 9 000999_v20… m bl -#> 10 001000_v20… m bl +#> 1 000098_v20… m b +#> 2 000132_v20… m bl +#> 3 000135_v20… m bl +#> 4 000136_v20… m bl +#> 5 000142_v20… m bl +#> 6 000150_v20… m bl +#> 7 000297_v20… m bl +#> 8 000991_v20… m bl +#> 9 000999_v20… m bl +#> 10 001000_v20… m bl #> # … with 646,579 more rows, and 19 more variables: v3 , source , #> # info , bwk_label , hab1 , phab1 , hab2 , #> # phab2 , hab3 , phab3 , hab4 , phab4 , hab5 , @@ -158,11 +158,11 @@ habitatstreams With `read_habitatstreams(source_text = TRUE)` a second object `sources` is returned with the meaning of the `source_id` codes: ```{r} -read_habitatstreams(source_text = TRUE) %>% +read_habitatstreams(source_text = TRUE) %>% .$sources #> # A tibble: 7 x 2 -#> source_id source_text -#> +#> source_id source_text +#> #> 1 VMM "Gegevens afgeleid van macrofyteninventarisaties uitgevoer… #> 2 EcoInv "Tijdens ecologische inventarisatiestudies uitgevoerd in o… #> 3 extrapol "De conclusie van het nabijgelegen geïnventariseerde segme… @@ -210,18 +210,18 @@ habitatquarries #> Bounding box: xmin: 221427.3 ymin: 160393.5 xmax: 243211.1 ymax: 168965.1 #> Projected CRS: Belge 1972 / Belgian Lambert 72 #> # A tibble: 45 x 7 -#> polygon_id unit_id name code_orig type extra_reference -#> -#> 1 4 4 Avergat - … gh Lahaye 2018 -#> 2 6 6 Avergat - … 8310 8310 Lahaye 2018 -#> 3 5 5 Avergat - … gh Lahaye 2018 +#> polygon_id unit_id name code_orig type extra_reference +#> +#> 1 4 4 Avergat - … gh Lahaye 2018 +#> 2 6 6 Avergat - … 8310 8310 Lahaye 2018 +#> 3 5 5 Avergat - … gh Lahaye 2018 #> 4 20 20 Coolen 8310 8310 Limburgs Landschap 2020; pers… -#> 5 21 21 Coolen gh 
Limburgs Landschap 2020 -#> 6 29 29 Groeve Lin… 8310 8310 -#> 7 31 31 Grote berg… 8310 8310 De Haan & Lahaye 2018 -#> 8 37 37 Grote berg… 8310 8310 De Haan & Lahaye 2018 -#> 9 24 24 Henisdael … 8310 8310 Dusar et al. 2007 -#> 10 34 34 Henisdael … 8310 8310 Dusar et al. 2007 +#> 5 21 21 Coolen gh Limburgs Landschap 2020 +#> 6 29 29 Groeve Lin… 8310 8310 +#> 7 31 31 Grote berg… 8310 8310 De Haan & Lahaye 2018 +#> 8 37 37 Grote berg… 8310 8310 De Haan & Lahaye 2018 +#> 9 24 24 Henisdael … 8310 8310 Dusar et al. 2007 +#> 10 34 34 Henisdael … 8310 8310 Dusar et al. 2007 #> # … with 35 more rows, and 1 more variable: geom ``` @@ -252,14 +252,14 @@ habitatquarries2$extra_references #> # A tibble: 9 x 23 #> category bibtexkey address author booktitle journal month note number pages #> -#> 1 BOOK de_haan_… Brusse… De Ha… -#> 2 INCOLLE… dusar_me… Genk Dusar… Likona j… 6-13 -#> 3 BOOK jenneken… Riemst Jenne… -#> 4 INCOLLE… lahaye_g… Riemst Lahay… 12 -#> 5 BOOK verhoeve… Weert Verho… 1769 -#> 6 BOOK walschot… Walsc… -#> 7 MISC wikipedi… {Wiki… jan Page… -#> 8 MISC limburgs… {Limb… apr Libr… +#> 1 BOOK de_haan_… Brusse… De Ha… +#> 2 INCOLLE… dusar_me… Genk Dusar… Likona j… 6-13 +#> 3 BOOK jenneken… Riemst Jenne… +#> 4 INCOLLE… lahaye_g… Riemst Lahay… 12 +#> 5 BOOK verhoeve… Weert Verho… 1769 +#> 6 BOOK walschot… Walsc… +#> 7 MISC wikipedi… {Wiki… jan Page… +#> 8 MISC limburgs… {Limb… apr Libr… #> 9 ARTICLE silverta… Silve… Natuur… 12 334-… #> # … with 13 more variables: publisher , series , title , #> # volume , year , url , isbn , copyright , @@ -333,17 +333,16 @@ hms_occ Let's estimate the surface area per type, including uncertain occurrences of types and taking into account cover percentage per polygon (`phab`): ```{r} -hms_pol %>% - mutate(area = st_area(.)) %>% - st_drop_geometry %>% - inner_join(hms_occ, by = "polygon_id") %>% - # area of type within polygon: - mutate(area_type = area * phab / 100) %>% - group_by(type) %>% - summarise(area = sum(area_type) %>% - set_units("ha") %>% - round(2) - ) +hms_pol %>% + mutate(area = st_area(.)) %>% + st_drop_geometry() %>% + inner_join(hms_occ, by = "polygon_id") %>% + # area of type within polygon: + mutate(area_type = area * phab / 100) %>% + group_by(type) %>% + summarise(area = sum(area_type) %>% + set_units("ha") %>% + round(2)) #> # A tibble: 101 x 2 #> type area #> [ha] @@ -394,8 +393,8 @@ hmt$habitatmap_terr_polygons ```{r} hmt$habitatmap_terr_types #> # A tibble: 99,784 x 6 -#> polygon_id type certain code_orig phab source -#> +#> polygon_id type certain code_orig phab source +#> #> 1 000038_v2016 91E0_va TRUE 91E0_va 100 habitatmap_stdized #> 2 000043_v2016 9130_end TRUE 9130_end 100 habitatmap_stdized #> 3 000064_v2020 9130_end TRUE 9130_end 100 habitatmap_stdized @@ -416,29 +415,31 @@ This can be controlled by the `drop_7220` argument. 
As a consequence, some type codes are completely absent from `habitatmap_terr_types`: ```{r} -hms_occ %>% - distinct(type) %>% - anti_join(hmt$habitatmap_terr_types %>% - distinct(type), - by = "type") %>% +hms_occ %>% + distinct(type) %>% + anti_join( + hmt$habitatmap_terr_types %>% + distinct(type), + by = "type" + ) %>% arrange(type) #> # A tibble: 7 x 1 -#> type +#> type #> -#> 1 2190 -#> 2 6410 -#> 3 6430 -#> 4 6510 -#> 5 7140 -#> 6 7220 +#> 1 2190 +#> 2 6410 +#> 3 6430 +#> 4 6510 +#> 5 7140 +#> 6 7220 #> 7 9130 ``` About 3% of all type occurrences received a new type code: ```{r} -hmt$habitatmap_terr_types %>% - count(source) %>% +hmt$habitatmap_terr_types %>% + count(source) %>% mutate(pct = (n / sum(n) * 100) %>% round(0)) #> # A tibble: 2 x 3 #> source n pct @@ -463,18 +464,18 @@ wsh$watersurfaces_polygons #> Bounding box: xmin: 22546.57 ymin: 159273.1 xmax: 253896.9 ymax: 242960.1 #> Projected CRS: Belge 1972 / Belgian Lambert 72 #> # A tibble: 3,233 x 5 -#> polygon_id polygon_id_ws polygon_id_habitatm… description_orig -#> * -#> 1 ANTANT0082 ANTANT0082 596466_v2014 60% 3150; 20% rbbmr; 20% rbbsf -#> 2 ANTANT0234 ANTANT0234 633396_v2020 100% 3130_na -#> 3 ANTANT0251 ANTANT0251 113978_v2014 100% 3150 -#> 4 ANTANT0253 ANTANT0253 111606_v2014 100% 3150 +#> polygon_id polygon_id_ws polygon_id_habitatm… description_orig +#> * +#> 1 ANTANT0082 ANTANT0082 596466_v2014 60% 3150; 20% rbbmr; 20% rbbsf +#> 2 ANTANT0234 ANTANT0234 633396_v2020 100% 3130_na +#> 3 ANTANT0251 ANTANT0251 113978_v2014 100% 3150 +#> 4 ANTANT0253 ANTANT0253 111606_v2014 100% 3150 #> 5 ANTANT0297 ANTANT0297 409153_v2014+409153… 85% 3140; 15% 3150+85% 3140; 1… -#> 6 ANTANT0315 ANTANT0315 519082_v2018 100% 3140 -#> 7 ANTANT0319 ANTANT0319 601958_v2014 100% 3150,gh -#> 8 ANTANT0381 ANTANT0381 644003_v2014 85% gh; 15% 3140 +#> 6 ANTANT0315 ANTANT0315 519082_v2018 100% 3140 +#> 7 ANTANT0319 ANTANT0319 601958_v2014 100% 3150,gh +#> 8 ANTANT0381 ANTANT0381 644003_v2014 85% gh; 15% 3140 #> 9 ANTANT0383 ANTANT0383 631879_v2014+593522… 50% 3150; 40% rbbmr; 10% rbbsf… -#> 10 ANTANT0384 ANTANT0384 644003_v2014 85% gh; 15% 3140 +#> 10 ANTANT0384 ANTANT0384 644003_v2014 85% gh; 15% 3140 #> # … with 3,223 more rows, and 1 more variable: geom ``` @@ -482,39 +483,45 @@ wsh$watersurfaces_polygons wsh$watersurfaces_types #> # A tibble: 3,669 x 4 #> polygon_id type certain code_orig -#> -#> 1 ANTANT0082 3150 TRUE 3150 -#> 2 ANTANT0234 3130_na TRUE 3130_na -#> 3 ANTANT0251 3150 TRUE 3150 -#> 4 ANTANT0253 3150 TRUE 3150 -#> 5 ANTANT0297 3140 TRUE 3140 -#> 6 ANTANT0297 3150 TRUE 3150 -#> 7 ANTANT0315 3140 TRUE 3140 -#> 8 ANTANT0319 3150 FALSE 3150,gh -#> 9 ANTANT0381 3140 TRUE 3140 -#> 10 ANTANT0383 3150 TRUE 3150 +#> +#> 1 ANTANT0082 3150 TRUE 3150 +#> 2 ANTANT0234 3130_na TRUE 3130_na +#> 3 ANTANT0251 3150 TRUE 3150 +#> 4 ANTANT0253 3150 TRUE 3150 +#> 5 ANTANT0297 3140 TRUE 3140 +#> 6 ANTANT0297 3150 TRUE 3150 +#> 7 ANTANT0315 3140 TRUE 3140 +#> 8 ANTANT0319 3150 FALSE 3150,gh +#> 9 ANTANT0381 3140 TRUE 3140 +#> 10 ANTANT0383 3150 TRUE 3150 #> # … with 3,659 more rows ``` Let's compute some statistics of standing water types (ignoring the value `certain`): ```{r} -wsh$watersurfaces_polygons %>% - mutate(area = st_area(.)) %>% - st_drop_geometry %>% +wsh$watersurfaces_polygons %>% + mutate(area = st_area(.)) %>% + st_drop_geometry() %>% inner_join(wsh$watersurfaces_types, - by = "polygon_id") %>% - group_by(type) %>% - summarise(nr_watersurfaces = n_distinct(polygon_id), - total_area = sum(area), - area_min = min(area), - area_Q1 = 
quantile(area, 0.25), - area_Q2 = quantile(area, 0.5), - area_Q3 = quantile(area, 0.75), - max = max(area) - ) %>% - mutate_at(vars(matches("area|max")), - function(x) {set_units(x, "a") %>% round(1)}) + by = "polygon_id" + ) %>% + group_by(type) %>% + summarise( + nr_watersurfaces = n_distinct(polygon_id), + total_area = sum(area), + area_min = min(area), + area_Q1 = quantile(area, 0.25), + area_Q2 = quantile(area, 0.5), + area_Q3 = quantile(area, 0.75), + max = max(area) + ) %>% + mutate_at( + vars(matches("area|max")), + function(x) { + set_units(x, "a") %>% round(1) + } + ) #> # A tibble: 9 x 8 #> type nr_watersurfaces total_area area_min area_Q1 area_Q2 area_Q3 max #> [a] [a] [a] [a] [a] [a] @@ -535,10 +542,10 @@ Consequently we can make use of tools like `set_units()` to convert units (e.g. Because the main type code `3130` will mostly boil down to `3130_aom` in the field, a further interpreted flavour can be generated with `read_watersurfaces_hab(interpreted = TRUE)`. ```{r} -read_watersurfaces_hab(interpreted = TRUE) %>% - .$watersurfaces_types %>% - filter(type == "3130") %>% - nrow +read_watersurfaces_hab(interpreted = TRUE) %>% + .$watersurfaces_types %>% + filter(type == "3130") %>% + nrow() #> [1] 0 ``` @@ -557,24 +564,24 @@ An example (`df` is our dataframe): ```{r eval=TRUE} df <- - tribble( - ~mycode, ~obs, - "2130", 5, - "2190", 45, - "2330_bu", 8, - "2330_dw", 8, - "6410_mo", 78, - "6410_ve", 4, - "91E0_vn", 10 - ) + tribble( + ~mycode, ~obs, + "2130", 5, + "2190", 45, + "2330_bu", 8, + "2330_dw", 8, + "6410_mo", 78, + "6410_ve", 4, + "91E0_vn", 10 + ) df ``` With the `type_var` argument you specify which variable of your dataframe represents type codes: ```{r eval=TRUE} -df_exp <- - expand_types(df, type_var = "mycode") +df_exp <- + expand_types(df, type_var = "mycode") df_exp ``` @@ -583,18 +590,18 @@ More examples and features are explained in the documentation of `expand_types() Obviously, more rows of `habitatmap_terr` will be retained by joining `df_exp`: ```{r warning=FALSE} -hmt$habitatmap_terr_types %>% - semi_join(df_exp, by = c(type = "mycode")) %>% - nrow +hmt$habitatmap_terr_types %>% + semi_join(df_exp, by = c(type = "mycode")) %>% + nrow() #> [1] 6634 ``` When joining with `df`: ```{r warning=FALSE} -hmt$habitatmap_terr_types %>% - semi_join(df, by = c(type = "mycode")) %>% - nrow +hmt$habitatmap_terr_types %>% + semi_join(df, by = c(type = "mycode")) %>% + nrow() #> [1] 4984 ``` diff --git a/vignettes/v030_GRTSmh.Rmd b/vignettes/v030_GRTSmh.Rmd index f905d3dc..aa8382e9 100644 --- a/vignettes/v030_GRTSmh.Rmd +++ b/vignettes/v030_GRTSmh.Rmd @@ -69,13 +69,13 @@ See the `vignette("v020_datastorage")` for more information. 
```{r eval=FALSE} read_GRTSmh() -#> class : RasterLayer +#> class : RasterLayer #> dimensions : 2843, 7401, 21041043 (nrow, ncol, ncell) #> resolution : 32, 32 (x, y) #> extent : 22029.59, 258861.6, 153054.1, 244030.1 (xmin, xmax, ymin, ymax) -#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs -#> source : [...]/n2khab_data/10_raw/GRTSmaster_habitats/GRTSmaster_habitats.tif -#> names : GRTSmaster_habitats +#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs +#> source : [...]/n2khab_data/10_raw/GRTSmaster_habitats/GRTSmaster_habitats.tif +#> names : GRTSmaster_habitats #> values : 1, 67108857 (min, max) ``` @@ -83,14 +83,14 @@ With the argument `brick = TRUE` however, you will get the `GRTSmh_brick` data s ```{r eval=FALSE} read_GRTSmh(brick = TRUE) -#> class : RasterBrick +#> class : RasterBrick #> dimensions : 2843, 7401, 21041043, 10 (nrow, ncol, ncell, nlayers) #> resolution : 32, 32 (x, y) #> extent : 22029.59, 258861.6, 153054.1, 244030.1 (xmin, xmax, ymin, ymax) -#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs -#> source : [...]/n2khab_data/20_processed/GRTSmh_brick/GRTSmh_brick.tif -#> names : level0, level1, level2, level3, level4, level5, level6, level7, level8, level9 -#> min values : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs +#> source : [...]/n2khab_data/20_processed/GRTSmh_brick/GRTSmh_brick.tif +#> names : level0, level1, level2, level3, level4, level5, level6, level7, level8, level9 +#> min values : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 #> max values : 67108857, 16777209, 4194297, 1048569, 262137, 65529, 16377, 4089, 1017, 253 ``` @@ -105,8 +105,10 @@ This is done _at the corresponding spatial resolution_ of the GRTS algorithm, wh The resolutions of each level are the following (in meters): ```{r warning = FALSE, echo = FALSE} -data.frame(level = 1:9, - resolution = 32 * 2 ^ (1:9)) %>% +data.frame( + level = 1:9, + resolution = 32 * 2^(1:9) +) %>% kable(align = "r") ``` @@ -114,13 +116,13 @@ An example with level 5: ```{r eval=FALSE} read_GRTSmh_diffres(level = 5) -#> class : RasterLayer +#> class : RasterLayer #> dimensions : 89, 232, 20648 (nrow, ncol, ncell) #> resolution : 1024, 1024 (x, y) #> extent : 22030, 259598, 153054, 244190 (xmin, xmax, ymin, ymax) -#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs -#> source : [...]/n2khab_data/20_processed/GRTSmh_diffres/GRTSmh_diffres.5.tif -#> names : level5 +#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs +#> source : 
[...]/n2khab_data/20_processed/GRTSmh_diffres/GRTSmh_diffres.5.tif +#> names : level5 #> values : 1, 65529 (min, max) ``` @@ -157,13 +159,13 @@ Its use is just to return the base-4-fraction-converted `GRTSmaster_habitats` as ```{r eval=FALSE} options(scipen = 999, digits = 15) read_GRTSmh_base4frac() -#> class : RasterLayer +#> class : RasterLayer #> dimensions : 2843, 7401, 21041043 (nrow, ncol, ncell) #> resolution : 32, 32 (x, y) #> extent : 22029.591973471, 258861.591973471, 153054.113583292, 244030.113583292 (xmin, xmax, ymin, ymax) -#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs -#> source : [...]/n2khab_data/20_processed/GRTSmh_base4frac/GRTSmh_base4frac.tif -#> names : GRTSmh_base4frac +#> crs : +proj=lcc +lat_1=51.16666723333333 +lat_2=49.8333339 +lat_0=90 +lon_0=4.367486666666666 +x_0=150000.013 +y_0=5400088.438 +ellps=intl +towgs84=-106.8686,52.2978,-103.7239,0.3366,-0.457,1.8422,-1.2747 +units=m +no_defs +#> source : [...]/n2khab_data/20_processed/GRTSmh_base4frac/GRTSmh_base4frac.tif +#> names : GRTSmh_base4frac #> values : 0.0000000000001, 0.3333333333321 (min, max) ```
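
As a quick cross-check of the level/resolution relation used in this vignette, the sketch below reproduces the resolution table and decodes a base-4 fraction at a given level (the example value is the one from the `convert_base4frac_to_dec()` documentation):

```r
library(n2khab)

# Each GRTS level i aggregates the 32 m master grid by a factor 2^i
data.frame(level = 1:9, resolution_m = 32 * 2^(1:9))
# level 5 corresponds to 1024 m, matching the read_GRTSmh_diffres(level = 5)
# output shown above

# Decoding a base-4 fraction back to a decimal ranking number at level 5
oldoption <- options(scipen = 999, digits = 15)
convert_base4frac_to_dec(c(NA, 0.1010101010101), level = 5)
options(oldoption)
```
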