Skip to content

Commit 8016166

Browse files
authored
feat: Make the geoarrow_vctr more vctr-friendly (#62)
Before, errors occurred if you tried to do anything except a simple slice to a `geoarrow_vctr`. Rearranging a geoarrow_vctr is non-trivial and not implemented in nanoarrow or geoarrow-c, so we use arrow for this if available. The resulting structure seems to work reasonably well through most dplyr things, although it seems as though anything requiring `vec_ptype2` or `vec_cast` isn't quite there yet. ``` r library(geoarrow) library(tidyverse) xys <- wk::xy(runif(1e6), runif(1e6)) sfc <- sf::st_as_sfc(xys) vctr <- as_geoarrow_vctr(xys) chunked <- arrow::as_chunked_array(vctr) indices <- sample(1e6) bench::mark( xys[indices], vctr[indices], chunked[indices], sfc[indices], check = FALSE ) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 4 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> #> 1 xys[indices] 4.12ms 4.85ms 53.6 15.3MB 22.2 #> 2 vctr[indices] 15.32ms 17.65ms 33.6 26.9MB 24.8 #> 3 chunked[indices] 7.98ms 10.05ms 57.7 22.9MB 31.8 #> 4 sfc[indices] 508.25ms 508.25ms 1.97 65MB 3.94 vctr_wkb <- as_geoarrow_vctr(vctr, schema = geoarrow_wkb()) bench::mark( c(vctr, vctr), c(vctr, vctr_wkb), c(sfc, sfc), c(xys, xys), check = FALSE ) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 4 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> #> 1 c(vctr, vctr) 1.42ms 1.57ms 620. 120KB 2.00 #> 2 c(vctr, vctr_wkb) 12.4ms 13.24ms 72.5 42.6KB 0 #> 3 c(sfc, sfc) 299.24ms 350.95ms 2.85 76.5MB 7.12 #> 4 c(xys, xys) 5.01ms 6.7ms 73.3 30.5MB 37.6 nc <- sf::read_sf(system.file("shape/nc.shp", package = "sf")) df <- nc |> nanoarrow::as_nanoarrow_array_stream() |> as_tibble() |> select(1:3, "geometry") df #> # A tibble: 100 × 4 #> AREA PERIMETER CNTY_ geometry #> <dbl> <dbl> <dbl> <grrw_vct> #> 1 0.114 1.44 1825 <MULTIPOLYGON (((-81.4727554 36.2343559, -81.5408401 3… #> 2 0.061 1.23 1827 <MULTIPOLYGON (((-81.2398911 36.3653641, -81.2406921 3… #> 3 0.143 1.63 1828 <MULTIPOLYGON (((-80.4563446 36.2425575, -80.476387 36… #> 4 0.07 2.97 1831 <MULTIPOLYGON (((-76.0089722 36.3195953, -76.0173492 3… #> 5 0.153 2.21 1832 <MULTIPOLYGON (((-77.2176666 36.2409821, -77.2346115 3… #> 6 0.097 1.67 1833 <MULTIPOLYGON (((-76.7450638 36.2339172, -76.98069 36.… #> 7 0.062 1.55 1834 <MULTIPOLYGON (((-76.0089722 36.3195953, -75.9571838 3… #> 8 0.091 1.28 1835 <MULTIPOLYGON (((-76.5625076 36.3405685, -76.6042404 3… #> 9 0.118 1.42 1836 <MULTIPOLYGON (((-78.3087616 36.2600403, -78.2829285 3… #> 10 0.124 1.43 1837 <MULTIPOLYGON (((-80.0256729 36.2502327, -80.4530106 3… #> # ℹ 90 more rows df2 <- crossing(df, df |> select(geometry2 = geometry)) |> filter(geos::geos_touches(geometry, geometry2)) |> slice(1) df2 #> # A tibble: 1 × 5 #> AREA PERIMETER CNTY_ geometry geometry2 #> <dbl> <dbl> <dbl> <grrw_vct> <grrw_vc> #> 1 0.042 0.999 2238 <MULTIPOLYGON (((-77.9607315 34.1892433, -77.… <MULTIPO… df2 |> pivot_longer(c(geometry, geometry2), names_to = "cat") |> wk::wk_plot() #> Error in `pivot_longer()`: #> ! Can't combine `geometry` <geoarrow_vctr> and `geometry2` <geoarrow_vctr>. #> ✖ Some attributes are incompatible. #> ℹ The author of the class should implement vctrs methods. #> ℹ See <https://vctrs.r-lib.org/reference/faq-error-incompatible-attributes.html>. ``` <sup>Created on 2025-03-05 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup> > The author of the class should implement vctrs methods. Aye.
1 parent 0ef84a4 commit 8016166

File tree

3 files changed

+86
-5
lines changed

3 files changed

+86
-5
lines changed

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method("[",geoarrow_vctr)
34
S3method(as.character,geoarrow_vctr)
45
S3method(as_geoarrow_array,character)
56
S3method(as_geoarrow_array,default)
@@ -17,6 +18,7 @@ S3method(as_geoarrow_array_stream,nanoarrow_array_stream)
1718
S3method(as_nanoarrow_array,sfc)
1819
S3method(as_nanoarrow_array_extension,geoarrow_extension_spec)
1920
S3method(as_nanoarrow_array_stream,sfc)
21+
S3method(c,geoarrow_vctr)
2022
S3method(convert_array,geoarrow_vctr)
2123
S3method(convert_array,sfc)
2224
S3method(convert_array,wk_rct)

R/vctr.R

+52
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,58 @@ format.geoarrow_vctr <- function(x, ..., width = NULL, digits = NULL) {
5454
sprintf("<%s>", formatted_chr)
5555
}
5656

57+
#' @export
58+
`[.geoarrow_vctr` <- function(x, i) {
59+
tryCatch(
60+
NextMethod(),
61+
error = function(e) {
62+
if (!startsWith(conditionMessage(e), "Can't subset nanoarrow_vctr")) {
63+
stop(e)
64+
}
65+
66+
if (!requireNamespace("arrow", quietly = TRUE)) {
67+
stop("'arrow' is required to subset geoarrow_vctr with non-slice input")
68+
}
69+
70+
chunked <- as_chunked_array.geoarrow_vctr(x)[i]
71+
stream <- as_nanoarrow_array_stream(chunked)
72+
nanoarrow::as_nanoarrow_vctr(stream, subclass = "geoarrow_vctr")
73+
}
74+
)
75+
}
76+
77+
#' @export
78+
c.geoarrow_vctr <- function(...) {
79+
dots <- list(...)
80+
if (length(dots) == 1) {
81+
return(dots[[1]])
82+
}
83+
84+
wk::wk_crs_output(...)
85+
wk::wk_is_geodesic_output(...)
86+
streams <- lapply(dots, as_nanoarrow_array_stream)
87+
88+
schemas <- lapply(dots, attr, "schema")
89+
parsed <- lapply(schemas, geoarrow_schema_parse)
90+
ids <- unique(unlist(lapply(parsed, "[[", 1)))
91+
if (length(ids) != 1) {
92+
# We don't have a "cast common" operation here like we do in Python yet,
93+
# so just turn them all into WKB for now
94+
streams <- lapply(streams, as_geoarrow_array_stream, schema = geoarrow_wkb())
95+
schemas <- lapply(streams, nanoarrow::infer_nanoarrow_schema)
96+
}
97+
98+
collected <- lapply(streams, nanoarrow::collect_array_stream)
99+
all_batches <- do.call("c", collected)
100+
stream <- nanoarrow::basic_array_stream(
101+
all_batches,
102+
schema = schemas[[1]],
103+
validate = FALSE
104+
)
105+
106+
nanoarrow::as_nanoarrow_vctr(stream, subclass = "geoarrow_vctr")
107+
}
108+
57109
# Because RStudio's viewer uses this, we want to use the potentially abbreviated
58110
# WKT from the format method
59111
#' @export

tests/testthat/test-vctr.R

+32-5
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,39 @@ test_that("wk crs/edge getters/setters are implemented for geoarrow_vctr", {
5454
expect_false(wk::wk_is_geodesic(x))
5555
})
5656

57-
test_that("Errors occur for unsupported subset operations", {
58-
vctr <- as_geoarrow_vctr("POINT (0 1)")
59-
expect_error(
60-
vctr[5:1],
61-
"Can't subset nanoarrow_vctr with non-slice"
57+
test_that("geoarrow_vctrs can be arranged, subset, and concatenated", {
58+
skip_if_not_installed("arrow")
59+
60+
vctr <- as_geoarrow_vctr(wk::xy(1:5, 6:10))
61+
expect_identical(wk::as_xy(vctr[5:1]), wk::xy(5:1, 10:6))
62+
expect_identical(
63+
wk::as_xy(vctr[c(TRUE, TRUE, FALSE, FALSE, TRUE)]),
64+
wk::xy(c(1, 2, 5), c(6, 7, 10))
65+
)
66+
67+
expect_identical(
68+
wk::as_xy(c(vctr)),
69+
wk::xy(1:5, 6:10)
70+
)
71+
72+
expect_identical(
73+
wk::as_xy(c(vctr, vctr)),
74+
wk::xy(c(1:5, 1:5), c(6:10, 6:10))
75+
)
76+
77+
vctr_wkb <- as_geoarrow_vctr(vctr, schema = geoarrow_wkb())
78+
expect_identical(
79+
wk::as_xy(c(vctr, vctr_wkb)),
80+
wk::xy(c(1:5, 1:5), c(6:10, 6:10))
6281
)
82+
expect_identical(
83+
wk::as_xy(c(vctr_wkb, vctr)),
84+
wk::xy(c(1:5, 1:5), c(6:10, 6:10))
85+
)
86+
})
87+
88+
test_that("geoarrow_vctrs error for invalid subsets", {
89+
vctr <- as_geoarrow_vctr(wk::xy(1:5, 6:10))
6390

6491
expect_error(
6592
vctr[1] <- "something",

0 commit comments

Comments
 (0)