Skip to content

Commit

Permalink
with extension type
Browse files Browse the repository at this point in the history
  • Loading branch information
paleolimbot committed Dec 6, 2023
1 parent 5dcf5d5 commit 8d26a0f
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 2 deletions.
1 change: 1 addition & 0 deletions r/geoarrow/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Depends:
R (>= 2.10)
Suggests:
arrow,
R6,
sf,
testthat (>= 3.0.0)
Remotes:
Expand Down
8 changes: 6 additions & 2 deletions r/geoarrow/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -318,9 +318,13 @@ as_validity_buffer <- function(x) {

# This really needs a helper in nanoarrow, but for now, we need a way to drop
# the extension type and convert storage only for testing
force_array_storage <- function(array) {
schema <- infer_nanoarrow_schema(array)
force_schema_storage <- function(schema) {
schema$metadata[["ARROW:extension:name"]] <- NULL
schema
}

force_array_storage <- function(array) {
schema <- force_schema_storage(infer_nanoarrow_schema(array))
array_shallow <- nanoarrow::nanoarrow_allocate_array()
nanoarrow::nanoarrow_pointer_export(array, array_shallow)
nanoarrow::nanoarrow_array_set_schema(array_shallow, schema, validate = FALSE)
Expand Down
119 changes: 119 additions & 0 deletions r/geoarrow/R/arrow-compat.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,122 @@ as_geoarrow_array_stream.Array <- function(x, ..., schema = NULL) {

as_geoarrow_array_stream(stream, schema = schema)
}

GeometryExtensionType <- new.env(parent = emptyenv())
GeometryExtensionType$create <- function(...) {
stop("Package 'arrow' must be loaded to use GeometryExtensionType")
}

# this runs in .onLoad(), where we can't get coverage
# nocov start
register_arrow_extension_type_or_set_hook <- function(...) {
# Register a hook for the arrow package being loaded to run the extension type
# registration
setHook(packageEvent("arrow", "onLoad"), register_arrow_extension_type)

# If arrow is already loaded, run the registration now
if (isNamespaceLoaded("arrow")) {
register_arrow_extension_type()
}
}

register_arrow_extension_type <- function(...) {
# for CMD check
self <- NULL
private <- NULL

GeometryExtensionType$cls <- R6::R6Class(
"GeometryExtensionType", inherit = arrow::ExtensionType,
public = list(

deserialize_instance = function() {
private$schema <- as_nanoarrow_schema(self)
private$parsed <- geoarrow_schema_parse(private$schema)
},

as_vector = function(array) {
as_geoarrow_vctr(array)
},

ToString = function() {
label <- self$extension_name()

crs <- self$crs
if (is.null(crs) || identical(crs, "")) {
crs <- "<crs: unspecified>"
} else if (nchar(crs) > 30) {
crs <- paste0("<CRS: ", substr(crs, 1, 27), "...")
} else {
crs <- paste0("<CRS: ", crs, ">")
}

if (self$edge_type != "PLANAR") {
label <- paste(tolower(self$edge_type), label)
}

sprintf("%s %s", label, crs)
}
),
active = list(
geoarrow_id = function() {
private$parsed$id
},
geometry_type = function() {
private$parsed$geometry_type
},
dimensions = function() {
enum_label(private$parsed$dimensions, "Dimensions")
},
coord_type = function() {
enum_label(private$parsed$coord_type, "CoordType")
},
crs = function() {
if (private$parsed$crs_type == enum$CrsType$NONE) {
NULL
} else {
private$parsed$crs
}
},
edge_type = function() {
enum_label(private$parsed$edge_type, "EdgeType")
}
),
private = list(
schema = NULL,
parsed = NULL
)
)

# This shouldn't be needed directly...these objects will get instantiated
# when the Type object gets surfaced to R provided that the extension types
# have been registered.
GeometryExtensionType$create <- function(schema) {
schema <- as_nanoarrow_schema(schema)
parsed <- geoarrow_schema_parse(schema)

arrow::new_extension_type(
storage_type = arrow::as_data_type(force_schema_storage(schema)),
extension_name = parsed$extension_name,
extension_metadata = "",
type_class = GeometryExtensionType$cls
)
}

representative_schemas <- list(
na_extension_wkt(),
na_extension_large_wkt(),
na_extension_wkb(),
na_extension_large_wkb(),
na_extension_geoarrow(enum$GeometryType$POINT),
na_extension_geoarrow(enum$GeometryType$LINESTRING),
na_extension_geoarrow(enum$GeometryType$POLYGON),
na_extension_geoarrow(enum$GeometryType$MULTIPOINT),
na_extension_geoarrow(enum$GeometryType$MULTILINESTRING),
na_extension_geoarrow(enum$GeometryType$MULTIPOLYGON)
)

for (schema in representative_schemas) {
arrow::reregister_extension_type(GeometryExtensionType$create(schema))
}
}
# nocov end
1 change: 1 addition & 0 deletions r/geoarrow/R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# nocov start
.onLoad <- function(...) {
register_geoarrow_extension()
register_arrow_extension_type_or_set_hook()

s3_register("sf::st_as_sfc", "geoarrow_vctr")
s3_register("arrow::as_arrow_array", "geoarrow_vctr")
Expand Down
56 changes: 56 additions & 0 deletions r/geoarrow/tests/testthat/test-arrow-compat.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,69 @@

test_that("arrow extension type methods return correct values", {
skip_if_not_installed("arrow")

type <- arrow::as_data_type(
na_extension_geoarrow("POINT", crs = "EPSG:1234", edges = "SPHERICAL")
)
expect_s3_class(type, "GeometryExtensionType")
expect_identical(type$geoarrow_id, 1L)
expect_identical(type$geometry_type, 1L)
expect_identical(type$extension_name(), "geoarrow.point")
expect_identical(type$crs, "EPSG:1234")
expect_identical(type$dimensions, "XY")
expect_identical(type$coord_type, "SEPARATE")
expect_identical(type$edge_type, "SPHERICAL")


})

test_that("arrow extension type method has a reasonable ToString() method", {
skip_if_not_installed("arrow")

type <- arrow::as_data_type(
na_extension_geoarrow("POINT", crs = "EPSG:1234", edges = "SPHERICAL")
)

expect_identical(
type$ToString(),
"spherical geoarrow.point <CRS: EPSG:1234>"
)

type_long_crs <- arrow::as_data_type(
na_extension_geoarrow(
"POINT",
crs = strrep("a", 100),
edges = "SPHERICAL"
)
)

expect_identical(
type_long_crs$ToString(),
"spherical geoarrow.point <CRS: aaaaaaaaaaaaaaaaaaaaaaaaaaa..."
)
})

test_that("as_chunked_array() works for geoarrow_vctr", {
skip_if_not_installed("arrow")

vctr <- as_geoarrow_vctr("POINT (0 1)")
chunked <- arrow::as_chunked_array(vctr)
expect_s3_class(chunked, "ChunkedArray")
expect_equal(chunked$length(), 1)
expect_s3_class(chunked$type, "GeometryExtensionType")
expect_identical(chunked$type$extension_name(), "geoarrow.wkt")
expect_identical(chunked$chunk(0)$storage()$as_vector(), "POINT (0 1)")

# Check conversion back to R
vctr_roundtrip <- as.vector(chunked)
expect_s3_class(vctr_roundtrip, "geoarrow_vctr")
expect_identical(format(vctr_roundtrip), "<POINT (0 1)>")

# Check with a requested type
chunked <- arrow::as_chunked_array(vctr, type = na_extension_wkb())
expect_equal(chunked$length(), 1)
expect_s3_class(chunked$type, "GeometryExtensionType")
expect_identical(chunked$type$extension_name(), "geoarrow.wkb")
})

test_that("as_arrow_array() works for geoarrow_vctr", {
Expand All @@ -35,3 +89,5 @@ test_that("as_arrow_array() works for geoarrow_vctr", {
# Check with a requested type
array <- arrow::as_arrow_array(vctr2, type = na_extension_wkb())
})


0 comments on commit 8d26a0f

Please sign in to comment.