Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add excel serializer/parser #975

Merged
merged 7 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ Suggests:
rapidoc,
sf,
ragg,
svglite
svglite,
readxl,
writexl,
utils
RoxygenNote: 7.3.2
Collate:
'async.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(include_rmd)
export(is_plumber)
export(options_plumber)
export(parser_csv)
export(parser_excel)
export(parser_feather)
export(parser_form)
export(parser_geojson)
Expand Down Expand Up @@ -80,6 +81,7 @@ export(serializer_cat)
export(serializer_content_type)
export(serializer_csv)
export(serializer_device)
export(serializer_excel)
export(serializer_feather)
export(serializer_format)
export(serializer_geojson)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
* Added support for graphic devices provided by ragg and svglite (@thomasp85 #964)
* `parse_rds()`, `parse_feather()`, and `parse_parquet()` no longer writes data to disk during parsing (@thomasp85, #942)

* New serializers
* `serializer_excel()`: Return an object serialized by `writexl::write_xlsx` (@r2evans, #973).

* New request body parsers
* `parser_excel()`: Parse request body as an excel workbook using `readxl::read_excel` (@r2evans, #973). This defaults to loading the first worksheet only; use `@parser excel list(sheet=NA)` to import all worksheets. This always returns a list of frames, even if there is just one worksheet.

# plumber 1.2.2

* Allow to set plumber options using environment variables `?options_plumber`. (@meztez #934)
Expand Down
36 changes: 36 additions & 0 deletions R/parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,41 @@ parser_parquet <- function(...) {
}
}

# readxl's default behavior is to read only one worksheet at a time. For an endpoint to read
# multiple worksheets, its documentation suggests iterating over the discovered sheet names (cf.
# https://readxl.tidyverse.org/articles/readxl-workflows.html#iterate-over-multiple-worksheets-in-a-workbook).
# For this reason, this parser detects an NA in the 'sheet=' argument and replaces it with all
# worksheet names found in the workbook.

#' @describeIn parsers excel parser. See [readxl::read_excel()] for more details. (Defaults to reading in the first worksheet only, use `@parser excel list(sheet=NA)` to read in all worksheets.)
#' @param sheet Sheet to read. Either a string (the name of a sheet), or an
#'   integer (the position of the sheet). Defaults to the first sheet. To read all
#'   sheets, use `NA`.
#' @export
parser_excel <- function(..., sheet = NULL) {
  if (!requireNamespace("readxl", quietly = TRUE)) {
    stop("`readxl` must be installed for `parser_excel` to work")
  }

  # Reads the requested worksheet(s) from a workbook on disk and always
  # returns a list with one element per worksheet.
  read_workbook <- function(tmpfile) {
    sheets <-
      if (is.null(sheet)) {
        # lapply() would not iterate over NULL, so spell out "first sheet"
        1L
      } else if (anyNA(sheet)) {
        # NA is the "all worksheets" marker: expand it to every sheet name
        readxl::excel_sheets(tmpfile)
      } else {
        sheet
      }

    # Sheets requested by name also name the elements of the result
    if (is.character(sheets)) {
      names(sheets) <- sheets
    }

    suppressWarnings(
      lapply(sheets, function(one_sheet) {
        readxl::read_excel(path = tmpfile, sheet = one_sheet, ...)
      })
    )
  }

  parse_fn <- parser_read_file(read_workbook)

  function(value, ...) {
    parse_fn(value)
  }
}

#' @describeIn parsers Octet stream parser. Returns the raw content.
#' @export
parser_octet <- function() {
Expand Down Expand Up @@ -588,6 +623,7 @@ register_parsers_onLoad <- function() {
register_parser("rds", parser_rds, fixed = "application/rds")
register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather"))
register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet")
register_parser("excel", parser_excel, fixed = c("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"))
schloerke marked this conversation as resolved.
Show resolved Hide resolved
register_parser("text", parser_text, fixed = "text/plain", regex = "^text/")
register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values"))
# yaml types: https://stackoverflow.com/a/38000954/591574
Expand Down
17 changes: 17 additions & 0 deletions R/serializer.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,22 @@ serializer_parquet <- function(type = "application/vnd.apache.parquet") {
)
}

#' @describeIn serializers excel serializer. See also: [writexl::write_xlsx()]
#' @export
serializer_excel <- function(..., type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
  # writexl is only a suggested dependency, so fail early with a clear
  # message when it is missing.
  if (!requireNamespace("writexl", quietly = TRUE)) {
    stop(
      "The writexl package is not available but is required in order to use the excel serializer",
      call. = FALSE
    )
  }

  # `...` is forwarded unchanged to writexl::write_xlsx() each time a value
  # is serialized; the workbook is written to a temporary `.xlsx` file.
  serializer_write_file(
    fileext = ".xlsx",
    type = type,
    write_fn = function(val, tmpfile) {
      writexl::write_xlsx(x = val, path = tmpfile, ...)
    }
  )
}

#' @describeIn serializers YAML serializer. See also: [yaml::as.yaml()]
#' @export
Expand Down Expand Up @@ -693,6 +709,7 @@ add_serializers_onLoad <- function() {
register_serializer("tsv", serializer_tsv)
register_serializer("feather", serializer_feather)
register_serializer("parquet", serializer_parquet)
register_serializer("excel", serializer_excel)
register_serializer("yaml", serializer_yaml)
register_serializer("geojson", serializer_geojson)

Expand Down
9 changes: 9 additions & 0 deletions man/parsers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions man/serializers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test-parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,32 @@ test_that("Test parquet parser", {
expect_equal(parsed, r_object)
})

# Round-trip check: write a data.frame with writexl, feed the raw bytes to the
# "excel" body parser, and expect the first parsed sheet to equal the input.
test_that("Test excel parser", {
  skip_if_not_installed("readxl")
  skip_if_not_installed("writexl")

  tmp <- tempfile(fileext = ".xlsx")
  # unlink() stays silent when the file was never created (e.g. when
  # write_xlsx() fails and the test is skipped), unlike file.remove()
  on.exit(unlink(tmp), add = TRUE)

  # note: factors will fail the round-trip test
  r_object <- data.frame(chr = LETTERS[1:3], int = 1:3, num = pi + 1:3, lgl = c(TRUE, FALSE, NA))
  res <- try(writexl::write_xlsx(r_object, tmp), silent = TRUE)
  skip_if(
    inherits(res, "try-error"),
    "writexl::write_xlsx() isn't working."
  )

  # read the complete workbook; a fixed byte count would silently truncate
  # any file larger than that count
  val <- readBin(tmp, "raw", file.size(tmp))

  parsed <- parse_body(val, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", make_parser("excel"))
  # the parser returns a list of sheets; convert the first from tibble to data.frame
  parsed <- as.data.frame(parsed[[1]], stringsAsFactors = FALSE)

  expect_equal(parsed, r_object)
})

test_that("Test geojson parser", {
skip_if_not_installed("geojsonsf")
skip_if_not_installed("sf")
Expand Down
34 changes: 34 additions & 0 deletions tests/testthat/test-serializer-excel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Labels this test file for the testthat reporter.
# NOTE(review): context() is deprecated in testthat's 3rd edition — confirm
# which edition this package targets before keeping or removing it.
context("excel serializer")

# Serializes a small data.frame and checks the response status, the
# Content-Type header, and that the body is a zip archive holding a workbook.
test_that("excel serializes properly", {
  skip_if_not_installed("writexl")

  input <- data.frame(a = 1, b = 2, c = "hi")
  serialize <- serializer_excel()
  val <- serialize(input, data.frame(), PlumberResponse$new(), stop)

  expect_equal(val$status, 200L)
  expect_equal(
    val$headers$`Content-Type`,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  )

  # the remaining relies on the fact that xlsx files start as zip files
  # https://en.wikipedia.org/wiki/List_of_file_signatures
  zip_magic <- as.raw(c(0x50, 0x4b, 0x03, 0x04))
  expect_equal(val$body[1:4], zip_magic)

  # write the body to disk so its archive listing can be inspected
  archive_path <- tempfile()
  on.exit(unlink(archive_path), add = TRUE)
  writeBin(val$body, archive_path)

  zipcontents <- expect_silent(utils::unzip(archive_path, list = TRUE))
  expect_s3_class(zipcontents, "data.frame")
  expect_true("xl/workbook.xml" %in% zipcontents$Name)
})

# Passing a value the serializer cannot write must invoke the supplied
# error handler exactly once.
test_that("Errors call error handler", {
  skip_if_not_installed("writexl")

  # counter incremented from inside the handler
  n_errors <- 0
  count_error <- function(req, res, err) {
    n_errors <<- n_errors + 1
  }

  expect_equal(n_errors, 0)

  serialize <- serializer_excel()
  serialize(
    parse(text = "hi"),
    data.frame(),
    PlumberResponse$new("csv"),
    errorHandler = count_error
  )

  expect_equal(n_errors, 1)
})
Loading