Skip to content

Commit

Permalink
feat: add excel serializer/parser (#975)
Browse files Browse the repository at this point in the history
  • Loading branch information
r2evans authored Jan 29, 2025
1 parent 2ba8a26 commit e1a30b0
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 1 deletion.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ Suggests:
rapidoc,
sf,
ragg,
svglite
svglite,
readxl,
writexl,
utils
RoxygenNote: 7.3.2
Collate:
'async.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(include_rmd)
export(is_plumber)
export(options_plumber)
export(parser_csv)
export(parser_excel)
export(parser_feather)
export(parser_form)
export(parser_geojson)
Expand Down Expand Up @@ -80,6 +81,7 @@ export(serializer_cat)
export(serializer_content_type)
export(serializer_csv)
export(serializer_device)
export(serializer_excel)
export(serializer_feather)
export(serializer_format)
export(serializer_geojson)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
* `parse_rds()`, `parse_feather()`, and `parse_parquet()` no longer writes data to disk during parsing (@thomasp85, #942)
* Returning error messages are now turned off by default rather than being turned on if running interactively and turned off if not (@thomasp85, #962)

* New serializers
* `serializer_excel()`: Return an object serialized by `writexl::write_xlsx` (@r2evans, #973).

* New request body parsers
* `parser_excel()`: Parse request body as an Excel workbook using `readxl::read_excel()` (@r2evans, #973). By default only the first worksheet is loaded; use `@parser excel list(sheet=NA)` to import all worksheets. The result is always a list of data frames, even when only one worksheet is read.

# plumber 1.2.2

* Allow to set plumber options using environment variables `?options_plumber`. (@meztez #934)
Expand Down
36 changes: 36 additions & 0 deletions R/parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,41 @@ parser_parquet <- function(...) {
}
}

# readxl's default behavior is to read only one worksheet at a time; for an endpoint to
# read multiple worksheets, its documentation suggests iterating over the discovered sheet names (cf.
# https://readxl.tidyverse.org/articles/readxl-workflows.html#iterate-over-multiple-worksheets-in-a-workbook).
# For this reason, this parser detects an NA in the 'sheet=' argument and replaces it with all
# worksheet names found in the workbook.

#' @describeIn parsers Excel workbook parser. Each requested worksheet is read
#'   with [readxl::read_excel()] and the result is always a list with one
#'   element per worksheet. Only the first worksheet is read by default; use
#'   `@parser excel list(sheet=NA)` to read every worksheet in the workbook.
#' @param sheet Sheet(s) to read. Either a character vector of sheet names or
#'   an integer vector of sheet positions. `NULL` (the default) reads the first
#'   sheet. If any element is `NA`, all sheets in the workbook are read.
#' @export
parser_excel <- function(..., sheet = NULL) {
  if (!requireNamespace("readxl", quietly = TRUE)) {
    stop("`readxl` must be installed for `parser_excel` to work")
  }

  # Reads the requested worksheet(s) from the temporary file that holds the
  # request body and returns them as a list.
  read_workbook <- function(tmpfile) {
    sheets <- sheet
    if (is.null(sheets)) {
      # lapply() over NULL would yield an empty list, so the "first sheet"
      # default has to be made explicit here.
      sheets <- 1L
    } else if (anyNA(sheets)) {
      # NA is the "all worksheets" sentinel: swap it for the real sheet names.
      sheets <- readxl::excel_sheets(tmpfile)
    }

    # Sheets requested by name produce a list named after those sheets.
    if (is.character(sheets)) {
      names(sheets) <- sheets
    }

    suppressWarnings(
      lapply(sheets, function(one_sheet) {
        readxl::read_excel(path = tmpfile, sheet = one_sheet, ...)
      })
    )
  }

  parse_fn <- parser_read_file(read_workbook)

  function(value, ...) {
    parse_fn(value)
  }
}

#' @describeIn parsers Octet stream parser. Returns the raw content.
#' @export
parser_octet <- function() {
Expand Down Expand Up @@ -588,6 +623,7 @@ register_parsers_onLoad <- function() {
register_parser("rds", parser_rds, fixed = "application/rds")
register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather"))
register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet")
register_parser("excel", parser_excel, fixed = c("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"))
register_parser("text", parser_text, fixed = "text/plain", regex = "^text/")
register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values"))
# yaml types: https://stackoverflow.com/a/38000954/591574
Expand Down
17 changes: 17 additions & 0 deletions R/serializer.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,22 @@ serializer_parquet <- function(type = "application/vnd.apache.parquet") {
)
}

#' @describeIn serializers Excel serializer. The value is written to a
#'   temporary `.xlsx` file with [writexl::write_xlsx()]; arguments in `...`
#'   are forwarded to that function.
#' @export
serializer_excel <- function(..., type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
  # Fail early, when the serializer is constructed, if the optional dependency
  # is missing. The message names the serializer by its registered name.
  if (!requireNamespace("writexl", quietly = TRUE)) {
    stop(
      "The writexl package is not available but is required in order to use the excel serializer",
      call. = FALSE
    )
  }

  serializer_write_file(
    fileext = ".xlsx",
    type = type,
    write_fn = function(val, tmpfile) {
      writexl::write_xlsx(x = val, path = tmpfile, ...)
    }
  )
}

#' @describeIn serializers YAML serializer. See also: [yaml::as.yaml()]
#' @export
Expand Down Expand Up @@ -693,6 +709,7 @@ add_serializers_onLoad <- function() {
register_serializer("tsv", serializer_tsv)
register_serializer("feather", serializer_feather)
register_serializer("parquet", serializer_parquet)
register_serializer("excel", serializer_excel)
register_serializer("yaml", serializer_yaml)
register_serializer("geojson", serializer_geojson)

Expand Down
9 changes: 9 additions & 0 deletions man/parsers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions man/serializers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test-parse-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,32 @@ test_that("Test parquet parser", {
expect_equal(parsed, r_object)
})

test_that("Test excel parser", {
  skip_if_not_installed("readxl")
  skip_if_not_installed("writexl")

  tmp <- tempfile(fileext = ".xlsx")
  # unlink() stays silent if the file was never created (e.g. write_xlsx()
  # failed and the test is skipped), whereas file.remove() would warn.
  on.exit(unlink(tmp), add = TRUE)

  # note: factors will fail the round-trip test
  r_object <- data.frame(
    chr = LETTERS[1:3],
    int = 1:3,
    num = pi + 1:3,
    lgl = c(TRUE, FALSE, NA)
  )
  res <- try(writexl::write_xlsx(r_object, tmp), silent = TRUE)
  skip_if(
    inherits(res, "try-error"),
    "writexl::write_xlsx() isn't working."
  )

  # Read the whole workbook; a fixed byte count would silently truncate the
  # body if the generated file ever exceeded it.
  val <- readBin(tmp, "raw", n = file.size(tmp))

  parsed <- parse_body(
    val,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    make_parser("excel")
  )
  # the parser returns a list of sheets; convert the first from tibble to
  # data.frame before comparing
  parsed <- as.data.frame(parsed[[1]], stringsAsFactors = FALSE)

  expect_equal(parsed, r_object)
})

test_that("Test geojson parser", {
skip_if_not_installed("geojsonsf")
skip_if_not_installed("sf")
Expand Down
34 changes: 34 additions & 0 deletions tests/testthat/test-serializer-excel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# NOTE(review): context() is deprecated in testthat's 3rd edition; presumably
# kept to match the package's other test files -- confirm before removing.
context("excel serializer")

test_that("excel serializes properly", {
  skip_if_not_installed("writexl")

  input <- data.frame(a = 1, b = 2, c = "hi")
  res <- serializer_excel()(input, data.frame(), PlumberResponse$new(), stop)

  # Response metadata
  expect_equal(res$status, 200L)
  expect_equal(
    res$headers$`Content-Type`,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  )

  # The remaining checks rely on xlsx files being zip archives, so the body
  # must begin with the zip signature.
  # https://en.wikipedia.org/wiki/List_of_file_signatures
  zip_signature <- as.raw(c(0x50, 0x4b, 0x03, 0x04))
  expect_equal(res$body[1:4], zip_signature)

  # Write the body to disk and list the archive to confirm it holds a workbook.
  xlsx_path <- tempfile()
  on.exit(unlink(xlsx_path), add = TRUE)
  writeBin(res$body, xlsx_path)
  archive_listing <- expect_silent(utils::unzip(xlsx_path, list = TRUE))
  expect_s3_class(archive_listing, "data.frame")
  expect_true("xl/workbook.xml" %in% archive_listing$Name)
})

test_that("Errors call error handler", {
  skip_if_not_installed("writexl")

  # Counter bumped from inside the handler; `<<-` is needed so the handler can
  # update the variable in this test's environment.
  n_errors <- 0
  count_error <- function(req, res, err) {
    n_errors <<- n_errors + 1
  }

  expect_equal(n_errors, 0)

  # An expression object is expected to fail to serialize, which should route
  # the failure to the supplied error handler exactly once.
  # NOTE(review): the "csv" argument to PlumberResponse$new() looks carried
  # over from another serializer test -- confirm it is irrelevant here.
  serializer_excel()(
    parse(text = "hi"),
    data.frame(),
    PlumberResponse$new("csv"),
    errorHandler = count_error
  )

  expect_equal(n_errors, 1)
})

0 comments on commit e1a30b0

Please sign in to comment.