diff --git a/DESCRIPTION b/DESCRIPTION index b8c1cad4..3449c751 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,7 +54,10 @@ Suggests: rapidoc, sf, ragg, - svglite + svglite, + readxl, + writexl, + utils RoxygenNote: 7.3.2 Collate: 'async.R' diff --git a/NAMESPACE b/NAMESPACE index a61630b7..80cc824e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(include_rmd) export(is_plumber) export(options_plumber) export(parser_csv) +export(parser_excel) export(parser_feather) export(parser_form) export(parser_geojson) @@ -80,6 +81,7 @@ export(serializer_cat) export(serializer_content_type) export(serializer_csv) export(serializer_device) +export(serializer_excel) export(serializer_feather) export(serializer_format) export(serializer_geojson) diff --git a/NEWS.md b/NEWS.md index 407d8316..825b53b1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,12 @@ * Added support for graphic devices provided by ragg and svglite (@thomasp85 #964) * `parse_rds()`, `parse_feather()`, and `parse_parquet()` no longer writes data to disk during parsing (@thomasp85, #942) +* New serializers + * `serializer_excel()`: Return an object serialized by `writexl::write_xlsx` (@r2evans, #973). + +* New request body parsers + * `parser_excel()`: Parse request body as an Excel workbook using `readxl::read_excel` (@r2evans, #973). This defaults to loading the first worksheet only; use `@parser excel list(sheet=NA)` to import all worksheets. This always returns a list of data frames, even if there is just one worksheet. + # plumber 1.2.2 * Allow to set plumber options using environment variables `?options_plumber`. (@meztez #934) diff --git a/R/parse-body.R b/R/parse-body.R index ec2ec43a..f313cc1f 100644 --- a/R/parse-body.R +++ b/R/parse-body.R @@ -510,6 +510,41 @@ parser_parquet <- function(...) 
{
   }
 }
 
+# readxl reads a single worksheet per call; to let an endpoint read several worksheets, its
+# documentation recommends iterating over the discovered sheet names (cf.
+# https://readxl.tidyverse.org/articles/readxl-workflows.html#iterate-over-multiple-worksheets-in-a-workbook).
+# This parser therefore treats an NA in `sheet` as "all worksheet names found in the workbook".
+
+#' @describeIn parsers excel parser. See [readxl::read_excel()] for more details. (Defaults to reading in the first worksheet only, use `@parser excel list(sheet=NA)` to read in all worksheets.)
+#' @param sheet Sheet to read. Either a string (the name of a sheet), or an
+#'   integer (the position of the sheet). Defaults to the first sheet. To read all
+#'   sheets, use `NA`.
+#' @export
+parser_excel <- function(..., sheet = NULL) {
+  if (!requireNamespace("readxl", quietly = TRUE)) {
+    stop("`readxl` must be installed for `parser_excel` to work")
+  }
+  read_workbook <- function(tmpfile) {
+    # lapply() over NULL would read nothing, so NULL means "first worksheet"
+    sheets <- if (is.null(sheet)) 1L else sheet
+    if (anyNA(sheets)) {
+      sheets <- readxl::excel_sheets(tmpfile)
+    }
+    # sheets given by name also name the elements of the returned list
+    if (is.character(sheets)) {
+      names(sheets) <- sheets
+    }
+    read_sheet <- function(sht) {
+      readxl::read_excel(path = tmpfile, sheet = sht, ...)
+    }
+    suppressWarnings(lapply(sheets, read_sheet))
+  }
+  parse_fn <- parser_read_file(read_workbook)
+  function(value, ...) {
+    parse_fn(value)
+  }
+}
+
 #' @describeIn parsers Octet stream parser. Returns the raw content.
#' @export
 parser_octet <- function() {
@@ -588,6 +623,7 @@ register_parsers_onLoad <- function() {
   register_parser("rds", parser_rds, fixed = "application/rds")
   register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather"))
   register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet")
+  register_parser("excel", parser_excel, fixed = c("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"))
   register_parser("text", parser_text, fixed = "text/plain", regex = "^text/")
   register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values"))
   # yaml types: https://stackoverflow.com/a/38000954/591574
diff --git a/R/serializer.R b/R/serializer.R
index 2617a459..d861d54f 100644
--- a/R/serializer.R
+++ b/R/serializer.R
@@ -306,6 +306,22 @@ serializer_parquet <- function(type = "application/vnd.apache.parquet") {
   )
 }
 
+#' @describeIn serializers excel serializer. See also: [writexl::write_xlsx()]
+#' @export
+serializer_excel <- function(..., type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
+  if (!requireNamespace("writexl", quietly = TRUE)) {
+    stop("The writexl package is not available but is required in order to use the excel serializer",
+         call. = FALSE)
+  }
+  # `...` is forwarded to writexl::write_xlsx() each time a value is written to the temp file
+  serializer_write_file(
+    fileext = ".xlsx",
+    type = type,
+    write_fn = function(val, tmpfile) {
+      writexl::write_xlsx(x = val, path = tmpfile, ...)
+    }
+  )
+}
 
 #' @describeIn serializers YAML serializer.
See also: [yaml::as.yaml()] #' @export @@ -693,6 +709,7 @@ add_serializers_onLoad <- function() { register_serializer("tsv", serializer_tsv) register_serializer("feather", serializer_feather) register_serializer("parquet", serializer_parquet) + register_serializer("excel", serializer_excel) register_serializer("yaml", serializer_yaml) register_serializer("geojson", serializer_geojson) diff --git a/man/parsers.Rd b/man/parsers.Rd index 52089bf3..050c5393 100644 --- a/man/parsers.Rd +++ b/man/parsers.Rd @@ -12,6 +12,7 @@ \alias{parser_rds} \alias{parser_feather} \alias{parser_parquet} +\alias{parser_excel} \alias{parser_octet} \alias{parser_multi} \alias{parser_none} @@ -39,6 +40,8 @@ parser_feather(...) parser_parquet(...) +parser_excel(..., sheet = NULL) + parser_octet() parser_multi() @@ -51,6 +54,10 @@ parser_none() \item{parse_fn}{function to further decode a text string into an object} \item{read_fn}{function used to read a the content of a file. Ex: \code{\link[=readRDS]{readRDS()}}} + +\item{sheet}{Sheet to read. Either a string (the name of a sheet), or an +integer (the position of the sheet). Defaults to the first sheet. To read all +sheets, use \code{NA}.} } \description{ Parsers are used in Plumber to transform request body received @@ -93,6 +100,8 @@ This parser should be used when reading from a file is required. \item \code{parser_parquet()}: parquet parser. See \code{\link[arrow:read_parquet]{arrow::read_parquet()}} for more details. +\item \code{parser_excel()}: excel parser. See \code{\link[readxl:read_excel]{readxl::read_excel()}} for more details. (Defaults to reading in the first worksheet only, use \verb{@parser excel list(sheet=NA)} to read in all worksheets.) + \item \code{parser_octet()}: Octet stream parser. Returns the raw content. \item \code{parser_multi()}: Multi part parser. This parser will then parse each individual body with its respective parser. 
When this parser is used, \code{req$body} will contain the updated output from \code{\link[webutils:parse_multipart]{webutils::parse_multipart()}} by adding the \code{parsed} output to each part. Each part may contain detailed information, such as \code{name} (required), \code{content_type}, \code{content_disposition}, \code{filename}, (raw, original) \code{value}, and \code{parsed} (parsed \code{value}). When performing Plumber route argument matching, each multipart part will match its \code{name} to the \code{parsed} content. diff --git a/man/serializers.Rd b/man/serializers.Rd index 8345f8c2..f4f91926 100644 --- a/man/serializers.Rd +++ b/man/serializers.Rd @@ -13,6 +13,7 @@ \alias{serializer_rds} \alias{serializer_feather} \alias{serializer_parquet} +\alias{serializer_excel} \alias{serializer_yaml} \alias{serializer_text} \alias{serializer_format} @@ -57,6 +58,11 @@ serializer_feather(type = "application/vnd.apache.arrow.file") serializer_parquet(type = "application/vnd.apache.parquet") +serializer_excel( + ..., + type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" +) + serializer_yaml(..., type = "text/x-yaml; charset=UTF-8") serializer_text( @@ -160,6 +166,8 @@ not have a \code{"raw"} type, then an error will be thrown. \item \code{serializer_parquet()}: parquet serializer. See also: \code{\link[arrow:write_parquet]{arrow::write_parquet()}} +\item \code{serializer_excel()}: excel serializer. See also: \code{\link[writexl:write_xlsx]{writexl::write_xlsx()}} + \item \code{serializer_yaml()}: YAML serializer. See also: \code{\link[yaml:as.yaml]{yaml::as.yaml()}} \item \code{serializer_text()}: Text serializer. 
See also: \code{\link[=as.character]{as.character()}}
diff --git a/tests/testthat/test-parse-body.R b/tests/testthat/test-parse-body.R
index b701c14d..78d6d698 100644
--- a/tests/testthat/test-parse-body.R
+++ b/tests/testthat/test-parse-body.R
@@ -134,6 +134,32 @@ test_that("Test parquet parser", {
   expect_equal(parsed, r_object)
 })
 
+test_that("Test excel parser", {
+  skip_if_not_installed("readxl")
+  skip_if_not_installed("writexl")
+
+  tmp <- tempfile(fileext = ".xlsx")
+  # unlink() is silent when the file is missing, e.g. if write_xlsx() fails below
+  on.exit(unlink(tmp), add = TRUE)
+
+  # note: factors will fail the round-trip test
+  r_object <- data.frame(chr = LETTERS[1:3], int = 1:3, num = pi+1:3, lgl = c(TRUE, FALSE, NA))
+  res <- try(writexl::write_xlsx(r_object, tmp), silent = TRUE)
+  skip_if(
+    inherits(res, "try-error"),
+    "writexl::write_xlsx() isn't working."
+  )
+
+  # read the whole workbook; a fixed byte cap would silently truncate larger files
+  val <- readBin(tmp, "raw", file.info(tmp)$size)
+
+  parsed <- parse_body(val, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", make_parser("excel"))
+  # the parser returns one element per sheet; convert the first from tibble to data.frame
+  parsed <- as.data.frame(parsed[[1]], stringsAsFactors = FALSE)
+
+  expect_equal(parsed, r_object)
+})
+
 test_that("Test geojson parser", {
   skip_if_not_installed("geojsonsf")
   skip_if_not_installed("sf")
diff --git a/tests/testthat/test-serializer-excel.R b/tests/testthat/test-serializer-excel.R
new file mode 100644
index 00000000..57b2db72
--- /dev/null
+++ b/tests/testthat/test-serializer-excel.R
@@ -0,0 +1,34 @@
+context("excel serializer")
+
+test_that("excel serializes properly", {
+  skip_if_not_installed("writexl")
+
+  d <- data.frame(a=1, b=2, c="hi")
+  val <- serializer_excel()(d, data.frame(), PlumberResponse$new(), stop)
+  expect_equal(val$status, 200L)
+  expect_equal(val$headers$`Content-Type`, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+
+  # the remaining relies on the fact that xlsx files start as zip files
+  # https://en.wikipedia.org/wiki/List_of_file_signatures
+  expect_equal(val$body[1:4], as.raw(c(0x50, 0x4b, 0x03, 0x04)))
+  tf <- tempfile()
+  on.exit(unlink(tf), add = TRUE)
+  writeBin(val$body, tf)
+  zipcontents <- expect_silent(utils::unzip(tf, list = TRUE))
+  expect_s3_class(zipcontents, "data.frame")
+  expect_true("xl/workbook.xml" %in% zipcontents$Name)
+
+})
+
+test_that("Errors call error handler", {
+  skip_if_not_installed("writexl")
+
+  errors <- 0
+  errHandler <- function(req, res, err){
+    errors <<- errors + 1
+  }
+
+  expect_equal(errors, 0)
+  serializer_excel()(parse(text="hi"), data.frame(), PlumberResponse$new("csv"), errorHandler = errHandler)
+  expect_equal(errors, 1)
+})