diff --git a/DESCRIPTION b/DESCRIPTION index 9f68bfc..8a89ee3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdhs Type: Package Title: API Client and Dataset Management for the Demographic and Health Survey (DHS) Data -Version: 0.8.1 +Version: 0.8.2 Authors@R: c(person(given = "OJ", family = "Watson", diff --git a/NEWS.md b/NEWS.md index a0982e0..1f30480 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # rdhs (development version) +## rdhs 0.8.2 + +* Spatial boundaries will be cached using the DHS client + ## rdhs 0.8.1 * Convert DHS dataset flat file data dictionaries to UTF-8. This addresses parsing diff --git a/R/client.R b/R/client.R index 4f51a33..caf8a18 100644 --- a/R/client.R +++ b/R/client.R @@ -84,6 +84,26 @@ client_dhs <- function(config=NULL, private$storr$del(key, "downloaded_datasets_variable_names") } } + + # Now do the same for spatial boundaries + dats <- client$dhs_api_request(api_endpoint = "dataupdates") + downloaded_spatial_keys <- private$storr$list("spatial_boundaries") + downloaded_surveyNums <- strsplit(downloaded_spatial_keys, "_") %>% + lapply(function(x) x[1]) %>% + unlist() + + # which of the updates have occured since our last client was created + chge <- dats$SurveyNum[mdy_hms(dats$FileDateLastModified) > client$get_cache_date()] + datasets_to_clear <- which(downloaded_spatial_keys %in% chge) + + # do any of them match those that have been updated since the + # last cache_date + if (length(datasets_to_clear) > 0) { + for (key in downloaded_spatial_keys[datasets_to_clear]) { + private$storr$del(key, "spatial_boundaries") + } + } + } } diff --git a/R/shapes.R b/R/shapes.R index 9936491..525de36 100644 --- a/R/shapes.R +++ b/R/shapes.R @@ -26,6 +26,9 @@ #' `method = "sf"`. Default is `TRUE`. #' @param server_sleep Numeric for length of sleep prior to downloading file #' from their survey. Default 5 seconds. +#' @param client If the request should be cached, then provide a client +#' object created by \code{\link{client_dhs}}. Default = `NULL`, which will +#' search for a client to use #' #' @details Downloads the spatial boundaries from the DHS spatial repository, #' which can be found at \url{https://spatialdata.dhsprogram.com/home/}. @@ -51,7 +54,8 @@ download_boundaries <- function(surveyNum=NULL, method = "sf", quiet_download = FALSE, quiet_parse = TRUE, - server_sleep = 5){ + server_sleep = 5, + client = NULL){ # helper funcs build_final_url <- function(jobId) { @@ -89,60 +93,86 @@ download_boundaries <- function(surveyNum=NULL, countryId <- dats$DHS_CountryCode[match(surveyNum, dats$SurveyNum)] } - # build our url - # ---------------------------------------------------------------------------- + # if no client was provided we'll look for + # the package environment client by default + if (is.null(client)) { + client <- check_for_client() + } - # create url from surveyNum - alt_url <- paste0("https://gis.dhsprogram.com/arcgis/rest/services/Tools/", - "DownloadSubnationalData/GPServer/", - "downloadSubNationalBoundaries/submitJob") + # create db key + key <- paste0(surveyNum, "_", method) - values <- list( - survey_ids = surveyNum, - spatial_format = "shp", - f = "json" + # first check against cache + out <- tryCatch( + client$.__enclos_env__$private$storr$get(key, "spatial_boundaries"), + KeyError = function(e) { + NULL + } ) - # fetch jobID - z <- httr::GET( - httr::modify_url(alt_url, query = values) - ) + # check out agianst cache, if fine then return just that + if (!is.null(out)) { + return(out) + } else { - tf <- tempfile() - y <- writeBin(z$content, con = tf) - h <- jsonlite::fromJSON(brio::read_lines(tf)) - url <- build_final_url(h$jobId) - - # pause for a second for the job id created to appear on their server - # i.e. the code only works with this... - Sys.sleep(server_sleep) - - # download the shape file and read it in - tf2 <- tempfile() - file <- download.file(url, tf2, quiet = quiet_download) - unzipped_files <- suppressWarnings(unzip(tf2, exdir = tempfile())) - file <- grep("dbf", unzipped_files, value=TRUE) - - # how are we reading the dataset in - methods <- c("sf") - if (method %in% methods) { - - # here if we want to add more read in options - if(method == "sf") { - res <- lapply(file, sf::st_read, quiet = quiet_parse) - names(res) <- vapply(file, - function(x) { sf::st_layers(x)$name }, - character(1)) + # build our url + # ---------------------------------------------------------------------------- + + # create url from surveyNum + alt_url <- paste0("https://gis.dhsprogram.com/arcgis/rest/services/Tools/", + "DownloadSubnationalData/GPServer/", + "downloadSubNationalBoundaries/submitJob") + + values <- list( + survey_ids = surveyNum, + spatial_format = "shp", + f = "json" + ) + + # fetch jobID + z <- httr::GET( + httr::modify_url(alt_url, query = values) + ) + + tf <- tempfile() + y <- writeBin(z$content, con = tf) + h <- jsonlite::fromJSON(brio::read_lines(tf)) + url <- build_final_url(h$jobId) + + # pause for a second for the job id created to appear on their server + # i.e. the code only works with this... + Sys.sleep(server_sleep) + + # download the shape file and read it in + tf2 <- tempfile() + file <- download.file(url, tf2, quiet = quiet_download) + unzipped_files <- suppressWarnings(unzip(tf2, exdir = tempfile())) + file <- grep("dbf", unzipped_files, value=TRUE) + + # how are we reading the dataset in + methods <- c("sf") + if (method %in% methods) { + + # here if we want to add more read in options + if(method == "sf") { + res <- lapply(file, sf::st_read, quiet = quiet_parse) + names(res) <- vapply(file, + function(x) { sf::st_layers(x)$name }, + character(1)) + } + + } else { + message("Provided method not found. Options are: \n", + paste(methods,collapse=" "), + "\nReturning zip files.") + res <- unzipped_files } - } else { - message("Provided method not found. Options are: \n", - paste(methods,collapse=" "), - "\nReturning zip files.") + ## then cache the resp and return the parsed resp + client$.__enclos_env__$private$storr$set(key, res, "spatial_boundaries") - return(unzipped_files) - } + return(res) - return(res) + } } diff --git a/man/download_boundaries.Rd b/man/download_boundaries.Rd index 8ebef23..fd68aa7 100644 --- a/man/download_boundaries.Rd +++ b/man/download_boundaries.Rd @@ -11,7 +11,8 @@ download_boundaries( method = "sf", quiet_download = FALSE, quiet_parse = TRUE, - server_sleep = 5 + server_sleep = 5, + client = NULL ) } \arguments{ @@ -46,6 +47,10 @@ To just return the file paths for the files use method = "zip".} \item{server_sleep}{Numeric for length of sleep prior to downloading file from their survey. Default 5 seconds.} + +\item{client}{If the request should be cached, then provide a client +object created by \code{\link{client_dhs}}. Default = `NULL`, which will +search for a client to use} } \value{ Returns either the spatial file as a `sf` (see [sf::sf]) object, or diff --git a/tests/testthat/test_spatial_boundaries.R b/tests/testthat/test_spatial_boundaries.R index eebb743..359e0e7 100644 --- a/tests/testthat/test_spatial_boundaries.R +++ b/tests/testthat/test_spatial_boundaries.R @@ -65,3 +65,18 @@ test_that("Timout Spatial Boundaries Test", { }) +test_that("caching works", { + testthat::skip_on_cran() + skip_if_slow_API() + + cli <- new_rand_client() + dat <- api_timeout_safe_test( + download_boundaries(surveyNum = 471, countryId = "AF", client = cli), cli + ) + expect_identical(cli$.__enclos_env__$private$storr$list("spatial_boundaries"), "471_sf") + + # check that no message is sent second time + expect_no_message(dat <- api_timeout_safe_test( + download_boundaries(surveyNum = 471, countryId = "AF", client = cli), cli + )) +})