Skip to content

Commit

Permalink
cache spatial boundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
OJWatson committed Apr 3, 2024
1 parent 686bef3 commit f7389c4
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 49 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: rdhs
Type: Package
Title: API Client and Dataset Management for the Demographic and Health Survey (DHS) Data
Version: 0.8.1
Version: 0.8.2
Authors@R:
c(person(given = "OJ",
family = "Watson",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# rdhs (development version)

## rdhs 0.8.2

* Spatial boundaries will be cached using the DHS client

## rdhs 0.8.1

* Convert DHS dataset flat file data dictionaries to UTF-8. This addresses parsing
Expand Down
20 changes: 20 additions & 0 deletions R/client.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ client_dhs <- function(config=NULL,
private$storr$del(key, "downloaded_datasets_variable_names")
}
}

# Now do the same for spatial boundaries
dats <- client$dhs_api_request(api_endpoint = "dataupdates")
downloaded_spatial_keys <- private$storr$list("spatial_boundaries")
downloaded_surveyNums <- strsplit(downloaded_spatial_keys, "_") %>%
lapply(function(x) x[1]) %>%
unlist()

# which of the updates have occured since our last client was created
chge <- dats$SurveyNum[mdy_hms(dats$FileDateLastModified) > client$get_cache_date()]
datasets_to_clear <- which(downloaded_spatial_keys %in% chge)

# do any of them match those that have been updated since the
# last cache_date
if (length(datasets_to_clear) > 0) {
for (key in downloaded_spatial_keys[datasets_to_clear]) {
private$storr$del(key, "spatial_boundaries")
}
}

}
}

Expand Down
124 changes: 77 additions & 47 deletions R/shapes.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
#' `method = "sf"`. Default is `TRUE`.
#' @param server_sleep Numeric for length of sleep prior to downloading file
#' from their survey. Default 5 seconds.
#' @param client If the request should be cached, then provide a client
#' object created by \code{\link{client_dhs}}. Default = `NULL`, which will
#' search for a client to use
#'
#' @details Downloads the spatial boundaries from the DHS spatial repository,
#' which can be found at \url{https://spatialdata.dhsprogram.com/home/}.
Expand All @@ -51,7 +54,8 @@ download_boundaries <- function(surveyNum=NULL,
method = "sf",
quiet_download = FALSE,
quiet_parse = TRUE,
server_sleep = 5){
server_sleep = 5,
client = NULL){

# helper funcs
build_final_url <- function(jobId) {
Expand Down Expand Up @@ -89,60 +93,86 @@ download_boundaries <- function(surveyNum=NULL,
countryId <- dats$DHS_CountryCode[match(surveyNum, dats$SurveyNum)]
}

# build our url
# ----------------------------------------------------------------------------
# if no client was provided we'll look for
# the package environment client by default
if (is.null(client)) {
client <- check_for_client()
}

# create url from surveyNum
alt_url <- paste0("https://gis.dhsprogram.com/arcgis/rest/services/Tools/",
"DownloadSubnationalData/GPServer/",
"downloadSubNationalBoundaries/submitJob")
# create db key
key <- paste0(surveyNum, "_", method)

values <- list(
survey_ids = surveyNum,
spatial_format = "shp",
f = "json"
# first check against cache
out <- tryCatch(
client$.__enclos_env__$private$storr$get(key, "spatial_boundaries"),
KeyError = function(e) {
NULL
}
)

# fetch jobID
z <- httr::GET(
httr::modify_url(alt_url, query = values)
)
# check out agianst cache, if fine then return just that
if (!is.null(out)) {
return(out)
} else {

tf <- tempfile()
y <- writeBin(z$content, con = tf)
h <- jsonlite::fromJSON(brio::read_lines(tf))
url <- build_final_url(h$jobId)

# pause for a second for the job id created to appear on their server
# i.e. the code only works with this...
Sys.sleep(server_sleep)

# download the shape file and read it in
tf2 <- tempfile()
file <- download.file(url, tf2, quiet = quiet_download)
unzipped_files <- suppressWarnings(unzip(tf2, exdir = tempfile()))
file <- grep("dbf", unzipped_files, value=TRUE)

# how are we reading the dataset in
methods <- c("sf")
if (method %in% methods) {

# here if we want to add more read in options
if(method == "sf") {
res <- lapply(file, sf::st_read, quiet = quiet_parse)
names(res) <- vapply(file,
function(x) { sf::st_layers(x)$name },
character(1))
# build our url
# ----------------------------------------------------------------------------

# create url from surveyNum
alt_url <- paste0("https://gis.dhsprogram.com/arcgis/rest/services/Tools/",
"DownloadSubnationalData/GPServer/",
"downloadSubNationalBoundaries/submitJob")

values <- list(
survey_ids = surveyNum,
spatial_format = "shp",
f = "json"
)

# fetch jobID
z <- httr::GET(
httr::modify_url(alt_url, query = values)
)

tf <- tempfile()
y <- writeBin(z$content, con = tf)
h <- jsonlite::fromJSON(brio::read_lines(tf))
url <- build_final_url(h$jobId)

# pause for a second for the job id created to appear on their server
# i.e. the code only works with this...
Sys.sleep(server_sleep)

# download the shape file and read it in
tf2 <- tempfile()
file <- download.file(url, tf2, quiet = quiet_download)
unzipped_files <- suppressWarnings(unzip(tf2, exdir = tempfile()))
file <- grep("dbf", unzipped_files, value=TRUE)

# how are we reading the dataset in
methods <- c("sf")
if (method %in% methods) {

# here if we want to add more read in options
if(method == "sf") {
res <- lapply(file, sf::st_read, quiet = quiet_parse)
names(res) <- vapply(file,
function(x) { sf::st_layers(x)$name },
character(1))
}

} else {
message("Provided method not found. Options are: \n",
paste(methods,collapse=" "),
"\nReturning zip files.")
res <- unzipped_files
}

} else {
message("Provided method not found. Options are: \n",
paste(methods,collapse=" "),
"\nReturning zip files.")
## then cache the resp and return the parsed resp
client$.__enclos_env__$private$storr$set(key, res, "spatial_boundaries")

return(unzipped_files)
}
return(res)

return(res)
}

}
7 changes: 6 additions & 1 deletion man/download_boundaries.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/testthat/test_spatial_boundaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,18 @@ test_that("Timout Spatial Boundaries Test", {
})


test_that("caching works", {
testthat::skip_on_cran()
skip_if_slow_API()

cli <- new_rand_client()
dat <- api_timeout_safe_test(
download_boundaries(surveyNum = 471, countryId = "AF", client = cli), cli
)
expect_identical(cli$.__enclos_env__$private$storr$list("spatial_boundaries"), "471_sf")

# check that no message is sent second time
expect_no_message(dat <- api_timeout_safe_test(
download_boundaries(surveyNum = 471, countryId = "AF", client = cli), cli
))
})

0 comments on commit f7389c4

Please sign in to comment.