Skip to content

Commit

Permalink
remove rplos dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanetteclark committed Aug 6, 2024
1 parent 5990d63 commit af408d8
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 8 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Authors@R: c(
person("Matthew B.", "Jones", role = "aut", email = "jones@nceas.ucsb.edu", comment=c(ORCID = "0000-0003-0077-4738")),
person("Maya", "Samet", role = "aut", email = "samet@nceas.ucsb.edu", comment=c(ORCID = "0000-0002-5248-9712"))
)
Description: Harvests data package citations from several API sources, including PLOS, Scopus, and Springer.
Description: Harvests data package citations from several API sources, including PLOS, Scopus, and Springer. This package uses modified functions from `rplos`, which is no longer maintained.
License: Apache License (>= 2.0)
Encoding: UTF-8
LazyData: true
Expand All @@ -17,11 +17,11 @@ Imports:
jsonlite,
keyring,
rcrossref,
rplos,
solrium,
stats
Suggests:
covr,
purrr,
testthat (>= 3.0.0)
RoxygenNote: 7.1.2
RoxygenNote: 7.3.1
Config/testthat/edition: 3
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ export(write_citation_pairs)
import(dplyr)
importFrom(curl,curl)
importFrom(jsonlite,fromJSON)
importFrom(rplos,searchplos)
importFrom(stats,complete.cases)
122 changes: 119 additions & 3 deletions R/citation_search_plos.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
#'
#' This function searches for citations in PLOS. Requests are throttled
#' at one identifier every 6 seconds so as to not overload the PLOS
#' API.
#' API. This function uses modified source code from the `rplos` package,
#' which is no longer maintained.
#'
#' @param identifiers a vector of identifiers to be searched for
#'
#' @return tibble of matching dataset and publication identifiers
#' @export
#' @importFrom rplos searchplos
#' @examples
#' \dontrun{
#' identifiers <- c("10.18739/A22274", "10.18739/A2D08X", "10.5063/F1T151VR")
Expand All @@ -34,7 +34,7 @@ citation_search_plos <- function(identifiers) {
# search for identifier
results <- lapply(identifiers, function(x) {
Sys.sleep(6)
v <- rplos::searchplos(q = x,
v <- searchplos(q = x,
fl = c("id", "title"),
limit = 1000)
return(v)
Expand Down Expand Up @@ -71,3 +71,119 @@ citation_search_plos <- function(identifiers) {

return(plos_results)
}

#' A Modified Version of rplos::searchplos
#'
#' Adapted from `searchplos()` in the `rplos` package, which is no longer
#' maintained. Queries the PLOS Solr search endpoint (`api.plos.org/search`)
#' through a `solrium` client. A first request with `rows = 0` reads the total
#' number of matches. If fewer than 1000 records are wanted they are fetched
#' in a single request; otherwise they are fetched in pages of 500 records and
#' row-bound together. In the paged case requests begin at record 0 and
#' `start` is not applied.
#'
#' @param q Search terms, eg: field:query
#' @param fl Fields to return. Multiple fields are collapsed into a single
#'   comma-separated string; reset to empty when `limit` is 0.
#' @param fq Fields to filter query on
#' @param sort Sort results according to field
#' @param start Record to start at for pagination
#' @param limit Number of results to return for pagination. Must be a single
#'   value coercible to a number; `NULL` is treated as 999.
#' @param sleep Seconds to wait between requests. Accepted for compatibility
#'   with `rplos`; not used by this implementation.
#' @param errors One of simple or complete. Accepted for compatibility with
#'   `rplos`; not used by this implementation.
#' @param proxy List of args for proxy connection. Accepted for compatibility
#'   with `rplos`; not used by this implementation.
#' @param callopts Optional curl options, passed to the client's `search()`
#' @param progress Optional logic for progress bar, passed to the client's
#'   `search()`
#' @param ... Additional Solr arguments, passed to the client's `search()`
#'
#' @return A list with two elements: `meta`, a tibble holding the `numFound`
#'   and `start` attributes of the (last) search response, and `data`, the
#'   search results.
searchplos <- function(q = NULL, fl = "id", fq = NULL, sort = NULL, start = 0,
                       limit = 10, sleep = 6, errors = "simple", proxy = NULL,
                       callopts = list(), progress = NULL, ...) {

  # Apply the NULL default before validating; coercing NULL yields a
  # zero-length vector, which can never pass the checks below.
  if (is.null(limit)) limit <- 999

  # Make sure limit is a single numeric or integer value
  limit <- tryCatch(as.numeric(as.character(limit)), warning = function(w) w)
  if (inherits(limit, "warning") || length(limit) != 1L || is.na(limit)) {
    stop("limit should be a numeric or integer class value", call. = FALSE)
  }

  if (limit == 0) fl <- NULL
  fl <- paste(fl, collapse = ",")

  # Every filter query is sent under the name "fq"
  args <- list()
  if (length(fq) > 0 && !is.null(fq[[1]])) {
    if (length(fq) == 1) {
      args$fq <- fq
    } else {
      args <- fq
      names(args) <- rep("fq", length(args))
    }
  }
  args <- c(args, ploscompact(list(q = q, fl = fl, start = as.integer(start),
                                   rows = as.integer(limit), sort = sort,
                                   wt = "json")))

  conn_plos <- solrium::SolrClient$new(host = "api.plos.org", path = "search",
                                       port = NULL)

  # Zero-row request, used only to read the total number of matches.
  # NOTE(review): this count request is sent without `fq`; confirm whether
  # filtered queries should be counted with their filters applied.
  getnum_tmp <- suppressMessages(
    conn_plos$search(params = list(q = q, fl = fl, rows = 0, wt = "json"))
  )
  getnumrecords <- attr(getnum_tmp, "numFound")

  # Never fetch more records than the caller asked for
  if (getnumrecords > limit) {
    getnumrecords <- limit
  }

  if (getnumrecords < 1000) {
    # Small result set: one request is enough
    args$rows <- limit
    jsonout <- suppressMessages(
      conn_plos$search(params = args, callopts = callopts,
                       minOptimizedRows = FALSE, progress = progress, ...)
    )
    meta <- dplyr::tibble(
      numFound = attr(jsonout, "numFound"),
      start = attr(jsonout, "start")
    )
    return(list(meta = meta, data = jsonout))
  }

  # Large result set: fetch in pages of `byby` records
  byby <- 500
  getvecs <- seq(from = 0, to = getnumrecords - 1, by = byby)
  # The last page holds whatever remains after the final offset. Computing it
  # arithmetically avoids parsing the number's string form, which fails for
  # values that print in scientific notation (e.g. 1e+05).
  lastnum <- getnumrecords - getvecs[length(getvecs)]
  getrows <- c(rep(byby, length(getvecs) - 1), lastnum)

  fq_args <- args[names(args) == "fq"]
  out <- vector("list", length(getvecs))
  for (i in seq_along(getvecs)) {
    jsonout <- suppressMessages(conn_plos$search(
      params = ploscompact(list(q = q, fl = fl,
                                fq = fq_args,
                                sort = sort,
                                rows = as.integer(getrows[i]),
                                start = as.integer(getvecs[i]),
                                wt = "json")),
      minOptimizedRows = FALSE, callopts = callopts,
      progress = progress, ...
    ))
    out[[i]] <- jsonout
  }
  resdf <- dplyr::bind_rows(out)
  # Metadata is taken from the last page's response
  meta <- dplyr::tibble(
    numFound = attr(jsonout, "numFound"),
    start = attr(jsonout, "start")
  )
  list(meta = meta, data = resdf)
}
#' Drop `NULL` elements from a list
#'
#' Adapted from the `rplos` package, which is no longer maintained.
#'
#' @param l a list
#' @return `l` with every `NULL` element removed; names of the remaining
#'   elements are kept.
ploscompact <- function(l) {
  is_empty <- vapply(l, is.null, logical(1))
  l[!is_empty]
}

#' Extract the first regex match from each string
#'
#' Adapted from the `rplos` package, which is no longer maintained.
#'
#' @param str A string
#' @param pattern A regex pattern
#' @return A character vector with the first match of `pattern` in each
#'   element of `str`; elements without a match are dropped.
strextract <- function(str, pattern) {
  first_hit <- regexpr(pattern, str)
  regmatches(str, first_hit)
}
1 change: 1 addition & 0 deletions R/citation_search_springer.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @return tibble of matching dataset and publication identifiers
#' @importFrom jsonlite fromJSON
#' @importFrom curl curl
#' @importFrom stats complete.cases
#' @export
#' @examples
#' \dontrun{
Expand Down
3 changes: 2 additions & 1 deletion man/citation_search_plos.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/ploscompact.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 49 additions & 0 deletions man/searchplos.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions man/strextract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit af408d8

Please sign in to comment.