diff --git a/.Rbuildignore b/.Rbuildignore index 9cb8832..f99bd84 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ ^scythe\.Rproj$ ^\.Rproj\.user$ ^LICENSE\.md$ +^\.travis\.yml$ +^codecov\.yml$ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..5584b74 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r + +language: R +cache: packages + +after_success: + - Rscript -e 'covr::codecov()' diff --git a/DESCRIPTION b/DESCRIPTION index dd71d4d..e6e916a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,4 +12,6 @@ License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true Suggests: - testthat (>= 2.1.0) + testthat (>= 2.1.0), + covr +RoxygenNote: 7.0.2 diff --git a/NAMESPACE b/NAMESPACE index 884a631..1f76a38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,9 @@ -# Generated by roxygen2: fake comment so roxygen2 overwrites silently. -exportPattern("^[^\\.]") +# Generated by roxygen2: do not edit by hand + +import(bib2df) +import(curl) +import(dataone) +import(dplyr) +import(jsonlite) +import(rcrossref) +import(xml2) diff --git a/R/brute_force_query_master.R b/R/brute_force_query_master.R index a272130..93f199c 100644 --- a/R/brute_force_query_master.R +++ b/R/brute_force_query_master.R @@ -5,63 +5,76 @@ # rdatacite package # opencitations.net -library(jsonlite) -library(dplyr) -library(xml2) -library(curl) - -## bash SCOPUS queries -# Need to use our API key in links below (saved separately) -# -# # multi-page ADC -# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.18739\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.18739-2009-2019-pg${pg}.json; done -# -# # multi-page KNB -# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query==ALL:10.5063\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.5063-2009-2019-pg${pg}.json; done - -# query for ADC DOIs -cn <- CNode("PROD") -mn <- getMNode(cn, "urn:node:ARCTIC") - -result <- query(mn, list(q = "formatType:METADATA AND (*:* NOT obsoletedBy:*)", - fl = "identifier,rightsHolder,formatId", - start ="0", - rows = "15000"), - as="data.frame") - -dois <- grep("doi", result$identifier, value = T) %>% - gsub("doi:", "", .) - -# brute force query SCOPUS for each DOI -t <- list() -for (i in 1:length(dois)){ - t[[i]] <- fromJSON(curl(paste0("https://api.elsevier.com/content/search/scopus?query=ALL:",dois[i],"&APIKey=ae55f95a9d2f56c21147d3f9f6c4eef0"))) +#' @import dplyr +#' @import jsonlite +#' @import xml2 +#' @import curl +#' @import dataone +brute_force_query_master <- function() { + ## bash SCOPUS queries + # Need to use our API key in links below (saved separately) + # + # # multi-page ADC + # for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.18739\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.18739-2009-2019-pg${pg}.json; done + # + # # multi-page KNB + # for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query==ALL:10.5063\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.5063-2009-2019-pg${pg}.json; done + + # query for ADC DOIs + cn <- CNode("PROD") + mn <- getMNode(cn, "urn:node:ARCTIC") + + result <- + query(mn, list( + q = "formatType:METADATA AND (*:* NOT obsoletedBy:*)", + fl = "identifier,rightsHolder,formatId", + start = "0", + rows = "15000"), + as = "data.frame" + ) + dois <- grep("doi", result$identifier, value = T) %>% + gsub("doi:", "", .) + + # brute force query SCOPUS for each DOI + t <- list() + for (i in 1:length(dois)) { + t[[i]] <- + fromJSON(curl(paste0("https://api.elsevier.com/content/search/scopus?query=ALL:", dois[i], "&APIKey=ae55f95a9d2f56c21147d3f9f6c4eef0") + )) + } + + # find the number of results per DOI + res <- + lapply(t, function(x) { + x$`search-results`$`opensearch:totalResults` + }) + + # filter out DOIs with no results + t_working <- t[which(res != 0)] + + # pull out information for datasets with results + t_results <- lapply(t_working, function(x) { + x$`search-results`$entry$search <- + x$`search-results`$`opensearch:Query`$`@searchTerms` + return(x$`search-results`$entry) + }) + + + results <- do.call(bind_rows, t_results) + + # trim out unnecessary information + results_slim <- results %>% + dplyr::select(`prism:doi`, search) %>% + mutate(search = gsub("ALL:", "", search)) %>% + rename( + journal = `prism:doi`, + adc_dataset = search + ) + + # left col - citer, right col - citee + write.csv( + results_slim, + "~/dataone-citations/results/brute_force_results.csv", + row.names = F + ) } - -# find the number of results per DOI -res <- lapply(t, function(x){x$`search-results`$`opensearch:totalResults`}) - -# filter out DOIs with no results -t_working <- t[which(res != 0)] - -# pull out information for datasets with results -t_results <- lapply(t_working, function(x){ - x$`search-results`$entry$search <- x$`search-results`$`opensearch:Query`$`@searchTerms` - return(x$`search-results`$entry) - }) - - -results <- do.call(bind_rows, t_results) - -# trim out unnecessary information -results_slim <- results %>% - dplyr::select(`prism:doi`, search) %>% - mutate(search = gsub("ALL:", "", search)) %>% - rename(journal = `prism:doi`, - adc_dataset = search) - -# left col - citer, right col - citee -write.csv(results_slim, "~/dataone-citations/results/brute_force_results.csv", row.names = F) - - - diff --git a/R/write_citation_pairs.R b/R/write_citation_pairs.R index a5c6891..22f69fa 100644 --- a/R/write_citation_pairs.R +++ b/R/write_citation_pairs.R @@ -1,34 +1,35 @@ -library(rcrossref) -library(jsonlite) -library(bib2df) +#' @import rcrossref +#' @import jsonlite +#' @import bib2df +write_citation_pairs <- function() { + # manually generated citations list + cit <- read.csv("DBO_citations/citationlist.csv", stringsAsFactors = F) -# manually generated citations list -cit <- read.csv("DBO_citations/citationlist.csv", stringsAsFactors = F) + # write list of citations to bib format + bib <- cr_cn(dois = cit$publicationDOI, format = "bibtex") + writeLines(unlist(bib), "DBO_citations/all_citations.bib" ) -# write list of citations to bib format -bib <- cr_cn(dois = cit$publicationDOI, format = "bibtex") -writeLines(unlist(bib), "DBO_citations/all_citations.bib" ) + # import as a dataframe + df <- bib2df("~/dataone-citations/DBO_citations/all_citations.bib") + df$datasetID <- cit$datasetID -# import as a dataframe -df <- bib2df("~/dataone-citations/DBO_citations/all_citations.bib") -df$datasetID <- cit$datasetID + # rename for database ingest + cit_full <- df %>% + rename(target_id = datasetID, + source_id = DOI, + source_url = URL, + origin = AUTHOR, + title = TITLE, + publisher = PUBLISHER, + journal = JOURNAL, + volume = VOLUME, + page = PAGES, + year_of_publishing = YEAR) %>% + select(target_id, source_id, source_url, origin, title, publisher, journal, volume, page, year_of_publishing) %>% + mutate(id = NA, report = NA, metadata = NA, link_publication_date = NA) %>% + mutate(publisher = ifelse(publisher == "Elsevier {BV", "Elsevier", "Copernicus")) -# rename for database ingest -cit_full <- df %>% - rename(target_id = datasetID, - source_id = DOI, - source_url = URL, - origin = AUTHOR, - title = TITLE, - publisher = PUBLISHER, - journal = JOURNAL, - volume = VOLUME, - page = PAGES, - year_of_publishing = YEAR) %>% - select(target_id, source_id, source_url, origin, title, publisher, journal, volume, page, year_of_publishing) %>% - mutate(id = NA, report = NA, metadata = NA, link_publication_date = NA) %>% - mutate(publisher = ifelse(publisher == "Elsevier {BV", "Elsevier", "Copernicus")) - -write_json(cit_full, "~/dataone-citations/DBO_citations/citations_export.json") + write_json(cit_full, "~/dataone-citations/DBO_citations/citations_export.json") +} diff --git a/README.md b/README.md index c1d5dda..0e33e7a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/scythe)](https://cran.r-project.org/package=scythe) [![Build Status](https://travis-ci.org/DataONEorg/scythe.png?branch=master)](https://travis-ci.org/DataONEorg/scythe) +[![Codecov test coverage](https://codecov.io/gh/DataONEorg/scythe/branch/master/graph/badge.svg)](https://codecov.io/gh/DataONEorg/scythe?branch=master) [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) - **Authors**: TBD diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..8f36b6c --- /dev/null +++ b/codecov.yml @@ -0,0 +1,12 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + patch: + default: + target: auto + threshold: 1%