Commit
Added Travis CI and code coverage support.
mbjones committed Apr 3, 2020
1 parent b9d5b08 commit d02a348
Showing 8 changed files with 135 additions and 90 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -1,3 +1,5 @@
^scythe\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^\.travis\.yml$
^codecov\.yml$
7 changes: 7 additions & 0 deletions .travis.yml
@@ -0,0 +1,7 @@
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r

language: R
cache: packages

after_success:
- Rscript -e 'covr::codecov()'
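
The after_success hook uploads test coverage to Codecov once the Travis build passes. For context, a minimal sketch of measuring the same coverage locally before pushing, assuming the covr package is installed and the package root is the working directory:

    cov <- covr::package_coverage()   # runs the testthat suite and measures line coverage
    covr::percent_coverage(cov)       # overall coverage percentage
    covr::report(cov)                 # interactive HTML report (needs the DT package)
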
4 changes: 3 additions & 1 deletion DESCRIPTION
@@ -12,4 +12,6 @@ License: Apache License (>= 2.0)
Encoding: UTF-8
LazyData: true
Suggests:
testthat (>= 2.1.0)
testthat (>= 2.1.0),
covr
RoxygenNote: 7.0.2
11 changes: 9 additions & 2 deletions NAMESPACE
@@ -1,2 +1,9 @@
# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
exportPattern("^[^\\.]")
# Generated by roxygen2: do not edit by hand

import(bib2df)
import(curl)
import(dataone)
import(dplyr)
import(jsonlite)
import(rcrossref)
import(xml2)
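
The rewritten NAMESPACE is generated by roxygen2 from the @import tags added to the R sources below, replacing the old exportPattern() call. Note that no functions are exported yet; an @export tag on each function would be needed to expose them. A sketch of regenerating the file after editing the tags, assuming devtools (or roxygen2 alone) is installed:

    devtools::document()      # rebuilds NAMESPACE and man/ from the roxygen comments
    # equivalently, without devtools:
    roxygen2::roxygenise()
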
131 changes: 72 additions & 59 deletions R/brute_force_query_master.R
@@ -5,63 +5,76 @@
# rdatacite package
# opencitations.net

library(jsonlite)
library(dplyr)
library(xml2)
library(curl)

## bash SCOPUS queries
# Need to use our API key in links below (saved separately)
#
# # multi-page ADC
# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.18739\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.18739-2009-2019-pg${pg}.json; done
#
# # multi-page KNB
# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.5063\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.5063-2009-2019-pg${pg}.json; done

# query for ADC DOIs
cn <- CNode("PROD")
mn <- getMNode(cn, "urn:node:ARCTIC")

result <- query(mn, list(q = "formatType:METADATA AND (*:* NOT obsoletedBy:*)",
fl = "identifier,rightsHolder,formatId",
start ="0",
rows = "15000"),
as="data.frame")

dois <- grep("doi", result$identifier, value = T) %>%
gsub("doi:", "", .)

# brute force query SCOPUS for each DOI
t <- list()
for (i in 1:length(dois)){
t[[i]] <- fromJSON(curl(paste0("https://api.elsevier.com/content/search/scopus?query=ALL:",dois[i],"&APIKey=ae55f95a9d2f56c21147d3f9f6c4eef0")))
#' @import dplyr
#' @import jsonlite
#' @import xml2
#' @import curl
#' @import dataone
brute_force_query_master <- function() {
## bash SCOPUS queries
# Need to use our API key in links below (saved separately)
#
# # multi-page ADC
# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.18739\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.18739-2009-2019-pg${pg}.json; done
#
# # multi-page KNB
# for pg in 0 26; do curl https://api.elsevier.com/content/search/scopus?query=ALL:10.5063\&date=2009-2019\&APIKey=${APIKEY}\&start=${pg} -o results/scopus-10.5063-2009-2019-pg${pg}.json; done

# query for ADC DOIs
cn <- CNode("PROD")
mn <- getMNode(cn, "urn:node:ARCTIC")

result <-
query(mn, list(
q = "formatType:METADATA AND (*:* NOT obsoletedBy:*)",
fl = "identifier,rightsHolder,formatId",
start = "0",
rows = "15000"),
as = "data.frame"
)
dois <- grep("doi", result$identifier, value = T) %>%
gsub("doi:", "", .)

# brute force query SCOPUS for each DOI
t <- list()
for (i in 1:length(dois)) {
t[[i]] <-
fromJSON(curl(paste0("https://api.elsevier.com/content/search/scopus?query=ALL:", dois[i], "&APIKey=ae55f95a9d2f56c21147d3f9f6c4eef0")
))
}

# find the number of results per DOI
res <-
lapply(t, function(x) {
x$`search-results`$`opensearch:totalResults`
})

# filter out DOIs with no results
t_working <- t[which(res != 0)]

# pull out information for datasets with results
t_results <- lapply(t_working, function(x) {
x$`search-results`$entry$search <-
x$`search-results`$`opensearch:Query`$`@searchTerms`
return(x$`search-results`$entry)
})


results <- do.call(bind_rows, t_results)

# trim out unnecessary information
results_slim <- results %>%
dplyr::select(`prism:doi`, search) %>%
mutate(search = gsub("ALL:", "", search)) %>%
rename(
journal = `prism:doi`,
adc_dataset = search
)

# left col - citer, right col - citee
write.csv(
results_slim,
"~/dataone-citations/results/brute_force_results.csv",
row.names = F
)
}

# find the number of results per DOI
res <- lapply(t, function(x){x$`search-results`$`opensearch:totalResults`})

# filter out DOIs with no results
t_working <- t[which(res != 0)]

# pull out information for datasets with results
t_results <- lapply(t_working, function(x){
x$`search-results`$entry$search <- x$`search-results`$`opensearch:Query`$`@searchTerms`
return(x$`search-results`$entry)
})


results <- do.call(bind_rows, t_results)

# trim out unnecessary information
results_slim <- results %>%
dplyr::select(`prism:doi`, search) %>%
mutate(search = gsub("ALL:", "", search)) %>%
rename(journal = `prism:doi`,
adc_dataset = search)

# left col - citer, right col - citee
write.csv(results_slim, "~/dataone-citations/results/brute_force_results.csv", row.names = F)
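
Both versions of the loop embed a Scopus API key directly in the query URL, although the comment above notes the key is kept separately. A hypothetical alternative for the loop body, reading the key from an environment variable instead (the SCOPUS_API_KEY name and its presence in ~/.Renviron are assumptions, not part of this commit):

    # hypothetical: keep the key out of the source; set SCOPUS_API_KEY in ~/.Renviron
    api_key <- Sys.getenv("SCOPUS_API_KEY")
    url <- paste0("https://api.elsevier.com/content/search/scopus?query=ALL:",
                  dois[i], "&APIKey=", api_key)
    t[[i]] <- jsonlite::fromJSON(curl::curl(url))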



57 changes: 29 additions & 28 deletions R/write_citation_pairs.R
@@ -1,34 +1,35 @@
library(rcrossref)
library(jsonlite)
library(bib2df)
#' @import rcrossref
#' @import jsonlite
#' @import bib2df
#' @import dplyr
write_citation_pairs <- function() {

# manually generated citations list
cit <- read.csv("DBO_citations/citationlist.csv", stringsAsFactors = F)

# manually generated citations list
cit <- read.csv("DBO_citations/citationlist.csv", stringsAsFactors = F)
# write list of citations to bib format
bib <- cr_cn(dois = cit$publicationDOI, format = "bibtex")
writeLines(unlist(bib), "DBO_citations/all_citations.bib" )

# write list of citations to bib format
bib <- cr_cn(dois = cit$publicationDOI, format = "bibtex")
writeLines(unlist(bib), "DBO_citations/all_citations.bib" )
# import as a dataframe
df <- bib2df("~/dataone-citations/DBO_citations/all_citations.bib")
df$datasetID <- cit$datasetID

# import as a dataframe
df <- bib2df("~/dataone-citations/DBO_citations/all_citations.bib")
df$datasetID <- cit$datasetID
# rename for database ingest
cit_full <- df %>%
rename(target_id = datasetID,
source_id = DOI,
source_url = URL,
origin = AUTHOR,
title = TITLE,
publisher = PUBLISHER,
journal = JOURNAL,
volume = VOLUME,
page = PAGES,
year_of_publishing = YEAR) %>%
select(target_id, source_id, source_url, origin, title, publisher, journal, volume, page, year_of_publishing) %>%
mutate(id = NA, report = NA, metadata = NA, link_publication_date = NA) %>%
mutate(publisher = ifelse(publisher == "Elsevier {BV", "Elsevier", "Copernicus"))

# rename for database ingest
cit_full <- df %>%
rename(target_id = datasetID,
source_id = DOI,
source_url = URL,
origin = AUTHOR,
title = TITLE,
publisher = PUBLISHER,
journal = JOURNAL,
volume = VOLUME,
page = PAGES,
year_of_publishing = YEAR) %>%
select(target_id, source_id, source_url, origin, title, publisher, journal, volume, page, year_of_publishing) %>%
mutate(id = NA, report = NA, metadata = NA, link_publication_date = NA) %>%
mutate(publisher = ifelse(publisher == "Elsevier {BV", "Elsevier", "Copernicus"))

write_json(cit_full, "~/dataone-citations/DBO_citations/citations_export.json")
write_json(cit_full, "~/dataone-citations/DBO_citations/citations_export.json")
}
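
write_citation_pairs() reads and writes hardcoded relative and home-directory paths, so it only works under a particular project layout. A hypothetical usage sketch under that assumption:

    # assumes a dataone-citations checkout containing DBO_citations/citationlist.csv
    setwd("~/dataone-citations")
    write_citation_pairs()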

1 change: 1 addition & 0 deletions README.md
@@ -1,5 +1,6 @@
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/scythe)](https://cran.r-project.org/package=scythe)
[![Build Status](https://travis-ci.org/DataONEorg/scythe.png?branch=master)](https://travis-ci.org/DataONEorg/scythe)
[![Codecov test coverage](https://codecov.io/gh/DataONEorg/scythe/branch/master/graph/badge.svg)](https://codecov.io/gh/DataONEorg/scythe?branch=master)
[![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)

- **Authors**: TBD
12 changes: 12 additions & 0 deletions codecov.yml
@@ -0,0 +1,12 @@
comment: false

coverage:
status:
project:
default:
target: auto
threshold: 1%
patch:
default:
target: auto
threshold: 1%
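
With target: auto and threshold: 1%, Codecov compares each build's coverage against the base commit and tolerates a drop of up to one percent before failing the status check. For reference, a similar setup can be scaffolded from R with usethis; this is an assumption about tooling, as the files in this commit may well have been written by hand:

    # creates codecov.yml and adds covr to Suggests (badge setup may still be manual)
    usethis::use_coverage(type = "codecov")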
