Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Creates globi interaction dataset #18

Open
wants to merge 12 commits into
base: DataClean_Spring2018
Choose a base branch
from
55 changes: 55 additions & 0 deletions R/create-globi-interactions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Merge the association, reference, host and virus sheets into the GloBI
# interactions format and write the result to data/globi_interactions.csv.
#
# Every row of the merged association table becomes one "pathogen of"
# interaction, with the virus as the source taxon and the host as the target
# taxon. GloBI columns this dataset has no value for are left as empty strings.
library(dplyr)

# Read in datasheets ----
# Keep text as character: before R 4.0 read.csv() turns strings into factors,
# and joining on factor keys with different levels triggers coercion warnings.
asc <- read.csv(file = "data/associations.csv", stringsAsFactors = FALSE)
ref <- read.csv(file = "data/references.csv", stringsAsFactors = FALSE)
host <- read.csv(file = "data/hosts.csv", stringsAsFactors = FALSE)
virus <- read.csv(file = "data/viruses.csv", stringsAsFactors = FALSE)

# Merge sheets ----
# NOTE(review): left_join() repeats an association row for every matching row
# in the lookup sheet, so a duplicated key in references/hosts/viruses would
# duplicate interactions. Presumably the keys are unique; confirm.
asc <- left_join(asc, ref, by = "referencekey") # merge in the references
asc <- left_join(asc, host, by = "hHostNameFinal") # merge in the host taxonomy
asc <- left_join(asc, virus, by = "vVirusNameCorrected") # merge in the virus taxonomy

# Select columns for interactions sheet ----
asc <- dplyr::select(
  asc,
  vVirusNameCorrected, vOrder, vFamily, vGenus,
  hHostNameFinal, hOrder, hFamily, hGenus,
  Reference
)

# Create GloBI interactions sheet ----
n_rows <- nrow(asc)
# Placeholder for the columns that have no value in this dataset. Built with
# rep() rather than a bare "" so a zero-row input still yields a valid frame.
blank <- rep("", n_rows)

globi_df <- data.frame(
  "sourceOccurrenceId" = blank,
  "sourceTaxonId" = blank,
  "sourceTaxonName" = asc$vVirusNameCorrected,
  "sourceTaxonOrder" = asc$vOrder,
  "sourceTaxonFamily" = asc$vFamily,
  "sourceTaxonGenus" = asc$vGenus,
  "sourceBodyPartId" = blank,
  "sourceBodyPartName" = blank,
  "sourceLifeStageId" = blank,
  "sourceLifeStageName" = blank,
  "interactionTypeId" = rep("http://purl.obolibrary.org/obo/RO_0002556", n_rows),
  "interactionTypeName" = rep("pathogen of", n_rows),
  "targetOccurrenceId" = blank,
  "targetTaxonId" = blank,
  "targetTaxonName" = asc$hHostNameFinal,
  "targetTaxonOrder" = asc$hOrder,
  "targetTaxonFamily" = asc$hFamily,
  "targetTaxonGenus" = asc$hGenus,
  "targetBodyPartId" = blank,
  "targetBodyPartName" = blank,
  "targetLifeStageId" = blank,
  "targetLifeStageName" = blank,
  "localityId" = blank,
  "localityName" = blank,
  "decimalLatitude" = blank,
  "decimalLongitude" = blank,
  "observationDateTime" = blank,
  "referenceDoi" = blank,
  "referenceUrl" = blank,
  "referenceCitation" = asc$Reference,
  stringsAsFactors = FALSE
)

# Remove underscore from host name and virus name ----
# fixed = TRUE: "_" is a literal character here, not a regular expression.
globi_df$sourceTaxonName <- gsub("_", " ", globi_df$sourceTaxonName, fixed = TRUE)
globi_df$targetTaxonName <- gsub("_", " ", globi_df$targetTaxonName, fixed = TRUE)

# Save merged file ----
# row.names = FALSE keeps R's row index out of the file; otherwise it is
# written as an extra unnamed first column that is not a GloBI field.
# na = "" writes missing values (e.g. an association with no matching
# reference) as empty fields, like the other blank columns, instead of the
# literal text "NA".
write.csv(
  globi_df,
  file = "data/globi_interactions.csv",
  row.names = FALSE,
  na = ""
)
6 changes: 3 additions & 3 deletions data/associations.csv
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Laguna_Negra_virus,Akodon_simulator,wild,PCR,2,Levis et al. 2004
Andes_virus,Akodon_varius,wild,Antibodies,0,Sosa-Estani et al. 2002
Akabane_virus,Alcelaphus_buselaphus,wild,Antibodies,0,Davies & Jessett 1985
Alcelaphine_herpesvirus_2,Alcelaphus_buselaphus,wild,Isolation,2,Reid & Rowe 1973
Bluetongue_virus,Alcelaphus_buselaphus,wild,Antibodies,0,Hamblin et al.
Bluetongue_virus,Alcelaphus_buselaphus,wild,Antibodies,0,Hamblin et al. 1990
Bovine_viral_diarrhea_virus_1,Alcelaphus_buselaphus,wild,SNT,1,Hamblin & Hedger 1979
Crimean-Congo_hemorrhagic_fever_virus,Alcelaphus_buselaphus,wild,Antibodies,0,Shepherd et al. 1987
Foot-and-mouth_disease_virus,Alcelaphus_buselaphus,wild,Antibodies,0,Ayebazibwe et al. 2010
Expand Down Expand Up @@ -877,7 +877,7 @@ Yellow_fever_virus,Erythrocebus_patas,wild,Isolation,2,Deubel et al. 1985
Vesicular_exanthema_of_swine_virus,Eschrichtius_robustus,wild,Antibodies,0,Smith & Latham 1978
Rabies_virus,Euderma_maculatum,wild,Isolation,2,Nadin-Davis et al. 2001
Akabane_virus,Eudorcas_thomsonii,wild,Antibodies,0,Davies & Jessett 1985
Bluetongue_virus,Eudorcas_thomsonii,wild,Antibodies,0,Anderson et al. 1998
Bluetongue_virus,Eudorcas_thomsonii,wild,Antibodies,0,Davies & Walker 1974
Equid_herpesvirus_1,Eudorcas_thomsonii,captive,PCR,2,Wohlsein et al. 2011
Equid_herpesvirus_9,Eudorcas_thomsonii,wild,Isolation,2,Fukushi et al. 1997
Foot-and-mouth_disease_virus,Eudorcas_thomsonii,wild,Antibodies,0,Anderson 1981
Expand Down Expand Up @@ -2215,7 +2215,7 @@ Western_equine_encephalitis_virus,Procyon_lotor,wild,hemmaglutination,0,Hardy 19
Venezuelan_equine_encephalitis_virus,Proechimys_guairae,wild,Antibodies,0,Navarro et al. 2005
Aroa_virus,Proechimys_guyannensis,wild,isolation,2,Shope 2003
Bimiti_virus,Proechimys_guyannensis,wild,Isolation,2,Shope et al. 1988
Bujaru_virus,Proechimys_guyannensis,wild,Isolation,2,NA
Bujaru_virus,Proechimys_guyannensis,wild,Isolation,2,Woodall 1967
Caraparu_virus,Proechimys_guyannensis,wild,Isolation,2,Shope et al. 1988
Catu_virus,Proechimys_guyannensis,wild,Isolation,2,Shope et al. 1988
Encephalomyocarditis_virus,Proechimys_guyannensis,wild,Isolation,2,Tesh & Wallace 1978
Expand Down
Loading