-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #88 from microbiomedata/69-run-lexmatch-agaisnt-nm…
…dc-schema-and-push-sssom-output-to-repo run lexmatch against nmdc schema and push sssom output to repo
- Loading branch information
Showing
134 changed files
with
751,138 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash
#
# Run `runoak lexmatch` (via poetry) once per ontology named in
# for-lexmatch.txt, writing one SSSOM TSV per ontology into lexmatch-output/.
# stderr of every runoak invocation is appended to lexmatch-errors.txt.
#
# Usage: run from the directory that contains for-lexmatch.txt and the
# input .db file(s) referenced below.

readonly INPUT_LIST="for-lexmatch.txt"
readonly ERROR_LOG="lexmatch-errors.txt"
readonly OUTPUT_DIR="lexmatch-output"

#######################################
# Run one lexmatch of an input database against one OBO ontology.
# Globals:   OUTPUT_DIR (read), ERROR_LOG (appended to)
# Arguments: $1 - human-readable label printed before the run
#            $2 - input database passed to `runoak -i`
#            $3 - output file prefix (<prefix>_vs_<ontology>.SSSOM.tsv)
#            $4 - first term of the query (e.g. i^nmdc_sub_schema)
#            $5 - ontology name, used as sqlite:obo:<ontology>
# Outputs:   progress line on stdout; runoak stderr goes to ERROR_LOG
# Returns:   exit status of `poetry run runoak`
#######################################
run_lexmatch() {
  local label=$1
  local input_db=$2
  local output_prefix=$3
  local subject_query=$4
  local ontology=$5

  echo "$label vs $ontology"
  poetry run runoak \
    -i "$input_db" \
    -a "sqlite:obo:${ontology}" \
    lexmatch \
    --add-pipeline-step CaseNormalization \
    --add-pipeline-step WhitespaceNormalization \
    --add-pipeline-step WordOrderNormalization \
    --output "${OUTPUT_DIR}/${output_prefix}_vs_${ontology}.SSSOM.tsv" \
    "$subject_query" @ .all \
    2>>"$ERROR_LOG"
}

# Check if the input file exists
if [[ ! -f "$INPUT_LIST" ]]; then
  echo "File for-lexmatch.txt not found!"
  exit 1
fi

# Start each run with a fresh error log; previous outputs are kept.
rm -rf -- "$ERROR_LOG"
#rm -rf lexmatch-output/*

# Make sure the --output target directory exists before the first run.
mkdir -p -- "$OUTPUT_DIR"

failures=0

# Loop through each line in the input file.
# The list is read on fd 3 so that nothing started inside the loop can
# consume the remaining ontology names through stdin. The `|| [[ -n ... ]]`
# keeps the final entry when the file has no trailing newline.
while IFS= read -r -u 3 ontology || [[ -n "$ontology" ]]; do
  # Skip blank lines.
  [[ -n "$ontology" ]] || continue

  # Other comparisons, currently disabled:
  # run_lexmatch "nmdc mixs" nmdc.db nmdc_mixs 'i^mixs' "$ontology"
  # run_lexmatch "nmdc nmdc" nmdc.db nmdc_nmdc 'i^nmdc' "$ontology"
  # run_lexmatch "submission schema" \
  #   nmdc_submission_schema_no_from_schema_no_brackets.db \
  #   submission_schema 'i^nmdc_sub_schema' "$ontology"

  # NOTE(review): this run is labelled "env triad PVs" but uses the same
  # i^nmdc_sub_schema query as the submission-schema run above -- confirm
  # that this is the intended query for env_triad_pvs_sheet.db.
  if ! run_lexmatch "env triad PVs" env_triad_pvs_sheet.db env_triad_pvs \
    'i^nmdc_sub_schema' "$ontology"; then
    # Keep going (best effort), but make the failure visible to the operator.
    echo "lexmatch failed for $ontology (see $ERROR_LOG)" >&2
    failures=$((failures + 1))
  fi
done 3<"$INPUT_LIST"

if ((failures > 0)); then
  echo "$failures lexmatch run(s) failed; details in $ERROR_LOG" >&2
fi

echo "All lexmatch operations completed."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
agro | ||
biolink | ||
chmo | ||
cob | ||
comet | ||
ecocore | ||
ecosim | ||
ecso | ||
ecto | ||
edam | ||
envo | ||
envthes | ||
goldterms | ||
iao | ||
ido | ||
mop | ||
ms | ||
obi | ||
omo | ||
omit | ||
pato | ||
po | ||
schema-dot-org | ||
sio | ||
sweetAll |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# curie_map: | ||
# COB: http://purl.obolibrary.org/obo/COB_ | ||
# ENVO: http://purl.obolibrary.org/obo/ENVO_ | ||
# RO: http://purl.obolibrary.org/obo/RO_ | ||
# owl: http://www.w3.org/2002/07/owl# | ||
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# | ||
# rdfs: http://www.w3.org/2000/01/rdf-schema# | ||
# semapv: https://w3id.org/semapv/vocab/ | ||
# skos: http://www.w3.org/2004/02/skos/core# | ||
# sssom: https://w3id.org/sssom/ | ||
# license: https://w3id.org/sssom/license/unspecified | ||
# mapping_set_id: https://w3id.org/sssom/mappings/f0aa1cf4-ee40-4933-be0d-903f7d8ed0cc | ||
subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool confidence subject_match_field object_match_field match_string | ||
COB:0000502 characteristic skos:closeMatch RO:0000052 characteristic of semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label characteristic | ||
ENVO:00002042 surface water skos:closeMatch ENVO:01001191 water surface semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label surface water | ||
ENVO:00003096 tap water skos:closeMatch ENVO:03600052 water tap semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label tap water |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# curie_map: | ||
# ENVO: http://purl.obolibrary.org/obo/ENVO_ | ||
# comet: http://w3id.org/sssom/unknown_prefix/comet/ | ||
# owl: http://www.w3.org/2002/07/owl# | ||
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# | ||
# rdfs: http://www.w3.org/2000/01/rdf-schema# | ||
# semapv: https://w3id.org/semapv/vocab/ | ||
# skos: http://www.w3.org/2004/02/skos/core# | ||
# sssom: https://w3id.org/sssom/ | ||
# license: https://w3id.org/sssom/license/unspecified | ||
# mapping_set_id: https://w3id.org/sssom/mappings/7bfe79fb-0c2c-4195-b83b-9c1dc0db925c | ||
subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool confidence subject_match_field object_match_field match_string | ||
ENVO:00000141 bank skos:closeMatch comet:bank bank semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label bank | ||
ENVO:00002042 surface water skos:closeMatch ENVO:01001191 water surface semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label surface water | ||
ENVO:00003096 tap water skos:closeMatch ENVO:03600052 water tap semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label tap water | ||
comet:ExtractiveIndustryProductType#COAL COAL skos:closeMatch comet:FossilFuelType#COAL COAL semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label coal | ||
comet:ExtractiveIndustryProductType#GAS GAS skos:closeMatch comet:WellType#GAS GAS semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label gas | ||
comet:ExtractiveIndustryProductType#OIL OIL skos:closeMatch comet:WellType#OIL OIL semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label oil | ||
comet:HealthcareSite HealthcareSite skos:closeMatch comet:PostalAddress PostalAddress semapv:LexicalMatching oaklib 0.5 skos:exactMatch skos:exactMatch ex:omop/care_site | ||
comet:Location Location skos:closeMatch comet:location location semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label location |
Oops, something went wrong.