Skip to content

Commit

Permalink
PVs lexmatch
Browse files Browse the repository at this point in the history
  • Loading branch information
turbomam committed Feb 24, 2025
1 parent 8db883b commit 104fbcc
Show file tree
Hide file tree
Showing 167 changed files with 374,044 additions and 420,855 deletions.
42 changes: 0 additions & 42 deletions Updated_Lexmatch_Failure_Report.csv

This file was deleted.

49 changes: 45 additions & 4 deletions for-lexmatch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,61 @@ if [ ! -f for-lexmatch.txt ]; then
exit 1
fi

# Start each run with an empty error log; every lexmatch call below appends
# its stderr to this file.
rm -f -- lexmatch-errors.txt
#rm -rf lexmatch-output/*

# All --output paths below live under lexmatch-output/, so make sure it exists.
mkdir -p lexmatch-output

# Loop through each line in the input file (one ontology name per line).
# The `|| [ -n "$ontology" ]` keeps the final entry even when the file does
# not end with a newline.
while IFS= read -r ontology || [ -n "$ontology" ]; do
  # Blank lines in the list are ignored.
  [ -n "$ontology" ] || continue

  echo "Running lexmatch for $ontology..."

  # Run the first command
  # (The three passes below are disabled; only the env triad PV pass runs.)
  # echo "nmdc mixs vs $ontology"
  # poetry run runoak \
  # -i nmdc.db \
  # -a sqlite:obo:$ontology \
  # lexmatch \
  # --add-pipeline-step CaseNormalization \
  # --add-pipeline-step WhitespaceNormalization \
  # --add-pipeline-step WordOrderNormalization \
  # --output lexmatch-output/nmdc_mixs_vs_${ontology}.SSSOM.tsv \
  # i^mixs @ .all \
  # 2>>lexmatch-errors.txt
  #
  # echo "nmdc nmdc vs $ontology"
  # poetry run runoak \
  # -i nmdc.db \
  # -a sqlite:obo:$ontology \
  # lexmatch \
  # --add-pipeline-step CaseNormalization \
  # --add-pipeline-step WhitespaceNormalization \
  # --add-pipeline-step WordOrderNormalization \
  # --output lexmatch-output/nmdc_nmdc_vs_${ontology}.SSSOM.tsv \
  # i^nmdc @ .all \
  # 2>>lexmatch-errors.txt
  #
  # echo "submission schema vs $ontology"
  # poetry run runoak \
  # -i nmdc_submission_schema_no_from_schema_no_brackets.db \
  # -a sqlite:obo:$ontology \
  # lexmatch \
  # --add-pipeline-step CaseNormalization \
  # --add-pipeline-step WhitespaceNormalization \
  # --add-pipeline-step WordOrderNormalization \
  # --output lexmatch-output/submission_schema_vs_${ontology}.SSSOM.tsv \
  # i^nmdc_sub_schema @ .all \
  # 2>>lexmatch-errors.txt

  echo "env triad PVs vs $ontology"
  # A single continued command: one input database, one --output, then the
  # query terms. The superseded `-i nmdc.db` and `nmdc_mixs_vs_` output line
  # are dropped; the latter had no trailing backslash and cut the command in
  # two, leaving the real --output and the query terms orphaned.
  # stdin is pointed at /dev/null so the child process cannot consume the
  # remaining lines of for-lexmatch.txt that the loop is reading.
  # NOTE(review): the subject query still uses the i^nmdc_sub_schema pattern
  # from the submission-schema pass -- confirm it selects the intended terms
  # in env_triad_pvs_sheet.db.
  poetry run runoak \
    -i env_triad_pvs_sheet.db \
    -a "sqlite:obo:${ontology}" \
    lexmatch \
    --add-pipeline-step CaseNormalization \
    --add-pipeline-step WhitespaceNormalization \
    --add-pipeline-step WordOrderNormalization \
    --output "lexmatch-output/env_triad_pvs_vs_${ontology}.SSSOM.tsv" \
    i^nmdc_sub_schema @ .all \
    </dev/null 2>>lexmatch-errors.txt
done < for-lexmatch.txt

Expand Down
32 changes: 25 additions & 7 deletions for-lexmatch.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
efo
cosmo
go
oba
ncit
chemessence
bero
agro
biolink
chmo
cob
comet
ecocore
ecosim
ecso
ecto
edam
envo
envthes
goldterms
iao
ido
mop
ms
obi
omo
omit
pato
po
schema-dot-org
sio
sweetAll
106 changes: 0 additions & 106 deletions for-lexmatch.txt.orig

This file was deleted.

735 changes: 735 additions & 0 deletions lexmatch-output/env_triad_pvs_vs_agro.SSSOM.tsv

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions lexmatch-output/env_triad_pvs_vs_biolink.SSSOM.tsv

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions lexmatch-output/env_triad_pvs_vs_chmo.SSSOM.tsv

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions lexmatch-output/env_triad_pvs_vs_cob.SSSOM.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# curie_map:
# COB: http://purl.obolibrary.org/obo/COB_
# ENVO: http://purl.obolibrary.org/obo/ENVO_
# RO: http://purl.obolibrary.org/obo/RO_
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://w3id.org/sssom/license/unspecified
# mapping_set_id: https://w3id.org/sssom/mappings/f0aa1cf4-ee40-4933-be0d-903f7d8ed0cc
subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool confidence subject_match_field object_match_field match_string
COB:0000502 characteristic skos:closeMatch RO:0000052 characteristic of semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label characteristic
ENVO:00002042 surface water skos:closeMatch ENVO:01001191 water surface semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label surface water
ENVO:00003096 tap water skos:closeMatch ENVO:03600052 water tap semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label tap water
20 changes: 20 additions & 0 deletions lexmatch-output/env_triad_pvs_vs_comet.SSSOM.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# curie_map:
# ENVO: http://purl.obolibrary.org/obo/ENVO_
# comet: http://w3id.org/sssom/unknown_prefix/comet/
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/vocab/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://w3id.org/sssom/license/unspecified
# mapping_set_id: https://w3id.org/sssom/mappings/7bfe79fb-0c2c-4195-b83b-9c1dc0db925c
subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool confidence subject_match_field object_match_field match_string
ENVO:00000141 bank skos:closeMatch comet:bank bank semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label bank
ENVO:00002042 surface water skos:closeMatch ENVO:01001191 water surface semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label surface water
ENVO:00003096 tap water skos:closeMatch ENVO:03600052 water tap semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label tap water
comet:ExtractiveIndustryProductType#COAL COAL skos:closeMatch comet:FossilFuelType#COAL COAL semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label coal
comet:ExtractiveIndustryProductType#GAS GAS skos:closeMatch comet:WellType#GAS GAS semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label gas
comet:ExtractiveIndustryProductType#OIL OIL skos:closeMatch comet:WellType#OIL OIL semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label oil
comet:HealthcareSite HealthcareSite skos:closeMatch comet:PostalAddress PostalAddress semapv:LexicalMatching oaklib 0.5 skos:exactMatch skos:exactMatch ex:omop/care_site
comet:Location Location skos:closeMatch comet:location location semapv:LexicalMatching oaklib 0.5 rdfs:label rdfs:label location
Loading

0 comments on commit 104fbcc

Please sign in to comment.