forked from TUDoAD/NLP-Based-Ontology-Extender
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
66 lines (50 loc) · 2.77 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Created on Fri Jan 13 13:21:08 2023
@author: A. S. Behr
@affiliation: TU Dortmund University
@comment: Functions and code described in paper "Ontology Extension by NLP-based
Concept Extraction for Domain Experts in Catalytic Sciences" by A. S. Behr,
M. Völkenrath, N. Kockmann
"""
import w2v_ontology_extender_modules as w2v_ext
# --- Text mining ------------------------------------------------------------
# Per the original notes: reads the PDF files stored in ./import/ and stores
# the preprocessed data as pickles in ./pickle/.
# NOTE(review): the name passed here ("test") differs from pickle_name below
# ("methanation_only_text") -- confirm the later steps load the intended pickle.
w2v_ext.textmining("test")

# --- Concept extraction -----------------------------------------------------
# Settings for the extraction run; pickle_name, use_IUPAC_goldbook,
# min_counts_list and Onto_filenames are passed again to later calls.
pickle_name = "methanation_only_text"
use_IUPAC_goldbook = True
goldbook_mute = False
Onto_filenames = [
    "bao_complete_merged",
    "Allotrope_OWL",
    "chebi",
    "chmo",
    "NCIT",
    "SBO",
]
# min_count values to sweep: every integer from 1 to 25, then 50 and 100.
min_counts_list = [*range(1, 26), 50, 100]

# Per the original notes: trains w2v models based on min_count and stores
# them in ./models/.
concept_dictionary, metrics = w2v_ext.concept_extractor(
    Onto_filenames,
    use_IUPAC_goldbook,
    min_counts_list,
    pickle_name,
    goldbook_mute,
)
# --- Ontology extension (AFO) -----------------------------------------------
# use_IUPAC_goldbook, min_counts_list and pickle_name are reused unchanged from
# the values assigned earlier in this script.
extend_ontology = "Allotrope_OWL"
# Same entries as Onto_filenames, minus the ontology that is being extended.
Onto_filenames_ext = [
    "bao_complete_merged",
    "chebi",
    "chmo",
    "NCIT",
    "SBO",
]
similarity_threshold_list = [
    0.8, 0.9, 0.95,
    0.99, 0.995, 0.996,
    0.997, 0.998, 0.999,
]
# Marker used to denote automatically annotated strings as [provenance string].
provenance_string = "AB"
mute_prints = True

metrics_onto_extension = w2v_ext.ontology_class_extender(
    Onto_filenames_ext,
    use_IUPAC_goldbook,
    extend_ontology,
    min_counts_list,
    pickle_name,
    similarity_threshold_list,
    provenance_string,
    mute_prints,
)
# --- Ontology annotation ----------------------------------------------------
# Per the original notes, metrics_onto_extension["filenames"] holds the
# location and names of the extended ontologies in ./ontologies_output/.
list_of_ontologies_to_annotate = metrics_onto_extension["filenames"]

# Annotate those ontologies, passing the same ontology list, goldbook flag,
# provenance marker and print setting used above.
metrics_onto_annotation = w2v_ext.ontology_class_annotator(
    list_of_ontologies_to_annotate,
    Onto_filenames,
    use_IUPAC_goldbook,
    provenance_string,
    mute_prints,
)
#