diff --git a/.travis.yml b/.travis.yml index c76e1baf..a81d91ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,12 +15,15 @@ before_install: install: # Install Python dependencies - pip3 install -r /home/travis/build/roedoejet/g2p/requirements.txt + # Install g2p + - cd /home/travis/build/roedoejet/g2p && pip3 install -e . + # Legal check: make sure we don't introduce GPL dependencies + - pip3 install pip-licenses + - if pip-licenses | grep -v 'Artistic License' | grep -v LGPL | grep GNU; then echo 'Please avoid introducing *GPL dependencies'; false; fi # Install testing requirements - pip3 install coverage - pip3 install coveralls - pip3 install gunicorn - # Install g2p - - cd /home/travis/build/roedoejet/g2p && pip3 install -e . before_script: - gunicorn --worker-class eventlet -w 1 g2p.app:APP --no-sendfile --bind 0.0.0.0:5000 --daemon diff --git a/g2p/api.py b/g2p/api.py index 1082b347..a9df6920 100644 --- a/g2p/api.py +++ b/g2p/api.py @@ -125,7 +125,7 @@ def update_docs(): LOGGER.info('Updated API documentation') -g2p_api = Blueprint('resources.g2p', __name__) +g2p_api = Blueprint('resources-g2p', __name__) CORS(g2p_api) diff --git a/g2p/cli.py b/g2p/cli.py index 7ca3b872..004f88c8 100644 --- a/g2p/cli.py +++ b/g2p/cli.py @@ -316,7 +316,7 @@ def scan(lang, path): pattern = "[^" + mapped_string + filter_chars + ".]" prog = re.compile(pattern) - with open(path, "r") as file: + with open(path, "r", encoding="utf8") as file: data = normalize(file.read(), "NFD") if not case_sensitive: data = data.lower() diff --git a/g2p/mappings/__init__.py b/g2p/mappings/__init__.py index 7001cca7..1e03c6cd 100644 --- a/g2p/mappings/__init__.py +++ b/g2p/mappings/__init__.py @@ -77,7 +77,7 @@ def __init__(self, mapping=None, abbreviations: Union[str, DefaultDict[str, List # yes, they should self.allowable_kwargs = ['language_name', 'display_name', 'mapping', 'in_lang', 'out_lang', 'out_delimiter', 'as_is', 'case_sensitive', 'rule_ordering', - 'escape_special', 'norm_form', 
'prevent_feeding', 'reverse'] + 'escape_special', 'norm_form', 'prevent_feeding', 'reverse', 'type'] self.kwargs = OrderedDict(kwargs) self.processed = False if isinstance(abbreviations, defaultdict) or not abbreviations: @@ -101,6 +101,8 @@ def __init__(self, mapping=None, abbreviations: Union[str, DefaultDict[str, List elif 'id' in self.kwargs: loaded_config = self.find_mapping_by_id(self.kwargs['id']) self.process_loaded_config(loaded_config) + elif self.kwargs.get("type", "") == "unidecode": + self.mapping = [] else: raise exceptions.MalformedLookup() if self.abbreviations: @@ -182,10 +184,13 @@ def process_loaded_config(self, config): ''' For a mapping loaded from a file, take the keyword arguments and supply them to the Mapping, and get any abbreviations data. ''' - self.mapping = config['mapping_data'] + if config.get("type", "") == "unidecode": + self.mapping = [] + else: + self.mapping = config['mapping_data'] + self.abbreviations = config.get('abbreviations_data', None) mapping_kwargs = OrderedDict( {k: v for k, v in config.items() if k in self.allowable_kwargs}) - self.abbreviations = config.get('abbreviations_data', None) # Merge kwargs, but prioritize kwargs that initialized the Mapping self.kwargs = {**mapping_kwargs, **self.kwargs} diff --git a/g2p/mappings/create_fallback_mapping.py b/g2p/mappings/create_fallback_mapping.py index ebb240a5..7efa1fa3 100644 --- a/g2p/mappings/create_fallback_mapping.py +++ b/g2p/mappings/create_fallback_mapping.py @@ -1,6 +1,6 @@ import os -from unidecode import unidecode +from text_unidecode import unidecode from g2p import make_g2p from g2p.log import LOGGER diff --git a/g2p/mappings/langs/generated/config.yaml b/g2p/mappings/langs/generated/config.yaml index e5965fd5..5e29cf66 100644 --- a/g2p/mappings/langs/generated/config.yaml +++ b/g2p/mappings/langs/generated/config.yaml @@ -132,15 +132,16 @@ mappings: - authors: - Generated 2020-09-18 10:40:15.289530 - Updated by hand 2021-02-10 Eric Joanis to handle 
ejectives + - Update by hand 2021-05-21 Shankhalika Srikanth reorder rules with aspiration case_sensitive: false display_name: Tlingit IPA to English IPA escape_special: false in_lang: tli-ipa language_name: Tlingit IPA - mapping: tli-norm-ipa_to_eng-ipa.json - norm_form: NFC + mapping: tli-ipa_to_eng-ipa.json + norm_form: NFD out_lang: eng-ipa - prevent_feeding: false + prevent_feeding: true reverse: false rule_ordering: as-written - authors: @@ -235,7 +236,7 @@ mappings: reverse: false rule_ordering: as-written - authors: - - Generated 2021-03-24 21:42:30.002668 + - Generated 2021-08-01 19:37:38.316339 case_sensitive: false display_name: tau-ipa IPA to eng-ipa IPA escape_special: false @@ -244,7 +245,7 @@ mappings: mapping: tau-ipa_to_eng-ipa.json norm_form: NFD out_lang: eng-ipa - prevent_feeding: true + prevent_feeding: false reverse: false rule_ordering: as-written - authors: diff --git a/g2p/mappings/langs/generated/haa-ipa_to_eng-ipa.json b/g2p/mappings/langs/generated/haa-ipa_to_eng-ipa.json index 7d25a9d2..ac0afc28 100644 --- a/g2p/mappings/langs/generated/haa-ipa_to_eng-ipa.json +++ b/g2p/mappings/langs/generated/haa-ipa_to_eng-ipa.json @@ -1,4 +1,40 @@ [ + { + "in": "\u0303\u0300", + "out": "\u0303", + "context_before": "", + "context_after": "" + }, + { + "in": "\u0303\u030C", + "out": "\u0303", + "context_before": "", + "context_after": "" + }, + { + "in": "\u0303\u0302", + "out": "\u0303", + "context_before": "", + "context_after": "" + }, + { + "in": "\u0300", + "out": "", + "context_before": "", + "context_after": "" + }, + { + "in": "\u030C", + "out": "", + "context_before": "", + "context_after": "" + }, + { + "in": "\u0302", + "out": "", + "context_before": "", + "context_after": "" + }, { "in": "\u207fk", "out": "nk", @@ -347,12 +383,6 @@ "context_before": "", "context_after": "" }, - { - "in": "\u0303", - "out": "", - "context_before": "", - "context_after": "" - }, { "in": "\u0259", "out": "\u0259", diff --git 
a/g2p/mappings/langs/generated/tau-ipa_to_eng-ipa.json b/g2p/mappings/langs/generated/tau-ipa_to_eng-ipa.json index 3f63fe6c..594947ec 100644 --- a/g2p/mappings/langs/generated/tau-ipa_to_eng-ipa.json +++ b/g2p/mappings/langs/generated/tau-ipa_to_eng-ipa.json @@ -1,73 +1,73 @@ [ { - "in": "t\u03b8\u02bc", - "out": "t\u03b8", + "in": "tθʼ", + "out": "tθ", "context_before": "", "context_after": "" }, { - "in": "t\u03b8", - "out": "t\u03b8", + "in": "tθ", + "out": "tθ", "context_before": "", "context_after": "" }, { - "in": "t\u03b8", - "out": "t\u03b8", + "in": "tθ", + "out": "tθ", "context_before": "", "context_after": "" }, { - "in": "t\u03b8", - "out": "t\u03b8", + "in": "tθ", + "out": "tθ", "context_before": "", "context_after": "" }, { - "in": "\u0283\u02b2", - "out": "\u0283j", + "in": "ʃʲ", + "out": "ʃj", "context_before": "", "context_after": "" }, { - "in": "ts\u02bc", + "in": "tsʼ", "out": "ts", "context_before": "", "context_after": "" }, { - "in": "t\u0283\u02bc", - "out": "t\u0283", + "in": "tʃʼ", + "out": "tʃ", "context_before": "", "context_after": "" }, { - "in": "t\u026c\u02bc", + "in": "tɬʼ", "out": "ts", "context_before": "", "context_after": "" }, { - "in": "\u207ft", + "in": "ⁿt", "out": "nt", "context_before": "", "context_after": "" }, { - "in": "n\u0325", + "in": "n̥", "out": "n", "context_before": "", "context_after": "" }, { - "in": "\u00f0", - "out": "\u00f0", + "in": "ð", + "out": "ð", "context_before": "", "context_after": "" }, { - "in": "\u03b8", - "out": "\u03b8", + "in": "θ", + "out": "θ", "context_before": "", "context_after": "" }, @@ -84,61 +84,61 @@ "context_after": "" }, { - "in": "t\u026c", + "in": "tɬ", "out": "ts", "context_before": "", "context_after": "" }, { - "in": "t\u02bc", + "in": "tʼ", "out": "t", "context_before": "", "context_after": "" }, { - "in": "k\u02bc", + "in": "kʼ", "out": "k", "context_before": "", "context_after": "" }, { - "in": "\u1d50b", + "in": "ᵐb", "out": "mb", "context_before": "", "context_after": 
"" }, { - "in": "t\u0283", - "out": "t\u0361\u0283", + "in": "tʃ", + "out": "t͡ʃ", "context_before": "", "context_after": "" }, { - "in": "t\u026c", + "in": "tɬ", "out": "ts", "context_before": "", "context_after": "" }, { - "in": "j\u030a", + "in": "j̊", "out": "j", "context_before": "", "context_after": "" }, { - "in": "\u0283", - "out": "\u0283", + "in": "ʃ", + "out": "ʃ", "context_before": "", "context_after": "" }, { - "in": "\u014b", - "out": "\u014b", + "in": "ŋ", + "out": "ŋ", "context_before": "", "context_after": "" }, { - "in": "\u026c", + "in": "ɬ", "out": "s", "context_before": "", "context_after": "" @@ -234,80 +234,680 @@ "context_after": "" }, { - "in": "t\u0283", - "out": "t\u0361\u0283", + "in": "tʃ", + "out": "t͡ʃ", "context_before": "", "context_after": "" }, { - "in": "\u0294", - "out": "\u0294", + "in": "ʔ", + "out": "ʔ", "context_before": "", "context_after": "" }, { - "in": "\u0258\u02d0", - "out": "\u0259", + "in": "ɘ̃̀", + "out": "ʌ̃", "context_before": "", "context_after": "" }, { - "in": "\u028c", - "out": "\u028c", + "in": "ɘ̃̌", + "out": "ʌ̃", "context_before": "", "context_after": "" }, { - "in": "\u0258", - "out": "\u0259", + "in": "ɘ̃̂", + "out": "ʌ̃", "context_before": "", "context_after": "" }, { - "in": "i\u02d0", - "out": "e\u02d0", + "in": "ɘ̃ː", + "out": "ẽː", "context_before": "", "context_after": "" }, { - "in": "e\u02d0", - "out": "e\u02d0", + "in": "ʌ̃̀", + "out": "ʌ̃", "context_before": "", "context_after": "" }, { - "in": "a\u02d0", - "out": "e\u02d0", + "in": "ʌ̃̌", + "out": "ʌ̃", "context_before": "", "context_after": "" }, { - "in": "u\u02d0", + "in": "ʌ̃̂", + "out": "ʌ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ʌ̃", + "out": "ʌ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̃̀", + "out": "ʌ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̃̌", + "out": "ʌ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̃̂", + "out": "ʌ̃", + 
"context_before": "", + "context_after": "" + }, + { + "in": "ɘ̃", + "out": "ʌ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̀ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̌ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̂ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̀ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̌ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̂ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̀ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̌ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̂ː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ãː", + "out": "ẽː", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ̀ː", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ̌ː", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ̂ː", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ũː", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "õ̀ː", + "out": "õː", + "context_before": "", + "context_after": "" + }, + { + "in": "õ̌ː", + "out": "õː", + "context_before": "", + "context_after": "" + }, + { + "in": "õ̂ː", + "out": "õː", + "context_before": "", + "context_after": "" + }, + { + "in": "õː", + "out": "õː", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̀a", + "out": "ĩæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̌a", + "out": "ĩæ", + "context_before": "", + "context_after": "" + }, + { + 
"in": "ĩ̂a", + "out": "ĩæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩã", + "out": "ĩæ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̀o", + "out": "ĩɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̌o", + "out": "ĩɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̂o", + "out": "ĩɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩõ", + "out": "ĩõ", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̀a", + "out": "ẽæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̌a", + "out": "ẽæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽ̂a", + "out": "ẽæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ẽã", + "out": "ẽæ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̀", + "out": "ĩ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̌", + "out": "ĩ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ̂", + "out": "ĩ", + "context_before": "", + "context_after": "" + }, + { + "in": "ĩ", + "out": "ĩ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̃̀", + "out": "ɛ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̃̌", + "out": "ɛ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̃̂", + "out": "ɛ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̃", + "out": "ɛ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̀", + "out": "æ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̌", + "out": "æ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ã̂", + "out": "æ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ã", + "out": "æ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ̀", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ̌", + "out": "ũ", + "context_before": "", + 
"context_after": "" + }, + { + "in": "ũ̂", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ũ", + "out": "ũ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̃̀", + "out": "ɔ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̃̌", + "out": "ɔ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̃̂", + "out": "ɔ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̃", + "out": "ɔ̃", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̀ː", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̌ː", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̂ː", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘː", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ʌ̀", + "out": "ʌ", + "context_before": "", + "context_after": "" + }, + { + "in": "ʌ̌", + "out": "ʌ", + "context_before": "", + "context_after": "" + }, + { + "in": "ʌ̂", + "out": "ʌ", + "context_before": "", + "context_after": "" + }, + { + "in": "ʌ", + "out": "ʌ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̀", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̌", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ̂", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ɘ", + "out": "ə", + "context_before": "", + "context_after": "" + }, + { + "in": "ìː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "ǐː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "îː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "iː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "èː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "ěː", + "out": "eː", + "context_before": 
"", + "context_after": "" + }, + { + "in": "êː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "eː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "àː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "ǎː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "âː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "aː", + "out": "eː", + "context_before": "", + "context_after": "" + }, + { + "in": "ùː", + "out": "u", + "context_before": "", + "context_after": "" + }, + { + "in": "ǔː", + "out": "u", + "context_before": "", + "context_after": "" + }, + { + "in": "ûː", + "out": "u", + "context_before": "", + "context_after": "" + }, + { + "in": "uː", "out": "u", "context_before": "", "context_after": "" }, { - "in": "o\u02d0", - "out": "o\u02d0", + "in": "òː", + "out": "oː", + "context_before": "", + "context_after": "" + }, + { + "in": "ǒː", + "out": "oː", + "context_before": "", + "context_after": "" + }, + { + "in": "ôː", + "out": "oː", + "context_before": "", + "context_after": "" + }, + { + "in": "oː", + "out": "oː", + "context_before": "", + "context_after": "" + }, + { + "in": "ìa", + "out": "iæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ǐa", + "out": "iæ", + "context_before": "", + "context_after": "" + }, + { + "in": "îa", + "out": "iæ", "context_before": "", "context_after": "" }, { "in": "ia", - "out": "i\u00e6", + "out": "iæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ìo", + "out": "iɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ǐo", + "out": "iɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "îo", + "out": "iɔ", "context_before": "", "context_after": "" }, { "in": "io", - "out": "i\u0254", + "out": "iɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "èa", + "out": "ɛæ", + "context_before": "", + "context_after": 
"" + }, + { + "in": "ěa", + "out": "ɛæ", + "context_before": "", + "context_after": "" + }, + { + "in": "êa", + "out": "ɛæ", "context_before": "", "context_after": "" }, { "in": "ea", - "out": "\u025b\u00e6", + "out": "ɛæ", + "context_before": "", + "context_after": "" + }, + { + "in": "ì", + "out": "i", + "context_before": "", + "context_after": "" + }, + { + "in": "ǐ", + "out": "i", + "context_before": "", + "context_after": "" + }, + { + "in": "î", + "out": "i", "context_before": "", "context_after": "" }, @@ -318,14 +918,68 @@ "context_after": "" }, { - "in": "\u025b", - "out": "\u025b", + "in": "ɛ̀", + "out": "ɛ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̌", + "out": "ɛ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ̂", + "out": "ɛ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɛ", + "out": "ɛ", + "context_before": "", + "context_after": "" + }, + { + "in": "à", + "out": "æ", + "context_before": "", + "context_after": "" + }, + { + "in": "ǎ", + "out": "æ", + "context_before": "", + "context_after": "" + }, + { + "in": "â", + "out": "æ", "context_before": "", "context_after": "" }, { "in": "a", - "out": "\u00e6", + "out": "æ", + "context_before": "", + "context_after": "" + }, + { + "in": "ù", + "out": "u", + "context_before": "", + "context_after": "" + }, + { + "in": "ǔ", + "out": "u", + "context_before": "", + "context_after": "" + }, + { + "in": "û", + "out": "u", "context_before": "", "context_after": "" }, @@ -336,8 +990,26 @@ "context_after": "" }, { - "in": "\u0254", - "out": "\u0254", + "in": "ɔ̀", + "out": "ɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̌", + "out": "ɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ̂", + "out": "ɔ", + "context_before": "", + "context_after": "" + }, + { + "in": "ɔ", + "out": "ɔ", "context_before": "", "context_after": "" } diff --git a/g2p/mappings/langs/generated/tli-norm-ipa_to_eng-ipa.json 
b/g2p/mappings/langs/generated/tli-ipa_to_eng-ipa.json similarity index 75% rename from g2p/mappings/langs/generated/tli-norm-ipa_to_eng-ipa.json rename to g2p/mappings/langs/generated/tli-ipa_to_eng-ipa.json index 2ca4c272..93e6f2d7 100644 --- a/g2p/mappings/langs/generated/tli-norm-ipa_to_eng-ipa.json +++ b/g2p/mappings/langs/generated/tli-ipa_to_eng-ipa.json @@ -12,74 +12,74 @@ "context_after": "" }, { - "in": "t", - "out": "t", + "in": "ʃʼ", + "out": "ʃ", "context_before": "", "context_after": "" }, { - "in": "tʰ", - "out": "t", + "in": "tʃʰ", + "out": "tʃ", "context_before": "", "context_after": "" }, { - "in": "ɬʰ", - "out": "s", + "in": "tʃ", + "out": "tʃ", "context_before": "", "context_after": "" }, { - "in": "ɬ", - "out": "s", + "in": "tʰ", + "out": "t", "context_before": "", "context_after": "" }, { - "in": "ʃ", - "out": "ʃ", + "in": "tʼ", + "out": "t", "context_before": "", "context_after": "" }, { - "in": "s", - "out": "s", + "in": "t", + "out": "t", "context_before": "", "context_after": "" }, { - "in": "sʰ", + "in": "ɬʰ", "out": "s", "context_before": "", "context_after": "" }, { - "in": "tʃ", - "out": "t͡ʃ", + "in": "ɬ", + "out": "s", "context_before": "", "context_after": "" }, { - "in": "tʃʰ", - "out": "tʃ", + "in": "ʃ", + "out": "ʃ", "context_before": "", "context_after": "" }, { - "in": "tʃ", - "out": "t͡ʃ", + "in": "sʰ", + "out": "s", "context_before": "", "context_after": "" }, { - "in": "tʃʰ", - "out": "tʃ", + "in": "sʼ", + "out": "s", "context_before": "", "context_after": "" }, { - "in": "tʃ", - "out": "t͡ʃ", + "in": "s", + "out": "s", "context_before": "", "context_after": "" }, @@ -96,116 +96,68 @@ "context_after": "" }, { - "in": "x", + "in": "kʼ", "out": "k", "context_before": "", "context_after": "" }, { - "in": "k", + "in": "xʼ", "out": "k", "context_before": "", "context_after": "" }, { - "in": "j", - "out": "j", - "context_before": "", - "context_after": "" - }, - { - "in": "ɰ", - "out": "j", - "context_before": "", - 
"context_after": "" - }, - { - "in": "e", - "out": "ɛ", - "context_before": "", - "context_after": "" - }, - { - "in": "a", - "out": "æ", - "context_before": "", - "context_after": "" - }, - { - "in": "o", - "out": "ɔ", + "in": "x", + "out": "k", "context_before": "", "context_after": "" }, { - "in": "u", - "out": "u", + "in": "k", + "out": "k", "context_before": "", "context_after": "" }, { - "in": "i", - "out": "i", + "in": "j", + "out": "j", "context_before": "", "context_after": "" }, { - "in": "e", - "out": "ɛ", + "in": "ɰ", + "out": "j", "context_before": "", "context_after": "" }, { - "in": "a", - "out": "æ", + "in": "iː", + "out": "eː", "context_before": "", "context_after": "" }, { - "in": "i", - "out": "i", + "in": "eː", + "out": "eː", "context_before": "", "context_after": "" }, { - "in": "o", - "out": "ɔ", + "in": "aː", + "out": "eː", "context_before": "", "context_after": "" }, { - "in": "u", + "in": "uː", "out": "u", "context_before": "", "context_after": "" }, { - "in": "e", - "out": "ɛ", - "context_before": "", - "context_after": "" - }, - { - "in": "a", - "out": "æ", - "context_before": "", - "context_after": "" - }, - { - "in": "i", - "out": "i", - "context_before": "", - "context_after": "" - }, - { - "in": "o", - "out": "ɔ", - "context_before": "", - "context_after": "" - }, - { - "in": "u", - "out": "u", + "in": "oː", + "out": "oː", "context_before": "", "context_after": "" }, @@ -240,38 +192,32 @@ "context_after": "" }, { - "in": "iː", - "out": "eː", + "in": "e", + "out": "ɛ", "context_before": "", "context_after": "" }, { - "in": "eː", - "out": "eː", + "in": "a", + "out": "æ", "context_before": "", "context_after": "" }, { - "in": "aː", - "out": "eː", + "in": "o", + "out": "ɔ", "context_before": "", "context_after": "" }, { - "in": "uː", + "in": "u", "out": "u", "context_before": "", "context_after": "" }, { - "in": "oː", - "out": "oː", - "context_before": "", - "context_after": "" - }, - { - "in": "o", - "out": "ɔ", + "in": "i", + "out": 
"i", "context_before": "", "context_after": "" }, @@ -288,19 +234,19 @@ "context_after": "" }, { - "in": "q", + "in": "qʰ", "out": "k", "context_before": "", "context_after": "" }, { - "in": "q", - "out": "k", + "in": "qʼ", + "out": "q", "context_before": "", "context_after": "" }, { - "in": "qʰ", + "in": "q", "out": "k", "context_before": "", "context_after": "" @@ -311,52 +257,16 @@ "context_before": "", "context_after": "" }, - { - "in": "kʼ", - "out": "k", - "context_before": "", - "context_after": "" - }, { "in": "wʼ", "out": "w", "context_before": "", "context_after": "" }, - { - "in": "qʼ", - "out": "q", - "context_before": "", - "context_after": "" - }, - { - "in": "sʼ", - "out": "s", - "context_before": "", - "context_after": "" - }, - { - "in": "tʼ", - "out": "t", - "context_before": "", - "context_after": "" - }, - { - "in": "xʼ", - "out": "x", - "context_before": "", - "context_after": "" - }, { "in": "hʼ", "out": "h", "context_before": "", "context_after": "" - }, - { - "in": "ʃʼ", - "out": "ʃ", - "context_before": "", - "context_after": "" } ] diff --git a/g2p/mappings/langs/haa/config.yaml b/g2p/mappings/langs/haa/config.yaml index 45622179..a8b7e1d8 100644 --- a/g2p/mappings/langs/haa/config.yaml +++ b/g2p/mappings/langs/haa/config.yaml @@ -13,20 +13,8 @@ mappings: case_sensitive: false norm_form: NFD <<: *shared - - display_name: Hän to Simplified - in_lang: haa-equiv - out_lang: haa-simp - authors: - - Shankhalika Srikanth - type: mapping - mapping: haa_simplify.csv - prevent_feeding: true - rule_ordering: as-written - case_sensitive: false - norm_form: NFD - <<: *shared - display_name: Hän to IPA - in_lang: haa-simp + in_lang: haa-equiv out_lang: haa-ipa authors: - Shankhalika Srikanth diff --git a/g2p/mappings/langs/haa/haa_simplify.csv b/g2p/mappings/langs/haa/haa_simplify.csv deleted file mode 100644 index 7a944c6e..00000000 --- a/g2p/mappings/langs/haa/haa_simplify.csv +++ /dev/null @@ -1,6 +0,0 @@ -\u0328\u0300,\u0328 -\u0328\u030C,\u0328 
-\u0328\u0302,\u0328 -\u0300, -\u030C, -\u0302, \ No newline at end of file diff --git a/g2p/mappings/langs/haa/haa_to_ipa.csv b/g2p/mappings/langs/haa/haa_to_ipa.csv index 3d4f7468..93f25946 100644 --- a/g2p/mappings/langs/haa/haa_to_ipa.csv +++ b/g2p/mappings/langs/haa/haa_to_ipa.csv @@ -46,27 +46,74 @@ g,ɡʁ,,[VOWELS] g,k ',ʔ l,ɬɮ,[VOWELS],[VOWELS] +ą̈̀w,ã̀o +ą̈̌w,ã̌o +ą̈̂w,ã̂o a\u0328\u0308w,ão +ą̀w,æ̃̀o +ą̌w,æ̃̌o +ą̂w,æ̃̂o a\u0328w,æ̃o +ą̀y,æ̃̀i +ą̌y,æ̃̌i +ą̂y,æ̃̂i a\u0328y,æ̃i ä\u0328,ɑ̃ a\u0328,æ̃ +ę̀w,ẽ̀o +ę̌w,ẽ̌o +ę̂w,ẽ̂o e\u0328w,ẽo +ę̀y,ẽ̀i +ę̌y,ẽ̌i +ę̂y,ẽ̂i e\u0328y,ẽi ë\u0328,ə̃ +ǫ̀y,õ̀i +ǫ̌y,õ̌i +ǫ̂y,õ̂i o\u0328y,õi +į̀w,ĩ̀u +į̌w,ĩ̌u +į̂w,ĩ̂u i\u0328w,ĩu -\u0328,\u0303 +ë\u0328,ə\u0303 ë,ə +ä̀w,ào +ä̂w,âo +ä̌w,ǎo äw,ao +àw,æ̀o +ǎw,æ̌o +âw,æ̂o aw,æo +ày,æ̀i +ǎy,æ̌i +ây,æ̂i ay,æi +ä\u0328,ɑ\u0303 ä,ɑ +a\u0328,æ\u0303 a,æ +èw,èo +ěw,ěo +êw,êo ew,eo +èy,èi +ěy,ěi +êy,êi ey,ei +òy,òi +ǒy,ǒi +ôy,ôi oy,oi +ìw,ìu +ǐw,ǐu +îw,îu iw,iu y,j +o\u0328,o\u0303 o,o +e\u0328,e\u0303 e,e +\u0328,\u0303 \ No newline at end of file diff --git a/g2p/mappings/langs/ikt/ikt_to_ipa.json b/g2p/mappings/langs/ikt/ikt_to_ipa.json index 0a64adc8..bd3a5b7a 100644 --- a/g2p/mappings/langs/ikt/ikt_to_ipa.json +++ b/g2p/mappings/langs/ikt/ikt_to_ipa.json @@ -39,6 +39,11 @@ "in": "h", "out": "h" }, + { + "in": "ᕼ", + "out": "h", + "comment": "sometimes syllabics 'ᕼ' is used instead of ASCII 'H', e.g., 'ᕼii' was found in real data" + }, { "in": "p", "out": "p" diff --git a/g2p/mappings/langs/langs.pkl b/g2p/mappings/langs/langs.pkl index db372cba..dda56926 100644 Binary files a/g2p/mappings/langs/langs.pkl and b/g2p/mappings/langs/langs.pkl differ diff --git a/g2p/mappings/langs/moh/moh_to_ipa.json b/g2p/mappings/langs/moh/moh_to_ipa.json index efd1f706..47db1b74 100644 --- a/g2p/mappings/langs/moh/moh_to_ipa.json +++ b/g2p/mappings/langs/moh/moh_to_ipa.json @@ -170,6 +170,10 @@ "in": "r", "out": "r" }, + { + "in": "h", + "out": "h" + }, { "in": "t", "out": "t" diff --git 
a/g2p/mappings/langs/network.pkl b/g2p/mappings/langs/network.pkl index 2a0d31d4..ae9905a7 100644 Binary files a/g2p/mappings/langs/network.pkl and b/g2p/mappings/langs/network.pkl differ diff --git a/g2p/mappings/langs/tau/config.yml b/g2p/mappings/langs/tau/config.yml index 1e69eca5..1e8dea83 100644 --- a/g2p/mappings/langs/tau/config.yml +++ b/g2p/mappings/langs/tau/config.yml @@ -25,4 +25,3 @@ mappings: case_sensitive: false norm_form: NFD <<: *shared - \ No newline at end of file diff --git a/g2p/mappings/langs/tau/tau_equiv.json b/g2p/mappings/langs/tau/tau_equiv.json index 8879036d..dc20646a 100644 --- a/g2p/mappings/langs/tau/tau_equiv.json +++ b/g2p/mappings/langs/tau/tau_equiv.json @@ -10,21 +10,5 @@ { "in": "\u0332", "out": "" - }, - { - "in": "\u030c", - "out": "" - }, - { - "in": "\u0328", - "out": "" - }, - { - "in": "\u0300", - "out": "" - }, - { - "in": "\u0302", - "out": "" } ] diff --git a/g2p/mappings/langs/tau/tau_to_ipa.json b/g2p/mappings/langs/tau/tau_to_ipa.json index 2b787abe..b6ca6ee2 100644 --- a/g2p/mappings/langs/tau/tau_to_ipa.json +++ b/g2p/mappings/langs/tau/tau_to_ipa.json @@ -164,69 +164,515 @@ "out": "ʔ" }, { - "in": "u\u0308u\u0308", - "out": "ɘ\u02d0" + "in": "ų̈̀ų̈", + "out": "ɘ̃̀" }, { - "in": "a\u0308", - "out": "\u028c" + "in": "ų̈̌ų̈", + "out": "ɘ̃̌" }, { - "in": "u\u0308", + "in": "ų̈̂ų̈", + "out": "ɘ̃̂" + }, + { + "in": "ü\u0328ü\u0328", + "out": "ɘ\u0303ː" + }, + { + "in": "ą̈̀", + "out": "ʌ̃̀" + }, + { + "in": "ą̈̌", + "out": "ʌ̃̌" + }, + { + "in": "ą̈̂", + "out": "ʌ̃̂" + }, + { + "in": "ä\u0328", + "out": "ʌ\u0303" + }, + { + "in": "ų̈̀", + "out": "ɘ̃̀" + }, + { + "in": "ų̈̌", + "out": "ɘ̃̌" + }, + { + "in": "ų̈̂", + "out": "ɘ̃̂" + }, + { + "in": "ü\u0328", + "out": "ɘ\u0303" + }, + { + "in": "į̀į", + "out": "ĩ̀ː" + }, + { + "in": "į̌į", + "out": "ĩ̌ː" + }, + { + "in": "į̂į", + "out": "ĩ̂ː" + }, + { + "in": "i\u0328i\u0328", + "out": "i\u0303ː" + }, + { + "in": "ę̀ę", + "out": "ẽ̀ː" + }, + { + "in": "ę̌ę", + "out": 
"ẽ̌ː" + }, + { + "in": "ę̂ę", + "out": "ẽ̂ː" + }, + { + "in": "e\u0328e\u0328", + "out": "e\u0303ː" + }, + { + "in": "ą̀ą", + "out": "ã̀ː" + }, + { + "in": "ą̌ą", + "out": "ã̌ː" + }, + { + "in": "ą̂ą", + "out": "ã̂ː" + }, + { + "in": "a\u0328a\u0328", + "out": "a\u0303ː" + }, + { + "in": "ų̀ų", + "out": "ũ̀ː" + }, + { + "in": "ų̌ų", + "out": "ũ̌ː" + }, + { + "in": "ų̂ų", + "out": "ũ̂ː" + }, + { + "in": "u\u0328u\u0328", + "out": "u\u0303ː" + }, + { + "in": "ǫ̀ǫ", + "out": "õ̀ː" + }, + { + "in": "ǫ̌ǫ", + "out": "õ̌ː" + }, + { + "in": "ǫ̂ǫ", + "out": "õ̂ː" + }, + { + "in": "o\u0328o\u0328", + "out": "o\u0303ː" + }, + { + "in": "į̀ą", + "out": "ĩ̀a" + }, + { + "in": "į̌ą", + "out": "ĩ̌a" + }, + { + "in": "į̂ą", + "out": "ĩ̂a" + }, + { + "in": "i\u0328a\u0328", + "out": "i\u0303a\u0303" + }, + { + "in": "į̀ǫ", + "out": "ĩ̀o" + }, + { + "in": "į̌ǫ", + "out": "ĩ̌o" + }, + { + "in": "į̂ǫ", + "out": "ĩ̂o" + }, + { + "in": "i\u0328o\u0328", + "out": "i\u0303o\u0303" + }, + { + "in": "ę̀ą", + "out": "ẽ̀a" + }, + { + "in": "ę̌ą", + "out": "ẽ̌a" + }, + { + "in": "ę̂ą", + "out": "ẽ̂a" + }, + { + "in": "e\u0328a\u0328", + "out": "e\u0303a\u0303" + }, + { + "in": "į̀", + "out": "ĩ̀" + }, + { + "in": "į̌", + "out": "ĩ̌" + }, + { + "in": "į̂", + "out": "ĩ̂" + }, + { + "in": "i\u0328", + "out": "i\u0303" + }, + { + "in": "ę̀", + "out": "ɛ̃̀" + }, + { + "in": "ę̌", + "out": "ɛ̃̌" + }, + { + "in": "ę̂", + "out": "ɛ̃̂" + }, + { + "in": "e\u0328", + "out": "ɛ\u0303" + }, + { + "in": "ą̀", + "out": "ã̀" + }, + { + "in": "ą̌", + "out": "ã̌" + }, + { + "in": "ą̂", + "out": "ã̂" + }, + { + "in": "a\u0328", + "out": "a\u0303" + }, + { + "in": "ų̀", + "out": "ũ̀" + }, + { + "in": "ų̌", + "out": "ũ̌" + }, + { + "in": "ų̂", + "out": "ũ̂" + }, + { + "in": "u\u0328", + "out": "u\u0303" + }, + { + "in": "ǫ̀", + "out": "ɔ̃̀" + }, + { + "in": "ǫ̌", + "out": "ɔ̃̌" + }, + { + "in": "ǫ̂", + "out": "ɔ̃̂" + }, + { + "in": "o\u0328", + "out": "ɔ\u0303" + }, + { + "in": "ǜü", + "out": "ɘ̀ː" + }, + { + 
"in": "ǚü", + "out": "ɘ̌ː" + }, + { + "in": "ü̂ü", + "out": "ɘ̂ː" + }, + { + "in": "üü", + "out": "ɘː" + }, + { + "in": "ä̀", + "out": "ʌ̀" + }, + { + "in": "ä̌", + "out": "ʌ̌" + }, + { + "in": "ä̂", + "out": "ʌ̂" + }, + { + "in": "ä", + "out": "ʌ" + }, + { + "in": "ǜ", + "out": "ɘ̀" + }, + { + "in": "ǚ", + "out": "ɘ̌" + }, + { + "in": "ü̂", + "out": "ɘ̂" + }, + { + "in": "ü", "out": "ɘ" }, + { + "in": "ìi", + "out": "ìː" + }, + { + "in": "ǐi", + "out": "ǐː" + }, + { + "in": "îi", + "out": "îː" + }, { "in": "ii", - "out": "i\u02d0" + "out": "iː" + }, + { + "in": "èe", + "out": "èː" + }, + { + "in": "ěe", + "out": "ěː" + }, + { + "in": "êe", + "out": "êː" }, { "in": "ee", - "out": "e\u02d0" + "out": "eː" + }, + { + "in": "àa", + "out": "àː" + }, + { + "in": "ǎa", + "out": "ǎː" + }, + { + "in": "âa", + "out": "âː" }, { "in": "aa", - "out": "a\u02d0" + "out": "aː" + }, + { + "in": "ùu", + "out": "ùː" + }, + { + "in": "ǔu", + "out": "ǔː" + }, + { + "in": "ûu", + "out": "ûː" }, - { "in": "uu", - "out": "u\u02d0" + "out": "uː" + }, + { + "in": "òo", + "out": "òː" + }, + { + "in": "ǒo", + "out": "ǒː" + }, + { + "in": "ôo", + "out": "ôː" }, { "in": "oo", - "out": "o\u02d0" + "out": "oː" + }, + { + "in": "ìa", + "out": "ìa" + }, + { + "in": "ǐa", + "out": "ǐa" + }, + { + "in": "îa", + "out": "îa" }, { "in": "ia", "out": "ia" }, + { + "in": "ìo", + "out": "ìo" + }, + { + "in": "ǐo", + "out": "ǐo" + }, + { + "in": "îo", + "out": "îo" + }, { "in": "io", "out": "io" }, + { + "in": "èa", + "out": "èa" + }, + { + "in": "ěa", + "out": "ěa" + }, + { + "in": "êa", + "out": "êa" + }, { "in": "ea", "out": "ea" }, - + { + "in": "ì", + "out": "ì" + }, + { + "in": "ǐ", + "out": "ǐ" + }, + { + "in": "î", + "out": "î" + }, { "in": "i", "out": "i" }, + { + "in": "è", + "out": "ɛ̀" + }, + { + "in": "ě", + "out": "ɛ̌" + }, + { + "in": "ê", + "out": "ɛ̂" + }, { "in": "e", - "out": "\u025b" + "out": "ɛ" + }, + { + "in": "à", + "out": "à" + }, + { + "in": "ǎ", + "out": "ǎ" + }, + { + "in": "â", 
+ "out": "â" }, { "in": "a", "out": "a" }, + { + "in": "ù", + "out": "ù" + }, + { + "in": "ǔ", + "out": "ǔ" + }, + { + "in": "û", + "out": "û" + }, { "in": "u", "out": "u" }, + { + "in": "ò", + "out": "ɔ̀" + }, + { + "in": "ǒ", + "out": "ɔ̌" + }, + { + "in": "ô", + "out": "ɔ̂" + }, { "in": "o", - "out": "\u0254" + "out": "ɔ" } ] \ No newline at end of file diff --git a/g2p/mappings/langs/tli/config.yaml b/g2p/mappings/langs/tli/config.yaml index 07526621..0a7a0f3a 100644 --- a/g2p/mappings/langs/tli/config.yaml +++ b/g2p/mappings/langs/tli/config.yaml @@ -11,7 +11,7 @@ mappings: prevent_feeding: false rule_ordering: as-written case_sensitive: false - norm_form: NFC + norm_form: NFD <<: *shared - display_name: Tlingit to IPA in_lang: tli-equiv @@ -23,5 +23,5 @@ mappings: prevent_feeding: true rule_ordering: as-written case_sensitive: false - norm_form: NFC + norm_form: NFD <<: *shared diff --git a/g2p/mappings/langs/tli/tli_to_ipa.csv b/g2p/mappings/langs/tli/tli_to_ipa.csv index d69a8a20..c75e6dd0 100644 --- a/g2p/mappings/langs/tli/tli_to_ipa.csv +++ b/g2p/mappings/langs/tli/tli_to_ipa.csv @@ -17,8 +17,8 @@ w,ʷ,[x'kgh\.𝚐̲𝚔̲𝚡̲̲], k,kʰ,,[^'] x,x,, g,k,, -y,j,, ÿ,ɰ,, +y,j,, é,e,, á,a,, ó,o,, diff --git a/g2p/mappings/langs/und/config.yaml b/g2p/mappings/langs/und/config.yaml index 92584038..cb7c2feb 100644 --- a/g2p/mappings/langs/und/config.yaml +++ b/g2p/mappings/langs/und/config.yaml @@ -1,10 +1,11 @@ <<: &shared language_name: Undetermined mappings: - - display_name: Undetermined to IPA + - display_name: Undetermined ASCII to IPA mapping: und_to_ipa.json - in_lang: und + in_lang: und-ascii out_lang: und-ipa + norm: NFD case_sensitive: false escape_special: true authors: @@ -18,3 +19,11 @@ mappings: authors: - Patrick Littell <<: *shared + - display_name: Undetermined Unicode to ASCII + type: unidecode + norm: NFD + in_lang: und + out_lang: und-ascii + authors: + - Eric Joanis + <<: *shared diff --git a/g2p/mappings/utils.py b/g2p/mappings/utils.py index
c5b7f19d..eb2710d0 100644 --- a/g2p/mappings/utils.py +++ b/g2p/mappings/utils.py @@ -229,6 +229,9 @@ def load_mapping_from_path(path_to_mapping_config, index=0): if 'mapping' in mapping: mapping['mapping_data'] = load_from_file( os.path.join(path.parent, mapping['mapping'])) + elif mapping.get("type", "") == "unidecode": + # This mapping is not implemented as a regular mapping, but as custom software + pass else: # Is "mapping" key missing? raise exceptions.MalformedMapping('Key "mapping:" missing from a mapping in {}.'.format(path)) diff --git a/g2p/static/languages-network.json b/g2p/static/languages-network.json index c0cfe88c..846dc6ba 100644 --- a/g2p/static/languages-network.json +++ b/g2p/static/languages-network.json @@ -1 +1 @@ -{"nodes": [{"name": "alq", "symbolSize": 2.7027027027027026, "id": "alq", "category": "alq"}, {"name": "alq-ipa", "symbolSize": 2.7027027027027026, "id": "alq-ipa", "category": "alq"}, {"name": "atj", "symbolSize": 2.7027027027027026, "id": "atj", "category": "atj"}, {"name": "atj-ipa", "symbolSize": 2.7027027027027026, "id": "atj-ipa", "category": "atj"}, {"name": "eng-ipa", "symbolSize": 20, "id": "eng-ipa", "category": "eng"}, {"name": "ckt", "symbolSize": 2.7027027027027026, "id": "ckt", "category": "ckt"}, {"name": "ckt-ipa", "symbolSize": 2.7027027027027026, "id": "ckt-ipa", "category": "ckt"}, {"name": "clc-doulos", "symbolSize": 2, "id": "clc-doulos", "category": "clc"}, {"name": "clc", "symbolSize": 2, "id": "clc", "category": "clc"}, {"name": "crg-tmd", "symbolSize": 2.7027027027027026, "id": "crg-tmd", "category": "crg"}, {"name": "crg-ipa", "symbolSize": 3.6036036036036037, "id": "crg-ipa", "category": "crg"}, {"name": "crg-dv", "symbolSize": 2.7027027027027026, "id": "crg-dv", "category": "crg"}, {"name": "crj", "symbolSize": 3.6036036036036037, "id": "crj", "category": "crj"}, {"name": "crj-equiv", "symbolSize": 3.6036036036036037, "id": "crj-equiv", "category": "crj"}, {"name": "crj-ipa", "symbolSize": 
3.6036036036036037, "id": "crj-ipa", "category": "crj"}, {"name": "crk-no-symbols", "symbolSize": 3.6036036036036037, "id": "crk-no-symbols", "category": "crk"}, {"name": "crk-ipa", "symbolSize": 3.6036036036036037, "id": "crk-ipa", "category": "crk"}, {"name": "crk", "symbolSize": 3.6036036036036037, "id": "crk", "category": "crk"}, {"name": "crl", "symbolSize": 3.6036036036036037, "id": "crl", "category": "crl"}, {"name": "crl-equiv", "symbolSize": 3.6036036036036037, "id": "crl-equiv", "category": "crl"}, {"name": "crl-ipa", "symbolSize": 3.6036036036036037, "id": "crl-ipa", "category": "crl"}, {"name": "crm", "symbolSize": 3.6036036036036037, "id": "crm", "category": "crm"}, {"name": "crm-equiv", "symbolSize": 3.6036036036036037, "id": "crm-equiv", "category": "crm"}, {"name": "crm-ipa", "symbolSize": 3.6036036036036037, "id": "crm-ipa", "category": "crm"}, {"name": "crx-sro", "symbolSize": 2, "id": "crx-sro", "category": "crx"}, {"name": "crx-syl", "symbolSize": 2, "id": "crx-syl", "category": "crx"}, {"name": "csw", "symbolSize": 3.6036036036036037, "id": "csw", "category": "csw"}, {"name": "csw-equiv", "symbolSize": 3.6036036036036037, "id": "csw-equiv", "category": "csw"}, {"name": "csw-ipa", "symbolSize": 3.6036036036036037, "id": "csw-ipa", "category": "csw"}, {"name": "ctp", "symbolSize": 2.7027027027027026, "id": "ctp", "category": "ctp"}, {"name": "ctp-ipa", "symbolSize": 2.7027027027027026, "id": "ctp-ipa", "category": "ctp"}, {"name": "dan", "symbolSize": 2.7027027027027026, "id": "dan", "category": "dan"}, {"name": "dan-ipa", "symbolSize": 2.7027027027027026, "id": "dan-ipa", "category": "dan"}, {"name": "eng-arpabet", "symbolSize": 20, "id": "eng-arpabet", "category": "eng"}, {"name": "hei-doulos", "symbolSize": 2, "id": "hei-doulos", "category": "hei"}, {"name": "hei", "symbolSize": 2, "id": "hei", "category": "hei"}, {"name": "hei-times-font", "symbolSize": 2, "id": "hei-times-font", "category": "hei"}, {"name": "nav-times-font", "symbolSize": 2, 
"id": "nav-times-font", "category": "nav"}, {"name": "nav", "symbolSize": 2, "id": "nav", "category": "nav"}, {"name": "fn-unicode-font", "symbolSize": 2, "id": "fn-unicode-font", "category": "fn"}, {"name": "fn-unicode", "symbolSize": 2, "id": "fn-unicode", "category": "fn"}, {"name": "fra", "symbolSize": 2.7027027027027026, "id": "fra", "category": "fra"}, {"name": "fra-ipa", "symbolSize": 2.7027027027027026, "id": "fra-ipa", "category": "fra"}, {"name": "str-ipa", "symbolSize": 3.6036036036036037, "id": "str-ipa", "category": "str"}, {"name": "see-ipa", "symbolSize": 2.7027027027027026, "id": "see-ipa", "category": "see"}, {"name": "lml-ipa", "symbolSize": 2.7027027027027026, "id": "lml-ipa", "category": "lml"}, {"name": "oji-ipa", "symbolSize": 3.6036036036036037, "id": "oji-ipa", "category": "oji"}, {"name": "gla-ipa", "symbolSize": 2.7027027027027026, "id": "gla-ipa", "category": "gla"}, {"name": "tce-ipa", "symbolSize": 3.6036036036036037, "id": "tce-ipa", "category": "tce"}, {"name": "tli-ipa", "symbolSize": 3.6036036036036037, "id": "tli-ipa", "category": "tli"}, {"name": "gwi-ipa", "symbolSize": 3.6036036036036037, "id": "gwi-ipa", "category": "gwi"}, {"name": "mic-ipa", "symbolSize": 2.7027027027027026, "id": "mic-ipa", "category": "mic"}, {"name": "iku-ipa", "symbolSize": 3.6036036036036037, "id": "iku-ipa", "category": "iku"}, {"name": "ikt-ipa", "symbolSize": 2.7027027027027026, "id": "ikt-ipa", "category": "ikt"}, {"name": "iku-sro-ipa", "symbolSize": 2.7027027027027026, "id": "iku-sro-ipa", "category": "iku"}, {"name": "haa-ipa", "symbolSize": 4.504504504504505, "id": "haa-ipa", "category": "haa"}, {"name": "ttm-ipa", "symbolSize": 3.6036036036036037, "id": "ttm-ipa", "category": "ttm"}, {"name": "tau-ipa", "symbolSize": 3.6036036036036037, "id": "tau-ipa", "category": "tau"}, {"name": "moh-ipa", "symbolSize": 7.207207207207207, "id": "moh-ipa", "category": "moh"}, {"name": "git", "symbolSize": 4.504504504504505, "id": "git", "category": "git"}, 
{"name": "git-ipa", "symbolSize": 2.7027027027027026, "id": "git-ipa", "category": "git"}, {"name": "git-apa", "symbolSize": 2, "id": "git-apa", "category": "git"}, {"name": "git-equiv", "symbolSize": 2, "id": "git-equiv", "category": "git"}, {"name": "gla", "symbolSize": 2.7027027027027026, "id": "gla", "category": "gla"}, {"name": "gwi", "symbolSize": 3.6036036036036037, "id": "gwi", "category": "gwi"}, {"name": "gwi-equiv", "symbolSize": 3.6036036036036037, "id": "gwi-equiv", "category": "gwi"}, {"name": "haa", "symbolSize": 4.504504504504505, "id": "haa", "category": "haa"}, {"name": "haa-equiv", "symbolSize": 4.504504504504505, "id": "haa-equiv", "category": "haa"}, {"name": "haa-simp", "symbolSize": 4.504504504504505, "id": "haa-simp", "category": "haa"}, {"name": "ikt", "symbolSize": 2.7027027027027026, "id": "ikt", "category": "ikt"}, {"name": "iku", "symbolSize": 3.6036036036036037, "id": "iku", "category": "iku"}, {"name": "iku-equiv", "symbolSize": 3.6036036036036037, "id": "iku-equiv", "category": "iku"}, {"name": "iku-sro", "symbolSize": 2.7027027027027026, "id": "iku-sro", "category": "iku"}, {"name": "kkz", "symbolSize": 2.7027027027027026, "id": "kkz", "category": "kkz"}, {"name": "kkz-ipa", "symbolSize": 2.7027027027027026, "id": "kkz-ipa", "category": "kkz"}, {"name": "kwk-ipa", "symbolSize": 4.504504504504505, "id": "kwk-ipa", "category": "kwk"}, {"name": "kwk-napa", "symbolSize": 2.7027027027027026, "id": "kwk-napa", "category": "kwk"}, {"name": "kwk-umista", "symbolSize": 4.504504504504505, "id": "kwk-umista", "category": "kwk"}, {"name": "kwk-umista-con", "symbolSize": 2, "id": "kwk-umista-con", "category": "kwk"}, {"name": "kwk-napa-ubc", "symbolSize": 2, "id": "kwk-napa-ubc", "category": "kwk"}, {"name": "kwk-napa-ubc-con", "symbolSize": 2, "id": "kwk-napa-ubc-con", "category": "kwk"}, {"name": "kwk-napa-uvic", "symbolSize": 2, "id": "kwk-napa-uvic", "category": "kwk"}, {"name": "kwk-napa-uvic-con", "symbolSize": 2, "id": 
"kwk-napa-uvic-con", "category": "kwk"}, {"name": "kwk-boas", "symbolSize": 4.504504504504505, "id": "kwk-boas", "category": "kwk"}, {"name": "lml", "symbolSize": 2.7027027027027026, "id": "lml", "category": "lml"}, {"name": "mic", "symbolSize": 2.7027027027027026, "id": "mic", "category": "mic"}, {"name": "moh-equiv", "symbolSize": 7.207207207207207, "id": "moh-equiv", "category": "moh"}, {"name": "moh", "symbolSize": 7.207207207207207, "id": "moh", "category": "moh"}, {"name": "moh-festival", "symbolSize": 7.207207207207207, "id": "moh-festival", "category": "moh"}, {"name": "ipa", "symbolSize": 2, "id": "ipa", "category": "ipa"}, {"name": "oji", "symbolSize": 3.6036036036036037, "id": "oji", "category": "oji"}, {"name": "oji-syl", "symbolSize": 3.6036036036036037, "id": "oji-syl", "category": "oji"}, {"name": "see", "symbolSize": 2.7027027027027026, "id": "see", "category": "see"}, {"name": "srs", "symbolSize": 2.7027027027027026, "id": "srs", "category": "srs"}, {"name": "srs-ipa", "symbolSize": 2.7027027027027026, "id": "srs-ipa", "category": "srs"}, {"name": "str", "symbolSize": 3.6036036036036037, "id": "str", "category": "str"}, {"name": "str-equiv", "symbolSize": 3.6036036036036037, "id": "str-equiv", "category": "str"}, {"name": "tau", "symbolSize": 3.6036036036036037, "id": "tau", "category": "tau"}, {"name": "tau-equiv", "symbolSize": 3.6036036036036037, "id": "tau-equiv", "category": "tau"}, {"name": "tce", "symbolSize": 3.6036036036036037, "id": "tce", "category": "tce"}, {"name": "tce-equiv", "symbolSize": 3.6036036036036037, "id": "tce-equiv", "category": "tce"}, {"name": "tgx", "symbolSize": 2.7027027027027026, "id": "tgx", "category": "tgx"}, {"name": "tgx-ipa", "symbolSize": 2.7027027027027026, "id": "tgx-ipa", "category": "tgx"}, {"name": "tli", "symbolSize": 3.6036036036036037, "id": "tli", "category": "tli"}, {"name": "tli-equiv", "symbolSize": 3.6036036036036037, "id": "tli-equiv", "category": "tli"}, {"name": "ttm", "symbolSize": 
3.6036036036036037, "id": "ttm", "category": "ttm"}, {"name": "ttm-equiv", "symbolSize": 3.6036036036036037, "id": "ttm-equiv", "category": "ttm"}, {"name": "und", "symbolSize": 2.7027027027027026, "id": "und", "category": "und"}, {"name": "und-ipa", "symbolSize": 2.7027027027027026, "id": "und-ipa", "category": "und"}, {"name": "win", "symbolSize": 2.7027027027027026, "id": "win", "category": "win"}, {"name": "win-ipa", "symbolSize": 2.7027027027027026, "id": "win-ipa", "category": "win"}], "edges": [{"source": "alq", "target": "alq-ipa"}, {"source": "alq-ipa", "target": "eng-ipa"}, {"source": "atj", "target": "atj-ipa"}, {"source": "atj-ipa", "target": "eng-ipa"}, {"source": "eng-ipa", "target": "eng-arpabet"}, {"source": "ckt", "target": "ckt-ipa"}, {"source": "ckt-ipa", "target": "eng-ipa"}, {"source": "clc-doulos", "target": "clc"}, {"source": "crg-tmd", "target": "crg-ipa"}, {"source": "crg-ipa", "target": "eng-ipa"}, {"source": "crg-dv", "target": "crg-ipa"}, {"source": "crj", "target": "crj-equiv"}, {"source": "crj-equiv", "target": "crj-ipa"}, {"source": "crj-ipa", "target": "eng-ipa"}, {"source": "crk-no-symbols", "target": "crk-ipa"}, {"source": "crk-ipa", "target": "eng-ipa"}, {"source": "crk", "target": "crk-no-symbols"}, {"source": "crl", "target": "crl-equiv"}, {"source": "crl-equiv", "target": "crl-ipa"}, {"source": "crl-ipa", "target": "eng-ipa"}, {"source": "crm", "target": "crm-equiv"}, {"source": "crm-equiv", "target": "crm-ipa"}, {"source": "crm-ipa", "target": "eng-ipa"}, {"source": "crx-sro", "target": "crx-syl"}, {"source": "crx-syl", "target": "crx-sro"}, {"source": "csw", "target": "csw-equiv"}, {"source": "csw-equiv", "target": "csw-ipa"}, {"source": "csw-ipa", "target": "eng-ipa"}, {"source": "ctp", "target": "ctp-ipa"}, {"source": "ctp-ipa", "target": "eng-ipa"}, {"source": "dan", "target": "dan-ipa"}, {"source": "dan-ipa", "target": "eng-ipa"}, {"source": "hei-doulos", "target": "hei"}, {"source": "hei-times-font", "target": "hei"}, 
{"source": "nav-times-font", "target": "nav"}, {"source": "fn-unicode-font", "target": "fn-unicode"}, {"source": "fra", "target": "fra-ipa"}, {"source": "fra-ipa", "target": "eng-ipa"}, {"source": "str-ipa", "target": "eng-ipa"}, {"source": "see-ipa", "target": "eng-ipa"}, {"source": "lml-ipa", "target": "eng-ipa"}, {"source": "oji-ipa", "target": "eng-ipa"}, {"source": "gla-ipa", "target": "eng-ipa"}, {"source": "tce-ipa", "target": "eng-ipa"}, {"source": "tli-ipa", "target": "eng-ipa"}, {"source": "gwi-ipa", "target": "eng-ipa"}, {"source": "mic-ipa", "target": "eng-ipa"}, {"source": "iku-ipa", "target": "eng-ipa"}, {"source": "ikt-ipa", "target": "eng-ipa"}, {"source": "iku-sro-ipa", "target": "eng-ipa"}, {"source": "haa-ipa", "target": "eng-ipa"}, {"source": "ttm-ipa", "target": "eng-ipa"}, {"source": "tau-ipa", "target": "eng-ipa"}, {"source": "moh-ipa", "target": "eng-ipa"}, {"source": "moh-ipa", "target": "moh"}, {"source": "moh-ipa", "target": "moh-festival"}, {"source": "git", "target": "git-ipa"}, {"source": "git", "target": "git-apa"}, {"source": "git", "target": "git-equiv"}, {"source": "git-ipa", "target": "eng-ipa"}, {"source": "gla", "target": "gla-ipa"}, {"source": "gwi", "target": "gwi-equiv"}, {"source": "gwi-equiv", "target": "gwi-ipa"}, {"source": "haa", "target": "haa-equiv"}, {"source": "haa-equiv", "target": "haa-simp"}, {"source": "haa-simp", "target": "haa-ipa"}, {"source": "ikt", "target": "ikt-ipa"}, {"source": "iku", "target": "iku-equiv"}, {"source": "iku-equiv", "target": "iku-ipa"}, {"source": "iku-sro", "target": "iku-sro-ipa"}, {"source": "kkz", "target": "kkz-ipa"}, {"source": "kkz-ipa", "target": "eng-ipa"}, {"source": "kwk-ipa", "target": "eng-ipa"}, {"source": "kwk-napa", "target": "kwk-ipa"}, {"source": "kwk-umista", "target": "kwk-ipa"}, {"source": "kwk-umista", "target": "kwk-umista-con"}, {"source": "kwk-napa-ubc", "target": "kwk-napa-ubc-con"}, {"source": "kwk-napa-uvic", "target": "kwk-napa-uvic-con"}, {"source": 
"kwk-boas", "target": "kwk-umista"}, {"source": "lml", "target": "lml-ipa"}, {"source": "mic", "target": "mic-ipa"}, {"source": "moh-equiv", "target": "moh-ipa"}, {"source": "moh", "target": "moh-equiv"}, {"source": "moh-festival", "target": "moh-ipa"}, {"source": "ipa", "target": "ipa"}, {"source": "oji", "target": "oji-ipa"}, {"source": "oji-syl", "target": "oji"}, {"source": "see", "target": "see-ipa"}, {"source": "srs", "target": "srs-ipa"}, {"source": "srs-ipa", "target": "eng-ipa"}, {"source": "str", "target": "str-equiv"}, {"source": "str-equiv", "target": "str-ipa"}, {"source": "tau", "target": "tau-equiv"}, {"source": "tau-equiv", "target": "tau-ipa"}, {"source": "tce", "target": "tce-equiv"}, {"source": "tce-equiv", "target": "tce-ipa"}, {"source": "tgx", "target": "tgx-ipa"}, {"source": "tgx-ipa", "target": "eng-ipa"}, {"source": "tli", "target": "tli-equiv"}, {"source": "tli-equiv", "target": "tli-ipa"}, {"source": "ttm", "target": "ttm-equiv"}, {"source": "ttm-equiv", "target": "ttm-ipa"}, {"source": "und", "target": "und-ipa"}, {"source": "und-ipa", "target": "eng-ipa"}, {"source": "win", "target": "win-ipa"}, {"source": "win-ipa", "target": "eng-ipa"}]} \ No newline at end of file +{"nodes": [{"name": "alq", "symbolSize": 2.7027027027027026, "id": "alq", "category": "alq"}, {"name": "alq-ipa", "symbolSize": 2.7027027027027026, "id": "alq-ipa", "category": "alq"}, {"name": "atj", "symbolSize": 2.7027027027027026, "id": "atj", "category": "atj"}, {"name": "atj-ipa", "symbolSize": 2.7027027027027026, "id": "atj-ipa", "category": "atj"}, {"name": "eng-ipa", "symbolSize": 20, "id": "eng-ipa", "category": "eng"}, {"name": "ckt", "symbolSize": 2.7027027027027026, "id": "ckt", "category": "ckt"}, {"name": "ckt-ipa", "symbolSize": 2.7027027027027026, "id": "ckt-ipa", "category": "ckt"}, {"name": "clc-doulos", "symbolSize": 2, "id": "clc-doulos", "category": "clc"}, {"name": "clc", "symbolSize": 2, "id": "clc", "category": "clc"}, {"name": "crg-tmd", 
"symbolSize": 2.7027027027027026, "id": "crg-tmd", "category": "crg"}, {"name": "crg-ipa", "symbolSize": 3.6036036036036037, "id": "crg-ipa", "category": "crg"}, {"name": "crg-dv", "symbolSize": 2.7027027027027026, "id": "crg-dv", "category": "crg"}, {"name": "crj", "symbolSize": 3.6036036036036037, "id": "crj", "category": "crj"}, {"name": "crj-equiv", "symbolSize": 3.6036036036036037, "id": "crj-equiv", "category": "crj"}, {"name": "crj-ipa", "symbolSize": 3.6036036036036037, "id": "crj-ipa", "category": "crj"}, {"name": "crk-no-symbols", "symbolSize": 3.6036036036036037, "id": "crk-no-symbols", "category": "crk"}, {"name": "crk-ipa", "symbolSize": 3.6036036036036037, "id": "crk-ipa", "category": "crk"}, {"name": "crk", "symbolSize": 3.6036036036036037, "id": "crk", "category": "crk"}, {"name": "crl", "symbolSize": 3.6036036036036037, "id": "crl", "category": "crl"}, {"name": "crl-equiv", "symbolSize": 3.6036036036036037, "id": "crl-equiv", "category": "crl"}, {"name": "crl-ipa", "symbolSize": 3.6036036036036037, "id": "crl-ipa", "category": "crl"}, {"name": "crm", "symbolSize": 3.6036036036036037, "id": "crm", "category": "crm"}, {"name": "crm-equiv", "symbolSize": 3.6036036036036037, "id": "crm-equiv", "category": "crm"}, {"name": "crm-ipa", "symbolSize": 3.6036036036036037, "id": "crm-ipa", "category": "crm"}, {"name": "crx-sro", "symbolSize": 2, "id": "crx-sro", "category": "crx"}, {"name": "crx-syl", "symbolSize": 2, "id": "crx-syl", "category": "crx"}, {"name": "csw", "symbolSize": 3.6036036036036037, "id": "csw", "category": "csw"}, {"name": "csw-equiv", "symbolSize": 3.6036036036036037, "id": "csw-equiv", "category": "csw"}, {"name": "csw-ipa", "symbolSize": 3.6036036036036037, "id": "csw-ipa", "category": "csw"}, {"name": "ctp", "symbolSize": 2.7027027027027026, "id": "ctp", "category": "ctp"}, {"name": "ctp-ipa", "symbolSize": 2.7027027027027026, "id": "ctp-ipa", "category": "ctp"}, {"name": "dan", "symbolSize": 2.7027027027027026, "id": "dan", 
"category": "dan"}, {"name": "dan-ipa", "symbolSize": 2.7027027027027026, "id": "dan-ipa", "category": "dan"}, {"name": "eng-arpabet", "symbolSize": 20, "id": "eng-arpabet", "category": "eng"}, {"name": "hei-doulos", "symbolSize": 2, "id": "hei-doulos", "category": "hei"}, {"name": "hei", "symbolSize": 2, "id": "hei", "category": "hei"}, {"name": "hei-times-font", "symbolSize": 2, "id": "hei-times-font", "category": "hei"}, {"name": "nav-times-font", "symbolSize": 2, "id": "nav-times-font", "category": "nav"}, {"name": "nav", "symbolSize": 2, "id": "nav", "category": "nav"}, {"name": "fn-unicode-font", "symbolSize": 2, "id": "fn-unicode-font", "category": "fn"}, {"name": "fn-unicode", "symbolSize": 2, "id": "fn-unicode", "category": "fn"}, {"name": "fra", "symbolSize": 2.7027027027027026, "id": "fra", "category": "fra"}, {"name": "fra-ipa", "symbolSize": 2.7027027027027026, "id": "fra-ipa", "category": "fra"}, {"name": "str-ipa", "symbolSize": 3.6036036036036037, "id": "str-ipa", "category": "str"}, {"name": "see-ipa", "symbolSize": 2.7027027027027026, "id": "see-ipa", "category": "see"}, {"name": "lml-ipa", "symbolSize": 2.7027027027027026, "id": "lml-ipa", "category": "lml"}, {"name": "oji-ipa", "symbolSize": 3.6036036036036037, "id": "oji-ipa", "category": "oji"}, {"name": "gla-ipa", "symbolSize": 2.7027027027027026, "id": "gla-ipa", "category": "gla"}, {"name": "tce-ipa", "symbolSize": 3.6036036036036037, "id": "tce-ipa", "category": "tce"}, {"name": "tli-ipa", "symbolSize": 3.6036036036036037, "id": "tli-ipa", "category": "tli"}, {"name": "gwi-ipa", "symbolSize": 3.6036036036036037, "id": "gwi-ipa", "category": "gwi"}, {"name": "mic-ipa", "symbolSize": 2.7027027027027026, "id": "mic-ipa", "category": "mic"}, {"name": "iku-ipa", "symbolSize": 3.6036036036036037, "id": "iku-ipa", "category": "iku"}, {"name": "ikt-ipa", "symbolSize": 2.7027027027027026, "id": "ikt-ipa", "category": "ikt"}, {"name": "iku-sro-ipa", "symbolSize": 2.7027027027027026, "id": 
"iku-sro-ipa", "category": "iku"}, {"name": "haa-ipa", "symbolSize": 3.6036036036036037, "id": "haa-ipa", "category": "haa"}, {"name": "ttm-ipa", "symbolSize": 3.6036036036036037, "id": "ttm-ipa", "category": "ttm"}, {"name": "tau-ipa", "symbolSize": 3.6036036036036037, "id": "tau-ipa", "category": "tau"}, {"name": "moh-ipa", "symbolSize": 7.207207207207207, "id": "moh-ipa", "category": "moh"}, {"name": "git", "symbolSize": 4.504504504504505, "id": "git", "category": "git"}, {"name": "git-ipa", "symbolSize": 2.7027027027027026, "id": "git-ipa", "category": "git"}, {"name": "git-apa", "symbolSize": 2, "id": "git-apa", "category": "git"}, {"name": "git-equiv", "symbolSize": 2, "id": "git-equiv", "category": "git"}, {"name": "gla", "symbolSize": 2.7027027027027026, "id": "gla", "category": "gla"}, {"name": "gwi", "symbolSize": 3.6036036036036037, "id": "gwi", "category": "gwi"}, {"name": "gwi-equiv", "symbolSize": 3.6036036036036037, "id": "gwi-equiv", "category": "gwi"}, {"name": "haa", "symbolSize": 3.6036036036036037, "id": "haa", "category": "haa"}, {"name": "haa-equiv", "symbolSize": 3.6036036036036037, "id": "haa-equiv", "category": "haa"}, {"name": "ikt", "symbolSize": 2.7027027027027026, "id": "ikt", "category": "ikt"}, {"name": "iku", "symbolSize": 3.6036036036036037, "id": "iku", "category": "iku"}, {"name": "iku-equiv", "symbolSize": 3.6036036036036037, "id": "iku-equiv", "category": "iku"}, {"name": "iku-sro", "symbolSize": 2.7027027027027026, "id": "iku-sro", "category": "iku"}, {"name": "kkz", "symbolSize": 2.7027027027027026, "id": "kkz", "category": "kkz"}, {"name": "kkz-ipa", "symbolSize": 2.7027027027027026, "id": "kkz-ipa", "category": "kkz"}, {"name": "kwk-ipa", "symbolSize": 4.504504504504505, "id": "kwk-ipa", "category": "kwk"}, {"name": "kwk-napa", "symbolSize": 2.7027027027027026, "id": "kwk-napa", "category": "kwk"}, {"name": "kwk-umista", "symbolSize": 4.504504504504505, "id": "kwk-umista", "category": "kwk"}, {"name": "kwk-umista-con", 
"symbolSize": 2, "id": "kwk-umista-con", "category": "kwk"}, {"name": "kwk-napa-ubc", "symbolSize": 2, "id": "kwk-napa-ubc", "category": "kwk"}, {"name": "kwk-napa-ubc-con", "symbolSize": 2, "id": "kwk-napa-ubc-con", "category": "kwk"}, {"name": "kwk-napa-uvic", "symbolSize": 2, "id": "kwk-napa-uvic", "category": "kwk"}, {"name": "kwk-napa-uvic-con", "symbolSize": 2, "id": "kwk-napa-uvic-con", "category": "kwk"}, {"name": "kwk-boas", "symbolSize": 4.504504504504505, "id": "kwk-boas", "category": "kwk"}, {"name": "lml", "symbolSize": 2.7027027027027026, "id": "lml", "category": "lml"}, {"name": "mic", "symbolSize": 2.7027027027027026, "id": "mic", "category": "mic"}, {"name": "moh-equiv", "symbolSize": 7.207207207207207, "id": "moh-equiv", "category": "moh"}, {"name": "moh", "symbolSize": 7.207207207207207, "id": "moh", "category": "moh"}, {"name": "moh-festival", "symbolSize": 7.207207207207207, "id": "moh-festival", "category": "moh"}, {"name": "ipa", "symbolSize": 2, "id": "ipa", "category": "ipa"}, {"name": "oji", "symbolSize": 3.6036036036036037, "id": "oji", "category": "oji"}, {"name": "oji-syl", "symbolSize": 3.6036036036036037, "id": "oji-syl", "category": "oji"}, {"name": "see", "symbolSize": 2.7027027027027026, "id": "see", "category": "see"}, {"name": "srs", "symbolSize": 2.7027027027027026, "id": "srs", "category": "srs"}, {"name": "srs-ipa", "symbolSize": 2.7027027027027026, "id": "srs-ipa", "category": "srs"}, {"name": "str", "symbolSize": 3.6036036036036037, "id": "str", "category": "str"}, {"name": "str-equiv", "symbolSize": 3.6036036036036037, "id": "str-equiv", "category": "str"}, {"name": "tau", "symbolSize": 3.6036036036036037, "id": "tau", "category": "tau"}, {"name": "tau-equiv", "symbolSize": 3.6036036036036037, "id": "tau-equiv", "category": "tau"}, {"name": "tce", "symbolSize": 3.6036036036036037, "id": "tce", "category": "tce"}, {"name": "tce-equiv", "symbolSize": 3.6036036036036037, "id": "tce-equiv", "category": "tce"}, {"name": "tgx", 
"symbolSize": 2.7027027027027026, "id": "tgx", "category": "tgx"}, {"name": "tgx-ipa", "symbolSize": 2.7027027027027026, "id": "tgx-ipa", "category": "tgx"}, {"name": "tli", "symbolSize": 3.6036036036036037, "id": "tli", "category": "tli"}, {"name": "tli-equiv", "symbolSize": 3.6036036036036037, "id": "tli-equiv", "category": "tli"}, {"name": "ttm", "symbolSize": 3.6036036036036037, "id": "ttm", "category": "ttm"}, {"name": "ttm-equiv", "symbolSize": 3.6036036036036037, "id": "ttm-equiv", "category": "ttm"}, {"name": "und-ascii", "symbolSize": 3.6036036036036037, "id": "und-ascii", "category": "und"}, {"name": "und-ipa", "symbolSize": 3.6036036036036037, "id": "und-ipa", "category": "und"}, {"name": "und", "symbolSize": 3.6036036036036037, "id": "und", "category": "und"}, {"name": "win", "symbolSize": 2.7027027027027026, "id": "win", "category": "win"}, {"name": "win-ipa", "symbolSize": 2.7027027027027026, "id": "win-ipa", "category": "win"}], "edges": [{"source": "alq", "target": "alq-ipa"}, {"source": "alq-ipa", "target": "eng-ipa"}, {"source": "atj", "target": "atj-ipa"}, {"source": "atj-ipa", "target": "eng-ipa"}, {"source": "eng-ipa", "target": "eng-arpabet"}, {"source": "ckt", "target": "ckt-ipa"}, {"source": "ckt-ipa", "target": "eng-ipa"}, {"source": "clc-doulos", "target": "clc"}, {"source": "crg-tmd", "target": "crg-ipa"}, {"source": "crg-ipa", "target": "eng-ipa"}, {"source": "crg-dv", "target": "crg-ipa"}, {"source": "crj", "target": "crj-equiv"}, {"source": "crj-equiv", "target": "crj-ipa"}, {"source": "crj-ipa", "target": "eng-ipa"}, {"source": "crk-no-symbols", "target": "crk-ipa"}, {"source": "crk-ipa", "target": "eng-ipa"}, {"source": "crk", "target": "crk-no-symbols"}, {"source": "crl", "target": "crl-equiv"}, {"source": "crl-equiv", "target": "crl-ipa"}, {"source": "crl-ipa", "target": "eng-ipa"}, {"source": "crm", "target": "crm-equiv"}, {"source": "crm-equiv", "target": "crm-ipa"}, {"source": "crm-ipa", "target": "eng-ipa"}, {"source": 
"crx-sro", "target": "crx-syl"}, {"source": "crx-syl", "target": "crx-sro"}, {"source": "csw", "target": "csw-equiv"}, {"source": "csw-equiv", "target": "csw-ipa"}, {"source": "csw-ipa", "target": "eng-ipa"}, {"source": "ctp", "target": "ctp-ipa"}, {"source": "ctp-ipa", "target": "eng-ipa"}, {"source": "dan", "target": "dan-ipa"}, {"source": "dan-ipa", "target": "eng-ipa"}, {"source": "hei-doulos", "target": "hei"}, {"source": "hei-times-font", "target": "hei"}, {"source": "nav-times-font", "target": "nav"}, {"source": "fn-unicode-font", "target": "fn-unicode"}, {"source": "fra", "target": "fra-ipa"}, {"source": "fra-ipa", "target": "eng-ipa"}, {"source": "str-ipa", "target": "eng-ipa"}, {"source": "see-ipa", "target": "eng-ipa"}, {"source": "lml-ipa", "target": "eng-ipa"}, {"source": "oji-ipa", "target": "eng-ipa"}, {"source": "gla-ipa", "target": "eng-ipa"}, {"source": "tce-ipa", "target": "eng-ipa"}, {"source": "tli-ipa", "target": "eng-ipa"}, {"source": "gwi-ipa", "target": "eng-ipa"}, {"source": "mic-ipa", "target": "eng-ipa"}, {"source": "iku-ipa", "target": "eng-ipa"}, {"source": "ikt-ipa", "target": "eng-ipa"}, {"source": "iku-sro-ipa", "target": "eng-ipa"}, {"source": "haa-ipa", "target": "eng-ipa"}, {"source": "ttm-ipa", "target": "eng-ipa"}, {"source": "tau-ipa", "target": "eng-ipa"}, {"source": "moh-ipa", "target": "eng-ipa"}, {"source": "moh-ipa", "target": "moh"}, {"source": "moh-ipa", "target": "moh-festival"}, {"source": "git", "target": "git-ipa"}, {"source": "git", "target": "git-apa"}, {"source": "git", "target": "git-equiv"}, {"source": "git-ipa", "target": "eng-ipa"}, {"source": "gla", "target": "gla-ipa"}, {"source": "gwi", "target": "gwi-equiv"}, {"source": "gwi-equiv", "target": "gwi-ipa"}, {"source": "haa", "target": "haa-equiv"}, {"source": "haa-equiv", "target": "haa-ipa"}, {"source": "ikt", "target": "ikt-ipa"}, {"source": "iku", "target": "iku-equiv"}, {"source": "iku-equiv", "target": "iku-ipa"}, {"source": "iku-sro", "target": 
"iku-sro-ipa"}, {"source": "kkz", "target": "kkz-ipa"}, {"source": "kkz-ipa", "target": "eng-ipa"}, {"source": "kwk-ipa", "target": "eng-ipa"}, {"source": "kwk-napa", "target": "kwk-ipa"}, {"source": "kwk-umista", "target": "kwk-ipa"}, {"source": "kwk-umista", "target": "kwk-umista-con"}, {"source": "kwk-napa-ubc", "target": "kwk-napa-ubc-con"}, {"source": "kwk-napa-uvic", "target": "kwk-napa-uvic-con"}, {"source": "kwk-boas", "target": "kwk-umista"}, {"source": "lml", "target": "lml-ipa"}, {"source": "mic", "target": "mic-ipa"}, {"source": "moh-equiv", "target": "moh-ipa"}, {"source": "moh", "target": "moh-equiv"}, {"source": "moh-festival", "target": "moh-ipa"}, {"source": "ipa", "target": "ipa"}, {"source": "oji", "target": "oji-ipa"}, {"source": "oji-syl", "target": "oji"}, {"source": "see", "target": "see-ipa"}, {"source": "srs", "target": "srs-ipa"}, {"source": "srs-ipa", "target": "eng-ipa"}, {"source": "str", "target": "str-equiv"}, {"source": "str-equiv", "target": "str-ipa"}, {"source": "tau", "target": "tau-equiv"}, {"source": "tau-equiv", "target": "tau-ipa"}, {"source": "tce", "target": "tce-equiv"}, {"source": "tce-equiv", "target": "tce-ipa"}, {"source": "tgx", "target": "tgx-ipa"}, {"source": "tgx-ipa", "target": "eng-ipa"}, {"source": "tli", "target": "tli-equiv"}, {"source": "tli-equiv", "target": "tli-ipa"}, {"source": "ttm", "target": "ttm-equiv"}, {"source": "ttm-equiv", "target": "ttm-ipa"}, {"source": "und-ascii", "target": "und-ipa"}, {"source": "und-ipa", "target": "eng-ipa"}, {"source": "und", "target": "und-ascii"}, {"source": "win", "target": "win-ipa"}, {"source": "win-ipa", "target": "eng-ipa"}]} \ No newline at end of file diff --git a/g2p/static/swagger.json b/g2p/static/swagger.json index b7588201..5d73ffc3 100644 --- a/g2p/static/swagger.json +++ b/g2p/static/swagger.json @@ -1 +1 @@ -{"openapi": "3.0.0", "info": {"title": "Simple G2P API", "description": "A simple API for the G2P module", "contact": {"email": 
"hello@aidanpine.ca"}, "license": {"name": "MIT", "url": "https://github.com/roedoejet/g2p/blob/master/LICENSE"}, "version": "1.0.0"}, "servers": [{"url": "https://g2p-studio.herokuapp.com/api/v1", "description": "G2P Studio Online"}, {"url": "https://localhost:5000/api/v1", "description": "Local Host"}], "tags": [{"name": "ancestors", "description": "Find which mappings can convert to a given node"}, {"name": "descendants", "description": "Find which mappings can be converted to from a given node"}, {"name": "g2p", "description": "Transduced, g2p'ed forms"}, {"name": "langs", "description": "Languages/mappings available for G2P"}], "paths": {"/ancestors/{node}": {"get": {"tags": ["ancestors"], "summary": "get all ancestors of node", "description": "Get the valid ancestors in the network's path to a given node. These are all the mappings that you can convert *from* in order to get the given node.\n", "operationId": "getAncestors", "parameters": [{"name": "node", "in": "path", "description": "language node name", "required": true, "style": "simple", "explode": false, "schema": {"$ref": "#/components/schemas/Langs"}}], "responses": {"200": {"description": "The valid ancestors of a node", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "404": {"description": "No node with that name."}}}}, "/descendants{node}": {"get": {"tags": ["descendants"], "summary": "get all descendants of node", "description": "Get the valid descendants in the network's path to a given node. 
These are all the mappings that you can convert *to* from the given node.\n", "operationId": "getDescendants", "parameters": [{"name": "node", "in": "path", "description": "language node name", "required": true, "style": "simple", "explode": false, "schema": {"$ref": "#/components/schemas/Langs"}}], "responses": {"200": {"description": "The valid descendants of a node", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "404": {"description": "No node with that name."}}}}, "/langs": {"get": {"tags": ["langs"], "summary": "find all possible languages in g2p", "description": "By passing in the appropriate options, you can find available mappings\n", "operationId": "searchTable", "responses": {"200": {"description": "search results matching criteria", "content": {"application/json": {"schema": {"type": "array", "items": {"$ref": "#/components/schemas/Langs"}}}}}}}}, "/g2p": {"get": {"tags": ["g2p"], "summary": "get g2p'ed form", "description": "Get the converted version of a string, given an input and output lang\n", "operationId": "convertString", "parameters": [{"name": "in-lang", "in": "query", "description": "input lang of string", "required": true, "style": "form", "explode": true, "schema": {"$ref": "#/components/schemas/Langs"}}, {"name": "out-lang", "in": "query", "description": "output lang of string", "required": true, "style": "form", "explode": true, "schema": {"$ref": "#/components/schemas/Langs"}}, {"name": "text", "in": "query", "description": "string to convert", "required": true, "schema": {"type": "string"}}], "responses": {"200": {"description": "The converted text", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "400": {"description": "There is no path between the in-lang and out-lang"}, "404": {"description": "Either the in-lang or out-lang aren't valid Langs"}}}}}, "components": {"schemas": {"Langs": {"type": "string", "enum": ["alq", "alq-ipa", "atj", 
"atj-ipa", "ckt", "ckt-ipa", "clc", "clc-doulos", "crg-dv", "crg-ipa", "crg-tmd", "crj", "crj-equiv", "crj-ipa", "crk", "crk-ipa", "crk-no-symbols", "crl", "crl-equiv", "crl-ipa", "crm", "crm-equiv", "crm-ipa", "crx-sro", "crx-syl", "csw", "csw-equiv", "csw-ipa", "ctp", "ctp-ipa", "dan", "dan-ipa", "eng-arpabet", "eng-ipa", "fn-unicode", "fn-unicode-font", "fra", "fra-ipa", "git", "git-apa", "git-equiv", "git-ipa", "gla", "gla-ipa", "gwi", "gwi-equiv", "gwi-ipa", "haa", "haa-equiv", "haa-ipa", "haa-simp", "hei", "hei-doulos", "hei-times-font", "ikt", "ikt-ipa", "iku", "iku-equiv", "iku-ipa", "iku-sro", "iku-sro-ipa", "ipa", "kkz", "kkz-ipa", "kwk-boas", "kwk-ipa", "kwk-napa", "kwk-napa-ubc", "kwk-napa-ubc-con", "kwk-napa-uvic", "kwk-napa-uvic-con", "kwk-umista", "kwk-umista-con", "lml", "lml-ipa", "mic", "mic-ipa", "moh", "moh-equiv", "moh-festival", "moh-ipa", "nav", "nav-times-font", "oji", "oji-ipa", "oji-syl", "see", "see-ipa", "srs", "srs-ipa", "str", "str-equiv", "str-ipa", "tau", "tau-equiv", "tau-ipa", "tce", "tce-equiv", "tce-ipa", "tgx", "tgx-ipa", "tli", "tli-equiv", "tli-ipa", "ttm", "ttm-equiv", "ttm-ipa", "und", "und-ipa", "win", "win-ipa"]}}}} \ No newline at end of file +{"openapi": "3.0.0", "info": {"title": "Simple G2P API", "description": "A simple API for the G2P module", "contact": {"email": "hello@aidanpine.ca"}, "license": {"name": "MIT", "url": "https://github.com/roedoejet/g2p/blob/master/LICENSE"}, "version": "1.0.0"}, "servers": [{"url": "https://g2p-studio.herokuapp.com/api/v1", "description": "G2P Studio Online"}, {"url": "https://localhost:5000/api/v1", "description": "Local Host"}], "tags": [{"name": "ancestors", "description": "Find which mappings can convert to a given node"}, {"name": "descendants", "description": "Find which mappings can be converted to from a given node"}, {"name": "g2p", "description": "Transduced, g2p'ed forms"}, {"name": "langs", "description": "Languages/mappings available for G2P"}], "paths": 
{"/ancestors/{node}": {"get": {"tags": ["ancestors"], "summary": "get all ancestors of node", "description": "Get the valid ancestors in the network's path to a given node. These are all the mappings that you can convert *from* in order to get the given node.\n", "operationId": "getAncestors", "parameters": [{"name": "node", "in": "path", "description": "language node name", "required": true, "style": "simple", "explode": false, "schema": {"$ref": "#/components/schemas/Langs"}}], "responses": {"200": {"description": "The valid ancestors of a node", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "404": {"description": "No node with that name."}}}}, "/descendants{node}": {"get": {"tags": ["descendants"], "summary": "get all descendants of node", "description": "Get the valid descendants in the network's path to a given node. These are all the mappings that you can convert *to* from the given node.\n", "operationId": "getDescendants", "parameters": [{"name": "node", "in": "path", "description": "language node name", "required": true, "style": "simple", "explode": false, "schema": {"$ref": "#/components/schemas/Langs"}}], "responses": {"200": {"description": "The valid descendants of a node", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "404": {"description": "No node with that name."}}}}, "/langs": {"get": {"tags": ["langs"], "summary": "find all possible languages in g2p", "description": "By passing in the appropriate options, you can find available mappings\n", "operationId": "searchTable", "responses": {"200": {"description": "search results matching criteria", "content": {"application/json": {"schema": {"type": "array", "items": {"$ref": "#/components/schemas/Langs"}}}}}}}}, "/g2p": {"get": {"tags": ["g2p"], "summary": "get g2p'ed form", "description": "Get the converted version of a string, given an input and output lang\n", "operationId": "convertString", 
"parameters": [{"name": "in-lang", "in": "query", "description": "input lang of string", "required": true, "style": "form", "explode": true, "schema": {"$ref": "#/components/schemas/Langs"}}, {"name": "out-lang", "in": "query", "description": "output lang of string", "required": true, "style": "form", "explode": true, "schema": {"$ref": "#/components/schemas/Langs"}}, {"name": "text", "in": "query", "description": "string to convert", "required": true, "schema": {"type": "string"}}], "responses": {"200": {"description": "The converted text", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "string"}}}}}, "400": {"description": "There is no path between the in-lang and out-lang"}, "404": {"description": "Either the in-lang or out-lang aren't valid Langs"}}}}}, "components": {"schemas": {"Langs": {"type": "string", "enum": ["alq", "alq-ipa", "atj", "atj-ipa", "ckt", "ckt-ipa", "clc", "clc-doulos", "crg-dv", "crg-ipa", "crg-tmd", "crj", "crj-equiv", "crj-ipa", "crk", "crk-ipa", "crk-no-symbols", "crl", "crl-equiv", "crl-ipa", "crm", "crm-equiv", "crm-ipa", "crx-sro", "crx-syl", "csw", "csw-equiv", "csw-ipa", "ctp", "ctp-ipa", "dan", "dan-ipa", "eng-arpabet", "eng-ipa", "fn-unicode", "fn-unicode-font", "fra", "fra-ipa", "git", "git-apa", "git-equiv", "git-ipa", "gla", "gla-ipa", "gwi", "gwi-equiv", "gwi-ipa", "haa", "haa-equiv", "haa-ipa", "hei", "hei-doulos", "hei-times-font", "ikt", "ikt-ipa", "iku", "iku-equiv", "iku-ipa", "iku-sro", "iku-sro-ipa", "ipa", "kkz", "kkz-ipa", "kwk-boas", "kwk-ipa", "kwk-napa", "kwk-napa-ubc", "kwk-napa-ubc-con", "kwk-napa-uvic", "kwk-napa-uvic-con", "kwk-umista", "kwk-umista-con", "lml", "lml-ipa", "mic", "mic-ipa", "moh", "moh-equiv", "moh-festival", "moh-ipa", "nav", "nav-times-font", "oji", "oji-ipa", "oji-syl", "see", "see-ipa", "srs", "srs-ipa", "str", "str-equiv", "str-ipa", "tau", "tau-equiv", "tau-ipa", "tce", "tce-equiv", "tce-ipa", "tgx", "tgx-ipa", "tli", "tli-equiv", "tli-ipa", "ttm", 
"ttm-equiv", "ttm-ipa", "und", "und-ascii", "und-ipa", "win", "win-ipa"]}}}} \ No newline at end of file diff --git a/g2p/tests/public/data/haa.csv b/g2p/tests/public/data/haa.csv index 897c087a..d29adb92 100644 --- a/g2p/tests/public/data/haa.csv +++ b/g2p/tests/public/data/haa.csv @@ -1,4 +1,5 @@ -haa,haa-ipa,tth’ą̂ą̈̌,tθʼæ̃ɑ̃ +haa,haa-ipa,tth’ą̂ą̈̌,tθʼæ̃̂ɑ̃̌ +haa,eng-ipa,tth’ą̂ą̈̌,tθæ̃ɑ̃ haa,haa-ipa,nhdhthtthddhwhshzhchch’yhkhghhʼ,n̥ðθtθʰtθw̥ʃʒtʃʰtʃʼj̊xɣhʔ haa,haa-ipa,nddh,ntθ haa,haa-ipa,njonjkënjp,ŋoⁿkkʁəⁿkpʰ @@ -6,6 +7,9 @@ haa,haa-ipa,zrhzrsrtrdrtr’r,zɻ̊ʐʂʈʂʰʈʂʈʂʼɻ haa,haa-ipa,chshzhjchʼyhj̱,tʃʰʃʒtʃtʃʼj̊dʒ haa,haa-ipa,eyewayawoyoeëäwä,eieoæiæooioeəaoɑ haa,haa-ipa,ḏgʼtldltl’lłh,dkʔtɬʰtɬtɬʼlɬh -haa,haa-ipa,ywwhbpmbnszë̀ù,jww̥ppʰᵐpnszəu -haa,haa-ipa,kěwgǒy,kʁeoɡʁoi -haa,haa-ipa,ą̈̀wlį̌w,ãoɬɮĩu +haa,haa-ipa,ywwhbpmbnszë̀ù,jww̥ppʰᵐpnszə̀ù +haa,eng-ipa,ywwhbpmbnszë̀ù,jwwppmpnszəu +haa,haa-ipa,kěwgǒy,kʁěoɡʁǒi +haa,eng-ipa,kěwgǒy,kʒɛɔɡʒɔi +haa,haa-ipa,ą̈̀wlį̌w,ã̀oɬɮĩ̌u +haa,haa-ipa,į̌ǫ̀,ĩ̌õ̀ diff --git a/g2p/tests/public/data/tau.psv b/g2p/tests/public/data/tau.psv index 2b65643c..0780db80 100644 --- a/g2p/tests/public/data/tau.psv +++ b/g2p/tests/public/data/tau.psv @@ -4,9 +4,9 @@ tau|tau-ipa|maasee'|maːseːʔ tau|tau-ipa|betlanh|bɛtɬan̥ tau|tau-ipa|do'eent'aa|tɔʔeːntʼaː tau|tau-ipa|aaeeooiiuuioiaea|aːeːoːiːuːioiaea -tau|tau-ipa|ąąęęįįǫǫųų|aːeːiːoːuː -tau|tau-ipa|ąęįųǫ|aɛiuɔ -tau|tau-ipa|àìùèò|aiuɛɔ +tau|tau-ipa|ąąęęįįǫǫųų|ãːẽːĩːõːũː +tau|tau-ipa|ąęįųǫ|ãɛ̃ĩũɔ̃ +tau|tau-ipa|àìùèò|àìùɛ̀ɔ̀ tau|tau-ipa|aäüüü|aʌɘːɘ tau|tau-ipa|thtth'tthht't|θtθʼtθhtʼt tau|tau-ipa|ddhdh|tθð @@ -17,5 +17,5 @@ tau|tau-ipa|shchjch'|ʃtʃtʃtʃʼ tau|tau-ipa|shyyyh|ʃʲjj̊ tau|tau-ipa|kgk'gnx|kkkʼŋx tau|tau-ipa|h'|hʔ -tau|eng-ipa|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|tɔʔeːnteː æiuɛɔ æɛiuɔ hʔ kkkŋk -tau|eng-arpabet|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|T AO HH EY N T EY AE IY UW EH AO AE EH IY UW AO HH HH K K K NG K +tau|eng-ipa|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|tɔʔeːnteː æiuɛɔ æ̃ɛ̃ĩũɔ̃ hʔ kkkŋk 
+tau|eng-arpabet|do'eent'aa àìùèò ąęįųǫ h' kgk'gnx|T AO HH EY N T EY AE IY UW EH AO AE N EH N IY N UW N AO N HH HH K K K NG K diff --git a/g2p/tests/public/data/tli.csv b/g2p/tests/public/data/tli.csv index 377c61d4..b6469014 100644 --- a/g2p/tests/public/data/tli.csv +++ b/g2p/tests/public/data/tli.csv @@ -37,3 +37,8 @@ tli,tli-ipa,𝚕̲,l tli,tli-ipa,w𝚘̲,wo tli,tli-ipa,ÿ,ɰ tli,tli-ipa,tâch,tʰatʃ +tli,eng-ipa,tâcha,tætʃæ +tli,eng-ipa,ch'𝚘̲o,tʃoː +tli,eng-arpabet,ch'𝚘̲o,CH OW +tli,eng-arpabet,tsaa,T S EY +tli,eng-ipa,x',k \ No newline at end of file diff --git a/g2p/tests/run.py b/g2p/tests/run.py index 7faf9e9a..8f06e21a 100755 --- a/g2p/tests/run.py +++ b/g2p/tests/run.py @@ -5,26 +5,29 @@ import os import sys -from unittest import TestLoader, TextTestRunner, TestSuite +from unittest import TestLoader, TestSuite, TextTestRunner # Unit tests from g2p.log import LOGGER +from g2p.tests.test_api_resources import ResourceIntegrationTest +from g2p.tests.test_check_ipa_arpabet import CheckIpaArpabetTest +from g2p.tests.test_cli import CliTest from g2p.tests.test_create_mapping import MappingCreationTest -from g2p.tests.test_mappings import MappingTest -from g2p.tests.test_network import NetworkTest +from g2p.tests.test_doctor import DoctorTest +from g2p.tests.test_fallback import FallbackTest from g2p.tests.test_indices import IndicesTest from g2p.tests.test_langs import LangTest -from g2p.tests.test_transducer import TransducerTest -from g2p.tests.test_cli import CliTest -from g2p.tests.test_utils import UtilsTest -from g2p.tests.test_fallback import FallbackTest -from g2p.tests.test_api_resources import ResourceIntegrationTest +from g2p.tests.test_mappings import MappingTest +from g2p.tests.test_network import NetworkTest from g2p.tests.test_studio import StudioTest -from g2p.tests.test_doctor import DoctorTest -from g2p.tests.test_tokenizer import TokenizerTest from g2p.tests.test_tokenize_and_map import TokenizeAndMapTest -from g2p.tests.test_check_ipa_arpabet import 
CheckIpaArpabetTest +from g2p.tests.test_tokenizer import TokenizerTest +from g2p.tests.test_transducer import TransducerTest +from g2p.tests.test_unidecode_transducer import UnidecodeTransducerTest +from g2p.tests.test_utils import UtilsTest +# Deliberately left out: +# from g2p.tests.test_doctor_expensive import ExpensiveDoctorTest LOADER = TestLoader() @@ -32,7 +35,8 @@ LOADER.loadTestsFromTestCase(test) for test in [ IndicesTest, - TransducerTest + TransducerTest, + UnidecodeTransducerTest, ] ] diff --git a/g2p/tests/test_cli.py b/g2p/tests/test_cli.py index b5ee2b0e..0621ff94 100755 --- a/g2p/tests/test_cli.py +++ b/g2p/tests/test_cli.py @@ -17,7 +17,7 @@ def setUp(self): self.runner = APP.test_cli_runner() self.data_dir = os.path.dirname(data_dir) self.langs_to_test = [] - for fn in glob(f"{self.data_dir}/*.*sv"): + for fn in glob(os.path.join(self.data_dir, "*.*sv")): if fn.endswith("csv"): delimiter = "," elif fn.endswith("psv"): @@ -40,6 +40,7 @@ def test_update(self): self.assertEqual(result.exit_code, 0) def test_convert(self): + LOGGER.info(f"Running {len(self.langs_to_test)} g2p convert test cases found in public/data") error_count = 0 for tok_option in [["--tok", "--check"], ["--no-tok"]]: for test in self.langs_to_test: @@ -82,9 +83,13 @@ def test_doctor(self): self.assertEqual(result.exit_code, 0) self.assertIn("vagon", result.stdout) - result = self.runner.invoke(doctor) - self.assertEqual(result.exit_code, 0) - self.assertGreaterEqual(len(result.stdout), 10000) + # Disable this test: it's very slow (8s, just by itself) and does not assert + # anything useful. + # Migrated to test_doctor_expensive.py so we can still run it, manually or via + # ./run.py all. 
+ # result = self.runner.invoke(doctor) + # self.assertEqual(result.exit_code, 0) + # self.assertGreaterEqual(len(result.stdout), 10000) result = self.runner.invoke(doctor, "-m eng-arpabet") self.assertEqual(result.exit_code, 0) @@ -103,7 +108,7 @@ def test_doctor_lists(self): def not_test_scan_fra(self): # TODO: fix fra g2p so fra_panagrams.txt passes - result = self.runner.invoke(scan, f"fra {self.data_dir}/fra_panagrams.txt") + result = self.runner.invoke(scan, ["fra", os.path.join(self.data_dir, "fra_panagrams.txt")]) self.assertEqual(result.exit_code, 0) self.assertLogs(level="WARNING") diacritics = "àâéèêëîïôùûüç" @@ -115,7 +120,7 @@ def not_test_scan_fra(self): def test_scan_fra_simple(self): # For now, unit test g2p scan using a simpler piece of French - result = self.runner.invoke(scan, f"fra {self.data_dir}/fra_simple.txt") + result = self.runner.invoke(scan, ["fra", os.path.join(self.data_dir, "fra_simple.txt")]) self.assertEqual(result.exit_code, 0) self.assertLogs(level="WARNING") diacritics = "àâéèêëîïôùûüç" @@ -126,7 +131,7 @@ def test_scan_fra_simple(self): self.assertIn(c, result.stdout) def test_scan_str_case(self): - result = self.runner.invoke(scan, f'str {self.data_dir}/str_un_human_rights.txt') + result = self.runner.invoke(scan, ["str", os.path.join(self.data_dir, "str_un_human_rights.txt")]) returned_set = re.search('{(.*)}', result.stdout).group(1) self.assertEqual(result.exit_code, 0) self.assertLogs(level='WARNING') diff --git a/g2p/tests/test_doctor.py b/g2p/tests/test_doctor.py index d8805a58..02c104ef 100755 --- a/g2p/tests/test_doctor.py +++ b/g2p/tests/test_doctor.py @@ -15,7 +15,12 @@ def test_ipa_known_segs_fra(self): self.assertIn("panphon", "".join(cm.output)) self.assertGreaterEqual(len(cm.output), 2) - def test_ipa_known_segs_all(self): + # this test takes 8 seconds and doesn't do anything useful: it trivially increases + # code coverage but does not have enough assertions to catch a future code-breaking + # change. 
+ # Migrated to test_doctor_expensive.py so we can still run it, manually or via + # ./run.py all. + def not_test_ipa_known_segs_all(self): with self.assertLogs(LOGGER, level='WARNING') as cm: check_ipa_known_segs() self.assertGreaterEqual(len(cm.output), 20) diff --git a/g2p/tests/test_doctor_expensive.py b/g2p/tests/test_doctor_expensive.py new file mode 100755 index 00000000..fe86f886 --- /dev/null +++ b/g2p/tests/test_doctor_expensive.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +from unittest import TestCase, main + +from g2p.app import APP +from g2p.cli import doctor +from g2p.log import LOGGER +from g2p.mappings.langs.utils import check_ipa_known_segs + + +class ExpensiveDoctorTest(TestCase): + # We segregate the expensive tests for g2p doctor in this suite which is not included + # in dev, so that it doesn't slow down our Travis CI tests, but can still be run by + # hand when desired. + # These tests are not very good because they don't assert enough to make sure doctor + # actually works, but they still exercise the code. + # + # This test suite is deliberately left out of run.py: it will only get run if you run + # ./run.py all, or ./test_doctor_expensive.py. + + # Migrated here from test_cli.py + def test_doctor_cli(self): + # TODO: assert something more useful here... + # This test simulates calling "g2p doctor" on the command line with no arguments, + # which runs doctor on all mappings. + runner = APP.test_cli_runner() + result = runner.invoke(doctor) + self.assertEqual(result.exit_code, 0) + self.assertGreaterEqual(len(result.stdout), 10000) + + # Migrated here from test_doctor.py + def test_ipa_known_segs_all(self): + # This test simulates the innards of having called "g2p doctor" on the command + # line with no arguments, again running the innards of doctor on all mappings.
+ with self.assertLogs(LOGGER, level="WARNING") as cm: + check_ipa_known_segs() + self.assertGreaterEqual(len(cm.output), 20) + + +if __name__ == "__main__": + main() diff --git a/g2p/tests/test_tokenizer.py b/g2p/tests/test_tokenizer.py index dd16be61..63eec90e 100755 --- a/g2p/tests/test_tokenizer.py +++ b/g2p/tests/test_tokenizer.py @@ -96,8 +96,10 @@ def test_tokenize_kwk(self): ) def test_three_hop_tokenizer(self): - """ test the three hop tokenizer with haa -> haa-ipa via haa-equiv and haa-simp """ - tokenizer = tok.get_tokenizer("haa", tok_path=["haa", "haa-equiv", "haa-simp", "haa-ipa"]) + # This used to test the three hop tokenizer with haa -> haa-ipa via haa-equiv and haa-simp + # tokenizer = tok.get_tokenizer("haa", tok_path=["haa", "haa-equiv", "haa-simp", "haa-ipa"]) + # But now haa has been redesigned to not use haa-simp, so downgrade the test to two hops + tokenizer = tok.get_tokenizer("haa", tok_path=["haa", "haa-equiv", "haa-ipa"]) tokens = tokenizer.tokenize_text("ch'ch") self.assertEqual(len(tokens), 1) diff --git a/g2p/tests/test_unidecode_transducer.py b/g2p/tests/test_unidecode_transducer.py new file mode 100755 index 00000000..0f07022c --- /dev/null +++ b/g2p/tests/test_unidecode_transducer.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +import os +from unittest import TestCase, main + +from g2p import make_g2p +from g2p.mappings import Mapping +from g2p.transducer import Transducer + + +class UnidecodeTransducerTest(TestCase): + def test_unidecode_mapping(self): + m = Mapping(type="unidecode") + self.assertEqual(m.mapping, []) + self.assertEqual(m.kwargs["type"], "unidecode") + t = Transducer(m) + tg = t("été Nunavut ᓄᓇᕗᑦ") + self.assertEqual(tg.output_string, "ete Nunavut nonafot") + + def test_unidecode_g2p(self): + transducer = make_g2p("und", "und-ascii") + tg = transducer("éçà") + self.assertEqual(tg.output_string, "eca") + self.assertEqual(tg.edges, [(0,0),(1,0),(2,1),(3,1),(4,2),(5,2)]) + + def test_unidecode_empty_output(self): + 
transducer = make_g2p("und", "und-ascii") + # \u0361 on its own gets deleted completely by unidecode + tg = transducer("\u0361") + self.assertEqual(tg.output_string, "") + self.assertEqual(tg.edges, []) + + def test_unidecode_to_arpabet(self): + transducer = make_g2p("und", "eng-arpabet") + tg = transducer("été Nunavut ᓄᓇᕗᑦ") + self.assertEqual(tg.output_string, "EY T EY N UW N AA V UW T N OW N AA F OW T ") + + +if __name__ == "__main__": + main() diff --git a/g2p/transducer/__init__.py b/g2p/transducer/__init__.py index 476e64d3..f19aa533 100644 --- a/g2p/transducer/__init__.py +++ b/g2p/transducer/__init__.py @@ -6,6 +6,7 @@ import re import copy +import text_unidecode from typing import Dict, List, Pattern, Tuple, Union from collections import defaultdict, OrderedDict from collections.abc import Iterable @@ -453,7 +454,42 @@ def update_default_indices(self, tg, match, intermediate_diff, out_string): if edge[1] != None and edge[1] > index_to_delete: tg.edges[i][1] -= 1 + def apply_unidecode(self, to_convert: str): + if self.norm_form: + to_convert = normalize(to_convert, self.norm_form) + tg = TransductionGraph(to_convert) + + # Conversion is done character by character using unidecode + converted = [ + text_unidecode.unidecode(c) + for c in to_convert + ] + tg.output_string = "".join(converted) + + # Edges are calculated to follow the conversion step by step + if tg.output_string == "": + # Some inputs get completely deleted by unidecode, in which case there are no + # valid edges to output. 
+ tg.edges = [] + else: + edges = [] + x_len, y_len = 0, 0 + for tgt in converted: + if tgt: + for c in tgt: + edges.append((x_len, y_len)) + y_len += 1 + else: + edges.append((x_len, max(y_len-1, 0))) + x_len += 1 + tg.edges = edges + + return tg + def apply_rules(self, to_convert: str): + if self.mapping.kwargs.get("type", "") == "unidecode": + return self.apply_unidecode(to_convert) + # perform any normalization if not self.case_sensitive: to_convert = to_convert.lower() @@ -512,13 +548,14 @@ def apply_rules(self, to_convert: str): ) return tg - def check(self, tg: TransductionGraph, shallow=False, display_warnings=False): + def check(self, tg: TransductionGraph, shallow=False, display_warnings=False, original_input=None): out_lang = self.mapping.kwargs["out_lang"] if out_lang == "eng-arpabet": if not is_arpabet(tg.output_string): if display_warnings: + display_input = original_input if original_input else tg.input_string LOGGER.warning( - f'Transducer output "{tg.output_string}" is not fully valid eng-arpabet as recognized by soundswallower.' + f'Transducer output "{tg.output_string}" for input "{display_input}" is not fully valid eng-arpabet as recognized by soundswallower.' ) return False else: @@ -526,8 +563,9 @@ def check(self, tg: TransductionGraph, shallow=False, display_warnings=False): elif is_ipa(out_lang): if not is_panphon(tg.output_string, display_warnings=display_warnings): if display_warnings: + display_input = original_input if original_input else tg.input_string LOGGER.warning( - f'Transducer output "{tg.output_string}" is not fully valid {out_lang}.' + f'Transducer output "{tg.output_string}" for input "{display_input}" is not fully valid {out_lang}.' 
) return False else: @@ -644,13 +682,17 @@ def check( assert len(self._transducers) == len(tg._tiers) if shallow: return self._transducers[-1].check( - tg._tiers[-1], display_warnings=display_warnings + tg._tiers[-1], + display_warnings=display_warnings, + original_input=tg.input_string ) else: result = True for i, transducer in enumerate(self._transducers): if not transducer.check( - tg._tiers[i], display_warnings=display_warnings + tg._tiers[i], + display_warnings=display_warnings, + original_input=tg.input_string ): # Don't short circuit if warnings are required if display_warnings: diff --git a/requirements.txt b/requirements.txt index 49207778..dae93086 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,16 +2,16 @@ openpyxl click coloredlogs<=14.0 eventlet==0.30.2 -flask-cors +flask-cors>=3.0.9 Flask>=2.0.0 flask_socketio==4.3.2 -flask-talisman -flask-restful +flask-talisman>=0.7.0 +flask-restful>=0.3.9 gunicorn networkx -panphon -pyyaml +panphon>=0.19 +pyyaml>=5.2 regex requests tqdm -unidecode +text_unidecode