# Patch summary (text before the first "diff --git" header is preamble, not patch content):
#   * .pre-commit-config.yaml: change the astral-sh/ruff-pre-commit hook rev from v0.7.4 to v0.9.2.
#   * onnxtr/utils/vocabs.py: add the "gujarati_vowels", "gujarati_consonants", "gujarati_digits"
#     and "gujarati_punctuation" entries next to the existing hindi_*/bangla_* entries, and define
#     VOCABS["gujarati"] as those four entries concatenated in that order, followed by
#     VOCABS["punctuation"].
# NOTE(review): the leading whitespace of the hunk lines below is reconstructed from the YAML and
#   Python nesting; confirm it matches the target files before applying this patch.
# NOTE(review): "gujarati_punctuation" contains U+25CC (dotted circle) directly before the
#   anusvara sign; presumably a display placeholder that was copied along with the combining
#   mark. Confirm whether it is meant to be part of the vocabulary.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5b283e9..56d7169 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
       - id: no-commit-to-branch
         args: ['--branch', 'main']
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.4
+    rev: v0.9.2
     hooks:
       - id: ruff
         args: [ --fix ]
diff --git a/onnxtr/utils/vocabs.py b/onnxtr/utils/vocabs.py
index c1941f7..e51fc83 100644
--- a/onnxtr/utils/vocabs.py
+++ b/onnxtr/utils/vocabs.py
@@ -22,6 +22,10 @@
     "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
     "hindi_digits": "०१२३४५६७८९",
     "hindi_punctuation": "।,?!:्ॐ॰॥",
+    "gujarati_vowels": "અઆઇઈઉઊઋએઐઓ",
+    "gujarati_consonants": "ખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ",
+    "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
+    "gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱",
     "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
     "bangla_digits": "০১২৩৪৫৬৭৮৯",
     "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
@@ -58,6 +62,13 @@
 )
 VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
 VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
+VOCABS["gujarati"] = (
+    VOCABS["gujarati_vowels"]
+    + VOCABS["gujarati_consonants"]
+    + VOCABS["gujarati_digits"]
+    + VOCABS["gujarati_punctuation"]
+    + VOCABS["punctuation"]
+)
 VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
 VOCABS["ukrainian"] = (
     VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"