Skip to content

Commit

Permalink
[Sync] Add gujarati vocab
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 committed Jan 20, 2025
1 parent 72d2a59 commit 811fc39
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ repos:
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.4
rev: v0.9.2
hooks:
- id: ruff
args: [ --fix ]
Expand Down
11 changes: 11 additions & 0 deletions onnxtr/utils/vocabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
"hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
"hindi_digits": "०१२३४५६७८९",
"hindi_punctuation": "।,?!:्ॐ॰॥",
"gujarati_vowels": "અઆઇઈઉઊઋએઐઓ",
"gujarati_consonants": "ખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ",
"gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
"gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱",
"bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
"bangla_digits": "০১২৩৪৫৬৭৮৯",
"generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
Expand Down Expand Up @@ -58,6 +62,13 @@
)
# Hebrew: the "english" vocab followed by the Hebrew letters and the shekel sign.
VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
# Hindi: concatenation of the hindi_letters, hindi_digits and hindi_punctuation entries.
VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
# Gujarati: the four gujarati_* entries (vowels, consonants, digits, punctuation)
# followed by the generic "punctuation" entry.
VOCABS["gujarati"] = (
VOCABS["gujarati_vowels"]
+ VOCABS["gujarati_consonants"]
+ VOCABS["gujarati_digits"]
+ VOCABS["gujarati_punctuation"]
+ VOCABS["punctuation"]
)
# Bangla: concatenation of the bangla_letters and bangla_digits entries.
VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
VOCABS["ukrainian"] = (
VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
Expand Down

0 comments on commit 811fc39

Please sign in to comment.