-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseparasilabas.py
70 lines (59 loc) · 2.43 KB
/
separasilabas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
# Based on Mabodo's ipython notebook (https://github.com/mabodo/sibilizador)
# (c) Mabodo
class char():
    """Empty placeholder class; it defines no attributes and no behaviour."""

    def __init__(self):
        # Intentionally a no-op: instances carry no state.
        pass
class char_line():
    """A word together with a one-letter class code for each of its characters.

    Attributes:
        word: the text exactly as it was passed in.
        char_line: list of ``(character, class_code)`` pairs, one per character.
        type_line: the class codes joined into a string of the same length as
            ``word``; this is the string that split patterns are searched in.

    Class codes are ``'V'`` (strong or accented vowel), ``'v'`` (weak vowel),
    ``'x'``, ``'s'`` and ``'c'`` (anything else).
    """

    # Built once instead of on every char_type() call.
    _STRONG_VOWELS = frozenset(('a', 'á', 'e', 'é', 'o', 'ó', 'í', 'ú'))
    _WEAK_VOWELS = frozenset(('i', 'u', 'ü'))

    def __init__(self, word):
        self.word = word
        self.char_line = [(ch, self.char_type(ch)) for ch in word]
        self.type_line = ''.join(code for _, code in self.char_line)

    def char_type(self, char):
        """Return the class code for a single character.

        Classification is case-insensitive so that capitalised words such as
        "Agua" get the same codes as their lowercase form.
        """
        lowered = char.lower()
        if lowered in self._STRONG_VOWELS:
            return 'V'  # strong vowel
        if lowered in self._WEAK_VOWELS:
            return 'v'  # weak vowel
        if lowered == 'x':
            return 'x'
        if lowered == 's':
            return 's'
        return 'c'

    def find(self, finder):
        """Return the index of the first match of ``finder`` in ``type_line``, or -1."""
        return self.type_line.find(finder)

    def split(self, pos, where):
        """Cut the word at index ``pos + where`` and return both halves as char_lines."""
        cut = pos + where
        return char_line(self.word[0:cut]), char_line(self.word[cut:])

    def split_by(self, finder, where):
        """Cut the word ``where`` characters after the start of the first ``finder`` match.

        Returns:
            ``(left, right)`` as two char_line objects when the pattern occurs
            in ``type_line``; otherwise ``(self, False)``.
        """
        split_point = self.find(finder)
        if split_point != -1:
            return self.split(split_point, where)
        return self, False

    def __str__(self):
        return self.word

    def __repr__(self):
        return repr(self.word)
class silabizer():
    """Recursively split a word into syllable-like pieces using ordered rules.

    Each rule is ``(pattern, offset)``: ``pattern`` is searched in the word's
    ``type_line`` and the word is cut ``offset`` characters after the start of
    the first match. Rules are tried in order; the first cut that is not
    vetoed is applied and both halves are split again.
    """

    # Order matters: earlier rules take precedence over later ones.
    _RULES = (
        ('VV', 1), ('cccc', 2), ('xcc', 1), ('ccx', 2), ('csc', 2),
        ('xc', 1), ('cc', 1), ('vcc', 2), ('Vcc', 2), ('sc', 1),
        ('cs', 1), ('Vc', 1), ('vc', 1), ('Vs', 1), ('vs', 1),
    )
    # A half whose whole type_line is one of these has no vowel, so it may
    # not stand on its own.
    _VOWELLESS = frozenset(('c', 's', 'x', 'cs'))
    # Two-letter groups that must never be separated by a cut.
    _DIGRAPHS = frozenset(('ll', 'rr', 'ch'))

    def __init__(self):
        self.grammar = []

    def split(self, chars):
        """Return the list of pieces ``chars`` breaks into.

        Args:
            chars: an object exposing ``word``, ``type_line`` and
                ``split_by(pattern, offset)`` (as ``char_line`` does).

        Returns:
            A list of such objects; ``[chars]`` when no rule yields an
            acceptable cut.
        """
        for split_rule, where in self._RULES:
            first, second = chars.split_by(split_rule, where)
            if not second:
                # Pattern not present in this word.
                continue
            if (first.type_line in self._VOWELLESS
                    or second.type_line in self._VOWELLESS):
                continue
            # Letters on either side of the proposed cut.
            before = first.word[-1].lower()
            after = second.word[0].lower()
            if first.type_line[-1] == 'c' and after in ('l', 'r'):
                # Keep a consonant together with a following l / r.
                continue
            if before + after in self._DIGRAPHS:
                # The cut would fall inside "ll", "rr" or "ch".
                continue
            return self.split(first) + self.split(second)
        return [chars]

    def __call__(self, word):
        """Split the string ``word`` and return the resulting pieces."""
        return self.split(char_line(word))