-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeneration.py
81 lines (68 loc) · 3.11 KB
/
generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
## This module allows you to **build** the matrix from the processed dictionary and **generate** words.
"""
from random import choices
from itertools import product
###########################################
# N dimensions matrix, N-1 letters before #
###########################################
separator_list = [' ', '\t', '-', '_', ',', ';', ':', '|']


def find_separator(alphabet):
    """
    `find_separator()` walks `separator_list` in order and hands back the first candidate that the alphabet does not contain.
    * **alphabet** (*list*): the used alphabet (from input file or from dictionary)
    * **return** (*char*): the first entry of `separator_list` that is absent from the alphabet
    * **raises** (*Exception*): when every entry of `separator_list` is already in the alphabet
    """
    # Lazily filter the candidates; only the first free one is ever needed.
    free_candidates = (candidate for candidate in separator_list if candidate not in alphabet)
    chosen = next(free_candidates, None)
    if chosen is None:
        raise Exception(f"no separator available: all characters in {separator_list} are in the alphabet, maybe try to add one manually in the code")
    return chosen
def build_ND_matrix(dictionary, alphabet, N):
    """
    `build_ND_matrix()` creates an N dimension matrix (dict of dict object) and fills it with occurrence counts gathered from the dictionary.

    The outer keys are every string of N-1 alphabet symbols; each inner dict maps every alphabet symbol to the number of times it followed that string. The last element of the alphabet is used as the separator: it pads the start of each word and is counted N-1 times after the last letter.
    * **dictionary** (*list*): the input dictionary (after processing)
    * **alphabet** (*list*): the used alphabet, whose last element is taken as the separator
    * **N** (*int*): the dimension of the matrix
    * **return** (*dict*): the matrix of counts, `matrix[previous N-1 symbols][next symbol]`
    """
    separator = alphabet[-1]
    context_size = N - 1

    # One zeroed row per possible context of N-1 symbols.
    matrix = {
        ''.join(context): dict.fromkeys(alphabet, 0)
        for context in product(alphabet, repeat=context_size)
    }

    for word in dictionary:
        # Every word starts from a context made only of separators.
        context = context_size * separator
        # The word's letters, then N-1 separators marking the end of the word.
        symbols = [*word, *([separator] * context_size)]
        for symbol in symbols:
            matrix[context][symbol] += 1
            context = context[1:] + symbol
    return matrix
def generate_word_ND(matrix, alphabet, prefix, N):
    """
    `generate_word_ND()` generates a word by repeatedly drawing the next letter with `random.choices()`, weighted by the matrix row of the last N-1 letters, until the separator is drawn.
    * **matrix** (*dict*): the matrix of counts, `matrix[previous N-1 symbols][next symbol]`; the values of each row must be in the same order as the alphabet
    * **alphabet** (*list*): the used alphabet, whose last element is taken as the separator
    * **prefix** (*str*): the prefix requested for the generated words
    * **N** (*int*): the dimension of the matrix
    * **return** (*str*): the generated word (length variable), starting with the prefix
    * **raises** (*ValueError*): when the current context has no recorded continuation (all weights are zero), e.g. a prefix that never appears in the dictionary
    * **raises** (*KeyError*): when the context is not a key of the matrix, e.g. the prefix contains a symbol outside the alphabet
    """
    separator = alphabet[-1]
    context_size = N - 1

    # Starting context: the last N-1 symbols of the prefix, left-padded with
    # separators when the prefix is shorter than that.
    padded = context_size * separator + prefix
    previous_letters = padded[len(padded) - context_size:]

    letters = [prefix]
    while True:
        weights = list(matrix[previous_letters].values())
        if sum(weights) <= 0:
            # Without this check random.choices() fails with a generic
            # message that does not say which context is the problem.
            raise ValueError(
                f"cannot continue the word after {previous_letters!r}: "
                "this sequence has no recorded continuation in the matrix"
            )
        new_letter = choices(population=alphabet, weights=weights, k=1)[0]
        if new_letter == separator:
            return ''.join(letters)
        letters.append(new_letter)
        # Slide the context window; written this way so that an empty
        # context (N == 1) stays empty instead of growing to one symbol.
        previous_letters = (previous_letters + new_letter)[1:]