-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathcc_markov.py
83 lines (69 loc) · 2.37 KB
/
cc_markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import re
import random
from collections import defaultdict, deque
"""
Codecademy Pro Final Project supplementary code
Markov Chain generator
This is a text generator that uses Markov Chains to generate text
using a uniform distribution.
num_key_words is the number of words that compose a key (suggested: 2 or 3)
"""
class MarkovChain:
    """Word-level Markov chain text generator.

    Source text is lower-cased, stripped of a fixed set of punctuation
    characters and split on whitespace. Every run of ``num_key_words``
    consecutive words becomes a key in ``lookup_dict``; the word that
    follows the run is appended to that key's list of followers.
    Duplicates are kept, so picking a follower uniformly from the list
    favours followers that were seen more often.

    num_key_words is the number of words that compose a key
    (suggested: 2 or 3).
    """

    def __init__(self, num_key_words=2):
        self.num_key_words = num_key_words
        # Maps a tuple of key words -> list of words observed right after it.
        self.lookup_dict = defaultdict(list)
        # Raw string so the backslash escapes reach the regex engine intact
        # instead of being (invalid) Python string escapes.
        self._punctuation_regex = re.compile(r'[,.!;\?\:\-\[\]\n]+')
        self._seeded = False
        self.__seed_me()

    def __seed_me(self, rand_seed=None):
        """Seed the module-level ``random`` generator at most once.

        Does nothing when this instance has already seeded successfully.
        Otherwise seeds with ``rand_seed`` when given, or lets ``random``
        pick its own seed. A ``NotImplementedError`` from ``random.seed``
        leaves the instance marked as unseeded so a later call retries.
        """
        if self._seeded is True:
            return
        try:
            if rand_seed is not None:
                random.seed(rand_seed)
            else:
                random.seed()
            self._seeded = True
        except NotImplementedError:
            self._seeded = False

    # Build the Markov chain from a data source.
    # Use add_file() or add_string() to add the appropriate format source.

    def add_file(self, file_path):
        """Read the text file at ``file_path`` and add its words to the chain."""
        with open(file_path, 'r') as fh:
            self.__add_source_data(fh.read())

    def add_string(self, str):
        """Add the words of the string ``str`` to the chain."""
        # The parameter name shadows the builtin but is kept so existing
        # keyword callers (add_string(str=...)) continue to work.
        self.__add_source_data(str)

    def __add_source_data(self, text):
        """Normalise ``text`` and record every (key, follower) pair it contains."""
        clean_text = self._punctuation_regex.sub(' ', text).lower()
        for key, follower in self.__generate_tuple_keys(clean_text.split()):
            self.lookup_dict[key].append(follower)

    def __generate_tuple_keys(self, data):
        """Yield ``(key_tuple, following_word)`` for each window in ``data``.

        ``data`` is a list of words. Nothing is yielded when it has too
        few words to form a key plus a follower.
        """
        key_len = self.num_key_words
        if len(data) < key_len:
            return
        # range() instead of xrange(): works on both Python 2 and Python 3.
        for start in range(len(data) - key_len):
            yield tuple(data[start:start + key_len]), data[start + key_len]

    def generate_text(self, max_length=20):
        """Generate a list of words from the data the chain contains.

        max_length is the maximum number of words to generate. The
        starting key is always emitted in full, so when the chain is not
        empty the result holds ``num_key_words`` words even if
        ``max_length`` is smaller. Generation stops early when the
        current context has no recorded follower. Returns an empty list
        when no data has been added.
        """
        output = []
        if not self.lookup_dict:
            return output

        # No-op if seeding already succeeded in __init__.
        self.__seed_me(rand_seed=len(self.lookup_dict))

        # list(...) because dict views cannot be indexed on Python 3.
        context = deque(random.choice(list(self.lookup_dict)))

        while len(output) < (max_length - self.num_key_words):
            # .get() rather than [] so a dead-end context does not insert
            # an empty entry into the defaultdict during generation.
            next_choices = self.lookup_dict.get(tuple(context))
            if not next_choices:
                break
            context.append(random.choice(next_choices))
            output.append(context.popleft())

        output.extend(context)
        return output