-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
36 lines (31 loc) · 828 Bytes
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import re
from nltk.corpus import stopwords
def clean_not_english_word(x):
    """Replace every run of disallowed characters with a single space.

    Allowed characters are ASCII letters, the period and the apostrophe;
    any maximal run of other characters becomes one space, and the result
    has leading/trailing whitespace stripped.

    Args:
        x: Text to clean, or None.

    Returns:
        The cleaned string, or None when ``x`` is None.
    """
    if x is None:
        return None
    # One space per run of characters outside [a-zA-Z.'].
    replaced = re.sub("[^a-zA-Z.']+", ' ', x)
    return replaced.strip()
def clean_stopwords(x):
    """Remove English stop words from a space-separated string.

    The text is split on single spaces, tokens found in
    ``stopwords.words('english')`` are dropped, and the remaining tokens
    are re-joined with single spaces. Matching is an exact,
    case-sensitive string comparison against the entries of that list.

    Args:
        x: Text to filter, or None.

    Returns:
        The filtered string, or None when ``x`` is None.
    """
    if x is None:
        # Return early so a None input never touches the stop-word corpus.
        return None
    # A set gives O(1) membership tests instead of scanning a list per token.
    stopword_set = frozenset(stopwords.words('english'))
    # Split on ' ' (not arbitrary whitespace) so empty tokens produced by
    # consecutive spaces are preserved exactly as before.
    return ' '.join(
        word for word in x.split(' ') if word not in stopword_set
    )
def clean_special_word(x):
    """Replace runs of parentheses with a space and strip the result.

    Each maximal run of ``(`` and/or ``)`` characters becomes one space;
    leading and trailing whitespace is then removed.

    Args:
        x: Text to clean, or None.

    Returns:
        The cleaned string, or None when ``x`` is None.
    """
    return None if x is None else re.sub("[()]+", ' ', x).strip()
def clean_comma(x):
    """Replace runs of commas / ideographic full stops with a space.

    Each maximal run of ``,`` and/or ``。`` characters becomes one space;
    leading and trailing whitespace is then removed.

    Args:
        x: Text to clean, or None.

    Returns:
        The cleaned string, or None when ``x`` is None.
    """
    if x is None:
        return None
    separator_run = "[,。]+"
    without_separators = re.sub(separator_run, ' ', x)
    return without_separators.strip()