-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
51 lines (35 loc) · 1.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from embeddings.functions import corpus2plainfile, proc_phrases, train_wordvectors, eval_embeddings, t_sne_scatterplot
import argparse
# Command-line interface for the pipeline: the store_true switches select
# which stages to run, the remaining options name the corpus folder and the
# intermediate output files.
parser = argparse.ArgumentParser(
    description='Process tweets containing Maori loanwords and train embeddings',
)

# (flag, add_argument keyword settings) pairs. They are registered in the
# order listed, which is also the order argparse shows them in --help.
_OPTION_SPECS = [
    ('--proc_corpus', dict(action='store_true',
                           help='Process corpus')),
    ('--corpus_path', dict(type=str, default="data/rawtweets",
                           help='Path to the corpus folder',
                           metavar='')),
    ('--tweet_outfile', dict(type=str, default="all_tweets.txt",
                             help='Name of processed corpus',
                             metavar='')),
    ('--proc_phrases', dict(action='store_true',
                            help='Extract phrases')),
    ('--phrase_outfile', dict(type=str, default="all_tweets_phrases.txt",
                              help='Name of processed corpus after detecting phrases',
                              metavar='')),
    ('--train_wordvectors', dict(action='store_true',
                                 help='Train word vectors')),
    ('--eval_wordvectors', dict(action='store_true',
                                help='Evaluate word vectors')),
    ('--plot_wordvectors', dict(action='store_true',
                                help='Plot word vectors')),
]
for _flag, _settings in _OPTION_SPECS:
    parser.add_argument(_flag, **_settings)

# Parse sys.argv once; the stage dispatch that follows reads from `args`.
args = parser.parse_args()
# Run the pipeline stages selected on the command line. Each stage is guarded
# only by its own flag, and the stages always execute in the order below.
if args.proc_corpus:
    corpus2plainfile(args.corpus_path, args.tweet_outfile)

if args.proc_phrases:
    proc_phrases(args.tweet_outfile, args.phrase_outfile)

if args.train_wordvectors:
    # Train on the file named by --phrase_outfile instead of a hard-coded
    # 'all_tweets_phrases.txt', so a custom --phrase_outfile is honoured by
    # both the phrase stage and training. The option's default is that same
    # file name, so default invocations behave as before.
    train_wordvectors(args.phrase_outfile, 'experiments')

if args.eval_wordvectors:
    # Paths are fixed here: 'experiments', the gold pairs CSV, and the
    # results CSV are not configurable from the command line.
    eval_embeddings('experiments', 'data/gold_pairs.csv', 'emb_res.csv')

if args.plot_wordvectors:
    t_sne_scatterplot()