-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
121 lines (110 loc) · 5.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# DialogueGetter extracts dialogues from Russian texts,
# cleans them and writes them to a dataset file <.txt>
# for use in NLP projects.
import datetime
import os
from loguru import logger
from tkinter import ttk
from tkinter import *
from tkinter.ttk import *
from FileTreeOps.dir_preprocessor import pipeline
from FileTreeOps.files_preprocessor import merge_files_in_directory
from FileTreeOps.files_preprocessor import delete_files_except_one
from FileTreeOps.dir_preprocessor import delete_directory_n_all_subdirs
from TextOps.dialogue_extractor import extract_n_save_replicas
# Module-level state shared by the UI callbacks below.
# Source directory accepted by input_src(); stays '' until a valid path is entered.
SRC_PATH: str = ''
# Output (dataset) directory accepted by input_dtset(); stays '' until a valid path is entered.
DTSET_PATH: str = ''
# UI callback functions
def input_src():
    """Validate the source directory typed into the UI and remember it.

    Reads the text of the ``str_src`` entry variable, strips surrounding
    whitespace and, if the result is an existing directory, stores it in
    the module-level ``SRC_PATH``. The outcome is logged and written to
    the status line (``display_info_string_var``).
    """
    global SRC_PATH
    candidate = str_src.get().strip()

    # Guard clause: an invalid path leaves SRC_PATH untouched.
    if not os.path.isdir(candidate):
        logger.error('Input directory not found')
        display_info_string_var.set('Current state: Input directory not found')
        return

    SRC_PATH = candidate
    logger.info('Input directory found')
    display_info_string_var.set('Current state: Work in progress')
def input_dtset():
    """Validate the output directory and run the dialogue extraction.

    Reads the text of the ``str_dtset`` entry variable, strips surrounding
    whitespace and, if the result is an existing directory, stores it in
    the module-level ``DTSET_PATH``. Processing then runs only when the
    source directory held in ``SRC_PATH`` is also an existing directory:
    ``pipeline(SRC_PATH)`` pre-processes the source files and
    ``extract_n_save_replicas(SRC_PATH, DTSET_PATH)`` extracts and saves
    the dialogues. Every outcome is logged and/or shown on the status line.
    """
    global DTSET_PATH
    out_dir = str_dtset.get().strip()

    if not os.path.isdir(out_dir):
        logger.error('Output directory not found')
        display_info_string_var.set('Current state: Output directory not found')
        return

    DTSET_PATH = out_dir
    logger.info('Output directory found')

    # The source directory is confirmed with a separate button. If that step
    # was skipped SRC_PATH is still '' (or the directory may have vanished),
    # so refuse to start processing instead of passing a bad path downstream.
    if not os.path.isdir(SRC_PATH):
        logger.error('Input directory not found')
        display_info_string_var.set('Current state: Input directory not found')
        return

    # pre-process files in src dir so they are ready for dialogue extraction
    pipeline(SRC_PATH)
    # extract & save dialogues
    extract_n_save_replicas(SRC_PATH, DTSET_PATH)
    display_info_string_var.set('Current state: Work done')
def merge_files():
    """Merge the dataset files in the output directory into one file.

    Passes the module-level ``DTSET_PATH`` to ``merge_files_in_directory``
    and reports the result on the status line. If no existing output
    directory has been confirmed yet, nothing is merged and the status
    line reports the missing directory instead of a false success.
    """
    # DTSET_PATH is '' until input_dtset() has accepted a directory.
    if not os.path.isdir(DTSET_PATH):
        logger.error('Output directory not found')
        display_info_string_var.set('Current state: Output directory not found')
        return

    merge_files_in_directory(DTSET_PATH)
    display_info_string_var.set('Current state: All datasets are merged into all_in_one_dataset_file')
# delete autogenerated tmp files
def delete_tmp_files():
    """Remove the automatically generated temporary directory and files.

    Deletes the ``automatically_created_copies_of_utf_encoded_files``
    directory inside ``SRC_PATH`` and then calls ``delete_files_except_one``
    on ``DTSET_PATH``, keeping ``all_in_one_dataset_file.txt``.

    Both directories must have been confirmed first. With an empty
    ``SRC_PATH`` the joined path would be relative to the current working
    directory, so the destructive calls are skipped and the status line
    reports which directory is missing.
    """
    if not os.path.isdir(SRC_PATH):
        logger.error('Input directory not found')
        display_info_string_var.set('Current state: Input directory not found')
        return
    if not os.path.isdir(DTSET_PATH):
        logger.error('Output directory not found')
        display_info_string_var.set('Current state: Output directory not found')
        return

    # delete tmp dir in src dir
    tmp_copies_dir = os.path.join(
        SRC_PATH, "automatically_created_copies_of_utf_encoded_files"
    )
    delete_directory_n_all_subdirs(tmp_copies_dir)

    # delete tmp files in the dataset dir, keeping only the merged dataset
    delete_files_except_one(DTSET_PATH, 'all_in_one_dataset_file.txt')
    display_info_string_var.set('Current state: All tmp dirs and files are deleted')
if __name__ == '__main__':
    # Fonts and decoration shared by several widgets
    body_font = ("Helvetica", "12")
    ornament_font = ("Helvetica", "27")
    ornament_text = '. .. ... ❦ .... .....'

    # --- Main window (fixed size) ---
    root = Tk()
    root.title("Dialogues Getter RU")
    root.geometry('615x432')
    root.resizable(False, False)

    # --- Decorative header ---
    heart_label = Label(root, text=ornament_text, font=ornament_font)
    heart_label.grid(row=0, column=0, columnspan=3, pady=10)

    # --- Source directory: instructions, entry field and OK button ---
    info_label = Label(
        root,
        text='At first copy all < .txt > source-files in a separate directory of your computer.',
        font=body_font,
    )
    info_label.grid(row=1, column=0, columnspan=3, sticky=W, pady=10, padx=5)

    entry_label_src = Label(
        root,
        text="Then enter path to the directory where the source-files were copied to:",
        font=body_font,
    )
    entry_label_src.grid(row=2, column=0, columnspan=3, sticky=W, pady=10, padx=5)

    # Read by input_src(); must stay a module-level name.
    str_src = StringVar()
    entry_src = Entry(root, width=63, textvariable=str_src)
    entry_src.grid(row=3, column=0, columnspan=2, sticky=W, padx=5)
    path_src = Button(root, text="OK", command=input_src)
    path_src.grid(row=3, column=2, sticky=E)

    # --- Output directory: instructions, entry field and GO button ---
    entry_label_dtset = Label(
        root,
        text="Enter path to directory where you would like to put the datasets that will be created:",
        font=body_font,
    )
    entry_label_dtset.grid(row=4, column=0, columnspan=3, sticky=W, pady=10, padx=5)

    # Read by input_dtset(); must stay a module-level name.
    str_dtset = StringVar()
    entry_dtset = Entry(root, width=63, textvariable=str_dtset)
    entry_dtset.grid(row=5, column=0, columnspan=2, sticky=W, padx=5)
    path_dtset = Button(root, text="GO", command=input_dtset)
    path_dtset.grid(row=5, column=2, sticky=E)

    # --- Status line, updated by every callback ---
    display_info_string_var = StringVar()
    display_info_string_var.set('Current state: Program is idle. Waiting for paths data.')
    display_info_label = Label(
        root,
        textvariable=display_info_string_var,
        font="Helvetica 12",
        foreground="red",
    )
    display_info_label.grid(row=6, column=0, columnspan=3, sticky=W, pady=10, padx=5)

    # --- Wide action buttons; a ttk style gives them a larger font ---
    style = Style()
    style.configure('big.TButton', font=('Helvetica', 12))

    merge_files_btn = Button(
        root,
        text="Merge all datasets created in output directory into all-in-one big dataset file",
        style="big.TButton",
        command=merge_files,
    )
    merge_files_btn.grid(row=7, column=0, columnspan=3, sticky=EW, pady=5, padx=5)

    del_files_btn = Button(
        root,
        text="Delete all automatically generated temporary files",
        style="big.TButton",
        command=delete_tmp_files,
    )
    del_files_btn.grid(row=8, column=0, columnspan=3, sticky=EW, pady=5, padx=5)

    # --- Decorative footer ---
    heart_label_end = Label(root, text=ornament_text, font=ornament_font)
    heart_label_end.grid(row=9, column=0, columnspan=3, pady=15)

    # Hand control to the Tk event loop
    root.mainloop()