-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathpreprocess.py
29 lines (26 loc) · 1.01 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import hydra
import librosa
import utils
from os.path import expanduser, exists, basename, join
from utils import read_filelist, write_filelist, find_all_files
from tqdm import tqdm
@hydra.main(version_base=None, config_path='config', config_name='default')
def preprocess(cfg):
os.makedirs('filelists', exist_ok=True)
# train
root = cfg.preprocess.datasets.LibriSpeech.root_val
root = expanduser(root)
trainfiles = []
print(f'Root: {root}')
for subset in cfg.preprocess.datasets.LibriSpeech.testsets:
files = find_all_files(join(root, subset), '.flac')
print(f'Found {len(files)} flac files in {subset}')
for i in range(len(files)):
files[i][1] = files[i][1].replace(root, '').lstrip('/')
trainfiles.extend(files)
print(f'Write train filelist to {cfg.preprocess.view.test_filelist}')
os.makedirs('filelists', exist_ok=True)
utils.write_filelist(trainfiles, cfg.preprocess.view.test_filelist)
if __name__ == '__main__':
preprocess()