-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathte.yaml
108 lines (95 loc) · 2.24 KB
/
te.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Parent configuration file(s) for this config (presumably merged in by the
# project's config loader -- confirm against the loader).
base_config:
  - ./base.yaml

# Dataset directories: raw, processed and binarized data.
raw_data_dir: data/raw/gtsinger
processed_data_dir: data/processed/gtsinger
binary_data_dir: data/binary/gtsinger

# Dataset names used in training; ';' separated.
ds_names_in_training: ''
# Dataset names used in testing; the default '' means all in testing.
ds_names_in_testing: ''

# Directory of the binarized noise data.
noise_data_dir: data/binary/musan_24k

# Dotted path of the binarizer class.
binarizer_cls: research.singtech.config.te_binarizer.TechExtractionBinarizer
# Per-feature flags grouped under `binarization_args`.
# The flags must be indented beneath the key: written flush with it, they load
# as unrelated top-level settings and `binarization_args` itself becomes null.
binarization_args:
  with_wav: true
  with_mel: false
  with_breathiness: true
  with_energy: true
  with_zcr: true
# Dotted path of the task class.
task_cls: research.singtech.te_task.TETask

# Test-set item prefixes (none configured here). Entries added to this list
# should be identical to the corresponding item names in the dataset.
# Written as an explicit empty list on one line: a multi-line flow sequence
# whose closing bracket sits at column 0 is rejected by strict YAML parsers.
test_prefixes: []
# Audio and spectral-analysis settings.
audio_sample_rate: 24000
# When the analysis window is shorter than fft_size, the extra length is
# filled with zero padding to match this parameter (win_size equals fft_size
# in this file).
fft_size: 512
win_size: 512
hop_size: 128
fmin: 30
# Equal to audio_sample_rate / 2 with the values in this file.
fmax: 12000
# General network hyper-parameters.
dropout: 0.1
hidden_size: 256
# ---------------------------------------------------------------------------
# Model related params
# ---------------------------------------------------------------------------
model: te
conformer_kernel: 9

# '-'-joined value lists (four entries each). Quoted so they are explicitly
# strings regardless of the parser's implicit typing rules.
updown_rates: '2-2-2-2'
channel_multiples: '1-1-1-1'

# Allowed values: conv | conformer | transformer | wavenet
bkb_net: conformer
bkb_layers: 2

unet_skip_layer: false
# 1 / sqrt(2)
unet_skip_scale: 0.7071067812
# ---------------------------------------------------------------------------
# Note prediction related hparams
# ---------------------------------------------------------------------------

# --- f0 ---
# Default: gaussian:0.04; `none` can also be used.
# Quoted because the value contains a ':'.
f0_add_noise: 'gaussian:0.04'

# --- pitch ---
# Allowed values: rmvpe | parselmouth | pw
pe: rmvpe
# Placeholder path -- replace with the location of the actual checkpoint.
pe_ckpt: /path/your/pe/ckpt
f0_bin: 256
f0_filepath: ''
use_pitch_embed: true

# --- tech ---
tech_num: 6
tech_threshold: 0.8
# tech_label_smoothing: 0.005
tech_temperature: 0.01
tech_attn_num_head: 4
tech_focal_loss: 5.0

# --- lambdas ---
lambda_tech: 1.0
# Lambdas joined with '-' (empty here).
lambdas_tech: ''
lambda_tech_focal: 3.0
label_pos_weight_decay: 0.95
# ---------------------------------------------------------------------------
# Noise
# ---------------------------------------------------------------------------
# Range written as '<low>-<high>'; quoted so it is explicitly a string.
noise_snr: '6-20'
noise_prob: 0.8
noise_in_test: false

loud_norm: true

# Mel input settings.
use_mel: true
use_mel_bins: 80
# Allowed kinds: gaussian | none | musan
# Quoted because the value contains a ':'.
mel_add_noise: 'gaussian:0.05'

# Remaining input feature switches.
use_wav: false
use_f0: true
use_breathiness: false
use_energy: false
use_zcr: false
# Data granularity.
# 16 == 2*2*2*2, the product of the updown_rates entries above -- presumably
# keeps frame counts divisible through the resampling stages; TODO confirm.
frames_multiple: 16
dataset_downsample_rate: 1.0
# Learning rate (1e-6) and its schedule.
lr: 0.000001
scheduler: step_lr
scheduler_lr_step_size: 500
warmup_updates: 0
# Training length limits.
max_epochs: 1000
max_updates: 60000
# Batch size limits.
max_tokens: 80000
max_sentences: 32
# Validation and checkpointing.
val_check_interval: 500
save_best: true
num_valid_plots: 10
num_valid_stats: 100
# Runtime and data-loading options.
accumulate_grad_batches: 4
find_unused_parameters: true
pin_memory: true
ds_workers: 8
torch_compile: false