Skip to content
This repository has been archived by the owner on Oct 13, 2022. It is now read-only.

Sync snowfall with Lhotse's dataset/sampler refactoring PR #95

Merged
merged 2 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions egs/aishell/asr/simple_v1/ctc_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,24 @@
# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu)
# Apache 2.0

import k2
import logging
import os
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from pathlib import Path
from typing import List
from typing import Optional
from typing import Union

import k2
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from lhotse import CutSet
from lhotse.dataset.speech_recognition import K2SpeechRecognitionIterableDataset

from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler
from snowfall.common import get_phone_symbols
from snowfall.common import load_checkpoint
from snowfall.common import setup_logger
from snowfall.decoding.graph import compile_LG
from snowfall.models import AcousticModel
from snowfall.models.tdnn import Tdnn1a
from snowfall.models.tdnn_lstm import TdnnLstm1b
from snowfall.training.ctc_graph import build_ctc_topo

Expand Down Expand Up @@ -168,12 +166,10 @@ def main():
 cuts_test = CutSet.from_json(feature_dir / 'cuts_test.json.gz')

 print("About to create test dataset")
-test = K2SpeechRecognitionIterableDataset(cuts_test,
-max_frames=100000,
-shuffle=False,
-concat_cuts=False)
+test = K2SpeechRecognitionDataset(cuts_test)
+sampler = SingleCutSampler(cuts_test, max_frames=100000)
 print("About to create test dataloader")
-test_dl = torch.utils.data.DataLoader(test, batch_size=None, num_workers=1)
+test_dl = torch.utils.data.DataLoader(test, batch_size=None, sampler=sampler, num_workers=1)

 # if not torch.cuda.is_available():
 # logging.error('No GPU detected!')
Expand Down
72 changes: 42 additions & 30 deletions egs/aishell/asr/simple_v1/ctc_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,28 @@
# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu)
# Apache 2.0

import k2
import logging
import math
import numpy as np
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, Tuple

import k2
import numpy as np
import torch
import torch.optim as optim

from lhotse import CutSet
from lhotse.dataset.speech_recognition import K2SpeechRecognitionIterableDataset
from lhotse.utils import fix_random_seed

from datetime import datetime
from pathlib import Path
from torch import nn
from torch.nn.utils import clip_grad_value_
from torch.utils.tensorboard import SummaryWriter
from typing import Dict, Optional, Tuple

from snowfall.common import save_checkpoint, load_checkpoint
from lhotse import CutSet
from lhotse.dataset import CutConcatenate, CutMix, K2SpeechRecognitionDataset, SingleCutSampler
from lhotse.utils import fix_random_seed
from snowfall.common import get_phone_symbols
from snowfall.common import load_checkpoint, save_checkpoint
from snowfall.common import save_training_info
from snowfall.common import setup_logger
from snowfall.common import get_phone_symbols
from snowfall.models import AcousticModel
from snowfall.models.tdnn_lstm import TdnnLstm1b
from snowfall.training.ctc_graph import CtcTrainingGraphCompiler
Expand Down Expand Up @@ -275,25 +272,39 @@ def main():
 cuts_musan = CutSet.from_json(feature_dir / 'cuts_musan.json.gz')

 logging.info("About to create train dataset")
-train = K2SpeechRecognitionIterableDataset(cuts_train,
-max_frames=90000,
-shuffle=True,
-aug_cuts=cuts_musan,
-aug_prob=0.5,
-aug_snr=(10, 20))
-logging.info("About to create dev dataset")
-validate = K2SpeechRecognitionIterableDataset(cuts_dev,
-max_frames=90000,
-shuffle=False,
-concat_cuts=False)
+train = K2SpeechRecognitionDataset(
+cuts_train,
+cut_transforms=[
+CutConcatenate(),
+CutMix(
+cuts=cuts_musan,
+prob=0.5,
+snr=(10, 20)
+)
+]
+)
+train_sampler = SingleCutSampler(
+cuts_train,
+max_frames=90000,
+shuffle=True,
+)
 logging.info("About to create train dataloader")
-train_dl = torch.utils.data.DataLoader(train,
-batch_size=None,
-num_workers=4)
+train_dl = torch.utils.data.DataLoader(
+train,
+sampler=train_sampler,
+batch_size=None,
+num_workers=4
+)
+logging.info("About to create dev dataset")
+validate = K2SpeechRecognitionDataset(cuts_dev)
+valid_sampler = SingleCutSampler(cuts_dev, max_frames=90000)
 logging.info("About to create dev dataloader")
-valid_dl = torch.utils.data.DataLoader(validate,
-batch_size=None,
-num_workers=1)
+valid_dl = torch.utils.data.DataLoader(
+validate,
+sampler=valid_sampler,
+batch_size=None,
+num_workers=1
+)

 if not torch.cuda.is_available():
 logging.error('No GPU detected!')
Expand Down Expand Up @@ -335,6 +346,7 @@ def main():
 weight_decay=5e-4)

 for epoch in range(start_epoch, num_epochs):
+train_sampler.set_epoch(epoch)
 curr_learning_rate = 1e-3
 # curr_learning_rate = learning_rate * pow(0.4, epoch)
 # for param_group in optimizer.param_groups:
Expand Down
28 changes: 12 additions & 16 deletions egs/aishell/asr/simple_v1/mmi_bigram_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,28 @@
# 2021 Pingfeng Luo
# Apache 2.0

import k2
import logging
import numpy as np
import os
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from pathlib import Path
from typing import List
from typing import Optional
from typing import Union

import k2
import numpy as np
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from lhotse import CutSet
from lhotse.dataset.speech_recognition import K2SpeechRecognitionIterableDataset

from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler
from snowfall.common import load_checkpoint
from snowfall.common import setup_logger
from snowfall.decoding.graph import compile_LG
from snowfall.models import AcousticModel
from snowfall.models.tdnn import Tdnn1a
from snowfall.models.tdnn_lstm import TdnnLstm1b
from snowfall.training.ctc_graph import build_ctc_topo
from snowfall.training.mmi_graph import get_phone_symbols
from snowfall.training.mmi_graph import create_bigram_phone_lm
from snowfall.training.mmi_graph import get_phone_symbols


def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel,
Expand Down Expand Up @@ -256,13 +254,11 @@ def main():
 logging.debug("About to get test cuts")
 cuts_test = CutSet.from_json(feature_dir / 'cuts_test.json.gz')

-logging.debug("About to create test dataset")
-test = K2SpeechRecognitionIterableDataset(cuts_test,
-max_frames=100000,
-shuffle=False,
-concat_cuts=False)
-logging.debug("About to create test dataloader")
-test_dl = torch.utils.data.DataLoader(test, batch_size=None, num_workers=1)
+logging.info("About to create test dataset")
+test = K2SpeechRecognitionDataset(cuts_test)
+sampler = SingleCutSampler(cuts_test, max_frames=100000)
+logging.info("About to create test dataloader")
+test_dl = torch.utils.data.DataLoader(test, batch_size=None, sampler=sampler, num_workers=1)

 # if not torch.cuda.is_available():
 # logging.error('No GPU detected!')
Expand Down
74 changes: 43 additions & 31 deletions egs/aishell/asr/simple_v1/mmi_bigram_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,32 @@
# 2021 Pingfeng Luo
# Apache 2.0

import k2
import logging
import math
import numpy as np
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, Optional, Tuple

import k2
import numpy as np
import torch
import torch.optim as optim

from lhotse import CutSet
from lhotse.dataset.speech_recognition import K2SpeechRecognitionIterableDataset
from lhotse.utils import fix_random_seed

from datetime import datetime
from pathlib import Path
from torch import nn
from torch.nn.utils import clip_grad_value_
from torch.utils.tensorboard import SummaryWriter
from typing import Dict, Optional, Tuple

from snowfall.common import save_checkpoint, load_checkpoint
from lhotse import CutSet
from lhotse.dataset import CutConcatenate, CutMix, K2SpeechRecognitionDataset, SingleCutSampler
from lhotse.utils import fix_random_seed
from snowfall.common import load_checkpoint, save_checkpoint
from snowfall.common import save_training_info
from snowfall.common import setup_logger
from snowfall.models import AcousticModel
from snowfall.models.tdnn_lstm import TdnnLstm1b
from snowfall.training.mmi_graph import get_phone_symbols
from snowfall.training.mmi_graph import create_bigram_phone_lm
from snowfall.training.mmi_graph import MmiTrainingGraphCompiler
from snowfall.training.mmi_graph import create_bigram_phone_lm
from snowfall.training.mmi_graph import get_phone_symbols

den_scale = 1.0

Expand Down Expand Up @@ -303,25 +300,39 @@ def main():
 cuts_musan = CutSet.from_json(feature_dir / 'cuts_musan.json.gz')

 logging.info("About to create train dataset")
-train = K2SpeechRecognitionIterableDataset(cuts_train,
-max_frames=30000,
-shuffle=True,
-aug_cuts=cuts_musan,
-aug_prob=0.5,
-aug_snr=(10, 20))
-logging.info("About to create dev dataset")
-validate = K2SpeechRecognitionIterableDataset(cuts_dev,
-max_frames=30000,
-shuffle=False,
-concat_cuts=False)
+train = K2SpeechRecognitionDataset(
+cuts_train,
+cut_transforms=[
+CutConcatenate(),
+CutMix(
+cuts=cuts_musan,
+prob=0.5,
+snr=(10, 20)
+)
+]
+)
+train_sampler = SingleCutSampler(
+cuts_train,
+max_frames=30000,
+shuffle=True,
+)
 logging.info("About to create train dataloader")
-train_dl = torch.utils.data.DataLoader(train,
-batch_size=None,
-num_workers=2)
+train_dl = torch.utils.data.DataLoader(
+train,
+sampler=train_sampler,
+batch_size=None,
+num_workers=4
+)
+logging.info("About to create dev dataset")
+validate = K2SpeechRecognitionDataset(cuts_dev)
+valid_sampler = SingleCutSampler(cuts_dev, max_frames=30000)
 logging.info("About to create dev dataloader")
-valid_dl = torch.utils.data.DataLoader(validate,
-batch_size=None,
-num_workers=1)
+valid_dl = torch.utils.data.DataLoader(
+validate,
+sampler=valid_sampler,
+batch_size=None,
+num_workers=1
+)

 if not torch.cuda.is_available():
 logging.error('No GPU detected!')
Expand Down Expand Up @@ -364,6 +375,7 @@ def main():

 curr_learning_rate = learning_rate
 for epoch in range(start_epoch, num_epochs):
+train_sampler.set_epoch(epoch)
 # curr_learning_rate = learning_rate * pow(0.4, epoch)
 if epoch > 6:
 curr_learning_rate *= 0.8
Expand Down
21 changes: 9 additions & 12 deletions egs/librispeech/asr/simple_v1/ctc_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,25 @@
# Copyright (c) 2020 Xiaomi Corporation (authors: Daniel Povey, Haowen Qiu)
# Apache 2.0

import k2
import logging
import os
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from pathlib import Path
from typing import List
from typing import Optional
from typing import Union

import k2
import torch
from k2 import Fsa, SymbolTable
from kaldialign import edit_distance
from lhotse import CutSet
from lhotse.dataset.speech_recognition import K2SpeechRecognitionIterableDataset

from lhotse.dataset import K2SpeechRecognitionDataset
from lhotse.dataset import SingleCutSampler
from snowfall.common import get_phone_symbols
from snowfall.common import load_checkpoint
from snowfall.common import setup_logger
from snowfall.decoding.graph import compile_LG
from snowfall.models import AcousticModel
from snowfall.models.tdnn import Tdnn1a
from snowfall.models.tdnn_lstm import TdnnLstm1b
from snowfall.training.ctc_graph import build_ctc_topo

Expand Down Expand Up @@ -168,12 +167,10 @@ def main():
 cuts_test = CutSet.from_json(feature_dir / 'cuts_test-clean.json.gz')

 print("About to create test dataset")
-test = K2SpeechRecognitionIterableDataset(cuts_test,
-max_frames=100000,
-shuffle=False,
-concat_cuts=False)
+test = K2SpeechRecognitionDataset(cuts_test)
+sampler = SingleCutSampler(cuts_test, max_frames=100000)
 print("About to create test dataloader")
-test_dl = torch.utils.data.DataLoader(test, batch_size=None, num_workers=1)
+test_dl = torch.utils.data.DataLoader(test, batch_size=None, sampler=sampler, num_workers=1)

 # if not torch.cuda.is_available():
 # logging.error('No GPU detected!')
Expand Down
Loading