Skip to content

Commit c0108e0

Browse files
committed
TDNN+LSTM semisup recipes
1 parent 520b4a7 commit c0108e0

File tree

46 files changed

+2500
-12799
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+2500
-12799
lines changed

egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,12 @@
4343

4444
set -u -e -o pipefail
4545

46-
stage=0 # Start from -1 for supervised seed system training
46+
stage=0
4747
train_stage=-100
4848
nj=80
4949
test_nj=50
5050

51-
# The following 3 options decide the output directory for semi-supervised
51+
# The following 3 options decide the output directory for semi-supervised
5252
# chain system
5353
# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix}
5454

@@ -89,7 +89,7 @@ echo "$0 $@" # Print the command line for logging
8989
if [ -f ./path.sh ]; then . ./path.sh; fi
9090
. ./utils/parse_options.sh
9191

92-
# The following can be replaced with the versions that model
92+
# The following can be replaced with the versions that do not model
9393
# UNK using phone LM. $sup_lat_dir should also ideally be changed.
9494
unsup_decode_lang=data/lang_test_poco_sup100k_unk
9595
unsup_decode_graph_affix=_poco_sup100k_unk
@@ -141,6 +141,8 @@ if [ $stage -le 2 ]; then
141141

142142
steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
143143
--mfcc-config conf/mfcc_hires.conf data/${unsupervised_set}_sp_hires || exit 1
144+
steps/compute_cmvn_stats.sh data/${unsupervised_set}_sp_hires
145+
utils/fix_data_dir.sh data/${unsupervised_set}_sp_hires
144146
fi
145147
unsupervised_set_perturbed=${unsupervised_set}_sp
146148

egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ test_graph_affix=_poco_unk
109109

110110
unsup_rescore_lang=${unsup_decode_lang}_big
111111

112-
dir=$exp_root/chain${chain_affix}/tdnn${tdnn_affix}
112+
dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}
113113

114114
if ! cuda-compiled; then
115115
cat <<EOF && exit 1
@@ -144,6 +144,8 @@ fi
144144
if [ $stage -le 2 ]; then
145145
utils/data/perturb_data_dir_speed_3way.sh data/${unsupervised_set} \
146146
data/${unsupervised_set_perturbed}_hires
147+
utils/data/perturb_data_dir_volume.sh \
148+
data/${unsupervised_set_perturbed}_hires
147149

148150
steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj \
149151
--mfcc-config conf/mfcc_hires.conf \
@@ -177,7 +179,7 @@ fi
177179
# Rescore undeterminized lattices with larger LM
178180
if [ $stage -le 5 ]; then
179181
steps/lmrescore_const_arpa_undeterminized.sh --cmd "$decode_cmd" \
180-
--acwt 0.1 --beam 8.0 --skip-scoring true \
182+
--acwt 0.1 --beam 8.0 --skip-scoring true \
181183
$unsup_decode_lang $unsup_rescore_lang \
182184
data/${unsupervised_set_perturbed}_hires \
183185
$sup_chain_dir/decode_${unsupervised_set_perturbed} \
@@ -433,6 +435,7 @@ if [ $stage -le 18 ]; then
433435
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
434436
--nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \
435437
--online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \
438+
--frames-per-chunk 160 \
436439
$test_graph_dir data/${decode_set}_hires \
437440
$dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error
438441
) &

0 commit comments

Comments
 (0)