@@ -16,9 +16,10 @@ gmm=tri5_cleaned # This specifies a GMM-dir from the features
                  # of the type you're training the system on;
                  # it should contain alignments for 'train_set'.
 langdir=data/langp/tri5_ali
-
+generate_alignments=true  # Set to false to skip alignment generation
 num_threads_ubm=12
 nnet3_affix=_cleaned
+extractor=  # If supplied, uses this extractor instead of training a new one
 
 . ./cmd.sh
 . ./path.sh
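
Note: the two new variables above are intended to be overridable from the command line. Assuming this script sources utils/parse_options.sh as Kaldi recipe scripts usually do (the sourcing line is not part of this diff), a caller could reuse an existing extractor and skip alignment generation roughly as follows; the script path, stage value, and extractor directory are illustrative only:

  # hypothetical invocation sketch; adjust paths to your setup
  local/nnet3/run_ivector_common.sh \
    --stage 0 \
    --generate-alignments false \
    --extractor exp/nnet3_cleaned/extractor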
@@ -57,7 +58,7 @@ if [ $stage -le 1 ]; then
   utils/fix_data_dir.sh data/${train_set}_sp
 fi
 
-if [ $stage -le 2 ]; then
+if $generate_alignments && [ $stage -le 2 ]; then
   echo "$0: aligning with the perturbed low-resolution data"
   steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
     data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1
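
The `if $generate_alignments && [ $stage -le 2 ]` guard works because the variable holds the name of a shell builtin, `true` or `false`, whose exit status gates the block. A minimal standalone sketch of the same idiom:

  # the variable expands to the builtin `true` or `false`; its exit status
  # decides whether the second test even runs
  generate_alignments=false
  stage=2
  if $generate_alignments && [ $stage -le 2 ]; then
    echo "would run alignment"
  else
    echo "alignment skipped"
  fi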
@@ -93,53 +94,55 @@ if [ $stage -le 3 ]; then
     steps/compute_cmvn_stats.sh \
       data/${datadir}_hires_nopitch exp/make_hires/${datadir}_nopitch $mfccdir || exit 1;
     utils/fix_data_dir.sh data/${datadir}_hires_nopitch
-
   done
 fi
 
-if [ $stage -le 4 ]; then
-  echo "$0: computing a subset of data to train the diagonal UBM."
-
-  mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
-  temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
-
-  # train a diagonal UBM using a subset of about a quarter of the data
-  # we don't use the _comb data for this as there is no need for compatibility with
-  # the alignments, and using the non-combined data is more efficient for I/O
-  # (no messing about with piped commands).
-  num_utts_total=$(wc -l < data/${train_set}_sp_hires/utt2spk)
-  if [ $num_utts_total -le 14000 ] ; then
-    num_utts=14000
-  else
-    num_utts=$num_utts_total
+if [ -z "$extractor" ]; then
+  if [ $stage -le 4 ]; then
+    echo "$0: computing a subset of data to train the diagonal UBM."
+
+    mkdir -p exp/nnet3${nnet3_affix}/diag_ubm
+    temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm
+
+    # train a diagonal UBM using a subset of about a quarter of the data
+    # we don't use the _comb data for this as there is no need for compatibility with
+    # the alignments, and using the non-combined data is more efficient for I/O
+    # (no messing about with piped commands).
+    num_utts_total=$(wc -l < data/${train_set}_sp_hires/utt2spk)
+    if [ $num_utts_total -le 14000 ] ; then
+      num_utts=14000
+    else
+      num_utts=$num_utts_total
+    fi
+    utils/data/subset_data_dir.sh data/${train_set}_sp_hires_nopitch \
+      $num_utts ${temp_data_root}/${train_set}_sp_hires_nopitch_subset
+
+    echo "$0: computing a PCA transform from the hires data."
+    steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
+      --splice-opts "--left-context=3 --right-context=3" \
+      --max-utts 10000 --subsample 2 \
+      ${temp_data_root}/${train_set}_sp_hires_nopitch_subset \
+      exp/nnet3${nnet3_affix}/pca_transform
+
+    echo "$0: training the diagonal UBM."
+    # Use 512 Gaussians in the UBM.
+    steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
+      --num-frames 700000 \
+      --num-threads $num_threads_ubm \
+      ${temp_data_root}/${train_set}_sp_hires_nopitch_subset 512 \
+      exp/nnet3${nnet3_affix}/pca_transform exp/nnet3${nnet3_affix}/diag_ubm
   fi
-  utils/data/subset_data_dir.sh data/${train_set}_sp_hires_nopitch \
-    $num_utts ${temp_data_root}/${train_set}_sp_hires_nopitch_subset
-
-  echo "$0: computing a PCA transform from the hires data."
-  steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \
-    --splice-opts "--left-context=3 --right-context=3" \
-    --max-utts 10000 --subsample 2 \
-    ${temp_data_root}/${train_set}_sp_hires_nopitch_subset \
-    exp/nnet3${nnet3_affix}/pca_transform
-
-  echo "$0: training the diagonal UBM."
-  # Use 512 Gaussians in the UBM.
-  steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
-    --num-frames 700000 \
-    --num-threads $num_threads_ubm \
-    ${temp_data_root}/${train_set}_sp_hires_nopitch_subset 512 \
-    exp/nnet3${nnet3_affix}/pca_transform exp/nnet3${nnet3_affix}/diag_ubm
-fi
 
-if [ $stage -le 5 ]; then
-  # Train the iVector extractor. Use all of the speed-perturbed data since iVector extractors
-  # can be sensitive to the amount of data. The script defaults to an iVector dimension of
-  # 100.
-  echo "$0: training the iVector extractor"
-  steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
-    data/${train_set}_sp_hires_nopitch exp/nnet3${nnet3_affix}/diag_ubm \
-    exp/nnet3${nnet3_affix}/extractor || exit 1;
+  if [ $stage -le 5 ]; then
+    # Train the iVector extractor. Use all of the speed-perturbed data since iVector extractors
+    # can be sensitive to the amount of data. The script defaults to an iVector dimension of
+    # 100.
+    echo "$0: training the iVector extractor"
+    steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
+      data/${train_set}_sp_hires_nopitch exp/nnet3${nnet3_affix}/diag_ubm \
+      exp/nnet3${nnet3_affix}/extractor || exit 1;
+  fi
+  extractor=exp/nnet3${nnet3_affix}/extractor
 fi
 
 if [ $stage -le 6 ]; then
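
The hunk above follows a train-or-reuse pattern: stages 4 and 5 run only when no extractor was supplied, and the variable is then pointed at the freshly trained directory, so every later stage reads the same `$extractor` regardless of which path was taken. A condensed sketch of that pattern, where `train_new_extractor` is a placeholder standing in for stages 4-5, not a real Kaldi script:

  extractor=            # empty: train a new extractor; non-empty: reuse the given one
  if [ -z "$extractor" ]; then
    train_new_extractor exp/nnet3${nnet3_affix}/extractor   # placeholder for stages 4-5
    extractor=exp/nnet3${nnet3_affix}/extractor
  fi
  echo "later stages read the i-vector extractor from: $extractor"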
@@ -166,7 +169,7 @@ if [ $stage -le 6 ]; then
 
   steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \
     ${temp_data_root}/${train_set}_sp_hires_nopitch_max2 \
-    exp/nnet3${nnet3_affix}/extractor $ivectordir
+    $extractor $ivectordir
 
 fi
 