Skip to content

Commit 2f1e23c

Browse files
authored
Narrower and deeper conformer (k2-fsa#330)
* Copy files for editing. * Add random combine from k2-fsa#229. * Minor fixes. * Pass model parameters from the command line. * Fix warnings. * Fix warnings. * Update readme. * Rename to avoid conflicts. * Update results. * Add CI for pruned_transducer_stateless5 * Typo fixes. * Remove random combiner. * Update decode.py and train.py to use periodically averaged models. * Minor fixes. * Revert to use random combiner. * Update results. * Minor fixes.
1 parent ec5a112 commit 2f1e23c

22 files changed

+4299
-59
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env bash
#
# CI script: download a pre-trained pruned_transducer_stateless5 model from
# HuggingFace, run pretrained.py on a few test wavs with several decoding
# methods, and (on scheduled runs or when the "run-decode" label is set)
# decode the full test-clean/test-other sets.
#
# Expected to run from the repository root; reads GITHUB_EVENT_NAME and
# GITHUB_EVENT_LABEL_NAME from the environment (set by the workflow).

# Fail the CI job on any command failure, unset variable, or pipeline error.
set -euo pipefail

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/librispeech/ASR

repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo"/
soxi "$repo"/test_wavs/*.wav
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo"/exp
ln -s pretrained-epoch-39-avg-7.pt pretrained.pt
popd

for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  # NOTE(review): unlike the beam-search loop below, this call does not pass
  # the model-size flags (--num-encoder-layers etc.); confirm pretrained.py's
  # defaults match the 18-layer checkpoint being loaded here.
  ./pruned_transducer_stateless5/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame "$sym" \
    --checkpoint "$repo"/exp/pretrained.pt \
    --bpe-model "$repo"/data/lang_bpe_500/bpe.model \
    "$repo"/test_wavs/1089-134686-0001.wav \
    "$repo"/test_wavs/1221-135766-0001.wav \
    "$repo"/test_wavs/1221-135766-0002.wav
done

for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  # Options are listed before the positional wav files (argparse convention);
  # the flag values themselves are unchanged.
  ./pruned_transducer_stateless5/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo"/exp/pretrained.pt \
    --bpe-model "$repo"/data/lang_bpe_500/bpe.model \
    --num-encoder-layers 18 \
    --dim-feedforward 2048 \
    --nhead 8 \
    --encoder-dim 512 \
    --decoder-dim 512 \
    --joiner-dim 512 \
    "$repo"/test_wavs/1089-134686-0001.wav \
    "$repo"/test_wavs/1221-135766-0001.wav \
    "$repo"/test_wavs/1221-135766-0002.wav
done

# Default to empty so the script also works (and set -u does not trip)
# outside of GitHub Actions.
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME:-}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME:-}"
if [[ "${GITHUB_EVENT_NAME:-}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME:-}" == "run-decode" ]]; then
  mkdir -p pruned_transducer_stateless5/exp
  # decode.py looks for epoch-<N>.pt under --exp-dir; expose the pretrained
  # checkpoint as epoch-999.pt so "--epoch 999 --avg 1" picks exactly it.
  ln -s "$PWD/$repo/exp/pretrained-epoch-39-avg-7.pt" pruned_transducer_stateless5/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh pruned_transducer_stateless5/exp

  log "Decoding test-clean and test-other"

  # Use a small value for decoding with CPU.
  max_duration=100

  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"

    ./pruned_transducer_stateless5/decode.py \
      --decoding-method "$method" \
      --epoch 999 \
      --avg 1 \
      --max-duration "$max_duration" \
      --exp-dir pruned_transducer_stateless5/exp \
      --num-encoder-layers 18 \
      --dim-feedforward 2048 \
      --nhead 8 \
      --encoder-dim 512 \
      --decoder-dim 512 \
      --joiner-dim 512
  done

  # Free disk space on the CI runner; only the decoding logs are kept.
  rm pruned_transducer_stateless5/exp/*.pt
fi
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Copyright 2022 Fangjun Kuang ([email protected])

# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: run-librispeech-2022-05-13
# stateless transducer + k2 pruned rnnt-loss + deeper model

# Runs on pushes to master, on labeled pull requests, and nightly.
on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

jobs:
  run_librispeech_2022_05_13:
    # Gate on labels so arbitrary PRs do not trigger the (expensive) job;
    # scheduled and push events always run.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-18.04]
        python-version: [3.7, 3.8, 3.9]

      # Let the remaining matrix entries finish even if one fails.
      fail-fast: false

    steps:
      - uses: actions/checkout@v2
        with:
          # Full history (fetch-depth 0) so git describe/version info works.
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install

      # kaldifeat is built from source; cache the build per Python version.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other

      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh

      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh

      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other

      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh

      - name: Inference with pre-trained model
        shell: bash
        env:
          # Forwarded to the script, which uses them to decide whether the
          # full decode should run (schedule or "run-decode" label).
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*

          sudo apt-get -qq install git-lfs tree sox
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh

      - name: Display decoding results for librispeech pruned_transducer_stateless5
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless5/exp

          cd pruned_transducer_stateless5
          echo "results for pruned_transducer_stateless5"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===fast_beam_search==="
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

          echo "===modified beam search==="
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

      - name: Upload decoding results for librispeech pruned_transducer_stateless5
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless5-2022-05-13
          path: egs/librispeech/ASR/pruned_transducer_stateless5/exp/

egs/librispeech/ASR/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ The following table lists the differences among them.
1919
| `pruned_transducer_stateless` | Conformer | Embedding + Conv1d | Using k2 pruned RNN-T loss |
2020
| `pruned_transducer_stateless2` | Conformer(modified) | Embedding + Conv1d | Using k2 pruned RNN-T loss |
2121
| `pruned_transducer_stateless3` | Conformer(modified) | Embedding + Conv1d | Using k2 pruned RNN-T loss + using GigaSpeech as extra training data |
22+
| `pruned_transducer_stateless4` | Conformer(modified) | Embedding + Conv1d | same as pruned_transducer_stateless2 + save averaged models periodically during training |
23+
| `pruned_transducer_stateless5` | Conformer(modified) | Embedding + Conv1d | same as pruned_transducer_stateless4 + more layers + random combiner |
2224

2325

2426
The decoder in `transducer_stateless` is modified from the paper

0 commit comments

Comments
 (0)