感谢AIShell在商业化道路上的探索。期待着v3的到来。
sv@HP:~$ sudo lsb_release -a
Distributor ID: Ubuntu
Description: Ubuntu 18.04.1 LTS
Release: 18.04
Codename: bionic
sv@HP:~$ cat /proc/cpuinfo | grep model\ name
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
model name : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
sv@HP:~$ cat /proc/meminfo | grep MemTotal
MemTotal: 16321360 kB
sv@HP:~$ lspci | grep 'VGA'
01:00.0 VGA compatible controller: NVIDIA Corporation GP104 [GeForce GTX 1070] (rev a1)
一网打尽。
先看结果。字错误率(CER,Kaldi输出中标记为%WER)为36.59%。monophone效果一般般。
sv@HP:~/lkaldi/egs/aishell/s5$ for x in exp/*/decode_test; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null
%WER 36.59 [ 38335 / 104765, 849 ins, 3183 del, 34303 sub ] exp/mono/decode_test/cer_10_0.0
sv@HP:~/lkaldi/egs/aishell/s5$ #train mono
sv@HP:~/lkaldi/egs/aishell/s5$ utils/fix_data_dir.sh data/test || exit 1;
fix_data_dir.sh: kept all 7176 utterances.
fix_data_dir.sh: old files are kept in data/test/.backup
sv@HP:~/lkaldi/egs/aishell/s5$ #done
sv@HP:~/lkaldi/egs/aishell/s5$ #train mono
sv@HP:~/lkaldi/egs/aishell/s5$ steps/train_mono.sh --cmd "$train_cmd" --nj 10 data/train data/lang exp/mono || exit 1;
steps/train_mono.sh --cmd run.pl --mem 8G --nj 10 data/train data/lang exp/mono
steps/train_mono.sh: Initializing monophone system.
steps/train_mono.sh: Compiling training graphs
steps/train_mono.sh: Aligning data equally (pass 0)
steps/train_mono.sh: Pass 1
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 2
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 3
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 4
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 5
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 6
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 7
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 8
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 9
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 10
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 11
steps/train_mono.sh: Pass 12
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 13
steps/train_mono.sh: Pass 14
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 15
steps/train_mono.sh: Pass 16
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 17
steps/train_mono.sh: Pass 18
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 19
steps/train_mono.sh: Pass 20
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 21
steps/train_mono.sh: Pass 22
steps/train_mono.sh: Pass 23
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 24
steps/train_mono.sh: Pass 25
steps/train_mono.sh: Pass 26
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 27
steps/train_mono.sh: Pass 28
steps/train_mono.sh: Pass 29
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 30
steps/train_mono.sh: Pass 31
steps/train_mono.sh: Pass 32
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 33
steps/train_mono.sh: Pass 34
steps/train_mono.sh: Pass 35
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 36
steps/train_mono.sh: Pass 37
steps/train_mono.sh: Pass 38
steps/train_mono.sh: Aligning data
steps/train_mono.sh: Pass 39
steps/diagnostic/analyze_alignments.sh --cmd run.pl --mem 8G data/lang exp/mono
steps/diagnostic/analyze_alignments.sh: see stats in exp/mono/log/analyze_alignments.log
1122 warnings in exp/mono/log/acc.*.*.log
37929 warnings in exp/mono/log/align.*.*.log
exp/mono: nj=10 align prob=-82.03 over 150.16h [retry=1.0%, fail=0.0%] states=203 gauss=984
steps/train_mono.sh: Done training monophone system in exp/mono
sv@HP:~/lkaldi/egs/aishell/s5$
sv@HP:~/lkaldi/egs/aishell/s5$ # Monophone decoding
sv@HP:~/lkaldi/egs/aishell/s5$ utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph || exit 1;
tree-info exp/mono/tree
tree-info exp/mono/tree
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst
fstminimizeencoded
fstdeterminizestar --use-log=true
fstpushspecial
fstisstochastic data/lang_test/tmp/LG.fst
-0.0663446 -0.0666824
[info]: LG not stochastic.
fstcomposecontext --context-size=1 --central-position=0 --read-disambig-syms=data/lang_test/phones/disambig.int --write-disambig-syms=data/lang_test/tmp/disambig_ilabels_1_0.int data/lang_test/tmp/ilabels_1_0.28227 data/lang_test/tmp/LG.fst
fstisstochastic data/lang_test/tmp/CLG_1_0.fst
-0.0663446 -0.0666824
[info]: CLG not stochastic.
make-h-transducer --disambig-syms-out=exp/mono/graph/disambig_tid.int --transition-scale=1.0 data/lang_test/tmp/ilabels_1_0 exp/mono/tree exp/mono/final.mdl
fsttablecompose exp/mono/graph/Ha.fst data/lang_test/tmp/CLG_1_0.fst
fstminimizeencoded
fstdeterminizestar --use-log=true
fstrmepslocal
fstrmsymbols exp/mono/graph/disambig_tid.int
fstisstochastic exp/mono/graph/HCLGa.fst
0.000157882 -0.132761
HCLGa is not stochastic
add-self-loops --self-loop-scale=0.1 --reorder=true exp/mono/final.mdl exp/mono/graph/HCLGa.fst
sv@HP:~/lkaldi/egs/aishell/s5$ steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 exp/mono/graph data/dev exp/mono/decode_dev
steps/decode.sh --cmd run.pl --mem 8G --config conf/decode.config --nj 10 exp/mono/graph data/dev exp/mono/decode_dev
decode.sh: feature type is delta
steps/diagnostic/analyze_lats.sh --cmd run.pl --mem 8G exp/mono/graph exp/mono/decode_dev
steps/diagnostic/analyze_lats.sh: see stats in exp/mono/decode_dev/log/analyze_alignments.log
Overall, lattice depth (10,50,90-percentile)=(1,16,132) and mean=49.3
steps/diagnostic/analyze_lats.sh: see stats in exp/mono/decode_dev/log/analyze_lattice_depth_stats.log
+ steps/score_kaldi.sh --cmd 'run.pl --mem 8G' data/dev exp/mono/graph exp/mono/decode_dev
steps/score_kaldi.sh --cmd run.pl --mem 8G data/dev exp/mono/graph exp/mono/decode_dev
steps/score_kaldi.sh: scoring with word insertion penalty=0.0,0.5,1.0
+ steps/scoring/score_kaldi_cer.sh --stage 2 --cmd 'run.pl --mem 8G' data/dev exp/mono/graph exp/mono/decode_dev
steps/scoring/score_kaldi_cer.sh --stage 2 --cmd run.pl --mem 8G data/dev exp/mono/graph exp/mono/decode_dev
steps/scoring/score_kaldi_cer.sh: scoring with word insertion penalty=0.0,0.5,1.0
+ echo 'local/score.sh: Done'
local/score.sh: Done
sv@HP:~/lkaldi/egs/aishell/s5$ steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 exp/mono/graph data/test exp/mono/decode_test
steps/decode.sh --cmd run.pl --mem 8G --config conf/decode.config --nj 10 exp/mono/graph data/test exp/mono/decode_test
decode.sh: feature type is delta
steps/diagnostic/analyze_lats.sh --cmd run.pl --mem 8G exp/mono/graph exp/mono/decode_test
steps/diagnostic/analyze_lats.sh: see stats in exp/mono/decode_test/log/analyze_alignments.log
Overall, lattice depth (10,50,90-percentile)=(1,20,154) and mean=57.3
steps/diagnostic/analyze_lats.sh: see stats in exp/mono/decode_test/log/analyze_lattice_depth_stats.log
+ steps/score_kaldi.sh --cmd 'run.pl --mem 8G' data/test exp/mono/graph exp/mono/decode_test
steps/score_kaldi.sh --cmd run.pl --mem 8G data/test exp/mono/graph exp/mono/decode_test
steps/score_kaldi.sh: scoring with word insertion penalty=0.0,0.5,1.0
+ steps/scoring/score_kaldi_cer.sh --stage 2 --cmd 'run.pl --mem 8G' data/test exp/mono/graph exp/mono/decode_test
steps/scoring/score_kaldi_cer.sh --stage 2 --cmd run.pl --mem 8G data/test exp/mono/graph exp/mono/decode_test
steps/scoring/score_kaldi_cer.sh: scoring with word insertion penalty=0.0,0.5,1.0
+ echo 'local/score.sh: Done'
local/score.sh: Done
sv@HP:~/lkaldi/egs/aishell/s5$
sv@HP:~/lkaldi/egs/aishell/s5$ # Get alignments from monophone system.
sv@HP:~/lkaldi/egs/aishell/s5$ steps/align_si.sh --cmd "$train_cmd" --nj 10 data/train data/lang exp/mono exp/mono_ali || exit 1;
steps/align_si.sh --cmd run.pl --mem 8G --nj 10 data/train data/lang exp/mono exp/mono_ali
steps/align_si.sh: feature type is delta
steps/align_si.sh: aligning data in data/train using model from exp/mono, putting alignments in exp/mono_ali
steps/diagnostic/analyze_alignments.sh --cmd run.pl --mem 8G data/lang exp/mono_ali
steps/diagnostic/analyze_alignments.sh: see stats in exp/mono_ali/log/analyze_alignments.log
steps/align_si.sh: done aligning data.
继续:Kaldi单步完美运行AIShell v1 S5之五:chain DNN
继续:Kaldi单步完美运行AIShell v1 S5之四:nnet3 DNN
回头:Kaldi单步完美运行AIShell v1 S5之三:三音素TriPhone
回头:Kaldi单步完美运行AIShell v1 S5之二:单音素MonoPhone
回头:Kaldi单步完美运行AIShell v1 S5之一:MONO前
其他参考:Kaldi完美运行TIMIT完整结果(含DNN)