在以下脚本
steps/make_mfcc.sh
steps/make_mfcc_pitch.sh
steps/make_fbank.sh
steps/make_fbank_pitch.sh
steps/compute_cmvn_stats.sh
中涉及到的命令以及脚本情况:
命令行 | 作用 |
---|---|
脚本 | 作用 |
---|---|
compute_cmvn_stats.sh 用于计算所提取特征的 CMVN 统计量;CMVN 即倒谱均值方差归一化(Cepstral Mean and Variance Normalization)
NOTE:以上所有脚本中所必需的文件为wav.scp
这个脚本的输入参数有三个:1. data/train 2. exp/make_mfcc/train 3. mfcc
1.中有数据预处理后的一些文件:text utt2spk wav.scp
2.中应该是要保存程序运行的日志文件的
3.中是提取出的特征文件
1是输入目录,2,3是输出目录
kaldi中以上脚本在进行提取特征的过程中,存在着相似的结构。因此将以上四个脚本放在一块进行记录分析。
在此记录中,使用 steps/make_mfcc_pitch.sh 进行基本分析,其脚本与其他脚本的区别仅仅在特征提取的过程。
以下内容首先进行特征提取的配置,包括各参数的初始化以及配置文件路径的设置:
# Begin configuration section.
# Defaults for the options of this script. The usage text below lists the
# matching --option flags; they are presumably applied to these variables by
# parse_options.sh, which is sourced right after this section.
nj=4                          # number of parallel jobs
cmd=run.pl                    # how to run jobs
mfcc_config=conf/mfcc.conf    # config passed to compute-mfcc-feats
pitch_config=conf/pitch.conf  # config passed to compute-kaldi-pitch-feats
pitch_postprocess_config=     # optional config passed to process-kaldi-pitch-feats
paste_length_tolerance=2      # length tolerance passed to paste-feats
compress=true                 # value handed to copy-feats --compress
write_utt2num_frames=false    # if true writes utt2num_frames
# End configuration section.

# Print this script's name and all of its arguments.
echo "$0 $@"  # Print the command line for logging

# Pick up the environment from path.sh when it exists in the current directory.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

# Between one and three positional arguments are accepted. The usage text
# spells out the <placeholders> explicitly so that the help message shows
# which argument is which and what each default is derived from.
if [ $# -lt 1 ] || [ $# -gt 3 ]; then
  echo "Usage: $0 [options] <data-dir> [<log-dir> [<mfcc-dir>] ]";
  echo "e.g.: $0 data/train exp/make_mfcc/train mfcc"
  echo "Note: <log-dir> defaults to <data-dir>/log, and <mfcc-dir> defaults to <data-dir>/data"
  echo "Options: "
  echo "  --mfcc-config              <mfcc-config-file>        # config passed to compute-mfcc-feats "
  echo "  --pitch-config             <pitch-config-file>       # config passed to compute-kaldi-pitch-feats "
  echo "  --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
  echo "  --paste-length-tolerance   <tolerance>               # length tolerance passed to paste-feats"
  echo "  --nj                       <nj>                      # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)     # how to run jobs."
  echo "  --write-utt2num-frames     <true|false>              # If true, write utt2num_frames file."
  exit 1;
fi
以下脚本根据传入参数的个数,确定日志目录和特征输出目录:
# Positional arguments: data directory, optional log directory, optional
# feature output directory.
data=$1

# "${N-default}" (without a colon) substitutes the default only when argument
# N was not supplied at all, which is the same condition as testing $#.
# Logs go to the second argument if given, otherwise to $data/log.
logdir=${2-$data/log}
# Extracted features go to the third argument if given, otherwise to $data/data.
mfcc_pitch_dir=${3-$data/data}
特征提取过程中必需的文件为 wav.scp(此外还需要 mfcc 与 pitch 的配置文件):
# Turn $mfcc_pitch_dir into an absolute path: when it does not start with "/",
# the current working directory is prepended.
mfcc_pitch_dir=$(perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfcc_pitch_dir ${PWD})

# The last component of the data directory becomes part of the archive names.
name=$(basename $data)

# Create the feature directory and the log directory; give up if either fails.
{ mkdir -p $mfcc_pitch_dir && mkdir -p $logdir; } || exit 1;

# A feats.scp left over from an earlier run is moved into $data/.backup
# instead of being overwritten.
if [ -f $data/feats.scp ]; then
  mkdir -p $data/.backup
  echo "$0: moving $data/feats.scp to $data/.backup"
  mv $data/feats.scp $data/.backup
fi
# wav.scp is the list of audio files; features are extracted for the entries
# it lists.
scp=$data/wav.scp

# Inputs that must exist before anything is run: the wav list and both
# feature configs.
required="$scp $mfcc_config $pitch_config"
for required_file in $required; do
  [ -f $required_file ] && continue
  echo "make_mfcc_pitch.sh: no such file $required_file"
  exit 1;
done

# Let utils/validate_data_dir.sh check the contents of $data; text and feats
# are excluded from the check through the two --no-* flags.
utils/validate_data_dir.sh --no-text --no-feats $data || exit 1;
# Optional --config argument for process-kaldi-pitch-feats; stays empty when
# no post-processing config was given.
if [ -n "$pitch_postprocess_config" ]; then
  postprocess_config_opt="--config=$pitch_postprocess_config"
else
  postprocess_config_opt=
fi

# VTLN warp-factor options for compute-mfcc-feats. A per-speaker map
# (spk2warp) takes precedence over a per-utterance map (utt2warp).
if [ -f $data/spk2warp ]; then
  echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
  vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
elif [ -f $data/utt2warp ]; then
  echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
  vtln_opts="--vtln-map=ark:$data/utt2warp"
else
  # Explicitly reset the variable so that a vtln_opts value inherited from
  # the caller's environment cannot leak into the feature-extraction command.
  vtln_opts=
fi
以下内容主要根据是否存在 segments 文件,选择相应的方式切分数据,并调用命令并行提取特征,如下:
# Call utils/create_data_link.pl once per job archive. Per the upstream note
# this does nothing unless $mfcc_pitch_dir/storage/ exists; see
# utils/create_data_link.pl for more info.
for job in $(seq $nj); do
  utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_pitch_$name.$job.ark
done

# Extra option for copy-feats: one utt2num_frames.JOB file per job is
# requested in $logdir when frame counts are wanted, otherwise nothing is
# passed.
write_num_frames_opt=
if $write_utt2num_frames; then
  write_num_frames_opt="--write-num-frames=ark,t:$logdir/utt2num_frames.JOB"
fi
# Extract MFCC and pitch features in $nj parallel jobs and paste them together.
#
# Two input layouts are handled:
#   * $data/segments exists: the segments file is split into $nj pieces and
#     extract-segments feeds each piece to the feature programs.
#   * otherwise: wav.scp is assumed to be indexed by utterance and is split
#     into $nj pieces that are read directly.
# In both cases the per-job output is
#   $mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark / .scp
# and the per-job log is $logdir/make_mfcc_pitch_${name}.JOB.log.

# Clear failure markers left over from a previous run. The check performed
# after extraction tests $logdir/.error.$name, so that name is removed as
# well as the plain $logdir/.error; otherwise a stale marker would make a
# successful run look like a failure.
rm -f $logdir/.error $logdir/.error.$name 2>/dev/null

if [ -f $data/segments ]; then
  echo "$0 [info]: segments file exists: using that."

  # One segments.N file per job.
  split_segments=""
  for n in $(seq $nj); do
    split_segments="$split_segments $logdir/segments.$n"
  done
  utils/split_scp.pl $data/segments $split_segments || exit 1;

  # Input pipelines (evaluated per job by the feature programs, with JOB
  # replaced by the job index): cut the segments out of the recordings, then
  # compute MFCCs / pitch on them.
  mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
  pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
else
  echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."

  # One wav_$name.N.scp file per job, produced by splitting wav.scp.
  split_scps=""
  for n in $(seq $nj); do
    split_scps="$split_scps $logdir/wav_${name}.$n.scp"
  done
  utils/split_scp.pl $scp $split_scps || exit 1;

  # Input pipelines reading each job's share of wav.scp directly.
  mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
  pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
fi

# The job command is the same for both layouts: paste the MFCC and pitch
# streams frame by frame and write the result as an ark/scp pair, e.g.
# raw_mfcc_pitch_train.1.ark and raw_mfcc_pitch_train.1.scp for data/train.
# The pipe is escaped so that it is handed to $cmd instead of being
# interpreted by this shell.
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
  paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
  copy-feats --compress=$compress $write_num_frames_opt ark:- \
  ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \
  || exit 1;
在进行特征提取完之后,进行后续处理,如下:
# If a failure marker exists, show the tail of the first job's log and stop.
# NOTE(review): nothing in the visible part of this script creates
# $logdir/.error.$name -- confirm whether any job wrapper still writes it.
if [ -f $logdir/.error.$name ]; then
  echo "Error producing mfcc & pitch features for $name:"
  tail $logdir/make_mfcc_pitch_${name}.1.log
  exit 1;
fi

# Concatenate the per-job .scp files, in job order, into $data/feats.scp.
# The trailing "|| exit 1" also catches a failure to open feats.scp for
# writing, mirroring the utt2num_frames loop below.
for n in $(seq $nj); do
  cat $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.scp || exit 1;
done > $data/feats.scp || exit 1

# Likewise merge the per-job frame counts when they were requested, then
# drop the per-job pieces.
if $write_utt2num_frames; then
  for n in $(seq $nj); do
    cat $logdir/utt2num_frames.$n || exit 1;
  done > $data/utt2num_frames || exit 1
  rm $logdir/utt2num_frames.*
fi

# Remove the intermediate split lists; only one of the two patterns matches
# in a given run, hence the silenced error output.
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
# Compare the number of feature entries with the number of utterances.
# "cat file | wc -l" is kept on purpose: if a file is missing the count is
# still a well-formed 0, so the numeric tests below keep working.
nf=$(cat $data/feats.scp | wc -l)  # number of lines in feats.scp
nu=$(cat $data/utt2spk | wc -l)    # number of lines in utt2spk

# Any mismatch is reported as a warning only.
if [ $nf -ne $nu ]; then
  echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
  echo "consider using utils/fix_data_dir.sh $data"
fi

# Fewer features than nu - nu/20 (integer arithmetic, i.e. roughly 95% of
# the utterances) is treated as a fatal error.
if [ $nf -lt $((nu - nu / 20)) ]; then
  echo "Less than 95% the features were successfully generated. Probably a serious error."
  exit 1;
fi

echo "Succeeded creating MFCC & Pitch features for $name"