# Learning Kaldi starting from the speaker-identification demo -- (5) compute_vad_decision.sh

#!/bin/bash 

# Copyright    2017  Vimal Manohar
# Apache 2.0

# To be run from .. (one directory up from here)
# see ../run.sh for example

# Compute energy based VAD output

# Default values for the command-line options. The usage text of this script
# lists them as --nj, --cmd and --vad-config; they have to be assigned before
# parse_options.sh is sourced below (presumably that script overrides them
# from the flags given on the command line -- confirm against parse_options.sh).
#   nj          number of parallel jobs
#   cmd         program used to launch the jobs
#   vad_config  config file handed to compute-vad via --config
nj=4
cmd=run.pl
vad_config=conf/vad.conf

echo "$0 $@"  # Print the command line for logging
# $0: the name of this script itself
# $@: all of the arguments passed to this script

# Source path.sh from the current directory when it exists, then the option
# parser; abort if the option parser cannot be sourced or reports failure.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

# Validate the number of positional arguments.  $# is the number of arguments
# passed to the script; when fewer than 1 or more than 3 are given, print the
# usage message and exit with status 1.
if [ $# -lt 1 ] || [ $# -gt 3 ]; then
   echo "Usage: $0 [options] <data-dir> [<log-dir> [<vad-dir>]]";
   echo "e.g.: $0 data/train exp/make_vad mfcc"
   echo "Note: <log-dir> defaults to <data-dir>/log, and <vad-dir> defaults to <data-dir>/data"
   echo " Options:"
   echo "  --vad-config <config-file>                       # config passed to compute-vad"
   echo "  --nj <nj>                                        # number of parallel jobs"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
   exit 1;
fi



# Positional arguments:
#   $1  data directory
#   $2  log directory; falls back to $data/log when not supplied
#   $3  VAD output directory; falls back to $data/data when not supplied
# The "-" form of the expansion (not ":-") substitutes the fallback only when
# the argument is unset, so an explicitly passed empty string is kept as is.
data=$1
logdir=${2-$data/log}
vaddir=${3-$data/data}


# make $vaddir an absolute pathname.
vaddir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vaddir ${PWD}`

# use "name" as part of name of the archive.
name=`basename $data`
.<<EOF
basename是linux的关键词命令
basename /usr/bin/sort.txt      输出"sort.txt"。
basename ./include/stdio.h .h   输出"stdio"。   这种去掉了后缀名
这里应该是第一种用法

这里不是单引号,是倒斜杠,单引号里面不可以有命令和变量引用
EOF

# Create the VAD output directory and the log directory (with any missing
# parents); abort when either cannot be created.  Paths are quoted so that
# names containing whitespace or glob characters are handled as one word.
mkdir -p "$vaddir" || exit 1;
mkdir -p "$logdir" || exit 1;

# An already existing vad.scp is not overwritten in place: it is moved into
# $data/.backup first.
if [ -f "$data/vad.scp" ]; then
  mkdir -p "$data/.backup"
  echo "$0: moving $data/vad.scp to $data/.backup"
  mv "$data/vad.scp" "$data/.backup"
fi

# Both the feature index and the VAD config must exist as regular files.
# "$f" is quoted inside the test: with an empty value (e.g. an empty
# --vad-config) an unquoted `[ ! -f $f ]` collapses to `[ ! -f ]`, which is
# false, so the missing file would pass the check unreported.
for f in "$data/feats.scp" "$vad_config"; do
  if [ ! -f "$f" ]; then
    echo "compute_vad_decision.sh: no such file $f"
    exit 1;
  fi
done

# Split the data directory into $nj parts; abort if the split fails.
# For reference, the usage text of utils/split_data.sh reads:
#   Usage: split_data.sh [--per-utt] <data-dir> <num-to-split>
#   E.g.: split_data.sh data/train 50
#   It creates its output in e.g. data/train/split50/{1,2,3,...50}, or if the
#   --per-utt option was given, in e.g. data/train/split50utt/{1,2,3,...50}.
#
#   This script will not split the data-dir if it detects that the output is
#   newer than the input.
#   By default it splits per speaker (so each speaker is in only one split
#   dir), but with the --per-utt option it will ignore the speaker information
#   while splitting.
utils/split_data.sh "$data" "$nj" || exit 1;

# Directory that holds the per-job subsets ($sdata/JOB/feats.scp is read by
# each job below).
sdata=$data/split$nj;

# Run compute-vad as jobs 1..$nj through $cmd.  The literal word JOB in the log
# name, the input scp and the output ark/scp names is expected to be replaced
# by the job index by the launcher (the per-job vad_${name}.<n>.scp files are
# what gets concatenated afterwards).
#
# compute-vad.cc lives in ~/kaldi-trunk/src/ivectorbin; its usage message is:
#   This program reads input features and writes out, for each utterance,
#   a vector of floats that are 1.0 if we judge the frame voice and 0.0
#   otherwise.  The algorithm is very simple and is based on thresholding
#   the log mel energy (and taking the consensus of threshold decisions
#   within a window centered on the current frame).  See the options for
#   more details, and egs/sid/s1/run.sh for examples; this program is
#   intended for use in speaker-ID.
#
#   Usage: compute-vad [options] <feats-rspecifier> <vad-wspecifier>
#   e.g.: compute-vad scp:feats.scp ark:vad.ark
#
# $cmd is deliberately left unquoted: it may carry its own options (the usage
# text allows "utils/queue.pl <queue opts>") and must be split into words.
# Every other argument is quoted so that it is passed as a single word.
$cmd "JOB=1:$nj" "$logdir/vad_${name}.JOB.log" \
  compute-vad "--config=$vad_config" "scp:$sdata/JOB/feats.scp" \
  "ark,scp:$vaddir/vad_${name}.JOB.ark,$vaddir/vad_${name}.JOB.scp" || exit 1


# Concatenate the per-job scp files, in job order, into $data/vad.scp; abort
# as soon as one of them cannot be read.
for ((n=1; n<=nj; n++)); do
  cat "$vaddir/vad_${name}.$n.scp" || exit 1;
done > "$data/vad.scp"

# Compare the number of lines in the VAD index with the number of lines in the
# feature index.  A mismatch only produces a warning, unless no VAD output was
# written at all, in which case the script exits with status 1.
nc=$(cat "$data/vad.scp" | wc -l)
nu=$(cat "$data/feats.scp" | wc -l)
if [ "$nc" -ne "$nu" ]; then
  echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);"
  echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh"
  [ "$nc" -eq 0 ] && exit 1;
fi


echo "Created VAD output for $name"

# You may also be interested in: (Learning Kaldi starting from the speaker-identification demo -- (5) compute_vad_decision.sh)