# vt main.sh

# Root directory: the directory the script is launched from. Tool scripts,
# config files and the work area are all addressed relative to it.
rtdir=$(pwd)
export rtdir
# Name of the per-run sub-directory created under $rtdir/work
# (presumably a project id).
export prj=102

#rm -rf  $rtdir/work/$prj/temp

#---------------------------
# Nnet topology
#---------------------------
NNET_HIDDEN_LAYERS=4
NNET_HIDDEN_UNITS=64


# Create the scratch directory. -p also creates the missing parents
# ($rtdir/work and $rtdir/work/$prj), so a single call is enough. The path is
# quoted so a launch directory containing whitespace is not word-split.
mkdir -p "$rtdir/work/$prj/temp"


# Run the two preparation scripts in order. What they produce is defined in
# the scripts themselves (not visible here); going by their names, presumably
# MFCC features and alignments -- confirm in tools/.
# Paths are quoted so a root directory containing whitespace is not word-split.
sh "$rtdir/tools/raw2mfcc.sh"
sh "$rtdir/tools/align.sh"


#make post.{ark,scp}
# kaldi_root=/dnn4_added/lujing/VoiceTrigger/tools/kaldi.r4920.VoiceTrigger
# kaldi_root=/dnn4_added/asr/tools/kaldi.r4920.VoiceTrigger
# Active Kaldi checkout; the two paths above are commented-out alternatives.
kaldi_root="/dnn10/taorui/tools/kaldi.r4920.VoiceTrigger"
# Exported copy of the same path for child processes.
KALDI_ROOT="${kaldi_root}"
export KALDI_ROOT
# Shorthand for the src/bin directory of that checkout.
kaldi="${kaldi_root}/src/bin"
LEARN_RATE="0.003"
MINIBATCH_SIZE="256"

#---------------------------
# Archive params
#---------------------------
# Number of parts the shuffled feature list is split into.
ARCH_NUMARCHS="64"
# Both amounts are percentages of the total number of feature entries
# (they are divided by 100 where they are used).
ARCH_CVSET_AMOUNT="7.5"
ARCH_SUBSET_AMOUNT="25"


#---------------------------
# Constants (hardcoded in voicetrigger engine)
#---------------------------
NNET_CXTWINDOW="10"
NNET_CXTWINDOW_STEP="2"
NNET_FCNN_DIM="16"
NNET_WEIGHT_CLIP="4.0"
# (2 * NNET_CXTWINDOW + 1) * NNET_FCNN_DIM
NNET_FCNN_INPUT_UNITS=$(( (2 * NNET_CXTWINDOW + 1) * NNET_FCNN_DIM ))

# Selected GPU (the value looks like a PCI bus id -- confirm). The
# commented-out lines below are alternatives, each tagged with what appears to
# be a host name and device index.
NNET_GPUID="00000000:3D:00.0"  # dnnr 1
# NNET_GPUID=0000:06:00.0  #dnnd #0
#NNET_GPUID=0000:07:00.0  #dnnd #1
#NNET_GPUID=0000:84:00.0  #dnnd #2
#NNET_GPUID=0000:85:00.0  #dnnd #3

# NNET_GPUID=00000000:1A:00.0    #dnnf

#NNET_GPUID=00000000:04:00.0    #dnng #0
#NNET_GPUID=00000000:05:00.0    #dnng #1
#NNET_GPUID=00000000:08:00.0    #dnng #2
#NNET_GPUID=00000000:09:00.0    #dnng #3
#NNET_GPUID=00000000:85:00.0    #dnng #4
#NNET_GPUID=00000000:86:00.0    #dnng #5
#NNET_GPUID=00000000:89:00.0    #dnng #6
#NNET_GPUID=00000000:8A:00.0    #dnng #7

#NNET_GPUID=00000000:04:00.0    #dnnk #0

# Put the Kaldi tool directories, then the current directory, in front of the
# existing PATH. The sub-directories are listed in search order and spelled
# exactly as they end up in PATH (some carry a trailing slash).
kaldi_path_prefix=
for kaldi_subdir in \
    egs/wsj/s5/utils/ \
    src/bin \
    tools/openfst/bin \
    src/fstbin/ \
    src/gmmbin/ \
    src/featbin/ \
    src/lm/ \
    src/sgmmbin/ \
    src/sgmm2bin/ \
    src/fgmmbin/ \
    src/latbin/ \
    src/nnetbin \
    src/nnet2bin/ \
    src/kwsbin \
    src/online2bin/ \
    src/ivectorbin/ \
    src/lmbin/
do
    kaldi_path_prefix="${kaldi_path_prefix}${KALDI_ROOT}/${kaldi_subdir}:"
done
PATH="${kaldi_path_prefix}${PWD}:${PATH}"
export PATH
unset kaldi_path_prefix kaldi_subdir
# Byte-wise "C" locale for every tool started from here on.
LC_ALL=C
export LC_ALL


# Rebuild temp/all.clean.align from scratch as the concatenation of every
# regular file found under temp/align (in the order find reports them).
rm -f "$rtdir/work/$prj/temp/all.clean.align"
touch "$rtdir/work/$prj/temp/all.clean.align"
# IFS= and -r keep each file name exactly as find printed it (no trimming of
# surrounding whitespace, no backslash processing); quoting keeps names with
# spaces in one piece.
find "$rtdir/work/$prj/temp/align" -type f | while IFS= read -r alignfile; do
    cat "$alignfile" >> "$rtdir/work/$prj/temp/all.clean.align"
done
# Everything after this point uses paths relative to the project directory,
# so stop here rather than write temp/... files into the wrong place.
cd "$rtdir/work/$prj" || {
    echo "cannot cd to $rtdir/work/$prj" >&2
    exit 1
}

# Build the alignment and posterior archives under temp/.
# NOTE(review): the "#:<" and "#!" lines look like markers for an external
# tool, so they are left untouched -- confirm.
# NOTE(review): the input is temp/all.clean.align.net; the visible part of this
# script only writes temp/all.clean.align -- confirm where the .net file comes
# from.
# Tool paths are quoted so a root directory containing whitespace is not
# word-split.
#:< temp/all.clean.align.net
"$rtdir/tools/make-aliark-from-mlf.py" temp/all.clean.align.net temp/ark0 temp/scp0
"$rtdir/tools/del-dupid.pl" temp/scp0 temp/scp1
# Only the first entry of scp1 is piped into make_state_map.py to derive the
# state map.
"$kaldi/copy-int-vector" "scp:head -n 1 temp/scp1 |" ark,t:- | "$rtdir/tools/make_state_map.py" > temp/state.map
"$kaldi/convert-ali-states" temp/state.map scp:temp/scp1 ark,scp:temp/ark,temp/scp
"$kaldi/ali-to-post" scp:temp/scp ark,scp:temp/ark.post,temp/scp.post
#!

#make feats.{ark,scp}
#:< temp/trn.mfcc.lst
# Rewrite each "<dir>/<name><rest>" line as "<name> <dir>/<name><rest>", where
# <name> is the part of the file name before its first dot.
sed 's/\(.*\/\)\([^.]*\)\(.*\)/\2 \1\2\3/' temp/trn.mfcc.lst > temp/trn.mfcc.lst.htk
# Shuffle: prefix every line with a random key (fixed seed 0), sort on it,
# then drop the key again by printing only the two original fields.
awk 'BEGIN{srand(0)}{print rand(), $0}' temp/trn.mfcc.lst.htk | sort | awk '{print $2, $3}' > temp/trn.mfcc.lst.htk.rand
"$rtdir/tools/mysplit.pl" -d ./temp -f "mfcc.%d" temp/trn.mfcc.lst.htk.rand "$ARCH_NUMARCHS"
# Convert every part in parallel, one background copy-feats job per part.
copy_pids=()
for ((i = 1; i <= ARCH_NUMARCHS; i++)); do
    "$kaldi/../featbin/copy-feats" --htk-in "scp:temp/mfcc.$i" "ark,scp:temp/feats.ark.$i,temp/feats.scp.$i" &
    copy_pids+=("$!")
done
# Wait for each job individually so a failed conversion is at least reported;
# a bare `wait` would discard the exit statuses. The script still continues,
# as before.
for copy_pid in "${copy_pids[@]}"; do
    wait "$copy_pid" || echo "warning: copy-feats job (pid $copy_pid) failed" >&2
done
#cat temp/feats.*.scp >> temp/feats.scp

# Split the feature lists into a subset, a training set and a CV set.
#
# The glob is restricted to numeric suffixes (feats.scp.1, feats.scp.2, ...).
# The broader pattern temp/feats.scp.* also matches temp/feats.scp.subset,
# which is written below and sorts after all the numbered files: the CV `tail`
# would then pick lines out of the subset (already part of the training head)
# and a rerun would count the stale subset file in nfiles.
nfiles=$(cat temp/feats.scp.[0-9]* | wc -l)
# bc is used because ARCH_CVSET_AMOUNT may be fractional; with bc's default
# scale of 0 the division truncates to an integer.
subset=$(echo "($nfiles * ${ARCH_SUBSET_AMOUNT} / 100)" | bc)
cvset=$(echo "($nfiles * ${ARCH_CVSET_AMOUNT} / 100)" | bc)
trset=$(echo "($nfiles - $cvset)" | bc)

# subset: first $subset lines; training: first $trset lines; CV: the remaining
# last $cvset lines, so training and CV do not overlap.
cat temp/feats.scp.[0-9]* | head -n "$subset" > temp/feats.scp.subset
cat temp/feats.scp.[0-9]* | head -n "$trset" > temp/feats-tr.scp
cat temp/feats.scp.[0-9]* | tail -n "$cvset" > temp/feats-cv.scp

#initial model topo

# Write the alignment counts to temp/ali.counts (stdout and stderr go to the log).
"${KALDI_ROOT}/src/bin/analyze-counts" scp:temp/scp temp/ali.counts > temp/analyze-counts.log 2>&1
input_dim=$("${KALDI_ROOT}/src/featbin/feat-to-dim" scp:temp/feats-tr.scp -)
# Word count of the counts file, minus 2, plus 1.
# NOTE(review): the -2 presumably drops the surrounding "[" and "]" tokens;
# the reason for the +1 is not visible here -- confirm.
num_output_units=$(( $(wc -w < temp/ali.counts) - 2 + 1 ))

# Generate the network prototype. The command line was previously started
# twice (the first line was duplicated), which passed the interpreter path and
# the script path to the script as two extra positional arguments.
/usr/bin/python "$rtdir/tools/make_fcnn_proto.py" \
                    "--fcnn-dim=${input_dim}:${NNET_FCNN_DIM}" "--weight-clip=${NNET_WEIGHT_CLIP}" "${NNET_FCNN_INPUT_UNITS}" "${num_output_units}" "${NNET_HIDDEN_LAYERS}" "${NNET_HIDDEN_UNITS}" \
                    > temp/dnn.proto
"${KALDI_ROOT}/src/nnetbin/nnet-initialize" temp/dnn.proto temp/dnn.init > temp/nnet-initialize.log 2>&1

"$rtdir/tools/make-cmvn-splice-nnet.sh" "${NNET_CXTWINDOW}" "${NNET_CXTWINDOW_STEP}" "${input_dim}" temp/feats.scp.subset ./temp "--round-shift" \
                 > temp/make-cmvn-splice-nnet.log 2>&1

#train base model
#:<& temp/base/train_scheduler.log
    # No training command is run here; this step only post-processes an
    # existing temp/base/final.nnet. When it is missing, a message is printed
    # and the script carries on.
    # NOTE(review): the transform file name hard-codes "splice10-2", which
    # matches NNET_CXTWINDOW=10 and NNET_CXTWINDOW_STEP=2 -- confirm it is
    # produced under that name by make-cmvn-splice-nnet.sh.
    if [ ! -f temp/base/final.nnet ] ; then
        echo "not complete."
    else
        # Prepend the transform nnet to the trained model and write the merged
        # result in text form. Command paths are quoted so a KALDI_ROOT
        # containing whitespace is not word-split.
        "${KALDI_ROOT}/src/nnetbin/nnet-concat" temp/cmvn-g_tr_splice10-2.nnet temp/base/final.nnet - \
          | "${KALDI_ROOT}/src/nnetbin/nnet-copy" \
              --binary=false \
              --merge-affine-transforms \
              --merge-fcnn \
              --reduce-softmax - temp/base/kaldi.nnet
    fi

#!

#get state file
# Write the output of print_state_names for the model and the list file to
# config/states. All paths, including the redirect target, are quoted so a
# root directory containing whitespace is not word-split.
"$rtdir/tools/toshiba_tools/print_state_names" "$rtdir/config/model.mmf" "$rtdir/config/triclstsp.plist" > "$rtdir/config/states"

# You may also be interested in: (speech recognition, artificial intelligence)