PLDA training and score computation in Kaldi

I've been puzzling over a few things lately: how is PLDA trained? How does PLDA compute a score? How is the speaker decision made from that score? And how does EER relate to accuracy?
Time is tight and the thesis still isn't written, but I need to slow down and work through this properly to clear my head.
After i-vector extraction we have an i-vector for every utterance; the first step is to compute the mean over the SRE set:

ivector-mean scp:exp/ivectors_sre/ivector.scp exp/ivectors_sre/mean.vec

Let's see what ivector-mean.cc does here:
it sums all the i-vectors in the SRE set while counting the total number of utterances, then divides to obtain the mean vector of the SRE set, i.e. the global mean u in the PLDA model formula.

// If 2 arguments are given, computes the mean of all input files
// and writes out the mean vector.
if (po.NumArgs() == 2) {
  // Compute the mean of the input vectors and write it out.
  std::string ivector_rspecifier = po.GetArg(1),
      mean_wxfilename = po.GetArg(2);
  int32 num_done = 0;
  SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
  Vector<double> sum;
  for (; !ivector_reader.Done(); ivector_reader.Next()) {
    if (sum.Dim() == 0) sum.Resize(ivector_reader.Value().Dim());
    sum.AddVec(1.0, ivector_reader.Value());
    num_done++;
  }
  if (num_done == 0) {
    KALDI_ERR << "No iVectors read";
  } else {
    sum.Scale(1.0 / num_done);
    WriteKaldiObject(sum, mean_wxfilename, binary_write);
    return 0;
  }
}
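
For intuition, here is a minimal NumPy sketch of the same averaging, assuming the i-vectors have already been read into a Python dict mapping utterance id to a 1-D array (how you load them is up to you):

import numpy as np

def compute_global_mean(ivectors):
    """ivectors: dict mapping utt-id -> 1-D np.ndarray, all of equal dim."""
    if not ivectors:
        raise ValueError("No iVectors read")
    # Sum all i-vectors, then divide by the utterance count,
    # mirroring what ivector-mean does.
    total = np.sum(list(ivectors.values()), axis=0)
    return total / len(ivectors)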

Next is the plda_scoring.sh script. It takes 8 arguments and finally produces the score file plda_scores.

local/plda_scoring.sh $tandem_feats_dir/sre $tandem_feats_dir/train $tandem_feats_dir/test \
     exp/ivectors_sre exp/ivectors_train exp/ivectors_test $trials exp/scores_gmm_512_ind_pooled

vim local/plda_scoring.sh

# The 8 positional arguments; see the invocation above for what each one is.
plda_data_dir=$1  
enroll_data_dir=$2
test_data_dir=$3
plda_ivec_dir=$4
enroll_ivec_dir=$5
test_ivec_dir=$6
trials=$7
scores_dir=$8
# Train a PLDA model from the i-vectors. The PLDA model is also trained on the
# SRE set, so all the arguments here point to SRE data.
ivector-compute-plda ark:$plda_data_dir/spk2utt \
    "ark:ivector-normalize-length scp:${plda_ivec_dir}/ivector.scp  ark:- |" \
      $plda_ivec_dir/plda 2>$plda_ivec_dir/log/plda.log
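
Note the ivector-normalize-length step piped in above: it rescales each i-vector so that its Euclidean norm equals sqrt(dim). A minimal NumPy equivalent of that behavior (a sketch, not the Kaldi source itself):

import numpy as np

def normalize_length(x):
    """Scale x so that ||x|| == sqrt(dim), as ivector-normalize-length does."""
    ratio = np.linalg.norm(x) / np.sqrt(x.size)
    return x / ratio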

mkdir -p $scores_dir

ivector-plda-scoring --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \
   "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \
   "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
        scp:${enroll_ivec_dir}/spk_ivector.scp ark:- |" \
   "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
        scp:${test_ivec_dir}/ivector.scp ark:- |" \
   "cat '$trials' | awk '{print \$1, \$2}' |" $scores_dir/plda_scores

First, the PLDA training source, ivector-compute-plda.cc. Only the key code is excerpted here; the full file is cluttered to read.

int main(int argc, char *argv[]) {
  try {
    const char *usage =
        "Computes a Plda object (for Probabilistic Linear Discriminant Analysis)\n"
        "from a set of iVectors.  Uses speaker information from a spk2utt file\n"
        "to compute within and between class variances.\n";
    ParseOptions po(usage);
    bool binary = true;
    PldaEstimationConfig plda_config;
    plda_config.Register(&po);
    po.Register("binary", &binary, "Write output in binary mode");
    po.Read(argc, argv);
    // Three arguments: the SRE spk2utt, the SRE ivector.scp, and the output
    // PLDA model file.
    std::string spk2utt_rspecifier = po.GetArg(1),
        ivector_rspecifier = po.GetArg(2),
        plda_wxfilename = po.GetArg(3);

    int64 num_spk_done = 0, num_spk_err = 0,
        num_utt_done = 0, num_utt_err = 0;

    SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
    RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
    PldaStats plda_stats;

    for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
      std::string spk = spk2utt_reader.Key();
      // All utterances of this speaker.
      const std::vector<std::string> &uttlist = spk2utt_reader.Value();
      // Note the type: all i-vectors of this speaker.
      std::vector<Vector<BaseFloat> > ivectors;
      ivectors.reserve(uttlist.size());
      // Process each utterance.
      for (size_t i = 0; i < uttlist.size(); i++) {
        std::string utt = uttlist[i];
        ivectors.resize(ivectors.size() + 1);
        ivectors.back() = ivector_reader.Value(utt);
        num_utt_done++;
      }
      // One i-vector per row, stacked into a matrix.
      Matrix<BaseFloat> ivector_mat(ivectors.size(), ivectors[0].Dim());
      for (size_t i = 0; i < ivectors.size(); i++) {
        ivector_mat.Row(i).CopyFromVec(ivectors[i]);
      }
      double weight = 1.0;
      // Accumulate per-speaker statistics into plda_stats (see plda.cc).
      plda_stats.AddSamples(weight, ivector_mat);
      num_spk_done++;
    }
    // Sort the accumulated per-speaker statistics.
    // The PLDA implementation follows "Probabilistic Linear Discriminant
    // Analysis" by Sergey Ioffe, ECCV 2006.
    plda_stats.Sort();
    PldaEstimator plda_estimator(plda_stats);
    Plda plda;
    // By default 10 EM iterations, updating the within-class and
    // between-class covariances.
    plda_estimator.Estimate(plda_config, &plda);
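
Conceptually, what AddSamples accumulates per speaker are first-order statistics plus the scatter of each speaker's utterances about their class mean, from which the estimator derives the within-class and between-class covariances. A rough NumPy simplification of those statistics (not a line-by-line port of plda.cc):

import numpy as np

def accumulate_plda_stats(spk2ivectors):
    """spk2ivectors: dict mapping speaker -> (num_utts, dim) array of i-vectors."""
    dim = next(iter(spk2ivectors.values())).shape[1]
    class_means = {}                       # per-speaker mean (first-order stats)
    offset_scatter = np.zeros((dim, dim))  # scatter about each class mean
    num_utts = 0
    for spk, mat in spk2ivectors.items():
        mean = mat.mean(axis=0)
        class_means[spk] = mean
        centered = mat - mean
        offset_scatter += centered.T @ centered
        num_utts += mat.shape[0]
    return class_means, offset_scatter, num_utts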

~/kaldi/src/ivectorbin/ivector-copy-plda --binary=false plda - > plda.txt
plda.txt contains a mean vector; a transform matrix, which plays the role of F in x_ij = u + F h_i + G z_i + epsilon (more precisely, per the header comments below, a transform that makes the within-class covariance the identity and diagonalizes the between-class covariance); and psi_, the diagonal between-class covariance:

in plda.h

  Vector<double> mean_;  // mean of samples in original space.
  Matrix<double> transform_; // of dimension Dim() by Dim();
                             // this transform makes within-class covar unit
                             // and diagonalizes the between-class covar.
  Vector<double> psi_; // of dimension Dim().  The between-class
                       // (diagonal) covariance elements, in decreasing order.

in plda.cc

void Plda::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<Plda>");
  mean_.Write(os, binary);
  transform_.Write(os, binary);
  psi_.Write(os, binary);
  WriteToken(os, binary, "</Plda>");
}

That covers training the PLDA model; next, the model is used to compute the LLR (log-likelihood ratio), in ivector-plda-scoring.cc.

ivector-plda-scoring --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \
   "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \
   "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
        scp:${enroll_ivec_dir}/spk_ivector.scp ark:- |" \
   "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
        scp:${test_ivec_dir}/ivector.scp ark:- |" \
   "cat '$trials' | awk '{print \$1, \$2}' |" $scores_dir/plda_scores
# --num-utts maps each enrolled speaker to the number of utterances pooled into
#   that speaker's i-vector.
# Argument 2 evaluates to each enrollment speaker's i-vector with mean.vec subtracted.
# Argument 3 evaluates to each test utterance's i-vector with mean.vec subtracted;
#   note the per-speaker vs. per-utterance difference from argument 2.
# Argument 4 is the first two columns of the trials file.
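
For reference, a trials file lists one enrollment-speaker/test-utterance pair per line plus a target/nontarget label (the ids below are hypothetical); the awk command above keeps only the first two columns:

spk001 utt_1001 target
spk001 utt_2042 nontarget
spk002 utt_1001 nontarget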
int main(int argc, char *argv[]) {
  using namespace kaldi;
    // (ParseOptions setup and option registration elided; see the full source.)
    std::string plda_rxfilename = po.GetArg(1),
        train_ivector_rspecifier = po.GetArg(2),
        test_ivector_rspecifier = po.GetArg(3),
        trials_rxfilename = po.GetArg(4),
        scores_wxfilename = po.GetArg(5);

    //  diagnostics:
    double tot_test_renorm_scale = 0.0, tot_train_renorm_scale = 0.0;
    int64 num_train_ivectors = 0, num_train_errs = 0, num_test_ivectors = 0;
    int64 num_trials_done = 0, num_trials_err = 0;

    Plda plda;
    ReadKaldiObject(plda_rxfilename, &plda);

    int32 dim = plda.Dim();

    SequentialBaseFloatVectorReader train_ivector_reader(train_ivector_rspecifier);
    SequentialBaseFloatVectorReader test_ivector_reader(test_ivector_rspecifier);
    RandomAccessInt32Reader num_utts_reader(num_utts_rspecifier);

    typedef unordered_map<string, Vector<BaseFloat>*, StringHasher> HashType;

    // These hashes will contain the iVectors in the PLDA subspace
    // (that makes the within-class variance unit and diagonalizes the
    // between-class covariance).  
    HashType train_ivectors, test_ivectors;

    KALDI_LOG << "Reading train iVectors";
    for (; !train_ivector_reader.Done(); train_ivector_reader.Next()) {
      std::string spk = train_ivector_reader.Key();
      const Vector<BaseFloat> &ivector = train_ivector_reader.Value();
      Vector<BaseFloat> *transformed_ivector = new Vector<BaseFloat>(dim);
      tot_train_renorm_scale += plda.TransformIvector(plda_config, ivector,
                                                      transformed_ivector);
      train_ivectors[spk] = transformed_ivector;
      num_train_ivectors++;
    }

    KALDI_LOG << "Reading test iVectors";
    for (; !test_ivector_reader.Done(); test_ivector_reader.Next()) {
      std::string utt = test_ivector_reader.Key();
      const Vector<BaseFloat> &ivector = test_ivector_reader.Value();
      Vector<BaseFloat> *transformed_ivector = new Vector<BaseFloat>(dim);

      tot_test_renorm_scale += plda.TransformIvector(plda_config, ivector,
                                                     transformed_ivector);
      test_ivectors[utt] = transformed_ivector;
      num_test_ivectors++;
    }
    KALDI_LOG << "Read " << num_test_ivectors << " test iVectors.";

    Input ki(trials_rxfilename);
    bool binary = false;
    Output ko(scores_wxfilename, binary);

    double sum = 0.0, sumsq = 0.0;
    std::string line;

    while (std::getline(ki.Stream(), line)) {
      std::vector<std::string> fields;
      SplitStringToVector(line, " \t\n\r", true, &fields);
      std::string key1 = fields[0], key2 = fields[1];
      const Vector<BaseFloat> *train_ivector = train_ivectors[key1],
          *test_ivector = test_ivectors[key2];
      Vector<double> train_ivector_dbl(*train_ivector),
          test_ivector_dbl(*test_ivector);

      // Number of utterances the enrollment i-vector was averaged over;
      // defaults to 1 if --num-utts was not supplied.
      int32 num_train_examples = 1;
      if (!num_utts_rspecifier.empty())
        num_train_examples = num_utts_reader.Value(key1);
      BaseFloat score = plda.LogLikelihoodRatio(train_ivector_dbl,
                                                num_train_examples,
                                                test_ivector_dbl);
      sum += score;
      sumsq += score * score;
      num_trials_done++;
      ko.Stream() << key1 << ' ' << key2 << ' ' << score << std::endl;
    }
 }

The function that computes the log-likelihood ratio. Under the same-speaker hypothesis, the test i-vector is distributed with mean \frac{n \Psi}{n \Psi + I} \bar{u}^g and variance I + \frac{\Psi}{n \Psi + I}, where n is the number of enrollment utterances, \bar{u}^g is the (transformed) enrollment mean, and \Psi holds the elements of the between-class covariance matrix (diagonal), of dimension dim(ivector).

double Plda::LogLikelihoodRatio(
    const VectorBase<double> &transformed_train_ivector,
    int32 n, // number of training utterances.
    const VectorBase<double> &transformed_test_ivector) const {
  int32 dim = Dim();
  double loglike_given_class, loglike_without_class;
  { // work out loglike_given_class.
    // "mean" will be the mean of the distribution if it comes from the
    // training example.  The mean is \frac{n \Psi}{n \Psi + I} \bar{u}^g
    // "variance" will be the variance of that distribution, equal to
    // I + \frac{\Psi}{n\Psi + I}.
    Vector<double> mean(dim, kUndefined);
    Vector<double> variance(dim, kUndefined);
    for (int32 i = 0; i < dim; i++) {
      mean(i) = n * psi_(i) / (n * psi_(i) + 1.0) * transformed_train_ivector(i);
      variance(i) = 1.0 + psi_(i) / (n * psi_(i) + 1.0);
    }
    double logdet = variance.SumLog();
    Vector<double> sqdiff(transformed_test_ivector);
    sqdiff.AddVec(-1.0, mean);
    sqdiff.ApplyPow(2.0);
    variance.InvertElements();
    loglike_given_class = -0.5 * (logdet + M_LOG_2PI * dim +
                                  VecVec(sqdiff, variance));
  }
  { // work out loglike_without_class.  Here the mean is zero and the variance
    // is I + \Psi.
    Vector<double> sqdiff(transformed_test_ivector); // there is no offset.
    sqdiff.ApplyPow(2.0);
    Vector<double> variance(psi_);
    variance.Add(1.0); // I + \Psi.
    double logdet = variance.SumLog();
    variance.InvertElements();
    loglike_without_class = -0.5 * (logdet + M_LOG_2PI * dim +
                                    VecVec(sqdiff, variance));
  }
  double loglike_ratio = loglike_given_class - loglike_without_class;
  return loglike_ratio;
}
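
As a cross-check, here is a NumPy transcription of LogLikelihoodRatio above (a sketch; psi is the diagonal between-class covariance from the model, and both i-vectors are assumed to already be transformed into the PLDA subspace):

import numpy as np

def log_likelihood_ratio(transformed_train_ivector, n, transformed_test_ivector, psi):
    """Mirrors Plda::LogLikelihoodRatio; all inputs are 1-D arrays except n."""
    dim = psi.size
    # Given the class: mean = n*psi/(n*psi+1) * train, var = 1 + psi/(n*psi+1).
    mean = n * psi / (n * psi + 1.0) * transformed_train_ivector
    variance = 1.0 + psi / (n * psi + 1.0)
    logdet = np.sum(np.log(variance))
    sqdiff = (transformed_test_ivector - mean) ** 2
    loglike_given_class = -0.5 * (logdet + dim * np.log(2 * np.pi)
                                  + np.sum(sqdiff / variance))
    # Without the class: mean = 0, var = 1 + psi.
    variance = 1.0 + psi
    logdet = np.sum(np.log(variance))
    sqdiff = transformed_test_ivector ** 2
    loglike_without_class = -0.5 * (logdet + dim * np.log(2 * np.pi)
                                    + np.sum(sqdiff / variance))
    return loglike_given_class - loglike_without_class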

Given a trained PLDA model, how do we obtain the PLDA-transformed vectors ourselves? Below is a sketch of a Python implementation.
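Following Plda::TransformIvector, the transformed vector is y = transform_ * (x - mean_), followed by length normalization. The sketch below implements only the simple-length-norm variant and assumes mean and transform have already been parsed out of plda.txt:

import numpy as np

def transform_ivector(ivector, mean, transform):
    """Sketch of Plda::TransformIvector with simple_length_norm=true."""
    # Project into the space where the within-class covariance is the identity
    # and the between-class covariance is diagonal.
    y = transform @ (ivector - mean)
    # Scale so that the squared norm equals the dimension.
    y *= np.sqrt(y.size) / np.linalg.norm(y)
    return y

The default Kaldi behavior instead computes a normalization factor that depends on psi_ and the number of enrollment examples (GetNormalizationFactor in plda.cc); the simple variant above is enough to see the geometry.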
