I've had a nagging question recently: how exactly is PLDA trained? How does PLDA compute a score, and how is the speaker decision made from that score? And what is the relationship between EER and accuracy?
Time is tight and the thesis is still unwritten, so I need to calm down and work through this carefully to clear my head.
After i-vector extraction we have one i-vector per utterance; the first step is to compute the mean over the sre set:
ivector-mean scp:exp/ivectors_sre/ivector.scp exp/ivectors_sre/mean.vec
Let's look at what ivector-mean.cc does here: it sums up all the i-vectors in sre, counts the total number of utterances, and divides, giving the mean vector of the sre set, i.e. the u in the PLDA model formula.
// If 2 arguments are given, computes the mean of all input files and writes out the mean vector.
if (po.NumArgs() == 2) {
  // Compute the mean of the input vectors and write it out.
  std::string ivector_rspecifier = po.GetArg(1),
      mean_wxfilename = po.GetArg(2);
  int32 num_done = 0;
  SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
  Vector<double> sum;
  for (; !ivector_reader.Done(); ivector_reader.Next()) {
    if (sum.Dim() == 0) sum.Resize(ivector_reader.Value().Dim());
    sum.AddVec(1.0, ivector_reader.Value());
    num_done++;
  }
  if (num_done == 0) {
    KALDI_ERR << "No iVectors read";
  } else {
    sum.Scale(1.0 / num_done);
    WriteKaldiObject(sum, mean_wxfilename, binary_write);
    return 0;
  }
}
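For intuition, the same computation in numpy (a toy stand-in; the array below is hypothetical, one row per sre utterance):
import numpy as np

ivectors = np.random.randn(100, 400)              # hypothetical: 100 utterances, 400-dim i-vectors
mean_vec = ivectors.sum(axis=0) / len(ivectors)   # sum, then scale by 1/num_done, as above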
Next is the plda_scoring.sh script. It takes 8 arguments and ultimately produces the score file plda_scores:
local/plda_scoring.sh $tandem_feats_dir/sre $tandem_feats_dir/train $tandem_feats_dir/test \
exp/ivectors_sre exp/ivectors_train exp/ivectors_test $trials exp/scores_gmm_512_ind_pooled
vim local/plda_scoring.sh
# The 8 positional parameters, matching the invocation above:
plda_data_dir=$1
enroll_data_dir=$2
test_data_dir=$3
plda_ivec_dir=$4
enroll_ivec_dir=$5
test_ivec_dir=$6
trials=$7
scores_dir=$8
# Train a PLDA model on the i-vectors. The PLDA model is also trained on the sre set, so the arguments passed here are all sre's.
ivector-compute-plda ark:$plda_data_dir/spk2utt \
"ark:ivector-normalize-length scp:${plda_ivec_dir}/ivector.scp ark:- |" \
$plda_ivec_dir/plda 2>$plda_ivec_dir/log/plda.log
mkdir -p $scores_dir
ivector-plda-scoring --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \
"ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |"
"ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
scp:${enroll_ivec_dir}/spk_ivector.scp ark:- |" \
"ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
scp:${test_ivec_dir}/ivector.scp ark:- |" \
"cat '$trials' | awk '{print \$1, \$2}' |" $scores_dir/plda_scores
First, the source for PLDA training, ivector-compute-plda.cc. Only the key code is shown here, otherwise it gets messy:
int main(int argc, char *argv[]) {
  try {
    const char *usage =
        "Computes a Plda object (for Probabilistic Linear Discriminant Analysis)\n"
        "from a set of iVectors. Uses speaker information from a spk2utt file\n"
        "to compute within and between class variances.\n";
    ParseOptions po(usage);
    bool binary = true;
    PldaEstimationConfig plda_config;
    plda_config.Register(&po);
    po.Register("binary", &binary, "Write output in binary mode");
    po.Read(argc, argv);
    // Three arguments: sre's spk2utt, sre's ivector.scp, and the output PLDA model file.
    std::string spk2utt_rspecifier = po.GetArg(1),
        ivector_rspecifier = po.GetArg(2),
        plda_wxfilename = po.GetArg(3);
    int64 num_spk_done = 0, num_spk_err = 0,
        num_utt_done = 0, num_utt_err = 0;
    SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
    RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
    PldaStats plda_stats;
    for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
      std::string spk = spk2utt_reader.Key();
      const std::vector<std::string> &uttlist = spk2utt_reader.Value();  // all utts of this speaker
      std::vector<Vector<BaseFloat> > ivectors;  // note the type: all i-vectors of this speaker
      ivectors.reserve(uttlist.size());
      // Process each utterance.
      for (size_t i = 0; i < uttlist.size(); i++) {
        std::string utt = uttlist[i];
        ivectors.resize(ivectors.size() + 1);
        ivectors.back() = ivector_reader.Value(utt);
        num_utt_done++;
      }
      // One i-vector per row, stacked into a matrix.
      Matrix<double> ivector_mat(ivectors.size(), ivectors[0].Dim());
      for (size_t i = 0; i < ivectors.size(); i++)
        ivector_mat.Row(i).CopyFromVec(ivectors[i]);
      double weight = 1.0;
      plda_stats.AddSamples(weight, ivector_mat);  // one AddSamples call per speaker; see plda.cc
      num_spk_done++;
    }
    // Sort the accumulated per-speaker stats.
    // The PLDA implementation follows "Probabilistic Linear Discriminant Analysis"
    // by Sergey Ioffe, ECCV 2006.
    plda_stats.Sort();
    PldaEstimator plda_estimator(plda_stats);
    Plda plda;
    // By default 10 iterations, updating the within-class and between-class covariances.
    plda_estimator.Estimate(plda_config, &plda);
    WriteKaldiObject(plda, plda_wxfilename, binary);
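The estimator alternates between updating the within-class and between-class covariances. As a rough picture of the statistics it works from (a simplified numpy sketch of the zeroth-order quantities, not Kaldi's actual EM; ivectors_by_spk is a hypothetical dict mapping each speaker to a (num_utts, dim) array):
import numpy as np

def scatter_stats(ivectors_by_spk):
    """Simplified within/between-class covariance estimates.
    Kaldi's PldaEstimator refines these over (by default) 10 iterations."""
    all_ivecs = np.vstack(list(ivectors_by_spk.values()))
    dim = all_ivecs.shape[1]
    mu = all_ivecs.mean(axis=0)        # global mean u
    Sw = np.zeros((dim, dim))          # within-class scatter
    Sb = np.zeros((dim, dim))          # between-class scatter
    n = 0
    for X in ivectors_by_spk.values():
        m = X.mean(axis=0)             # speaker mean
        Sw += (X - m).T @ (X - m)
        Sb += len(X) * np.outer(m - mu, m - mu)
        n += len(X)
    return Sw / n, Sb / n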
Dump the trained model as text to inspect it:
~/kaldi/src/ivectorbin/ivector-copy-plda --binary=false plda - > plda.txt
plda.txt contains a mean vector; a transform matrix, loosely the F in x_ij = u + F h_i + G z_ij + epsilon (more precisely, it whitens the within-class covariance and diagonalizes the between-class covariance); and psi_, the diagonal between-class covariance, i.e.
in plda.h
Vector<double> mean_;       // mean of samples in original space.
Matrix<double> transform_;  // of dimension Dim() by Dim();
                            // this transform makes within-class covar unit
                            // and diagonalizes the between-class covar.
Vector<double> psi_;        // of dimension Dim(). The between-class
                            // (diagonal) covariance elements, in decreasing order.
in plda.cc
void Plda::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<Plda>");
  mean_.Write(os, binary);
  transform_.Write(os, binary);
  psi_.Write(os, binary);
  WriteToken(os, binary, "</Plda>");
}
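To make the roles of transform_ and psi_ concrete: given within- and between-class covariance estimates (e.g. from the sketch above), a transform with exactly these two properties can be built as follows (my own numpy sketch, not Kaldi code):
import numpy as np

def plda_transform(within_var, between_var):
    """Whiten the within-class covariance and diagonalize the
    between-class covariance, as described in plda.h."""
    w, V = np.linalg.eigh(within_var)
    W = np.diag(1.0 / np.sqrt(w)) @ V.T   # W @ within_var @ W.T == I
    B = W @ between_var @ W.T             # between-class covar in whitened space
    psi, U = np.linalg.eigh(B)
    order = np.argsort(psi)[::-1]         # decreasing order, like psi_
    psi, U = psi[order], U[:, order]
    transform = U.T @ W                   # transform @ between_var @ transform.T == diag(psi)
    return transform, psi
In this diagonalized space every dimension is independent, which is what lets the LogLikelihoodRatio computation below work element-wise.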
The above is the PLDA model training code; next is computing the LLR with the trained model, in ivector-plda-scoring.cc:
ivector-plda-scoring --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \
"ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |"
"ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
scp:${enroll_ivec_dir}/spk_ivector.scp ark:- |" \
"ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec \
scp:${test_ivec_dir}/ivector.scp ark:- |" \
"cat '$trials' | awk '{print \$1, \$2}' |" $scores_dir/plda_scores
// --num-utts gives the number of utterances per enrollment speaker:
// the 2nd argument yields each enrollment speaker's i-vector with mean.vec subtracted;
// the 3rd argument yields each test utterance's i-vector with mean.vec subtracted (note the per-utterance vs. per-speaker difference from the 2nd argument);
// the 4th argument is the first two columns of the trials file.
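For concreteness, each line of a trials file is "speaker test-utterance target|nontarget", and the awk step keeps only the first two columns; plda_scores then holds one LLR per trial. The names and scores below are made-up placeholders:
10116 sre04_utt_00321 target
10116 sre04_utt_00987 nontarget
After scoring, plda_scores would contain lines like:
10116 sre04_utt_00321 3.172
10116 sre04_utt_00987 -14.580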
int main(int argc, char *argv[]) {
  using namespace kaldi;
  PldaConfig plda_config;  // registered on the option parser in the full source
  std::string plda_rxfilename = po.GetArg(1),
      train_ivector_rspecifier = po.GetArg(2),
      test_ivector_rspecifier = po.GetArg(3),
      trials_rxfilename = po.GetArg(4),
      scores_wxfilename = po.GetArg(5);
  // diagnostics:
  double tot_test_renorm_scale = 0.0, tot_train_renorm_scale = 0.0;
  int64 num_train_ivectors = 0, num_train_errs = 0, num_test_ivectors = 0;
  int64 num_trials_done = 0, num_trials_err = 0;
  Plda plda;
  ReadKaldiObject(plda_rxfilename, &plda);
  int32 dim = plda.Dim();
  SequentialBaseFloatVectorReader train_ivector_reader(train_ivector_rspecifier);
  SequentialBaseFloatVectorReader test_ivector_reader(test_ivector_rspecifier);
  RandomAccessInt32Reader num_utts_reader(num_utts_rspecifier);
  typedef unordered_map<string, Vector<BaseFloat>*, StringHasher> HashType;
  // These hashes will contain the iVectors in the PLDA subspace
  // (that makes the within-class variance unit and diagonalizes the
  // between-class covariance).
  HashType train_ivectors, test_ivectors;
  KALDI_LOG << "Reading train iVectors";
  for (; !train_ivector_reader.Done(); train_ivector_reader.Next()) {
    std::string spk = train_ivector_reader.Key();
    const Vector<BaseFloat> &ivector = train_ivector_reader.Value();
    Vector<BaseFloat> *transformed_ivector = new Vector<BaseFloat>(dim);
    tot_train_renorm_scale += plda.TransformIvector(plda_config, ivector,
                                                    transformed_ivector);
    train_ivectors[spk] = transformed_ivector;
    num_train_ivectors++;
  }
  KALDI_LOG << "Reading test iVectors";
  for (; !test_ivector_reader.Done(); test_ivector_reader.Next()) {
    std::string utt = test_ivector_reader.Key();
    const Vector<BaseFloat> &ivector = test_ivector_reader.Value();
    Vector<BaseFloat> *transformed_ivector = new Vector<BaseFloat>(dim);
    tot_test_renorm_scale += plda.TransformIvector(plda_config, ivector,
                                                   transformed_ivector);
    test_ivectors[utt] = transformed_ivector;
    num_test_ivectors++;
  }
  KALDI_LOG << "Read " << num_test_ivectors << " test iVectors.";
  Input ki(trials_rxfilename);
  bool binary = false;
  Output ko(scores_wxfilename, binary);
  double sum = 0.0, sumsq = 0.0;
  std::string line;
  while (std::getline(ki.Stream(), line)) {
    std::vector<std::string> fields;
    SplitStringToVector(line, " \t\n\r", true, &fields);
    std::string key1 = fields[0], key2 = fields[1];
    const Vector<BaseFloat> *train_ivector = train_ivectors[key1],
        *test_ivector = test_ivectors[key2];
    Vector<double> train_ivector_dbl(*train_ivector),
        test_ivector_dbl(*test_ivector);
    // Number of enrollment utterances for this speaker, from --num-utts
    // (defaults to 1 when no num_utts table is given).
    int32 num_train_examples = 1;
    if (!num_utts_rspecifier.empty())
      num_train_examples = num_utts_reader.Value(key1);
    BaseFloat score = plda.LogLikelihoodRatio(train_ivector_dbl,
                                              num_train_examples,
                                              test_ivector_dbl);
    sum += score;
    sumsq += score * score;
    num_trials_done++;
    ko.Stream() << key1 << ' ' << key2 << ' ' << score << std::endl;
  }
}
The function that computes the log-likelihood ratio. Under the same-speaker hypothesis, the predicted distribution of the test i-vector has mean \frac{n\Psi}{n\Psi + I}\bar{u}^g, where n is the number of enrollment utterances and \bar{u}^g is the transformed average enrollment i-vector.
\Psi is the (diagonal) between-class covariance, of dimension dim(ivector).
double Plda::LogLikelihoodRatio(
    const VectorBase<double> &transformed_train_ivector,
    int32 n, // number of training utterances.
    const VectorBase<double> &transformed_test_ivector) const {
  int32 dim = Dim();
  double loglike_given_class, loglike_without_class;
  { // work out loglike_given_class.
    // "mean" will be the mean of the distribution if it comes from the
    // training example. The mean is \frac{n \Psi}{n \Psi + I} \bar{u}^g
    // "variance" will be the variance of that distribution, equal to
    // I + \frac{\Psi}{n\Psi + I}.
    Vector<double> mean(dim, kUndefined);
    Vector<double> variance(dim, kUndefined);
    for (int32 i = 0; i < dim; i++) {
      mean(i) = n * psi_(i) / (n * psi_(i) + 1.0) * transformed_train_ivector(i);
      variance(i) = 1.0 + psi_(i) / (n * psi_(i) + 1.0);
    }
    double logdet = variance.SumLog();
    Vector<double> sqdiff(transformed_test_ivector);
    sqdiff.AddVec(-1.0, mean);
    sqdiff.ApplyPow(2.0);
    variance.InvertElements();
    loglike_given_class = -0.5 * (logdet + M_LOG_2PI * dim +
                                  VecVec(sqdiff, variance));
  }
  { // work out loglike_without_class. Here the mean is zero and the variance
    // is I + \Psi.
    Vector<double> sqdiff(transformed_test_ivector); // there is no offset.
    sqdiff.ApplyPow(2.0);
    Vector<double> variance(psi_);
    variance.Add(1.0); // I + \Psi.
    double logdet = variance.SumLog();
    variance.InvertElements();
    loglike_without_class = -0.5 * (logdet + M_LOG_2PI * dim +
                                    VecVec(sqdiff, variance));
  }
  double loglike_ratio = loglike_given_class - loglike_without_class;
  return loglike_ratio;
}
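To check my understanding, here is the same computation in numpy (my own sketch; train_ivec is the transformed average enrollment i-vector, n the number of enrollment utterances, and psi the psi_ vector):
import numpy as np

def plda_llr(train_ivec, n, test_ivec, psi):
    """Mirror of Plda::LogLikelihoodRatio, element-wise in the PLDA subspace."""
    dim = len(psi)
    # Same-speaker hypothesis: mean = n*psi/(n*psi+1) * train_ivec,
    # variance = 1 + psi/(n*psi+1), all element-wise.
    mean = n * psi / (n * psi + 1.0) * train_ivec
    var1 = 1.0 + psi / (n * psi + 1.0)
    ll_given = -0.5 * (np.sum(np.log(var1)) + dim * np.log(2 * np.pi)
                       + np.sum((test_ivec - mean) ** 2 / var1))
    # Different-speaker hypothesis: mean = 0, variance = 1 + psi.
    var0 = 1.0 + psi
    ll_without = -0.5 * (np.sum(np.log(var0)) + dim * np.log(2 * np.pi)
                         + np.sum(test_ivec ** 2 / var0))
    return ll_given - ll_without
The speaker decision is then a threshold on this score: accept "same speaker" when the LLR exceeds the chosen operating threshold. EER is the error rate at the particular threshold where the false-alarm rate equals the miss rate, so it characterizes the whole score distribution rather than the accuracy at one fixed threshold.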
After the PLDA model is trained, how do we obtain the PLDA-projected feature vectors? Below is my own Python implementation:
pass
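Still a stub for now. As a starting point, here is a minimal numpy sketch of what Plda::TransformIvector does with --simple-length-norm=true; mean, transform and psi-free scaling stand in for the mean_ and transform_ read from plda.txt. Kaldi's default normalization additionally uses psi_ and the number of examples via GetNormalizationFactor (see plda.cc).
import numpy as np

def transform_ivector(ivector, mean, transform, simple_length_norm=True):
    """Project an i-vector into the PLDA subspace, following
    Plda::TransformIvector with --simple-length-norm=true."""
    x = transform @ (ivector - mean)   # equals transform_ * ivector + offset_ in the C++
    if simple_length_norm:
        # scale so that the squared norm equals the dimension
        x *= np.sqrt(len(x)) / np.linalg.norm(x)
    return x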