Analysis of WebRTC's process_test.cc speech test module


The file lives in the following directory:

src/webrtc/modules/audio_processing/test

We start from the file's main function:

 1153 int main(int argc, char* argv[]) {
1154   webrtc::void_main(argc, argv);
1155 
1156   // Optional, but removes memory leak noise from Valgrind.
1157   google::protobuf::ShutdownProtobufLibrary();
1158   return 0;
1159 }

main simply calls webrtc::void_main() and forwards the command-line arguments to it unchanged.

 ​140 void void_main(int argc, char* argv[]) {
...
 151   std::unique_ptr<AudioProcessing> apm(AudioProcessing::Create());
 152   ASSERT_TRUE(apm.get() != NULL);
 153 
 154   const char* pb_filename = NULL;
 155   const char* far_filename = NULL;
 156   const char* near_filename = NULL;
 157   std::string out_filename;
 158   const char* vad_out_filename = NULL;
 159   const char* ns_prob_filename = NULL;
 160   const char* aecm_echo_path_in_filename = NULL;
 161   const char* aecm_echo_path_out_filename = NULL;
 162 
 163   int32_t sample_rate_hz = 16000;
 164 
 165   size_t num_capture_input_channels = 1;
 166   size_t num_capture_output_channels = 1;
 167   size_t num_render_channels = 1;
 168 
 169   int samples_per_channel = sample_rate_hz / 100;
 170 
 171   bool simulating = false;
 172   bool perf_testing = false;
 173   bool verbose = true;
 174   bool progress = true;

Line 151 creates an AudioProcessing object through a unique_ptr smart pointer. APM is short for Audio Processing Module; it provides the individual components used for real-time speech processing. APM works frame by frame on two audio streams (samples_per_channel = sample_rate_hz / 100, i.e. 10 ms frames). The stream that all processing is applied to is called the primary stream and is passed to |ProcessStream()|; the other stream is passed to |ProcessReverseStream()|. The two are referred to as the near-end (capture) and far-end (render) streams respectively.
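To make the capture/render split concrete, here is a minimal usage sketch. It is not taken from process_test.cc; the include paths and the AudioFrame-based overloads are assumptions that may differ between WebRTC revisions.

#include <cassert>

#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/include/module_common_types.h"  // webrtc::AudioFrame

// Push one 10 ms render (far-end) frame and one capture (near-end) frame
// through APM. Error handling is reduced to asserts for brevity.
void ProcessOneFrame(webrtc::AudioProcessing* apm,
                     webrtc::AudioFrame* render_frame,
                     webrtc::AudioFrame* capture_frame) {
  // The render stream is only analyzed (e.g. as the echo canceller reference).
  int err = apm->ProcessReverseStream(render_frame);
  assert(err == webrtc::AudioProcessing::kNoError);

  // The capture stream is modified in place by whichever components are enabled.
  err = apm->ProcessStream(capture_frame);
  assert(err == webrtc::AudioProcessing::kNoError);
  (void)err;  // silence the unused-variable warning in release builds
}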
In effect this computes a normal vector for the microphone array: if the microphones are collinear, the normal is taken perpendicular to that line (within the x-y plane); if they are not collinear but coplanar, the plane normal is used, provided its z-component is small enough; otherwise no normal exists and an empty Optional is returned.

rtc::Optional<Point> GetArrayNormalIfExists(
    const std::vector<Point>& array_geometry) {
  // If the microphones lie on one line, |direction| is non-empty.
  const rtc::Optional<Point> direction = GetDirectionIfLinear(array_geometry);
  if (direction) {
    return rtc::Optional<Point>(Point(direction->y(), -direction->x(), 0.f));
  }
  // If the microphones are not collinear but lie in one plane, it is a planar array.
  const rtc::Optional<Point> normal = GetNormalIfPlanar(array_geometry);
  if (normal && normal->z() < kMaxDotProduct) {
    return normal;
  }
  return rtc::Optional<Point>();
}
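A quick worked example (a standalone sketch, not WebRTC code; the geometry is hypothetical): for a linear array along the x-axis, GetDirectionIfLinear() yields the direction (1, 0, 0), so the function returns (direction.y, -direction.x, 0) = (0, -1, 0), an in-plane vector perpendicular to the array axis.

#include <array>
#include <cstdio>

int main() {
  // Direction of a hypothetical linear array lying on the x-axis.
  std::array<float, 3> direction = {1.f, 0.f, 0.f};
  // Same construction as Point(direction->y(), -direction->x(), 0.f) above.
  std::array<float, 3> normal = {direction[1], -direction[0], 0.f};
  std::printf("normal = (%g, %g, %g)\n", normal[0], normal[1], normal[2]);  // (0, -1, 0)
  return 0;
}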

The target (relative to the microphone array) is represented in spherical coordinates as follows:
template <typename T>
struct SphericalPoint {
  SphericalPoint(T azimuth, T elevation, T radius) {
    s[0] = azimuth;
    s[1] = elevation;
    s[2] = radius;
  }
  T azimuth() const { return s[0]; }
  T elevation() const { return s[1]; }
  T distance() const { return s[2]; }
  T s[3];
};
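A small usage sketch (the numbers are illustrative only, and the angle convention is whatever the caller uses):

SphericalPoint<float> target(0.5236f,  // azimuth in radians (~30 degrees)
                             0.f,      // elevation in radians
                             2.f);     // radius in meters
float az = target.azimuth();     // 0.5236f
float dist = target.distance();  // 2.f

PhaseAlignmentMasks() below builds, for one frequency bin and one incidence angle, the per-microphone phase factors (a steering vector) that align the array to that direction: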

void CovarianceMatrixGenerator::PhaseAlignmentMasks(
    size_t frequency_bin,
    size_t fft_size,
    int sample_rate,
    float sound_speed,
    const std::vector<Point>& geometry,
    float angle,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(1, mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  float freq_in_hertz =
      (static_cast<float>(frequency_bin) / fft_size) * sample_rate;

  complex<float>* const* mat_els = mat->elements();
  for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
    float distance = std::cos(angle) * geometry[c_ix].x() +
                     std::sin(angle) * geometry[c_ix].y();
    float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;

    // Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
    mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
  }
}

phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;

i.e. phase_shift = -2π · freq_in_hertz · (distance / sound_speed): distance / sound_speed is the propagation delay of a plane wave arriving from direction angle, and multiplying that delay by -2π·f converts it into a phase shift for this frequency bin.
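A quick numeric check (a standalone sketch with illustrative values: a microphone at x = 0.05 m, a source at angle 0 rad, a 1000 Hz bin, sound speed 343 m/s):

#include <cmath>
#include <cstdio>

int main() {
  const float distance = std::cos(0.f) * 0.05f;  // projection of the mic position: 0.05 m
  const float delay = distance / 343.f;          // propagation delay: ~1.46e-4 s
  const float phase_shift =
      -2.f * static_cast<float>(M_PI) * 1000.f * delay;
  std::printf("delay = %g s, phase_shift = %g rad\n", delay, phase_shift);
  // Prints roughly: delay = 0.000145773 s, phase_shift = -0.916 rad.
  return 0;
}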

void CovarianceMatrixGenerator::UniformCovarianceMatrix(
    float wave_number,
    const std::vector<Point>& geometry,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  complex<float>* const* mat_els = mat->elements();
  for (size_t i = 0; i < geometry.size(); ++i) {
    for (size_t j = 0; j < geometry.size(); ++j) {
      if (wave_number > 0.f) {
        mat_els[i][j] =
            BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
      } else {
        mat_els[i][j] = i == j ? 1.f : 0.f;
      }
    }
  }
}

Its call site is:

    CovarianceMatrixGenerator::UniformCovarianceMatrix(
        wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
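Each entry of this matrix has the form J0(k·d_ij), where k = 2πf/c is the wave number and d_ij the distance between microphones i and j; this is the spatial coherence of a cylindrically isotropic (diffuse) noise field. A standalone numeric check of one entry (illustrative values; std::cyl_bessel_j from the C++17 special math functions stands in for WebRTC's BesselJ0 and is not available in every standard library):

#include <cmath>
#include <cstdio>

int main() {
  const float freq = 1000.f;  // Hz, illustrative
  const float c = 343.f;      // speed of sound in m/s
  const float d_ij = 0.05f;   // distance between microphones i and j, in m
  const float wave_number = 2.f * static_cast<float>(M_PI) * freq / c;
  const double entry = std::cyl_bessel_j(0.0, wave_number * d_ij);
  std::printf("cov(i, j) = %f\n", entry);  // J0(0.916) is roughly 0.80
  return 0;
}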

Determining the high-frequency correction range:

void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
  const float kAliasingFreqHz =
      kSpeedOfSoundMeterSeconds /
      (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
  const float kHighMeanStartHz = std::min(0.5f *  kAliasingFreqHz,
                                          sample_rate_hz_ / 2.f);
  const float kHighMeanEndHz = std::min(0.75f *  kAliasingFreqHz,
                                        sample_rate_hz_ / 2.f);
  high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
  high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);

  RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
  RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
  RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
}

To avoid spatial aliasing (grating lobes), the element spacing d must not exceed half a wavelength at the highest frequency of interest. The code above therefore derives an angle-dependent aliasing frequency kAliasingFreqHz = c / (d · (1 + |cos(target_angle)|)) from the smallest microphone spacing, and places the "high" frequency band between 0.5 and 0.75 of that frequency, capped at the Nyquist frequency.
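A worked example with illustrative numbers (5 cm minimum spacing, a broadside target at π/2 rad, 16 kHz sample rate):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float kSpeedOfSoundMeterSeconds = 343.f;
  const float min_mic_spacing = 0.05f;                                // 5 cm
  const float target_angle_radians = static_cast<float>(M_PI) / 2.f;  // broadside
  const float sample_rate_hz = 16000.f;

  const float aliasing_freq_hz =
      kSpeedOfSoundMeterSeconds /
      (min_mic_spacing * (1.f + std::abs(std::cos(target_angle_radians))));
  const float high_mean_start_hz =
      std::min(0.5f * aliasing_freq_hz, sample_rate_hz / 2.f);
  const float high_mean_end_hz =
      std::min(0.75f * aliasing_freq_hz, sample_rate_hz / 2.f);
  // aliasing_freq_hz is 6860 Hz here, so the "high" band is [3430, 5145] Hz.
  std::printf("%g Hz -> [%g, %g] Hz\n",
              aliasing_freq_hz, high_mean_start_hz, high_mean_end_hz);
  return 0;
}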

void CovarianceMatrixGenerator::AngledCovarianceMatrix(
    float sound_speed,
    float angle,
    size_t frequency_bin,
    size_t fft_size,
    size_t num_freq_bins,
    int sample_rate,
    const std::vector<Point>& geometry,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  ComplexMatrix<float> interf_cov_vector(1, geometry.size());
  ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
  PhaseAlignmentMasks(frequency_bin,
                      fft_size,
                      sample_rate,
                      sound_speed,
                      geometry,
                      angle,
                      &interf_cov_vector);
  interf_cov_vector.Scale(1.f / Norm(interf_cov_vector));
  interf_cov_vector_transposed.Transpose(interf_cov_vector);
  interf_cov_vector.PointwiseConjugate();
  mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
}
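In other words, the angled covariance matrix is the rank-1 outer product of the normalized steering vector with itself, R[i][j] = v_i · conj(v_j). A standalone two-microphone sketch of that outer product (the steering-vector values are hypothetical):

#include <complex>
#include <cstdio>

int main() {
  // Hypothetical normalized steering vector for one frequency bin.
  std::complex<float> v[2] = {std::complex<float>(0.7071f, 0.f),
                              std::polar(0.7071f, -0.916f)};
  std::complex<float> r[2][2];
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      r[i][j] = v[i] * std::conj(v[j]);  // R[i][j] = v_i * conj(v_j)
    }
  }
  std::printf("R[0][1] = %f %+fi\n", r[0][1].real(), r[0][1].imag());
  return 0;
}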

The uniform and angled covariance matrices above are subsequently combined by a weighted average to form the beamformer's interference covariance matrices.