The file under discussion lives in:
src/webrtc/modules/audio_processing/test
Start from its main function:
1153 int main(int argc, char* argv[]) {
1154 webrtc::void_main(argc, argv);
1155
1156 // Optional, but removes memory leak noise from Valgrind.
1157 google::protobuf::ShutdownProtobufLibrary();
1158 return 0;
1159 }
main simply forwards its arguments, unchanged, to webrtc's void_main function, which does the actual work.
140 void void_main(int argc, char* argv[]) {
...
151 std::unique_ptr<AudioProcessing> apm(AudioProcessing::Create());
152 ASSERT_TRUE(apm.get() != NULL);
153
154 const char* pb_filename = NULL;
155 const char* far_filename = NULL;
156 const char* near_filename = NULL;
157 std::string out_filename;
158 const char* vad_out_filename = NULL;
159 const char* ns_prob_filename = NULL;
160 const char* aecm_echo_path_in_filename = NULL;
161 const char* aecm_echo_path_out_filename = NULL;
162
163 int32_t sample_rate_hz = 16000;
164
165 size_t num_capture_input_channels = 1;
166 size_t num_capture_output_channels = 1;
167 size_t num_render_channels = 1;
168
169 int samples_per_channel = sample_rate_hz / 100;
170
171 bool simulating = false;
172 bool perf_testing = false;
173 bool verbose = true;
174 bool progress = true;
Line 151 defines an AudioProcessing instance held in a unique_ptr smart pointer. APM is short for audio processing module; it provides the individual components for real-time speech processing. The APM works frame by frame on two audio streams. The stream that all processing is applied to is called the primary stream and is passed to |ProcessStream()|; the other stream is fed to |ProcessReverseStream()|. The two are referred to as near-end (capture) and far-end (render), respectively.
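To make the two-stream contract concrete, here is a minimal sketch of one 10 ms iteration, assuming the AudioFrame-based overloads of this WebRTC revision; ProcessOneFrame is a name made up for illustration, the header paths may differ between revisions, and return-code checks are omitted:

#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/include/module_common_types.h"

// One 10 ms step: feed the far-end (render) frame first so components such
// as the AEC have their reference, then process the near-end (capture)
// frame, which is modified in place.
void ProcessOneFrame(webrtc::AudioProcessing* apm,
                     webrtc::AudioFrame* render_frame,
                     webrtc::AudioFrame* capture_frame) {
  apm->ProcessReverseStream(render_frame);  // far-end: analyzed as reference
  apm->ProcessStream(capture_frame);        // near-end: processed in place
}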
This function essentially computes the normal vector of the plane the microphone array lies in. If the array is linear, the normal is taken perpendicular to that line within the x-y plane; if the geometry is neither linear nor planar, no normal exists and an empty Optional is returned.
rtc::Optional<Point> GetArrayNormalIfExists(
    const std::vector<Point>& array_geometry) {
  // If the microphones lie on a single line, |direction| is non-empty.
  const rtc::Optional<Point> direction = GetDirectionIfLinear(array_geometry);
  if (direction) {
    return rtc::Optional<Point>(Point(direction->y(), -direction->x(), 0.f));
  }
  // If the microphones are not collinear but do lie in one plane, the
  // array is planar and GetNormalIfPlanar() returns that plane's normal.
  const rtc::Optional<Point> normal = GetNormalIfPlanar(array_geometry);
  if (normal && normal->z() < kMaxDotProduct) {
    return normal;
  }
  return rtc::Optional<Point>();
}
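For intuition about the linear branch, a tiny standalone check (plain C++, not WebRTC code): for mics along the x-axis the fitted direction is (1, 0, 0), and the returned "normal" is its in-plane perpendicular (0, -1, 0), whose dot product with the direction is zero:

#include <cstdio>

int main() {
  const float dir[3] = {1.f, 0.f, 0.f};            // GetDirectionIfLinear()
  const float normal[3] = {dir[1], -dir[0], 0.f};  // Point(d.y(), -d.x(), 0)
  std::printf("normal = (%g, %g, %g)\n", normal[0], normal[1], normal[2]);
  return 0;
}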
The target's position relative to the microphone array is expressed in spherical coordinates:
template <typename T>
struct SphericalPoint {
  SphericalPoint(T azimuth, T elevation, T radius) {
    s[0] = azimuth;
    s[1] = elevation;
    s[2] = radius;
  }
  T azimuth() const { return s[0]; }
  T elevation() const { return s[1]; }
  T distance() const { return s[2]; }
  T s[3];
};
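This file does not show how the spherical coordinates map back to Cartesian; as a reference, a hypothetical helper (not WebRTC's), assuming the usual convention that azimuth is measured in the x-y plane from the x-axis and elevation upward from that plane:

#include <cmath>

struct Cartesian { float x, y, z; };

// Hypothetical spherical -> Cartesian conversion under the convention above.
Cartesian SphericalToCartesian(float azimuth, float elevation, float radius) {
  return {radius * std::cos(elevation) * std::cos(azimuth),
          radius * std::cos(elevation) * std::sin(azimuth),
          radius * std::sin(elevation)};
}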
void CovarianceMatrixGenerator::PhaseAlignmentMasks(
    size_t frequency_bin,
    size_t fft_size,
    int sample_rate,
    float sound_speed,
    const std::vector<Point>& geometry,
    float angle,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(1, mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  float freq_in_hertz =
      (static_cast<float>(frequency_bin) / fft_size) * sample_rate;

  complex<float>* const* mat_els = mat->elements();
  for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
    float distance = std::cos(angle) * geometry[c_ix].x() +
                     std::sin(angle) * geometry[c_ix].y();
    float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;

    // Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
    mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
  }
}
The phase term

    phase_shift = -2π · freq_in_hertz · (distance / sound_speed)

is simply the propagation delay distance / sound_speed of microphone c_ix along the look direction, converted into a phase at the bin's frequency; the minus sign makes the mask compensate that delay.
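Plugging in hypothetical numbers makes the magnitude concrete: a projected mic offset of 0.05 m, a 1 kHz bin and c = 340 m/s give a delay of 0.05 / 340 ≈ 0.147 ms, hence phase_shift ≈ -2π · 1000 · 0.000147 ≈ -0.92 rad. A standalone check:

#include <cmath>
#include <complex>
#include <cstdio>

int main() {
  const float distance = 0.05f, freq = 1000.f, sound_speed = 340.f;
  const float phase_shift =
      -2.f * static_cast<float>(M_PI) * distance * freq / sound_speed;
  // Same Euler construction as in PhaseAlignmentMasks().
  const std::complex<float> el(std::cos(phase_shift), std::sin(phase_shift));
  std::printf("phase = %.3f rad, element = (%.3f, %.3f)\n",
              phase_shift, el.real(), el.imag());
  return 0;
}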
void CovarianceMatrixGenerator::UniformCovarianceMatrix(
    float wave_number,
    const std::vector<Point>& geometry,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  complex<float>* const* mat_els = mat->elements();
  for (size_t i = 0; i < geometry.size(); ++i) {
    for (size_t j = 0; j < geometry.size(); ++j) {
      if (wave_number > 0.f) {
        mat_els[i][j] =
            BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
      } else {
        mat_els[i][j] = i == j ? 1.f : 0.f;
      }
    }
  }
}
It is invoked as follows:
CovarianceMatrixGenerator::UniformCovarianceMatrix(
wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
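The wave number here is k = 2πf/c, so each matrix entry is the ideal diffuse-field (cylindrically isotropic) coherence J0(k · d_ij) between mics i and j. A standalone check with hypothetical values, using C++17's std::cyl_bessel_j in place of WebRTC's BesselJ0:

#include <cmath>
#include <cstdio>

int main() {
  const double c = 340.0, f = 2000.0, d = 0.05;  // hypothetical speed/freq/spacing
  const double k = 2.0 * M_PI * f / c;           // wave number
  // Bessel function of the first kind, order 0 (C++17 <cmath>).
  std::printf("J0(k*d) = %f\n", std::cyl_bessel_j(0.0, k * d));
  return 0;
}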
Computing the high-frequency correction band:
void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
const float kAliasingFreqHz =
kSpeedOfSoundMeterSeconds /
(min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
sample_rate_hz_ / 2.f);
const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
sample_rate_hz_ / 2.f);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
}
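To make the band concrete, plug in hypothetical values: min_mic_spacing_ = 0.05 m, a broadside target (target_angle_radians_ = π/2, so the cosine term vanishes), and, assuming c = 340 m/s and kFftSize = 256, kAliasingFreqHz = 340 / 0.05 = 6800 Hz. With fs = 16 kHz the high-mean band is then [3400 Hz, 5100 Hz], which maps to roughly bins 54 through 82 (3400 · 256 / 16000 ≈ 54.4 and 5100 · 256 / 16000 ≈ 81.6).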
Avoiding spatial aliasing requires the element spacing d to be less than half a wavelength (d < λ/2); kAliasingFreqHz above is exactly the frequency at which this bound is reached for the array's minimum mic spacing and the target angle. The masks for bins above this range are then filled in as a weighted average over the high-mean band rather than estimated directly.

void CovarianceMatrixGenerator::AngledCovarianceMatrix(
float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector