/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec_state.h"
#include <math.h>
#include <algorithm>
#include <numeric>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
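// Helpers for reading the field trials that act as kill switches for the
// different reset behaviors at echo path changes.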
bool DeactivateInitialStateResetAtEchoPathChange() {
return field_trial::IsEnabled(
"WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
}
bool FullResetAtEchoPathChange() {
return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
}
bool SubtractorAnalyzerResetAtEchoPathChange() {
return !field_trial::IsEnabled(
"WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
}
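// Computes the render power spectrum at the estimated delay, averaged over
// all render channels, and adds the power of the modeled render
// reverberation. The reverb model itself is updated with the render spectrum
// one block further into the past.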
void ComputeAvgRenderReverb(
const SpectrumBuffer& spectrum_buffer,
int delay_blocks,
float reverb_decay,
ReverbModel* reverb_model,
rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
RTC_DCHECK(reverb_model);
const size_t num_render_channels = spectrum_buffer.buffer[0].size();
int idx_at_delay =
spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
std::array<float, kFftLengthBy2Plus1> X2_data;
rtc::ArrayView<const float> X2;
if (num_render_channels > 1) {
auto average_channels =
[](size_t num_render_channels,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
spectrum_band_0,
rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
std::fill(render_power.begin(), render_power.end(), 0.f);
for (size_t ch = 0; ch < num_render_channels; ++ch) {
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
render_power[k] += spectrum_band_0[ch][k];
}
}
const float normalizer = 1.f / num_render_channels;
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
render_power[k] *= normalizer;
}
};
average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
X2_data);
reverb_model->UpdateReverbNoFreqShaping(
X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
X2_data);
X2 = X2_data;
} else {
reverb_model->UpdateReverbNoFreqShaping(
spectrum_buffer.buffer[idx_past][/*channel=*/0],
/*power_spectrum_scaling=*/1.0f, reverb_decay);
X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
}
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
reverb_model->reverb();
for (size_t k = 0; k < X2.size(); ++k) {
reverb_power_spectrum[k] = X2[k] + reverb_power[k];
}
}
} // namespace
std::atomic<int> AecState::instance_count_(0);
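// Computes the scaling to apply to the residual echo power, based on whether
// the linear filter has had sufficient strong, non-saturated render blocks to
// converge.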
void AecState::GetResidualEchoScaling(
rtc::ArrayView<float> residual_scaling) const {
bool filter_has_had_time_to_converge;
if (config_.filter.conservative_initial_phase) {
filter_has_had_time_to_converge =
strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
} else {
filter_has_had_time_to_converge =
strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
}
echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
residual_scaling);
}
AecState::AecState(const EchoCanceller3Config& config,
size_t num_capture_channels)
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
config_(config),
num_capture_channels_(num_capture_channels),
deactivate_initial_state_reset_at_echo_path_change_(
DeactivateInitialStateResetAtEchoPathChange()),
full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
subtractor_analyzer_reset_at_echo_path_change_(
SubtractorAnalyzerResetAtEchoPathChange()),
initial_state_(config_),
delay_state_(config_, num_capture_channels_),
transparent_state_(TransparentMode::Create(config_)),
filter_quality_state_(config_, num_capture_channels_),
erl_estimator_(2 * kNumBlocksPerSecond),
erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
filter_analyzer_(config_, num_capture_channels_),
echo_audibility_(
config_.echo_audibility.use_stationarity_properties_at_init),
reverb_model_estimator_(config_, num_capture_channels_),
subtractor_output_analyzer_(num_capture_channels_) {}
AecState::~AecState() = default;
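// Handles reported echo path changes. A delay change triggers a full reset of
// the AEC state (unless disabled via field trial), whereas a pure gain change
// only soft-resets the ERLE estimator. The subtractor output analyzer is
// notified of the change separately.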
void AecState::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
const auto full_reset = [&]() {
filter_analyzer_.Reset();
capture_signal_saturation_ = false;
strong_not_saturated_render_blocks_ = 0;
blocks_with_active_render_ = 0;
if (!deactivate_initial_state_reset_at_echo_path_change_) {
initial_state_.Reset();
}
if (transparent_state_) {
transparent_state_->Reset();
}
erle_estimator_.Reset(true);
erl_estimator_.Reset();
filter_quality_state_.Reset();
};
// TODO(peah): Refine the reset scheme according to the type of gain and
// delay adjustment.
if (full_reset_at_echo_path_change_ &&
echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kNone) {
full_reset();
} else if (echo_path_variability.gain_change) {
erle_estimator_.Reset(false);
}
if (subtractor_analyzer_reset_at_echo_path_change_) {
subtractor_output_analyzer_.HandleEchoPathChange();
}
}
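// Updates the AEC state with the data from the current block: the subtractor
// outputs and adaptive filters are analyzed, the filter delay, render
// activity, ERL/ERLE and reverb estimates are updated, and the saturation,
// initial-state, transparent-mode and filter-quality decisions are refreshed.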
void AecState::Update(
const absl::optional<DelayEstimate>& external_delay,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
adaptive_filter_frequency_responses,
rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const SubtractorOutput> subtractor_output) {
RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
RTC_DCHECK_EQ(num_capture_channels_,
adaptive_filter_frequency_responses.size());
RTC_DCHECK_EQ(num_capture_channels_,
adaptive_filter_impulse_responses.size());
// Analyze the filter outputs and filters.
bool any_filter_converged;
bool any_coarse_filter_converged;
bool all_filters_diverged;
subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
&any_coarse_filter_converged,
&all_filters_diverged);
bool any_filter_consistent;
float max_echo_path_gain;
filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
&any_filter_consistent, &max_echo_path_gain);
// Estimate the direct path delay of the filter.
if (config_.filter.use_linear_filter) {
delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
strong_not_saturated_render_blocks_);
}
const Block& aligned_render_block =
render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
// Update render counters.
bool active_render = false;
for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
const float render_energy =
std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
aligned_render_block.end(/*block=*/0, ch),
aligned_render_block.begin(/*block=*/0, ch), 0.f);
if (render_energy > (config_.render_levels.active_render_limit *
config_.render_levels.active_render_limit) *
kFftLengthBy2) {
active_render = true;
break;
}
}
blocks_with_active_render_ += active_render ? 1 : 0;
strong_not_saturated_render_blocks_ +=
active_render && !SaturatedCapture() ? 1 : 0;
std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
delay_state_.MinDirectPathFilterDelay(),
ReverbDecay(/*mild=*/false), &avg_render_reverb_,
avg_render_spectrum_with_reverb);
if (config_.echo_audibility.use_stationarity_properties) {
// Update the echo audibility evaluator.
echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
delay_state_.MinDirectPathFilterDelay(),
delay_state_.ExternalDelayReported());
}
// Update the ERL and ERLE measures.
if (initial_state_.TransitionTriggered()) {
erle_estimator_.Reset(false);
}
erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
avg_render_spectrum_with_reverb, Y2, E2_refined,
subtractor_output_analyzer_.ConvergedFilters());
erl_estimator_.Update(
subtractor_output_analyzer_.ConvergedFilters(),
render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
// Detect and flag echo saturation.
if (config_.ep_strength.echo_can_saturate) {
saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
UsableLinearEstimate(), subtractor_output,
max_echo_path_gain);
} else {
RTC_DCHECK(!saturation_detector_.SaturatedEcho());
}
// Update the decision on whether to use the initial state parameter set.
initial_state_.Update(active_render, SaturatedCapture());
// Detect whether the transparent mode should be activated.
if (transparent_state_) {
transparent_state_->Update(
delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
active_render, SaturatedCapture());
}
// Analyze the quality of the filter.
filter_quality_state_.Update(active_render, TransparentModeActive(),
SaturatedCapture(), external_delay,
any_filter_converged);
// Update the reverb estimate.
const bool stationary_block =
config_.echo_audibility.use_stationarity_properties &&
echo_audibility_.IsBlockStationary();
reverb_model_estimator_.Update(
filter_analyzer_.GetAdjustedFilters(),
adaptive_filter_frequency_responses,
erle_estimator_.GetInstLinearQualityEstimates(),
delay_state_.DirectPathFilterDelays(),
filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
erle_estimator_.Dump(data_dumper_);
reverb_model_estimator_.Dump(data_dumper_.get());
data_dumper_->DumpRaw("aec3_active_render", active_render);
data_dumper_->DumpRaw("aec3_erl", Erl());
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
data_dumper_->DumpRaw("aec3_erle_onset_compensated",
Erle(/*onset_compensated=*/true)[0]);
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
data_dumper_->DumpRaw("aec3_filter_delay",
filter_analyzer_.MinFilterDelayBlocks());
data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
data_dumper_->DumpRaw("aec3_initial_state",
initial_state_.InitialStateActive());
data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
any_coarse_filter_converged);
data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
data_dumper_->DumpRaw("aec3_external_delay_avaliable",
external_delay ? 1 : 0);
data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
GetReverbFrequencyResponse());
data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
subtractor_output[0].e2_coarse);
data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
subtractor_output[0].e2_refined);
}
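// Keeps track of whether the echo canceller is still in its initial state,
// i.e., whether the initial parameter set should be used.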
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
: conservative_initial_phase_(config.filter.conservative_initial_phase),
initial_state_seconds_(config.filter.initial_state_seconds) {
Reset();
}
void AecState::InitialState::Reset() {
initial_state_ = true;
strong_not_saturated_render_blocks_ = 0;
}
void AecState::InitialState::Update(bool active_render,
                                    bool saturated_capture) {
strong_not_saturated_render_blocks_ +=
active_render && !saturated_capture ? 1 : 0;
// Flag whether the initial state is still active.
bool prev_initial_state = initial_state_;
if (conservative_initial_phase_) {
initial_state_ =
strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
} else {
initial_state_ = strong_not_saturated_render_blocks_ <
initial_state_seconds_ * kNumBlocksPerSecond;
}
// Flag whether the transition from the initial state has started.
transition_triggered_ = !initial_state_ && prev_initial_state;
}
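// Keeps track of the filter delay estimates per capture channel as well as of
// any reported external delay.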
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
size_t num_capture_channels)
: delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
min_filter_delay_(delay_headroom_blocks_) {}
void AecState::FilterDelay::Update(
rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
const absl::optional<DelayEstimate>& external_delay,
size_t blocks_with_proper_filter_adaptation) {
// Update the delay based on the external delay.
if (external_delay &&
(!external_delay_ || external_delay_->delay != external_delay->delay)) {
external_delay_ = external_delay;
external_delay_reported_ = true;
}
// Override the estimated delay if it is not certain that the filter has had
// time to converge.
const bool delay_estimator_may_not_have_converged =
blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
if (delay_estimator_may_not_have_converged && external_delay_) {
const int delay_guess = delay_headroom_blocks_;
std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
delay_guess);
} else {
RTC_DCHECK_EQ(filter_delays_blocks_.size(),
analyzer_filter_delay_estimates_blocks.size());
std::copy(analyzer_filter_delay_estimates_blocks.begin(),
analyzer_filter_delay_estimates_blocks.end(),
filter_delays_blocks_.begin());
}
min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
filter_delays_blocks_.end());
}
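// Analyzes whether the output of the linear filter is of sufficient quality to
// be used.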
AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
const EchoCanceller3Config& config,
size_t num_capture_channels)
: use_linear_filter_(config.filter.use_linear_filter),
usable_linear_filter_estimates_(num_capture_channels, false) {}
void AecState::FilteringQualityAnalyzer::Reset() {
std::fill(usable_linear_filter_estimates_.begin(),
usable_linear_filter_estimates_.end(), false);
overall_usable_linear_estimates_ = false;
filter_update_blocks_since_reset_ = 0;
}
void AecState::FilteringQualityAnalyzer::Update(
bool active_render,
bool transparent_mode,
bool saturated_capture,
const absl::optional<DelayEstimate>& external_delay,
bool any_filter_converged) {
// Update the counters for blocks with filter updates.
const bool filter_update = active_render && !saturated_capture;
filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
filter_update_blocks_since_start_ += filter_update ? 1 : 0;
// Store convergence flag when observed.
convergence_seen_ = convergence_seen_ || any_filter_converged;
// Verify requirements for achieving a decent filter. The requirements for
// filter adaptation at call startup are more restrictive than after an
// in-call reset.
const bool sufficient_data_to_converge_at_startup =
filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
const bool sufficient_data_to_converge_at_reset =
sufficient_data_to_converge_at_startup &&
filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
// The linear filter can only be used if it has had time to converge.
overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
sufficient_data_to_converge_at_reset;
// The linear filter can only be used if an external delay or convergence has
// been identified.
overall_usable_linear_estimates_ =
overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
// If transparent mode is on, deactivate the use of the linear filter.
overall_usable_linear_estimates_ =
overall_usable_linear_estimates_ && !transparent_mode;
if (use_linear_filter_) {
std::fill(usable_linear_filter_estimates_.begin(),
usable_linear_filter_estimates_.end(),
overall_usable_linear_estimates_);
}
}
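// Detects echo saturation. When the linear filter estimate is usable, the
// detection is based on the maximum absolute values of the refined and coarse
// filter outputs; otherwise, the peak render level scaled by the echo path
// gain and a safety margin is compared against a fixed threshold.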
void AecState::SaturationDetector::Update(
const Block& x,
bool saturated_capture,
bool usable_linear_estimate,
rtc::ArrayView<const SubtractorOutput> subtractor_output,
float echo_path_gain) {
saturated_echo_ = false;
if (!saturated_capture) {
return;
}
if (usable_linear_estimate) {
constexpr float kSaturationThreshold = 20000.f;
for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
saturated_echo_ =
saturated_echo_ ||
(subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
}
} else {
float max_sample = 0.f;
for (int ch = 0; ch < x.NumChannels(); ++ch) {
rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
for (float sample : x_ch) {
max_sample = std::max(max_sample, fabsf(sample));
}
}
const float kMargin = 10.f;
float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
}
}
} // namespace webrtc