// frame_combiner_unittest.cc

/*

 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

 *  Use of this source code is governed by a BSD-style license

 *  that can be found in the LICENSE file in the root of the source

 *  tree. An additional intellectual property rights grant can be found

 *  in the file PATENTS.  All contributing project authors may

 *  be found in the AUTHORS file in the root of the source tree.

*/

#include "modules/audio_mixer/frame_combiner.h"

#include <cstdint>

#include <initializer_list>

#include <numeric>

#include <string>

#include <type_traits>

#include <vector>

#include "absl/types/optional.h"

#include "api/array_view.h"

#include "api/rtp_packet_info.h"

#include "api/rtp_packet_infos.h"

#include "api/units/timestamp.h"

#include "audio/utility/audio_frame_operations.h"

#include "modules/audio_mixer/gain_change_calculator.h"

#include "modules/audio_mixer/sine_wave_generator.h"

#include "rtc_base/checks.h"

#include "rtc_base/strings/string_builder.h"

#include "test/gmock.h"

#include "test/gtest.h"

namespace webrtc {

namespace {

using ::testing::ElementsAreArray;

using ::testing::IsEmpty;

using ::testing::UnorderedElementsAreArray;

// One parameter set for the gain-smoothness test. Kept as a plain aggregate
// with this exact field order because the test initializes it positionally.
struct FrameCombinerConfig {
  bool use_limiter;        // Forwarded to the FrameCombiner constructor.
  int sample_rate_hz;      // Rate used for the input frames and for Combine().
  int number_of_channels;  // Channel count of the input frames and the mix.
  float wave_frequency;    // Handed to SineWaveGenerator (presumably Hz).
};

// Builds a human-readable description of a test case from its sample rate,
// channel count and number of sources. The tests in this file pass the result
// to SCOPED_TRACE so that a failure identifies the parameter combination.
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  rtc::StringBuilder ss;
  ss << "Sample rate: " << sample_rate_hz << " ,";
  ss << "number of channels: " << number_of_channels << " ,";
  ss << "number of sources: " << number_of_sources;
  return ss.Release();
}

// Overload for FrameCombinerConfig: describes the sample rate, channel count,
// limiter state ("on"/"off") and wave frequency of `config`. Used as the
// SCOPED_TRACE message of the gain-smoothness test.
std::string ProduceDebugText(const FrameCombinerConfig& config) {
  rtc::StringBuilder ss;
  ss << "Sample rate: " << config.sample_rate_hz << " ,";
  ss << "number of channels: " << config.number_of_channels << " ,";
  ss << "limiter active: " << (config.use_limiter ? "on" : "off") << " ,";
  ss << "wave frequency: " << config.wave_frequency << " ,";
  return ss.Release();
}

// Input frames shared by the tests in this file. SetUpFrames() reconfigures
// both of them, so tests call it before mixing.
AudioFrame frame1;

AudioFrame frame2;

// Prepares the shared input frames `frame1` and `frame2` for a test.
//
// frame1 gets a single RtpPacketInfo (ssrc 1001); frame2 gets two (ssrc 4004
// and 7007). Both frames are then updated to hold `sample_rate_hz / 100`
// samples per channel at `sample_rate_hz` with `number_of_channels` channels,
// marked as normal speech with active VAD. No sample data is supplied
// (nullptr is passed).
//
// `sample_rate_hz` must be divisible by 100: rtc::CheckedDivExact is used for
// the division.
void SetUpFrames(int sample_rate_hz, int number_of_channels) {
  RtpPacketInfo packet_info1(/*ssrc=*/1001, /*csrcs=*/{},
                             /*rtp_timestamp=*/1000,
                             /*receive_time=*/Timestamp::Millis(1));
  RtpPacketInfo packet_info2(/*ssrc=*/4004, /*csrcs=*/{},
                             /*rtp_timestamp=*/1234,
                             /*receive_time=*/Timestamp::Millis(2));
  RtpPacketInfo packet_info3(/*ssrc=*/7007, /*csrcs=*/{},
                             /*rtp_timestamp=*/1333,
                             /*receive_time=*/Timestamp::Millis(2));

  frame1.packet_infos_ = RtpPacketInfos({packet_info1});
  frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});

  for (auto* frame : {&frame1, &frame2}) {
    frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
                       sample_rate_hz, AudioFrame::kNormalSpeech,
                       AudioFrame::kVadActive, number_of_channels);
  }
}

}  // namespace

// Smoke test for Combine() with the limiter enabled: mixes 0, 1 and 2 frames
// for every listed rate/channel combination. There are no explicit
// expectations; the calls only have to complete.
// The limiter requires sample rate divisible by 2000.
TEST(FrameCombiner, BasicApiCallsLimiter) {
  FrameCombiner combiner(true);
  for (const int rate : {8000, 18000, 34000, 48000}) {
    for (const int number_of_channels : {1, 2, 4, 8}) {
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      for (const int number_of_frames : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(rate, number_of_channels, number_of_frames));
        // Mix only the first `number_of_frames` frames.
        const std::vector<AudioFrame*> frames_to_combine(
            all_frames.begin(), all_frames.begin() + number_of_frames);
        AudioFrame audio_frame_for_mixing;
        combiner.Combine(frames_to_combine, number_of_channels, rate,
                         frames_to_combine.size(), &audio_frame_for_mixing);
      }
    }
  }
}

// The RtpPacketInfos field of the mixed packet should contain the union of the
// RtpPacketInfos from the frames that were actually mixed.
TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
  static constexpr int kSampleRateHz = 48000;
  static constexpr int kNumChannels = 1;
  FrameCombiner combiner(true);
  const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
  SetUpFrames(kSampleRateHz, kNumChannels);

  for (const int number_of_frames : {0, 1, 2}) {
    SCOPED_TRACE(
        ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
    const std::vector<AudioFrame*> frames_to_combine(
        all_frames.begin(), all_frames.begin() + number_of_frames);

    // Expected result: every packet info of every frame handed to Combine().
    std::vector<RtpPacketInfo> packet_infos;
    for (const auto& frame : frames_to_combine) {
      packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
                          frame->packet_infos_.end());
    }

    AudioFrame audio_frame_for_mixing;
    combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
                     frames_to_combine.size(), &audio_frame_for_mixing);
    // The order of the packet infos in the mix is not checked.
    EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
                UnorderedElementsAreArray(packet_infos));
  }
}

#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// There are CHECKs in place to check for invalid parameters.
// Both setting up the frames and combining them are expected to die for the
// channel counts used here.
TEST(FrameCombinerDeathTest, BuildCrashesWithManyChannels) {
  FrameCombiner combiner(true);
  for (const int rate : {8000, 18000, 34000, 48000}) {
    for (const int number_of_channels : {10, 20, 21}) {
      // Skip combinations whose 10 ms frame would not fit in an AudioFrame.
      if (static_cast<size_t>(rate / 100 * number_of_channels) >
          AudioFrame::kMaxDataSizeSamples) {
        continue;
      }
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      // With an unsupported channel count, this will crash in
      // `AudioFrame::UpdateFrame`.
      EXPECT_DEATH(SetUpFrames(rate, number_of_channels), "");

      const int number_of_frames = 2;
      SCOPED_TRACE(
          ProduceDebugText(rate, number_of_channels, number_of_frames));
      const std::vector<AudioFrame*> frames_to_combine(
          all_frames.begin(), all_frames.begin() + number_of_frames);
      AudioFrame audio_frame_for_mixing;
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
          "");
    }
  }
}
#endif  // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)

// Combining frames at the high sample rates listed below is expected to die.
// The expectation itself is only compiled where death tests are available and
// the target is not Android; elsewhere the test just sets up the frames.
// NOTE(review): the test name refers to debug builds, but the guard below does
// not depend on the build type - confirm this is intended.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
  FrameCombiner combiner(true);
  for (const int rate : {50000, 96000, 128000, 196000}) {
    for (const int number_of_channels : {1, 2, 3}) {
      // Skip combinations whose 10 ms frame would not fit in an AudioFrame.
      if (static_cast<size_t>(rate / 100 * number_of_channels) >
          AudioFrame::kMaxDataSizeSamples) {
        continue;
      }
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      const int number_of_frames = 2;
      SCOPED_TRACE(
          ProduceDebugText(rate, number_of_channels, number_of_frames));
      const std::vector<AudioFrame*> frames_to_combine(
          all_frames.begin(), all_frames.begin() + number_of_frames);
      AudioFrame audio_frame_for_mixing;
#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
          "")
          << "number_of_channels=" << number_of_channels << ", rate=" << rate
          << ", frames to combine=" << frames_to_combine.size();
#endif
    }
  }
}

// Smoke test for Combine() with the limiter disabled: mixes 0, 1 and 2 frames
// for every listed rate/channel combination without explicit expectations.
// With no limiter, the rate has to be divisible by 100 since we use
// 10 ms frames.
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2, 4, 8}) {
      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
      SetUpFrames(rate, number_of_channels);

      for (const int number_of_frames : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(rate, number_of_channels, number_of_frames));
        // Mix only the first `number_of_frames` frames.
        const std::vector<AudioFrame*> frames_to_combine(
            all_frames.begin(), all_frames.begin() + number_of_frames);
        AudioFrame audio_frame_for_mixing;
        combiner.Combine(frames_to_combine, number_of_channels, rate,
                         frames_to_combine.size(), &audio_frame_for_mixing);
      }
    }
  }
}

// Combining an empty list of frames must yield a 10 ms frame of all-zero
// samples that carries no RtpPacketInfos.
TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));

      AudioFrame audio_frame_for_mixing;

      const std::vector<AudioFrame*> frames_to_combine;
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

      // Copy out the 10 ms of mixed samples (rate / 100 per channel).
      const int16_t* audio_frame_for_mixing_data =
          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing_data,
          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
    }
  }
}

// With the limiter disabled, combining a single frame must reproduce that
// frame's samples and its RtpPacketInfos (in the same order) unchanged.
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
  FrameCombiner combiner(false);
  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    // kMaxConcurrentChannels is 8.
    for (const int number_of_channels : {1, 2, 4, kMaxConcurrentChannels}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));

      AudioFrame audio_frame_for_mixing;

      SetUpFrames(rate, number_of_channels);
      // Fill the input with a recognizable ramp: 0, 1, 2, ...
      int16_t* frame1_data = frame1.mutable_data();
      std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

      const int16_t* audio_frame_for_mixing_data =
          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
          audio_frame_for_mixing_data,
          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      std::vector<int16_t> expected(number_of_channels * rate / 100);
      std::iota(expected.begin(), expected.end(), 0);
      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
                  ElementsAreArray(frame1.packet_infos_));
    }
  }
}

// Send a sine wave through the FrameCombiner, and check that the
// difference between input and output varies smoothly. Also check
// that it is inside reasonable bounds. This is to catch issues like
// chromium:695993 and chromium:816875.
TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
  // Rates are divisible by 2000 when limiter is active.
  std::vector<FrameCombinerConfig> configs = {
      {false, 30100, 2, 50.f},  {false, 16500, 1, 3200.f},
      {true, 8000, 1, 3200.f},  {true, 16000, 1, 50.f},
      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
  };

  for (const auto& config : configs) {
    SCOPED_TRACE(ProduceDebugText(config));

    FrameCombiner combiner(config.use_limiter);

    constexpr int16_t wave_amplitude = 30000;
    SineWaveGenerator wave_generator(config.wave_frequency, wave_amplitude);

    GainChangeCalculator change_calculator;
    float cumulative_change = 0.f;

    constexpr size_t iterations = 100;

    for (size_t i = 0; i < iterations; ++i) {
      SetUpFrames(config.sample_rate_hz, config.number_of_channels);
      wave_generator.GenerateNextFrame(&frame1);
      AudioFrameOperations::Mute(&frame2);

      // Alternate between mixing one frame (odd iterations) and two frames
      // (even iterations); the second frame is muted.
      std::vector<AudioFrame*> frames_to_combine = {&frame1};
      if (i % 2 == 0) {
        frames_to_combine.push_back(&frame2);
      }
      const size_t number_of_samples =
          frame1.samples_per_channel_ * config.number_of_channels;

      // Ensures limiter is on if 'use_limiter'.
      constexpr size_t number_of_streams = 2;
      AudioFrame audio_frame_for_mixing;
      combiner.Combine(frames_to_combine, config.number_of_channels,
                       config.sample_rate_hz, number_of_streams,
                       &audio_frame_for_mixing);
      // Accumulate the gain change between the sine input and the mix.
      cumulative_change += change_calculator.CalculateGainChange(
          rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
          rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
                                        number_of_samples));
    }

    // Check that the gain doesn't vary too much.
    EXPECT_LT(cumulative_change, 10);

    // Check that the latest gain is within reasonable bounds. It
    // should be slightly less than 1.
    EXPECT_LT(0.9f, change_calculator.LatestGain());
    EXPECT_LT(change_calculator.LatestGain(), 1.01f);
  }
}

}  // namespace webrtc

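// Web code-viewer navigation text captured with the file (not part of the
// source): "Source code", "Revision control", "Copy as Markdown",
// "Other Tools".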