// DynamicResampler.h -- mozilla-central/dom/media/driftcontrol/DynamicResampler.h

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this file,

 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_

#define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_

#include "AudioRingBuffer.h"

#include "AudioSegment.h"

#include "TimeUnits.h"

#include "WavDumper.h"

#include <speex/speex_resampler.h>

namespace mozilla {

// Channel count of a stereo layout. Also used in this header as the inline
// capacity of the per-channel AutoTArray members of DynamicResampler.
constexpr uint32_t STEREO = 2;

/**
 * DynamicResampler allows updating on the fly the input sample rate and the
 * number of channels (the output rate is fixed at construction). In addition
 * to that, it maintains an internal buffer for the input data and allows
 * pre-buffering as well. The Resample() method strives to provide the
 * requested number of output frames by using the input data including any
 * pre-buffering. If there are fewer frames in the internal buffer than is
 * requested, the internal buffer is padded with enough silence to allow the
 * requested frames to be resampled and returned.
 *
 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
 * buffer is initially 100ms of audio and it is pre-allocated during
 * SetSampleFormat(). Should the input data grow beyond that, the input buffer
 * is re-allocated on the fly. In addition to that, due to a special feature of
 * AudioRingBuffer, no extra copies take place when the input data is fed to
 * the resampler.
 *
 * The sample format must be set before using any method.
 *
 * The DynamicResampler is not thread-safe, so all the methods apart from the
 * constructor must be called on the same thread.
 */

class DynamicResampler final {

 public:

/**

   * Provide the initial input and output rate and the amount of pre-buffering.

   * The channel count will be set to stereo. Memory allocation will take

   * place. The input buffer is non-interleaved.

*/

  DynamicResampler(uint32_t aInRate, uint32_t aOutRate,

                   uint32_t aInputPreBufferFrameCount = 0);

  ~DynamicResampler();

/**

   * Set the sample format type to float or short.

*/

  void SetSampleFormat(AudioSampleFormat aFormat);

  uint32_t GetInRate() const { return mInRate; }

  uint32_t GetChannels() const { return mChannels; }

/**

   * Append `aInFrames` number of frames from `aInBuffer` to the internal input

   * buffer. Memory copy/move takes place.

*/

  void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);

  void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);

/**

   * Append `aInFrames` number of frames of silence to the internal input

   * buffer. Memory copy/move takes place.

*/

  void AppendInputSilence(const uint32_t aInFrames);

/**

   * Return the number of frames the internal input buffer can store.

*/

  uint32_t InFramesBufferSize() const;

/**

   * Return the number of frames stored in the internal input buffer.

*/

  uint32_t InFramesBuffered(uint32_t aChannelIndex) const;

/**

   * Prepends existing input data with a silent pre-buffer if not already done.

   * Data will be prepended so that after resampling aDuration of data,

   * the buffering level will be as close as possible to

   * mInputPreBufferFrameCount, which is the desired buffering level.

*/

  void EnsurePreBuffer(media::TimeUnit aDuration);

/**

   * Set the number of frames that should be used for input pre-buffering.

*/

  void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount);

/*

   * Resample as much frames as needed from the internal input buffer to the

   * `aOutBuffer` in order to provide all `aOutFrames`.

   * On first call, prepends the input buffer with silence so that after

   * resampling aOutFrames frames of data, the input buffer holds data as close

   * as possible to the configured pre-buffer size.

   * If there are not enough input frames to provide the requested output

   * frames, the input buffer is padded with enough silence to allow the

   * requested frames to be resampled, and the pre-buffer is reset so that the

   * next call will be treated as the first.

   * Returns true if the internal input buffer underran and had to be padded

   * with silence, otherwise false.

*/

  bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);

  bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,

                uint32_t aChannelIndex);

/**

   * Update the output rate or/and the channel count. If a value is not updated

   * compared to the current one nothing happens. Changing the `aInRate`

   * results in recalculation in the resampler. Changing `aChannels` results in

   * the reallocation of the internal input buffer with the exception of

   * changes between mono to stereo and vice versa where no reallocation takes

   * place. A stereo internal input buffer is always maintained even if the

   * sound is mono.

*/

  void UpdateResampler(uint32_t aInRate, uint32_t aChannels);

 private:

  template <typename T>

  void AppendInputInternal(Span<const T* const>& aInBuffer,

                           uint32_t aInFrames) {

    MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);

    for (uint32_t i = 0; i < mChannels; ++i) {

      PushInFrames(aInBuffer[i], aInFrames, i);

  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,

                        float* aOutBuffer, uint32_t* aOutFrames,

                        uint32_t aChannelIndex);

  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,

                        int16_t* aOutBuffer, uint32_t* aOutFrames,

                        uint32_t aChannelIndex);

  template <typename T>

  bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,

                        uint32_t aChannelIndex) {

    MOZ_ASSERT(mInRate);

    MOZ_ASSERT(mOutRate);

    MOZ_ASSERT(mChannels);

    MOZ_ASSERT(aChannelIndex < mChannels);

    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());

    MOZ_ASSERT(aOutFrames);

    uint32_t outFramesNeeded = aOutFrames;

    T* nextOutFrame = aOutBuffer;

    if (mInRate == mOutRate) {

      if (!mResamplerIsBypassed) {

        uint32_t latency = speex_resampler_get_input_latency(mResampler);

        mInternalInBuffer[aChannelIndex].ReadNoCopy(

            [&](const Span<const T>& aInBuffer) -> uint32_t {

              // Although unlikely with the sample rates used with this class,

              // the resampler input latency may temporarily be higher than

              // indicated, after a change in resampling rate that reduces the

              // indicated latency. The resampler's "magic" samples cause

              // this. All frames in the resampler are extracted when

              // `latency` output frames have been extracted.

              uint32_t outFramesResampled = std::min(outFramesNeeded, latency);

              uint32_t inFrames = aInBuffer.Length();

              ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame,

                               &outFramesResampled, aChannelIndex);

              nextOutFrame += outFramesResampled;

              outFramesNeeded -= outFramesResampled;

              if (outFramesResampled == latency) {

                mResamplerIsBypassed = true;

                // The last `latency` frames of input to the resampler will not

                // be extracted from the resampler. Leave them in

                // mInternalInBuffer to be copied directly to nextOutFrame.

                MOZ_ASSERT(inFrames >= latency);

                return inFrames - latency;

              return inFrames;

});

      bool underrun = false;

      if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();

          buffered < outFramesNeeded) {

        underrun = true;

        mIsPreBufferSet = false;

        mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded -

                                                      buffered);

      DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read(

          Span(nextOutFrame, outFramesNeeded));

      MOZ_ASSERT(numFramesRead == outFramesNeeded);

      // Workaround to avoid discontinuity when the speex resampler operates

      // again. Feed it with the last 20 frames to warm up the internal memory

      // of the resampler and then skip memory equals to resampler's input

      // latency.

      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);

      if (aChannelIndex == 0 && !mIsWarmingUp) {

        mInputStreamFile.Write(nextOutFrame, outFramesNeeded);

        mOutputStreamFile.Write(nextOutFrame, outFramesNeeded);

      return underrun;

    auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t {

      uint32_t outFramesResampled = outFramesNeeded;

      uint32_t inFrames = aInLength;

      ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled,

                       aChannelIndex);

      nextOutFrame += outFramesResampled;

      outFramesNeeded -= outFramesResampled;

      mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames);

      return inFrames;

};

    MOZ_ASSERT(!mResamplerIsBypassed);

    mInternalInBuffer[aChannelIndex].ReadNoCopy(

        [&](const Span<const T>& aInBuffer) -> uint32_t {

          if (!outFramesNeeded) {

            return 0;

          return resample(aInBuffer.Elements(), aInBuffer.Length());

});

    if (outFramesNeeded == 0) {

      return false;

    while (outFramesNeeded > 0) {

      MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);

      // Round up.

      uint32_t totalInFramesNeeded =

          ((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate)

              .value();

      resample(nullptr, totalInFramesNeeded);

    mIsPreBufferSet = false;

    return true;

  template <typename T>

  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,

                    uint32_t aChannelIndex) {

    MOZ_ASSERT(aInBuffer);

    MOZ_ASSERT(aInFrames);

    MOZ_ASSERT(mChannels);

    MOZ_ASSERT(aChannelIndex < mChannels);

    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());

    EnsureInputBufferSizeInFrames(

        mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames);

    mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));

  void WarmUpResampler(bool aSkipLatency);

  bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) {

    uint32_t sampleSize = 0;

    if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {

      sampleSize = sizeof(float);

    } else if (mSampleFormat == AUDIO_FORMAT_S16) {

      sampleSize = sizeof(short);

    if (sampleSize == 0) {

      // No sample format set, we wouldn't know how many bytes to allocate.

      return true;

    uint32_t sizeInFrames = InFramesBufferSize();

    if (aSizeInFrames <= sizeInFrames) {

      // Buffer size is sufficient.

      return true;  // no reallocation necessary

    // 5 second cap.

    const uint32_t cap = 5 * mInRate;

    if (sizeInFrames >= cap) {

      // Already at the cap.

      return false;

    // As a backoff strategy, at least double the previous size.

    sizeInFrames *= 2;

    if (aSizeInFrames > sizeInFrames) {

      // A larger buffer than the normal backoff strategy provides is needed, or

      // this is the first time setting the buffer size. Add another 50ms, as

      // some jitter is expected.

      sizeInFrames = aSizeInFrames + mInRate / 20;

    // mInputPreBufferFrameCount is an indication of the desired average

    // buffering.  Provide for at least twice this.

    sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2);

    sizeInFrames = std::min(cap, sizeInFrames);

    bool success = true;

    for (auto& b : mInternalInBuffer) {

      success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames);

    if (success) {

      // All buffers have the new size.

      return true;

    // Allocating an input buffer failed. We stick with the old buffer size.

    NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "

                               "frames). Expect glitches.",

                               sampleSize * sizeInFrames, sizeInFrames)

                   .get());

    return false;

 public:

  const uint32_t mOutRate;

 private:

  bool mIsPreBufferSet = false;

  bool mIsWarmingUp = false;

  // The resampler can be bypassed when the input and output rates match and

  // any frames buffered in the resampler have been extracted.  This initial

  // value is reset on construction by UpdateResampler() if the rates differ.

  bool mResamplerIsBypassed = true;

  uint32_t mInputPreBufferFrameCount;

  uint32_t mChannels = 0;

  uint32_t mInRate;

  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;

  SpeexResamplerState* mResampler = nullptr;

  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;

  class TailBuffer {

   public:

    template <typename T>

    T* Buffer() {

      return reinterpret_cast<T*>(mBuffer);

    /* Store the MAXSIZE last elements of the buffer. */

    template <typename T>

    void StoreTail(const Span<const T>& aInBuffer) {

      StoreTail(aInBuffer.data(), aInBuffer.size());

    template <typename T>

    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {

      const T* inBuffer = aInBuffer;

      mSize = std::min(aInFrames, MAXSIZE);

      if (inBuffer) {

        PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize);

      } else {

        std::fill_n(Buffer<T>(), mSize, static_cast<T>(0));

    uint32_t Length() { return mSize; }

    static constexpr uint32_t MAXSIZE = 20;

   private:

    float mBuffer[MAXSIZE] = {};

    uint32_t mSize = 0;

};

  AutoTArray<TailBuffer, STEREO> mInputTail;

  WavDumper mInputStreamFile;

  WavDumper mOutputStreamFile;

};

}  // namespace mozilla

#endif  // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_