/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "FFmpegVideoEncoder.h"
#include <algorithm>
#include <aom/aomcx.h>
#include "BufferReader.h"
#include "EncoderConfig.h"
#include "FFmpegLog.h"
#include "FFmpegRuntimeLinker.h"
#include "FFmpegUtils.h"
#include "H264.h"
#include "ImageContainer.h"
#include "ImageConversion.h"
#include "libavutil/error.h"
#include "libavutil/pixfmt.h"
#include "libyuv.h"
#include "mozilla/StaticPrefs_media.h"
#include "mozilla/dom/ImageBitmapBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "mozilla/dom/VideoFrameBinding.h"
#include "nsPrintfCString.h"
// The ffmpeg namespace is introduced to avoid name conflicts between ffmpeg's
// PixelFormat and MediaDataEncoder::PixelFormat in the MediaDataEncoder class
// scope.
namespace ffmpeg {
// TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then
// to AV_PIX_FMT_YUVA420P here.
#if LIBAVCODEC_VERSION_MAJOR < 54
using FFmpegPixelFormat = enum PixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE = FFmpegPixelFormat::PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA = FFmpegPixelFormat::PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA = FFmpegPixelFormat::PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 = FFmpegPixelFormat::PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 = FFmpegPixelFormat::PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
FFmpegPixelFormat::PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
FFmpegPixelFormat::PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
FFmpegPixelFormat::PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::PIX_FMT_NV21;
#else
using FFmpegPixelFormat = enum AVPixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE =
FFmpegPixelFormat::AV_PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA =
FFmpegPixelFormat::AV_PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA =
FFmpegPixelFormat::AV_PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 =
FFmpegPixelFormat::AV_PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 =
FFmpegPixelFormat::AV_PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
FFmpegPixelFormat::AV_PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
FFmpegPixelFormat::AV_PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
FFmpegPixelFormat::AV_PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 =
FFmpegPixelFormat::AV_PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 =
FFmpegPixelFormat::AV_PIX_FMT_NV21;
#endif
static const char* GetPixelFormatString(FFmpegPixelFormat aFormat) {
switch (aFormat) {
case FFMPEG_PIX_FMT_NONE:
return "none";
case FFMPEG_PIX_FMT_RGBA:
return "packed RGBA 8:8:8:8 (32bpp, RGBARGBA...)";
case FFMPEG_PIX_FMT_BGRA:
return "packed BGRA 8:8:8:8 (32bpp, BGRABGRA...)";
case FFMPEG_PIX_FMT_RGB24:
return "packed RGB 8:8:8 (24bpp, RGBRGB...)";
case FFMPEG_PIX_FMT_BGR24:
return "packed RGB 8:8:8 (24bpp, BGRBGR...)";
case FFMPEG_PIX_FMT_YUV444P:
return "planar YUV 4:4:4 (24bpp, 1 Cr & Cb sample per 1x1 Y samples)";
case FFMPEG_PIX_FMT_YUV422P:
return "planar YUV 4:2:2 (16bpp, 1 Cr & Cb sample per 2x1 Y samples)";
case FFMPEG_PIX_FMT_YUV420P:
return "planar YUV 4:2:0 (12bpp, 1 Cr & Cb sample per 2x2 Y samples)";
    case FFMPEG_PIX_FMT_NV12:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved UV sample per 2x2 Y "
             "samples)";
    case FFMPEG_PIX_FMT_NV21:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved VU sample per 2x2 Y "
             "samples)";
default:
break;
}
MOZ_ASSERT_UNREACHABLE("Unsupported pixel format");
return "unsupported";
}
} // namespace ffmpeg
namespace mozilla {
struct H264Setting {
int mValue;
nsCString mString;
};
struct H264LiteralSetting {
int mValue;
nsLiteralCString mString;
H264Setting get() const { return {mValue, mString.AsString()}; }
};
static constexpr H264LiteralSetting H264Profiles[]{
{FF_PROFILE_H264_BASELINE, "baseline"_ns},
{FF_PROFILE_H264_MAIN, "main"_ns},
{FF_PROFILE_H264_EXTENDED, ""_ns},
{FF_PROFILE_H264_HIGH, "high"_ns}};
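// The table indices match the switch in GetH264Profile below. EXTENDED maps to
// an empty string because libx264 has no "extended" profile preset.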
static Maybe<H264Setting> GetH264Profile(const H264_PROFILE& aProfile) {
switch (aProfile) {
case H264_PROFILE::H264_PROFILE_UNKNOWN:
return Nothing();
case H264_PROFILE::H264_PROFILE_BASE:
return Some(H264Profiles[0].get());
case H264_PROFILE::H264_PROFILE_MAIN:
return Some(H264Profiles[1].get());
case H264_PROFILE::H264_PROFILE_EXTENDED:
return Some(H264Profiles[2].get());
case H264_PROFILE::H264_PROFILE_HIGH:
return Some(H264Profiles[3].get());
default:
break;
}
MOZ_ASSERT_UNREACHABLE("undefined profile");
return Nothing();
}
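// Builds the libx264-style level setting from an H264_LEVEL value, which is
// the numeric level_idc: e.g. level 3.1 has the value 31, and inserting a '.'
// after the first digit yields the "3.1" string libx264 expects.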
static Maybe<H264Setting> GetH264Level(const H264_LEVEL& aLevel) {
int val = static_cast<int>(aLevel);
nsPrintfCString str("%d", val);
str.Insert('.', 1);
return Some(H264Setting{val, str});
}
struct VPXSVCAppendix {
uint8_t mLayeringMode;
};
struct SVCLayerSettings {
using CodecAppendix = Variant<VPXSVCAppendix, aom_svc_params_t>;
size_t mNumberSpatialLayers;
size_t mNumberTemporalLayers;
uint8_t mPeriodicity;
nsTArray<uint8_t> mLayerIds;
// libvpx: ts_rate_decimator, libaom: framerate_factor
nsTArray<uint8_t> mRateDecimators;
nsTArray<uint32_t> mTargetBitrates;
Maybe<CodecAppendix> mCodecAppendix;
};
static SVCLayerSettings GetSVCLayerSettings(CodecType aCodec,
const ScalabilityMode& aMode,
uint32_t aBitPerSec) {
// TODO: Apply more sophisticated bitrate allocation, like SvcRateAllocator:
size_t layers = 0;
  const uint32_t kbps = aBitPerSec / 1000;  // ts_target_bitrate requires kbps.
uint8_t periodicity;
nsTArray<uint8_t> layerIds;
nsTArray<uint8_t> rateDecimators;
nsTArray<uint32_t> bitrates;
Maybe<SVCLayerSettings::CodecAppendix> appendix;
if (aMode == ScalabilityMode::L1T2) {
// Two temporal layers. 0-1...
//
// Frame pattern:
// Layer 0: |0| |2| |4| |6| |8|
// Layer 1: | |1| |3| |5| |7| |
layers = 2;
// 2 frames per period.
periodicity = 2;
// Assign layer ids.
layerIds.AppendElement(0);
layerIds.AppendElement(1);
// Set rate decimators.
rateDecimators.AppendElement(2);
rateDecimators.AppendElement(1);
    // Bitrate allocation: L0 - 60%, L1 - 40%. Note that the entries passed to
    // the encoder are cumulative, so L1's entry is the full target bitrate.
    bitrates.AppendElement(kbps * 3 / 5);
    bitrates.AppendElement(kbps);
if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
appendix.emplace(VPXSVCAppendix{
.mLayeringMode = 2 /* VP9E_TEMPORAL_LAYERING_MODE_0101 */
});
}
} else {
MOZ_ASSERT(aMode == ScalabilityMode::L1T3);
// Three temporal layers. 0-2-1-2...
//
// Frame pattern:
// Layer 0: |0| | | |4| | | |8| | | |12|
// Layer 1: | | |2| | | |6| | | |10| | |
// Layer 2: | |1| |3| |5| |7| |9| |11| |
layers = 3;
// 4 frames per period
periodicity = 4;
// Assign layer ids.
layerIds.AppendElement(0);
layerIds.AppendElement(2);
layerIds.AppendElement(1);
layerIds.AppendElement(2);
// Set rate decimators.
rateDecimators.AppendElement(4);
rateDecimators.AppendElement(2);
rateDecimators.AppendElement(1);
    // Bitrate allocation: L0 - 50%, L1 - 20%, L2 - 30%. The entries passed to
    // the encoder are cumulative: 50%, 70% and 100% of the target.
    bitrates.AppendElement(kbps / 2);
    bitrates.AppendElement(kbps * 7 / 10);
    bitrates.AppendElement(kbps);
if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
appendix.emplace(VPXSVCAppendix{
.mLayeringMode = 3 /* VP9E_TEMPORAL_LAYERING_MODE_0212 */
});
}
}
MOZ_ASSERT(layers == bitrates.Length(),
"Bitrate must be assigned to each layer");
return SVCLayerSettings{1,
layers,
periodicity,
std::move(layerIds),
std::move(rateDecimators),
std::move(bitrates),
appendix};
}
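// Illustration of the mapping above: GetSVCLayerSettings(CodecType::VP9,
// ScalabilityMode::L1T3, 1'000'000) yields 1 spatial and 3 temporal layers,
// periodicity 4, layer ids {0, 2, 1, 2}, rate decimators {4, 2, 1}, cumulative
// target bitrates {500, 700, 1000} kbps, and a VPXSVCAppendix selecting
// layering mode 3 (0212).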
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::UpdateTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
mCurrentIndex = (mCurrentIndex + 1) % mTemporalLayerIds.Length();
}
uint8_t FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::CurrentTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
return mTemporalLayerIds[mCurrentIndex];
}
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::ResetTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
mCurrentIndex = 0;
}
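// Together these helpers cycle through the temporal layer id pattern: with
// mTemporalLayerIds = {0, 2, 1, 2} (L1T3), alternating
// CurrentTemporalLayerId() and UpdateTemporalLayerId() yields 0, 2, 1, 2,
// 0, ... until ResetTemporalLayerId() restarts the pattern at a keyframe.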
FFmpegVideoEncoder<LIBAV_VER>::FFmpegVideoEncoder(
const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
: FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}
nsCString FFmpegVideoEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
return "ffvpx video encoder"_ns;
#else
const char* lib =
# if defined(MOZ_FFMPEG)
FFmpegRuntimeLinker::LinkStatusLibraryName();
# else
"no library: ffmpeg disabled during build";
# endif
return nsPrintfCString("ffmpeg video encoder (%s)", lib);
#endif
}
bool FFmpegVideoEncoder<LIBAV_VER>::SvcEnabled() const {
return mConfig.mScalabilityMode != ScalabilityMode::None;
}
nsresult FFmpegVideoEncoder<LIBAV_VER>::InitSpecific() {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
FFMPEGV_LOG("FFmpegVideoEncoder::InitSpecific");
// Initialize the common members of the encoder instance
AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
if (!codec) {
FFMPEGV_LOG("FFmpegDataEncoder::InitCommon failed");
return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
}
// And now the video-specific part
mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  // TODO: do this properly, based on the colorspace of the frame. Setting
  // this like that crashes encoders.
  // if (mConfig.mCodec != CodecType::AV1) {
  //   if (mConfig.mPixelFormat == dom::ImageBitmapFormat::RGBA32 ||
  //       mConfig.mPixelFormat == dom::ImageBitmapFormat::BGRA32) {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_RGB;
  // #ifdef FFVPX_VERSION
  //     mCodecContext->color_trc = AVCOL_TRC_IEC61966_2_1;
  // #endif
  //   } else {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_BT709;
  //     mCodecContext->color_trc = AVCOL_TRC_BT709;
  //   }
  // }
mCodecContext->width = static_cast<int>(mConfig.mSize.width);
mCodecContext->height = static_cast<int>(mConfig.mSize.height);
  // Reasonable default for the quantization range.
mCodecContext->qmin =
static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_min());
mCodecContext->qmax =
static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_max());
if (mConfig.mUsage == Usage::Realtime) {
mCodecContext->thread_count = 1;
} else {
int64_t pixels = mCodecContext->width * mCodecContext->height;
int threads = 1;
// Select a thread count that depends on the frame size, and cap to the
// number of available threads minus one
if (pixels >= 3840 * 2160) {
threads = 16;
} else if (pixels >= 1920 * 1080) {
threads = 8;
} else if (pixels >= 1280 * 720) {
threads = 4;
} else if (pixels >= 640 * 480) {
threads = 2;
}
mCodecContext->thread_count =
std::clamp<int>(threads, 1, GetNumberOfProcessors() - 1);
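    // For instance, a 1920x1080 non-realtime encode on a machine with 8
    // logical processors requests 8 threads and is clamped to 7, keeping one
    // processor free.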
}
  // The recommended time base is the reciprocal of the frame rate, but we set
  // it to microsecond for now.
mCodecContext->time_base =
AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
#if LIBAVCODEC_VERSION_MAJOR >= 57
// Note that sometimes framerate can be zero (from webcodecs).
mCodecContext->framerate =
AVRational{.num = static_cast<int>(mConfig.mFramerate), .den = 1};
#endif
#if LIBAVCODEC_VERSION_MAJOR >= 60
mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION;
#endif
  // A gop_size of 0 would disable inter-frames entirely (all frames would be
  // keyframes), so fall back to a large interval when none is configured.
mCodecContext->gop_size = mConfig.mKeyframeInterval
? static_cast<int>(mConfig.mKeyframeInterval)
: 10000;
mCodecContext->keyint_min = 0;
// When either real-time or SVC is enabled via config, the general settings of
// the encoder are set to be more appropriate for real-time usage
if (mConfig.mUsage == Usage::Realtime || SvcEnabled()) {
if (mConfig.mUsage != Usage::Realtime) {
FFMPEGV_LOG(
"SVC enabled but low latency encoding mode not enabled, forcing low "
"latency mode");
}
mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0);
    // Explicitly ask the encoder not to keep frames in flight for lookahead
    // purposes.
mLib->av_opt_set(mCodecContext->priv_data, "lag-in-frames", "0", 0);
if (mConfig.mCodec == CodecType::VP8 || mConfig.mCodec == CodecType::VP9) {
mLib->av_opt_set(mCodecContext->priv_data, "error-resilient", "1", 0);
}
if (mConfig.mCodec == CodecType::AV1) {
mLib->av_opt_set(mCodecContext->priv_data, "error-resilience", "1", 0);
// This sets usage to AOM_USAGE_REALTIME
mLib->av_opt_set(mCodecContext->priv_data, "usage", "1", 0);
      // Allow the bitrate to swing 50% above and below the target.
mLib->av_opt_set(mCodecContext->priv_data, "rc_undershoot_percent", "50",
0);
mLib->av_opt_set(mCodecContext->priv_data, "rc_overshoot_percent", "50",
0);
      // Row multithreading -- note that we do single-threaded encoding for
      // now, so this doesn't do much.
mLib->av_opt_set(mCodecContext->priv_data, "row_mt", "1", 0);
// Cyclic refresh adaptive quantization
mLib->av_opt_set(mCodecContext->priv_data, "aq-mode", "3", 0);
      // 9 is optimized for real-time (7 for regular encoding); lower values
      // use more CPU for a higher compression ratio.
mLib->av_opt_set(mCodecContext->priv_data, "cpu-used", "9", 0);
      // Disable global motion estimation: it targets camera motion, which is
      // unlikely in our use case.
mLib->av_opt_set(mCodecContext->priv_data, "enable-global-motion", "0",
0);
mLib->av_opt_set(mCodecContext->priv_data, "enable-cfl-intra", "0", 0);
// TODO: Set a number of tiles appropriate for the number of threads used
// -- disable tiling if using a single thread.
mLib->av_opt_set(mCodecContext->priv_data, "tile-columns", "0", 0);
mLib->av_opt_set(mCodecContext->priv_data, "tile-rows", "0", 0);
}
}
if (SvcEnabled()) {
if (Maybe<SVCSettings> settings = GetSVCSettings()) {
if (mCodecName == "libaom-av1") {
if (mConfig.mBitrateMode != BitrateMode::Constant) {
return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
}
}
SVCSettings s = settings.extract();
FFMPEGV_LOG("SVC options string: %s=%s", s.mSettingKeyValue.first.get(),
s.mSettingKeyValue.second.get());
mLib->av_opt_set(mCodecContext->priv_data, s.mSettingKeyValue.first.get(),
s.mSettingKeyValue.second.get(), 0);
// FFmpegVideoEncoder is reset after Drain(), so mSVCInfo should be
// reset() before emplace().
mSVCInfo.reset();
mSVCInfo.emplace(std::move(s.mTemporalLayerIds));
// TODO: layer settings should be changed dynamically when the frame's
// color space changed.
}
}
nsAutoCString h264Log;
if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is<H264Specific>()) {
// TODO: Set profile, level, avcc/annexb for openh264 and others.
if (mCodecName == "libx264") {
const H264Specific& h264Specific =
mConfig.mCodecSpecific->as<H264Specific>();
H264Settings s = GetH264Settings(h264Specific);
mCodecContext->profile = s.mProfile;
mCodecContext->level = s.mLevel;
for (const auto& pair : s.mSettingKeyValuePairs) {
mLib->av_opt_set(mCodecContext->priv_data, pair.first.get(),
pair.second.get(), 0);
}
      // Log the settings.
      // When using a profile other than EXTENDED, the profile string is in the
      // first element of mSettingKeyValuePairs, while the EXTENDED profile has
      // no profile string.
MOZ_ASSERT_IF(
s.mSettingKeyValuePairs.Length() != 3,
h264Specific.mProfile == H264_PROFILE::H264_PROFILE_EXTENDED);
const char* profileStr = s.mSettingKeyValuePairs.Length() == 3
? s.mSettingKeyValuePairs[0].second.get()
: "extended";
const char* levelStr = s.mSettingKeyValuePairs.Length() == 3
? s.mSettingKeyValuePairs[1].second.get()
: s.mSettingKeyValuePairs[0].second.get();
const char* formatStr =
h264Specific.mFormat == H264BitStreamFormat::AVC ? "AVCC" : "AnnexB";
h264Log.AppendPrintf(", H264: profile - %d (%s), level %d (%s), %s",
mCodecContext->profile, profileStr,
mCodecContext->level, levelStr, formatStr);
}
}
  // Further options that could be wired up in the future:
  // - if mConfig.mDenoising is set: av_opt_set_int(mCodecContext->priv_data,
  //   "noise_sensitivity", x, 0), where x is from 0 (disabled) to 6.
  // - if mConfig.mAdaptiveQp is set: av_opt_set_int(mCodecContext->priv_data,
  //   "aq_mode", x, 0), where x is from 0 to 3: 0 - Disabled, 1 - Variance
  //   AQ (default), 2 - Complexity AQ, 3 - Cycle AQ.
// Our old version of libaom-av1 is considered experimental by the recent
// ffmpeg we use. Allow experimental codecs for now until we decide on an AV1
// encoder.
mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
MediaResult rv = FinishInitCommon(codec);
if (NS_FAILED(rv)) {
FFMPEGV_LOG("FFmpeg video encoder initialization failure.");
return rv;
}
FFMPEGV_LOG(
"%s has been initialized with format: %s, bitrate: %" PRIi64
", width: %d, height: %d, quantizer: [%d, %d], time_base: %d/%d%s",
codec->name, ffmpeg::GetPixelFormatString(mCodecContext->pix_fmt),
static_cast<int64_t>(mCodecContext->bit_rate), mCodecContext->width,
mCodecContext->height, mCodecContext->qmin, mCodecContext->qmax,
mCodecContext->time_base.num, mCodecContext->time_base.den,
h264Log.IsEmpty() ? "" : h264Log.get());
return NS_OK;
}
bool FFmpegVideoEncoder<LIBAV_VER>::ScaleInputFrame() {
AVFrame* source = mFrame;
mFrame = nullptr;
// Allocate AVFrame.
if (!PrepareFrame()) {
FFMPEGV_LOG("failed to allocate frame");
return false;
}
// Set AVFrame properties for its internal data allocation. For now, we always
// convert into ffmpeg's buffer.
mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
mFrame->width = static_cast<int>(mConfig.mSize.Width());
mFrame->height = static_cast<int>(mConfig.mSize.Height());
// Allocate AVFrame data.
if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
FFMPEGV_LOG("failed to allocate frame data: %s",
MakeErrorString(mLib, ret).get());
return false;
}
// Make sure AVFrame is writable.
if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
FFMPEGV_LOG("failed to make frame writable: %s",
MakeErrorString(mLib, ret).get());
return false;
}
  int rv = I420Scale(source->data[0], source->linesize[0], source->data[1],
                     source->linesize[1], source->data[2], source->linesize[2],
                     source->width, source->height, mFrame->data[0],
                     mFrame->linesize[0], mFrame->data[1], mFrame->linesize[1],
                     mFrame->data[2], mFrame->linesize[2], mFrame->width,
                     mFrame->height, libyuv::FilterMode::kFilterBox);
  // libyuv returns 0 on success, so a non-zero value indicates a scale error.
  if (rv != 0) {
    FFMPEGV_LOG("YUV scale error");
  }
mLib->av_frame_unref(source);
mLib->av_frame_free(&source);
return true;
}
// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
Result<MediaDataEncoder::EncodedData, nsresult> FFmpegVideoEncoder<
LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
MOZ_ASSERT(mCodecContext);
MOZ_ASSERT(aSample);
RefPtr<const VideoData> sample(aSample->As<VideoData>());
// Validate input.
if (!sample->mImage) {
FFMPEGV_LOG("No image");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
if (sample->mImage->GetSize().IsEmpty()) {
FFMPEGV_LOG("image width or height is invalid");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Allocate AVFrame.
if (!PrepareFrame()) {
FFMPEGV_LOG("failed to allocate frame");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Set AVFrame properties for its internal data allocation. For now, we always
// convert into ffmpeg's buffer.
mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
mFrame->width = static_cast<int>(sample->mImage->GetSize().width);
mFrame->height = static_cast<int>(sample->mImage->GetSize().height);
mFrame->pict_type =
sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
// Allocate AVFrame data.
if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) {
FFMPEGV_LOG("failed to allocate frame data: %s",
MakeErrorString(mLib, ret).get());
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Make sure AVFrame is writable.
if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
FFMPEGV_LOG("failed to make frame writable: %s",
MakeErrorString(mLib, ret).get());
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
nsresult rv = ConvertToI420(
sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1],
mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2]);
if (NS_FAILED(rv)) {
FFMPEGV_LOG("Conversion error!");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
  // Scale the YUV input frame if needed -- the encoded frame will have the
  // dimensions configured at encoder initialization.
if (mFrame->width != mConfig.mSize.Width() ||
mFrame->height != mConfig.mSize.Height()) {
if (!ScaleInputFrame()) {
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
}
  // Set presentation timestamp and duration of the AVFrame. The unit of pts is
  // time_base. The recommended time base is the reciprocal of the frame rate,
  // but we set it to microsecond for now.
# if LIBAVCODEC_VERSION_MAJOR >= 59
mFrame->time_base =
AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
# endif
// Provide fake pts, see header file.
if (mConfig.mCodec == CodecType::AV1) {
mFrame->pts = mFakePts;
mPtsMap.Insert(mFakePts, aSample->mTime.ToMicroseconds());
mFakePts += aSample->mDuration.ToMicroseconds();
mCurrentFramePts = aSample->mTime.ToMicroseconds();
} else {
mFrame->pts = aSample->mTime.ToMicroseconds();
}
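  // For example, a frame at t = 33333us with a 16666us duration is sent to the
  // encoder with pts = mFakePts, and mPtsMap records mFakePts -> 33333 so that
  // ToMediaRawData() can restore the real timestamp on the matching packet.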
# if LIBAVCODEC_VERSION_MAJOR >= 60
mFrame->duration = aSample->mDuration.ToMicroseconds();
# else
// Save duration in the time_base unit.
mDurationMap.Insert(mFrame->pts, aSample->mDuration.ToMicroseconds());
# endif
Duration(mFrame) = aSample->mDuration.ToMicroseconds();
AVDictionary* dict = nullptr;
  // VP8/VP9 use a mode that handles the temporal layer id sequence internally
  // and don't require explicitly setting the metadata. Other codecs, such as
  // AV1 via libaom, however require manual frame tagging.
if (SvcEnabled() && mConfig.mCodec != CodecType::VP8 &&
mConfig.mCodec != CodecType::VP9) {
if (aSample->mKeyframe) {
FFMPEGV_LOG("Key frame requested, reseting temporal layer id");
mSVCInfo->ResetTemporalLayerId();
}
nsPrintfCString str("%d", mSVCInfo->CurrentTemporalLayerId());
mLib->av_dict_set(&dict, "temporal_id", str.get(), 0);
mFrame->metadata = dict;
}
// Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}
#endif // if LIBAVCODEC_VERSION_MAJOR >= 58
RefPtr<MediaRawData> FFmpegVideoEncoder<LIBAV_VER>::ToMediaRawData(
AVPacket* aPacket) {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
MOZ_ASSERT(aPacket);
RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);
if (mConfig.mCodec == CodecType::AV1) {
auto found = mPtsMap.Take(aPacket->pts);
data->mTime = media::TimeUnit::FromMicroseconds(found.value());
}
if (mSVCInfo) {
if (data->mKeyframe) {
      FFMPEGV_LOG(
          "Encoded packet is a key frame, resetting temporal layer id "
          "sequence");
mSVCInfo->ResetTemporalLayerId();
}
uint8_t temporalLayerId = mSVCInfo->CurrentTemporalLayerId();
data->mTemporalLayerId.emplace(temporalLayerId);
mSVCInfo->UpdateTemporalLayerId();
}
return data;
}
Result<already_AddRefed<MediaByteBuffer>, nsresult>
FFmpegVideoEncoder<LIBAV_VER>::GetExtraData(AVPacket* aPacket) {
MOZ_ASSERT(aPacket);
  // H264 extra data comes with the key frame, and we only extract it when
  // encoding into AVCC format.
if (mCodecID != AV_CODEC_ID_H264 || !mConfig.mCodecSpecific ||
!mConfig.mCodecSpecific->is<H264Specific>() ||
mConfig.mCodecSpecific->as<H264Specific>().mFormat !=
H264BitStreamFormat::AVC ||
!(aPacket->flags & AV_PKT_FLAG_KEY)) {
return Err(NS_ERROR_NOT_AVAILABLE);
}
if (mCodecName != "libx264") {
FFMPEGV_LOG("Get extra data from codec %s has not been implemented yet",
mCodecName.get());
return Err(NS_ERROR_NOT_IMPLEMENTED);
}
bool useGlobalHeader =
#if LIBAVCODEC_VERSION_MAJOR >= 57
mCodecContext->flags & AV_CODEC_FLAG_GLOBAL_HEADER;
#else
false;
#endif
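  // With AV_CODEC_FLAG_GLOBAL_HEADER, libx264 places the SPS/PPS in the codec
  // context's extradata; otherwise they are carried in-band inside the
  // keyframe packet itself.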
Span<const uint8_t> buf;
if (useGlobalHeader) {
buf =
Span<const uint8_t>(mCodecContext->extradata,
static_cast<size_t>(mCodecContext->extradata_size));
} else {
buf =
Span<const uint8_t>(aPacket->data, static_cast<size_t>(aPacket->size));
}
if (buf.empty()) {
FFMPEGV_LOG("fail to get H264 AVCC header in key frame!");
return Err(NS_ERROR_UNEXPECTED);
}
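  // The reader below assumes AVCC-style length prefixing: each NAL unit is
  // preceded by its size as a 32-bit big-endian integer, with the SPS coming
  // first and the PPS second.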
BufferReader reader(buf);
// The first part is sps.
uint32_t spsSize;
MOZ_TRY_VAR(spsSize, reader.ReadU32());
Span<const uint8_t> spsData;
MOZ_TRY_VAR(spsData,
reader.ReadSpan<const uint8_t>(static_cast<size_t>(spsSize)));
// The second part is pps.
uint32_t ppsSize;
MOZ_TRY_VAR(ppsSize, reader.ReadU32());
Span<const uint8_t> ppsData;
MOZ_TRY_VAR(ppsData,
reader.ReadSpan<const uint8_t>(static_cast<size_t>(ppsSize)));
// Ensure we have profile, constraints and level needed to create the extra
// data.
if (spsData.Length() < 4) {
return Err(NS_ERROR_NOT_AVAILABLE);
}
FFMPEGV_LOG(
"Generate extra data: profile - %u, constraints: %u, level: %u for pts @ "
"%" PRId64,
spsData[1], spsData[2], spsData[3], aPacket->pts);
// Create extra data.
auto extraData = MakeRefPtr<MediaByteBuffer>();
H264::WriteExtraData(extraData, spsData[1], spsData[2], spsData[3], spsData,
ppsData);
MOZ_ASSERT(extraData);
return extraData.forget();
}
void FFmpegVideoEncoder<LIBAV_VER>::ForceEnablingFFmpegDebugLogs() {
#if DEBUG
if (!getenv("MOZ_AV_LOG_LEVEL") &&
MOZ_LOG_TEST(sFFmpegVideoLog, LogLevel::Debug)) {
mLib->av_log_set_level(AV_LOG_DEBUG);
}
#endif // DEBUG
}
Maybe<FFmpegVideoEncoder<LIBAV_VER>::SVCSettings>
FFmpegVideoEncoder<LIBAV_VER>::GetSVCSettings() {
MOZ_ASSERT(!mCodecName.IsEmpty());
MOZ_ASSERT(SvcEnabled());
CodecType codecType = CodecType::Unknown;
if (mCodecName == "libvpx") {
codecType = CodecType::VP8;
} else if (mCodecName == "libvpx-vp9") {
codecType = CodecType::VP9;
} else if (mCodecName == "libaom-av1") {
codecType = CodecType::AV1;
}
if (codecType == CodecType::Unknown) {
FFMPEGV_LOG("SVC setting is not implemented for %s codec",
mCodecName.get());
return Nothing();
}
SVCLayerSettings svc = GetSVCLayerSettings(
codecType, mConfig.mScalabilityMode, mConfig.mBitrate);
nsAutoCString name;
nsAutoCString parameters;
if (codecType == CodecType::VP8 || codecType == CodecType::VP9) {
    // Check if the number of temporal layers in the codec-specific settings
    // matches the number of layers for the given scalability mode.
if (mConfig.mCodecSpecific) {
if (mConfig.mCodecSpecific->is<VP8Specific>()) {
MOZ_ASSERT(
mConfig.mCodecSpecific->as<VP8Specific>().mNumTemporalLayers ==
svc.mNumberTemporalLayers);
} else if (mConfig.mCodecSpecific->is<VP9Specific>()) {
MOZ_ASSERT(
mConfig.mCodecSpecific->as<VP9Specific>().mNumTemporalLayers ==
svc.mNumberTemporalLayers);
}
}
// Form an SVC setting string for libvpx.
name = "ts-parameters"_ns;
parameters.Append("ts_target_bitrate=");
for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
}
parameters.AppendPrintf(
":ts_layering_mode=%u",
svc.mCodecAppendix->as<VPXSVCAppendix>().mLayeringMode);
}
if (codecType == CodecType::AV1) {
// Form an SVC setting string for libaom.
name = "svc-parameters"_ns;
parameters.AppendPrintf("number_spatial_layers=%zu",
svc.mNumberSpatialLayers);
parameters.AppendPrintf(":number_temporal_layers=%zu",
svc.mNumberTemporalLayers);
parameters.Append(":framerate_factor=");
for (size_t i = 0; i < svc.mRateDecimators.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mRateDecimators[i]);
}
parameters.Append(":layer_target_bitrate=");
for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
}
}
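  // As an illustration, VP9 in L1T2 mode at 1 Mbps ends up with
  // "ts-parameters" = "ts_target_bitrate=600,1000:ts_layering_mode=2", while
  // AV1 in L1T3 mode at 1 Mbps ends up with "svc-parameters" =
  // "number_spatial_layers=1:number_temporal_layers=3:framerate_factor=4,2,1:"
  // "layer_target_bitrate=500,700,1000".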
return Some(
SVCSettings{std::move(svc.mLayerIds),
std::make_pair(std::move(name), std::move(parameters))});
}
FFmpegVideoEncoder<LIBAV_VER>::H264Settings FFmpegVideoEncoder<
LIBAV_VER>::GetH264Settings(const H264Specific& aH264Specific) {
MOZ_ASSERT(mCodecName == "libx264",
"GetH264Settings is libx264-only for now");
nsTArray<std::pair<nsCString, nsCString>> keyValuePairs;
Maybe<H264Setting> profile = GetH264Profile(aH264Specific.mProfile);
MOZ_RELEASE_ASSERT(profile.isSome());
if (!profile->mString.IsEmpty()) {
keyValuePairs.AppendElement(std::make_pair("profile"_ns, profile->mString));
} else {
MOZ_RELEASE_ASSERT(aH264Specific.mProfile ==
H264_PROFILE::H264_PROFILE_EXTENDED);
}
Maybe<H264Setting> level = GetH264Level(aH264Specific.mLevel);
MOZ_RELEASE_ASSERT(level.isSome());
MOZ_RELEASE_ASSERT(!level->mString.IsEmpty());
keyValuePairs.AppendElement(std::make_pair("level"_ns, level->mString));
// Set format: libx264's default format is annexb.
if (aH264Specific.mFormat == H264BitStreamFormat::AVC) {
keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=0"));
    // mCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER could be set if we
    // don't want SPS/PPS data appended to every keyframe
    // (LIBAVCODEC_VERSION_MAJOR >= 57 only).
} else {
// Set annexb explicitly even if it's default format.
keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=1"));
}
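  // For example, {H264_PROFILE_MAIN, level 3.1, AVC} yields the codec context
  // values from the table plus the pairs ("profile", "main"),
  // ("level", "3.1") and ("x264-params", "annexb=0").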
return H264Settings{.mProfile = profile->mValue,
.mLevel = level->mValue,
.mSettingKeyValuePairs = std::move(keyValuePairs)};
}
} // namespace mozilla