/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "FFmpegVideoEncoder.h"
#include <algorithm>
#include <aom/aomcx.h>
#include "BufferReader.h"
#include "EncoderConfig.h"
#include "FFmpegLog.h"
#include "FFmpegRuntimeLinker.h"
#include "FFmpegUtils.h"
#include "H264.h"
#include "ImageContainer.h"
#include "ImageConversion.h"
#include "libavutil/error.h"
#include "libavutil/pixfmt.h"
#include "libyuv.h"
#include "mozilla/StaticPrefs_media.h"
#include "mozilla/dom/ImageBitmapBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "mozilla/dom/VideoFrameBinding.h"
#include "nsPrintfCString.h"
// The ffmpeg namespace is introduced to avoid name conflicts between ffmpeg's
// PixelFormat and MediaDataEncoder::PixelFormat in the MediaDataEncoder class
// scope.
namespace ffmpeg {
// TODO: WebCodecs' I420A should map to MediaDataEncoder::PixelFormat and then
// to AV_PIX_FMT_YUVA420P here.
#if LIBAVCODEC_VERSION_MAJOR < 54
using FFmpegPixelFormat = enum PixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE = FFmpegPixelFormat::PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA = FFmpegPixelFormat::PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA = FFmpegPixelFormat::PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 = FFmpegPixelFormat::PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 = FFmpegPixelFormat::PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
FFmpegPixelFormat::PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
FFmpegPixelFormat::PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
FFmpegPixelFormat::PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 = FFmpegPixelFormat::PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 = FFmpegPixelFormat::PIX_FMT_NV21;
#else
using FFmpegPixelFormat = enum AVPixelFormat;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NONE =
FFmpegPixelFormat::AV_PIX_FMT_NONE;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGBA =
FFmpegPixelFormat::AV_PIX_FMT_RGBA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGRA =
FFmpegPixelFormat::AV_PIX_FMT_BGRA;
const FFmpegPixelFormat FFMPEG_PIX_FMT_RGB24 =
FFmpegPixelFormat::AV_PIX_FMT_RGB24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_BGR24 =
FFmpegPixelFormat::AV_PIX_FMT_BGR24;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV444P =
FFmpegPixelFormat::AV_PIX_FMT_YUV444P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV422P =
FFmpegPixelFormat::AV_PIX_FMT_YUV422P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_YUV420P =
FFmpegPixelFormat::AV_PIX_FMT_YUV420P;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV12 =
FFmpegPixelFormat::AV_PIX_FMT_NV12;
const FFmpegPixelFormat FFMPEG_PIX_FMT_NV21 =
FFmpegPixelFormat::AV_PIX_FMT_NV21;
#endif
static const char* GetPixelFormatString(FFmpegPixelFormat aFormat) {
switch (aFormat) {
case FFMPEG_PIX_FMT_NONE:
return "none";
case FFMPEG_PIX_FMT_RGBA:
return "packed RGBA 8:8:8:8 (32bpp, RGBARGBA...)";
case FFMPEG_PIX_FMT_BGRA:
return "packed BGRA 8:8:8:8 (32bpp, BGRABGRA...)";
case FFMPEG_PIX_FMT_RGB24:
return "packed RGB 8:8:8 (24bpp, RGBRGB...)";
case FFMPEG_PIX_FMT_BGR24:
return "packed RGB 8:8:8 (24bpp, BGRBGR...)";
case FFMPEG_PIX_FMT_YUV444P:
return "planar YUV 4:4:4 (24bpp, 1 Cr & Cb sample per 1x1 Y samples)";
case FFMPEG_PIX_FMT_YUV422P:
return "planar YUV 4:2:2 (16bpp, 1 Cr & Cb sample per 2x1 Y samples)";
case FFMPEG_PIX_FMT_YUV420P:
return "planar YUV 4:2:0 (12bpp, 1 Cr & Cb sample per 2x2 Y samples)";
    case FFMPEG_PIX_FMT_NV12:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved UV sample per 2x2 Y "
             "samples)";
    case FFMPEG_PIX_FMT_NV21:
      return "planar YUV 4:2:0 (12bpp, 1 interleaved VU sample per 2x2 Y "
             "samples)";
default:
break;
}
MOZ_ASSERT_UNREACHABLE("Unsupported pixel format");
return "unsupported";
}
} // namespace ffmpeg
namespace mozilla {
struct H264Setting {
int mValue;
nsCString mString;
};
struct H264LiteralSetting {
int mValue;
nsLiteralCString mString;
H264Setting get() const { return {mValue, mString.AsString()}; }
};
static constexpr H264LiteralSetting H264Profiles[]{
{FF_PROFILE_H264_BASELINE, "baseline"_ns},
{FF_PROFILE_H264_MAIN, "main"_ns},
{FF_PROFILE_H264_EXTENDED, ""_ns},
{FF_PROFILE_H264_HIGH, "high"_ns}};
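// The table indices match the switch in GetH264Profile below. EXTENDED maps to
// an empty string because libx264 has no "extended" profile preset.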
static Maybe<H264Setting> GetH264Profile(const H264_PROFILE& aProfile) {
switch (aProfile) {
case H264_PROFILE::H264_PROFILE_UNKNOWN:
return Nothing();
case H264_PROFILE::H264_PROFILE_BASE:
return Some(H264Profiles[0].get());
case H264_PROFILE::H264_PROFILE_MAIN:
return Some(H264Profiles[1].get());
case H264_PROFILE::H264_PROFILE_EXTENDED:
return Some(H264Profiles[2].get());
case H264_PROFILE::H264_PROFILE_HIGH:
return Some(H264Profiles[3].get());
default:
break;
}
MOZ_ASSERT_UNREACHABLE("undefined profile");
return Nothing();
}
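// Builds the libx264-style level setting from an H264_LEVEL value, which is
// the numeric level_idc: e.g. level 3.1 has the value 31, and inserting a '.'
// after the first digit yields the "3.1" string libx264 expects.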
static Maybe<H264Setting> GetH264Level(const H264_LEVEL& aLevel) {
int val = static_cast<int>(aLevel);
nsPrintfCString str("%d", val);
str.Insert('.', 1);
return Some(H264Setting{val, str});
}
struct VPXSVCAppendix {
uint8_t mLayeringMode;
};
struct SVCLayerSettings {
using CodecAppendix = Variant<VPXSVCAppendix, aom_svc_params_t>;
size_t mNumberSpatialLayers;
size_t mNumberTemporalLayers;
uint8_t mPeriodicity;
nsTArray<uint8_t> mLayerIds;
// libvpx: ts_rate_decimator, libaom: framerate_factor
nsTArray<uint8_t> mRateDecimators;
nsTArray<uint32_t> mTargetBitrates;
Maybe<CodecAppendix> mCodecAppendix;
};
static SVCLayerSettings GetSVCLayerSettings(CodecType aCodec,
const ScalabilityMode& aMode,
uint32_t aBitPerSec) {
// TODO: Apply more sophisticated bitrate allocation, like SvcRateAllocator:
size_t layers = 0;
  const uint32_t kbps = aBitPerSec / 1000;  // ts_target_bitrate requires kbps.
uint8_t periodicity;
nsTArray<uint8_t> layerIds;
nsTArray<uint8_t> rateDecimators;
nsTArray<uint32_t> bitrates;
Maybe<SVCLayerSettings::CodecAppendix> appendix;
if (aMode == ScalabilityMode::L1T2) {
// Two temporal layers. 0-1...
//
// Frame pattern:
// Layer 0: |0| |2| |4| |6| |8|
// Layer 1: | |1| |3| |5| |7| |
layers = 2;
// 2 frames per period.
periodicity = 2;
// Assign layer ids.
layerIds.AppendElement(0);
layerIds.AppendElement(1);
// Set rate decimators.
rateDecimators.AppendElement(2);
rateDecimators.AppendElement(1);
    // Bitrate allocation: L0 - 60%, L1 - 40%. Note that the entries passed to
    // the encoder are cumulative, so L1's entry is the full target bitrate.
    bitrates.AppendElement(kbps * 3 / 5);
    bitrates.AppendElement(kbps);
if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
appendix.emplace(VPXSVCAppendix{
.mLayeringMode = 2 /* VP9E_TEMPORAL_LAYERING_MODE_0101 */
});
}
} else {
MOZ_ASSERT(aMode == ScalabilityMode::L1T3);
// Three temporal layers. 0-2-1-2...
//
// Frame pattern:
// Layer 0: |0| | | |4| | | |8| | | |12|
// Layer 1: | | |2| | | |6| | | |10| | |
// Layer 2: | |1| |3| |5| |7| |9| |11| |
layers = 3;
// 4 frames per period
periodicity = 4;
// Assign layer ids.
layerIds.AppendElement(0);
layerIds.AppendElement(2);
layerIds.AppendElement(1);
layerIds.AppendElement(2);
// Set rate decimators.
rateDecimators.AppendElement(4);
rateDecimators.AppendElement(2);
rateDecimators.AppendElement(1);
    // Bitrate allocation: L0 - 50%, L1 - 20%, L2 - 30%. The entries passed to
    // the encoder are cumulative: 50%, 70% and 100% of the target.
    bitrates.AppendElement(kbps / 2);
    bitrates.AppendElement(kbps * 7 / 10);
    bitrates.AppendElement(kbps);
if (aCodec == CodecType::VP8 || aCodec == CodecType::VP9) {
appendix.emplace(VPXSVCAppendix{
.mLayeringMode = 3 /* VP9E_TEMPORAL_LAYERING_MODE_0212 */
});
}
}
MOZ_ASSERT(layers == bitrates.Length(),
"Bitrate must be assigned to each layer");
return SVCLayerSettings{1,
layers,
periodicity,
std::move(layerIds),
std::move(rateDecimators),
std::move(bitrates),
appendix};
}
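// Illustration of the mapping above: GetSVCLayerSettings(CodecType::VP9,
// ScalabilityMode::L1T3, 1'000'000) yields 1 spatial and 3 temporal layers,
// periodicity 4, layer ids {0, 2, 1, 2}, rate decimators {4, 2, 1}, cumulative
// target bitrates {500, 700, 1000} kbps, and a VPXSVCAppendix selecting
// layering mode 3 (0212).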
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::UpdateTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
mCurrentIndex = (mCurrentIndex + 1) % mTemporalLayerIds.Length();
}
uint8_t FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::CurrentTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
return mTemporalLayerIds[mCurrentIndex];
}
void FFmpegVideoEncoder<LIBAV_VER>::SVCInfo::ResetTemporalLayerId() {
MOZ_ASSERT(!mTemporalLayerIds.IsEmpty());
mCurrentIndex = 0;
}
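// Together these helpers cycle through the temporal layer id pattern: with
// mTemporalLayerIds = {0, 2, 1, 2} (L1T3), alternating
// CurrentTemporalLayerId() and UpdateTemporalLayerId() yields 0, 2, 1, 2,
// 0, ... until ResetTemporalLayerId() restarts the pattern at a keyframe.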
FFmpegVideoEncoder<LIBAV_VER>::FFmpegVideoEncoder(
const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
: FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}
nsCString FFmpegVideoEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
return "ffvpx video encoder"_ns;
#else
const char* lib =
# if defined(MOZ_FFMPEG)
FFmpegRuntimeLinker::LinkStatusLibraryName();
# else
"no library: ffmpeg disabled during build";
# endif
return nsPrintfCString("ffmpeg video encoder (%s)", lib);
#endif
}
bool FFmpegVideoEncoder<LIBAV_VER>::SvcEnabled() const {
return mConfig.mScalabilityMode != ScalabilityMode::None;
}
nsresult FFmpegVideoEncoder<LIBAV_VER>::InitSpecific() {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
FFMPEGV_LOG("FFmpegVideoEncoder::InitSpecific");
// Initialize the common members of the encoder instance
AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
if (!codec) {
FFMPEGV_LOG("FFmpegDataEncoder::InitCommon failed");
return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
}
// And now the video-specific part
mCodecContext->pix_fmt = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
  // TODO: do this properly, based on the colorspace of the frame. Setting
  // this like that crashes encoders.
  // if (mConfig.mCodec != CodecType::AV1) {
  //   if (mConfig.mPixelFormat == dom::ImageBitmapFormat::RGBA32 ||
  //       mConfig.mPixelFormat == dom::ImageBitmapFormat::BGRA32) {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_RGB;
  // #ifdef FFVPX_VERSION
  //     mCodecContext->color_trc = AVCOL_TRC_IEC61966_2_1;
  // #endif
  //   } else {
  //     mCodecContext->color_primaries = AVCOL_PRI_BT709;
  //     mCodecContext->colorspace = AVCOL_SPC_BT709;
  //     mCodecContext->color_trc = AVCOL_TRC_BT709;
  //   }
  // }
mCodecContext->width = static_cast<int>(mConfig.mSize.width);
mCodecContext->height = static_cast<int>(mConfig.mSize.height);
  // Reasonable default for the quantization range.
mCodecContext->qmin =
static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_min());
mCodecContext->qmax =
static_cast<int>(StaticPrefs::media_ffmpeg_encoder_quantizer_max());
if (mConfig.mUsage == Usage::Realtime) {
mCodecContext->thread_count = 1;
} else {
int64_t pixels = mCodecContext->width * mCodecContext->height;
int threads = 1;
// Select a thread count that depends on the frame size, and cap to the
// number of available threads minus one
if (pixels >= 3840 * 2160) {
threads = 16;
} else if (pixels >= 1920 * 1080) {
threads = 8;
} else if (pixels >= 1280 * 720) {
threads = 4;
} else if (pixels >= 640 * 480) {
threads = 2;
}
mCodecContext->thread_count =
std::clamp<int>(threads, 1, GetNumberOfProcessors() - 1);
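    // For instance, a 1920x1080 non-realtime encode on a machine with 8
    // logical processors requests 8 threads and is clamped to 7, keeping one
    // processor free.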
}
  // The recommended time base is the reciprocal of the frame rate, but we set
  // it to microsecond for now.
mCodecContext->time_base =
AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
#if LIBAVCODEC_VERSION_MAJOR >= 57
// Note that sometimes framerate can be zero (from webcodecs).
mCodecContext->framerate =
AVRational{.num = static_cast<int>(mConfig.mFramerate), .den = 1};
#endif
#if LIBAVCODEC_VERSION_MAJOR >= 60
mCodecContext->flags |= AV_CODEC_FLAG_FRAME_DURATION;
#endif
  // A gop_size of 0 would disable inter-frames entirely (all frames would be
  // keyframes), so fall back to a large interval when none is configured.
mCodecContext->gop_size = mConfig.mKeyframeInterval
? static_cast<int>(mConfig.mKeyframeInterval)
: 10000;
mCodecContext->keyint_min = 0;
// When either real-time or SVC is enabled via config, the general settings of
// the encoder are set to be more appropriate for real-time usage
if (mConfig.mUsage == Usage::Realtime || SvcEnabled()) {
if (mConfig.mUsage != Usage::Realtime) {
FFMPEGV_LOG(
"SVC enabled but low latency encoding mode not enabled, forcing low "
"latency mode");
}
mLib->av_opt_set(mCodecContext->priv_data, "deadline", "realtime", 0);
    // Explicitly ask the encoder not to keep frames in flight for lookahead
    // purposes.
mLib->av_opt_set(mCodecContext->priv_data, "lag-in-frames", "0", 0);
if (mConfig.mCodec == CodecType::VP8 || mConfig.mCodec == CodecType::VP9) {
mLib->av_opt_set(mCodecContext->priv_data, "error-resilient", "1", 0);
}
if (mConfig.mCodec == CodecType::AV1) {
mLib->av_opt_set(mCodecContext->priv_data, "error-resilience", "1", 0);
// This sets usage to AOM_USAGE_REALTIME
mLib->av_opt_set(mCodecContext->priv_data, "usage", "1", 0);
      // Allow the bitrate to swing 50% above and below the target.
mLib->av_opt_set(mCodecContext->priv_data, "rc_undershoot_percent", "50",
0);
mLib->av_opt_set(mCodecContext->priv_data, "rc_overshoot_percent", "50",
0);
      // Row multithreading -- note that we do single-threaded encoding for
      // now, so this doesn't do much.
mLib->av_opt_set(mCodecContext->priv_data, "row_mt", "1", 0);
// Cyclic refresh adaptive quantization
mLib->av_opt_set(mCodecContext->priv_data, "aq-mode", "3", 0);
      // 9 is optimized for real-time (7 for regular encoding); lower values
      // use more CPU for a higher compression ratio.
mLib->av_opt_set(mCodecContext->priv_data, "cpu-used", "9", 0);
      // Disable global motion estimation: it targets camera motion, which is
      // unlikely in our use case.
mLib->av_opt_set(mCodecContext->priv_data, "enable-global-motion", "0",
0);
mLib->av_opt_set(mCodecContext->priv_data, "enable-cfl-intra", "0", 0);
// TODO: Set a number of tiles appropriate for the number of threads used
// -- disable tiling if using a single thread.
mLib->av_opt_set(mCodecContext->priv_data, "tile-columns", "0", 0);
mLib->av_opt_set(mCodecContext->priv_data, "tile-rows", "0", 0);
}
}
if (SvcEnabled()) {
if (Maybe<SVCSettings> settings = GetSVCSettings()) {
if (mCodecName == "libaom-av1") {
if (mConfig.mBitrateMode != BitrateMode::Constant) {
return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
}
}
SVCSettings s = settings.extract();
FFMPEGV_LOG("SVC options string: %s=%s", s.mSettingKeyValue.first.get(),
s.mSettingKeyValue.second.get());
mLib->av_opt_set(mCodecContext->priv_data, s.mSettingKeyValue.first.get(),
s.mSettingKeyValue.second.get(), 0);
// FFmpegVideoEncoder is reset after Drain(), so mSVCInfo should be
// reset() before emplace().
mSVCInfo.reset();
mSVCInfo.emplace(std::move(s.mTemporalLayerIds));
// TODO: layer settings should be changed dynamically when the frame's
// color space changed.
}
}
nsAutoCString h264Log;
if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is<H264Specific>()) {
// TODO: Set profile, level, avcc/annexb for openh264 and others.
if (mCodecName == "libx264") {
const H264Specific& h264Specific =
mConfig.mCodecSpecific->as<H264Specific>();
H264Settings s = GetH264Settings(h264Specific);
mCodecContext->profile = s.mProfile;
mCodecContext->level = s.mLevel;
for (const auto& pair : s.mSettingKeyValuePairs) {
mLib->av_opt_set(mCodecContext->priv_data, pair.first.get(),
pair.second.get(), 0);
}
      // Log the settings.
      // When using a profile other than EXTENDED, the profile string is in the
      // first element of mSettingKeyValuePairs, while the EXTENDED profile has
      // no profile string.
MOZ_ASSERT_IF(
s.mSettingKeyValuePairs.Length() != 3,
h264Specific.mProfile == H264_PROFILE::H264_PROFILE_EXTENDED);
const char* profileStr = s.mSettingKeyValuePairs.Length() == 3
? s.mSettingKeyValuePairs[0].second.get()
: "extended";
const char* levelStr = s.mSettingKeyValuePairs.Length() == 3
? s.mSettingKeyValuePairs[1].second.get()
: s.mSettingKeyValuePairs[0].second.get();
const char* formatStr =
h264Specific.mFormat == H264BitStreamFormat::AVC ? "AVCC" : "AnnexB";
h264Log.AppendPrintf(", H264: profile - %d (%s), level %d (%s), %s",
mCodecContext->profile, profileStr,
mCodecContext->level, levelStr, formatStr);
}
}
  // Further options that could be wired up in the future:
  // - if mConfig.mDenoising is set: av_opt_set_int(mCodecContext->priv_data,
  //   "noise_sensitivity", x, 0), where x is from 0 (disabled) to 6.
  // - if mConfig.mAdaptiveQp is set: av_opt_set_int(mCodecContext->priv_data,
  //   "aq_mode", x, 0), where x is from 0 to 3: 0 - Disabled, 1 - Variance
  //   AQ (default), 2 - Complexity AQ, 3 - Cycle AQ.
// Our old version of libaom-av1 is considered experimental by the recent
// ffmpeg we use. Allow experimental codecs for now until we decide on an AV1
// encoder.
mCodecContext->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
MediaResult rv = FinishInitCommon(codec);
if (NS_FAILED(rv)) {
FFMPEGV_LOG("FFmpeg video encoder initialization failure.");
return rv;
}
FFMPEGV_LOG(
"%s has been initialized with format: %s, bitrate: %" PRIi64
", width: %d, height: %d, quantizer: [%d, %d], time_base: %d/%d%s",
codec->name, ffmpeg::GetPixelFormatString(mCodecContext->pix_fmt),
static_cast<int64_t>(mCodecContext->bit_rate), mCodecContext->width,
mCodecContext->height, mCodecContext->qmin, mCodecContext->qmax,
mCodecContext->time_base.num, mCodecContext->time_base.den,
h264Log.IsEmpty() ? "" : h264Log.get());
return NS_OK;
}
bool FFmpegVideoEncoder<LIBAV_VER>::ScaleInputFrame() {
AVFrame* source = mFrame;
mFrame = nullptr;
// Allocate AVFrame.
if (!PrepareFrame()) {
FFMPEGV_LOG("failed to allocate frame");
return false;
}
// Set AVFrame properties for its internal data allocation. For now, we always
// convert into ffmpeg's buffer.
mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
mFrame->width = static_cast<int>(mConfig.mSize.Width());
mFrame->height = static_cast<int>(mConfig.mSize.Height());
// Allocate AVFrame data.
if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
FFMPEGV_LOG("failed to allocate frame data: %s",
MakeErrorString(mLib, ret).get());
return false;
}
// Make sure AVFrame is writable.
if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
FFMPEGV_LOG("failed to make frame writable: %s",
MakeErrorString(mLib, ret).get());
return false;
}
  int rv = I420Scale(source->data[0], source->linesize[0], source->data[1],
                     source->linesize[1], source->data[2], source->linesize[2],
                     source->width, source->height, mFrame->data[0],
                     mFrame->linesize[0], mFrame->data[1], mFrame->linesize[1],
                     mFrame->data[2], mFrame->linesize[2], mFrame->width,
                     mFrame->height, libyuv::FilterMode::kFilterBox);
  // libyuv returns 0 on success, so a non-zero value indicates a scale error.
  if (rv != 0) {
    FFMPEGV_LOG("YUV scale error");
  }
mLib->av_frame_unref(source);
mLib->av_frame_free(&source);
return true;
}
// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
Result<MediaDataEncoder::EncodedData, nsresult> FFmpegVideoEncoder<
LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
MOZ_ASSERT(mCodecContext);
MOZ_ASSERT(aSample);
RefPtr<const VideoData> sample(aSample->As<VideoData>());
// Validate input.
if (!sample->mImage) {
FFMPEGV_LOG("No image");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
if (sample->mImage->GetSize().IsEmpty()) {
FFMPEGV_LOG("image width or height is invalid");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Allocate AVFrame.
if (!PrepareFrame()) {
FFMPEGV_LOG("failed to allocate frame");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Set AVFrame properties for its internal data allocation. For now, we always
// convert into ffmpeg's buffer.
mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
mFrame->width = static_cast<int>(sample->mImage->GetSize().width);
mFrame->height = static_cast<int>(sample->mImage->GetSize().height);
mFrame->pict_type =
sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
// Allocate AVFrame data.
if (int ret = mLib->av_frame_get_buffer(mFrame, 0); ret < 0) {
FFMPEGV_LOG("failed to allocate frame data: %s",
MakeErrorString(mLib, ret).get());
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
// Make sure AVFrame is writable.
if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
FFMPEGV_LOG("failed to make frame writable: %s",
MakeErrorString(mLib, ret).get());
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
nsresult rv = ConvertToI420(
sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1],
mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2]);
if (NS_FAILED(rv)) {
FFMPEGV_LOG("Conversion error!");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
  // Scale the YUV input frame if needed -- the encoded frame will have the
  // dimensions configured at encoder initialization.
if (mFrame->width != mConfig.mSize.Width() ||
mFrame->height != mConfig.mSize.Height()) {
if (!ScaleInputFrame()) {
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
}
  // Set presentation timestamp and duration of the AVFrame. The unit of pts is
  // time_base. The recommended time base is the reciprocal of the frame rate,
  // but we set it to microsecond for now.
# if LIBAVCODEC_VERSION_MAJOR >= 59
mFrame->time_base =
AVRational{.num = 1, .den = static_cast<int>(USECS_PER_S)};
# endif
// Provide fake pts, see header file.
if (mConfig.mCodec == CodecType::AV1) {
mFrame->pts = mFakePts;
mPtsMap.Insert(mFakePts, aSample->mTime.ToMicroseconds());
mFakePts += aSample->mDuration.ToMicroseconds();
mCurrentFramePts = aSample->mTime.ToMicroseconds();
} else {
mFrame->pts = aSample->mTime.ToMicroseconds();
}
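  // For example, a frame at t = 33333us with a 16666us duration is sent to the
  // encoder with pts = mFakePts, and mPtsMap records mFakePts -> 33333 so that
  // ToMediaRawData() can restore the real timestamp on the matching packet.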
# if LIBAVCODEC_VERSION_MAJOR >= 60
mFrame->duration = aSample->mDuration.ToMicroseconds();
# else
// Save duration in the time_base unit.
mDurationMap.Insert(mFrame->pts, aSample->mDuration.ToMicroseconds());
# endif
Duration(mFrame) = aSample->mDuration.ToMicroseconds();
AVDictionary* dict = nullptr;
  // VP8/VP9 use a mode that handles the temporal layer id sequence internally
  // and don't require explicitly setting the metadata. Other codecs, such as
  // AV1 via libaom, however require manual frame tagging.
if (SvcEnabled() && mConfig.mCodec != CodecType::VP8 &&
mConfig.mCodec != CodecType::VP9) {
if (aSample->mKeyframe) {
FFMPEGV_LOG("Key frame requested, reseting temporal layer id");
mSVCInfo->ResetTemporalLayerId();
}
nsPrintfCString str("%d", mSVCInfo->CurrentTemporalLayerId());
mLib->av_dict_set(&dict, "temporal_id", str.get(), 0);
mFrame->metadata = dict;
}
// Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}
#endif // if LIBAVCODEC_VERSION_MAJOR >= 58
RefPtr<MediaRawData> FFmpegVideoEncoder<LIBAV_VER>::ToMediaRawData(
AVPacket* aPacket) {
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
MOZ_ASSERT(aPacket);
RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);
if (mConfig.mCodec == CodecType::AV1) {
auto found = mPtsMap.Take(aPacket->pts);
data->mTime = media::TimeUnit::FromMicroseconds(found.value());
}
if (mSVCInfo) {
if (data->mKeyframe) {
      FFMPEGV_LOG(
          "Encoded packet is a key frame, resetting temporal layer id "
          "sequence");
mSVCInfo->ResetTemporalLayerId();
}
uint8_t temporalLayerId = mSVCInfo->CurrentTemporalLayerId();
data->mTemporalLayerId.emplace(temporalLayerId);
mSVCInfo->UpdateTemporalLayerId();
}
return data;
}
Result<already_AddRefed<MediaByteBuffer>, nsresult>
FFmpegVideoEncoder<LIBAV_VER>::GetExtraData(AVPacket* aPacket) {
MOZ_ASSERT(aPacket);
  // H264 extra data comes with the key frame, and we only extract it when
  // encoding into AVCC format.
if (mCodecID != AV_CODEC_ID_H264 || !mConfig.mCodecSpecific ||
!mConfig.mCodecSpecific->is<H264Specific>() ||
mConfig.mCodecSpecific->as<H264Specific>().mFormat !=
H264BitStreamFormat::AVC ||
!(aPacket->flags & AV_PKT_FLAG_KEY)) {
return Err(NS_ERROR_NOT_AVAILABLE);
}
if (mCodecName != "libx264") {
FFMPEGV_LOG("Get extra data from codec %s has not been implemented yet",
mCodecName.get());
return Err(NS_ERROR_NOT_IMPLEMENTED);
}
bool useGlobalHeader =
#if LIBAVCODEC_VERSION_MAJOR >= 57
mCodecContext->flags & AV_CODEC_FLAG_GLOBAL_HEADER;
#else
false;
#endif
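  // With AV_CODEC_FLAG_GLOBAL_HEADER, libx264 places the SPS/PPS in the codec
  // context's extradata; otherwise they are carried in-band inside the
  // keyframe packet itself.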
Span<const uint8_t> buf;
if (useGlobalHeader) {
buf =
Span<const uint8_t>(mCodecContext->extradata,
static_cast<size_t>(mCodecContext->extradata_size));
} else {
buf =
Span<const uint8_t>(aPacket->data, static_cast<size_t>(aPacket->size));
}
if (buf.empty()) {
FFMPEGV_LOG("fail to get H264 AVCC header in key frame!");
return Err(NS_ERROR_UNEXPECTED);
}
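  // The reader below assumes AVCC-style length prefixing: each NAL unit is
  // preceded by its size as a 32-bit big-endian integer, with the SPS coming
  // first and the PPS second.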
BufferReader reader(buf);
// The first part is sps.
uint32_t spsSize;
MOZ_TRY_VAR(spsSize, reader.ReadU32());
Span<const uint8_t> spsData;
MOZ_TRY_VAR(spsData,
reader.ReadSpan<const uint8_t>(static_cast<size_t>(spsSize)));
// The second part is pps.
uint32_t ppsSize;
MOZ_TRY_VAR(ppsSize, reader.ReadU32());
Span<const uint8_t> ppsData;
MOZ_TRY_VAR(ppsData,
reader.ReadSpan<const uint8_t>(static_cast<size_t>(ppsSize)));
// Ensure we have profile, constraints and level needed to create the extra
// data.
if (spsData.Length() < 4) {
return Err(NS_ERROR_NOT_AVAILABLE);
}
FFMPEGV_LOG(
"Generate extra data: profile - %u, constraints: %u, level: %u for pts @ "
"%" PRId64,
spsData[1], spsData[2], spsData[3], aPacket->pts);
// Create extra data.
auto extraData = MakeRefPtr<MediaByteBuffer>();
H264::WriteExtraData(extraData, spsData[1], spsData[2], spsData[3], spsData,
ppsData);
MOZ_ASSERT(extraData);
return extraData.forget();
}
void FFmpegVideoEncoder<LIBAV_VER>::ForceEnablingFFmpegDebugLogs() {
#if DEBUG
if (!getenv("MOZ_AV_LOG_LEVEL") &&
MOZ_LOG_TEST(sFFmpegVideoLog, LogLevel::Debug)) {
mLib->av_log_set_level(AV_LOG_DEBUG);
}
#endif // DEBUG
}
Maybe<FFmpegVideoEncoder<LIBAV_VER>::SVCSettings>
FFmpegVideoEncoder<LIBAV_VER>::GetSVCSettings() {
MOZ_ASSERT(!mCodecName.IsEmpty());
MOZ_ASSERT(SvcEnabled());
CodecType codecType = CodecType::Unknown;
if (mCodecName == "libvpx") {
codecType = CodecType::VP8;
} else if (mCodecName == "libvpx-vp9") {
codecType = CodecType::VP9;
} else if (mCodecName == "libaom-av1") {
codecType = CodecType::AV1;
}
if (codecType == CodecType::Unknown) {
FFMPEGV_LOG("SVC setting is not implemented for %s codec",
mCodecName.get());
return Nothing();
}
SVCLayerSettings svc = GetSVCLayerSettings(
codecType, mConfig.mScalabilityMode, mConfig.mBitrate);
nsAutoCString name;
nsAutoCString parameters;
if (codecType == CodecType::VP8 || codecType == CodecType::VP9) {
    // Check if the number of temporal layers in the codec-specific settings
    // matches the number of layers for the given scalability mode.
if (mConfig.mCodecSpecific) {
if (mConfig.mCodecSpecific->is<VP8Specific>()) {
MOZ_ASSERT(
mConfig.mCodecSpecific->as<VP8Specific>().mNumTemporalLayers ==
svc.mNumberTemporalLayers);
} else if (mConfig.mCodecSpecific->is<VP9Specific>()) {
MOZ_ASSERT(
mConfig.mCodecSpecific->as<VP9Specific>().mNumTemporalLayers ==
svc.mNumberTemporalLayers);
}
}
// Form an SVC setting string for libvpx.
name = "ts-parameters"_ns;
parameters.Append("ts_target_bitrate=");
for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
}
parameters.AppendPrintf(
":ts_layering_mode=%u",
svc.mCodecAppendix->as<VPXSVCAppendix>().mLayeringMode);
}
if (codecType == CodecType::AV1) {
// Form an SVC setting string for libaom.
name = "svc-parameters"_ns;
parameters.AppendPrintf("number_spatial_layers=%zu",
svc.mNumberSpatialLayers);
parameters.AppendPrintf(":number_temporal_layers=%zu",
svc.mNumberTemporalLayers);
parameters.Append(":framerate_factor=");
for (size_t i = 0; i < svc.mRateDecimators.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mRateDecimators[i]);
}
parameters.Append(":layer_target_bitrate=");
for (size_t i = 0; i < svc.mTargetBitrates.Length(); ++i) {
if (i > 0) {
parameters.Append(",");
}
parameters.AppendPrintf("%d", svc.mTargetBitrates[i]);
}
}
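  // As an illustration, VP9 in L1T2 mode at 1 Mbps ends up with
  // "ts-parameters" = "ts_target_bitrate=600,1000:ts_layering_mode=2", while
  // AV1 in L1T3 mode at 1 Mbps ends up with "svc-parameters" =
  // "number_spatial_layers=1:number_temporal_layers=3:framerate_factor=4,2,1:"
  // "layer_target_bitrate=500,700,1000".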
return Some(
SVCSettings{std::move(svc.mLayerIds),
std::make_pair(std::move(name), std::move(parameters))});
}
FFmpegVideoEncoder<LIBAV_VER>::H264Settings FFmpegVideoEncoder<
LIBAV_VER>::GetH264Settings(const H264Specific& aH264Specific) {
MOZ_ASSERT(mCodecName == "libx264",
"GetH264Settings is libx264-only for now");
nsTArray<std::pair<nsCString, nsCString>> keyValuePairs;
Maybe<H264Setting> profile = GetH264Profile(aH264Specific.mProfile);
MOZ_RELEASE_ASSERT(profile.isSome());
if (!profile->mString.IsEmpty()) {
keyValuePairs.AppendElement(std::make_pair("profile"_ns, profile->mString));
} else {
MOZ_RELEASE_ASSERT(aH264Specific.mProfile ==
H264_PROFILE::H264_PROFILE_EXTENDED);
}
Maybe<H264Setting> level = GetH264Level(aH264Specific.mLevel);
MOZ_RELEASE_ASSERT(level.isSome());
MOZ_RELEASE_ASSERT(!level->mString.IsEmpty());
keyValuePairs.AppendElement(std::make_pair("level"_ns, level->mString));
// Set format: libx264's default format is annexb.
if (aH264Specific.mFormat == H264BitStreamFormat::AVC) {
keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=0"));
    // mCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER could be set if we
    // don't want SPS/PPS data appended to every keyframe
    // (LIBAVCODEC_VERSION_MAJOR >= 57 only).
} else {
// Set annexb explicitly even if it's default format.
keyValuePairs.AppendElement(std::make_pair("x264-params"_ns, "annexb=1"));
}
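  // For example, {H264_PROFILE_MAIN, level 3.1, AVC} yields the codec context
  // values from the table plus the pairs ("profile", "main"),
  // ("level", "3.1") and ("x264-params", "annexb=0").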
return H264Settings{.mProfile = profile->mValue,
.mLevel = level->mValue,
.mSettingKeyValuePairs = std::move(keyValuePairs)};
}
} // namespace mozilla