/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "FFmpegAudioEncoder.h"
#include "FFmpegRuntimeLinker.h"
#include "FFmpegLog.h"
#include "FFmpegUtils.h"
#include "MediaData.h"
#include "AudioSegment.h"
namespace mozilla {
// Construct an audio encoder backed by ffmpeg/ffvpx for the given codec id.
// No codec state is created here; everything is deferred to InitSpecific().
// This only forwards the library wrapper, codec id, task queue and config to
// the shared FFmpegDataEncoder base.
FFmpegAudioEncoder<LIBAV_VER>::FFmpegAudioEncoder(
    const FFmpegLibWrapper* aLib, AVCodecID aCodecID,
    const RefPtr<TaskQueue>& aTaskQueue, const EncoderConfig& aConfig)
    : FFmpegDataEncoder(aLib, aCodecID, aTaskQueue, aConfig) {}
// Human-readable name of this encoder implementation, for logging and
// diagnostics. With the in-tree ffvpx build the name is static; otherwise it
// reports which ffmpeg library was dynamically linked (or that ffmpeg support
// was compiled out entirely).
nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const {
#ifdef USING_MOZFFVPX
  return "ffvpx audio encoder"_ns;
#else
  const char* lib =
# if defined(MOZ_FFMPEG)
      // Name of the libavcodec that the runtime linker actually loaded.
      FFmpegRuntimeLinker::LinkStatusLibraryName();
# else
      "no library: ffmpeg disabled during build";
# endif
  return nsPrintfCString("ffmpeg audio encoder (%s)", lib);
#endif
}
// Deleter functor for the smart pointer holding the speex resampler state,
// so mResampler releases the C allocation via RAII.
void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()(
    SpeexResamplerState* aResampler) {
  speex_resampler_destroy(aResampler);
}
// Configure the audio-specific parts of the codec context and open the
// encoder:
//  - negotiate a sample-rate the codec supports, creating a speex resampler
//    when the input rate is not directly supported;
//  - pick the sample format the wrapped encoder expects;
//  - apply Opus-specific options (VBR/CBR, complexity, frame duration,
//    packet loss percentage, in-band FEC, DTX).
// Returns an error if the codec can't be set up or a required option can't
// be applied.
nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  FFMPEG_LOG("FFmpegAudioEncoder::InitInternal");
  // Initialize the common members of the encoder instance
  AVCodec* codec = FFmpegDataEncoder<LIBAV_VER>::InitCommon();
  if (!codec) {
    FFMPEG_LOG("FFmpegDataEncoder::InitCommon failed");
    return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
  }
  // Find a compatible input rate for the codec, update the encoder config,
  // and note the rate at which this instance was configured.
  mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate);
  if (codec->supported_samplerates) {
    // Ensure the sample-rate list is sorted, iterate and either find that the
    // sample rate is supported, or pick the same rate just above the audio
    // input sample-rate (as to not lose information). If the audio is higher
    // than the highest supported sample-rate, down-sample to the highest
    // sample-rate supported by the codec. This is the case when encoding high
    // samplerate audio to opus.
    AutoTArray<int, 16> supportedSampleRates;
    IterateZeroTerminated(codec->supported_samplerates,
                          [&supportedSampleRates](int aRate) mutable {
                            supportedSampleRates.AppendElement(aRate);
                          });
    supportedSampleRates.Sort();
    for (const auto& rate : supportedSampleRates) {
      if (mInputSampleRate == rate) {
        // Exact match: no resampling needed.
        mConfig.mSampleRate = rate;
        break;
      }
      if (mInputSampleRate < rate) {
        // This rate is the smallest supported rate above the content's rate.
        mConfig.mSampleRate = rate;
        break;
      }
      if (mInputSampleRate > rate) {
        // Remember the highest supported rate below the input rate; if the
        // loop finishes here, the input will be down-sampled to it.
        mConfig.mSampleRate = rate;
      }
    }
  }
  if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) {
    // Need to resample to targetRate
    int err;
    SpeexResamplerState* resampler = speex_resampler_init(
        mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate,
        SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
    if (err) {
      // Fix: this failure used to be logged and then ignored, returning
      // NS_OK with no resampler set up -- every subsequent Encode() call
      // would then fail with a rate mismatch. Surface the failure at init
      // time instead. (Format specifiers: the last two args are uint32_t.)
      FFMPEG_LOG(
          "Error creating resampler in FFmpegAudioEncoder %dHz -> %uHz (%uch)",
          mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels);
      return NS_ERROR_DOM_MEDIA_FATAL_ERR;
    }
    mResampler.reset(resampler);
  }
  // And now the audio-specific part
  mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
#if LIBAVCODEC_VERSION_MAJOR >= 60
  // Gecko's ordering intentionally matches ffmpeg's ordering
  mLib->av_channel_layout_default(&mCodecContext->ch_layout,
                                  AssertedCast<int>(mConfig.mNumberOfChannels));
#else
  mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
#endif
  switch (mConfig.mCodec) {
    case CodecType::Opus:
      // When using libopus, ffmpeg supports interleaved float and s16 input.
      mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLT;
      break;
    case CodecType::Vorbis:
      // When using libvorbis, ffmpeg only supports planar f32 input.
      mCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
      break;
    default:
      MOZ_ASSERT_UNREACHABLE("Not supported");
  }
  if (mConfig.mCodec == CodecType::Opus) {
    // Default is VBR
    if (mConfig.mBitrateMode == BitrateMode::Constant) {
      mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0);
    }
    if (mConfig.mCodecSpecific.isSome()) {
      MOZ_ASSERT(mConfig.mCodecSpecific->is<OpusSpecific>());
      const OpusSpecific& specific = mConfig.mCodecSpecific->as<OpusSpecific>();
      // This attribute maps directly to complexity
      mCodecContext->compression_level = specific.mComplexity;
      FFMPEG_LOG("Opus complexity set to %d", specific.mComplexity);
      // mFrameDuration is in microseconds; libopus wants milliseconds.
      float frameDurationMs =
          AssertedCast<float>(specific.mFrameDuration) / 1000.f;
      if (mLib->av_opt_set_double(mCodecContext->priv_data, "frame_duration",
                                  frameDurationMs, 0)) {
        FFMPEG_LOG("Error setting the frame duration on Opus encoder");
        return NS_ERROR_FAILURE;
      }
      FFMPEG_LOG("Opus frame duration set to %0.2f", frameDurationMs);
      if (specific.mPacketLossPerc) {
        if (mLib->av_opt_set_int(
                mCodecContext->priv_data, "packet_loss",
                AssertedCast<int64_t>(specific.mPacketLossPerc), 0)) {
          FFMPEG_LOG("Error setting the packet loss percentage to %" PRIu64
                     " on Opus encoder",
                     specific.mPacketLossPerc);
          return NS_ERROR_FAILURE;
        }
        FFMPEG_LOGV("Packet loss set to %d%% in Opus encoder",
                    AssertedCast<int>(specific.mPacketLossPerc));
      }
      if (specific.mUseInBandFEC) {
        if (mLib->av_opt_set(mCodecContext->priv_data, "fec", "on", 0)) {
          FFMPEG_LOG("Error %s FEC on Opus encoder",
                     specific.mUseInBandFEC ? "enabling" : "disabling");
          return NS_ERROR_FAILURE;
        }
        FFMPEG_LOGV("In-band FEC enabled for Opus encoder.");
      }
      if (specific.mUseDTX) {
        if (mLib->av_opt_set(mCodecContext->priv_data, "dtx", "on", 0)) {
          FFMPEG_LOG("Error %s DTX on Opus encoder",
                     specific.mUseDTX ? "enabling" : "disabling");
          return NS_ERROR_FAILURE;
        }
        // DTX packets are a TOC byte, and possibly one byte of length, packets
        // 3 bytes and larger are to be returned.
        mDtxThreshold = 3;
      }
      // TODO: format
    }
  }
  // Override the time base: always the sample-rate the encoder is running at
  mCodecContext->time_base =
      AVRational{.num = 1, .den = mCodecContext->sample_rate};
  MediaResult rv = FinishInitCommon(codec);
  if (NS_FAILED(rv)) {
    FFMPEG_LOG("FFmpeg encode initialization failure.");
    return rv.Code();
  }
  return NS_OK;
}
// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
// Encode exactly one packet worth of interleaved f32 audio. aSamples holds
// at most mCodecContext->frame_size frames (fewer only when draining);
// aPts is the presentation time of the first frame. Returns the encoded
// packets produced by ffmpeg for this input, or an error.
Result<MediaDataEncoder::EncodedData, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
                                               media::TimeUnit aPts) {
  // Allocate AVFrame.
  if (!PrepareFrame()) {
    FFMPEG_LOG("failed to allocate frame");
    return Err(NS_ERROR_OUT_OF_MEMORY);
  }
  uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels;
  // This method assumes that the audio has been packetized appropriately --
  // packets smaller than the packet size are allowed when draining.
  MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size);
  ChannelCount(mFrame) = AssertedCast<int>(mConfig.mNumberOfChannels);
# if LIBAVCODEC_VERSION_MAJOR >= 60
  int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout,
                                        &mCodecContext->ch_layout);
  if (rv < 0) {
    FFMPEG_LOG("channel layout copy error: %s",
               MakeErrorString(mLib, rv).get());
    return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
  }
# endif
  mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
  // Not a mistake, nb_samples is per channel in ffmpeg
  mFrame->nb_samples = AssertedCast<int>(frameCount);
  // Audio is converted below if needed
  mFrame->format = mCodecContext->sample_fmt;
  // Set presentation timestamp and duration of the AVFrame.
# if LIBAVCODEC_VERSION_MAJOR >= 59
  mFrame->time_base =
      AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)};
# endif
  mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate);
# if LIBAVCODEC_VERSION_MAJOR >= 60
  mFrame->duration = frameCount;
# else
  mFrame->pkt_duration = frameCount;
  // Save duration in the time_base unit.
  mDurationMap.Insert(mFrame->pts, mFrame->pkt_duration);
# endif
  if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
    FFMPEG_LOG("failed to allocate frame data: %s",
               MakeErrorString(mLib, ret).get());
    return Err(NS_ERROR_OUT_OF_MEMORY);
  }
  // Make sure AVFrame is writable.
  if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
    FFMPEG_LOG("failed to make frame writable: %s",
               MakeErrorString(mLib, ret).get());
    return Err(NS_ERROR_DOM_MEDIA_FATAL_ERR);
  }
  // The input is always in f32 interleaved for now
  if (mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLT) {
    // Interleaved output: straight copy into the frame's single data plane.
    PodCopy(reinterpret_cast<float*>(mFrame->data[0]), aSamples.data(),
            aSamples.Length());
  } else {
    MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP);
    // Fix: DeinterleaveAndConvertBuffer fans out all channels into
    // mFrame->data in a single call (it receives the channel count and the
    // full output-pointer array). The previous per-channel loop repeated the
    // identical full deinterleave mNumberOfChannels times.
    DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples,
                                 mConfig.mNumberOfChannels, mFrame->data);
  }
  // Now send the AVFrame to ffmpeg for encoding, same code for audio and video.
  return FFmpegDataEncoder<LIBAV_VER>::EncodeWithModernAPIs();
}
// Resample (if configured), packetize, and encode one input AudioData.
// Returns all encoded packets produced for this input, or the first error
// encountered.
Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder<
    LIBAV_VER>::EncodeInputWithModernAPIs(RefPtr<const MediaData> aSample) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(mCodecContext);
  MOZ_ASSERT(aSample);
  RefPtr<const AudioData> sample(aSample->As<AudioData>());
  FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s",
             sample->Frames(), sample->mTime.ToString().get());
  // The input must match the rate and channel count this encoder was
  // configured with: the original input rate when a resampler is present,
  // the codec rate otherwise.
  if ((!mResampler && sample->mRate != mConfig.mSampleRate) ||
      (mResampler &&
       sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) ||
      sample->mChannels != mConfig.mNumberOfChannels) {
    FFMPEG_LOG(
        "Rate or sample-rate at the input of the encoder different from what "
        "has been configured initially, erroring out");
    return Result<MediaDataEncoder::EncodedData, nsresult>(
        NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
  }
  // ffmpeg expects exactly sized input audio packets most of the time.
  // Packetization is performed if needed, and audio packets of the correct size
  // are fed to ffmpeg, with timestamps extrapolated the timestamp found on
  // the input MediaData.
  if (!mPacketizer) {
    media::TimeUnit basePts = media::TimeUnit::Zero(mConfig.mSampleRate);
    basePts += sample->mTime;
    mPacketizer.emplace(mCodecContext->frame_size, sample->mChannels,
                        basePts.ToTicksAtRate(mConfig.mSampleRate),
                        mConfig.mSampleRate);
  }
  if (!mFirstPacketPts.IsValid()) {
    mFirstPacketPts = sample->mTime;
  }
  Span<float> audio = sample->Data();
  if (mResampler) {
    // Ensure that all input frames are consumed each time by oversizing the
    // output buffer.
    int bufferLengthGuess = std::ceil(2. * static_cast<float>(audio.size()) *
                                      mConfig.mSampleRate / mInputSampleRate);
    mTempBuffer.SetLength(bufferLengthGuess);
    uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels;
    uint32_t inputFramesProcessed = inputFrames;
    uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels;
    DebugOnly<int> rv = speex_resampler_process_interleaved_float(
        mResampler.get(), audio.data(), &inputFramesProcessed,
        mTempBuffer.Elements(), &outputFrames);
    audio = Span<float>(mTempBuffer.Elements(),
                        outputFrames * mConfig.mNumberOfChannels);
    MOZ_ASSERT(inputFrames == inputFramesProcessed,
               "increase the buffer to consume all input each time");
    MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
  }
  EncodedData output;
  mPacketizer->Input(audio.data(), audio.Length() / mConfig.mNumberOfChannels);
  // Dequeue and encode each packet. The packetizer supplies each packet's
  // pts; errors from EncodeOnePacket are propagated immediately.
  // (Cleanup: removed a MediaResult that was tested in the loop condition
  // but never assigned, and a dead `pts +=` at the loop tail -- pts is
  // reassigned from Output() on every iteration.)
  while (mPacketizer->PacketsAvailable()) {
    mTempBuffer.SetLength(mCodecContext->frame_size *
                          mConfig.mNumberOfChannels);
    media::TimeUnit pts = mPacketizer->Output(mTempBuffer.Elements());
    auto packet = Span(mTempBuffer.Elements(), mTempBuffer.Length());
    FFMPEG_LOG("Encoding %" PRIu32 " frames, pts: %s",
               mPacketizer->PacketSize(), pts.ToString().get());
    auto encodeResult = EncodeOnePacket(packet, pts);
    if (encodeResult.isErr()) {
      return encodeResult;
    }
    output.AppendElements(std::move(encodeResult.unwrap()));
  }
  return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
}
// Flush: encode whatever partial packet the packetizer still holds, then
// drain the underlying ffmpeg encoder. Returns all remaining encoded
// packets, or the first error encountered.
Result<MediaDataEncoder::EncodedData, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() {
  // If there's no packetizer, or it's empty, we can proceed immediately.
  if (!mPacketizer || mPacketizer->FramesAvailable() == 0) {
    return FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
  }
  EncodedData output;
  // Encode the last, possibly partial, packet held by the packetizer.
  // (Cleanup: removed an unused `MediaResult rv`; initialize `written` so
  // the span length is well-defined even if Drain leaves it untouched.)
  mTempBuffer.SetLength(mCodecContext->frame_size *
                        mPacketizer->ChannelCount());
  uint32_t written = 0;
  media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written);
  auto audio =
      Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount());
  auto encodeResult = EncodeOnePacket(audio, pts);
  if (encodeResult.isErr()) {
    return encodeResult;
  }
  output.AppendElements(std::move(encodeResult.unwrap()));
  // Now, drain the encoder itself.
  auto drainResult = FFmpegDataEncoder<LIBAV_VER>::DrainWithModernAPIs();
  if (drainResult.isErr()) {
    return drainResult;
  }
  output.AppendElements(std::move(drainResult.unwrap()));
  return Result<MediaDataEncoder::EncodedData, nsresult>(std::move(output));
}
#endif // if LIBAVCODEC_VERSION_MAJOR >= 58
// Convert an encoded AVPacket into a MediaRawData: fills in timing, applies
// the DTX threshold (packets below it are keep-alives and are dropped by
// returning nullptr), and attaches extradata plus the effective config on
// the first delivered packet.
RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
    AVPacket* aPacket) {
  MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
  MOZ_ASSERT(aPacket);
  if (aPacket->size < mDtxThreshold) {
    FFMPEG_LOG(
        "DTX enabled and packet is %d bytes (threshold %d), not returning.",
        aPacket->size, mDtxThreshold);
    return nullptr;
  }
  RefPtr<MediaRawData> data = ToMediaRawDataCommon(aPacket);
  if (!data) {
    // Fix: ToMediaRawDataCommon can fail (e.g. allocation failure); the
    // previous code dereferenced the result unconditionally.
    FFMPEG_LOG("ToMediaRawDataCommon failed, dropping packet");
    return nullptr;
  }
  data->mTime = media::TimeUnit(aPacket->pts, mConfig.mSampleRate);
  data->mTimecode = data->mTime;
  data->mDuration =
      media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate);
  // Handle encoder delay
  // Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite
  // specced yet.
  if (mFirstPacketPts > data->mTime) {
    data->mOriginalPresentationWindow =
        Some(media::TimeInterval{data->mTime, data->GetEndTime()});
    // Duration is likely to be adjusted when the above spec issue is fixed.
    // For now, leave it as-is
    // data->mDuration -= (mFirstPacketPts - data->mTime);
    // if (data->mDuration.IsNegative()) {
    //   data->mDuration = media::TimeUnit::Zero();
    // }
    data->mTime = mFirstPacketPts;
  }
  if (mPacketsDelivered++ == 0) {
    // Attach extradata, and the config (including any channel / samplerate
    // modification to fit the encoder requirements), if needed.
    if (auto r = GetExtraData(aPacket); r.isOk()) {
      data->mExtraData = r.unwrap();
    }
    data->mConfig = MakeUnique<EncoderConfig>(mConfig);
  }
  if (data->mExtraData) {
    FFMPEG_LOG(
        "FFmpegAudioEncoder out: [%s,%s] (%zu bytes, extradata %zu bytes)",
        data->mTime.ToString().get(), data->mDuration.ToString().get(),
        data->Size(), data->mExtraData->Length());
  } else {
    FFMPEG_LOG("FFmpegAudioEncoder out: [%s,%s] (%zu bytes)",
               data->mTime.ToString().get(), data->mDuration.ToString().get(),
               data->Size());
  }
  return data;
}
// Return the codec-specific configuration bytes ("extradata"). ffmpeg keeps
// these on the codec context rather than on individual packets, which is why
// the packet argument is unused. Errs with NS_ERROR_NOT_AVAILABLE when the
// context carries none.
Result<already_AddRefed<MediaByteBuffer>, nsresult>
FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) {
  const int extradataSize = mCodecContext->extradata_size;
  if (!extradataSize) {
    return Err(NS_ERROR_NOT_AVAILABLE);
  }
  // Copy the context's extradata into a refcounted buffer for the caller.
  RefPtr<MediaByteBuffer> buffer = MakeRefPtr<MediaByteBuffer>();
  MOZ_ASSERT(buffer);
  buffer->SetLength(extradataSize);
  PodCopy(buffer->Elements(), mCodecContext->extradata, extradataSize);
  return buffer.forget();
}
} // namespace mozilla