Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "WMFAudioMFTManager.h"
#include "MediaInfo.h"
#include "TimeUnits.h"
#include "VideoUtils.h"
#include "WMFUtils.h"
#include "mozilla/AbstractThread.h"
#include "mozilla/Logging.h"
#include "mozilla/Telemetry.h"
#include "nsTArray.h"
#include "BufferReader.h"
#include "mozilla/ScopeExit.h"
#define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
namespace mozilla {
using media::TimeUnit;
// Captures the channel count, sample rate and stream type described by
// |aConfig|. For AAC streams it additionally locates the codec specific
// configuration bytes, records the encoder delay and total frame count when
// they are available, decides whether the stream is treated as ADTS, and
// passes the configuration to AACAudioSpecificConfigToUserData() together
// with mUserData (which Init() later attaches to the input media type).
WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig)
    : mAudioChannels(aConfig.mChannels),
      mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP),
      mAudioRate(aConfig.mRate),
      mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) {
  MOZ_COUNT_CTOR(WMFAudioMFTManager);

  if (mStreamType == WMFStreamType::AAC) {
    const uint8_t* audioSpecConfig = nullptr;
    uint32_t configLength = 0;
    // Declared at this scope on purpose: in the fallback path below
    // |audioSpecConfig| points into this buffer, so the reference must stay
    // alive until the bytes have been consumed at the end of this block.
    RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob;

    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
      const AacCodecSpecificData& aacCodecSpecificData =
          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
      audioSpecConfig =
          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements();
      configLength =
          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length();

      mRemainingEncoderDelay = mEncoderDelay =
          aacCodecSpecificData.mEncoderDelayFrames;
      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
      LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32
          "frames) and total media frames (%" PRIu64 " frames)\n",
          mEncoderDelay, mTotalMediaFrames);
    } else {
      // Gracefully handle failure to cover all codec specific cases above. Once
      // we're confident there is no fall through from these cases above, we
      // should remove this code.
      audioCodecSpecificBinaryBlob =
          GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
      audioSpecConfig = audioCodecSpecificBinaryBlob->Elements();
      configLength = audioCodecSpecificBinaryBlob->Length();
    }

    // If no extradata has been provided, assume this is ADTS. Otherwise,
    // assume raw AAC packets.
    mIsADTS = !configLength;
    AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig,
                                     configLength, mUserData);
  }
}
// Balances the MOZ_COUNT_CTOR issued by the constructor.
WMFAudioMFTManager::~WMFAudioMFTManager() {
  MOZ_COUNT_DTOR(WMFAudioMFTManager);
}
// Maps mStreamType to the Media Foundation audio subtype GUID used for the
// decoder's input type. Stream types other than AAC and MP3 yield GUID_NULL.
const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() {
  MOZ_ASSERT(StreamTypeIsAudio(mStreamType));

  if (mStreamType == WMFStreamType::AAC) {
    return MFAudioFormat_AAC;
  }
  if (mStreamType == WMFStreamType::MP3) {
    return MFAudioFormat_MP3;
  }
  return GUID_NULL;
}
bool WMFAudioMFTManager::Init() {
NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false);
RefPtr<MFTDecoder> decoder(new MFTDecoder());
// Note: MP3 MFT isn't registered as supporting Float output, but it works.
// Find PCM output MFTs as this is the common type.
HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
// Setup input/output media types
RefPtr<IMFMediaType> inputType;
hr = wmf::MFCreateMediaType(getter_AddRefs(inputType));
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID());
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
if (mStreamType == WMFStreamType::AAC) {
UINT32 payloadType = mIsADTS ? 1 : 0;
hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, payloadType);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(),
mUserData.Length());
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
}
RefPtr<IMFMediaType> outputType;
hr = wmf::MFCreateMediaType(getter_AddRefs(outputType));
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
hr = decoder->SetMediaTypes(inputType, outputType);
NS_ENSURE_TRUE(SUCCEEDED(hr), false);
mDecoder = decoder;
return true;
}
// Records the sample's timestamp in mLastInputTime and forwards its bytes,
// timestamp and duration (both in microseconds) to the decoder. Returns the
// decoder's result unchanged.
HRESULT
WMFAudioMFTManager::Input(MediaRawData* aSample) {
  mLastInputTime = aSample->mTime;

  const auto byteCount = static_cast<uint32_t>(aSample->Size());
  const auto timestampUs = aSample->mTime.ToMicroseconds();
  const auto durationUs = aSample->mDuration.ToMicroseconds();
  return mDecoder->Input(aSample->Data(), byteCount, timestampUs, durationUs);
}
// Returns a short name for the codec selected by mStreamType: "aac", "mp3",
// or "unknown" for any other stream type.
nsCString WMFAudioMFTManager::GetCodecName() const {
  switch (mStreamType) {
    case WMFStreamType::AAC:
      return "aac"_ns;
    case WMFStreamType::MP3:
      return "mp3"_ns;
    default:
      return "unknown"_ns;
  }
}
// Refreshes mAudioRate, mAudioChannels and mChannelsMap from the decoder's
// current output media type.
//
// Returns the failing HRESULT if the output type, the sample rate or the
// channel count cannot be retrieved; in that case none of the members are
// modified. A missing channel mask is not an error: mChannelsMap falls back
// to UNKNOWN_MAP and S_OK is returned.
HRESULT
WMFAudioMFTManager::UpdateOutputType() {
  RefPtr<IMFMediaType> type;
  HRESULT hr = mDecoder->GetOutputMediaType(type);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  // Read into locals first so that a failure half way through cannot leave
  // the rate updated while the channel count still describes the old format.
  uint32_t rate = 0;
  hr = type->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  uint32_t channels = 0;
  hr = type->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &channels);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  mAudioRate = rate;
  mAudioChannels = channels;

  uint32_t channelsMap = 0;
  hr = type->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &channelsMap);
  if (SUCCEEDED(hr)) {
    mChannelsMap = channelsMap;
  } else {
    LOG("Unable to retrieve channel layout. Ignoring");
    mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP;
  }
  return S_OK;
}
// Pulls one decoded sample out of the MFT and, when it contains audio, wraps
// it in an AudioData stored in |aOutput|.
//
// aStreamOffset is forwarded to the AudioData constructor.
// aOutput is reset to null on entry and only assigned on full success.
//
// Returns:
//   S_OK                            - a sample was produced, or the sample
//                                     held no complete frame (aOutput null).
//   MF_E_TRANSFORM_NEED_MORE_INPUT  - the decoder wants more input, or the
//                                     output was judged to be partial.
//   MF_E_TRANSFORM_STREAM_CHANGE    - too many consecutive stream changes.
//   E_OUTOFMEMORY / E_FAIL / other  - allocation or decoder failures.
HRESULT
WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutput) {
  aOutput = nullptr;
  RefPtr<IMFSample> sample;
  HRESULT hr;
  int typeChangeCount = 0;
  // Remembered so a rate change triggered below can be detected afterwards.
  const auto oldAudioRate = mAudioRate;
  while (true) {
    hr = mDecoder->Output(&sample);
    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
      return hr;
    }
    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
      hr = mDecoder->FindDecoderOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      hr = UpdateOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      // Catch infinite loops, but some decoders perform at least 2 stream
      // changes on consecutive calls, so be permissive.
      // 100 is arbitrarily > 2.
      NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE);
      ++typeChangeCount;
      continue;
    }
    break;
  }
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  if (!sample) {
    LOG("Audio MFTDecoder returned success but null output.");
    return E_FAIL;
  }

  // Best effort: if the attribute is absent |discontinuity| stays false.
  UINT32 discontinuity = false;
  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
  if (mFirstFrame || discontinuity) {
    // Update the output type, in case this segment has a different
    // rate. This also triggers on the first sample, which can have a
    // different rate than is advertised in the container, and sometimes we
    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
    hr = UpdateOutputType();
    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    mFirstFrame = false;
  }

  // The channel count is used as a divisor below; refuse to continue with a
  // zero count instead of dividing by zero.
  NS_ENSURE_TRUE(mAudioChannels > 0, E_FAIL);

  LONGLONG hns;
  hr = sample->GetSampleTime(&hns);
  if (FAILED(hr)) {
    return E_FAIL;
  }
  TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate);
  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);

  RefPtr<IMFMediaBuffer> buffer;
  hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer));
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  BYTE* data = nullptr;  // Note: *data will be owned by the IMFMediaBuffer, we
                         // don't need to free it.
  DWORD maxLength = 0, currentLength = 0;
  hr = buffer->Lock(&data, &maxLength, &currentLength);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  // Register the unlock only after Lock() succeeded, so that Unlock() is
  // never called on a buffer that was not locked.
  ScopeExit exit([buffer] { buffer->Unlock(); });

  // Output is made of floats.
  uint32_t numSamples = currentLength / sizeof(float);
  uint32_t numFrames = numSamples / mAudioChannels;
  if (numFrames == 0) {
    // All data from this chunk stripped, loop back and try to output the next
    // frame, if possible.
    return S_OK;
  }

  if (oldAudioRate != mAudioRate) {
    LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate,
        mAudioRate);
  }

  AlignedAudioBuffer audioData(numSamples);
  if (!audioData) {
    return E_OUTOFMEMORY;
  }
  float* floatData = reinterpret_cast<float*>(data);
  PodCopy(audioData.Data(), floatData, numSamples);

  TimeUnit duration(numFrames, mAudioRate);
  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);

  // A shorter duration is expected when the rate went up; otherwise a shrink
  // is treated as a partial output and dropped.
  const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate;
  if (IsPartialOutput(duration, isAudioRateChangedToHigher)) {
    LOG("Encounter a partial frame?! duration shrinks from %s to %s",
        mLastOutputDuration.ToString().get(), duration.ToString().get());
    return MF_E_TRANSFORM_NEED_MORE_INPUT;
  }

  aOutput = new AudioData(aStreamOffset, pts, std::move(audioData),
                          mAudioChannels, mAudioRate, mChannelsMap);
  MOZ_DIAGNOSTIC_ASSERT(duration == aOutput->mDuration, "must be equal");
  mLastOutputDuration = aOutput->mDuration;
#ifdef LOG_SAMPLE_DECODE
  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
      pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
#endif
  return S_OK;
}
// Heuristic for spotting a truncated output sample from the AAC decoder.
//
// On Windows 11 the AAC MFT was observed handing back partial output samples
// even though Microsoft's documentation says that should not happen; see
// bug 1731430 comment 26 for details. A switch to a higher audio rate
// legitimately shortens the duration, so in that case a shorter sample is
// not flagged. Otherwise, an output shorter than the previous one is
// reported as a probable partial output. Non-AAC streams are never flagged.
bool WMFAudioMFTManager::IsPartialOutput(
    const media::TimeUnit& aNewOutputDuration,
    const bool aIsRateChangedToHigher) const {
  if (mStreamType != WMFStreamType::AAC) {
    return false;
  }
  const bool durationShrank = mLastOutputDuration > aNewOutputDuration;
  return durationShrank && !aIsRateChangedToHigher;
}
// Drops this manager's reference to the decoder.
void WMFAudioMFTManager::Shutdown() {
  mDecoder = nullptr;
}
} // namespace mozilla
#undef LOG