AppleVTEncoder.cpp

mozilla-central/dom/media/platforms/apple/AppleVTEncoder.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AppleVTEncoder.h"

#include <CoreFoundation/CFArray.h>

#include <CoreFoundation/CFByteOrder.h>

#include <CoreFoundation/CFDictionary.h>

#include <MacTypes.h>

#include "AnnexB.h"

#include "AppleUtils.h"

#include "H264.h"

#include "ImageContainer.h"

#include "mozilla/dom/BindingUtils.h"

#include "mozilla/dom/ImageUtils.h"

namespace mozilla {

// Log module used by the logging macros below. It is only declared here; the
// definition lives outside this file.
extern LazyLogModule sPEMLog;

// Logging helpers for this file. Each one forwards to MOZ_LOG on sPEMLog at a
// fixed level (LOGE: Error, LOGW: Warning, LOGD: Debug, LOGV: Verbose) and
// prefixes the message with "[AppleVTEncoder] " and the name of the calling
// function (__func__). `fmt` must be a string literal because it is
// concatenated with the prefix at compile time.
#define LOGE(fmt, ...)                       \

  MOZ_LOG(sPEMLog, mozilla::LogLevel::Error, \

          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))

#define LOGW(fmt, ...)                         \

  MOZ_LOG(sPEMLog, mozilla::LogLevel::Warning, \

          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))

#define LOGD(fmt, ...)                       \

  MOZ_LOG(sPEMLog, mozilla::LogLevel::Debug, \

          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))

#define LOGV(fmt, ...)                         \

  MOZ_LOG(sPEMLog, mozilla::LogLevel::Verbose, \

          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))

// Builds the encoder specification dictionary that Init() hands to
// VTCompressionSessionCreate().
//
// On macOS 11.3 and later, when aLowLatencyRateControl is set, the
// specification requires a hardware encoder and enables low-latency rate
// control; aHardwareNotAllowed is not consulted on that path. In every other
// case the specification only enables or disables hardware acceleration,
// depending on aHardwareNotAllowed.
//
// The dictionary comes from CFDictionaryCreate(), so the caller owns the
// returned reference and must release it.
static CFDictionaryRef BuildEncoderSpec(const bool aHardwareNotAllowed,
                                        const bool aLowLatencyRateControl) {
  if (__builtin_available(macos 11.3, *)) {
    if (aLowLatencyRateControl) {
      // If doing low-latency rate control, the hardware encoder is required.
      const void* lowLatencyKeys[] = {
          kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
          kVTVideoEncoderSpecification_EnableLowLatencyRateControl};
      const void* lowLatencyValues[] = {kCFBooleanTrue, kCFBooleanTrue};
      static_assert(std::size(lowLatencyKeys) == std::size(lowLatencyValues),
                    "Non matching keys/values array size");
      return CFDictionaryCreate(
          kCFAllocatorDefault, lowLatencyKeys, lowLatencyValues,
          std::size(lowLatencyKeys), &kCFTypeDictionaryKeyCallBacks,
          &kCFTypeDictionaryValueCallBacks);
    }
  }

  // Default specification: a single entry toggling hardware acceleration.
  CFBooleanRef hardwareEnabled =
      aHardwareNotAllowed ? kCFBooleanFalse : kCFBooleanTrue;
  const void* specKeys[] = {
      kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder};
  const void* specValues[] = {hardwareEnabled};
  static_assert(std::size(specKeys) == std::size(specValues),
                "Non matching keys/values array size");
  return CFDictionaryCreate(kCFAllocatorDefault, specKeys, specValues,
                            std::size(specKeys),
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// Output callback registered with the compression session in Init().
// aEncoder is the AppleVTEncoder that was supplied as the callback ref-con
// when the session was created; the encode result is forwarded to its
// OutputFrame(). aFrameRefCon is not used.
static void FrameCallback(void* aEncoder, void* aFrameRefCon, OSStatus aStatus,
                          VTEncodeInfoFlags aInfoFlags,
                          CMSampleBufferRef aSampleBuffer) {
  auto* encoder = static_cast<AppleVTEncoder*>(aEncoder);
  encoder->OutputFrame(aStatus, aInfoFlags, aSampleBuffer);
}

// Sets kVTCompressionPropertyKey_AverageBitRate on aSession to aBitsPerSec.
// Returns true when VTSessionSetProperty() reports noErr.
static bool SetAverageBitrate(VTCompressionSessionRef& aSession,
                              uint32_t aBitsPerSec) {
  // Widen to 64 bits so the value matches the CFNumber type created below.
  const int64_t bitsPerSec = aBitsPerSec;
  AutoCFRelease<CFNumberRef> number(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &bitsPerSec));
  const OSStatus status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_AverageBitRate, number);
  return status == noErr;
}

// Sets kVTCompressionPropertyKey_ConstantBitRate on aSession to aBitsPerSec.
//
// The property is only used when running on macOS 13.0 or later; on older
// systems nothing is set and false is returned. Returns true only when
// VTSessionSetProperty() reports noErr.
static bool SetConstantBitrate(VTCompressionSessionRef& aSession,
                               uint32_t aBitsPerSec) {
  if (__builtin_available(macos 13.0, *)) {
    int32_t bps(aBitsPerSec);
    AutoCFRelease<CFNumberRef> bitrate(
        CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &bps));
    // Keep the raw OSStatus: it is needed both to tell "not supported" apart
    // from other failures and to compute the return value.
    const OSStatus status = VTSessionSetProperty(
        aSession, kVTCompressionPropertyKey_ConstantBitRate, bitrate);
    if (status == kVTPropertyNotSupportedErr) {
      LOGE("Constant bitrate not supported.");
    } else if (status != noErr) {
      LOGE("Failed to set constant bitrate, status: %d", status);
    }
    return status == noErr;
  }
  return false;
}

// Applies aBitsPerSec to aSession using the property matching aBitrateMode:
// the average bitrate for BitrateMode::Variable, the constant bitrate for any
// other mode. Returns the result of the selected setter.
static bool SetBitrateAndMode(VTCompressionSessionRef& aSession,
                              BitrateMode aBitrateMode, uint32_t aBitsPerSec) {
  const bool isVariable = aBitrateMode == BitrateMode::Variable;
  return isVariable ? SetAverageBitrate(aSession, aBitsPerSec)
                    : SetConstantBitrate(aSession, aBitsPerSec);
}

// Sets kVTCompressionPropertyKey_ExpectedFrameRate on aSession to aFPS.
// Returns true when VTSessionSetProperty() reports noErr.
static bool SetFrameRate(VTCompressionSessionRef& aSession, int64_t aFPS) {
  AutoCFRelease<CFNumberRef> expectedRate(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &aFPS));
  const OSStatus status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_ExpectedFrameRate, expectedRate);
  return status == noErr;
}

// Switches aSession in or out of real-time operation by setting, in order:
//   1. kVTCompressionPropertyKey_RealTime;
//   2. kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality
//      (macOS 11.0 and later only);
//   3. kVTCompressionPropertyKey_MaxFrameDelayCount (0 when enabling,
//      kVTUnlimitedFrameDelayCount when disabling).
// The first property must be set successfully; for the other two a
// kVTPropertyNotSupportedErr result is tolerated. Returns false as soon as a
// step fails, true otherwise.
static bool SetRealtime(VTCompressionSessionRef& aSession, bool aEnabled) {
  // B-frames has been disabled in Init(), so no need to set it here.
  const char* action = aEnabled ? "Enable" : "Disable";
  CFBooleanRef flag = aEnabled ? kCFBooleanTrue : kCFBooleanFalse;

  OSStatus status =
      VTSessionSetProperty(aSession, kVTCompressionPropertyKey_RealTime, flag);
  LOGD("%s real time, status: %d", action, status);
  if (status != noErr) {
    return false;
  }

  if (__builtin_available(macos 11.0, *)) {
    status = VTSessionSetProperty(
        aSession, kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
        flag);
    LOGD("%s PrioritizeEncodingSpeedOverQuality, status: %d", action, status);
    const bool accepted =
        status == noErr || status == kVTPropertyNotSupportedErr;
    if (!accepted) {
      return false;
    }
  }

  int32_t frameDelay = aEnabled ? 0 : kVTUnlimitedFrameDelayCount;
  AutoCFRelease<CFNumberRef> frameDelayNumber(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &frameDelay));
  status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_MaxFrameDelayCount,
      frameDelayNumber);
  LOGD("Set max frame delay count to %d, status: %d", frameDelay, status);
  return status == noErr || status == kVTPropertyNotSupportedErr;
}

// Maps aValue to the matching "auto level" VideoToolbox H.264 profile constant
// and sets it as kVTCompressionPropertyKey_ProfileLevel on aSession.
// Returns false when the profile is not one of base/main/high (after logging
// it) or when VTSessionSetProperty() does not report noErr.
static bool SetProfileLevel(VTCompressionSessionRef& aSession,
                            H264_PROFILE aValue) {
  CFStringRef level = nullptr;
  switch (aValue) {
    case H264_PROFILE::H264_PROFILE_BASE:
      level = kVTProfileLevel_H264_Baseline_AutoLevel;
      break;
    case H264_PROFILE::H264_PROFILE_MAIN:
      level = kVTProfileLevel_H264_Main_AutoLevel;
      break;
    case H264_PROFILE::H264_PROFILE_HIGH:
      level = kVTProfileLevel_H264_High_AutoLevel;
      break;
    default:
      LOGE("Profile %d not handled", static_cast<int>(aValue));
      break;
  }

  if (!level) {
    return false;
  }
  const OSStatus status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_ProfileLevel, level);
  return status == noErr;
}

// Creates and configures the VideoToolbox compression session from mConfig.
//
// Steps, in order: validate the frame size and SVC availability, build the
// encoder specification and source buffer attributes, create the H.264
// session with FrameCallback as output callback, disable frame reordering,
// then apply the real-time, bitrate, SVC, key frame interval and profile
// settings that mConfig asks for. Finally it records whether a hardware
// encoder is in use and clears mError.
//
// Returns a promise resolved with true on success, or rejected with the
// error describing the first step that failed.
RefPtr<MediaDataEncoder::InitPromise> AppleVTEncoder::Init() {
  MOZ_ASSERT(!mInited, "Cannot initialize encoder again without shutting down");

  if (mConfig.mSize.width == 0 || mConfig.mSize.height == 0) {
    LOGE("width or height 0 in encoder init");
    return InitPromise::CreateAndReject(NS_ERROR_ILLEGAL_VALUE, __func__);
  }

  if (mConfig.mScalabilityMode != ScalabilityMode::None && !OSSupportsSVC()) {
    LOGE("SVC only supported on macOS 11.3 and more recent");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                    "SVC only supported on macOS 11.3 and more recent"),
        __func__);
  }

  // Both real-time usage and SVC request low-latency rate control.
  bool lowLatencyRateControl =
      mConfig.mUsage == Usage::Realtime ||
      mConfig.mScalabilityMode != ScalabilityMode::None;
  LOGD("low latency rate control: %s, Hardware allowed: %s",
       lowLatencyRateControl ? "yes" : "no",
       mHardwareNotAllowed ? "no" : "yes");
  AutoCFRelease<CFDictionaryRef> spec(
      BuildEncoderSpec(mHardwareNotAllowed, lowLatencyRateControl));
  AutoCFRelease<CFDictionaryRef> srcBufferAttr(
      BuildSourceImageBufferAttributes());
  if (!srcBufferAttr) {
    LOGE("Failed to create source buffer attr");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                    "fail to create source buffer attributes"),
        __func__);
  }

  OSStatus status = VTCompressionSessionCreate(
      kCFAllocatorDefault, mConfig.mSize.width, mConfig.mSize.height,
      kCMVideoCodecType_H264, spec, srcBufferAttr, kCFAllocatorDefault,
      &FrameCallback, this /* outputCallbackRefCon */, &mSession);
  if (status != noErr) {
    LOGE("Failed to create compression session");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                    "fail to create encoder session"),
        __func__);
  }

  if (VTSessionSetProperty(mSession,
                           kVTCompressionPropertyKey_AllowFrameReordering,
                           kCFBooleanFalse) != noErr) {
    LOGE("Couldn't disable bframes");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Couldn't disable bframes"),
        __func__);
  }

  if (mConfig.mUsage == Usage::Realtime && !SetRealtime(mSession, true)) {
    LOGE("fail to configure realtime properties");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                    "fail to configure real-time"),
        __func__);
  }

  if (mConfig.mBitrate) {
    if (mConfig.mCodec == CodecType::H264 &&
        mConfig.mBitrateMode == BitrateMode::Constant) {
      // Not supported, fall-back to VBR.
      LOGD("H264 CBR not supported in VideoToolbox, falling back to VBR");
      mConfig.mBitrateMode = BitrateMode::Variable;
    }
    bool rv =
        SetBitrateAndMode(mSession, mConfig.mBitrateMode, mConfig.mBitrate);
    if (!rv) {
      LOGE("failed to set bitrate to %d and mode to %s", mConfig.mBitrate,
           mConfig.mBitrateMode == BitrateMode::Constant ? "constant"
                                                         : "variable");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      "fail to configurate bitrate"),
          __func__);
    }
  }

  if (mConfig.mScalabilityMode != ScalabilityMode::None) {
    if (__builtin_available(macos 11.3, *)) {
      float baseLayerFPSRatio = 1.0f;
      switch (mConfig.mScalabilityMode) {
        case ScalabilityMode::L1T2:
          baseLayerFPSRatio = 0.5;
          break;
        case ScalabilityMode::L1T3:
          // Not supported in hw on macOS, but is accepted and errors out when
          // encoding. Reject the configuration now.
          LOGE("macOS only supports L1T2 h264 SVC");
          return InitPromise::CreateAndReject(
              MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                          nsPrintfCString("macOS only support L1T2 h264 SVC")),
              __func__);
        default:
          MOZ_ASSERT_UNREACHABLE("Unhandled value");
      }

      AutoCFRelease<CFNumberRef> cf(CFNumberCreate(
          kCFAllocatorDefault, kCFNumberFloatType, &baseLayerFPSRatio));
      // Any non-zero OSStatus (i.e. anything other than noErr) is a failure.
      if (VTSessionSetProperty(
              mSession, kVTCompressionPropertyKey_BaseLayerFrameRateFraction,
              cf)) {
        LOGE("Failed to set base layer framerate fraction to %f",
             baseLayerFPSRatio);
        return InitPromise::CreateAndReject(
            MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                        nsPrintfCString("fail to configure SVC (base ratio: %f",
                                        baseLayerFPSRatio)),
            __func__);
      }
    } else {
      LOGE("MacOS version too old to enable SVC");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      "macOS version too old to enable SVC"),
          __func__);
    }
  }

  // Clamp the configured key frame interval to what fits in an int64_t.
  int64_t interval =
      mConfig.mKeyframeInterval > std::numeric_limits<int64_t>::max()
          ? std::numeric_limits<int64_t>::max()
          : AssertedCast<int64_t>(mConfig.mKeyframeInterval);
  AutoCFRelease<CFNumberRef> cf(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &interval));
  if (VTSessionSetProperty(mSession,
                           kVTCompressionPropertyKey_MaxKeyFrameInterval,
                           cf) != noErr) {
    LOGE("Failed to set max keyframe interval");
    return InitPromise::CreateAndReject(
        MediaResult(
            NS_ERROR_DOM_MEDIA_FATAL_ERR,
            nsPrintfCString("fail to configurate keyframe interval:%" PRId64,
                            interval)),
        __func__);
  }

  // Only read the H.264 settings when the codec-specific data actually holds
  // them, mirroring the is<H264Specific>() check done in OutputFrame().
  if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is<H264Specific>()) {
    const H264Specific& specific = mConfig.mCodecSpecific->as<H264Specific>();
    if (!SetProfileLevel(mSession, specific.mProfile)) {
      LOGE("Failed to set profile level");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      nsPrintfCString("fail to configurate profile level:%d",
                                      int(specific.mProfile))),
          __func__);
    }
  }

  AutoCFRelease<CFBooleanRef> isUsingHW = nullptr;
  status = VTSessionCopyProperty(
      mSession, kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder,
      kCFAllocatorDefault, isUsingHW.receive());
  mIsHardwareAccelerated = status == noErr && isUsingHW == kCFBooleanTrue;
  LOGD("Using hw acceleration: %s", mIsHardwareAccelerated ? "yes" : "no");

  mError = NS_OK;
  return InitPromise::CreateAndResolve(true, __func__);
}

// Translates a dom::ImageBitmapFormat into the corresponding CoreVideo pixel
// format constant. Returns Nothing() for formats that have no mapping here.
static Maybe<OSType> MapPixelFormat(dom::ImageBitmapFormat aFormat) {
  OSType cvFormat;
  switch (aFormat) {
    // Packed RGB and grayscale formats.
    case dom::ImageBitmapFormat::RGBA32:
      cvFormat = kCVPixelFormatType_32RGBA;
      break;
    case dom::ImageBitmapFormat::BGRA32:
      cvFormat = kCVPixelFormatType_32BGRA;
      break;
    case dom::ImageBitmapFormat::RGB24:
      cvFormat = kCVPixelFormatType_24RGB;
      break;
    case dom::ImageBitmapFormat::BGR24:
      cvFormat = kCVPixelFormatType_24BGR;
      break;
    case dom::ImageBitmapFormat::GRAY8:
      cvFormat = kCVPixelFormatType_OneComponent8;
      break;
    // YUV formats.
    case dom::ImageBitmapFormat::YUV444P:
      cvFormat = kCVPixelFormatType_444YpCbCr8;
      break;
    case dom::ImageBitmapFormat::YUV420P:
      cvFormat = kCVPixelFormatType_420YpCbCr8PlanarFullRange;
      break;
    case dom::ImageBitmapFormat::YUV420SP_NV12:
      cvFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
      break;
    default:
      return Nothing();
  }
  return Some(cvFormat);
}

// Builds the source image buffer attributes dictionary that Init() passes to
// VTCompressionSessionCreate(). The dictionary holds three entries: OpenGL
// compatibility (true), an empty IOSurface properties dictionary, and the
// CoreVideo pixel format mapped from mConfig.mSourcePixelFormat.
//
// Returns nullptr when the source pixel format has no CoreVideo mapping.
// Otherwise the caller owns the returned dictionary and must release it.
CFDictionaryRef AppleVTEncoder::BuildSourceImageBufferAttributes() {
  Maybe<OSType> fmt = MapPixelFormat(mConfig.mSourcePixelFormat);
  if (fmt.isNothing()) {
    LOGE("unsupported source pixel format");
    return nullptr;
  }
  // CFNumberCreate() reads the value through a pointer, so give it the
  // address of a plain OSType rather than the address of the Maybe wrapper.
  const OSType pixelFormatValue = fmt.ref();

  // Source image buffer attributes
  const void* keys[] = {kCVPixelBufferOpenGLCompatibilityKey,  // TODO
                        kCVPixelBufferIOSurfacePropertiesKey,  // TODO
                        kCVPixelBufferPixelFormatTypeKey};

  AutoCFRelease<CFDictionaryRef> ioSurfaceProps(CFDictionaryCreate(
      kCFAllocatorDefault, nullptr, nullptr, 0, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks));
  AutoCFRelease<CFNumberRef> pixelFormat(CFNumberCreate(
      kCFAllocatorDefault, kCFNumberSInt32Type, &pixelFormatValue));
  const void* values[] = {kCFBooleanTrue, ioSurfaceProps, pixelFormat};
  static_assert(std::size(keys) == std::size(values),
                "Non matching keys/values array size");

  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys),
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// Reports whether aSample is a key frame. A sample counts as a key frame when
// it has at least one sample attachment dictionary and the first one does not
// contain kCMSampleAttachmentKey_NotSync. Samples without attachments are
// reported as non-key frames.
static bool IsKeyframe(CMSampleBufferRef aSample) {
  CFArrayRef sampleAttachments =
      CMSampleBufferGetSampleAttachmentsArray(aSample, 0);
  const bool hasAttachments =
      sampleAttachments != nullptr && CFArrayGetCount(sampleAttachments) != 0;
  if (!hasAttachments) {
    return false;
  }

  auto firstAttachment = static_cast<CFDictionaryRef>(
      CFArrayGetValueAtIndex(sampleAttachments, 0));
  const bool markedNotSync =
      CFDictionaryContainsKey(firstAttachment, kCMSampleAttachmentKey_NotSync);
  return !markedNotSync;
}

// Queries how many H.264 parameter sets aDescription carries. A failed query
// is logged and whatever count was written (0 if none) is still returned.
static size_t GetNumParamSets(CMFormatDescriptionRef aDescription) {
  size_t count = 0;
  // Only the count out-parameter is requested; data, size and header length
  // are not needed here.
  const OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
      aDescription, 0, nullptr, nullptr, &count, nullptr);
  if (status != noErr) {
    LOGE("Cannot get number of parameter sets from format description");
  }
  return count;
}

// Four byte Annex B start code written in front of each NAL unit.
static const uint8_t kNALUStart[4] = {0, 0, 0, 1};

// Fetches the parameter set at aIndex from aDescription. On success *aDataPtr
// points at the parameter set bytes and their length is returned; on failure
// the error is logged and 0 is returned.
static size_t GetParamSet(CMFormatDescriptionRef aDescription, size_t aIndex,
                          const uint8_t** aDataPtr) {
  size_t paramSetSize = 0;
  int nalHeaderLength = 0;
  const OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
      aDescription, aIndex, aDataPtr, &paramSetSize, nullptr,
      &nalHeaderLength);
  if (status != noErr) {
    LOGE("failed to get parameter set from format description");
    return 0;
  }

  // The rest of this file assumes NAL units carry a 4 byte length prefix.
  MOZ_ASSERT(nalHeaderLength == sizeof(kNALUStart),
             "Only support 4 byte header");
  return paramSetSize;
}

// Appends every parameter set found in aDescription to aDst, each one
// preceded by the 4 byte start code kNALUStart. Returns false as soon as a
// parameter set cannot be read or an append fails; true otherwise (including
// when there are no parameter sets).
static bool WriteSPSPPS(MediaRawData* aDst,
                        CMFormatDescriptionRef aDescription) {
  // Get SPS/PPS
  const size_t paramSetCount = GetNumParamSets(aDescription);
  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());

  for (size_t index = 0; index < paramSetCount; ++index) {
    const uint8_t* paramSet = nullptr;
    const size_t paramSetSize = GetParamSet(aDescription, index, &paramSet);
    if (paramSetSize == 0) {
      return false;
    }

    const bool wroteStartCode = writer->Append(kNALUStart, sizeof(kNALUStart));
    if (!wroteStartCode) {
      LOGE("Cannot write NAL unit start code");
      return false;
    }

    const bool wroteParamSet = writer->Append(paramSet, paramSetSize);
    if (!wroteParamSet) {
      LOGE("Cannot write parameter set");
      return false;
    }
  }
  return true;
}

// Copies the "avcC" entry out of the sample description extension atoms of
// aDescription into a new MediaByteBuffer. Returns nullptr (after logging)
// when the atoms are missing, when there is no "avcC" entry, or when that
// entry is empty.
static RefPtr<MediaByteBuffer> extractAvcc(
    CMFormatDescriptionRef aDescription) {
  CFPropertyListRef extension = CMFormatDescriptionGetExtension(
      aDescription,
      kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms);
  if (!extension) {
    LOGE("fail to get atoms");
    return nullptr;
  }

  auto atoms = static_cast<CFDictionaryRef>(extension);
  auto avcC = static_cast<CFDataRef>(CFDictionaryGetValue(atoms, CFSTR("avcC")));
  if (!avcC) {
    LOGE("fail to extract avcC");
    return nullptr;
  }

  const CFIndex size = CFDataGetLength(avcC);
  const UInt8* data = CFDataGetBytePtr(avcC);
  if (!data || size <= 0) {
    LOGE("empty avcC");
    return nullptr;
  }

  RefPtr<MediaByteBuffer> config = new MediaByteBuffer(size);
  config->AppendElements(data, size);
  return config;
}

// Writes codec configuration for a key frame into aDst.
//
// Non-key frames need nothing and succeed immediately. For key frames aDst is
// flagged as such, then:
//   - in Annex B mode the parameter sets are written inline via WriteSPSPPS();
//   - otherwise the avcC blob is extracted, and when it differs from the
//     cached mAvcc (or none is cached yet) it replaces the cache and is
//     attached to aDst as extra data.
// Returns false when the format description or the avcC blob cannot be
// obtained, or when writing the parameter sets fails.
bool AppleVTEncoder::WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc,
                                    const bool aAsAnnexB) {
  if (!IsKeyframe(aSrc)) {
    return true;
  }
  aDst->mKeyframe = true;

  CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(aSrc);
  if (!formatDesc) {
    LOGE("fail to get format description from sample");
    return false;
  }

  if (aAsAnnexB) {
    return WriteSPSPPS(aDst, formatDesc);
  }

  RefPtr<MediaByteBuffer> extracted = extractAvcc(formatDesc);
  if (!extracted) {
    return false;
  }

  // Only publish the configuration when it is new or has changed.
  const bool configChanged =
      !mAvcc || !H264::CompareExtraData(extracted, mAvcc);
  if (configChanged) {
    mAvcc = extracted;
    aDst->mExtraData = mAvcc;
  }

  // extracted is known to be non-null at this point.
  return true;
}

// Appends the NAL units contained in aSrc to aDst.
//
// The sample data is read as a sequence of units, each prefixed by a 4 byte
// big-endian length. In Annex B mode (aAsAnnexB true) every length prefix is
// replaced by the start code kNALUStart; otherwise the prefix is copied along
// with the unit data. aDst is grown up front by the total sample size, which
// is enough for either mode because prefix and start code have the same size.
//
// Returns false (after logging) when the block buffer is missing, the
// destination cannot be resized, a copy fails, or a length prefix does not
// fit in the remaining sample data.
static bool WriteNALUs(MediaRawData* aDst, CMSampleBufferRef aSrc,
                       bool aAsAnnexB = false) {
  size_t srcRemaining = CMSampleBufferGetTotalSampleSize(aSrc);
  CMBlockBufferRef block = CMSampleBufferGetDataBuffer(aSrc);
  if (!block) {
    LOGE("Cannot get block buffer frome sample");
    return false;
  }
  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());
  size_t writtenLength = aDst->Size();
  // Ensure capacity.
  if (!writer->SetSize(writtenLength + srcRemaining)) {
    LOGE("Cannot allocate buffer");
    return false;
  }
  size_t readLength = 0;
  while (srcRemaining > 0) {
    // Extract the size of next NAL unit
    uint32_t unitSizeBE = 0;
    if (srcRemaining < sizeof(unitSizeBE)) {
      // Not even a complete length prefix is left: the sample is malformed.
      LOGE("Truncated NAL unit size prefix");
      return false;
    }
    MOZ_ASSERT(srcRemaining > sizeof(unitSizeBE));
    if (CMBlockBufferCopyDataBytes(block, readLength, sizeof(unitSizeBE),
                                   &unitSizeBE) != kCMBlockBufferNoErr) {
      LOGE("Cannot copy unit size bytes");
      return false;
    }
    size_t unitSize = CFSwapInt32BigToHost(unitSizeBE);

    if (aAsAnnexB) {
      // Replace unit size bytes with NALU start code.
      PodCopy(writer->Data() + writtenLength, kNALUStart, sizeof(kNALUStart));
      readLength += sizeof(unitSizeBE);
      srcRemaining -= sizeof(unitSizeBE);
      writtenLength += sizeof(kNALUStart);
    } else {
      // Copy unit size bytes + data.
      unitSize += sizeof(unitSizeBE);
    }

    // The destination has exactly srcRemaining bytes left, so a unit larger
    // than that would overrun it and underflow srcRemaining below.
    if (unitSize > srcRemaining) {
      LOGE("NAL unit size exceeds remaining sample data");
      return false;
    }
    MOZ_ASSERT(writtenLength + unitSize <= aDst->Size());
    // Copy NAL unit data
    if (CMBlockBufferCopyDataBytes(block, readLength, unitSize,
                                   writer->Data() + writtenLength) !=
        kCMBlockBufferNoErr) {
      LOGE("Cannot copy unit data");
      return false;
    }
    readLength += unitSize;
    srcRemaining -= unitSize;
    writtenLength += unitSize;
  }
  MOZ_ASSERT(writtenLength == aDst->Size());
  return true;
}

// Handles one result delivered by the compression session through
// FrameCallback().
//
// Encode errors, dropped frames and missing buffers are reported to
// ProcessOutput() with the matching EncodeResult and no data. Otherwise the
// sample is converted into a MediaRawData (Annex B unless the codec-specific
// configuration asks for the AVC format), stamped with the sample's
// presentation time and duration, and handed to ProcessOutput(). When the
// conversion fails a null output is passed instead.
//
// With SVC enabled (macOS 11.3 and later) the temporal layer id is set to 0
// for samples other samples depend on and to 1 otherwise.
void AppleVTEncoder::OutputFrame(OSStatus aStatus, VTEncodeInfoFlags aFlags,
                                 CMSampleBufferRef aBuffer) {
  LOGV("status: %d, flags: %d, buffer %p", aStatus, aFlags, aBuffer);

  if (aStatus != noErr) {
    ProcessOutput(nullptr, EncodeResult::EncodeError);
    return;
  }

  if (aFlags & kVTEncodeInfo_FrameDropped) {
    ProcessOutput(nullptr, EncodeResult::FrameDropped);
    return;
  }

  if (!aBuffer) {
    ProcessOutput(nullptr, EncodeResult::EmptyBuffer);
    return;
  }

  RefPtr<MediaRawData> output(new MediaRawData());

  if (__builtin_available(macos 11.3, *)) {
    if (mConfig.mScalabilityMode != ScalabilityMode::None) {
      // Every lookup below may come back empty; check each step instead of
      // handing a null reference to CoreFoundation.
      CFArrayRef attachments =
          CMSampleBufferGetSampleAttachmentsArray(aBuffer, true);
      CFDictionaryRef dict =
          (attachments && CFArrayGetCount(attachments) > 0)
              ? static_cast<CFDictionaryRef>(
                    CFArrayGetValueAtIndex(attachments, 0))
              : nullptr;
      CFBooleanRef isBaseLayerRef =
          dict ? static_cast<CFBooleanRef>(CFDictionaryGetValue(
                     dict, kCMSampleAttachmentKey_IsDependedOnByOthers))
               : nullptr;
      if (isBaseLayerRef) {
        Boolean isBaseLayer = CFBooleanGetValue(isBaseLayerRef);
        output->mTemporalLayerId.emplace(isBaseLayer ? 0 : 1);
      } else {
        LOGW("No IsDependedOnByOthers attachment, temporal layer id not set");
      }
    }
  }

  // Annex B is the default; AVC is used only when explicitly configured.
  bool forceAvcc = false;
  if (mConfig.mCodecSpecific && mConfig.mCodecSpecific->is<H264Specific>()) {
    forceAvcc = mConfig.mCodecSpecific->as<H264Specific>().mFormat ==
                H264BitStreamFormat::AVC;
  }
  bool asAnnexB = !forceAvcc;
  bool succeeded = WriteExtraData(output, aBuffer, asAnnexB) &&
                   WriteNALUs(output, aBuffer, asAnnexB);

  output->mTime = media::TimeUnit::FromSeconds(
      CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(aBuffer)));
  output->mDuration = media::TimeUnit::FromSeconds(
      CMTimeGetSeconds(CMSampleBufferGetOutputDuration(aBuffer)));
  LOGV("Make a %s output[time: %s, duration: %s]: %s",
       asAnnexB ? "AnnexB" : "AVCC", output->mTime.ToString().get(),
       output->mDuration.ToString().get(), succeeded ? "succeed" : "failed");
  ProcessOutput(succeeded ? std::move(output) : nullptr, EncodeResult::Success);
}

// Records the outcome of one encode operation and then lets
// MaybeResolveOrRejectEncodePromise() settle the pending encode promise.
//
// When called off the task queue, the call is re-dispatched to mTaskQueue and
// nothing else happens on the calling thread. On the task queue:
//   - a successful result with data is appended to mEncodedData;
//   - a successful result without data sets mError ("No converted output");
//   - EncodeError and EmptyBuffer set mError;
//   - FrameDropped sets mError unless the usage is real-time, where dropping
//     a frame is only logged as a warning.
void AppleVTEncoder::ProcessOutput(RefPtr<MediaRawData>&& aOutput,
                                   EncodeResult aResult) {
  if (!mTaskQueue->IsCurrentThreadIn()) {
    LOGV("Dispatch ProcessOutput to task queue");
    nsresult rv = mTaskQueue->Dispatch(
        NewRunnableMethod<RefPtr<MediaRawData>, EncodeResult>(
            "AppleVTEncoder::ProcessOutput", this,
            &AppleVTEncoder::ProcessOutput, std::move(aOutput), aResult));
    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
    Unused << rv;
    return;
  }

  if (aResult == EncodeResult::Success) {
    LOGV("Got %zu bytes of output", !aOutput.get() ? 0 : aOutput->Size());
    if (aOutput) {
      mEncodedData.AppendElement(std::move(aOutput));
    } else {
      mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No converted output");
    }
  } else {
    switch (aResult) {
      case EncodeResult::EncodeError:
        mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Failed to encode");
        break;
      case EncodeResult::EmptyBuffer:
        mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Buffer is empty");
        break;
      case EncodeResult::FrameDropped:
        if (mConfig.mUsage == Usage::Realtime) {
          // Dropping a frame in real-time usage is okay.
          LOGW("Frame is dropped");
        } else {
          // Some usages like transcoding should not drop a frame.
          LOGE("Frame is dropped");
          mError =
              MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Frame is dropped");
        }
        break;
      default:
        MOZ_ASSERT_UNREACHABLE("Unknown EncodeResult");
        break;
    }
  }

  // Every path that runs on the task queue ends by settling the promise.
  MaybeResolveOrRejectEncodePromise();
}

// Queues aSample for encoding on mTaskQueue and returns the promise that will
// carry the result. The work itself happens in ProcessEncode(); only one
// encode may be pending at a time, which is asserted on the task queue.
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Encode(
    const MediaData* aSample) {
  MOZ_ASSERT(aSample != nullptr);

  RefPtr<const VideoData> sample(aSample->As<const VideoData>());

  // `self` keeps the encoder alive until the queued task has run.
  return InvokeAsync(
      mTaskQueue, __func__,
      [self = RefPtr<AppleVTEncoder>(this), this, sample] {
        MOZ_ASSERT(mEncodePromise.IsEmpty(),
                   "Encode should not be called again before getting results");
        RefPtr<EncodePromise> pending = mEncodePromise.Ensure(__func__);
        ProcessEncode(sample);
        return pending;
      });
}

// Schedules ProcessReconfigure() on mTaskQueue with aConfigurationChanges and
// returns the promise produced by that call.
RefPtr<MediaDataEncoder::ReconfigurationPromise> AppleVTEncoder::Reconfigure(
    const RefPtr<const EncoderConfigurationChangeList>& aConfigurationChanges) {
  RefPtr<MediaDataEncoder::ReconfigurationPromise> promise =
      InvokeAsync(mTaskQueue, this, __func__,
                  &AppleVTEncoder::ProcessReconfigure, aConfigurationChanges);
  return promise;
}

void AppleVTEncoder::ProcessEncode(const RefPtr<const VideoData>& aSample) {

  LOGV("::ProcessEncode");

  AssertOnTaskQueue();

  MOZ_ASSERT(mSession);

  if (NS_FAILED(mError)) {

    LOGE("Pending error: %s", mError.Description().get());

    MaybeResolveOrRejectEncodePromise();

  AutoCVBufferRelease<CVImageBufferRef> buffer(

      CreateCVPixelBuffer(aSample->mImage));

  if (!buffer) {

    LOGE("Failed to allocate buffer");

    mError = MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate buffer");

    MaybeResolveOrRejectEncodePromise();

    return;

  CFDictionaryRef frameProps = nullptr;

  if (aSample->mKeyframe) {

    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};

    CFTypeRef values[] = {kCFBooleanTrue};

    MOZ_ASSERT(std::size(keys) == std::size(values));

    frameProps = CFDictionaryCreate(

        kCFAllocatorDefault, keys, values, std::size(keys),

        &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);

};

  VTEncodeInfoFlags info;

  OSStatus status = VTCompressionSessionEncodeFrame(

      mSession, buffer,

      CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S),

      CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S), frameProps,

      nullptr /* sourceFrameRefcon */, &info);

  if (status != noErr) {

    LOGE("VTCompressionSessionEncodeFrame error: %d", status);

    mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,

                         "VTCompressionSessionEncodeFrame error");

    MaybeResolveOrRejectEncodePromise();

    return;

  if (mConfig.mUsage != Usage::Realtime) {

    MaybeResolveOrRejectEncodePromise();

    return;

  // The latency between encoding a sample and receiving the encoded output is

  // critical in real-time usage. To minimize the latency, the output result

  // should be returned immediately once they are ready, instead of being

  // returned in the next or later Encode() iterations.

  LOGV("Encoding in progress");

  // Workaround for real-time encoding in OS versions < 11.

  ForceOutputIfNeeded();

// Applies aConfigurationChanges to the running session.
//
// Every change in the list is attempted, even after an earlier one failed.
// Bitrate mode, bitrate, frame rate and usage changes are applied to the
// session (bitrate mode, bitrate and usage are also stored in mConfig);
// dimension, display dimension, content hint, sample rate and channel count
// changes are not supported and count as failures.
//
// Returns a promise resolved with true when every change was applied, and
// rejected with NS_ERROR_DOM_MEDIA_FATAL_ERR otherwise.
RefPtr<MediaDataEncoder::ReconfigurationPromise>
AppleVTEncoder::ProcessReconfigure(
    const RefPtr<const EncoderConfigurationChangeList>& aConfigurationChanges) {
  // Start optimistic: the accumulator below can only turn false, never true.
  bool ok = true;
  for (const auto& confChange : aConfigurationChanges->mChanges) {
    // A reconfiguration on the fly succeeds if all changes can be applied
    // successfully. In case of failure, the encoder will be drained and
    // recreated.
    ok &= confChange.match(
        // Not supported yet
        [&](const DimensionsChange& aChange) -> bool { return false; },
        [&](const DisplayDimensionsChange& aChange) -> bool { return false; },
        [&](const BitrateModeChange& aChange) -> bool {
          mConfig.mBitrateMode = aChange.get();
          return SetBitrateAndMode(mSession, mConfig.mBitrateMode,
                                   mConfig.mBitrate);
        },
        [&](const BitrateChange& aChange) -> bool {
          mConfig.mBitrate = aChange.get().refOr(0);
          // 0 is the default in AppleVTEncoder: the encoder chooses the bitrate
          // based on the content.
          return SetBitrateAndMode(mSession, mConfig.mBitrateMode,
                                   mConfig.mBitrate);
        },
        [&](const FramerateChange& aChange) -> bool {
          // 0 means default, in VideoToolbox, and is valid, perform some light
          // sanitation on other values.
          double fps = aChange.get().refOr(0);
          // Range-check in floating point: converting an out-of-range double
          // to an integer first would be undefined behaviour.
          if (std::isnan(fps) || fps < 0 ||
              fps > static_cast<double>(std::numeric_limits<int32_t>::max())) {
            LOGE("Invalid fps of %lf", fps);
            return false;
          }
          return SetFrameRate(mSession, AssertedCast<int64_t>(fps));
        },
        [&](const UsageChange& aChange) -> bool {
          mConfig.mUsage = aChange.get();
          return SetRealtime(mSession, aChange.get() == Usage::Realtime);
        },
        [&](const ContentHintChange& aChange) -> bool { return false; },
        [&](const SampleRateChange& aChange) -> bool { return false; },
        [&](const NumberOfChannelsChange& aChange) -> bool { return false; });
  }
  using P = MediaDataEncoder::ReconfigurationPromise;
  if (ok) {
    return P::CreateAndResolve(true, __func__);
  }
  return P::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__);
}

// Returns how many planes an image of aPixelFormat is made of: 1 for the
// packed RGB and grayscale formats, 3 for YUV444P and YUV420P, 2 for NV12.
// Any other format is logged as unsupported and yields 0.
static size_t NumberOfPlanes(dom::ImageBitmapFormat aPixelFormat) {
  size_t planeCount = 0;
  switch (aPixelFormat) {
    case dom::ImageBitmapFormat::RGBA32:
    case dom::ImageBitmapFormat::BGRA32:
    case dom::ImageBitmapFormat::RGB24:
    case dom::ImageBitmapFormat::BGR24:
    case dom::ImageBitmapFormat::GRAY8:
      planeCount = 1;
      break;
    case dom::ImageBitmapFormat::YUV444P:
    case dom::ImageBitmapFormat::YUV420P:
      planeCount = 3;
      break;
    case dom::ImageBitmapFormat::YUV420SP_NV12:
      planeCount = 2;
      break;
    default:
      LOGE("Unsupported input pixel format");
      break;
  }
  return planeCount;
}

using namespace layers;

static void ReleaseSurface(void* aReleaseRef, const void* aBaseAddress) {

  RefPtr<gfx::DataSourceSurface> released =

      dont_AddRef(static_cast<gfx::DataSourceSurface*>(aReleaseRef));

// Release callback registered with CVPixelBufferCreateWithPlanarBytes().
// |aImageGrip| is the PlanarYCbCrImage that was AddRef'ed before the buffer
// was created; the remaining arguments are unused.
static void ReleaseImage(void* aImageGrip, const void* aDataPtr,
                         size_t aDataSize, size_t aNumOfPlanes,
                         const void** aPlanes) {
  auto* grippedImage = static_cast<PlanarYCbCrImage*>(aImageGrip);
  grippedImage->Release();
}

// Wraps the pixel data of |aSource| in a CVPixelBuffer without copying it.
//
// - Planar YCbCr images are wrapped plane by plane. The CoreVideo pixel format
//   and the number of planes are both derived from mConfig.mSourcePixelFormat.
// - Every other image is read through its DataSourceSurface and wrapped as a
//   single packed buffer, using the format reported by dom::ImageUtils.
//
// On success the returned buffer owns one reference on the backing image or
// surface, which is dropped through the ReleaseImage()/ReleaseSurface()
// callback registered with the buffer. Returns nullptr on any failure, in
// which case no extra reference is left behind.
//
// Must be called on the encoder's task queue.
CVPixelBufferRef AppleVTEncoder::CreateCVPixelBuffer(Image* aSource) {
  AssertOnTaskQueue();

  if (aSource->GetFormat() == ImageFormat::PLANAR_YCBCR) {
    PlanarYCbCrImage* image = aSource->AsPlanarYCbCrImage();
    const PlanarYCbCrImage::Data* yuv = image ? image->GetData() : nullptr;
    if (!yuv) {
      return nullptr;
    }

    // Bail out gracefully when the configured format has no CoreVideo
    // equivalent instead of dereferencing an empty Maybe.
    Maybe<OSType> planarFormat = MapPixelFormat(mConfig.mSourcePixelFormat);
    if (planarFormat.isNothing()) {
      LOGE("Failed to get kCVPixelFormatType");
      return nullptr;
    }

    size_t numPlanes = NumberOfPlanes(mConfig.mSourcePixelFormat);
    auto ySize = yuv->YDataSize();
    auto cbcrSize = yuv->CbCrDataSize();
    void* addresses[3] = {};
    size_t widths[3] = {};
    size_t heights[3] = {};
    size_t strides[3] = {};
    // Fill the plane tables from the last plane down to the first; each case
    // deliberately falls through so that N planes populate entries N-1..0.
    switch (numPlanes) {
      case 3:
        addresses[2] = yuv->mCrChannel;
        widths[2] = cbcrSize.width;
        heights[2] = cbcrSize.height;
        strides[2] = yuv->mCbCrStride;
        [[fallthrough]];
      case 2:
        addresses[1] = yuv->mCbChannel;
        widths[1] = cbcrSize.width;
        heights[1] = cbcrSize.height;
        strides[1] = yuv->mCbCrStride;
        [[fallthrough]];
      case 1:
        addresses[0] = yuv->mYChannel;
        widths[0] = ySize.width;
        heights[0] = ySize.height;
        strides[0] = yuv->mYStride;
        break;
      default:
        // NumberOfPlanes() returned 0: unsupported source format.
        return nullptr;
    }

    CVPixelBufferRef buffer = nullptr;
    image->AddRef();  // Grip input buffers.
    CVReturn rv = CVPixelBufferCreateWithPlanarBytes(
        kCFAllocatorDefault, yuv->mPictureRect.width, yuv->mPictureRect.height,
        planarFormat.value(), nullptr /* dataPtr */, 0 /* dataSize */,
        numPlanes, addresses, widths, heights, strides,
        ReleaseImage /* releaseCallback */, image /* releaseRefCon */,
        nullptr /* pixelBufferAttributes */, &buffer);
    if (rv == kCVReturnSuccess) {
      // |image| will be released in |ReleaseImage()|.
      return buffer;
    }
    LOGE("CVPixelBufferCreateWithPlanarBytes error: %d", rv);
    // The buffer was not created, so undo the grip taken above ourselves.
    image->Release();
    return nullptr;
  }

  RefPtr<gfx::SourceSurface> surface = aSource->GetAsSourceSurface();
  if (!surface) {
    LOGE("Failed to get SourceSurface");
    return nullptr;
  }

  RefPtr<gfx::DataSourceSurface> dataSurface = surface->GetDataSurface();
  if (!dataSurface) {
    LOGE("Failed to get DataSurface");
    return nullptr;
  }

  // NOTE(review): |map| goes out of scope when this function returns, while
  // the pixel buffer created below keeps using map.GetData(). Confirm that the
  // surface's data pointer stays valid after the map is released.
  gfx::DataSourceSurface::ScopedMap map(dataSurface,
                                        gfx::DataSourceSurface::READ);
  if (NS_WARN_IF(!map.IsMapped())) {
    LOGE("Failed to map DataSurface");
    return nullptr;
  }

  const dom::ImageUtils imageUtils(aSource);
  Maybe<dom::ImageBitmapFormat> format = imageUtils.GetFormat();
  if (format.isNothing()) {
    LOGE("Image conversion not implemented in AppleVTEncoder");
    return nullptr;
  }

  // A mismatch with the configured format is only logged; the image is
  // encoded using its own format.
  if (format.ref() != mConfig.mSourcePixelFormat) {
    LOGV("Encode image in %s format, even though config's source format is %s",
         dom::GetEnumString(format.ref()).get(),
         dom::GetEnumString(mConfig.mSourcePixelFormat).get());
  }

  Maybe<OSType> imgFormat = MapPixelFormat(format.ref());
  if (imgFormat.isNothing()) {
    LOGE("Failed to get kCVPixelFormatType");
    return nullptr;
  }

  CVPixelBufferRef buffer = nullptr;
  // Hand our surface reference over to the pixel buffer as a raw pointer.
  gfx::DataSourceSurface* dss = dataSurface.forget().take();
  CVReturn rv = CVPixelBufferCreateWithBytes(
      kCFAllocatorDefault, dss->GetSize().Width(), dss->GetSize().Height(),
      imgFormat.value(), map.GetData(), map.GetStride(), ReleaseSurface, dss,
      nullptr, &buffer);
  if (rv == kCVReturnSuccess) {
    // |dss| will be released in |ReleaseSurface()|.
    return buffer;
  }
  LOGE("CVPixelBufferCreateWithBytes error: %d", rv);
  // The buffer was not created: re-adopt the leaked reference so it is dropped
  // when |released| goes out of scope.
  RefPtr<gfx::DataSourceSurface> released = dont_AddRef(dss);
  return nullptr;
}

// Public drain entry point: hops to the encoder's task queue, where
// ProcessDrain() does the actual work, and returns that call's promise.
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Drain() {
  RefPtr<MediaDataEncoder::EncodePromise> drained =
      InvokeAsync(mTaskQueue, this, __func__, &AppleVTEncoder::ProcessDrain);
  return drained;
}

// Task-queue half of Drain(). Asks the compression session to complete all
// frames, settles any pending encode promise, then resolves the returned
// promise with whatever encoded outputs have been collected by the time a
// follow-up task runs. Rejects with NS_ERROR_DOM_MEDIA_FATAL_ERR when the
// session fails to complete its frames.
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessDrain() {
  LOGV("::ProcessDrain");
  AssertOnTaskQueue();
  MOZ_ASSERT(mSession);

  const OSStatus completeStatus =
      VTCompressionSessionCompleteFrames(mSession, kCMTimeIndefinite);
  if (completeStatus != noErr) {
    LOGE("VTCompressionSessionCompleteFrames error");
    return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                                          __func__);
  }

  // Resolve the pending encode promise if any.
  MaybeResolveOrRejectEncodePromise();

  // VTCompressionSessionCompleteFrames() could have queued multiple tasks with
  // the new drained frames. Dispatch a task after them to resolve the promise
  // with those frames.
  return InvokeAsync(
      mTaskQueue, __func__, [encoder = RefPtr<AppleVTEncoder>(this)]() {
        EncodedData drainedFrames(std::move(encoder->mEncodedData));
        LOGV("Resolve drain promise with %zu encoded outputs",
             drainedFrames.Length());
        // Leave the member holding a fresh, empty container.
        encoder->mEncodedData = EncodedData();
        return EncodePromise::CreateAndResolve(std::move(drainedFrames),
                                               __func__);
      });
}

// Public shutdown entry point: hops to the encoder's task queue, where
// ProcessShutdown() does the actual work, and returns that call's promise.
RefPtr<ShutdownPromise> AppleVTEncoder::Shutdown() {
  RefPtr<ShutdownPromise> shutdown = InvokeAsync(
      mTaskQueue, this, __func__, &AppleVTEncoder::ProcessShutdown);
  return shutdown;
}

// Task-queue half of Shutdown(). Invalidates and releases the compression
// session (if one exists), settles any pending encode promise with a
// "canceled" error, and returns an already-resolved shutdown promise.
RefPtr<ShutdownPromise> AppleVTEncoder::ProcessShutdown() {
  LOGD("::ProcessShutdown");
  AssertOnTaskQueue();

  if (mSession) {
    VTCompressionSessionInvalidate(mSession);
    CFRelease(mSession);
    mInited = false;
    mSession = nullptr;
  }

  // The cancellation error is only set while the pending encode promise is
  // being settled; it is cleared again right afterwards.
  mError = MediaResult(NS_ERROR_DOM_MEDIA_CANCELED, "Canceled in shutdown");
  MaybeResolveOrRejectEncodePromise();
  mError = NS_OK;

  RefPtr<ShutdownPromise> done =
      ShutdownPromise::CreateAndResolve(true, __func__);
  return done;
}

// Applies |aBitsPerSec| to the compression session on the encoder's task
// queue, keeping the bitrate mode from the current configuration. The returned
// promise resolves with true on success and rejects with
// NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR when the session refuses the setting.
RefPtr<GenericPromise> AppleVTEncoder::SetBitrate(uint32_t aBitsPerSec) {
  return InvokeAsync(
      mTaskQueue, __func__,
      [encoder = RefPtr<AppleVTEncoder>(this), aBitsPerSec]() {
        MOZ_ASSERT(encoder->mSession);
        const bool applied = SetBitrateAndMode(
            encoder->mSession, encoder->mConfig.mBitrateMode, aBitsPerSec);
        if (!applied) {
          return GenericPromise::CreateAndReject(
              NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, __func__);
        }
        return GenericPromise::CreateAndResolve(true, __func__);
      });
}

// Settles the pending encode promise, if there is one: rejects it with mError
// when mError holds a failure code, otherwise resolves it with the encoded
// outputs accumulated in mEncodedData (which are moved out). Any armed
// progress timer is cancelled first. Does nothing but log when no promise is
// pending.
void AppleVTEncoder::MaybeResolveOrRejectEncodePromise() {
  AssertOnTaskQueue();

  const bool hasPendingPromise = !mEncodePromise.IsEmpty();
  if (!hasPendingPromise) {
    LOGV(
        "No pending promise to resolve(pending outputs: %zu) or reject(err: "
        "%s)",
        mEncodedData.Length(), mError.Description().get());
    return;
  }

  // The promise is about to be settled, so the timer has nothing left to do.
  if (mTimer) {
    mTimer->Cancel();
    mTimer = nullptr;
  }

  if (NS_FAILED(mError.Code())) {
    LOGE("Rejecting encode promise with error: %s", mError.Description().get());
    mEncodePromise.Reject(mError, __func__);
  } else {
    LOGV("Resolving with %zu encoded outputs", mEncodedData.Length());
    mEncodePromise.Resolve(std::move(mEncodedData), __func__);
  }
}

void AppleVTEncoder::ForceOutputIfNeeded() {

  if (__builtin_available(macos 11.0, *)) {

    return;

  // Ideally, OutputFrame (called via FrameCallback) should resolve the encode

  // promise. However, sometimes output is produced only after multiple

  // inputs. To ensure continuous encoding, we force the encoder to produce a

  // potentially empty output if no result is received in 50 ms.

  RefPtr<AppleVTEncoder> self = this;

  auto r = NS_NewTimerWithCallback(

      [self](nsITimer* aTimer) {

        if (!self->mSession) {

          LOGV("Do nothing since the encoder has been shut down");

          return;

        LOGV("Resolving the pending promise");

        self->MaybeResolveOrRejectEncodePromise();

},

      TimeDuration::FromMilliseconds(50), nsITimer::TYPE_ONE_SHOT,

      "EncodingProgressChecker", mTaskQueue);

  if (r.isErr()) {

    LOGE(

        "Failed to set an encoding progress checker. Resolve the pending "

        "promise now");

    MaybeResolveOrRejectEncodePromise();

    return;

  mTimer = r.unwrap();

#undef LOGE

#undef LOGW

#undef LOGD

#undef LOGV

}  // namespace mozilla