video_codec_tester.cc

/*

 *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.

 *  Use of this source code is governed by a BSD-style license

 *  that can be found in the LICENSE file in the root of the source

 *  tree. An additional intellectual property rights grant can be found

 *  in the file PATENTS.  All contributing project authors may

 *  be found in the AUTHORS file in the root of the source tree.

*/

#include "test/video_codec_tester.h"

#include <algorithm>

#include <numeric>

#include <set>

#include <tuple>

#include <utility>

#include "absl/strings/match.h"

#include "api/array_view.h"

#include "api/environment/environment.h"

#include "api/environment/environment_factory.h"

#include "api/test/create_frame_generator.h"

#include "api/test/frame_generator_interface.h"

#include "api/units/time_delta.h"

#include "api/units/timestamp.h"

#include "api/video/builtin_video_bitrate_allocator_factory.h"

#include "api/video/i420_buffer.h"

#include "api/video/video_bitrate_allocator.h"

#include "api/video/video_codec_type.h"

#include "api/video/video_frame.h"

#include "api/video_codecs/h264_profile_level_id.h"

#include "api/video_codecs/simulcast_stream.h"

#include "api/video_codecs/video_decoder.h"

#include "api/video_codecs/video_encoder.h"

#include "media/base/media_constants.h"

#include "modules/video_coding/codecs/av1/av1_svc_config.h"

#include "modules/video_coding/codecs/h264/include/h264.h"

#include "modules/video_coding/codecs/vp9/svc_config.h"

#include "modules/video_coding/include/video_codec_interface.h"

#include "modules/video_coding/include/video_error_codes.h"

#include "modules/video_coding/svc/scalability_mode_util.h"

#include "modules/video_coding/utility/ivf_file_writer.h"

#include "rtc_base/event.h"

#include "rtc_base/logging.h"

#include "rtc_base/strings/string_builder.h"

#include "rtc_base/synchronization/mutex.h"

#include "rtc_base/task_queue_for_test.h"

#include "rtc_base/time_utils.h"

#include "system_wrappers/include/sleep.h"

#include "test/testsupport/file_utils.h"

#include "test/testsupport/frame_reader.h"

#include "test/testsupport/video_frame_writer.h"

#include "third_party/libyuv/include/libyuv/compare.h"

#include "video/config/encoder_stream_factory.h"

namespace webrtc {

namespace test {

namespace {

using CodedVideoSource = VideoCodecTester::CodedVideoSource;

using VideoSourceSettings = VideoCodecTester::VideoSourceSettings;

using EncodingSettings = VideoCodecTester::EncodingSettings;

using LayerSettings = EncodingSettings::LayerSettings;

using LayerId = VideoCodecTester::LayerId;

using EncoderSettings = VideoCodecTester::EncoderSettings;

using DecoderSettings = VideoCodecTester::DecoderSettings;

using PacingSettings = VideoCodecTester::PacingSettings;

using PacingMode = PacingSettings::PacingMode;

using VideoCodecStats = VideoCodecTester::VideoCodecStats;

using DecodeCallback =

    absl::AnyInvocable<void(const VideoFrame& decoded_frame)>;

using webrtc::test::ImprovementDirection;

// Clock rate used in this file to convert RTP timestamps (and differences of
// RTP timestamps) into time: `timestamp_rtp / k90kHz` yields a duration.
constexpr Frequency k90kHz = Frequency::Hertz(90000);

// Scalability modes for which `VideoCodecAnalyzer::Slice()` always treats a
// temporal unit as SVC, i.e. a layer filter also selects the frames of all
// lower spatial layers.
const std::set<ScalabilityMode> kFullSvcScalabilityModes{

    ScalabilityMode::kL2T1,  ScalabilityMode::kL2T1h, ScalabilityMode::kL2T2,

    ScalabilityMode::kL2T2h, ScalabilityMode::kL2T3,  ScalabilityMode::kL2T3h,

    ScalabilityMode::kL3T1,  ScalabilityMode::kL3T1h, ScalabilityMode::kL3T2,

    ScalabilityMode::kL3T2h, ScalabilityMode::kL3T3,  ScalabilityMode::kL3T3h};

// Scalability modes for which `VideoCodecAnalyzer::Slice()` treats a temporal
// unit as SVC only when its spatial layer 0 frame is a keyframe.
const std::set<ScalabilityMode> kKeySvcScalabilityModes{

    ScalabilityMode::kL2T1_KEY,       ScalabilityMode::kL2T2_KEY,

    ScalabilityMode::kL2T2_KEY_SHIFT, ScalabilityMode::kL2T3_KEY,

    ScalabilityMode::kL3T1_KEY,       ScalabilityMode::kL3T2_KEY,

    ScalabilityMode::kL3T3_KEY};

// Returns `buffer` resized to `scaled_width` x `scaled_height`. A buffer that
// already has the requested dimensions is returned as is, without scaling.
rtc::scoped_refptr<VideoFrameBuffer> ScaleFrame(
    rtc::scoped_refptr<VideoFrameBuffer> buffer,
    int scaled_width,
    int scaled_height) {
  const bool needs_scaling =
      buffer->width() != scaled_width || buffer->height() != scaled_height;
  if (!needs_scaling) {
    return buffer;
  }
  return buffer->Scale(scaled_width, scaled_height);
}

// A video source that reads frames from YUV, Y4M or IVF (compressed with VPx,

// AV1 or H264) files.

class VideoSource {

 public:

  explicit VideoSource(VideoSourceSettings source_settings)

      : source_settings_(source_settings) {

    if (absl::EndsWith(source_settings.file_path, "ivf")) {

      ivf_reader_ = CreateFromIvfFileFrameGenerator(CreateEnvironment(),

                                                    source_settings.file_path);

    } else if (absl::EndsWith(source_settings.file_path, "y4m")) {

      yuv_reader_ =

          CreateY4mFrameReader(source_settings_.file_path,

                               YuvFrameReaderImpl::RepeatMode::kPingPong);

    } else {

      yuv_reader_ = CreateYuvFrameReader(

          source_settings_.file_path, source_settings_.resolution,

          YuvFrameReaderImpl::RepeatMode::kPingPong);

    RTC_CHECK(ivf_reader_ || yuv_reader_);

  VideoFrame PullFrame(uint32_t timestamp_rtp,

                       Resolution output_resolution,

                       Frequency output_framerate) {

    // If the source and output frame rates differ, resampling is performed by

    // skipping or repeating source frames.

    time_delta_ = time_delta_.value_or(1 / source_settings_.framerate);

    int seek = 0;

    while (time_delta_->us() <= 0) {

      *time_delta_ += 1 / source_settings_.framerate;

      ++seek;

    *time_delta_ -= 1 / output_framerate;

    if (seek > 0 || last_frame_ == nullptr) {

      rtc::scoped_refptr<VideoFrameBuffer> buffer;

      do {

        if (yuv_reader_) {

          buffer = yuv_reader_->PullFrame();

        } else {

          buffer = ivf_reader_->NextFrame().buffer;

      } while (--seek > 0);

      RTC_CHECK(buffer) << "Could not read frame. timestamp_rtp "

                        << timestamp_rtp;

      last_frame_ = buffer;

    rtc::scoped_refptr<VideoFrameBuffer> buffer = ScaleFrame(

        last_frame_, output_resolution.width, output_resolution.height);

    return VideoFrame::Builder()

        .set_video_frame_buffer(buffer)

        .set_rtp_timestamp(timestamp_rtp)

        .set_timestamp_us((timestamp_rtp / k90kHz).us())

        .build();

 private:

  VideoSourceSettings source_settings_;

  std::unique_ptr<FrameReader> yuv_reader_;

  std::unique_ptr<FrameGeneratorInterface> ivf_reader_;

  rtc::scoped_refptr<VideoFrameBuffer> last_frame_;

  // Time delta between the source and output video. Used for frame rate

  // scaling. This value increases by the source frame duration each time a

  // frame is read from the source, and decreases by the output frame duration

  // each time an output frame is delivered.

  absl::optional<TimeDelta> time_delta_;

};

// Pacer calculates delay necessary to keep frame encode or decode call spaced

// from the previous calls by the pacing time. `Schedule` is expected to be

// called as close as possible to posting frame encode or decode task. This

// class is not thread safe.

class Pacer {

 public:

  explicit Pacer(PacingSettings settings)

      : settings_(settings), delay_(TimeDelta::Zero()) {}

  Timestamp Schedule(Timestamp timestamp) {

    Timestamp now = Timestamp::Micros(rtc::TimeMicros());

    if (settings_.mode == PacingMode::kNoPacing) {

      return now;

    Timestamp scheduled = now;

    if (prev_scheduled_) {

      scheduled = *prev_scheduled_ + PacingTime(timestamp);

      if (scheduled < now) {

        scheduled = now;

    prev_timestamp_ = timestamp;

    prev_scheduled_ = scheduled;

    return scheduled;

 private:

  TimeDelta PacingTime(Timestamp timestamp) {

    if (settings_.mode == PacingMode::kRealTime) {

      return timestamp - *prev_timestamp_;

    RTC_CHECK_EQ(PacingMode::kConstantRate, settings_.mode);

    return 1 / settings_.constant_rate;

  PacingSettings settings_;

  absl::optional<Timestamp> prev_timestamp_;

  absl::optional<Timestamp> prev_scheduled_;

  TimeDelta delay_;

};

// A task queue that limits its maximum size and guarantees FIFO execution of

// the scheduled tasks.

class LimitedTaskQueue {

 public:

  // Frame reading, encoding and decoding are handled in separate threads. If

  // encoding or decoding is slow, the frame reader may run far ahead, loading

  // many large frames into memory. To prevent this, we limit the maximum size

  // of the task queue. When this limit is reached, posting new tasks is blocked

  // until the queue size is reduced by executing previous tasks.

  static constexpr int kMaxTaskQueueSize = 3;

  LimitedTaskQueue() : queue_size_(0) {}

  void PostScheduledTask(absl::AnyInvocable<void() &&> task,

                         Timestamp scheduled) {

      // Block posting new tasks until the queue size is reduced.

      MutexLock lock(&mutex_);

      while (queue_size_ >= kMaxTaskQueueSize) {

        task_executed_.Wait(TimeDelta::Seconds(10));

        task_executed_.Reset();

    ++queue_size_;

    task_queue_.PostTask([this, task = std::move(task), scheduled]() mutable {

      Timestamp now = Timestamp::Millis(rtc::TimeMillis());

      int64_t wait_ms = (scheduled - now).ms();

      if (wait_ms > 0) {

        RTC_CHECK_LT(wait_ms, 10000) << "Too high wait_ms " << wait_ms;

        SleepMs(wait_ms);

      std::move(task)();

      --queue_size_;

      task_executed_.Set();

});

  void PostTask(absl::AnyInvocable<void() &&> task) {

    Timestamp now = Timestamp::Millis(rtc::TimeMillis());

    PostScheduledTask(std::move(task), now);

  void PostTaskAndWait(absl::AnyInvocable<void() &&> task) {

    PostTask(std::move(task));

    WaitForPreviouslyPostedTasks();

  void WaitForPreviouslyPostedTasks() {

    task_queue_.WaitForPreviouslyPostedTasks();

 private:

  TaskQueueForTest task_queue_;

  std::atomic_int queue_size_;

  rtc::Event task_executed_;

  Mutex mutex_;

};

class TesterY4mWriter {

 public:

  explicit TesterY4mWriter(absl::string_view base_path)

      : base_path_(base_path) {}

  ~TesterY4mWriter() {

    task_queue_.SendTask([] {});

  void Write(const VideoFrame& frame, int spatial_idx) {

    task_queue_.PostTask([this, frame, spatial_idx] {

      if (y4m_writers_.find(spatial_idx) == y4m_writers_.end()) {

        std::string file_path =

            base_path_ + "-s" + std::to_string(spatial_idx) + ".y4m";

        Y4mVideoFrameWriterImpl* y4m_writer = new Y4mVideoFrameWriterImpl(

            file_path, frame.width(), frame.height(), /*fps=*/30);

        RTC_CHECK(y4m_writer);

        y4m_writers_[spatial_idx] =

            std::unique_ptr<VideoFrameWriter>(y4m_writer);

      y4m_writers_.at(spatial_idx)->WriteFrame(frame);

});

 private:

  std::string base_path_;

  std::map<int, std::unique_ptr<VideoFrameWriter>> y4m_writers_;

  TaskQueueForTest task_queue_;

};

class TesterIvfWriter {

 public:

  explicit TesterIvfWriter(absl::string_view base_path)

      : base_path_(base_path) {}

  ~TesterIvfWriter() {

    task_queue_.SendTask([] {});

  void Write(const EncodedImage& encoded_frame, VideoCodecType codec_type) {

    task_queue_.PostTask([this, encoded_frame, codec_type] {

      int spatial_idx = encoded_frame.SpatialIndex().value_or(

          encoded_frame.SimulcastIndex().value_or(0));

      if (ivf_file_writers_.find(spatial_idx) == ivf_file_writers_.end()) {

        std::string ivf_path =

            base_path_ + "-s" + std::to_string(spatial_idx) + ".ivf";

        FileWrapper ivf_file = FileWrapper::OpenWriteOnly(ivf_path);

        RTC_CHECK(ivf_file.is_open());

        std::unique_ptr<IvfFileWriter> ivf_writer =

            IvfFileWriter::Wrap(std::move(ivf_file), /*byte_limit=*/0);

        RTC_CHECK(ivf_writer);

        ivf_file_writers_[spatial_idx] = std::move(ivf_writer);

      // To play: ffplay -vcodec vp8|vp9|av1|hevc|h264 filename

      ivf_file_writers_.at(spatial_idx)->WriteFrame(encoded_frame, codec_type);

});

 private:

  std::string base_path_;

  std::map<int, std::unique_ptr<IvfFileWriter>> ivf_file_writers_;

  TaskQueueForTest task_queue_;

};

class LeakyBucket {

 public:

  LeakyBucket() : level_bits_(0) {}

  // Updates bucket level and returns its current level in bits. Data is removed

  // from bucket with rate equal to target bitrate of previous frame. Bucket

  // level is tracked with floating point precision. Returned value of bucket

  // level is rounded up.

  int Update(const VideoCodecStats::Frame& frame) {

    RTC_CHECK(frame.target_bitrate) << "Bitrate must be specified.";

    if (prev_frame_) {

      RTC_CHECK_GT(frame.timestamp_rtp, prev_frame_->timestamp_rtp)

          << "Timestamp must increase.";

      TimeDelta passed =

          (frame.timestamp_rtp - prev_frame_->timestamp_rtp) / k90kHz;

      level_bits_ -=

          prev_frame_->target_bitrate->bps<double>() * passed.seconds<double>();

      level_bits_ = std::max(level_bits_, 0.0);

    prev_frame_ = frame;

    level_bits_ += frame.frame_size.bytes() * 8;

    return static_cast<int>(std::ceil(level_bits_));

 private:

  absl::optional<VideoCodecStats::Frame> prev_frame_;

  double level_bits_;

};

class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {

 public:

  // Registers the start of encoding of `video_frame`. The encode start time is
  // sampled on the calling thread; the bookkeeping runs on the analyzer's task
  // queue. A new entry for spatial layer 0 is created for the frame's RTP
  // timestamp and `encoding_settings` is stored under the same timestamp.
  // Seeing the same RTP timestamp twice is a fatal error.
  void StartEncode(const VideoFrame& video_frame,
                   const EncodingSettings& encoding_settings) {
    int64_t encode_start_us = rtc::TimeMicros();
    task_queue_.PostTask([this, timestamp_rtp = video_frame.rtp_timestamp(),
                          encoding_settings, encode_start_us]() {
      RTC_CHECK(frames_.find(timestamp_rtp) == frames_.end())
          << "Duplicate frame. Frame with timestamp " << timestamp_rtp
          << " was seen before";

      Frame frame;
      frame.timestamp_rtp = timestamp_rtp;
      // Terminated with ';' so the assignment and the emplace below are two
      // separate statements rather than one comma expression.
      frame.encode_start = Timestamp::Micros(encode_start_us);
      frames_.emplace(timestamp_rtp,
                      std::map<int, Frame>{{/*spatial_idx=*/0, frame}});
      encoding_settings_.emplace(timestamp_rtp, encoding_settings);
    });
  }

  void FinishEncode(const EncodedImage& encoded_frame) {

    int64_t encode_finished_us = rtc::TimeMicros();

    task_queue_.PostTask(

        [this, timestamp_rtp = encoded_frame.RtpTimestamp(),

         spatial_idx = encoded_frame.SpatialIndex().value_or(

             encoded_frame.SimulcastIndex().value_or(0)),

         temporal_idx = encoded_frame.TemporalIndex().value_or(0),

         width = encoded_frame._encodedWidth,

         height = encoded_frame._encodedHeight,

         frame_type = encoded_frame._frameType,

         frame_size_bytes = encoded_frame.size(), qp = encoded_frame.qp_,

         encode_finished_us]() {

          if (spatial_idx > 0) {

            RTC_CHECK(frames_.find(timestamp_rtp) != frames_.end())

                << "Spatial layer 0 frame with timestamp " << timestamp_rtp

                << " was not seen before";

            const Frame& base_frame =

                frames_.at(timestamp_rtp).at(/*spatial_idx=*/0);

            frames_.at(timestamp_rtp).emplace(spatial_idx, base_frame);

          Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);

          frame.layer_id = {.spatial_idx = spatial_idx,

                            .temporal_idx = temporal_idx};

          frame.width = width;

          frame.height = height;

          frame.frame_size = DataSize::Bytes(frame_size_bytes);

          frame.qp = qp;

          frame.keyframe = frame_type == VideoFrameType::kVideoFrameKey;

          frame.encode_time =

              Timestamp::Micros(encode_finished_us) - frame.encode_start;

          frame.encoded = true;

});

  void StartDecode(const EncodedImage& encoded_frame) {

    int64_t decode_start_us = rtc::TimeMicros();

    task_queue_.PostTask(

        [this, timestamp_rtp = encoded_frame.RtpTimestamp(),

         spatial_idx = encoded_frame.SpatialIndex().value_or(

             encoded_frame.SimulcastIndex().value_or(0)),

         temporal_idx = encoded_frame.TemporalIndex().value_or(0),

         width = encoded_frame._encodedWidth,

         height = encoded_frame._encodedHeight,

         frame_type = encoded_frame._frameType, qp = encoded_frame.qp_,

         frame_size_bytes = encoded_frame.size(), decode_start_us]() {

          bool decode_only = frames_.find(timestamp_rtp) == frames_.end();

          if (decode_only || frames_.at(timestamp_rtp).find(spatial_idx) ==

                                 frames_.at(timestamp_rtp).end()) {

            Frame frame;

            frame.timestamp_rtp = timestamp_rtp;

            frame.layer_id = {.spatial_idx = spatial_idx,

                              .temporal_idx = temporal_idx};

            frame.width = width;

            frame.height = height;

            frame.keyframe = frame_type == VideoFrameType::kVideoFrameKey;

            frame.qp = qp;

            if (decode_only) {

              frame.frame_size = DataSize::Bytes(frame_size_bytes);

              frames_[timestamp_rtp] = {{spatial_idx, frame}};

            } else {

              frames_[timestamp_rtp][spatial_idx] = frame;

          Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);

          frame.decode_start = Timestamp::Micros(decode_start_us);

});

  void FinishDecode(const VideoFrame& decoded_frame,

                    int spatial_idx,

                    absl::optional<VideoFrame> ref_frame = absl::nullopt) {

    int64_t decode_finished_us = rtc::TimeMicros();

    task_queue_.PostTask([this, timestamp_rtp = decoded_frame.rtp_timestamp(),

                          spatial_idx, width = decoded_frame.width(),

                          height = decoded_frame.height(),

                          decode_finished_us]() {

      Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);

      frame.decode_time =

          Timestamp::Micros(decode_finished_us) - frame.decode_start;

      if (!frame.encoded) {

        frame.width = width;

        frame.height = height;

      frame.decoded = true;

});

    if (ref_frame.has_value()) {

      // Copy hardware-backed frame into main memory to release output buffers

      // which number may be limited in hardware decoders.

      rtc::scoped_refptr<I420BufferInterface> decoded_buffer =

          decoded_frame.video_frame_buffer()->ToI420();

      task_queue_.PostTask([this, decoded_buffer, ref_frame,

                            timestamp_rtp = decoded_frame.rtp_timestamp(),

                            spatial_idx]() {

        rtc::scoped_refptr<I420BufferInterface> ref_buffer =

            ScaleFrame(ref_frame->video_frame_buffer(), decoded_buffer->width(),

                       decoded_buffer->height())

                ->ToI420();

        Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);

        frame.psnr = CalcPsnr(*decoded_buffer, *ref_buffer);

});

  // Returns the recorded frames that match `filter`, ordered by RTP timestamp.
  //
  // A frame matches when its RTP timestamp lies within the filter's timestamp
  // range and, if the filter names a layer, when its temporal index does not
  // exceed the filter's and its spatial index equals the filter's (non-SVC
  // temporal units) or does not exceed it (SVC temporal units).
  //
  // With `merge` false every matching frame is returned individually. With
  // `merge` true the matching frames of one temporal unit are folded into a
  // single frame based on the highest matching spatial layer; when encoding
  // settings were recorded, the merged frame also gets its target bitrate and
  // target frame rate.
  std::vector<Frame> Slice(Filter filter, bool merge) const {
    const bool has_encoding_settings = !encoding_settings_.empty();

    const auto matches_filter = [&filter](const Frame& frame, bool is_svc) {
      if (frame.timestamp_rtp < filter.min_timestamp_rtp ||
          frame.timestamp_rtp > filter.max_timestamp_rtp) {
        return false;
      }
      if (!filter.layer_id) {
        return true;
      }
      if (is_svc &&
          frame.layer_id.spatial_idx > filter.layer_id->spatial_idx) {
        return false;
      }
      if (!is_svc &&
          frame.layer_id.spatial_idx != filter.layer_id->spatial_idx) {
        return false;
      }
      return !(frame.layer_id.temporal_idx > filter.layer_id->temporal_idx);
    };

    std::vector<Frame> result;
    for (const auto& [timestamp_rtp, temporal_unit_frames] : frames_) {
      if (temporal_unit_frames.empty()) {
        continue;
      }

      // Key-SVC modes are treated as SVC only on temporal units whose spatial
      // layer 0 frame is a keyframe.
      bool is_svc = false;
      if (has_encoding_settings) {
        const ScalabilityMode mode =
            encoding_settings_.at(timestamp_rtp).scalability_mode;
        is_svc = kFullSvcScalabilityModes.count(mode) > 0 ||
                 (kKeySvcScalabilityModes.count(mode) > 0 &&
                  temporal_unit_frames.at(0).keyframe);
      }

      std::vector<Frame> selected;
      for (const auto& layer_entry : temporal_unit_frames) {
        if (matches_filter(layer_entry.second, is_svc)) {
          selected.push_back(layer_entry.second);
        }
      }
      if (selected.empty()) {
        continue;
      }

      if (!merge) {
        result.insert(result.end(), selected.begin(), selected.end());
        continue;
      }

      // Fold all lower layers into the highest selected layer.
      Frame merged = selected.back();
      for (size_t i = 0; i + 1 < selected.size(); ++i) {
        const Frame& layer = selected[i];
        merged.decoded |= layer.decoded;
        merged.encoded |= layer.encoded;
        merged.frame_size += layer.frame_size;
        merged.keyframe |= layer.keyframe;
        merged.encode_time = std::max(merged.encode_time, layer.encode_time);
        merged.decode_time = std::max(merged.decode_time, layer.decode_time);
      }

      if (has_encoding_settings) {
        const auto settings_it = encoding_settings_.find(merged.timestamp_rtp);
        RTC_CHECK(settings_it != encoding_settings_.end())
            << "No encoding settings for frame " << merged.timestamp_rtp;
        const EncodingSettings& es = settings_it->second;
        merged.target_bitrate = GetTargetBitrate(es, filter.layer_id);
        merged.target_framerate = GetTargetFramerate(es, filter.layer_id);
      }

      result.push_back(merged);
    }
    return result;
  }

  // Aggregates the frames selected by `filter` (spatial layers merged per
  // temporal unit) into per-stream statistics.
  //
  // Per-frame samples (dimensions, size, QP, encode/decode time, PSNR, targets
  // and leaky-bucket transmission time) are added for every selected frame
  // that carries the respective data. If at least one frame has a non-zero
  // size, the encoded bitrate and frame rate over the whole slice, and their
  // mismatch relative to the targets of the first frame, are computed and
  // added as one sample per selected frame.
  Stream Aggregate(Filter filter) const {
    std::vector<Frame> frames = Slice(filter, /*merge=*/true);
    Stream stream;
    LeakyBucket leaky_bucket;
    for (const Frame& frame : frames) {
      Timestamp time = Timestamp::Micros((frame.timestamp_rtp / k90kHz).us());
      if (!frame.frame_size.IsZero()) {
        stream.width.AddSample(StatsSample(frame.width, time));
        stream.height.AddSample(StatsSample(frame.height, time));
        stream.frame_size_bytes.AddSample(
            StatsSample(frame.frame_size.bytes(), time));
        stream.keyframe.AddSample(StatsSample(frame.keyframe, time));
        if (frame.qp) {
          stream.qp.AddSample(StatsSample(*frame.qp, time));
        }
      }

      if (frame.encoded) {
        stream.encode_time_ms.AddSample(
            StatsSample(frame.encode_time.ms(), time));
      }

      if (frame.decoded) {
        stream.decode_time_ms.AddSample(
            StatsSample(frame.decode_time.ms(), time));
      }

      if (frame.psnr) {
        stream.psnr.y.AddSample(StatsSample(frame.psnr->y, time));
        stream.psnr.u.AddSample(StatsSample(frame.psnr->u, time));
        stream.psnr.v.AddSample(StatsSample(frame.psnr->v, time));
      }

      if (frame.target_framerate) {
        stream.target_framerate_fps.AddSample(
            StatsSample(frame.target_framerate->hertz<double>(), time));
      }

      if (frame.target_bitrate) {
        stream.target_bitrate_kbps.AddSample(
            StatsSample(frame.target_bitrate->kbps<double>(), time));

        int buffer_level_bits = leaky_bucket.Update(frame);
        // Multiply in floating point: `1000 * buffer_level_bits` as an int
        // product overflows once the bucket level exceeds ~2.1 Mbit.
        stream.transmission_time_ms.AddSample(StatsSample(
            1000.0 * buffer_level_bits / frame.target_bitrate->bps<double>(),
            time));
      }
    }

    int num_encoded_frames = stream.frame_size_bytes.NumSamples();
    if (num_encoded_frames == 0) {
      return stream;
    }

    // The slice duration spans from the first selected frame to the last
    // frame of any layer in the same timestamp range.
    const Frame& first_frame = frames.front();

    Filter filter_all_layers{.min_timestamp_rtp = filter.min_timestamp_rtp,
                             .max_timestamp_rtp = filter.max_timestamp_rtp};
    std::vector<Frame> frames_all_layers =
        Slice(filter_all_layers, /*merge=*/true);
    const Frame& last_frame = frames_all_layers.back();

    TimeDelta duration =
        (last_frame.timestamp_rtp - first_frame.timestamp_rtp) / k90kHz;
    if (last_frame.target_framerate) {
      // Account for the display time of the last frame.
      duration += 1 / *last_frame.target_framerate;
    }

    DataRate encoded_bitrate =
        DataSize::Bytes(stream.frame_size_bytes.GetSum()) / duration;
    Frequency encoded_framerate = num_encoded_frames / duration;

    double bitrate_mismatch_pct = 0.0;
    if (const auto& target_bitrate = first_frame.target_bitrate;
        target_bitrate) {
      bitrate_mismatch_pct = 100 * (encoded_bitrate / *target_bitrate - 1);
    }

    double framerate_mismatch_pct = 0.0;
    if (const auto& target_framerate = first_frame.target_framerate;
        target_framerate) {
      framerate_mismatch_pct =
          100 * (encoded_framerate / *target_framerate - 1);
    }

    for (const Frame& frame : frames) {
      Timestamp time = Timestamp::Micros((frame.timestamp_rtp / k90kHz).us());
      stream.encoded_bitrate_kbps.AddSample(
          StatsSample(encoded_bitrate.kbps<double>(), time));
      stream.encoded_framerate_fps.AddSample(
          StatsSample(encoded_framerate.hertz<double>(), time));
      stream.bitrate_mismatch_pct.AddSample(
          StatsSample(bitrate_mismatch_pct, time));
      stream.framerate_mismatch_pct.AddSample(
          StatsSample(framerate_mismatch_pct, time));
    }

    return stream;
  }

  // Writes per-frame metrics of `frames` to a semicolon-separated CSV file at
  // `csv_path`. Each `metadata` entry adds one column: the key goes into the
  // header and the value is repeated on every row. Optional values (QP, PSNR)
  // produce empty fields when absent. Target bitrate and frame rate come from
  // the frame itself when set, otherwise from the encoding settings recorded
  // for the frame's RTP timestamp; if neither is available the fields are left
  // empty. Failing to open the file is a fatal error.
  void LogMetrics(absl::string_view csv_path,
                  std::vector<Frame> frames,
                  std::map<std::string, std::string> metadata) const {
    RTC_LOG(LS_INFO) << "Write metrics to " << csv_path;
    // A string_view is not guaranteed to be null-terminated, so make an owned
    // copy before handing the path to the C API.
    const std::string csv_path_str(csv_path);
    FILE* csv_file = fopen(csv_path_str.c_str(), "w");
    RTC_CHECK(csv_file != nullptr)
        << "Could not open " << csv_path_str << " for writing";

    rtc::StringBuilder header;
    header
        << "timestamp_rtp;spatial_idx;temporal_idx;width;height;frame_size_"
           "bytes;keyframe;qp;encode_time_us;decode_time_us;psnr_y_db;psnr_u_"
           "db;psnr_v_db;target_bitrate_kbps;target_framerate_fps";
    for (const auto& data : metadata) {
      header << ";" << data.first;
    }
    fwrite(header.str().c_str(), 1, header.size(), csv_file);

    for (const Frame& f : frames) {
      rtc::StringBuilder row;
      row << "\n" << f.timestamp_rtp;
      row << ";" << f.layer_id.spatial_idx;
      row << ";" << f.layer_id.temporal_idx;
      row << ";" << f.width;
      row << ";" << f.height;
      row << ";" << f.frame_size.bytes();
      row << ";" << f.keyframe;
      row << ";";
      if (f.qp) {
        row << *f.qp;
      }
      row << ";" << f.encode_time.us();
      row << ";" << f.decode_time.us();
      if (f.psnr) {
        row << ";" << f.psnr->y;
        row << ";" << f.psnr->u;
        row << ";" << f.psnr->v;
      } else {
        row << ";;;";
      }

      // Encoding settings may be missing for this frame (e.g. none were
      // recorded); look them up without aborting.
      const auto es_it = encoding_settings_.find(f.timestamp_rtp);
      const bool has_settings = es_it != encoding_settings_.end();

      row << ";";
      if (f.target_bitrate) {
        row << f.target_bitrate->kbps();
      } else if (has_settings) {
        row << GetTargetBitrate(es_it->second, f.layer_id).kbps();
      }

      row << ";";
      if (f.target_framerate) {
        row << f.target_framerate->hertz<double>();
      } else if (has_settings) {
        row << GetTargetFramerate(es_it->second, f.layer_id).hertz<double>();
      }

      for (const auto& data : metadata) {
        row << ";" << data.second;
      }
      fwrite(row.str().c_str(), 1, row.size(), csv_file);
    }

    fclose(csv_file);
  }

  // Blocks until every task posted to the analyzer's task queue so far has
  // been executed.
  void Flush() {
    task_queue_.WaitForPreviouslyPostedTasks();
  }

 private:

  // Identifies a frame by its RTP timestamp and spatial layer index. Ordering
  // is lexicographic: by timestamp first, then by spatial index.
  struct FrameId {
    uint32_t timestamp_rtp;
    int spatial_idx;

    bool operator==(const FrameId& o) const {
      return std::tie(timestamp_rtp, spatial_idx) ==
             std::tie(o.timestamp_rtp, o.spatial_idx);
    }

    bool operator<(const FrameId& o) const {
      return std::tie(timestamp_rtp, spatial_idx) <
             std::tie(o.timestamp_rtp, o.spatial_idx);
    }
  };

  // Computes per-plane PSNR between two I420 buffers of equal dimensions
  // (mismatching dimensions are a fatal error). The chroma plane size is taken
  // from the buffer's chroma dimensions rather than from `width / 2` and
  // `height / 2`, so the last chroma column/row of odd-sized frames is
  // included.
  Frame::Psnr CalcPsnr(const I420BufferInterface& ref_buffer,
                       const I420BufferInterface& dec_buffer) {
    RTC_CHECK_EQ(ref_buffer.width(), dec_buffer.width());
    RTC_CHECK_EQ(ref_buffer.height(), dec_buffer.height());

    const int chroma_width = dec_buffer.ChromaWidth();
    const int chroma_height = dec_buffer.ChromaHeight();

    uint64_t sse_y = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataY(), dec_buffer.StrideY(), ref_buffer.DataY(),
        ref_buffer.StrideY(), dec_buffer.width(), dec_buffer.height());

    uint64_t sse_u = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataU(), dec_buffer.StrideU(), ref_buffer.DataU(),
        ref_buffer.StrideU(), chroma_width, chroma_height);

    uint64_t sse_v = libyuv::ComputeSumSquareErrorPlane(
        dec_buffer.DataV(), dec_buffer.StrideV(), ref_buffer.DataV(),
        ref_buffer.StrideV(), chroma_width, chroma_height);

    const int num_y_samples = dec_buffer.width() * dec_buffer.height();
    const int num_chroma_samples = chroma_width * chroma_height;

    Frame::Psnr psnr;
    psnr.y = libyuv::SumSquareErrorToPsnr(sse_y, num_y_samples);
    psnr.u = libyuv::SumSquareErrorToPsnr(sse_u, num_chroma_samples);
    psnr.v = libyuv::SumSquareErrorToPsnr(sse_v, num_chroma_samples);
    return psnr;
  }

  // Returns the target bitrate for `layer_id` as the sum of the configured
  // bitrates of the layers it comprises: temporal layers 0..temporal_idx of
  // each included spatial layer. For full-SVC scalability modes, or when no
  // layer is given, spatial layers are summed from 0; otherwise only the
  // requested spatial layer is counted. Without `layer_id`, the top spatial
  // and temporal layers of the scalability mode are used. A layer without
  // configured settings is a fatal error.
  DataRate GetTargetBitrate(const EncodingSettings& encoding_settings,
                            absl::optional<LayerId> layer_id) const {
    int first_spatial_idx = 0;
    if (layer_id.has_value()) {
      const bool is_full_svc =
          kFullSvcScalabilityModes.count(encoding_settings.scalability_mode) >
          0;
      if (!is_full_svc) {
        first_spatial_idx = layer_id->spatial_idx;
      }
    } else {
      const int num_spatial_layers =
          ScalabilityModeToNumSpatialLayers(encoding_settings.scalability_mode);
      const int num_temporal_layers = ScalabilityModeToNumTemporalLayers(
          encoding_settings.scalability_mode);
      layer_id = LayerId({.spatial_idx = num_spatial_layers - 1,
                          .temporal_idx = num_temporal_layers - 1});
    }

    DataRate total = DataRate::Zero();
    for (int sidx = first_spatial_idx; sidx <= layer_id->spatial_idx; ++sidx) {
      for (int tidx = 0; tidx <= layer_id->temporal_idx; ++tidx) {
        const auto settings_it = encoding_settings.layers_settings.find(
            {.spatial_idx = sidx, .temporal_idx = tidx});
        RTC_CHECK(settings_it != encoding_settings.layers_settings.end())
            << "bitrate is not specified for layer sidx=" << sidx
            << " tidx=" << tidx;
        total += settings_it->second.bitrate;
      }
    }
    return total;
  }

  // Returns the configured frame rate of `layer_id`. Without `layer_id`, the
  // frame rate of the last entry of the layer settings map is returned.
  // Requesting a layer without configured settings is a fatal error.
  Frequency GetTargetFramerate(const EncodingSettings& encoding_settings,
                               absl::optional<LayerId> layer_id) const {
    if (!layer_id.has_value()) {
      return encoding_settings.layers_settings.rbegin()->second.framerate;
    }

    const auto settings_it = encoding_settings.layers_settings.find(
        {.spatial_idx = layer_id->spatial_idx,
         .temporal_idx = layer_id->temporal_idx});
    RTC_CHECK(settings_it != encoding_settings.layers_settings.end())
        << "framerate is not specified for layer sidx="
        << layer_id->spatial_idx << " tidx=" << layer_id->temporal_idx;
    return settings_it->second.framerate;
  }

  // Convenience factory: builds a stats sample from `value` taken at `time`.
  SamplesStatsCounter::StatsSample StatsSample(double value,
                                               Timestamp time) const {
    SamplesStatsCounter::StatsSample sample{value, time};
    return sample;
  }

  // Queue on which the Start*/Finish* methods post their bookkeeping tasks;
  // `Flush()` waits for it to drain.
  LimitedTaskQueue task_queue_;

  // RTP timestamp -> spatial layer -> Frame

  std::map<uint32_t, std::map<int, Frame>> frames_;

  // RTP timestamp -> encoding settings passed to `StartEncode` for that frame.
  // Empty when `StartEncode` was never called.
  std::map<uint32_t, EncodingSettings> encoding_settings_;

};

class Decoder : public DecodedImageCallback {

 public:

  Decoder(const Environment& env,

          VideoDecoderFactory* decoder_factory,

          const DecoderSettings& decoder_settings,

          VideoCodecAnalyzer* analyzer)

      : env_(env),

        decoder_factory_(decoder_factory),

        analyzer_(analyzer),

        pacer_(decoder_settings.pacing_settings) {

    RTC_CHECK(analyzer_) << "Analyzer must be provided";

    if (decoder_settings.decoder_input_base_path) {

      ivf_writer_ = std::make_unique<TesterIvfWriter>(

          *decoder_settings.decoder_input_base_path);

    if (decoder_settings.decoder_output_base_path) {

      y4m_writer_ = std::make_unique<TesterY4mWriter>(

          *decoder_settings.decoder_output_base_path);

  void Initialize(const SdpVideoFormat& sdp_video_format) {

    decoder_ = decoder_factory_->Create(env_, sdp_video_format);

    RTC_CHECK(decoder_) << "Could not create decoder for video format "

                        << sdp_video_format.ToString();

    codec_type_ = PayloadStringToCodecType(sdp_video_format.name);

    task_queue_.PostTaskAndWait([this] {

      decoder_->RegisterDecodeCompleteCallback(this);

      VideoDecoder::Settings ds;

      ds.set_codec_type(*codec_type_);

      ds.set_number_of_cores(1);

      ds.set_max_render_resolution({1280, 720});

      bool result = decoder_->Configure(ds);

      RTC_CHECK(result) << "Failed to configure decoder";

});

  void Decode(const EncodedImage& encoded_frame,

              absl::optional<VideoFrame> ref_frame = absl::nullopt) {

    int spatial_idx = encoded_frame.SpatialIndex().value_or(

        encoded_frame.SimulcastIndex().value_or(0));

      MutexLock lock(&mutex_);

      RTC_CHECK_EQ(spatial_idx_.value_or(spatial_idx), spatial_idx)

          << "Spatial index changed from " << *spatial_idx_ << " to "

          << spatial_idx;

      spatial_idx_ = spatial_idx;

      if (ref_frame.has_value()) {

        ref_frames_.insert({encoded_frame.RtpTimestamp(), *ref_frame});

    Timestamp pts =

        Timestamp::Micros((encoded_frame.RtpTimestamp() / k90kHz).us());

    task_queue_.PostScheduledTask(

        [this, encoded_frame] {

          analyzer_->StartDecode(encoded_frame);

          int error = decoder_->Decode(encoded_frame, /*render_time_ms*/ 0);

          if (error != 0) {

            RTC_LOG(LS_WARNING)

                << "Decode failed with error code " << error

                << " RTP timestamp " << encoded_frame.RtpTimestamp();

},

        pacer_.Schedule(pts));

    if (ivf_writer_) {

      ivf_writer_->Write(encoded_frame, *codec_type_);

  void Flush() {

    // TODO(webrtc:14852): Add Flush() to VideoDecoder API.

    task_queue_.PostTaskAndWait([this] { decoder_->Release(); });

 private:

  int Decoded(VideoFrame& decoded_frame) override {

    int spatial_idx;

    absl::optional<VideoFrame> ref_frame;

      MutexLock lock(&mutex_);

      spatial_idx = *spatial_idx_;

      if (ref_frames_.size() > 0) {

        auto it = ref_frames_.find(decoded_frame.rtp_timestamp());

        RTC_CHECK(it != ref_frames_.end());

        ref_frame = it->second;

        ref_frames_.erase(ref_frames_.begin(), std::next(it));

    analyzer_->FinishDecode(decoded_frame, spatial_idx, ref_frame);

    if (y4m_writer_) {

      y4m_writer_->Write(decoded_frame, spatial_idx);

    return WEBRTC_VIDEO_CODEC_OK;

  const Environment env_;

  VideoDecoderFactory* decoder_factory_;

  std::unique_ptr<VideoDecoder> decoder_;

  VideoCodecAnalyzer* const analyzer_;

  Pacer pacer_;

  LimitedTaskQueue task_queue_;

  std::unique_ptr<TesterIvfWriter> ivf_writer_;

  std::unique_ptr<TesterY4mWriter> y4m_writer_;

  absl::optional<VideoCodecType> codec_type_;

  absl::optional<int> spatial_idx_ RTC_GUARDED_BY(mutex_);

  std::map<uint32_t, VideoFrame> ref_frames_ RTC_GUARDED_BY(mutex_);

  Mutex mutex_;

};

class Encoder : public EncodedImageCallback {

 public:

  using EncodeCallback =

      absl::AnyInvocable<void(const EncodedImage& encoded_frame)>;

  Encoder(const Environment& env,

          VideoEncoderFactory* encoder_factory,

          const EncoderSettings& encoder_settings,

          VideoCodecAnalyzer* analyzer)

      : env_(env),

        encoder_factory_(encoder_factory),

        analyzer_(analyzer),

        pacer_(encoder_settings.pacing_settings) {

    RTC_CHECK(analyzer_) << "Analyzer must be provided";

    if (encoder_settings.encoder_input_base_path) {

      y4m_writer_ = std::make_unique<TesterY4mWriter>(

          *encoder_settings.encoder_input_base_path);

    if (encoder_settings.encoder_output_base_path) {

      ivf_writer_ = std::make_unique<TesterIvfWriter>(

          *encoder_settings.encoder_output_base_path);

  void Initialize(const EncodingSettings& encoding_settings) {

    encoder_ =

        encoder_factory_->Create(env_, encoding_settings.sdp_video_format);

    RTC_CHECK(encoder_) << "Could not create encoder for video format "

                        << encoding_settings.sdp_video_format.ToString();

    codec_type_ =

        PayloadStringToCodecType(encoding_settings.sdp_video_format.name);

    task_queue_.PostTaskAndWait([this, encoding_settings] {

      encoder_->RegisterEncodeCompleteCallback(this);

      Configure(encoding_settings);

      SetRates(encoding_settings);

});

  void Encode(const VideoFrame& input_frame,

              const EncodingSettings& encoding_settings,

              EncodeCallback callback) {

      MutexLock lock(&mutex_);

      callbacks_[input_frame.rtp_timestamp()] = std::move(callback);

    Timestamp pts =

        Timestamp::Micros((input_frame.rtp_timestamp() / k90kHz).us());

    task_queue_.PostScheduledTask(

        [this, input_frame, encoding_settings] {

          analyzer_->StartEncode(input_frame, encoding_settings);

          if (!last_encoding_settings_ ||

              !IsSameRate(encoding_settings, *last_encoding_settings_)) {

            SetRates(encoding_settings);

          last_encoding_settings_ = encoding_settings;

          std::vector<VideoFrameType> frame_types = {

              encoding_settings.keyframe ? VideoFrameType::kVideoFrameKey

                                         : VideoFrameType::kVideoFrameDelta};

          int error = encoder_->Encode(input_frame, &frame_types);

          if (error != 0) {

            RTC_LOG(LS_WARNING)

                << "Encode failed with error code " << error

                << " RTP timestamp " << input_frame.rtp_timestamp();

},

        pacer_.Schedule(pts));

    if (y4m_writer_) {

      y4m_writer_->Write(input_frame, /*spatial_idx=*/0);

  void Flush() {

    task_queue_.PostTaskAndWait([this] { encoder_->Release(); });

    if (last_superframe_) {

      int num_spatial_layers =

          ScalabilityModeToNumSpatialLayers(last_superframe_->scalability_mode);

      for (int sidx = *last_superframe_->encoded_frame.SpatialIndex() + 1;

           sidx < num_spatial_layers; ++sidx) {

        last_superframe_->encoded_frame.SetSpatialIndex(sidx);

        DeliverEncodedFrame(last_superframe_->encoded_frame);

      last_superframe_.reset();

 private:

  struct Superframe {

    EncodedImage encoded_frame;

    rtc::scoped_refptr<EncodedImageBuffer> encoded_data;

    ScalabilityMode scalability_mode;

};

  Result OnEncodedImage(const EncodedImage& encoded_frame,

                        const CodecSpecificInfo* codec_specific_info) override {

    analyzer_->FinishEncode(encoded_frame);

    if (last_superframe_ && last_superframe_->encoded_frame.RtpTimestamp() !=

                                encoded_frame.RtpTimestamp()) {

      // New temporal unit. We have frame of previous temporal unit (TU) stored

      // which means that the previous TU used spatial prediction. If encoder

      // dropped a frame of layer X in the previous TU, mark the stored frame

      // as a frame belonging to layer >X and deliver it such that decoders of

      // layer >X receive encoded lower layers.

      int num_spatial_layers =

          ScalabilityModeToNumSpatialLayers(last_superframe_->scalability_mode);

      for (int sidx =

               last_superframe_->encoded_frame.SpatialIndex().value_or(0) + 1;

           sidx < num_spatial_layers; ++sidx) {

        last_superframe_->encoded_frame.SetSpatialIndex(sidx);

        DeliverEncodedFrame(last_superframe_->encoded_frame);

      last_superframe_.reset();

    const EncodedImage& superframe =

        MakeSuperFrame(encoded_frame, codec_specific_info);

    DeliverEncodedFrame(superframe);

    return Result(Result::Error::OK);

  void DeliverEncodedFrame(const EncodedImage& encoded_frame) {

      MutexLock lock(&mutex_);

      auto it = callbacks_.find(encoded_frame.RtpTimestamp());

      RTC_CHECK(it != callbacks_.end());

      it->second(encoded_frame);

      callbacks_.erase(callbacks_.begin(), it);

    if (ivf_writer_ != nullptr) {

      ivf_writer_->Write(encoded_frame, codec_type_);

  void Configure(const EncodingSettings& es) {

    const LayerSettings& top_layer_settings =

        es.layers_settings.rbegin()->second;

    const int num_spatial_layers =

        ScalabilityModeToNumSpatialLayers(es.scalability_mode);

    const int num_temporal_layers =

        ScalabilityModeToNumTemporalLayers(es.scalability_mode);

    DataRate total_bitrate = std::accumulate(

        es.layers_settings.begin(), es.layers_settings.end(), DataRate::Zero(),

        [](DataRate acc, const std::pair<const LayerId, LayerSettings> layer) {

          return acc + layer.second.bitrate;

});

    VideoCodec vc;

    vc.width = top_layer_settings.resolution.width;

    vc.height = top_layer_settings.resolution.height;

    vc.startBitrate = total_bitrate.kbps();

    vc.maxBitrate = total_bitrate.kbps();

    vc.minBitrate = 0;

    vc.maxFramerate = top_layer_settings.framerate.hertz<uint32_t>();

    vc.active = true;

    vc.numberOfSimulcastStreams = 0;

    vc.mode = es.content_type;

    vc.SetFrameDropEnabled(es.frame_drop);

    vc.SetScalabilityMode(es.scalability_mode);

    vc.SetVideoEncoderComplexity(VideoCodecComplexity::kComplexityNormal);

    vc.codecType = PayloadStringToCodecType(es.sdp_video_format.name);

    switch (vc.codecType) {

      case kVideoCodecVP8:

        *(vc.VP8()) = VideoEncoder::GetDefaultVp8Settings();

        vc.VP8()->SetNumberOfTemporalLayers(num_temporal_layers);

        vc.SetScalabilityMode(std::vector<ScalabilityMode>{

            ScalabilityMode::kL1T1, ScalabilityMode::kL1T2,

            ScalabilityMode::kL1T3}[num_temporal_layers - 1]);

        vc.qpMax = cricket::kDefaultVideoMaxQpVpx;

        break;

      case kVideoCodecVP9:

        *(vc.VP9()) = VideoEncoder::GetDefaultVp9Settings();

        vc.qpMax = cricket::kDefaultVideoMaxQpVpx;

        break;

      case kVideoCodecAV1:

        vc.qpMax = cricket::kDefaultVideoMaxQpVpx;

        break;

      case kVideoCodecH264:

        *(vc.H264()) = VideoEncoder::GetDefaultH264Settings();

        vc.H264()->SetNumberOfTemporalLayers(num_temporal_layers);

        vc.qpMax = cricket::kDefaultVideoMaxQpH26x;

        break;

      case kVideoCodecH265:

        vc.qpMax = cricket::kDefaultVideoMaxQpH26x;

        break;

      case kVideoCodecGeneric:

        RTC_CHECK_NOTREACHED();

        break;

    bool is_simulcast =

        num_spatial_layers > 1 &&

        (vc.codecType == kVideoCodecVP8 || vc.codecType == kVideoCodecH264 ||

         vc.codecType == kVideoCodecH265);

    if (is_simulcast) {

      vc.numberOfSimulcastStreams = num_spatial_layers;

      for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {

        auto tl0_settings = es.layers_settings.find(

            LayerId{.spatial_idx = sidx, .temporal_idx = 0});

        auto tlx_settings = es.layers_settings.find(LayerId{

            .spatial_idx = sidx, .temporal_idx = num_temporal_layers - 1});

        DataRate total_bitrate = std::accumulate(

            tl0_settings, tlx_settings, DataRate::Zero(),

            [](DataRate acc,

               const std::pair<const LayerId, LayerSettings> layer) {

              return acc + layer.second.bitrate;

});

        SimulcastStream& ss = vc.simulcastStream[sidx];

        ss.width = tl0_settings->second.resolution.width;

        ss.height = tl0_settings->second.resolution.height;

        ss.numberOfTemporalLayers = num_temporal_layers;

        ss.maxBitrate = total_bitrate.kbps();

        ss.targetBitrate = total_bitrate.kbps();

        ss.minBitrate = 0;

        ss.maxFramerate = vc.maxFramerate;

        ss.qpMax = vc.qpMax;

        ss.active = true;

    VideoEncoder::Settings ves(

        VideoEncoder::Capabilities(/*loss_notification=*/false),

        /*number_of_cores=*/1,

        /*max_payload_size=*/1440);

    int result = encoder_->InitEncode(&vc, ves);

    RTC_CHECK(result == WEBRTC_VIDEO_CODEC_OK);

  void SetRates(const EncodingSettings& es) {

    VideoEncoder::RateControlParameters rc;

    int num_spatial_layers =

        ScalabilityModeToNumSpatialLayers(es.scalability_mode);

    int num_temporal_layers =

        ScalabilityModeToNumTemporalLayers(es.scalability_mode);

    for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {

      for (int tidx = 0; tidx < num_temporal_layers; ++tidx) {

        auto layers_settings = es.layers_settings.find(

            {.spatial_idx = sidx, .temporal_idx = tidx});

        RTC_CHECK(layers_settings != es.layers_settings.end())

            << "Bitrate for layer S=" << sidx << " T=" << tidx << " is not set";

        rc.bitrate.SetBitrate(sidx, tidx,

                              layers_settings->second.bitrate.bps());

    rc.framerate_fps =

        es.layers_settings.rbegin()->second.framerate.hertz<double>();

    encoder_->SetRates(rc);

  bool IsSameRate(const EncodingSettings& a, const EncodingSettings& b) const {

    for (auto [layer_id, layer] : a.layers_settings) {

      const auto& other_layer = b.layers_settings.at(layer_id);

      if (layer.bitrate != other_layer.bitrate ||

          layer.framerate != other_layer.framerate) {

        return false;

    return true;

  static bool IsSvc(const EncodedImage& encoded_frame,

                    const CodecSpecificInfo& codec_specific_info) {

    if (!codec_specific_info.scalability_mode) {

      return false;

    ScalabilityMode scalability_mode = *codec_specific_info.scalability_mode;

    return (kFullSvcScalabilityModes.count(scalability_mode) ||

            (kKeySvcScalabilityModes.count(scalability_mode) &&

             encoded_frame.FrameType() == VideoFrameType::kVideoFrameKey));

  const EncodedImage& MakeSuperFrame(

      const EncodedImage& encoded_frame,

      const CodecSpecificInfo* codec_specific_info) {

    if (last_superframe_) {

      // Append to base spatial layer frame(s).

      RTC_CHECK_EQ(*encoded_frame.SpatialIndex(),

                   *last_superframe_->encoded_frame.SpatialIndex() + 1)

          << "Inter-layer frame drops are not supported.";

      size_t current_size = last_superframe_->encoded_data->size();

      last_superframe_->encoded_data->Realloc(current_size +

                                              encoded_frame.size());

      memcpy(last_superframe_->encoded_data->data() + current_size,

             encoded_frame.data(), encoded_frame.size());

      last_superframe_->encoded_frame.SetEncodedData(

          last_superframe_->encoded_data);

      last_superframe_->encoded_frame.SetSpatialIndex(

          encoded_frame.SpatialIndex());

      return last_superframe_->encoded_frame;

    RTC_CHECK(codec_specific_info != nullptr);

    if (IsSvc(encoded_frame, *codec_specific_info)) {

      last_superframe_ = Superframe{

          .encoded_frame = EncodedImage(encoded_frame),

          .encoded_data = EncodedImageBuffer::Create(encoded_frame.data(),

                                                     encoded_frame.size()),

          .scalability_mode = *codec_specific_info->scalability_mode};

      last_superframe_->encoded_frame.SetEncodedData(

          last_superframe_->encoded_data);

      return last_superframe_->encoded_frame;

    return encoded_frame;

  const Environment env_;

  VideoEncoderFactory* const encoder_factory_;

  std::unique_ptr<VideoEncoder> encoder_;

  VideoCodecAnalyzer* const analyzer_;

  Pacer pacer_;

  absl::optional<EncodingSettings> last_encoding_settings_;

  std::unique_ptr<VideoBitrateAllocator> bitrate_allocator_;

  LimitedTaskQueue task_queue_;

  std::unique_ptr<TesterY4mWriter> y4m_writer_;

  std::unique_ptr<TesterIvfWriter> ivf_writer_;

  std::map<uint32_t, int> sidx_ RTC_GUARDED_BY(mutex_);

  std::map<uint32_t, EncodeCallback> callbacks_ RTC_GUARDED_BY(mutex_);

  VideoCodecType codec_type_;

  absl::optional<Superframe> last_superframe_;

  Mutex mutex_;

};

void ConfigureSimulcast(const FieldTrialsView& field_trials, VideoCodec* vc) {

  int num_spatial_layers =

      ScalabilityModeToNumSpatialLayers(*vc->GetScalabilityMode());

  int num_temporal_layers =

      ScalabilityModeToNumTemporalLayers(*vc->GetScalabilityMode());

  if (num_spatial_layers == 1) {

    SimulcastStream* ss = &vc->simulcastStream[0];

    ss->width = vc->width;

    ss->height = vc->height;

    ss->numberOfTemporalLayers = num_temporal_layers;

    ss->maxBitrate = vc->maxBitrate;

    ss->targetBitrate = vc->maxBitrate;

    ss->minBitrate = vc->minBitrate;

    ss->qpMax = vc->qpMax;

    ss->active = true;

    return;

  VideoEncoderConfig encoder_config;

  encoder_config.codec_type = vc->codecType;

  encoder_config.number_of_streams = num_spatial_layers;

  encoder_config.simulcast_layers.resize(num_spatial_layers);

  VideoEncoder::EncoderInfo encoder_info;

  auto stream_factory =

      rtc::make_ref_counted<cricket::EncoderStreamFactory>(encoder_info);

  const std::vector<VideoStream> streams = stream_factory->CreateEncoderStreams(

      field_trials, vc->width, vc->height, encoder_config);

  vc->numberOfSimulcastStreams = streams.size();

  RTC_CHECK_LE(vc->numberOfSimulcastStreams, num_spatial_layers);

  if (vc->numberOfSimulcastStreams < num_spatial_layers) {

    vc->SetScalabilityMode(LimitNumSpatialLayers(*vc->GetScalabilityMode(),

                                                 vc->numberOfSimulcastStreams));

  for (int i = 0; i < vc->numberOfSimulcastStreams; ++i) {

    SimulcastStream* ss = &vc->simulcastStream[i];

    ss->width = streams[i].width;

    ss->height = streams[i].height;

    ss->numberOfTemporalLayers = num_temporal_layers;

    ss->maxBitrate = streams[i].max_bitrate_bps / 1000;

    ss->targetBitrate = streams[i].target_bitrate_bps / 1000;

    ss->minBitrate = streams[i].min_bitrate_bps / 1000;

    ss->qpMax = vc->qpMax;

    ss->active = true;

// Resets the codec-specific part of `vc` to the encoder defaults for its
// codec type and sets the number of temporal layers. Only VP8, VP9 and H264
// are touched; AV1 and H265 are left unchanged. `kVideoCodecGeneric` is not
// expected here.
void SetDefaultCodecSpecificSettings(VideoCodec* vc, int num_temporal_layers) {
  switch (vc->codecType) {
    case kVideoCodecVP8: {
      auto* vp8 = vc->VP8();
      *vp8 = VideoEncoder::GetDefaultVp8Settings();
      vc->VP8()->SetNumberOfTemporalLayers(num_temporal_layers);
      break;
    }
    case kVideoCodecVP9: {
      auto* vp9 = vc->VP9();
      *vp9 = VideoEncoder::GetDefaultVp9Settings();
      vc->VP9()->SetNumberOfTemporalLayers(num_temporal_layers);
      break;
    }
    case kVideoCodecH264: {
      auto* h264 = vc->H264();
      *h264 = VideoEncoder::GetDefaultH264Settings();
      vc->H264()->SetNumberOfTemporalLayers(num_temporal_layers);
      break;
    }
    case kVideoCodecAV1:
    case kVideoCodecH265:
      break;
    case kVideoCodecGeneric:
      RTC_CHECK_NOTREACHED();
  }
}

std::tuple<std::vector<DataRate>, ScalabilityMode>

SplitBitrateAndUpdateScalabilityMode(const Environment& env,

                                     std::string codec_type,

                                     ScalabilityMode scalability_mode,

                                     int width,

                                     int height,

                                     std::vector<DataRate> layer_bitrate,

                                     Frequency framerate,

                                     VideoCodecMode content_type) {

  int num_spatial_layers = ScalabilityModeToNumSpatialLayers(scalability_mode);

  int num_temporal_layers =

      ScalabilityModeToNumTemporalLayers(scalability_mode);

  int num_bitrates = static_cast<int>(layer_bitrate.size());

  RTC_CHECK(num_bitrates == 1 || num_bitrates == num_spatial_layers ||

            num_bitrates == num_spatial_layers * num_temporal_layers);

  if (num_bitrates == num_spatial_layers * num_temporal_layers) {

    return std::make_tuple(layer_bitrate, scalability_mode);

  DataRate total_bitrate = std::accumulate(

      layer_bitrate.begin(), layer_bitrate.end(), DataRate::Zero());

  VideoCodec vc;

  vc.codecType = PayloadStringToCodecType(codec_type);

  vc.width = width;

  vc.height = height;

  vc.startBitrate = total_bitrate.kbps();

  vc.maxBitrate = total_bitrate.kbps();

  vc.minBitrate = 0;

  vc.maxFramerate = framerate.hertz();

  vc.numberOfSimulcastStreams = 0;

  vc.mode = content_type;

  vc.SetScalabilityMode(scalability_mode);

  SetDefaultCodecSpecificSettings(&vc, num_temporal_layers);

  if (num_bitrates == num_spatial_layers) {

    switch (vc.codecType) {

      case kVideoCodecVP8:

      case kVideoCodecH264:

      case kVideoCodecH265:

        vc.numberOfSimulcastStreams = num_spatial_layers;

        for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {

          SimulcastStream* ss = &vc.simulcastStream[sidx];

          ss->width = width >> (num_spatial_layers - sidx - 1);

          ss->height = height >> (num_spatial_layers - sidx - 1);

          ss->maxFramerate = vc.maxFramerate;

          ss->numberOfTemporalLayers = num_temporal_layers;

          ss->maxBitrate = layer_bitrate[sidx].kbps();

          ss->targetBitrate = layer_bitrate[sidx].kbps();

          ss->minBitrate = 0;

          ss->qpMax = 0;

          ss->active = true;

        break;

      case kVideoCodecVP9:

      case kVideoCodecAV1:

        for (int sidx = num_spatial_layers - 1; sidx >= 0; --sidx) {

          SpatialLayer* ss = &vc.spatialLayers[sidx];

          ss->width = width >> (num_spatial_layers - sidx - 1);

          ss->height = height >> (num_spatial_layers - sidx - 1);

          ss->maxFramerate = vc.maxFramerate;

          ss->numberOfTemporalLayers = num_temporal_layers;

          ss->maxBitrate = layer_bitrate[sidx].kbps();

          ss->targetBitrate = layer_bitrate[sidx].kbps();

          ss->minBitrate = 0;

          ss->qpMax = 0;

          ss->active = true;

        break;

      case kVideoCodecGeneric:

        RTC_CHECK_NOTREACHED();

  } else {

    switch (vc.codecType) {

      case kVideoCodecVP8:

      case kVideoCodecH264:

      case kVideoCodecH265:

        ConfigureSimulcast(env.field_trials(), &vc);

        break;

      case kVideoCodecVP9: {

        const std::vector<SpatialLayer> spatialLayers = GetVp9SvcConfig(vc);

        for (size_t i = 0; i < spatialLayers.size(); ++i) {

          vc.spatialLayers[i] = spatialLayers[i];

          vc.spatialLayers[i].active = true;

      } break;

      case kVideoCodecAV1: {

        bool result =

            SetAv1SvcConfig(vc, num_spatial_layers, num_temporal_layers);

        RTC_CHECK(result) << "SetAv1SvcConfig failed";

      } break;

      case kVideoCodecGeneric:

        RTC_CHECK_NOTREACHED();

    if (*vc.GetScalabilityMode() != scalability_mode) {

      RTC_LOG(LS_WARNING) << "Scalability mode changed from "

                          << ScalabilityModeToString(scalability_mode) << " to "

                          << ScalabilityModeToString(*vc.GetScalabilityMode());

      num_spatial_layers =

          ScalabilityModeToNumSpatialLayers(*vc.GetScalabilityMode());

      num_temporal_layers =

          ScalabilityModeToNumTemporalLayers(*vc.GetScalabilityMode());

  std::unique_ptr<VideoBitrateAllocator> bitrate_allocator =

      CreateBuiltinVideoBitrateAllocatorFactory()->Create(env, vc);

  VideoBitrateAllocation bitrate_allocation =

      bitrate_allocator->Allocate(VideoBitrateAllocationParameters(

          total_bitrate.bps(), framerate.hertz<double>()));

  std::vector<DataRate> bitrates;

  for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {

    for (int tidx = 0; tidx < num_temporal_layers; ++tidx) {

      int bitrate_bps = bitrate_allocation.GetBitrate(sidx, tidx);

      bitrates.push_back(DataRate::BitsPerSec(bitrate_bps));

  return std::make_tuple(bitrates, *vc.GetScalabilityMode());

}  // namespace

// Logs the statistics of this stream to `logger`.
//
// Every metric is named `prefix` + <metric name> and is reported under
// `test_case_name` with `metadata` attached.
void VideoCodecStats::Stream::LogMetrics(
    MetricsLogger* logger,
    std::string test_case_name,
    std::string prefix,
    std::map<std::string, std::string> metadata) const {
  // All metrics share the test case name, the name prefix and the metadata;
  // only the name suffix, the samples, the unit and the improvement direction
  // vary from call to call.
  const auto log = [&](const char* name, const auto& samples, Unit unit,
                       ImprovementDirection direction) {
    logger->LogMetric(prefix + name, test_case_name, samples, unit, direction,
                      metadata);
  };

  log("width", width, Unit::kCount, ImprovementDirection::kBiggerIsBetter);
  log("height", height, Unit::kCount, ImprovementDirection::kBiggerIsBetter);
  log("frame_size_bytes", frame_size_bytes, Unit::kBytes,
      ImprovementDirection::kNeitherIsBetter);
  log("keyframe", keyframe, Unit::kCount,
      ImprovementDirection::kSmallerIsBetter);
  log("qp", qp, Unit::kUnitless, ImprovementDirection::kSmallerIsBetter);

  // TODO(webrtc:14852): Change to us or even ns.
  log("encode_time_ms", encode_time_ms, Unit::kMilliseconds,
      ImprovementDirection::kSmallerIsBetter);
  log("decode_time_ms", decode_time_ms, Unit::kMilliseconds,
      ImprovementDirection::kSmallerIsBetter);

  // TODO(webrtc:14852): Change to kUnitLess. kKilobitsPerSecond are converted
  // to bytes per second in Chromeperf dash.
  log("target_bitrate_kbps", target_bitrate_kbps, Unit::kKilobitsPerSecond,
      ImprovementDirection::kBiggerIsBetter);
  log("target_framerate_fps", target_framerate_fps, Unit::kHertz,
      ImprovementDirection::kBiggerIsBetter);

  // TODO(webrtc:14852): Change to kUnitLess. kKilobitsPerSecond are converted
  // to bytes per second in Chromeperf dash.
  log("encoded_bitrate_kbps", encoded_bitrate_kbps, Unit::kKilobitsPerSecond,
      ImprovementDirection::kBiggerIsBetter);
  log("encoded_framerate_fps", encoded_framerate_fps, Unit::kHertz,
      ImprovementDirection::kBiggerIsBetter);

  log("bitrate_mismatch_pct", bitrate_mismatch_pct, Unit::kPercent,
      ImprovementDirection::kNeitherIsBetter);
  log("framerate_mismatch_pct", framerate_mismatch_pct, Unit::kPercent,
      ImprovementDirection::kNeitherIsBetter);
  log("transmission_time_ms", transmission_time_ms, Unit::kMilliseconds,
      ImprovementDirection::kSmallerIsBetter);

  log("psnr_y_db", psnr.y, Unit::kUnitless,
      ImprovementDirection::kBiggerIsBetter);
  log("psnr_u_db", psnr.u, Unit::kUnitless,
      ImprovementDirection::kBiggerIsBetter);
  log("psnr_v_db", psnr.v, Unit::kUnitless,
      ImprovementDirection::kBiggerIsBetter);
}

// Builds the `EncodingSettings` for one encoder configuration.
//
// `scalability_name` is parsed into a scalability mode and, together with the
// codec, resolution, bitrate, frame rate and content type, handed to
// `SplitBitrateAndUpdateScalabilityMode()`. The mode it returns (which may
// differ from the requested one) determines how many spatial and temporal
// layers get a `LayerSettings` entry. For "H264" the SDP format parameters are
// replaced by those of a constrained-baseline, level 3.1 format.
//
// Crashes (RTC_CHECK) if `scalability_name` is not a known scalability mode.
EncodingSettings VideoCodecTester::CreateEncodingSettings(
    const Environment& env,
    std::string codec_type,
    std::string scalability_name,
    int width,
    int height,
    std::vector<DataRate> bitrate,
    Frequency framerate,
    bool screencast,
    bool frame_drop) {
  // Dereferencing an empty optional is undefined behavior, so reject unknown
  // scalability names explicitly.
  const auto requested_mode = ScalabilityModeFromString(scalability_name);
  RTC_CHECK(requested_mode.has_value())
      << "Unknown scalability mode: " << scalability_name;

  VideoCodecMode content_type = screencast ? VideoCodecMode::kScreensharing
                                           : VideoCodecMode::kRealtimeVideo;

  auto [adjusted_bitrate, scalability_mode] =
      SplitBitrateAndUpdateScalabilityMode(env, codec_type, *requested_mode,
                                           width, height, bitrate, framerate,
                                           content_type);

  // Layer counts are taken from the returned mode, not the requested one.
  int num_spatial_layers = ScalabilityModeToNumSpatialLayers(scalability_mode);
  int num_temporal_layers =
      ScalabilityModeToNumTemporalLayers(scalability_mode);

  std::map<LayerId, LayerSettings> layers_settings;
  for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {
    // Each spatial layer below the top one halves width and height once more.
    const int downscale_shift = num_spatial_layers - sidx - 1;
    const int layer_width = width >> downscale_shift;
    const int layer_height = height >> downscale_shift;
    for (int tidx = 0; tidx < num_temporal_layers; ++tidx) {
      // Each temporal layer below the top one halves the frame rate once more.
      const int framerate_divisor = 1 << (num_temporal_layers - tidx - 1);
      // `adjusted_bitrate` is indexed spatial-major, temporal-minor.
      layers_settings.emplace(
          LayerId{.spatial_idx = sidx, .temporal_idx = tidx},
          LayerSettings{
              .resolution = {.width = layer_width, .height = layer_height},
              .framerate = framerate / framerate_divisor,
              .bitrate = adjusted_bitrate[sidx * num_temporal_layers + tidx]});
    }
  }

  SdpVideoFormat sdp_video_format = SdpVideoFormat(codec_type);
  if (codec_type == "H264") {
    // NOTE(review): RFC 6184 defines packetization-mode=1 as non-interleaved
    // mode, while the original comment here read
    // "H264PacketizationMode::SingleNalUnit" -- confirm which is intended.
    const std::string packetization_mode =
        "1";  // H264PacketizationMode::SingleNalUnit
    sdp_video_format.parameters =
        CreateH264Format(H264Profile::kProfileConstrainedBaseline,
                         H264Level::kLevel3_1, packetization_mode,
                         /*add_scalability_modes=*/false)
            .parameters;
  }

  return EncodingSettings{.sdp_video_format = sdp_video_format,
                          .scalability_mode = scalability_mode,
                          .content_type = content_type,
                          .frame_drop = frame_drop,
                          .layers_settings = layers_settings};
}

std::map<uint32_t, EncodingSettings> VideoCodecTester::CreateFrameSettings(

    const EncodingSettings& encoding_settings,

    int num_frames,

    uint32_t timestamp_rtp) {

  std::map<uint32_t, EncodingSettings> frame_settings;

  Frequency framerate =

      encoding_settings.layers_settings.rbegin()->second.framerate;

  for (int frame_num = 0; frame_num < num_frames; ++frame_num) {

    frame_settings.emplace(timestamp_rtp, encoding_settings);

    timestamp_rtp += k90kHz / framerate;

  return frame_settings;

std::unique_ptr<VideoCodecTester::VideoCodecStats>

VideoCodecTester::RunDecodeTest(const Environment& env,

                                CodedVideoSource* video_source,

                                VideoDecoderFactory* decoder_factory,

                                const DecoderSettings& decoder_settings,

                                const SdpVideoFormat& sdp_video_format) {

  std::unique_ptr<VideoCodecAnalyzer> analyzer =

      std::make_unique<VideoCodecAnalyzer>();

  Decoder decoder(env, decoder_factory, decoder_settings, analyzer.get());

  decoder.Initialize(sdp_video_format);

  while (auto frame = video_source->PullFrame()) {

    decoder.Decode(*frame);

  decoder.Flush();

  analyzer->Flush();

  return std::move(analyzer);

std::unique_ptr<VideoCodecTester::VideoCodecStats>

VideoCodecTester::RunEncodeTest(

    const Environment& env,

    const VideoSourceSettings& source_settings,

    VideoEncoderFactory* encoder_factory,

    const EncoderSettings& encoder_settings,

    const std::map<uint32_t, EncodingSettings>& encoding_settings) {

  VideoSource video_source(source_settings);

  std::unique_ptr<VideoCodecAnalyzer> analyzer =

      std::make_unique<VideoCodecAnalyzer>();

  Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());

  encoder.Initialize(encoding_settings.begin()->second);

  for (const auto& [timestamp_rtp, frame_settings] : encoding_settings) {

    const EncodingSettings::LayerSettings& top_layer =

        frame_settings.layers_settings.rbegin()->second;

    VideoFrame source_frame = video_source.PullFrame(

        timestamp_rtp, top_layer.resolution, top_layer.framerate);

    encoder.Encode(source_frame, frame_settings,

                   [](const EncodedImage& encoded_frame) {});

  encoder.Flush();

  analyzer->Flush();

  return std::move(analyzer);

std::unique_ptr<VideoCodecTester::VideoCodecStats>

VideoCodecTester::RunEncodeDecodeTest(

    const Environment& env,

    const VideoSourceSettings& source_settings,

    VideoEncoderFactory* encoder_factory,

    VideoDecoderFactory* decoder_factory,

    const EncoderSettings& encoder_settings,

    const DecoderSettings& decoder_settings,

    const std::map<uint32_t, EncodingSettings>& encoding_settings) {

  VideoSource video_source(source_settings);

  std::unique_ptr<VideoCodecAnalyzer> analyzer =

      std::make_unique<VideoCodecAnalyzer>();

  const EncodingSettings& frame_settings = encoding_settings.begin()->second;

  Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());

  encoder.Initialize(frame_settings);

  int num_spatial_layers =

      ScalabilityModeToNumSpatialLayers(frame_settings.scalability_mode);

  std::vector<std::unique_ptr<Decoder>> decoders;

  for (int sidx = 0; sidx < num_spatial_layers; ++sidx) {

    auto decoder = std::make_unique<Decoder>(env, decoder_factory,

                                             decoder_settings, analyzer.get());

    decoder->Initialize(frame_settings.sdp_video_format);

    decoders.push_back(std::move(decoder));

  for (const auto& [timestamp_rtp, frame_settings] : encoding_settings) {

    const EncodingSettings::LayerSettings& top_layer =

        frame_settings.layers_settings.rbegin()->second;

    VideoFrame source_frame = video_source.PullFrame(

        timestamp_rtp, top_layer.resolution, top_layer.framerate);

    encoder.Encode(source_frame, frame_settings,

                   [&decoders,

                    source_frame](const EncodedImage& encoded_frame) {

                     int sidx = encoded_frame.SpatialIndex().value_or(

                         encoded_frame.SimulcastIndex().value_or(0));

                     decoders.at(sidx)->Decode(encoded_frame, source_frame);

});

  encoder.Flush();

  for (auto& decoder : decoders) {

    decoder->Flush();

  analyzer->Flush();

  return std::move(analyzer);

}  // namespace test

}  // namespace webrtc

Source code

Revision control

Copy as Markdown

Other Tools