// quantize_func_test.cc

/*

 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.

 * This source code is subject to the terms of the BSD 2 Clause License and

 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License

 * was not distributed with this source code in the LICENSE file, you can

 * obtain it at www.aomedia.org/license/software. If the Alliance for Open

 * Media Patent License 1.0 was not distributed with this source code in the

 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.

*/

#include <algorithm>

#include <tuple>

#include "gtest/gtest.h"

#include "config/aom_config.h"

#include "config/aom_dsp_rtcd.h"

#include "config/av1_rtcd.h"

#include "aom/aom_codec.h"

#include "aom_dsp/txfm_common.h"

#include "aom_ports/aom_timer.h"

#include "av1/encoder/encoder.h"

#include "av1/common/scan.h"

#include "test/acm_random.h"

#include "test/register_state_check.h"

#include "test/util.h"

namespace {

using libaom_test::ACMRandom;

#define QUAN_PARAM_LIST                                                       \

  const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,    \

      const int16_t *round_ptr, const int16_t *quant_ptr,                     \

      const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,                 \

      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \

      const int16_t *scan, const int16_t *iscan

#define LP_QUANTIZE_PARAM_LIST                                             \

  const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,   \

      const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, \

      const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,  \

      const int16_t *iscan

typedef void (*LPQuantizeFunc)(LP_QUANTIZE_PARAM_LIST);

typedef void (*QuantizeFunc)(QUAN_PARAM_LIST);

typedef void (*QuantizeFuncHbd)(QUAN_PARAM_LIST, int log_scale);

#undef LP_QUANTIZE_PARAM_LIST

#define HBD_QUAN_FUNC                                                      \

  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \

     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale)

#define LBD_QUAN_FUNC                                                      \

  fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \

     qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)

// Adapts a QuantizeFuncHbd (which takes a trailing log_scale argument) to the
// QuantizeFunc signature by calling `fn` with log_scale fixed to 0.
template <QuantizeFuncHbd fn>
void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) {
  // HBD_QUAN_FUNC picks this local up by name.
  const int log_scale = 0;
  HBD_QUAN_FUNC;
}

// Adapts a QuantizeFuncHbd (which takes a trailing log_scale argument) to the
// QuantizeFunc signature by calling `fn` with log_scale fixed to 1.
template <QuantizeFuncHbd fn>
void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) {
  // HBD_QUAN_FUNC picks this local up by name.
  const int log_scale = 1;
  HBD_QUAN_FUNC;
}

// Adapts a QuantizeFuncHbd (which takes a trailing log_scale argument) to the
// QuantizeFunc signature by calling `fn` with log_scale fixed to 2.
template <QuantizeFuncHbd fn>
void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) {
  // HBD_QUAN_FUNC picks this local up by name.
  const int log_scale = 2;
  HBD_QUAN_FUNC;
}

// Selects which round/quant tables the fixture feeds to the quantizer:
// TYPE_B uses y_round / y_quant, TYPE_FP uses y_round_fp / y_quant_fp.
// TYPE_DC is declared but has no table selection in this file.
enum QuantType { TYPE_B, TYPE_DC, TYPE_FP };

using std::tuple;

// Test parameter: (reference function, function under test, transform size,
// quantizer type, bit depth).
template <typename FuncType>
using QuantizeParam =
    tuple<FuncType, FuncType, TX_SIZE, QuantType, aom_bit_depth_t>;

// Quantizer and dequantizer tables filled in by av1_build_quantizer().
struct QuanTable {
  QUANTS quant;
  Dequants dequant;
};

// Iteration count used by the randomized tests.
constexpr int kTestNum = 1000;

// Fetches element `k` of the current test parameter tuple.
#define GET_TEMPLATE_PARAM(k) std::get<k>(this->GetParam())

// Shared fixture that runs a reference quantizer and a quantizer under test on
// the same input and checks that their outputs match.
//
// Template parameters:
//   CoeffType - element type of the coefficient buffers.
//   FuncType  - function-pointer type of the two quantizers.
//
// The test parameter is a QuantizeParam<FuncType>: element 0 is the reference
// function, element 1 the function under test, followed by the transform
// size, the quantizer type and the bit depth.
template <typename CoeffType, typename FuncType>
class QuantizeTestBase
    : public ::testing::TestWithParam<QuantizeParam<FuncType>> {
 protected:
  QuantizeTestBase()
      : quant_ref_(GET_TEMPLATE_PARAM(0)), quant_(GET_TEMPLATE_PARAM(1)),
        tx_size_(GET_TEMPLATE_PARAM(2)), type_(GET_TEMPLATE_PARAM(3)),
        bd_(GET_TEMPLATE_PARAM(4)) {}

  ~QuantizeTestBase() override = default;

  // Allocates the quantizer tables and one 32-byte aligned buffer of
  // 6 * coeff_num() elements, then builds the quantizer tables.
  void SetUp() override {
    qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_)));
    ASSERT_NE(qtab_, nullptr);
    const int n_coeffs = coeff_num();
    coeff_ = reinterpret_cast<CoeffType *>(
        aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType)));
    ASSERT_NE(coeff_, nullptr);
    InitQuantizer();
  }

  // Releases both allocations.  The pointers start out as nullptr, so this is
  // well defined even when SetUp() bailed out before allocating coeff_.
  void TearDown() override {
    aom_free(qtab_);
    qtab_ = nullptr;
    aom_free(coeff_);
    coeff_ = nullptr;
  }

  // Fills qtab_ for the configured bit depth.
  // NOTE(review): the five zero arguments are presumably per-plane delta-q
  // values -- confirm against av1_build_quantizer()'s declaration.
  void InitQuantizer() {
    av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant);
  }

  // Runs the reference quantizer into the *_ref_ptr outputs and the quantizer
  // under test into the remaining outputs.  Implemented by the concrete
  // fixtures because the two function-pointer types take different arguments.
  virtual void RunQuantizeFunc(
      const CoeffType *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
      const int16_t *round_ptr, const int16_t *quant_ptr,
      const int16_t *quant_shift_ptr, CoeffType *qcoeff_ptr,
      CoeffType *qcoeff_ref_ptr, CoeffType *dqcoeff_ptr,
      CoeffType *dqcoeff_ref_ptr, const int16_t *dequant_ptr,
      uint16_t *eob_ref_ptr, uint16_t *eob_ptr, const int16_t *scan,
      const int16_t *iscan) = 0;

  // Runs both quantizers `test_num` times with the tables for index `q` and
  // asserts that qcoeff, dqcoeff and eob agree.
  //
  //   is_loop  - when true the input is re-randomized with FillCoeffRandom()
  //              before every iteration; when false the caller is expected to
  //              have filled coeff_ already.
  //   q        - index into the quantizer tables.
  //   test_num - number of iterations.
  void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) {
    // coeff_ is carved into six consecutive regions of n_coeffs elements:
    // input | qcoeff_ref | dqcoeff_ref | qcoeff | dqcoeff | eob storage.
    CoeffType *coeff_ptr = coeff_;
    const intptr_t n_coeffs = coeff_num();

    CoeffType *qcoeff_ref = coeff_ptr + n_coeffs;
    CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs;

    CoeffType *qcoeff = dqcoeff_ref + n_coeffs;
    CoeffType *dqcoeff = qcoeff + n_coeffs;
    // eob[0] receives the reference eob, eob[1] the eob under test.
    uint16_t *eob = reinterpret_cast<uint16_t *>(dqcoeff + n_coeffs);

    // Testing uses 2-D DCT scan order table
    const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);

    // Testing uses luminance quantization table
    const int16_t *zbin = qtab_->quant.y_zbin[q];

    const int16_t *round = nullptr;
    const int16_t *quant = nullptr;
    if (type_ == TYPE_B) {
      round = qtab_->quant.y_round[q];
      quant = qtab_->quant.y_quant[q];
    } else if (type_ == TYPE_FP) {
      round = qtab_->quant.y_round_fp[q];
      quant = qtab_->quant.y_quant_fp[q];
    } else {
      // No tables are selected for any other type; fail clearly instead of
      // handing null round/quant pointers to the quantizers.
      FAIL() << "Unsupported quantizer type: " << static_cast<int>(type_);
    }

    const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
    const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];

    for (int i = 0; i < test_num; ++i) {
      if (is_loop) FillCoeffRandom();

      // Clears all five regions that follow the input coefficients.
      memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref));

      RunQuantizeFunc(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift,
                      qcoeff, qcoeff_ref, dqcoeff, dqcoeff_ref, dequant,
                      &eob[0], &eob[1], sc->scan, sc->iscan);

      for (int j = 0; j < n_coeffs; ++j) {
        ASSERT_EQ(qcoeff_ref[j], qcoeff[j])
            << "Q mismatch on test: " << i << " at position: " << j
            << " Q: " << q << " coeff: " << coeff_ptr[j];
      }

      for (int j = 0; j < n_coeffs; ++j) {
        ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j])
            << "Dq mismatch on test: " << i << " at position: " << j
            << " Q: " << q << " coeff: " << coeff_ptr[j];
      }

      ASSERT_EQ(eob[0], eob[1])
          << "eobs mismatch on test: " << i << " Q: " << q;
    }
  }

  // Asserts that the first `size` elements of `buf_ref` and `buf` are equal.
  // `text`, `q` and `number` are only used to build the failure message.
  void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size,
                      const char *text, int q, int number) {
    for (int i = 0; i < size; ++i) {
      ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number
                                    << " at position: " << i << " Q: " << q;
    }
  }

  // Number of coefficients processed for the configured transform size.
  int coeff_num() const { return av1_get_max_eob(tx_size_); }

  // Sets every input coefficient to `c`.
  void FillCoeff(CoeffType c) { std::fill_n(coeff_, coeff_num(), c); }

  // Zeroes the input, then writes random coefficients into one contiguous run
  // whose length and start position are both random.
  void FillCoeffRandom() {
    const int n_coeffs = coeff_num();
    FillCoeffZero();
    // The run length is drawn before the start position; keep this order so
    // the pseudo-random sequence is unchanged.
    const int num = rnd_.Rand16() % n_coeffs;
    // Randomize the first non zero coeff position.
    const int start = rnd_.Rand16() % n_coeffs;
    const int end = std::min(start + num, n_coeffs);
    for (int i = start; i < end; ++i) {
      coeff_[i] = GetRandomCoeff();
    }
  }

  // Zeroes the input, then fills the first `num` coefficients with random
  // values.  `num` is not range-checked here.
  void FillCoeffRandomRows(int num) {
    FillCoeffZero();
    for (int i = 0; i < num; ++i) {
      coeff_[i] = GetRandomCoeff();
    }
  }

  void FillCoeffZero() { FillCoeff(0); }

  // Sets every input coefficient to the same random value.
  void FillCoeffConstant() {
    const CoeffType c = GetRandomCoeff();
    FillCoeff(c);
  }

  // Zeroes the input and puts one random value in position 0.
  void FillDcOnly() {
    FillCoeffZero();
    coeff_[0] = GetRandomCoeff();
  }

  void FillDcLargeNegative() {
    FillCoeffZero();
    // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
    // like BUG=883 where the constant being compared was incorrectly
    // initialized.
    coeff_[0] = -8191;
  }

  // Returns one random coefficient clamped to a range that depends on bd_:
  // [INT16_MIN + 1, INT16_MAX] for 8-bit, otherwise
  // [-(1 << (7 + bd_)), (1 << (7 + bd_)) - 1].
  CoeffType GetRandomCoeff() {
    if (bd_ == AOM_BITS_8) {
      return clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1,
                   INT16_MAX);
    }
    // NOTE(review): if Rand31() only yields non-negative values, this branch
    // never produces a negative coefficient -- confirm against ACMRandom.
    CoeffType min = -(1 << (7 + bd_));
    CoeffType max = -min - 1;
    return clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max);
  }

  ACMRandom rnd_;
  // Both buffers are owned by the fixture: allocated in SetUp(), released in
  // TearDown().  They are null-initialized so TearDown() is safe even when
  // SetUp() aborts early.
  QuanTable *qtab_ = nullptr;
  CoeffType *coeff_ = nullptr;
  FuncType quant_ref_;  // Reference implementation.
  FuncType quant_;      // Implementation under test.
  TX_SIZE tx_size_;
  QuantType type_;
  aom_bit_depth_t bd_;
};

// Fixture for quantizers with the QuantizeFunc signature (tran_low_t
// coefficients, with zbin and quant_shift tables).
class FullPrecisionQuantizeTest
    : public QuantizeTestBase<tran_low_t, QuantizeFunc> {
  // Calls the reference function with the *_ref_ptr outputs, then the function
  // under test with the other outputs, wrapped in API_REGISTER_STATE_CHECK.
  void RunQuantizeFunc(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       const int16_t *zbin_ptr, const int16_t *round_ptr,
                       const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
                       tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ref_ptr,
                       tran_low_t *dqcoeff_ptr, tran_low_t *dqcoeff_ref_ptr,
                       const int16_t *dequant_ptr, uint16_t *eob_ref_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
                       const int16_t *iscan) override {
    quant_ref_(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
               quant_shift_ptr, qcoeff_ref_ptr, dqcoeff_ref_ptr, dequant_ptr,
               eob_ref_ptr, scan, iscan);

    API_REGISTER_STATE_CHECK(quant_(
        coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
        qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan));
  }
};

// Fixture for quantizers with the LPQuantizeFunc signature (int16_t
// coefficients).  The zbin and quant_shift tables supplied by the base class
// are ignored because that signature has no such parameters.
class LowPrecisionQuantizeTest
    : public QuantizeTestBase<int16_t, LPQuantizeFunc> {
  // Calls the reference function with the *_ref_ptr outputs, then the function
  // under test with the other outputs, wrapped in API_REGISTER_STATE_CHECK.
  void RunQuantizeFunc(const int16_t *coeff_ptr, intptr_t n_coeffs,
                       const int16_t * /*zbin_ptr*/, const int16_t *round_ptr,
                       const int16_t *quant_ptr,
                       const int16_t * /*quant_shift_ptr*/, int16_t *qcoeff_ptr,
                       int16_t *qcoeff_ref_ptr, int16_t *dqcoeff_ptr,
                       int16_t *dqcoeff_ref_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ref_ptr, uint16_t *eob_ptr,
                       const int16_t *scan, const int16_t *iscan) override {
    quant_ref_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ref_ptr,
               dqcoeff_ref_ptr, dequant_ptr, eob_ref_ptr, scan, iscan);

    API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round_ptr, quant_ptr,
                                    qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                                    eob_ptr, scan, iscan));
  }
};

// All-zero input, single run with the default q index.
TEST_P(FullPrecisionQuantizeTest, ZeroInput) {
  FillCoeffZero();
  QuantizeRun(false);
}

// Single large negative value in position 0, all other coefficients zero.
TEST_P(FullPrecisionQuantizeTest, LargeNegativeInput) {
  FillDcLargeNegative();
  QuantizeRun(false, 0, 1);
}

// Single random value in position 0, all other coefficients zero.
TEST_P(FullPrecisionQuantizeTest, DcOnlyInput) {
  FillDcOnly();
  QuantizeRun(false, 0, 1);
}

// kTestNum iterations of freshly randomized input at q index 0.
TEST_P(FullPrecisionQuantizeTest, RandomInput) {
  QuantizeRun(true, 0, kTestNum);
}

// kTestNum iterations of randomized input for every q index.
TEST_P(FullPrecisionQuantizeTest, MultipleQ) {
  for (int q = 0; q < QINDEX_RANGE; ++q) {
    // A fatal failure inside QuantizeRun() only returns from that helper, so
    // stop here as well instead of repeating the failure for every later q.
    ASSERT_NO_FATAL_FAILURE(QuantizeRun(true, q, kTestNum));
  }
}

// Force the coeff to be half the value of the dequant.  This exposes a
// mismatch found in av1_quantize_fp_sse2().
TEST_P(FullPrecisionQuantizeTest, CoeffHalfDequant) {
  FillCoeff(16);
  QuantizeRun(false, 25, 1);
}

// Manual benchmark (disabled by default).  For an increasing number of
// populated coefficients it times kNumTests calls of the reference function
// and of the function under test and prints both times and their ratio.
// The fp round/quant tables are used regardless of the parameter's QuantType.
TEST_P(FullPrecisionQuantizeTest, DISABLED_Speed) {
  // Same buffer layout as QuantizeRun(): the output regions follow the input.
  tran_low_t *coeff_ptr = coeff_;
  const intptr_t n_coeffs = coeff_num();

  tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs;
  tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs;

  tran_low_t *qcoeff = dqcoeff_ref + n_coeffs;
  tran_low_t *dqcoeff = qcoeff + n_coeffs;
  uint16_t *eob = reinterpret_cast<uint16_t *>(dqcoeff + n_coeffs);

  // Testing uses 2-D DCT scan order table
  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);

  // Testing uses luminance quantization table
  const int q = 22;
  const int16_t *zbin = qtab_->quant.y_zbin[q];
  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
  const int16_t *quant_shift = qtab_->quant.y_quant_shift[q];
  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
  const int kNumTests = 5000000;
  aom_usec_timer timer, simd_timer;
  // Both dimensions are capped at 32 before sizing the populated prefix.
  int rows = tx_size_high[tx_size_];
  int cols = tx_size_wide[tx_size_];
  rows = AOMMIN(32, rows);
  cols = AOMMIN(32, cols);
  for (int cnt = 0; cnt <= rows; cnt++) {
    FillCoeffRandomRows(cnt * cols);

    aom_usec_timer_start(&timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_ref_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift,
                 qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&timer);

    aom_usec_timer_start(&simd_timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff,
             dqcoeff, dequant, eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&simd_timer);

    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    const int simd_elapsed_time =
        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
           simd_elapsed_time,
           (static_cast<float>(elapsed_time) / simd_elapsed_time));
  }
}

// TODO(crbug.com/aomedia/2796)

// All-zero input, single run with the default q index.
TEST_P(LowPrecisionQuantizeTest, ZeroInput) {
  FillCoeffZero();
  QuantizeRun(false);
}

// Single large negative value in position 0, all other coefficients zero.
TEST_P(LowPrecisionQuantizeTest, LargeNegativeInput) {
  FillDcLargeNegative();
  QuantizeRun(false, 0, 1);
}

// Single random value in position 0, all other coefficients zero.
TEST_P(LowPrecisionQuantizeTest, DcOnlyInput) {
  FillDcOnly();
  QuantizeRun(false, 0, 1);
}

// kTestNum iterations of freshly randomized input at q index 0.
TEST_P(LowPrecisionQuantizeTest, RandomInput) {
  QuantizeRun(true, 0, kTestNum);
}

// kTestNum iterations of randomized input for every q index.
TEST_P(LowPrecisionQuantizeTest, MultipleQ) {
  for (int q = 0; q < QINDEX_RANGE; ++q) {
    // A fatal failure inside QuantizeRun() only returns from that helper, so
    // stop here as well instead of repeating the failure for every later q.
    ASSERT_NO_FATAL_FAILURE(QuantizeRun(true, q, kTestNum));
  }
}

// Force the coeff to be half the value of the dequant.  This exposes a
// mismatch found in av1_quantize_fp_sse2().
TEST_P(LowPrecisionQuantizeTest, CoeffHalfDequant) {
  FillCoeff(16);
  QuantizeRun(false, 25, 1);
}

// Manual benchmark (disabled by default).  For an increasing number of
// populated coefficients it times kNumTests calls of the reference function
// and of the function under test and prints both times and their ratio.
TEST_P(LowPrecisionQuantizeTest, DISABLED_Speed) {
  // Same buffer layout as QuantizeRun(): the output regions follow the input.
  int16_t *coeff_ptr = coeff_;
  const intptr_t n_coeffs = coeff_num();

  int16_t *qcoeff_ref = coeff_ptr + n_coeffs;
  int16_t *dqcoeff_ref = qcoeff_ref + n_coeffs;

  int16_t *qcoeff = dqcoeff_ref + n_coeffs;
  int16_t *dqcoeff = qcoeff + n_coeffs;
  uint16_t *eob = reinterpret_cast<uint16_t *>(dqcoeff + n_coeffs);

  // Testing uses 2-D DCT scan order table
  const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT);

  // Testing uses luminance quantization table
  const int q = 22;
  const int16_t *round_fp = qtab_->quant.y_round_fp[q];
  const int16_t *quant_fp = qtab_->quant.y_quant_fp[q];
  const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q];
  const int kNumTests = 5000000;
  aom_usec_timer timer, simd_timer;
  // Both dimensions are capped at 32 before sizing the populated prefix.
  int rows = tx_size_high[tx_size_];
  int cols = tx_size_wide[tx_size_];
  rows = AOMMIN(32, rows);
  cols = AOMMIN(32, cols);
  for (int cnt = 0; cnt <= rows; cnt++) {
    FillCoeffRandomRows(cnt * cols);

    aom_usec_timer_start(&timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_ref_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff,
                 dequant, eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&timer);

    aom_usec_timer_start(&simd_timer);
    for (int n = 0; n < kNumTests; ++n) {
      quant_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, dequant,
             eob, sc->scan, sc->iscan);
    }
    aom_usec_timer_mark(&simd_timer);

    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    const int simd_elapsed_time =
        static_cast<int>(aom_usec_timer_elapsed(&simd_timer));
    printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time,
           simd_elapsed_time,
           (static_cast<float>(elapsed_time) / simd_elapsed_time));
  }
}

using std::make_tuple;

#if HAVE_AVX2

// AVX2 low-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth); the fixture runs
// both functions on the same input and compares their outputs.
const QuantizeParam<LPQuantizeFunc> kLPQParamArrayAvx2[] = {

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,

             static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2,

             static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8)

};

INSTANTIATE_TEST_SUITE_P(AVX2, LowPrecisionQuantizeTest,

                         ::testing::ValuesIn(kLPQParamArrayAvx2));

// AVX2 full-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth).  High-bit-depth fp
// quantizers are adapted to the QuantizeFunc signature through the
// highbd_quan*_wrapper templates.  Entries are compiled in or out by
// CONFIG_AV1_HIGHBITDEPTH and CONFIG_REALTIME_ONLY.
const QuantizeParam<QuantizeFunc> kQParamArrayAvx2[] = {

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,

             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,

             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,

             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2,

             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,

             static_cast<TX_SIZE>(TX_16X64), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2,

             static_cast<TX_SIZE>(TX_64X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),

#if CONFIG_AV1_HIGHBITDEPTH

  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10),

  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),

  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10),

  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),

  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),

  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10),

  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_avx2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

#endif  // !CONFIG_REALTIME_ONLY

#endif  // CONFIG_AV1_HIGHBITDEPTH

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2,

             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8),

#endif  // !CONFIG_REALTIME_ONLY

  make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_avx2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),

};

INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeTest,

                         ::testing::ValuesIn(kQParamArrayAvx2));

#endif  // HAVE_AVX2

#if HAVE_SSE2

// SSE2 low-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth).
const QuantizeParam<LPQuantizeFunc> kLPQParamArraySSE2[] = {

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,

             static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,

             static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8)

};

INSTANTIATE_TEST_SUITE_P(SSE2, LowPrecisionQuantizeTest,

                         ::testing::ValuesIn(kLPQParamArraySSE2));

// SSE2 full-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth).  Entries are
// compiled in or out by CONFIG_AV1_HIGHBITDEPTH and CONFIG_REALTIME_ONLY.
const QuantizeParam<QuantizeFunc> kQParamArraySSE2[] = {

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,

             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,

             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,

             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2,

             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),

  make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

#if CONFIG_AV1_HIGHBITDEPTH

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

#endif  // !CONFIG_REALTIME_ONLY

  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,

             &aom_highbd_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,

             &aom_highbd_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10),

  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,

             &aom_highbd_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),

#endif  // !CONFIG_REALTIME_ONLY

#endif  // CONFIG_AV1_HIGHBITDEPTH

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2,

             static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_32x32_adaptive_c,

             &aom_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_32x32_adaptive_c,

             &aom_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_16X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_32x32_adaptive_c,

             &aom_quantize_b_32x32_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_64x64_adaptive_c,

             &aom_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_32X64), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_64x64_adaptive_c,

             &aom_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_64X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_64x64_adaptive_c,

             &aom_quantize_b_64x64_adaptive_sse2,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)

#endif  // !CONFIG_REALTIME_ONLY

};

INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeTest,

                         ::testing::ValuesIn(kQParamArraySSE2));

#endif  // HAVE_SSE2

#if HAVE_NEON

// NEON low-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth).
const QuantizeParam<LPQuantizeFunc> kLPQParamArrayNEON[] = {
  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
             static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8),
  make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
             static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8)
};

INSTANTIATE_TEST_SUITE_P(NEON, LowPrecisionQuantizeTest,
                         ::testing::ValuesIn(kLPQParamArrayNEON));

// NEON full-precision parameters.  Each entry is (reference function, function
// under test, transform size, quantizer type, bit depth).  High-bit-depth fp
// quantizers are adapted to the QuantizeFunc signature through the
// highbd_quan*_wrapper templates.  Entries are compiled in or out by
// CONFIG_AV1_HIGHBITDEPTH and CONFIG_REALTIME_ONLY.
const QuantizeParam<QuantizeFunc> kQParamArrayNEON[] = {

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,

             static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,

             static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,

             static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon,

             static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_neon,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8),

  make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_neon,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8),

  make_tuple(&aom_quantize_b_c, &aom_quantize_b_neon,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_neon,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

  make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_neon,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8),

#if CONFIG_AV1_HIGHBITDEPTH

  make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_neon>,

             static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12),

  make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_neon>,

             static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12),

  make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>,

             &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_neon>,

             static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_neon,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_neon,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_neon,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),

#if !CONFIG_REALTIME_ONLY

  make_tuple(&aom_highbd_quantize_b_adaptive_c,

             &aom_highbd_quantize_b_adaptive_neon,

             static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c,

             &aom_highbd_quantize_b_32x32_adaptive_neon,

             static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12),

  make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c,

             &aom_highbd_quantize_b_64x64_adaptive_neon,

             static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12),

#endif  // !CONFIG_REALTIME_ONLY

#endif  // CONFIG_AV1_HIGHBITDEPTH

};

INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeTest,

                         ::testing::ValuesIn(kQParamArrayNEON));

#endif  // HAVE_NEON

#if HAVE_SSSE3 && AOM_ARCH_X86_64

INSTANTIATE_TEST_SUITE_P(

    SSSE3, FullPrecisionQuantizeTest,

    ::testing::Values(

        make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3,

                   static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

        make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_ssse3,

                   static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8),

        make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3,

                   static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)));

#endif  // HAVE_SSSE3 && AOM_ARCH_X86_64

#if HAVE_AVX

INSTANTIATE_TEST_SUITE_P(

    AVX, FullPrecisionQuantizeTest,

    ::testing::Values(

        make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx,

                   static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8),

        make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx,

                   static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8)));

#endif  // HAVE_AVX

}  // namespace

// Source code
// Revision control
// Copy as Markdown
// Other Tools