nsScriptableUConv.cpp

Enable keyboard shortcuts

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsString.h"

#include "nsIScriptableUConv.h"

#include "nsScriptableUConv.h"

#include "nsComponentManagerUtils.h"

#include <tuple>

using namespace mozilla;

/* Implementation file */

NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)

nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()

    : mIsInternal(false) {}

nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;

NS_IMETHODIMP

nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,

                                                 nsACString& _retval) {

  if (!mEncoder) return NS_ERROR_FAILURE;

  // We can compute the length without replacement, because the

  // the replacement is only one byte long and a mappable character

  // would always output something, i.e. at least one byte.

  // When encoding to ISO-2022-JP, unmappables shouldn't be able

  // to cause more escape sequences to be emitted than the mappable

  // worst case where every input character causes an escape into

  // a different state.

  CheckedInt<size_t> needed =

      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());

  if (!needed.isValid() || needed.value() > UINT32_MAX) {

    return NS_ERROR_OUT_OF_MEMORY;

  auto dstChars = _retval.GetMutableData(needed.value(), fallible);

  if (!dstChars) {

    return NS_ERROR_OUT_OF_MEMORY;

  auto src = Span(aSrc);

  auto dst = AsWritableBytes(*dstChars);

  size_t totalWritten = 0;

  for (;;) {

    auto [result, read, written] =

        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);

    if (result != kInputEmpty && result != kOutputFull) {

      MOZ_RELEASE_ASSERT(written < dst.Length(),

                         "Unmappables with one-byte replacement should not "

                         "exceed mappable worst case.");

      dst[written++] = '?';

    totalWritten += written;

    if (result == kInputEmpty) {

      MOZ_ASSERT(totalWritten <= UINT32_MAX);

      if (!_retval.SetLength(totalWritten, fallible)) {

        return NS_ERROR_OUT_OF_MEMORY;

      return NS_OK;

    src = src.From(read);

    dst = dst.From(written);

NS_IMETHODIMP

nsScriptableUnicodeConverter::Finish(nsACString& _retval) {

  // The documentation for this method says it should be called after

  // ConvertFromUnicode(). However, our own tests called it after

  // convertFromByteArray(), i.e. when *decoding*.

  // Assuming that there exists extensions that similarly call

  // this at the wrong time, let's deal. In general, it is a design

  // error for this class to handle conversions in both directions.

  if (!mEncoder) {

    _retval.Truncate();

    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);

    return NS_OK;

  // If we are encoding to ISO-2022-JP, potentially

  // transition back to the ASCII state. The buffer

  // needs to be large enough for an additional NCR,

  // though.

  _retval.SetLength(13);

  auto dst = AsWritableBytes(_retval.GetMutableData(13));

  Span<char16_t> src(nullptr);

  uint32_t result;

  size_t read;

  size_t written;

  std::tie(result, read, written, std::ignore) =

      mEncoder->EncodeFromUTF16(src, dst, true);

  MOZ_ASSERT(!read);

  MOZ_ASSERT(result == kInputEmpty);

  _retval.SetLength(written);

  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);

  mEncoder->Encoding()->NewEncoderInto(*mEncoder);

  return NS_OK;

NS_IMETHODIMP

nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,

                                               nsAString& _retval) {

  if (!mDecoder) return NS_ERROR_FAILURE;

  uint32_t length = aSrc.Length();

  CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);

  if (!needed.isValid() || needed.value() > UINT32_MAX) {

    return NS_ERROR_OUT_OF_MEMORY;

  auto dst = _retval.GetMutableData(needed.value(), fallible);

  if (!dst) {

    return NS_ERROR_OUT_OF_MEMORY;

  auto src =

      Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);

  uint32_t result;

  size_t read;

  size_t written;

  // The UTF-8 decoder used to throw regardless of the error behavior.

  // Simulating the old behavior for compatibility with legacy callers.

  // If callers want control over the behavior, they should switch to

  // TextDecoder.

  if (mDecoder->Encoding() == UTF_8_ENCODING) {

    std::tie(result, read, written) =

        mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);

    if (result != kInputEmpty) {

      return NS_ERROR_UDEC_ILLEGALINPUT;

  } else {

    std::tie(result, read, written, std::ignore) =

        mDecoder->DecodeToUTF16(src, *dst, false);

  MOZ_ASSERT(result == kInputEmpty);

  MOZ_ASSERT(read == length);

  MOZ_ASSERT(written <= needed.value());

  if (!_retval.SetLength(written, fallible)) {

    return NS_ERROR_OUT_OF_MEMORY;

  return NS_OK;

NS_IMETHODIMP

nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {

  if (!mDecoder) {

    aCharset.Truncate();

  } else {

    mDecoder->Encoding()->Name(aCharset);

  return NS_OK;

NS_IMETHODIMP

nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {

  return InitConverter(aCharset);

NS_IMETHODIMP

nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {

  *aIsInternal = mIsInternal;

  return NS_OK;

NS_IMETHODIMP

nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {

  mIsInternal = aIsInternal;

  return NS_OK;

nsresult nsScriptableUnicodeConverter::InitConverter(

    const nsACString& aCharset) {

  mEncoder = nullptr;

  mDecoder = nullptr;

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);

  if (!encoding) {

    return NS_ERROR_UCONV_NOCONV;

  if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {

    mEncoder = encoding->NewEncoder();

  mDecoder = encoding->NewDecoderWithBOMRemoval();

  return NS_OK;