Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsTextRunTransformations.h"
#include <utility>
#include "GreekCasing.h"
#include "IrishCasing.h"
#include "MathMLTextRunFactory.h"
#include "mozilla/ComputedStyleInlines.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/StaticPrefs_layout.h"
#include "mozilla/StaticPrefs_mathml.h"
#include "mozilla/TextEditor.h"
#include "mozilla/gfx/2D.h"
#include "nsGkAtoms.h"
#include "nsLineBreaker.h"
#include "nsSpecialCasingData.h"
#include "nsStyleConsts.h"
#include "nsTextFrameUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
using namespace mozilla;
using namespace mozilla::gfx;
// Unicode characters needing special casing treatment in tr/az languages:
// U+0130 is the capital I that carries a dot, and U+0131 is the small i
// that has none.
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
// Greek sigma needs custom handling for the lowercase transform; for details
// see bug 740120.
// U+03A3 is the capital form; U+03C2 and U+03C3 are the final and the
// ordinary (non-final) small forms respectively.
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define GREEK_SMALL_LETTER_SIGMA 0x03C3
// Allocates storage sized for a transformed text run of aLength characters
// and constructs the run in place inside that storage.
//
// The text is expected to be 16-bit (the TEXT_IS_8BIT flag must not be set).
// Returns null when the storage allocation fails; otherwise returns an owning
// reference to the newly constructed run.
already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
    const gfxTextRunFactory::Parameters* aParams,
    nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
    const char16_t* aString, uint32_t aLength,
    const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
  NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
               "didn't expect text to be marked as 8-bit here");

  void* const buffer =
      AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
  if (buffer == nullptr) {
    return nullptr;
  }

  // Placement-new into the buffer obtained above.
  RefPtr<nsTransformedTextRun> textRun(new (buffer) nsTransformedTextRun(
      aParams, aFactory, aFontGroup, aString, aLength, aFlags, aFlags2,
      std::move(aStyles), aOwnsFactory));
  return textRun.forget();
}
void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
bool* aCapitalization) {
if (mCapitalize.IsEmpty()) {
// XXX(Bug 1631371) Check if this should use a fallible operation as it
// pretended earlier.
mCapitalize.AppendElements(GetLength());
memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
}
memcpy(mCapitalize.Elements() + aStart, aCapitalization,
aLength * sizeof(bool));
mNeedsRebuild = true;
}
// Forwards to gfxTextRun::SetPotentialLineBreaks and, if that reports a
// change, flags this run as needing a rebuild. Returns whether anything
// changed.
bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
                                                  const uint8_t* aBreakBefore) {
  if (!gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore)) {
    return false;
  }
  mNeedsRebuild = true;
  return true;
}
// Marks emergency-wrap break opportunities in the glyph storage.
//
// This parallels part of what gfxShapedText::SetupClusterBoundaries() does
// for normal textruns: an alphanumeric character that immediately follows a
// hyphen, where that hyphen itself follows an alphanumeric character, gets the
// FLAG_BREAK_TYPE_EMERGENCY_WRAP break-before type.
void nsTransformedTextRun::SetEmergencyWrapPositions() {
  // True when the previous character was a hyphen preceded by an alphanumeric.
  bool followsWordHyphen = false;
  const auto textLength = mString.Length();
  for (uint32_t i = 0; i < textLength; ++i) {
    const char16_t current = mString[i];
    if (followsWordHyphen && nsContentUtils::IsAlphanumeric(current)) {
      mCharacterGlyphs[i].SetCanBreakBefore(
          CompressedGlyph::FLAG_BREAK_TYPE_EMERGENCY_WRAP);
    }
    // The flag only ever applies to the character right after the hyphen, so
    // it is recomputed from scratch on every iteration.
    followsWordHyphen = nsContentUtils::IsHyphen(current) && i &&
                        nsContentUtils::IsAlphanumeric(mString[i - 1]);
  }
}
// Memory reporting: base-class measurement plus the shallow sizes of the
// style and capitalization arrays, plus the factory when this run owns it.
size_t nsTransformedTextRun::SizeOfExcludingThis(
    mozilla::MallocSizeOf aMallocSizeOf) {
  size_t bytes = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
  bytes += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
  bytes += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
  // The factory is only counted when it belongs to this run.
  bytes += mOwnsFactory ? aMallocSizeOf(mFactory) : 0;
  return bytes;
}
// Memory reporting: the allocation holding this object plus everything
// counted by SizeOfExcludingThis().
size_t nsTransformedTextRun::SizeOfIncludingThis(
    mozilla::MallocSizeOf aMallocSizeOf) {
  const size_t selfBytes = aMallocSizeOf(this);
  const size_t ownedBytes = SizeOfExcludingThis(aMallocSizeOf);
  return selfBytes + ownedBytes;
}
// Creates a transformed text run for 16-bit text. This is a thin wrapper
// that forwards every argument to nsTransformedTextRun::Create, passing
// |this| as the run's factory; the result (possibly null) is returned as-is.
already_AddRefed<nsTransformedTextRun>
nsTransformingTextRunFactory::MakeTextRun(
const char16_t* aString, uint32_t aLength,
const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
aLength, aFlags, aFlags2,
std::move(aStyles), aOwnsFactory);
}
// Creates a transformed text run for 8-bit text by widening it to UTF-16 and
// delegating to the 16-bit overload with the TEXT_IS_8BIT flag cleared.
already_AddRefed<nsTransformedTextRun>
nsTransformingTextRunFactory::MakeTextRun(
    const uint8_t* aString, uint32_t aLength,
    const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
    gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
  // We'll only have a Unicode code path to minimize the amount of code needed
  // for these rarely used features
  const char* const narrowText = reinterpret_cast<const char*>(aString);
  NS_ConvertASCIItoUTF16 wideText(narrowText, aLength);

  // The widened copy is no longer 8-bit, so drop that flag before delegating.
  const gfx::ShapedTextFlags wideFlags =
      aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT;

  return MakeTextRun(wideText.get(), aLength, aParams, aFontGroup, wideFlags,
                     aFlags2, std::move(aStyles), aOwnsFactory);
}
// Copies glyph data from aSrc into aDest (which must not yet have any glyph
// runs), collapsing each group of source characters flagged in aCharsToMerge
// into a single destination character.
//
// aCharsToMerge is indexed by source character: a true entry means that
// character is merged with the one before it. aDeletedChars is indexed by
// destination character: a true entry means that destination position only
// receives an empty continuation glyph.
//
// Merging cannot cross a glyph-run boundary: if a glyph run starts with a
// character that is flagged for merging, the glyphs of that leading group are
// discarded (with a debug warning).
void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
                              const bool* aCharsToMerge,
                              const bool* aDeletedChars) {
  MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");

  using CompressedGlyph = gfxTextRun::CompressedGlyph;

  const CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
  CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
  const CompressedGlyph continuationGlyph =
      CompressedGlyph::MakeComplex(false, false);

  // Detailed glyphs accumulated for the merge group currently being built.
  AutoTArray<gfxTextRun::DetailedGlyph, 2> pendingGlyphs;
  // Next destination character position to be written.
  uint32_t destIndex = 0;

  gfxTextRun::GlyphRunIterator runIter(aSrc, gfxTextRun::Range(aSrc));
  for (; !runIter.AtEnd(); runIter.NextRun()) {
    const gfxTextRun::GlyphRun* glyphRun = runIter.GlyphRun();
    aDest->AddGlyphRun(glyphRun->mFont, glyphRun->mMatchType, destIndex, false,
                       glyphRun->mOrientation, glyphRun->mIsCJK);

    const uint32_t runEnd = runIter.StringEnd();
    uint32_t groupStart = runIter.StringStart();
    // Initialize to a copy of the first source glyph in the merge group.
    CompressedGlyph groupGlyph = srcGlyphs[groupStart];
    bool sawMissing = false;

    for (uint32_t srcIndex = groupStart; srcIndex < runEnd; ++srcIndex) {
      const CompressedGlyph srcGlyph = srcGlyphs[srcIndex];
      if (!srcGlyph.IsSimpleGlyph()) {
        if (srcGlyph.IsMissing()) {
          sawMissing = true;
          pendingGlyphs.Clear();
        }
        if (srcGlyph.GetGlyphCount() > 0) {
          pendingGlyphs.AppendElements(aSrc->GetDetailedGlyphs(srcIndex),
                                       srcGlyph.GetGlyphCount());
        }
      } else if (!sawMissing) {
        // Expand the simple glyph into a detailed record so that it can be
        // combined with the rest of the group.
        gfxTextRun::DetailedGlyph detail;
        detail.mGlyphID = srcGlyph.GetSimpleGlyph();
        detail.mAdvance = srcGlyph.GetSimpleAdvance();
        pendingGlyphs.AppendElement(detail);
      }

      const uint32_t nextIndex = srcIndex + 1;
      if (nextIndex < runEnd && aCharsToMerge[nextIndex]) {
        // next char is supposed to merge with current, so loop without
        // writing current merged glyph to the destination
        continue;
      }

      // If the start of the merge group is actually a character that should
      // have been merged with the previous character (this can happen
      // if there's a font change in the middle of a case-mapped character,
      // that decomposed into a sequence of base+diacritics, for example),
      // just discard the entire merge group.
      NS_WARNING_ASSERTION(
          !aCharsToMerge[groupStart],
          "unable to merge across a glyph run boundary, glyph(s) discarded");
      if (!aCharsToMerge[groupStart]) {
        const bool canCopySimple =
            groupGlyph.IsSimpleGlyph() && pendingGlyphs.Length() == 1;
        if (!canCopySimple) {
          // Set up a complex glyph record and store the detailed glyphs.
          groupGlyph.SetComplex(groupGlyph.IsClusterStart(),
                                groupGlyph.IsLigatureGroupStart());
          destGlyphs[destIndex] = groupGlyph;
          aDest->SetDetailedGlyphs(destIndex, pendingGlyphs.Length(),
                                   pendingGlyphs.Elements());
          if (sawMissing) {
            destGlyphs[destIndex].SetMissing();
          }
        } else {
          // The existing simple glyph record can be copied unchanged.
          destGlyphs[destIndex] = groupGlyph;
        }
        ++destIndex;

        // Destination positions flagged as deleted get continuation glyphs.
        while (destIndex < aDest->GetLength() && aDeletedChars[destIndex]) {
          destGlyphs[destIndex] = continuationGlyph;
          ++destIndex;
        }
      }

      // Start a fresh merge group at the next source character.
      pendingGlyphs.Clear();
      sawMissing = false;
      groupStart = nextIndex;
      if (groupStart < runEnd) {
        groupGlyph = srcGlyphs[groupStart];
      }
    }
    NS_ASSERTION(pendingGlyphs.Length() == 0,
                 "Leftover glyphs, don't request merging of the last character "
                 "with its next!");
  }
  NS_ASSERTION(destIndex == aDest->GetLength(), "Bad offset calculations");
}
// Builds the factory parameters used when creating the inner (child) text run
// for aTextRun, and stores aTextRun's shaping flags in *aFlags.
//
// Only the reference draw target and aTextRun's app-units-per-dev-unit value
// are filled in; the remaining fields are left null/zero.
gfxTextRunFactory::Parameters GetParametersForInner(
    nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
    DrawTarget* aRefDrawTarget) {
  *aFlags = aTextRun->GetFlags();

  gfxTextRunFactory::Parameters innerParams = {
      aRefDrawTarget, nullptr, nullptr,
      nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
  return innerParams;
}
// Some languages have special casing conventions that differ from the
// default Unicode mappings.
// The enum values here are named for well-known exemplar languages that
// exhibit the behavior in question; multiple lang tags may map to the
// same setting here, if the behavior is shared by other languages.
// GetCasingFor() below maps a language atom to one of these values.
enum LanguageSpecificCasingBehavior {
eLSCB_None, // default non-lang-specific behavior
eLSCB_Dutch, // treat "ij" digraph as a unit for capitalization
eLSCB_Greek, // strip accent when uppercasing Greek vowels
eLSCB_Irish, // keep prefix letters as lowercase when uppercasing Irish
eLSCB_Turkish, // preserve dotted/dotless-i distinction in uppercase
eLSCB_Lithuanian // retain dot on lowercase i/j when an accent is present
};
// Maps a language atom to its casing behavior by exact comparison against the
// known language atoms. Returns eLSCB_None when the atom is not one of them.
static LanguageSpecificCasingBehavior CasingForExactLang(const nsAtom* aLang) {
  const bool isTurkic = aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
                        aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
                        aLang == nsGkAtoms::tt;
  if (isTurkic) {
    return eLSCB_Turkish;
  }
  if (aLang == nsGkAtoms::nl) {
    return eLSCB_Dutch;
  }
  if (aLang == nsGkAtoms::el) {
    return eLSCB_Greek;
  }
  if (aLang == nsGkAtoms::ga) {
    return eLSCB_Irish;
  }
  if (aLang == nsGkAtoms::lt_) {
    return eLSCB_Lithuanian;
  }
  return eLSCB_None;
}

// Returns the language-specific casing behavior for aLang (which may be
// null, giving eLSCB_None).
//
// The full tag is tried first. If it is not recognized and contains a '-'
// after its first character, everything from the first '-' onwards (e.g. a
// region subtag) is dropped and the remaining prefix is tried instead.
static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
  if (!aLang) {
    return eLSCB_None;
  }

  const LanguageSpecificCasingBehavior exactMatch = CasingForExactLang(aLang);
  if (exactMatch != eLSCB_None) {
    return exactMatch;
  }

  // Is there a region subtag we should ignore?
  nsAtomString tag(const_cast<nsAtom*>(aLang));
  const int dashPos = tag.FindChar('-');
  if (dashPos <= 0) {
    return eLSCB_None;
  }

  // The prefix cannot contain another '-', so one more exact lookup suffices.
  tag.Truncate(dashPos);
  RefPtr<nsAtom> primaryLang = NS_Atomize(tag);
  return primaryLang ? CasingForExactLang(primaryLang) : eLSCB_None;
}
// Applies text-transform processing to aString and appends the result to
// aConvertedString. This covers the case mappings (lowercase, uppercase,
// capitalize, math-auto), the optional full-width / full-size-kana
// conversions, and password masking.
//
// Parameters:
//   aString              Source text (UTF-16).
//   aConvertedString     Receives the transformed text (appended to).
//   aGlobalTransform     If present, used for every character in preference
//                        to the per-character transform from aTextRun.
//   aMaskChar            If non-zero, every character is replaced by it.
//                        Otherwise characters whose style has mMaskPassword
//                        set are replaced by TextEditor::PasswordMask().
//   aCaseTransformsOnly  If true, the full-width and full-size-kana
//                        conversions are skipped.
//   aLanguage            Initial language for language-specific casing; when
//                        aTextRun is given, the per-character style language
//                        takes over.
//   aCharsToMergeArray   Gets one entry per output code unit; true marks a
//                        code unit to be merged with the one before it.
//   aDeletedCharsArray   Gets one entry per input code unit; true marks input
//                        that has no output character of its own (a removed
//                        character, or the trailing half of a surrogate pair).
//   aTextRun             Optional source of per-character styles,
//                        capitalization flags and break opportunities.
//   aOffsetInTextRun     Offset in aTextRun corresponding to aString[0].
//   aCanBreakBeforeArray / aStyleArray
//                        Optional auxiliary outputs, kept in step with
//                        aCharsToMergeArray. They are only filled in when both
//                        are supplied, which also requires aTextRun.
//
// Returns true if any character was deleted or expanded, i.e. the caller must
// merge glyphs rather than copy them one-to-one.
bool nsCaseTransformTextRunFactory::TransformString(
    const nsAString& aString, nsString& aConvertedString,
    const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar,
    bool aCaseTransformsOnly, const nsAtom* aLanguage,
    nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
    const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
    nsTArray<uint8_t>* aCanBreakBeforeArray,
    nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
  bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
  MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
             "text run must be provided to use aux output arrays");

  uint32_t length = aString.Length();
  const char16_t* str = aString.BeginReading();
  // If an unconditional mask character was passed, we'll use it; if not, any
  // masking called for by the textrun styles will use TextEditor's mask char.
  const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask();

  bool mergeNeeded = false;

  bool capitalizeDutchIJ = false;
  bool prevIsLetter = false;
  bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
                          // when doing Irish lowercasing
  bool seenSoftDotted = false;  // true immediately after an I or J that is
                                // converted to lowercase in Lithuanian mode
  uint32_t sigmaIndex = uint32_t(-1);
  nsUGenCategory cat;

  StyleTextTransform style = aGlobalTransform.valueOr(StyleTextTransform::NONE);
  bool forceNonFullWidth = false;
  const nsAtom* lang = aLanguage;

  LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
  mozilla::GreekCasing::State greekState;
  mozilla::IrishCasing::State irishState;
  uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
                                      // in the output string
  uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
                                         // string (may differ from output due
                                         // to expansions like eszet -> 'SS')
  uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
                                      // tonos added (if it is disjunctive eta)
  const char16_t kGreekUpperEta = 0x0397;

  // If we're doing capitalization and don't have a textrun, this is the state
  // to be passed to each call to nsLineBreaker::ShouldCapitalize.
  bool capitalizeNext = true;

  for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
    uint32_t ch = str[i];

    RefPtr<nsTransformedCharStyle> charStyle;
    if (aTextRun) {
      charStyle = aTextRun->mStyles[aOffsetInTextRun];
      style = aGlobalTransform.valueOr(charStyle->mTextTransform);
      forceNonFullWidth = charStyle->mForceNonFullWidth;

      nsAtom* newLang =
          charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
      if (lang != newLang) {
        // A language change resets all language-specific casing state.
        lang = newLang;
        languageSpecificCasing = GetCasingFor(lang);
        greekState.Reset();
        irishState.Reset();
        irishMark = uint32_t(-1);
        irishMarkSrc = uint32_t(-1);
        greekMark = uint32_t(-1);
      }
    }

    // These should be mutually exclusive: mMaskPassword is set if we are
    // handling <input type=password>, where the TextEditor code controls
    // masking and we use its PasswordMask() character, in which case
    // aMaskChar (from -webkit-text-security) is not used.
    MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword));

    bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
    int extraChars = 0;
    const unicode::MultiCharMapping* mcm;
    bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?

    if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    }
    const uint32_t originalCh = ch;

    // Skip case transform if we're masking current character.
    if (!maskPassword) {
      switch ((style & StyleTextTransform::CASE_TRANSFORMS)._0) {
        case StyleTextTransform::NONE._0:
          break;

        case StyleTextTransform::LOWERCASE._0:
          if (languageSpecificCasing == eLSCB_Turkish) {
            if (ch == 'I') {
              ch = LATIN_SMALL_LETTER_DOTLESS_I;
              prevIsLetter = true;
              sigmaIndex = uint32_t(-1);
              break;
            }
            if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
              ch = 'i';
              prevIsLetter = true;
              sigmaIndex = uint32_t(-1);
              break;
            }
          }

          if (languageSpecificCasing == eLSCB_Lithuanian) {
            // clang-format off
            /* From SpecialCasing.txt:
             * # Introduce an explicit dot above when lowercasing capital I's and J's
             * # whenever there are more accents above.
             * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
             *
             * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
             * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
             * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
             * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
             * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
             * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
             */
            // clang-format on
            if (ch == 'I' || ch == 'J' || ch == 0x012E) {
              ch = ToLowerCase(ch);
              prevIsLetter = true;
              seenSoftDotted = true;
              sigmaIndex = uint32_t(-1);
              break;
            }
            if (ch == 0x00CC) {
              aConvertedString.Append('i');
              aConvertedString.Append(0x0307);
              extraChars += 2;
              ch = 0x0300;
              prevIsLetter = true;
              seenSoftDotted = false;
              sigmaIndex = uint32_t(-1);
              break;
            }
            if (ch == 0x00CD) {
              aConvertedString.Append('i');
              aConvertedString.Append(0x0307);
              extraChars += 2;
              ch = 0x0301;
              prevIsLetter = true;
              seenSoftDotted = false;
              sigmaIndex = uint32_t(-1);
              break;
            }
            if (ch == 0x0128) {
              aConvertedString.Append('i');
              aConvertedString.Append(0x0307);
              extraChars += 2;
              ch = 0x0303;
              prevIsLetter = true;
              seenSoftDotted = false;
              sigmaIndex = uint32_t(-1);
              break;
            }
          }

          cat = unicode::GetGenCategory(ch);

          if (languageSpecificCasing == eLSCB_Irish &&
              cat == nsUGenCategory::kLetter) {
            // See bug 1018805 for Irish lowercasing requirements
            if (!prevIsLetter && (ch == 'n' || ch == 't')) {
              ntPrefix = true;
            } else {
              if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
                aConvertedString.Append('-');
                ++extraChars;
              }
              ntPrefix = false;
            }
          } else {
            ntPrefix = false;
          }

          if (seenSoftDotted && cat == nsUGenCategory::kMark) {
            // The seenSoftDotted flag will only be set in Lithuanian mode.
            if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
              aConvertedString.Append(0x0307);
              ++extraChars;
            }
          }
          seenSoftDotted = false;

          // Special lowercasing behavior for Greek Sigma: note that this is
          // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
          // *not* a language-specific mapping; it applies regardless of the
          // language of the element.
          //
          // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
          // (i.e. the non-final form) whenever there is a following letter, or
          // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
          // followed by a LETTER); and to FINAL SIGMA when it is preceded by
          // another letter but not followed by one.
          //
          // To implement the context-sensitive nature of this mapping, we keep
          // track of whether the previous character was a letter. If not,
          // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
          // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
          // record the position in the converted string; if we then encounter
          // another letter, that FINAL SIGMA is replaced with a standard
          // SMALL SIGMA.

          // If sigmaIndex is not -1, it marks where we have provisionally
          // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
          // letter, we need to change it to SMALL SIGMA.
          if (sigmaIndex != uint32_t(-1)) {
            if (cat == nsUGenCategory::kLetter) {
              aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
            }
          }

          if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
            // If preceding char was a letter, map to FINAL instead of SMALL,
            // and note where it occurred by setting sigmaIndex; we'll change
            // it to standard SMALL SIGMA later if another letter follows
            if (prevIsLetter) {
              ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
              sigmaIndex = aConvertedString.Length();
            } else {
              // CAPITAL SIGMA not preceded by a letter is unconditionally
              // mapped to SMALL SIGMA
              ch = GREEK_SMALL_LETTER_SIGMA;
              sigmaIndex = uint32_t(-1);
            }
            prevIsLetter = true;
            break;
          }

          // ignore diacritics for the purpose of contextual sigma mapping;
          // otherwise, reset prevIsLetter appropriately and clear the
          // sigmaIndex marker
          if (cat != nsUGenCategory::kMark) {
            prevIsLetter = (cat == nsUGenCategory::kLetter);
            sigmaIndex = uint32_t(-1);
          }

          mcm = unicode::SpecialLower(ch);
          if (mcm) {
            // Emit all but the last mapped character now; the last one is
            // left in ch and appended by the common code below.
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
              aConvertedString.Append(mcm->mMappedChars[j]);
              ++extraChars;
              ++j;
            }
            ch = mcm->mMappedChars[j];
            break;
          }

          ch = ToLowerCase(ch);
          break;

        case StyleTextTransform::UPPERCASE._0:
          if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
            break;
          }

          if (languageSpecificCasing == eLSCB_Greek) {
            bool markEta;
            bool updateEta;
            ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
                                                 updateEta);
            if (markEta) {
              greekMark = aConvertedString.Length();
            } else if (updateEta) {
              // Remove the TONOS from an uppercase ETA-TONOS that turned out
              // not to be disjunctive-eta.
              MOZ_ASSERT(aConvertedString.Length() > 0 &&
                             greekMark < aConvertedString.Length(),
                         "bad greekMark!");
              aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
              greekMark = uint32_t(-1);
            }
            break;
          }

          if (languageSpecificCasing == eLSCB_Lithuanian) {
            /*
             * # Remove DOT ABOVE after "i" with upper or titlecase
             *
             * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
             */
            if (ch == 'i' || ch == 'j' || ch == 0x012F) {
              seenSoftDotted = true;
              ch = ToTitleCase(ch);
              break;
            }
            if (seenSoftDotted) {
              seenSoftDotted = false;
              if (ch == 0x0307) {
                // uint32_t(-1) marks the character as deleted; see below.
                ch = uint32_t(-1);
                break;
              }
            }
          }

          if (languageSpecificCasing == eLSCB_Irish) {
            bool mark;
            uint8_t action;
            ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
            if (mark) {
              irishMark = aConvertedString.Length();
              irishMarkSrc = i;
              break;
            } else if (action) {
              nsString& str = aConvertedString;  // shorthand
              switch (action) {
                case 1:
                  // lowercase a single prefix letter
                  MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(),
                             "bad irishMark!");
                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
                  irishMark = uint32_t(-1);
                  irishMarkSrc = uint32_t(-1);
                  break;
                case 2:
                  // lowercase two prefix letters (immediately before current
                  // pos)
                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
                             "bad irishMark!");
                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
                  str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
                  irishMark = uint32_t(-1);
                  irishMarkSrc = uint32_t(-1);
                  break;
                case 3:
                  // lowercase one prefix letter, and delete following hyphen
                  // (which must be the immediately-preceding char)
                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
                             "bad irishMark!");
                  MOZ_ASSERT(
                      irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
                      "failed to set irishMarks");
                  str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
                  aDeletedCharsArray[irishMarkSrc + 1] = true;
                  // Remove the trailing entries (corresponding to the deleted
                  // hyphen) from the auxiliary arrays.
                  uint32_t len = aCharsToMergeArray.Length();
                  MOZ_ASSERT(len >= 2);
                  aCharsToMergeArray.TruncateLength(len - 1);
                  if (auxiliaryOutputArrays) {
                    MOZ_ASSERT(aStyleArray->Length() == len);
                    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
                    aStyleArray->TruncateLength(len - 1);
                    aCanBreakBeforeArray->TruncateLength(len - 1);
                    inhibitBreakBefore = true;
                  }
                  mergeNeeded = true;
                  irishMark = uint32_t(-1);
                  irishMarkSrc = uint32_t(-1);
                  break;
              }
              // ch has been set to the uppercase for current char;
              // No need to check for SpecialUpper here as none of the
              // characters that could trigger an Irish casing action have
              // special mappings.
              break;
            }
            // If we didn't have any special action to perform, fall through
            // to check for special uppercase (ß)
          }

          // Updated mapping for German eszett, not currently reflected in the
          // Unicode data files. This is behind a pref, as it may not work well
          // with many (esp. older) fonts.
          if (ch == 0x00DF &&
              StaticPrefs::
                  layout_css_text_transform_uppercase_eszett_enabled()) {
            ch = 0x1E9E;
            break;
          }

          mcm = unicode::SpecialUpper(ch);
          if (mcm) {
            // Emit all but the last mapped character now; the last one is
            // left in ch and appended by the common code below.
            int j = 0;
            while (j < 2 && mcm->mMappedChars[j + 1]) {
              aConvertedString.Append(mcm->mMappedChars[j]);
              ++extraChars;
              ++j;
            }
            ch = mcm->mMappedChars[j];
            break;
          }

          // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
          // lack of widespread font support for the corresponding Mtavruli
          // characters at this time (July 2018).
          // This condition is to be removed once the major platforms ship with
          // fonts that support U+1C90..1CBF.
          if (ch < 0x10D0 || ch > 0x10FF) {
            ch = ToUpperCase(ch);
          }
          break;

        case StyleTextTransform::CAPITALIZE._0: {
          if (capitalizeDutchIJ && ch == 'j') {
            ch = 'J';
            capitalizeDutchIJ = false;
            break;
          }
          capitalizeDutchIJ = false;
          // If we have a textrun, its mCapitalize array tells us which chars
          // are to be capitalized. If not, we track the state locally, and
          // assume there's no context to be considered.
          bool doCapitalize = false;
          if (aTextRun) {
            if (aOffsetInTextRun < aTextRun->mCapitalize.Length()) {
              doCapitalize = aTextRun->mCapitalize[aOffsetInTextRun];
            }
          } else {
            doCapitalize = nsLineBreaker::ShouldCapitalize(ch, capitalizeNext);
          }
          if (doCapitalize) {
            if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
              ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
              break;
            }
            if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
              ch = 'I';
              capitalizeDutchIJ = true;
              break;
            }
            if (languageSpecificCasing == eLSCB_Lithuanian) {
              /*
               * # Remove DOT ABOVE after "i" with upper or titlecase
               *
               * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
               */
              if (ch == 'i' || ch == 'j' || ch == 0x012F) {
                seenSoftDotted = true;
                ch = ToTitleCase(ch);
                break;
              }
              if (seenSoftDotted) {
                seenSoftDotted = false;
                if (ch == 0x0307) {
                  // uint32_t(-1) marks the character as deleted; see below.
                  ch = uint32_t(-1);
                  break;
                }
              }
            }

            mcm = unicode::SpecialTitle(ch);
            if (mcm) {
              // Emit all but the last mapped character now; the last one is
              // left in ch and appended by the common code below.
              int j = 0;
              while (j < 2 && mcm->mMappedChars[j + 1]) {
                aConvertedString.Append(mcm->mMappedChars[j]);
                ++extraChars;
                ++j;
              }
              ch = mcm->mMappedChars[j];
              break;
            }
            ch = ToTitleCase(ch);
          }
          break;
        }

        case StyleTextTransform::MATH_AUTO._0:
          // text-transform: math-auto is used for automatic italicization of
          // single-char <mi> elements. However, some legacy cases (italic style
          // fallback and <mi> with leading/trailing whitespace) are still
          // handled in MathMLTextRunFactory.
          if (length == 1) {
            uint32_t ch2 =
                MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic);
            if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
              ch = ch2;
            } else if (ch2 != ch && aTextRun) {
              // Bug 930504. Some platforms do not have fonts for Mathematical
              // Alphanumeric Symbols. Hence we only perform the transform if a
              // character is actually available.
              //
              // aTextRun is optional for this function; without one there is
              // no font group to consult, so the character is left untouched
              // rather than dereferencing a null text run.
              auto* fontGroup = aTextRun->GetFontGroup();
              fontGroup->EnsureFontList();
              FontMatchType matchType;
              RefPtr<gfxFont> mathFont = fontGroup->FindFontForChar(
                  ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
              if (mathFont) {
                ch = ch2;
              }
            }
          }
          break;

        default:
          MOZ_ASSERT_UNREACHABLE("all cases should be handled");
          break;
      }

      if (!aCaseTransformsOnly) {
        if (!forceNonFullWidth && (style & StyleTextTransform::FULL_WIDTH)) {
          ch = unicode::GetFullWidth(ch);
        }

        if (style & StyleTextTransform::FULL_SIZE_KANA) {
          // The two tables below are parallel: kSmallKanas is sorted so it can
          // be binary-searched, and the match index selects the replacement
          // from kFullSizeKanas.
          // clang-format off
          static const uint32_t kSmallKanas[] = {
              // ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ
              0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
              // ゎ ゕ ゖ
              0x308E, 0x3095, 0x3096,
              // ァ ィ ゥ ェ ォ ッ ャ ュ ョ
              0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
              // ヮ ヵ ヶ ㇰ ㇱ ㇲ ㇳ ㇴ ㇵ
              0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
              // ㇶ ㇷ ㇸ ㇹ ㇺ ㇻ ㇼ ㇽ ㇾ
              0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
              // ㇿ
              0x31FF,
              // ァ ィ ゥ ェ ォ ャ ュ ョ ッ
              0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
              // 𛄲 𛅐 𛅑 𛅒 𛅕 𛅤 𛅥 𛅦
              0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
              // 𛅧
              0x1B167};
          static const uint16_t kFullSizeKanas[] = {
              // あ い う え お つ や ゆ よ
              0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
              // わ か け
              0x308F, 0x304B, 0x3051,
              // ア イ ウ エ オ ツ ヤ ユ ヨ
              0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
              // ワ カ ケ ク シ ス ト ヌ ハ
              0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
              // ヒ フ ヘ ホ ム ラ リ ル レ
              0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
              // ロ
              0x30ED,
              // ア イ ウ エ オ ヤ ユ ヨ ツ
              0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
              // こ ゐ ゑ を コ ヰ ヱ ヲ ン
              0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
          // clang-format on

          size_t index;
          const uint16_t len = std::size(kSmallKanas);
          if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
            ch = kFullSizeKanas[index];
          }
        }
      }

      if (forceNonFullWidth) {
        ch = unicode::GetFullWidthInverse(ch);
      }
    }

    if (ch == uint32_t(-1)) {
      // The character was removed: record the deletion, emit no output.
      aDeletedCharsArray.AppendElement(true);
      mergeNeeded = true;
    } else {
      aDeletedCharsArray.AppendElement(false);
      aCharsToMergeArray.AppendElement(false);
      if (auxiliaryOutputArrays) {
        aStyleArray->AppendElement(charStyle);
        aCanBreakBeforeArray->AppendElement(
            inhibitBreakBefore
                ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
                : aTextRun->CanBreakBefore(aOffsetInTextRun));
      }

      if (IS_IN_BMP(ch)) {
        aConvertedString.Append(maskPassword ? mask : ch);
      } else {
        if (maskPassword) {
          aConvertedString.Append(mask);
          // TODO: We should show a password mask for a surrogate pair later.
          aConvertedString.Append(mask);
        } else {
          aConvertedString.Append(H_SURROGATE(ch));
          aConvertedString.Append(L_SURROGATE(ch));
        }
        // The second code unit of the pair counts as an extra output char.
        ++extraChars;
      }
      if (!IS_IN_BMP(originalCh)) {
        // Skip the trailing surrogate.
        ++aOffsetInTextRun;
        ++i;
        aDeletedCharsArray.AppendElement(true);
      }

      // Every extra output code unit is flagged to merge with its
      // predecessor, and never allows a break before it.
      while (extraChars-- > 0) {
        mergeNeeded = true;
        aCharsToMergeArray.AppendElement(true);
        if (auxiliaryOutputArrays) {
          aStyleArray->AppendElement(charStyle);
          aCanBreakBeforeArray->AppendElement(
              gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
        }
      }
    }
  }

  // These output arrays, if present, must always have matching lengths:
  if (auxiliaryOutputArrays) {
    DebugOnly<uint32_t> len = aCharsToMergeArray.Length();
    MOZ_ASSERT(aStyleArray->Length() == len);
    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
  }

  return mergeNeeded;
}
// Rebuilds aTextRun's glyph data from its transformed text.
//
// The run's source string is passed through TransformString(), a child text
// run is shaped for the transformed text (via the inner transforming factory
// if there is one, otherwise directly via the font group), and the child's
// glyphs are then copied or merged back into aTextRun. If no child run can be
// created, aTextRun is left untouched.
void nsCaseTransformTextRunFactory::RebuildTextRun(
    nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
    gfxMissingFontRecorder* aMFR) {
  nsAutoString transformedText;
  AutoTArray<bool, 50> mergeFlags;
  AutoTArray<bool, 50> deletedFlags;
  AutoTArray<uint8_t, 50> breakBeforeValues;
  AutoTArray<RefPtr<nsTransformedCharStyle>, 50> childStyles;

  auto globalTransform =
      mAllUppercase ? Some(StyleTextTransform::UPPERCASE) : Nothing();

  const bool needsMerge = TransformString(
      aTextRun->mString, transformedText, globalTransform, mMaskChar,
      /* aCaseTransformsOnly = */ false, nullptr, mergeFlags, deletedFlags,
      aTextRun, 0, &breakBeforeValues, &childStyles);

  gfx::ShapedTextFlags innerFlags;
  gfxTextRunFactory::Parameters innerParams =
      GetParametersForInner(aTextRun, &innerFlags, aRefDrawTarget);
  gfxFontGroup* const fontGroup = aTextRun->GetFontGroup();

  // Exactly one of these ends up holding the child run, depending on whether
  // an inner transforming factory exists.
  RefPtr<nsTransformedTextRun> transformedChild;
  RefPtr<gfxTextRun> cachedChild;
  gfxTextRun* child = nullptr;
  if (!mInnerTransformingTextRunFactory) {
    cachedChild = fontGroup->MakeTextRun(
        transformedText.BeginReading(), transformedText.Length(), &innerParams,
        innerFlags, nsTextFrameUtils::Flags(), aMFR);
    child = cachedChild.get();
  } else {
    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
        transformedText.BeginReading(), transformedText.Length(), &innerParams,
        fontGroup, innerFlags, nsTextFrameUtils::Flags(),
        std::move(childStyles), false);
    child = transformedChild.get();
  }
  if (child == nullptr) {
    return;
  }

  // Copy potential linebreaks into child so they're preserved
  // (and also child will be shaped appropriately)
  NS_ASSERTION(transformedText.Length() == breakBeforeValues.Length(),
               "Dropped characters or break-before values somewhere!");
  gfxTextRun::Range breakRange(0, uint32_t(breakBeforeValues.Length()));
  child->SetPotentialLineBreaks(breakRange, breakBeforeValues.Elements());
  if (transformedChild) {
    transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
  }

  aTextRun->ResetGlyphRuns();

  if (!needsMerge) {
    // No merging to do, so just copy; this produces a more optimized textrun.
    // We can't steal the data because the child may be cached and stealing
    // the data would break the cache.
    aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
    return;
  }

  // Now merge multiple characters into one multi-glyph character as required
  // and deal with skipping deleted accent chars
  NS_ASSERTION(mergeFlags.Length() == child->GetLength(),
               "source length mismatch");
  NS_ASSERTION(deletedFlags.Length() == aTextRun->GetLength(),
               "destination length mismatch");
  MergeCharactersInTextRun(aTextRun, child, mergeFlags.Elements(),
                           deletedFlags.Elements());
}