Source code
Revision control
Copy as Markdown
Other Tools
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
#include "TextDirectiveCreator.h"
#include "AbstractRange.h"
#include "StaticRange.h"
#include "TextDirectiveUtil.h"
#include "mozilla/ErrorResult.h"
#include "mozilla/ResultVariant.h"
#include "nsINode.h"
#include "nsRange.h"
#include "Document.h"
namespace mozilla::dom {
TextDirectiveCreator::TextDirectiveCreator(Document& aDocument,
AbstractRange* aRange)
: mDocument(aDocument), mRange(aRange) {}
/* static */
mozilla::Result<nsCString, ErrorResult>
TextDirectiveCreator::CreateTextDirectiveFromRange(Document& aDocument,
AbstractRange* aInputRange) {
MOZ_ASSERT(aInputRange);
MOZ_ASSERT(!aInputRange->Collapsed());
const nsString rangeContent =
MOZ_TRY(TextDirectiveUtil::RangeContentAsString(aInputRange));
if (rangeContent.IsEmpty()) {
TEXT_FRAGMENT_LOG("Input range does not contain text.");
return VoidCString();
}
const RefPtr<AbstractRange> extendedRange =
MOZ_TRY(ExtendRangeToWordBoundaries(aInputRange));
if (!extendedRange) {
return VoidCString();
}
UniquePtr<TextDirectiveCreator> instance =
MOZ_TRY(CreateInstance(aDocument, extendedRange));
MOZ_TRY(instance->CollectContextTerms());
const bool canContinue = instance->CollectContextTermWordBoundaryDistances();
if (!canContinue) {
return VoidCString();
}
MOZ_TRY(instance->FindAllMatchingCandidates());
return instance->CreateTextDirective();
}
/* static */ Result<bool, ErrorResult>
TextDirectiveCreator::MustUseRangeBasedMatching(AbstractRange* aRange) {
MOZ_ASSERT(aRange);
if (TextDirectiveUtil::FindBlockBoundaryInRange<TextScanDirection::Right>(
*aRange)
.isSome()) {
TEXT_FRAGMENT_LOG(
"Use range-based matching because the target range contains a block "
"boundary.");
return true;
}
const nsString rangeContent =
MOZ_TRY(TextDirectiveUtil::RangeContentAsString(aRange));
const uint32_t kMaxLength = StaticPrefs::
dom_text_fragments_create_text_fragment_exact_match_max_length();
const bool rangeTooLong = rangeContent.Length() > kMaxLength;
if (rangeTooLong) {
TEXT_FRAGMENT_LOG(
"Use range-based matching because the target range is too long "
"({} chars > {} threshold)",
rangeContent.Length(), kMaxLength);
} else {
TEXT_FRAGMENT_LOG("Use exact matching.");
}
return rangeTooLong;
}
Result<UniquePtr<TextDirectiveCreator>, ErrorResult>
TextDirectiveCreator::CreateInstance(Document& aDocument,
AbstractRange* aRange) {
return MOZ_TRY(MustUseRangeBasedMatching(aRange))
? UniquePtr<TextDirectiveCreator>(
new RangeBasedTextDirectiveCreator(aDocument, aRange))
: UniquePtr<TextDirectiveCreator>(
new ExactMatchTextDirectiveCreator(aDocument, aRange));
}
/*static*/
Result<RefPtr<AbstractRange>, ErrorResult>
TextDirectiveCreator::ExtendRangeToWordBoundaries(AbstractRange* aRange) {
MOZ_ASSERT(aRange && !aRange->Collapsed());
TEXT_FRAGMENT_LOG(
"Input range :\n{}",
NS_ConvertUTF16toUTF8(
TextDirectiveUtil::RangeContentAsString(aRange).unwrapOr(
u"<Could not be converted to string>"_ns)));
ErrorResult rv;
RangeBoundary startPoint = TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Right>(aRange->StartRef());
startPoint =
TextDirectiveUtil::FindWordBoundary<TextScanDirection::Left>(startPoint);
RangeBoundary endPoint =
TextDirectiveUtil::FindNextNonWhitespacePosition<TextScanDirection::Left>(
aRange->EndRef());
endPoint =
TextDirectiveUtil::FindWordBoundary<TextScanDirection::Right>(endPoint);
#if MOZ_DIAGNOSTIC_ASSERT_ENABLED
auto cmp = nsContentUtils::ComparePoints(startPoint, endPoint);
MOZ_DIAGNOSTIC_ASSERT(
cmp && *cmp != 1,
"The new end point must not be before the start point.");
#endif
if (startPoint.IsSetAndValid() && endPoint.IsSetAndValid()) {
ErrorResult rv;
RefPtr<AbstractRange> range = StaticRange::Create(startPoint, endPoint, rv);
if (MOZ_UNLIKELY(rv.Failed())) {
return Err(std::move(rv));
}
MOZ_ASSERT(range);
if (!range->Collapsed()) {
TEXT_FRAGMENT_LOG(
"Expanded target range to word boundaries:\n{}",
NS_ConvertUTF16toUTF8(
TextDirectiveUtil::RangeContentAsString(range).unwrapOr(
u"<Could not be converted to string>"_ns)));
return range;
}
}
TEXT_FRAGMENT_LOG("Extending to word boundaries collapsed the range.");
return Result<RefPtr<AbstractRange>, ErrorResult>(nullptr);
}
Result<Ok, ErrorResult> ExactMatchTextDirectiveCreator::CollectContextTerms() {
MOZ_ASSERT(mRange);
if (MOZ_UNLIKELY(mRange->Collapsed())) {
return Ok();
}
TEXT_FRAGMENT_LOG("Collecting context terms for the target range.");
MOZ_TRY(CollectPrefixContextTerm());
MOZ_TRY(CollectSuffixContextTerm());
mStartContent = MOZ_TRY(TextDirectiveUtil::RangeContentAsString(mRange));
mStartFoldCaseContent = mStartContent;
ToFoldedCase(mStartFoldCaseContent);
TEXT_FRAGMENT_LOG("Start term:\n{}", NS_ConvertUTF16toUTF8(mStartContent));
TEXT_FRAGMENT_LOG("No end term present (exact match).");
return Ok();
}
Result<Ok, ErrorResult> RangeBasedTextDirectiveCreator::CollectContextTerms() {
MOZ_ASSERT(mRange);
if (MOZ_UNLIKELY(mRange->Collapsed())) {
return Ok();
}
TEXT_FRAGMENT_LOG("Collecting context terms for the target range.");
MOZ_TRY(CollectPrefixContextTerm());
MOZ_TRY(CollectSuffixContextTerm());
if (const Maybe<RangeBoundary> firstBlockBoundaryInRange =
TextDirectiveUtil::FindBlockBoundaryInRange<TextScanDirection::Right>(
*mRange)) {
TEXT_FRAGMENT_LOG(
"Target range contains a block boundary, collecting start and end "
"terms by considering the closest block boundaries inside the range.");
ErrorResult rv;
RefPtr startRange =
StaticRange::Create(mRange->StartRef(), *firstBlockBoundaryInRange, rv);
if (MOZ_UNLIKELY(rv.Failed())) {
return Err(std::move(rv));
}
MOZ_DIAGNOSTIC_ASSERT(!startRange->Collapsed());
mStartContent =
MOZ_TRY(TextDirectiveUtil::RangeContentAsString(startRange));
const Maybe<RangeBoundary> lastBlockBoundaryInRange =
TextDirectiveUtil::FindBlockBoundaryInRange<TextScanDirection::Left>(
*mRange);
MOZ_DIAGNOSTIC_ASSERT(
lastBlockBoundaryInRange.isSome(),
"If the target range contains a block boundary looking left-to-right, "
"it must also contain one looking right-to-left");
RefPtr endRange =
StaticRange::Create(*lastBlockBoundaryInRange, mRange->EndRef(), rv);
if (MOZ_UNLIKELY(rv.Failed())) {
return Err(std::move(rv));
}
MOZ_DIAGNOSTIC_ASSERT(!endRange->Collapsed());
mEndContent = MOZ_TRY(TextDirectiveUtil::RangeContentAsString(endRange));
} else {
TEXT_FRAGMENT_LOG(
"Target range is too long, collecting start and end by dividing "
"content in the middle.");
mStartContent = MOZ_TRY(TextDirectiveUtil::RangeContentAsString(mRange));
MOZ_DIAGNOSTIC_ASSERT(
mStartContent.Length() >
StaticPrefs::
dom_text_fragments_create_text_fragment_exact_match_max_length());
const auto [wordStart, wordEnd] =
intl::WordBreaker::FindWord(mStartContent, mStartContent.Length() / 2);
mEndContent = Substring(mStartContent, wordEnd);
mStartContent = Substring(mStartContent, 0, wordEnd);
}
mStartFoldCaseContent = mStartContent;
ToFoldedCase(mStartFoldCaseContent);
TEXT_FRAGMENT_LOG("Maximum possible start term:\n{}",
NS_ConvertUTF16toUTF8(mStartContent));
mEndFoldCaseContent = mEndContent;
ToFoldedCase(mEndFoldCaseContent);
TEXT_FRAGMENT_LOG("Maximum possible end term:\n{}",
NS_ConvertUTF16toUTF8(mEndContent));
return Ok();
}
Result<Ok, ErrorResult> TextDirectiveCreator::CollectPrefixContextTerm() {
TEXT_FRAGMENT_LOG("Collecting prefix term for the target range.");
ErrorResult rv;
RangeBoundary prefixEnd =
TextDirectiveUtil::FindNextNonWhitespacePosition<TextScanDirection::Left>(
mRange->StartRef());
RangeBoundary prefixStart =
TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Left>(
prefixEnd);
RefPtr prefixRange = StaticRange::Create(prefixStart, prefixEnd, rv);
if (MOZ_UNLIKELY(rv.Failed())) {
return Err(std::move(rv));
}
MOZ_ASSERT(prefixRange);
mPrefixContent =
MOZ_TRY(TextDirectiveUtil::RangeContentAsString(prefixRange));
mPrefixFoldCaseContent = mPrefixContent;
ToFoldedCase(mPrefixFoldCaseContent);
TEXT_FRAGMENT_LOG("Maximum possible prefix term:\n{}",
NS_ConvertUTF16toUTF8(mPrefixContent));
return Ok();
}
Result<Ok, ErrorResult> TextDirectiveCreator::CollectSuffixContextTerm() {
TEXT_FRAGMENT_LOG("Collecting suffix term for the target range.");
ErrorResult rv;
RangeBoundary suffixBegin = TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Right>(mRange->EndRef());
RangeBoundary suffixEnd =
TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>(
suffixBegin);
RefPtr suffixRange = StaticRange::Create(suffixBegin, suffixEnd, rv);
if (MOZ_UNLIKELY(rv.Failed())) {
return Err(std::move(rv));
}
MOZ_ASSERT(suffixRange);
mSuffixContent =
MOZ_TRY(TextDirectiveUtil::RangeContentAsString(suffixRange));
mSuffixFoldCaseContent = mSuffixContent;
ToFoldedCase(mSuffixFoldCaseContent);
TEXT_FRAGMENT_LOG("Maximum possible suffix term:\n{}",
NS_ConvertUTF16toUTF8(mSuffixContent));
return Ok();
}
bool ExactMatchTextDirectiveCreator::CollectContextTermWordBoundaryDistances() {
mPrefixWordBeginDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Left>(
mPrefixContent);
TEXT_FRAGMENT_LOG("Word begin distances for prefix term: {}",
mPrefixWordBeginDistances);
mSuffixWordEndDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Right>(
mSuffixContent);
TEXT_FRAGMENT_LOG("Word end distances for suffix term: {}",
mSuffixWordEndDistances);
return true;
}
bool RangeBasedTextDirectiveCreator::CollectContextTermWordBoundaryDistances() {
mPrefixWordBeginDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Left>(
mPrefixContent);
TEXT_FRAGMENT_LOG("Word begin distances for prefix term: {}",
mPrefixWordBeginDistances);
mStartWordEndDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Right>(
mStartContent);
TEXT_FRAGMENT_LOG("Word end distances for start term: {}",
mStartWordEndDistances);
mEndWordBeginDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Left>(
mEndContent);
TEXT_FRAGMENT_LOG("Word begin distances for end term: {}",
mEndWordBeginDistances);
mSuffixWordEndDistances =
TextDirectiveUtil::ComputeWordBoundaryDistances<TextScanDirection::Right>(
mSuffixContent);
TEXT_FRAGMENT_LOG("Word end distances for suffix term: {}",
mSuffixWordEndDistances);
if (mEndWordBeginDistances.IsEmpty()) {
TEXT_FRAGMENT_LOG(
"No word begin distances for end term. This is likely because the "
"target range's content does not contain word boundaries. In this "
"case, it's not possible to create a text directive using the range "
"based algorithm, however using the exact match algorithm would create "
"a too-long text directive. Therefore, the operation is aborted.");
return false;
}
return true;
}
Result<nsTArray<RefPtr<AbstractRange>>, ErrorResult>
TextDirectiveCreator::FindAllMatchingRanges(const nsString& aSearchQuery,
const RangeBoundary& aSearchStart,
const RangeBoundary& aSearchEnd) {
MOZ_ASSERT(!aSearchQuery.IsEmpty());
RangeBoundary searchStart = aSearchStart;
nsTArray<RefPtr<AbstractRange>> matchingRanges;
while (true) {
if (mWatchdog.IsDone()) {
return matchingRanges;
}
RefPtr<AbstractRange> searchResult = TextDirectiveUtil::FindStringInRange(
searchStart, aSearchEnd, aSearchQuery, true, true, &mNodeIndexCache);
if (!searchResult || searchResult->Collapsed()) {
break;
}
searchStart = searchResult->StartRef();
if (auto cmp = nsContentUtils::ComparePoints(searchStart, aSearchEnd,
&mNodeIndexCache);
!cmp || *cmp != -1) {
// this means hitting a bug in nsFind which apparently does not stop
// exactly where it is told to. There are cases where it might
// overshoot, e.g. if `aSearchEnd` is a text node with offset=0.
// However, due to reusing the cache used by nsFind this additional call
// to ComparePoints should be very cheap.
break;
}
matchingRanges.AppendElement(searchResult);
MOZ_DIAGNOSTIC_ASSERT(searchResult->GetStartContainer()->IsText());
auto newSearchStart =
TextDirectiveUtil::MoveToNextBoundaryPoint(searchStart);
MOZ_DIAGNOSTIC_ASSERT(newSearchStart != searchStart);
searchStart = newSearchStart;
if (auto cmp = nsContentUtils::ComparePoints(searchStart, aSearchEnd,
&mNodeIndexCache);
!cmp || *cmp != -1) {
break;
}
}
TEXT_FRAGMENT_LOG(
"Found {} matches for the input '{}' in the partial document.",
matchingRanges.Length(), NS_ConvertUTF16toUTF8(aSearchQuery));
return matchingRanges;
}
Result<Ok, ErrorResult>
ExactMatchTextDirectiveCreator::FindAllMatchingCandidates() {
if (MOZ_UNLIKELY(mRange->Collapsed())) {
return Ok();
}
TEXT_FRAGMENT_LOG(
"Searching all occurrences of range content ({}) in the partial document "
"from document begin to begin of target range.",
NS_ConvertUTF16toUTF8(mStartContent));
const nsTArray<RefPtr<AbstractRange>> matchRanges =
MOZ_TRY(FindAllMatchingRanges(mStartContent, {&mDocument, 0u},
mRange->StartRef()));
FindCommonSubstringLengths(matchRanges);
return Ok();
}
void ExactMatchTextDirectiveCreator::FindCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges) {
if (mWatchdog.IsDone()) {
return;
}
size_t loopCounter = 0;
for (const auto& range : aMatchRanges) {
TEXT_FRAGMENT_LOG("Computing common prefix substring length for match {}.",
++loopCounter);
const uint32_t commonPrefixLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Left>(
mPrefixFoldCaseContent,
TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Left>(range->StartRef()));
TEXT_FRAGMENT_LOG("Computing common suffix substring length for match {}.",
loopCounter);
const uint32_t commonSuffixLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Right>(
mSuffixFoldCaseContent,
TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Right>(range->EndRef()));
mCommonSubstringLengths.EmplaceBack(commonPrefixLength, commonSuffixLength);
}
}
Result<Ok, ErrorResult>
RangeBasedTextDirectiveCreator::FindAllMatchingCandidates() {
MOZ_DIAGNOSTIC_ASSERT(!mStartWordEndDistances.IsEmpty() &&
!mEndWordBeginDistances.IsEmpty());
const nsString firstWordOfStartContent(
Substring(mStartContent, 0, mStartWordEndDistances[0]));
const nsString lastWordOfEndContent(
Substring(mEndContent, mEndContent.Length() - mEndWordBeginDistances[0]));
TEXT_FRAGMENT_LOG(
"Searching all occurrences of first word of start content ({}) in the "
"partial document from document begin to begin of the target range.",
NS_ConvertUTF16toUTF8(firstWordOfStartContent));
const nsTArray<RefPtr<AbstractRange>> startContentRanges =
MOZ_TRY(FindAllMatchingRanges(firstWordOfStartContent, {&mDocument, 0u},
mRange->StartRef()));
FindStartMatchCommonSubstringLengths(startContentRanges);
if (mWatchdog.IsDone()) {
return Ok();
}
TEXT_FRAGMENT_LOG(
"Searching all occurrences of last word of end content ({}) in the "
"partial document from beginning of the target range to the end of the "
"target range, excluding the last word.",
NS_ConvertUTF16toUTF8(lastWordOfEndContent));
auto searchEnd =
TextDirectiveUtil::FindNextNonWhitespacePosition<TextScanDirection::Left>(
mRange->EndRef());
searchEnd =
TextDirectiveUtil::FindWordBoundary<TextScanDirection::Left>(searchEnd);
const nsTArray<RefPtr<AbstractRange>> endContentRanges =
MOZ_TRY(FindAllMatchingRanges(lastWordOfEndContent, mRange->StartRef(),
searchEnd));
FindEndMatchCommonSubstringLengths(endContentRanges);
return Ok();
}
void RangeBasedTextDirectiveCreator::FindStartMatchCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges) {
size_t loopCounter = 0;
for (const auto& range : aMatchRanges) {
++loopCounter;
TEXT_FRAGMENT_LOG(
"Computing common prefix substring length for start match {}.",
loopCounter);
const uint32_t commonPrefixLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Left>(
mPrefixFoldCaseContent,
TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Left>(range->StartRef()));
TEXT_FRAGMENT_LOG(
"Computing common start substring length for start match {}.",
loopCounter);
const uint32_t commonStartLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Right>(mStartFoldCaseContent, range->StartRef());
const uint32_t commonStartLengthWithoutFirstWord =
std::max(0, int(commonStartLength - mStartWordEndDistances[0]));
TEXT_FRAGMENT_LOG("Ignoring first word ({}). Remaining common length: {}",
NS_ConvertUTF16toUTF8(Substring(
mStartContent, 0, mStartWordEndDistances[0])),
commonStartLengthWithoutFirstWord);
mStartMatchCommonSubstringLengths.EmplaceBack(
commonPrefixLength, commonStartLengthWithoutFirstWord);
}
}
void RangeBasedTextDirectiveCreator::FindEndMatchCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges) {
size_t loopCounter = 0;
for (const auto& range : aMatchRanges) {
++loopCounter;
TEXT_FRAGMENT_LOG("Computing common end substring length for end match {}.",
loopCounter);
const uint32_t commonEndLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Left>(mEndFoldCaseContent, range->EndRef());
const uint32_t commonEndLengthWithoutLastWord =
std::max(0, int(commonEndLength - mEndWordBeginDistances[0]));
TEXT_FRAGMENT_LOG(
"Ignoring last word ({}). Remaining common length: {}",
NS_ConvertUTF16toUTF8(Substring(
mEndContent, mEndContent.Length() - mEndWordBeginDistances[0])),
commonEndLengthWithoutLastWord);
TEXT_FRAGMENT_LOG(
"Computing common suffix substring length for end match {}.",
loopCounter);
const uint32_t commonSuffixLength =
TextDirectiveUtil::ComputeCommonSubstringLength<
TextScanDirection::Right>(
mSuffixFoldCaseContent,
TextDirectiveUtil::FindNextNonWhitespacePosition<
TextScanDirection::Right>(range->EndRef()));
mEndMatchCommonSubstringLengths.EmplaceBack(commonEndLengthWithoutLastWord,
commonSuffixLength);
}
}
Result<nsCString, ErrorResult> TextDirectiveCreator::CreateTextDirective() {
if (mWatchdog.IsDone()) {
TEXT_FRAGMENT_LOG("Hitting timeout.");
return VoidCString();
}
if (mRange->Collapsed()) {
TEXT_FRAGMENT_LOG("Input range collapsed.");
return VoidCString();
}
if (mStartContent.IsEmpty()) {
TEXT_FRAGMENT_LOG("Input range is empty.");
return VoidCString();
}
if (const Maybe<TextDirective> textDirective = FindShortestCombination()) {
nsCString textDirectiveString;
DebugOnly<bool> ret =
create_text_directive(&*textDirective, &textDirectiveString);
MOZ_ASSERT(ret);
TEXT_FRAGMENT_LOG("Created text directive: {}", textDirectiveString);
return textDirectiveString;
}
TEXT_FRAGMENT_LOG(
"It's not possible to create a text directive for the given range.");
return nsCString{};
}
/*static*/ std::tuple<nsTArray<uint32_t>, nsTArray<uint32_t>>
TextDirectiveCreator::ExtendSubstringLengthsToWordBoundaries(
const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactSubstringLengths,
const Span<const uint32_t>& aFirstWordPositions,
const Span<const uint32_t>& aSecondWordPositions) {
const auto getNextWordBoundaryPosition =
[](const Span<const uint32_t>& distances, uint32_t length) {
// Note: This algorithm works for word begins and word ends,
// since the position arrays for properties that go right-to-left
// (prefix, end) are reversed and start from the end of the
// strings.
for (const uint32_t distance : distances) {
if (distance > length) {
return distance;
}
}
return distances.IsEmpty() ? 0 : distances.at(distances.Length() - 1);
};
const auto hashSetToSortedArray = [](const nsTHashSet<uint32_t>& aHashSet) {
AutoTArray<uint32_t, 64> array;
for (auto value : aHashSet) {
array.InsertElementSorted(value);
}
return array;
};
nsTHashSet<uint32_t> firstSet;
nsTHashSet<uint32_t> secondSet;
firstSet.Insert(0);
secondSet.Insert(0);
// This loop is O(n^2) in the worst case, but the number of
// aFirstWordPositions and aSecondWordPositions is small (< 32).
// Also, one of the purposes of this algorithm is to bucket the exact lengths
// (which represent the amount of matches for the target range) into word
// bounded lengths. This means that the number of unique word bounded lengths
// is < 32.
for (const auto& [first, second] : aExactSubstringLengths) {
firstSet.Insert(getNextWordBoundaryPosition(aFirstWordPositions, first));
secondSet.Insert(getNextWordBoundaryPosition(aSecondWordPositions, second));
}
return {hashSetToSortedArray(firstSet), hashSetToSortedArray(secondSet)};
}
/*static*/
Maybe<std::tuple<uint32_t, uint32_t>>
TextDirectiveCreator::CheckAllCombinations(
const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactWordLengths,
const nsTArray<uint32_t>& aFirstExtendedToWordBoundaries,
const nsTArray<uint32_t>& aSecondExtendedToWordBoundaries) {
nsTArray<std::tuple<uint32_t, uint32_t, uint32_t>> sortedCandidates;
sortedCandidates.SetCapacity(aFirstExtendedToWordBoundaries.Length() *
aSecondExtendedToWordBoundaries.Length());
// Create all combinations of the extended values and sort them by their
// cost function value (sum of the two values).
// The cost function value is used to sort the candidates, so that the
// candidates with the lowest cost function value are checked first. Since the
// algorithm searches for the shortest possible combination, it can return as
// soon as it finds a valid combination.
for (const uint32_t firstExtendedToWordBoundary :
aFirstExtendedToWordBoundaries) {
for (const uint32_t secondExtendedToWordBoundary :
aSecondExtendedToWordBoundaries) {
const uint32_t costFunctionValue =
firstExtendedToWordBoundary + secondExtendedToWordBoundary;
sortedCandidates.InsertElementSorted(
std::tuple{firstExtendedToWordBoundary, secondExtendedToWordBoundary,
costFunctionValue},
[](const auto& a, const auto& b) -> int {
return std::get<2>(a) - std::get<2>(b);
});
}
}
for (auto [firstExtendedToWordBoundary, secondExtendedToWordBoundary,
costFunctionValue] : sortedCandidates) {
TEXT_FRAGMENT_LOG("Checking candidate ({},{}). Score: {}",
firstExtendedToWordBoundary, secondExtendedToWordBoundary,
costFunctionValue);
const bool isInvalid = AnyOf(
aExactWordLengths.begin(), aExactWordLengths.end(),
[firstExtended = firstExtendedToWordBoundary,
secondExtended = secondExtendedToWordBoundary](
const std::tuple<uint32_t, uint32_t>& exactWordLengths) {
const auto [firstExact, secondExact] = exactWordLengths;
return firstExtended <= firstExact && secondExtended <= secondExact;
});
if (isInvalid) {
TEXT_FRAGMENT_LOG(
"Current candidate doesn't eliminate all matches. Discarding this "
"candidate.");
continue;
}
TEXT_FRAGMENT_LOG("Current candidate ({},{}) is the best candidate.",
firstExtendedToWordBoundary,
secondExtendedToWordBoundary);
return Some(
std::tuple{firstExtendedToWordBoundary, secondExtendedToWordBoundary});
}
return Nothing{};
}
Maybe<TextDirective> ExactMatchTextDirectiveCreator::FindShortestCombination()
const {
const auto [prefixLengths, suffixLengths] =
TextDirectiveCreator::ExtendSubstringLengthsToWordBoundaries(
mCommonSubstringLengths, mPrefixWordBeginDistances,
mSuffixWordEndDistances);
TEXT_FRAGMENT_LOG("Find shortest combination based on prefix and suffix.");
TEXT_FRAGMENT_LOG("Matches to eliminate: {}, Total combinations: {}",
mCommonSubstringLengths.Length(),
prefixLengths.Length() * suffixLengths.Length());
TEXT_FRAGMENT_LOG("Checking prefix lengths (extended to word boundaries): {}",
prefixLengths);
TEXT_FRAGMENT_LOG("Checking suffix lengths (extended to word boundaries): {}",
suffixLengths);
TEXT_FRAGMENT_LOG("Matches: {}", mCommonSubstringLengths);
return CheckAllCombinations(mCommonSubstringLengths, prefixLengths,
suffixLengths)
.andThen([&](std::tuple<uint32_t, uint32_t> bestMatch) {
const auto [prefixLength, suffixLength] = bestMatch;
TextDirective td;
if (prefixLength) {
td.prefix =
Substring(mPrefixContent, mPrefixContent.Length() - prefixLength);
}
td.start = mStartContent;
if (suffixLength) {
td.suffix = Substring(mSuffixContent, 0, suffixLength);
}
return Some(td);
});
}
Maybe<TextDirective> RangeBasedTextDirectiveCreator::FindShortestCombination()
const {
// For this algorithm, ignore the first word of the start term and the last
// word of the end term (which are required). This allows the optimization
// algorithm to minimize to 0.
auto [prefixLengths, startLengths] = ExtendSubstringLengthsToWordBoundaries(
mStartMatchCommonSubstringLengths, mPrefixWordBeginDistances,
Span(mStartWordEndDistances).From(1));
TEXT_FRAGMENT_LOG(
"Find shortest combination for start match based on prefix and start");
TEXT_FRAGMENT_LOG("Matches to eliminate: {}, Total combinations: {}",
mStartMatchCommonSubstringLengths.Length(),
prefixLengths.Length() * startLengths.Length());
TEXT_FRAGMENT_LOG("Checking prefix lengths (extended to word boundaries): {}",
prefixLengths);
TEXT_FRAGMENT_LOG("Checking start lengths (extended to word boundaries): {}",
startLengths);
TEXT_FRAGMENT_LOG("Matches: {}", mStartMatchCommonSubstringLengths);
const auto bestStartMatch = CheckAllCombinations(
mStartMatchCommonSubstringLengths, prefixLengths, startLengths);
if (MOZ_UNLIKELY(bestStartMatch.isNothing())) {
TEXT_FRAGMENT_LOG(
"Could not find unique start match. It's not possible to create a text "
"directive for the target range.");
return Nothing{};
}
auto [endLengths, suffixLengths] = ExtendSubstringLengthsToWordBoundaries(
mEndMatchCommonSubstringLengths, Span(mEndWordBeginDistances).From(1),
mSuffixWordEndDistances);
TEXT_FRAGMENT_LOG(
"Find shortest combination for end match based on end and suffix");
TEXT_FRAGMENT_LOG("Matches to eliminate: {}, Total combinations: {}",
mEndMatchCommonSubstringLengths.Length(),
endLengths.Length() * suffixLengths.Length());
TEXT_FRAGMENT_LOG("Checking end lengths (extended to word boundaries): {}",
endLengths);
TEXT_FRAGMENT_LOG("Checking suffix lengths (extended to word boundaries): {}",
suffixLengths);
TEXT_FRAGMENT_LOG("Matches: {}", mEndMatchCommonSubstringLengths);
const auto bestEndMatch = CheckAllCombinations(
mEndMatchCommonSubstringLengths, endLengths, suffixLengths);
if (MOZ_UNLIKELY(bestEndMatch.isNothing())) {
TEXT_FRAGMENT_LOG(
"Could not find unique end match. It's not possible to create a text "
"directive for the target range.");
return Nothing{};
}
const auto [prefixLength, startLength] = *bestStartMatch;
const auto [endLength, suffixLength] = *bestEndMatch;
TextDirective td;
if (prefixLength) {
td.prefix =
Substring(mPrefixContent, mPrefixContent.Length() - prefixLength);
}
const uint32_t startLengthIncludingFirstWord =
std::max(mStartWordEndDistances[0], startLength);
TEXT_FRAGMENT_LOG(
"Removeme: start content: {}, start length: {}, calculated: {}",
NS_ConvertUTF16toUTF8(mStartContent), mStartContent.Length(),
startLengthIncludingFirstWord);
MOZ_DIAGNOSTIC_ASSERT(startLengthIncludingFirstWord <=
mStartContent.Length());
td.start = Substring(mStartContent, 0, startLengthIncludingFirstWord);
const uint32_t endLengthIncludingLastWord =
std::max(mEndWordBeginDistances[0], endLength);
MOZ_DIAGNOSTIC_ASSERT(endLengthIncludingLastWord <= mEndContent.Length());
td.end =
Substring(mEndContent, mEndContent.Length() - endLengthIncludingLastWord);
if (suffixLength) {
td.suffix = Substring(mSuffixContent, 0, suffixLength);
}
return Some(td);
}
} // namespace mozilla::dom