/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "ICU4CGlue.h"
#include "NumberFormatFields.h"
#include "ScopedICUObject.h"
#include "mozilla/FloatingPoint.h"
#include "unicode/uformattedvalue.h"
#include "unicode/unum.h"
#include "unicode/unumberformatter.h"
namespace mozilla::intl {
/**
 * Record one field spanning the half-open range [begin, end) of the
 * formatted string.
 *
 * @param type   The part type to associate with the field.
 * @param begin  Start index of the field; asserted to be non-negative.
 * @param end    End index of the field; asserted to be non-negative and
 *               strictly greater than |begin|.
 * @return Whatever |fields_.emplaceBack| returns for the new entry.
 */
bool NumberFormatFields::append(NumberPartType type, int32_t begin,
                                int32_t end) {
  MOZ_ASSERT(begin >= 0);
  MOZ_ASSERT(end >= 0);
  MOZ_ASSERT(begin < end, "erm, aren't fields always non-empty?");

  // Fields are stored with unsigned indices; convert explicitly.
  const auto fieldBegin = static_cast<uint32_t>(begin);
  const auto fieldEnd = static_cast<uint32_t>(end);

  return fields_.emplaceBack(fieldBegin, fieldEnd, type);
}
// Converts the fields recorded in |fields_| into a gap-free sequence of parts
// covering [0, overallLength) and appends those parts to |parts|.
//
// Note that |fields_| is sorted in place as a side effect. |sourceMap| is
// queried (via its source() member) for the source value stored in every
// generated part; what source() computes is defined elsewhere and isn't
// visible in this file.
//
// Returns false if appending to |parts| fails, if growing the internal stack
// of enclosing fields fails, or if the generated parts don't end exactly at
// |overallLength|. Returns true otherwise.
bool NumberFormatFields::toPartsVector(size_t overallLength,
const NumberPartSourceMap& sourceMap,
NumberPartVector& parts) {
std::sort(fields_.begin(), fields_.end(),
[](const NumberFormatField& left, const NumberFormatField& right) {
// Sort first by begin index, then by descending end index so that
// enclosing fields are placed before the fields nested in them.
return left.begin < right.begin ||
(left.begin == right.begin && left.end > right.end);
});
// Then iterate over the sorted field list to generate a sequence of parts
// (what ECMA-402 actually exposes). A part is a maximal character sequence
// entirely within no field or a single most-nested field.
//
// Diagrams may be helpful to illustrate how fields map to parts. Consider
// formatting -19,766,580,028,249.41, the US national surplus (negative
// because it's actually a debt) on October 18, 2016.
//
//   var options =
//     { style: "currency", currency: "USD", currencyDisplay: "name" };
//   var usdFormatter = new Intl.NumberFormat("en-US", options);
//   usdFormatter.format(-19766580028249.41);
//
// The formatted result is "-19,766,580,028,249.41 US dollars". ICU
// identifies these fields in the string:
//
//     UNUM_GROUPING_SEPARATOR_FIELD
//                   |
//   UNUM_SIGN_FIELD |  UNUM_DECIMAL_SEPARATOR_FIELD
//    |   __________/|   |
//    |  /   |   |   |   |
//   "-19,766,580,028,249.41 US dollars"
//     \________________/ |/ \_______/
//             |          |      |
//    UNUM_INTEGER_FIELD  |  UNUM_CURRENCY_FIELD
//                        |
//               UNUM_FRACTION_FIELD
//
// These fields map to parts as follows:
//
//         integer     decimal
//       _____|________  |
//      /  /| |\  |\  |\ |  literal
//     /| / | | \ | \ | \|  |
//   "-19,766,580,028,249.41 US dollars"
//    |  \___|___|___/    |/ \________/
//    |        |          |       |
//    |      group        |   currency
//    |                   |
//   minusSign        fraction
//
// The sign is a part. Each comma is a part, splitting the integer field
// into parts for trillions/billions/&c. digits. The decimal point is a
// part. Cents are a part. The space between cents and currency is a part
// (outside any field). Last, the currency field is a part.
//
// PartGenerator walks the sorted fields and hands out one part per call to
// nextPart() until the whole string has been partitioned.
class PartGenerator {
// The fields in order from start to end, then least to most nested.
const FieldsVector& fields;
// Index of the current field, in |fields|, being considered to
// determine part boundaries. |lastEnd <= fields[index].begin| is an
// invariant.
size_t index = 0;
// The end index of the last part produced, always less than or equal
// to |limit|, strictly increasing.
uint32_t lastEnd = 0;
// The length of the overall formatted string.
const uint32_t limit = 0;
// Copy of the source map passed to the constructor; its source() member
// supplies the source value stored in each generated part.
NumberPartSourceMap sourceMap;
// Stack of indices into |fields| identifying fields that have begun but
// whose end hasn't been emitted yet. The most-nested such field is at the
// back.
Vector<size_t, 4> enclosingFields;
// Pops every field at the top of |enclosingFields| whose end index equals
// |end|. Asserts that the innermost enclosing field (if any) doesn't end
// before |end|.
void popEnclosingFieldsEndingAt(uint32_t end) {
MOZ_ASSERT_IF(enclosingFields.length() > 0,
fields[enclosingFields.back()].end >= end);
while (enclosingFields.length() > 0 &&
fields[enclosingFields.back()].end == end) {
enclosingFields.popBack();
}
}
// Computes the next part into |*part|, advancing |index| and updating
// |enclosingFields| as needed. The caller (nextPart) is responsible for
// updating |lastEnd|. Returns false only if pushing onto |enclosingFields|
// fails.
bool nextPartInternal(NumberPart* part) {
size_t len = fields.length();
MOZ_ASSERT(index <= len);
// If we're out of fields, all that remains are part(s) consisting
// of trailing portions of enclosing fields, and maybe a final
// literal part.
if (index == len) {
if (enclosingFields.length() > 0) {
const auto& enclosing = fields[enclosingFields.popCopy()];
*part = {enclosing.type, sourceMap.source(enclosing), enclosing.end};
// If additional enclosing fields end where this part ends,
// pop them as well.
popEnclosingFieldsEndingAt(part->endIndex);
} else {
// No field covers the remainder, so it's a literal extending to the
// end of the string.
*part = {NumberPartType::Literal, sourceMap.source(limit), limit};
}
return true;
}
// Otherwise we still have a field to process.
const NumberFormatField* current = &fields[index];
MOZ_ASSERT(lastEnd <= current->begin);
MOZ_ASSERT(current->begin < current->end);
// But first, deal with inter-field space.
if (lastEnd < current->begin) {
if (enclosingFields.length() > 0) {
// Space between fields, within an enclosing field, is part
// of that enclosing field, until the start of the current
// field or the end of the enclosing field, whichever is
// earlier.
const auto& enclosing = fields[enclosingFields.back()];
*part = {enclosing.type, sourceMap.source(enclosing),
std::min(enclosing.end, current->begin)};
popEnclosingFieldsEndingAt(part->endIndex);
} else {
// If there's no enclosing field, the space is a literal.
*part = {NumberPartType::Literal, sourceMap.source(current->begin),
current->begin};
}
return true;
}
// Otherwise, the part spans a prefix of the current field. Find
// the most-nested field containing that prefix.
const NumberFormatField* next;
do {
current = &fields[index];
// If the current field is last, the part extends to its end.
if (++index == len) {
*part = {current->type, sourceMap.source(*current), current->end};
return true;
}
next = &fields[index];
MOZ_ASSERT(current->begin <= next->begin);
MOZ_ASSERT(current->begin < next->end);
// If the next field nests within the current field, push an
// enclosing field. (If there are no nested fields, don't
// bother pushing a field that'd be immediately popped.)
// |index| was already advanced above, so |index - 1| is the position of
// |current| in |fields|.
if (current->end > next->begin) {
if (!enclosingFields.append(index - 1)) {
return false;
}
}
// Do so until the next field begins after this one.
} while (current->begin == next->begin);
if (current->end <= next->begin) {
// The next field begins after the current field ends. Therefore
// the current part ends at the end of the current field.
*part = {current->type, sourceMap.source(*current), current->end};
popEnclosingFieldsEndingAt(part->endIndex);
} else {
// The current field encloses the next one. The current part
// ends where the next field/part will start.
*part = {current->type, sourceMap.source(*current), next->begin};
}
return true;
}
public:
// |vec| must already be sorted as described above; it is held by reference.
// |limit| is the length of the overall formatted string. |sourceMap| is
// copied.
PartGenerator(const FieldsVector& vec, uint32_t limit,
const NumberPartSourceMap& sourceMap)
: fields(vec), limit(limit), sourceMap(sourceMap) {}
// Produces the next part, if any. On success returns true and sets
// |*hasPart|: when it is true, |*part| holds the next part; when it is
// false, the entire string has been partitioned and |*part| is untouched.
// Returns false if nextPartInternal() fails.
bool nextPart(bool* hasPart, NumberPart* part) {
// There are no parts left if we've partitioned the entire string.
if (lastEnd == limit) {
MOZ_ASSERT(enclosingFields.length() == 0);
*hasPart = false;
return true;
}
if (!nextPartInternal(part)) {
return false;
}
*hasPart = true;
lastEnd = part->endIndex;
return true;
}
};
// Finally, generate the result array.
size_t lastEndIndex = 0;
PartGenerator gen(fields_, overallLength, sourceMap);
do {
bool hasPart;
NumberPart part;
if (!gen.nextPart(&hasPart, &part)) {
return false;
}
if (!hasPart) {
break;
}
// Every part must be non-empty, i.e. end after the previous part's end.
MOZ_ASSERT(lastEndIndex < part.endIndex);
if (!parts.append(part)) {
return false;
}
lastEndIndex = part.endIndex;
} while (true);
MOZ_ASSERT(lastEndIndex == overallLength,
"result array must partition the entire string");
// Report a mismatch to the caller through the return value as well.
return lastEndIndex == overallLength;
}
/**
 * Overload accepting a UFormattedNumber: obtains the generic UFormattedValue
 * view of |value| through unumf_resultAsValue and forwards every other
 * argument unchanged to the UFormattedValue overload.
 *
 * @return The forwarded overload's result, or the ICU error (converted with
 *         ToICUError) if the conversion to UFormattedValue fails.
 */
Result<std::u16string_view, ICUError> FormatResultToParts(
    const UFormattedNumber* value, Maybe<double> number, bool isNegative,
    bool formatForUnit, NumberPartVector& parts) {
  UErrorCode errorCode = U_ZERO_ERROR;
  const UFormattedValue* asFormattedValue =
      unumf_resultAsValue(value, &errorCode);

  if (!U_FAILURE(errorCode)) {
    return FormatResultToParts(asFormattedValue, number, isNegative,
                               formatForUnit, parts);
  }

  return Err(ToICUError(errorCode));
}
/**
 * Collect the UFIELD_CATEGORY_NUMBER fields ICU reports for |value|, convert
 * them into parts appended to |parts|, and return a view of the formatted
 * UTF-16 string.
 *
 * @param value          The ICU formatted value to inspect.
 * @param number         Forwarded to GetPartTypeForNumberField.
 * @param isNegative     Forwarded to GetPartTypeForNumberField.
 * @param formatForUnit  Forwarded to GetPartTypeForNumberField.
 * @param parts          Receives the generated parts.
 * @return A view over the string returned by ufmtval_getString. On error:
 *         the converted ICU status if any ICU call fails, or
 *         ICUError::InternalError if a field has no part type, a field can't
 *         be recorded, or toPartsVector fails.
 */
Result<std::u16string_view, ICUError> FormatResultToParts(
    const UFormattedValue* value, Maybe<double> number, bool isNegative,
    bool formatForUnit, NumberPartVector& parts) {
  UErrorCode errorCode = U_ZERO_ERROR;

  // Fetch the formatted text and its length.
  int32_t formattedLength;
  const char16_t* formattedChars =
      ufmtval_getString(value, &formattedLength, &errorCode);
  if (U_FAILURE(errorCode)) {
    return Err(ToICUError(errorCode));
  }

  // Open a field-position iterator; the scoped object closes it on every
  // exit path below.
  UConstrainedFieldPosition* position = ucfpos_open(&errorCode);
  if (U_FAILURE(errorCode)) {
    return Err(ToICUError(errorCode));
  }
  ScopedICUObject<UConstrainedFieldPosition, ucfpos_close> closePosition(
      position);

  // Only UFIELD_CATEGORY_NUMBER fields are of interest.
  ucfpos_constrainCategory(position, UFIELD_CATEGORY_NUMBER, &errorCode);
  if (U_FAILURE(errorCode)) {
    return Err(ToICUError(errorCode));
  }

  // Gather every field ICU reports for the formatted string.
  NumberFormatFields collected;
  for (;;) {
    const bool foundField = ufmtval_nextPosition(value, position, &errorCode);
    if (U_FAILURE(errorCode)) {
      return Err(ToICUError(errorCode));
    }
    if (!foundField) {
      break;
    }

    const int32_t icuField = ucfpos_getField(position, &errorCode);
    if (U_FAILURE(errorCode)) {
      return Err(ToICUError(errorCode));
    }

    int32_t fieldBegin;
    int32_t fieldEnd;
    ucfpos_getIndexes(position, &fieldBegin, &fieldEnd, &errorCode);
    if (U_FAILURE(errorCode)) {
      return Err(ToICUError(errorCode));
    }

    const Maybe<NumberPartType> mappedType = GetPartTypeForNumberField(
        static_cast<UNumberFormatFields>(icuField), number, isNegative,
        formatForUnit);
    if (mappedType.isNothing()) {
      return Err(ICUError::InternalError);
    }
    if (!collected.append(*mappedType, fieldBegin, fieldEnd)) {
      return Err(ICUError::InternalError);
    }
  }

  // Turn the collected fields into the caller-visible parts.
  if (!collected.toPartsVector(formattedLength, parts)) {
    return Err(ICUError::InternalError);
  }

  return std::u16string_view(formattedChars,
                             static_cast<size_t>(formattedLength));
}
/**
 * Map an ICU number-format field identifier to the NumberPartType reported
 * for it.
 *
 * The switch deliberately lists every UNumberFormatFields enumerator (see
 * intl/icu/source/i18n/unicode/unum.h for the detailed field list). If this
 * code is built against a different ICU that defines more enumerators, cases
 * may need to be added or removed; guard such cases with appropriate ICU
 * version-testing #ifdefs should cross-version divergence occur.
 *
 * @param fieldName      The ICU field to translate.
 * @param number         If present and NaN or non-finite, an integer field is
 *                       reported as Nan or Infinity respectively.
 * @param isNegative     Selects MinusSign (true) or PlusSign (false) for a
 *                       sign field.
 * @param formatForUnit  If true, a percent field is reported as Unit instead
 *                       of Percent.
 * @return The matching part type, or Nothing() for fields that are asserted
 *         to be unreachable (permill, the field-count sentinel, and any value
 *         not handled by the switch).
 */
Maybe<NumberPartType> GetPartTypeForNumberField(UNumberFormatFields fieldName,
                                                Maybe<double> number,
                                                bool isNegative,
                                                bool formatForUnit) {
  switch (fieldName) {
    case UNUM_INTEGER_FIELD: {
      if (number.isNothing()) {
        return Some(NumberPartType::Integer);
      }
      const double value = *number;
      if (std::isnan(value)) {
        return Some(NumberPartType::Nan);
      }
      return Some(std::isfinite(value) ? NumberPartType::Integer
                                       : NumberPartType::Infinity);
    }

    case UNUM_FRACTION_FIELD:
      return Some(NumberPartType::Fraction);

    case UNUM_DECIMAL_SEPARATOR_FIELD:
      return Some(NumberPartType::Decimal);

    case UNUM_EXPONENT_SYMBOL_FIELD:
      return Some(NumberPartType::ExponentSeparator);

    case UNUM_EXPONENT_SIGN_FIELD:
      return Some(NumberPartType::ExponentMinusSign);

    case UNUM_EXPONENT_FIELD:
      return Some(NumberPartType::ExponentInteger);

    case UNUM_GROUPING_SEPARATOR_FIELD:
      return Some(NumberPartType::Group);

    case UNUM_CURRENCY_FIELD:
      return Some(NumberPartType::Currency);

    case UNUM_PERCENT_FIELD:
      return Some(formatForUnit ? NumberPartType::Unit
                                : NumberPartType::Percent);

    case UNUM_PERMILL_FIELD:
      MOZ_ASSERT_UNREACHABLE(
          "unexpected permill field found, even though "
          "we don't use any user-defined patterns that "
          "would require a permill field");
      break;

    case UNUM_SIGN_FIELD:
      return Some(isNegative ? NumberPartType::MinusSign
                             : NumberPartType::PlusSign);

    case UNUM_MEASURE_UNIT_FIELD:
      return Some(NumberPartType::Unit);

    case UNUM_COMPACT_FIELD:
      return Some(NumberPartType::Compact);

    case UNUM_APPROXIMATELY_SIGN_FIELD:
      return Some(NumberPartType::ApproximatelySign);

#ifndef U_HIDE_DEPRECATED_API
    case UNUM_FIELD_COUNT:
      MOZ_ASSERT_UNREACHABLE(
          "format field sentinel value returned by iterator!");
      break;
#endif
  }

  // Reached via the |break| statements above or by a value the switch
  // doesn't list.
  MOZ_ASSERT_UNREACHABLE(
      "unenumerated, undocumented format field returned by iterator");
  return Nothing();
}
} // namespace mozilla::intl