Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsBidiUtils_h__
#define nsBidiUtils_h__
#include "mozilla/intl/BidiClass.h"
#include "nsString.h"
#include "encoding_rs_mem.h"
/**
 * Definitions of bidirectional character types by category.
 */
/**
 * True if the bidi class |val| is mozilla::intl::BidiClass::RightToLeft or
 * mozilla::intl::BidiClass::RightToLeftArabic.
 * This is a macro: |val| may be evaluated more than once, so it must not
 * have side effects.
 */
#define BIDICLASS_IS_RTL(val) \
(((val) == mozilla::intl::BidiClass::RightToLeft) || \
((val) == mozilla::intl::BidiClass::RightToLeftArabic))
/**
 * True if the bidi class |val| is EuropeanNumberSeparator,
 * EuropeanNumberTerminator, or any class that compares greater than
 * ArabicNumber with the exception of RightToLeftArabic.
 * NOTE(review): the greater-than comparison depends on the declaration
 * order of mozilla::intl::BidiClass, which is not visible in this header --
 * confirm against BidiClass.h before reordering that enum.
 * This is a macro: |val| may be evaluated more than once.
 */
#define BIDICLASS_IS_WEAK(val) \
(((val) == mozilla::intl::BidiClass::EuropeanNumberSeparator) || \
((val) == mozilla::intl::BidiClass::EuropeanNumberTerminator) || \
(((val) > mozilla::intl::BidiClass::ArabicNumber) && \
((val) != mozilla::intl::BidiClass::RightToLeftArabic)))
/**
 * Inspects a single UTF-16 code unit, converting numbers to Arabic or Hindi
 * forms, and returns the result.
 * @param aChar the character to inspect
 * @param aPrevCharArabic true if the previous character in the string is
 *        an Arabic char
 * @param aNumFlag specifies the conversion to perform:
 *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
 *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms
 *                                      (Unicode 0660-0669)
 *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms
 *                                      (Unicode 0030-0039)
 *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to
 *                                      Hindi, otherwise to Arabic
 *        NOTE(review): this header also defines IBMBIDI_NUMERAL_REGULAR,
 *        IBMBIDI_NUMERAL_PERSIANCONTEXT and IBMBIDI_NUMERAL_PERSIAN; how
 *        those are handled is decided by the implementation, which is not
 *        visible here.
 * @return the converted character
 */
char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic,
uint32_t aNumFlag);
/**
 * Scans a UTF-16 string, converting numbers to Arabic or Hindi forms in
 * place.
 * @param aBuffer the string; it is modified in place
 * @param aSize the size of aBuffer (presumably counted in char16_t code
 *        units -- TODO confirm against the implementation)
 * @param aNumFlag specifies the conversion to perform:
 *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
 *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms
 *                                      (Unicode 0660-0669)
 *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms
 *                                      (Unicode 0030-0039)
 *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to
 *                                      Hindi, otherwise to Arabic
 * @return an nsresult; the specific codes are chosen by the implementation,
 *         which is not visible in this header
 */
nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag);
/**
 * Code points of the Unicode bidirectional control characters tested by
 * IsBidiControl() and IsBidiControlRTL().
 */
#define LRM_CHAR 0x200e  // LEFT-TO-RIGHT MARK
#define RLM_CHAR 0x200f  // RIGHT-TO-LEFT MARK
#define LRE_CHAR 0x202a  // LEFT-TO-RIGHT EMBEDDING
#define RLE_CHAR 0x202b  // RIGHT-TO-LEFT EMBEDDING
#define PDF_CHAR 0x202c  // POP DIRECTIONAL FORMATTING
#define LRO_CHAR 0x202d  // LEFT-TO-RIGHT OVERRIDE
#define RLO_CHAR 0x202e  // RIGHT-TO-LEFT OVERRIDE
#define LRI_CHAR 0x2066  // LEFT-TO-RIGHT ISOLATE
#define RLI_CHAR 0x2067  // RIGHT-TO-LEFT ISOLATE
#define FSI_CHAR 0x2068  // FIRST STRONG ISOLATE
#define PDI_CHAR 0x2069  // POP DIRECTIONAL ISOLATE
#define ALM_CHAR 0x061C  // ARABIC LETTER MARK
/**
 * Given a UTF-32 codepoint, return true if the codepoint is a Bidi control
 * character (LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
 * Return false otherwise, including for values that are not valid
 * codepoints.
 *
 * @param aChar the codepoint to test
 * @return true if aChar is one of the twelve controls listed above
 */
inline bool IsBidiControl(uint32_t aChar) {
  // LRE..RLO and LRI..PDI are contiguous, so each group is one range check.
  // LRM and RLM are compared explicitly: folding them together with a bit
  // mask narrower than 32 bits would ignore the top byte of aChar and
  // accept out-of-range values such as 0x0100200E.
  return (LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
         (LRI_CHAR <= aChar && aChar <= PDI_CHAR) || aChar == ALM_CHAR ||
         aChar == LRM_CHAR || aChar == RLM_CHAR;
}
/**
 * Given a UTF-32 codepoint, report whether it is a Bidi control character
 * that may result in RTL directionality (RLM, RLE, RLO, RLI or ALM) and
 * therefore needs to trigger bidi resolution.
 *
 * @param aChar the codepoint to test
 * @return true for the five controls named above, false for anything else
 */
inline bool IsBidiControlRTL(uint32_t aChar) {
  switch (aChar) {
    case RLM_CHAR:
    case RLE_CHAR:
    case RLO_CHAR:
    case RLI_CHAR:
    case ALM_CHAR:
      return true;
    default:
      return false;
  }
}
/**
 * Check a 16-bit (UTF-16) text buffer for right-to-left content.
 *
 * @param aBuffer the code units to examine
 * @return true if the string contains right-to-left characters, as decided
 *         by encoding_mem_is_utf16_bidi()
 */
inline bool HasRTLChars(mozilla::Span<const char16_t> aBuffer) {
  // Taking a Span guarantees that the pointer handed to Rust is never
  // nullptr, even when the buffer has zero length.
  auto codeUnits = aBuffer.Elements();
  auto unitCount = aBuffer.Length();
  return encoding_mem_is_utf16_bidi(codeUnits, unitCount);
}
// ------------------
//  Names of the bidi preferences.
//  These values are shared with the Preferences dialog: if the pref values
//  are changed in the XUL file of Prefs, they must be changed here too.
// ------------------
//
#define IBMBIDI_TEXTDIRECTION_STR "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR "bidi.texttype"
#define IBMBIDI_NUMERAL_STR "bidi.numeral"
// ------------------
//  Text Direction
// ------------------
//  Values of the bidi.direction option (IBMBIDI_TEXTDIRECTION_STR).
//  The value marked '*' is the one used by IBMBIDI_DEFAULT_BIDI_OPTIONS.
#define IBMBIDI_TEXTDIRECTION_LTR 1 // 1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL 2 // 2 = directionRTLBidi
// ------------------
//  Text Type
// ------------------
//  Values of the bidi.texttype option (IBMBIDI_TEXTTYPE_STR).
//  The value marked '*' is the one used by IBMBIDI_DEFAULT_BIDI_OPTIONS.
#define IBMBIDI_TEXTTYPE_CHARSET 1 // 1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL 2 // 2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL 3 // 3 = visualtexttypeBidi
// ------------------
//  Numeral Style
// ------------------
//  Values of the bidi.numeral option (IBMBIDI_NUMERAL_STR). These are the
//  values accepted as the aNumFlag argument of HandleNumberInChar() and
//  HandleNumbers().
//  The value marked '*' is the one used by IBMBIDI_DEFAULT_BIDI_OPTIONS.
//  All values fit in the 4 bits that GET/SET_BIDI_OPTION_NUMERAL reserve.
#define IBMBIDI_NUMERAL_NOMINAL 0 // 0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR 1 // 1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT 2 // 2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC 3 // 3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI 4 // 4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN 6 // 6 = persiannumeralBidi
/**
 * The three bidi options are packed into one integer, 4 bits each:
 *   bits 0-3  : text direction (IBMBIDI_TEXTDIRECTION_*)
 *   bits 4-7  : text type      (IBMBIDI_TEXTTYPE_*)
 *   bits 8-11 : numeral style  (IBMBIDI_NUMERAL_*)
 *
 * IBMBIDI_DEFAULT_BIDI_OPTIONS is the packed value for LTR direction,
 * charset text type and nominal numerals.
 */
#define IBMBIDI_DEFAULT_BIDI_OPTIONS \
((IBMBIDI_TEXTDIRECTION_LTR << 0) | (IBMBIDI_TEXTTYPE_CHARSET << 4) | \
(IBMBIDI_NUMERAL_NOMINAL << 8))
/**
 * Extract one 4-bit field from the packed options value |bo|.
 */
#define GET_BIDI_OPTION_DIRECTION(bo) \
(((bo) >> 0) & 0x0000000F) /* 4 bits for DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo) \
(((bo) >> 4) & 0x0000000F) /* 4 bits for TEXTTYPE */
#define GET_BIDI_OPTION_NUMERAL(bo) \
(((bo) >> 8) & 0x0000000F) /* 4 bits for NUMERAL */
/**
 * Overwrite one 4-bit field of the packed options value |bo| in place; the
 * new value is masked to 4 bits and the other fields within the low 32 bits
 * are preserved. |bo| must be a modifiable lvalue and is evaluated twice.
 *
 * Each of these expands to a brace-enclosed block, not an expression: it
 * yields no value, and writing `if (c) SET_...(bo, v); else ...` does not
 * compile because the trailing semicolon ends the if statement.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
{ (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); }
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
{ (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); }
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
{ (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); }
/*
 * Constants related to the position of numerics in the codepage.
 * The names follow this file's terminology:
 *   "Hindi" digits:  U+0660-U+0669 (ARABIC-INDIC DIGIT ZERO..NINE)
 *   "Arabic" digits: U+0030-U+0039 (the ASCII digits 0..9)
 *   "Farsi" digits:  U+06F0-U+06F9 (EXTENDED ARABIC-INDIC DIGIT ZERO..NINE)
 * Each range is inclusive at both ends.
 */
#define START_HINDI_DIGITS 0x0660
#define END_HINDI_DIGITS 0x0669
#define START_ARABIC_DIGITS 0x0030
#define END_ARABIC_DIGITS 0x0039
#define START_FARSI_DIGITS 0x06f0
#define END_FARSI_DIGITS 0x06f9
/*
 * Range tests for the three digit sets above. These are macros: |u| may be
 * evaluated more than once.
 */
#define IS_HINDI_DIGIT(u) \
(((u) >= START_HINDI_DIGITS) && ((u) <= END_HINDI_DIGITS))
#define IS_ARABIC_DIGIT(u) \
(((u) >= START_ARABIC_DIGITS) && ((u) <= END_ARABIC_DIGITS))
#define IS_FARSI_DIGIT(u) \
(((u) >= START_FARSI_DIGITS) && ((u) <= END_FARSI_DIGITS))
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * True only for the eight code points listed above. The lower bound of the
 * first range is tested explicitly so that the macro is also correct when
 * it is applied to a value that has not already been checked to lie in the
 * Arabic block (without it every value up to U+0603, ASCII included, would
 * match). This is a macro: |u| may be evaluated more than once.
 */
#define IS_ARABIC_SEPARATOR(u)          \
  (((u) >= 0x0600 && (u) <= 0x0603) ||  \
   ((u) >= 0x066A && (u) <= 0x066C) ||  \
   ((u) == 0x06DD))
/**
 * True if |u| is one of the code points this header treats as a bidi
 * diacritic:
 *  - within the range IS_HEBREW_CHAR classifies as Hebrew:
 *    U+0591-U+05A1, U+05A3-U+05B9, U+05BB-U+05BD, U+05BF, U+05C1, U+05C2,
 *    U+05C4 (so U+05A2, U+05BA, U+05BE, U+05C0 and U+05C3 are excluded);
 *  - within the range IS_ARABIC_CHAR classifies as Arabic:
 *    U+064B-U+0652, U+0670, U+06D7-U+06E4, U+06E7, U+06E8, U+06EA-U+06ED
 *    (so U+06E5, U+06E6 and U+06E9 are excluded).
 * This is a macro: |u| may be evaluated more than once.
 */
#define IS_BIDI_DIACRITIC(u) \
(((u) >= 0x0591 && (u) <= 0x05A1) || ((u) >= 0x05A3 && (u) <= 0x05B9) || \
((u) >= 0x05BB && (u) <= 0x05BD) || ((u) == 0x05BF) || ((u) == 0x05C1) || \
((u) == 0x05C2) || ((u) == 0x05C4) || ((u) >= 0x064B && (u) <= 0x0652) || \
((u) == 0x0670) || ((u) >= 0x06D7 && (u) <= 0x06E4) || ((u) == 0x06E7) || \
((u) == 0x06E8) || ((u) >= 0x06EA && (u) <= 0x06ED))
/**
 * True if |c| is in U+0590-U+05FF or in U+FB1D-U+FB4F.
 * This is a macro: |c| may be evaluated more than once.
 */
#define IS_HEBREW_CHAR(c) \
(((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))
/**
 * True if |c| is in U+0600-U+06FF, U+0750-U+077F or U+08A0-U+08FF.
 * The outer U+0600-U+08FF test bounds the whole check; the inner tests then
 * carve out the gaps U+0700-U+074F and U+0780-U+089F, which are rejected.
 * This is a macro: |c| may be evaluated more than once.
 */
#define IS_ARABIC_CHAR(c) \
((0x0600 <= (c) && (c) <= 0x08FF) && \
((c) <= 0x06ff || ((c) >= 0x0750 && (c) <= 0x077f) || (c) >= 0x08a0))
/**
 * True if |c| satisfies IS_ARABIC_CHAR and is not a digit
 * (IS_HINDI_DIGIT, IS_FARSI_DIGIT) or a numeric separator/formatting
 * character (IS_ARABIC_SEPARATOR).
 * This is a macro: |c| may be evaluated more than once.
 */
#define IS_ARABIC_ALPHABETIC(c) \
(IS_ARABIC_CHAR(c) && \
!(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to the Unicode Character Database
 * (extracted/DerivedBidiClass.txt) and the Unicode roadmaps.
 *
 *  IS_IN_BMP_RTL_BLOCK:      U+0590-U+08FF
 *  IS_RTL_PRESENTATION_FORM: U+FB1D-U+FDFF and U+FE70-U+FEFE
 *  IS_IN_SMP_RTL_BLOCK:      U+10800-U+10FFF and U+1E800-U+1EFFF
 *
 * All ranges are inclusive. These are macros: |c| may be evaluated more
 * than once. IS_IN_SMP_RTL_BLOCK can only match values above 0xFFFF, so it
 * is only meaningful for code points, not for single UTF-16 code units.
 */
#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
#define IS_RTL_PRESENTATION_FORM(c) \
(((0xfb1d <= (c)) && ((c) <= 0xfdff)) || ((0xfe70 <= (c)) && ((c) <= 0xfefe)))
#define IS_IN_SMP_RTL_BLOCK(c) \
(((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
// True if the UTF-16 code unit |c| is in a BMP RTL block, is an RTL
// presentation form, or is one of the high surrogates 0xD802, 0xD803,
// 0xD83A, 0xD83B. Those four are exactly the high surrogates of the ranges
// tested by IS_IN_SMP_RTL_BLOCK (0xD802/0xD803 lead U+10800-U+10FFF and
// 0xD83A/0xD83B lead U+1E800-U+1EFFF).
//
// Due to the supplementary-plane RTL blocks being identifiable from the
// high surrogate without examining the low surrogate, it is correct to
// use this by-code-unit check on potentially astral text without doing
// the math to decode surrogate pairs into code points. However, an
// unpaired high surrogate that is one of the RTL high surrogates then
// counts as RTL even though, if it were replaced by the REPLACEMENT
// CHARACTER, it would not be RTL.
// This is a macro: |c| may be evaluated more than once.
#define UTF16_CODE_UNIT_IS_BIDI(c) \
((IS_IN_BMP_RTL_BLOCK(c)) || (IS_RTL_PRESENTATION_FORM(c)) || \
(c) == 0xD802 || (c) == 0xD803 || (c) == 0xD83A || (c) == 0xD83B)
// True if the UTF-32 code point |c| is in a BMP RTL block, is an RTL
// presentation form, or is in an SMP RTL block.
// This is a macro: |c| may be evaluated more than once.
#define UTF32_CHAR_IS_BIDI(c) \
((IS_IN_BMP_RTL_BLOCK(c)) || (IS_RTL_PRESENTATION_FORM(c)) || \
(IS_IN_SMP_RTL_BLOCK(c)))
#endif /* nsBidiUtils_h__ */