Source code
Revision control
Copy as Markdown
Other Tools
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
use icu_segmenter::LineBreakOptions;
use icu_segmenter::LineBreakStrictness;
use icu_segmenter::LineBreakWordOption;
#[diplomat::bridge]
pub mod ffi {
use crate::errors::ffi::ICU4XError;
use crate::provider::ffi::ICU4XDataProvider;
use alloc::boxed::Box;
use core::convert::TryFrom;
use icu_segmenter::{
LineBreakIteratorLatin1, LineBreakIteratorPotentiallyIllFormedUtf8, LineBreakIteratorUtf16,
LineSegmenter,
};
#[diplomat::opaque]
/// An ICU4X line-break segmenter, capable of finding breakpoints in strings.
///
/// Opaque FFI wrapper around `icu_segmenter::LineSegmenter`. Instances are built by the
/// `create_*` constructors and used through the `segment_*` methods.
#[diplomat::rust_link(icu::segmenter::LineSegmenter, Struct)]
pub struct ICU4XLineSegmenter(LineSegmenter);
/// FFI counterpart of `icu_segmenter::LineBreakStrictness`.
///
/// Each variant is converted to the variant of the same name on the Rust enum.
#[diplomat::rust_link(icu::segmenter::LineBreakStrictness, Enum)]
pub enum ICU4XLineBreakStrictness {
Loose,
Normal,
Strict,
Anywhere,
}
/// FFI counterpart of `icu_segmenter::LineBreakWordOption`.
///
/// Each variant is converted to the variant of the same name on the Rust enum.
#[diplomat::rust_link(icu::segmenter::LineBreakWordOption, Enum)]
pub enum ICU4XLineBreakWordOption {
Normal,
BreakAll,
KeepAll,
}
/// Options accepted by the `*_with_options_v1` constructors of `ICU4XLineSegmenter`.
///
/// Converted into `icu_segmenter::LineBreakOptions` by starting from that type's default
/// value and overwriting the three fields below.
#[diplomat::rust_link(icu::segmenter::LineBreakOptions, Struct)]
#[diplomat::attr(dart, rename = "LineBreakOptions")]
pub struct ICU4XLineBreakOptionsV1 {
/// Copied (after enum conversion) into `LineBreakOptions::strictness`.
pub strictness: ICU4XLineBreakStrictness,
/// Copied (after enum conversion) into `LineBreakOptions::word_option`.
pub word_option: ICU4XLineBreakWordOption,
/// Copied unchanged into `LineBreakOptions::ja_zh`.
// NOTE(review): presumably a Chinese/Japanese content hint - confirm against icu_segmenter docs.
pub ja_zh: bool,
}
#[diplomat::opaque]
/// Opaque FFI handle over the break iterator returned by `ICU4XLineSegmenter::segment_utf8`.
///
/// The lifetime `'a` is tied to both the segmenter and the input passed to `segment_utf8`.
#[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
#[diplomat::rust_link(
icu::segmenter::LineBreakIteratorPotentiallyIllFormedUtf8,
Typedef,
compact
)]
#[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf8, Typedef, hidden)]
pub struct ICU4XLineBreakIteratorUtf8<'a>(LineBreakIteratorPotentiallyIllFormedUtf8<'a, 'a>);
#[diplomat::opaque]
/// Opaque FFI handle over the break iterator returned by `ICU4XLineSegmenter::segment_utf16`.
///
/// The lifetime `'a` is tied to both the segmenter and the input passed to `segment_utf16`.
#[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
#[diplomat::rust_link(icu::segmenter::LineBreakIteratorUtf16, Typedef, compact)]
pub struct ICU4XLineBreakIteratorUtf16<'a>(LineBreakIteratorUtf16<'a, 'a>);
#[diplomat::opaque]
/// Opaque FFI handle over the break iterator returned by `ICU4XLineSegmenter::segment_latin1`.
///
/// The lifetime `'a` is tied to both the segmenter and the input passed to `segment_latin1`.
#[diplomat::rust_link(icu::segmenter::LineBreakIterator, Struct)]
#[diplomat::rust_link(icu::segmenter::LineBreakIteratorLatin1, Typedef, compact)]
pub struct ICU4XLineBreakIteratorLatin1<'a>(LineBreakIteratorLatin1<'a, 'a>);
impl ICU4XLineSegmenter {
/// Builds an [`ICU4XLineSegmenter`] that uses the default line-break options.
///
/// The best available payload data for Burmese, Khmer, Lao, and Thai is loaded
/// automatically. An error is returned when construction from `provider` fails.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto, FnInStruct)]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto")]
pub fn create_auto(
    provider: &ICU4XDataProvider,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // Resolve the segmenter first so the FFI wrapping below stays a plain expression.
    let segmenter = call_constructor!(
        LineSegmenter::new_auto [r => Ok(r)],
        LineSegmenter::try_new_auto_with_any_provider,
        LineSegmenter::try_new_auto_with_buffer_provider,
        provider
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Builds an [`ICU4XLineSegmenter`] that uses the default line-break options together
/// with LSTM payload data for Burmese, Khmer, Lao, and Thai.
///
/// An error is returned when construction from `provider` fails.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm, FnInStruct)]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm")]
pub fn create_lstm(
    provider: &ICU4XDataProvider,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // Resolve the segmenter first so the FFI wrapping below stays a plain expression.
    let segmenter = call_constructor!(
        LineSegmenter::new_lstm [r => Ok(r)],
        LineSegmenter::try_new_lstm_with_any_provider,
        LineSegmenter::try_new_lstm_with_buffer_provider,
        provider,
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Builds an [`ICU4XLineSegmenter`] that uses the default line-break options together
/// with dictionary payload data for Burmese, Khmer, Lao, and Thai.
///
/// An error is returned when construction from `provider` fails.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::new_dictionary, FnInStruct)]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary")]
pub fn create_dictionary(
    provider: &ICU4XDataProvider,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // Resolve the segmenter first so the FFI wrapping below stays a plain expression.
    let segmenter = call_constructor!(
        LineSegmenter::new_dictionary [r => Ok(r)],
        LineSegmenter::try_new_dictionary_with_any_provider,
        LineSegmenter::try_new_dictionary_with_buffer_provider,
        provider,
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Builds an [`ICU4XLineSegmenter`] configured with caller-supplied `options`.
///
/// The best available payload data for Burmese, Khmer, Lao, and Thai is loaded
/// automatically. An error is returned when construction from `provider` fails.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::new_auto_with_options, FnInStruct)]
#[diplomat::attr(dart, rename = "auto_with_options")]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "auto_with_options_v1")]
pub fn create_auto_with_options_v1(
    provider: &ICU4XDataProvider,
    options: ICU4XLineBreakOptionsV1,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // `options.into()` converts the FFI struct into the options type the constructors expect.
    let segmenter = call_constructor!(
        LineSegmenter::new_auto_with_options [r => Ok(r)],
        LineSegmenter::try_new_auto_with_options_with_any_provider,
        LineSegmenter::try_new_auto_with_options_with_buffer_provider,
        provider,
        options.into(),
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Builds an [`ICU4XLineSegmenter`] configured with caller-supplied `options` and
/// LSTM payload data for Burmese, Khmer, Lao, and Thai.
///
/// An error is returned when construction from `provider` fails.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::new_lstm_with_options, FnInStruct)]
#[diplomat::attr(dart, rename = "lstm_with_options")]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "lstm_with_options_v1")]
pub fn create_lstm_with_options_v1(
    provider: &ICU4XDataProvider,
    options: ICU4XLineBreakOptionsV1,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // `options.into()` converts the FFI struct into the options type the constructors expect.
    let segmenter = call_constructor!(
        LineSegmenter::new_lstm_with_options [r => Ok(r)],
        LineSegmenter::try_new_lstm_with_options_with_any_provider,
        LineSegmenter::try_new_lstm_with_options_with_buffer_provider,
        provider,
        options.into(),
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Builds an [`ICU4XLineSegmenter`] configured with caller-supplied `options` and
/// dictionary payload data for Burmese, Khmer, Lao, and Thai.
///
/// An error is returned when construction from `provider` fails.
#[diplomat::rust_link(
    icu::segmenter::LineSegmenter::new_dictionary_with_options,
    FnInStruct
)]
#[diplomat::attr(dart, rename = "dictionary_with_options")]
#[diplomat::attr(all(supports = constructors, supports = fallible_constructors, supports = named_constructors), named_constructor = "dictionary_with_options_v1")]
pub fn create_dictionary_with_options_v1(
    provider: &ICU4XDataProvider,
    options: ICU4XLineBreakOptionsV1,
) -> Result<Box<ICU4XLineSegmenter>, ICU4XError> {
    // `options.into()` converts the FFI struct into the options type the constructors expect.
    let segmenter = call_constructor!(
        LineSegmenter::new_dictionary_with_options [r => Ok(r)],
        LineSegmenter::try_new_dictionary_with_options_with_any_provider,
        LineSegmenter::try_new_dictionary_with_options_with_buffer_provider,
        provider,
        options.into(),
    )?;
    Ok(Box::new(ICU4XLineSegmenter(segmenter)))
}
/// Segments a UTF-8 string, returning an iterator over its line-break positions.
///
/// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs
/// according to the WHATWG Encoding Standard.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf8, FnInStruct)]
#[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_str, FnInStruct, hidden)]
#[diplomat::attr(dart, disable)]
pub fn segment_utf8<'a>(
    &'a self,
    input: &'a DiplomatStr,
) -> Box<ICU4XLineBreakIteratorUtf8<'a>> {
    // The returned iterator borrows both the segmenter and `input` for `'a`.
    let breaks = self.0.segment_utf8(input);
    Box::new(ICU4XLineBreakIteratorUtf8(breaks))
}
/// Segments a UTF-16 string, returning an iterator over its line-break positions.
///
/// Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs
/// according to the WHATWG Encoding Standard.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_utf16, FnInStruct)]
#[diplomat::attr(dart, rename = "segment")]
pub fn segment_utf16<'a>(
    &'a self,
    input: &'a DiplomatStr16,
) -> Box<ICU4XLineBreakIteratorUtf16<'a>> {
    // The returned iterator borrows both the segmenter and `input` for `'a`.
    let breaks = self.0.segment_utf16(input);
    Box::new(ICU4XLineBreakIteratorUtf16(breaks))
}
/// Segments a Latin-1 string, returning an iterator over its line-break positions.
#[diplomat::rust_link(icu::segmenter::LineSegmenter::segment_latin1, FnInStruct)]
#[diplomat::attr(dart, disable)]
pub fn segment_latin1<'a>(
    &'a self,
    input: &'a [u8],
) -> Box<ICU4XLineBreakIteratorLatin1<'a>> {
    // The returned iterator borrows both the segmenter and `input` for `'a`.
    let breaks = self.0.segment_latin1(input);
    Box::new(ICU4XLineBreakIteratorLatin1(breaks))
}
}
impl<'a> ICU4XLineBreakIteratorUtf8<'a> {
    /// Advances the iterator and returns the index of the next breakpoint.
    ///
    /// Returns -1 when the end of the string has been reached, and also when the index
    /// cannot be represented as a 32-bit signed integer.
    #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
    #[diplomat::rust_link(
        icu::segmenter::LineBreakIterator::Item,
        AssociatedTypeInStruct,
        hidden
    )]
    pub fn next(&mut self) -> i32 {
        match self.0.next() {
            // An index too large for `i32` is reported with the same sentinel as exhaustion.
            Some(index) => i32::try_from(index).unwrap_or(-1),
            None => -1,
        }
    }
}
impl<'a> ICU4XLineBreakIteratorUtf16<'a> {
    /// Advances the iterator and returns the index of the next breakpoint.
    ///
    /// Returns -1 when the end of the string has been reached, and also when the index
    /// cannot be represented as a 32-bit signed integer.
    #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
    #[diplomat::rust_link(
        icu::segmenter::LineBreakIterator::Item,
        AssociatedTypeInStruct,
        hidden
    )]
    pub fn next(&mut self) -> i32 {
        match self.0.next() {
            // An index too large for `i32` is reported with the same sentinel as exhaustion.
            Some(index) => i32::try_from(index).unwrap_or(-1),
            None => -1,
        }
    }
}
impl<'a> ICU4XLineBreakIteratorLatin1<'a> {
    /// Advances the iterator and returns the index of the next breakpoint.
    ///
    /// Returns -1 when the end of the string has been reached, and also when the index
    /// cannot be represented as a 32-bit signed integer.
    #[diplomat::rust_link(icu::segmenter::LineBreakIterator::next, FnInStruct)]
    #[diplomat::rust_link(
        icu::segmenter::LineBreakIterator::Item,
        AssociatedTypeInStruct,
        hidden
    )]
    pub fn next(&mut self) -> i32 {
        match self.0.next() {
            // An index too large for `i32` is reported with the same sentinel as exhaustion.
            Some(index) => i32::try_from(index).unwrap_or(-1),
            None => -1,
        }
    }
}
}
impl From<ffi::ICU4XLineBreakStrictness> for LineBreakStrictness {
fn from(other: ffi::ICU4XLineBreakStrictness) -> Self {
match other {
ffi::ICU4XLineBreakStrictness::Loose => Self::Loose,
ffi::ICU4XLineBreakStrictness::Normal => Self::Normal,
ffi::ICU4XLineBreakStrictness::Strict => Self::Strict,
ffi::ICU4XLineBreakStrictness::Anywhere => Self::Anywhere,
}
}
}
impl From<ffi::ICU4XLineBreakWordOption> for LineBreakWordOption {
fn from(other: ffi::ICU4XLineBreakWordOption) -> Self {
match other {
ffi::ICU4XLineBreakWordOption::Normal => Self::Normal,
ffi::ICU4XLineBreakWordOption::BreakAll => Self::BreakAll,
ffi::ICU4XLineBreakWordOption::KeepAll => Self::KeepAll,
}
}
}
impl From<ffi::ICU4XLineBreakOptionsV1> for LineBreakOptions {
    /// Converts the FFI options struct into `LineBreakOptions`.
    ///
    /// Starts from `LineBreakOptions::default()` and overwrites the three fields the FFI
    /// struct carries.
    fn from(other: ffi::ICU4XLineBreakOptionsV1) -> Self {
        let ffi::ICU4XLineBreakOptionsV1 {
            strictness,
            word_option,
            ja_zh,
        } = other;

        let mut converted = Self::default();
        converted.ja_zh = ja_zh;
        converted.word_option = word_option.into();
        converted.strictness = strictness.into();
        converted
    }
}