Source code
Revision control
Copy as Markdown
Other Tools
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at https://mozilla.org/MPL/2.0/. */
use nsstring::nsACString;
use oxilangtag::LanguageTag;
/// Opaque type for the ffi interface.
///
/// Callers only ever hold pointers to this type. A non-null `*mut LangTag`
/// handed out by `lang_tag_new` is really a boxed `LanguageTag<&str>` whose
/// concrete type has been erased by a pointer cast.
pub struct LangTag;
/// Parse `tag` as a BCP47 language tag and hand ownership of the result to the caller.
///
/// On success the parsed tag is boxed and returned as an opaque `LangTag` pointer;
/// the caller must eventually release it with `lang_tag_destroy`.
///
/// The parsed tag borrows the bytes of `tag` rather than copying them, so the
/// string `tag` must outlive the returned `LangTag`.
///
/// Returns null if `tag` is not valid UTF-8 or is not a well-formed BCP47 tag.
#[no_mangle]
pub extern "C" fn lang_tag_new(tag: &nsACString) -> *mut LangTag {
    // Reject non-UTF-8 input up front; the parser only works on `&str`.
    let Ok(tag_str) = core::str::from_utf8(tag.as_ref()) else {
        return std::ptr::null_mut();
    };

    match LanguageTag::parse(tag_str) {
        // Leak the box to the caller, erasing the concrete type behind `LangTag`.
        Ok(parsed) => Box::into_raw(Box::new(parsed)) as *mut LangTag,
        Err(_) => std::ptr::null_mut(),
    }
}
/// Release a `LangTag` previously returned by `lang_tag_new`.
///
/// Passing null is allowed and does nothing.
#[no_mangle]
pub extern "C" fn lang_tag_destroy(lang: *mut LangTag) {
    if !lang.is_null() {
        // SAFETY: relies on the caller passing a pointer that came from
        // `lang_tag_new()` (i.e. `Box::into_raw` of a `LanguageTag<&str>`) and
        // that has not already been destroyed.
        drop(unsafe { Box::from_raw(lang as *mut LanguageTag<&str>) });
    }
}
/// Matches an HTML language attribute against a CSS :lang() selector using the
/// RFC 4647 "extended filtering" algorithm.
///
/// `attribute` is a BCP47 language tag that was successfully parsed by oxilangtag
/// (a pointer obtained from `lang_tag_new`); `selector` is a string that is
/// treated as an extended language range per RFC 4647.
///
/// Returns `true` if the tag matches the range. Returns `false` if it does not
/// match, if either string is empty, or if `attribute` is null (which is what
/// `lang_tag_new` returns for a malformed tag).
#[no_mangle]
pub extern "C" fn lang_tag_matches(attribute: *const LangTag, selector: &nsACString) -> bool {
    // `lang_tag_new()` reports parse failure with a null pointer; a tag that
    // failed to parse cannot match anything, so treat null as "no match"
    // instead of dereferencing it.
    if attribute.is_null() {
        return false;
    }

    // SAFETY: a non-null `attribute` must be a pointer that we got from
    // `lang_tag_new()` and that has not been passed to `lang_tag_destroy()`.
    // We only borrow the tag here; ownership stays with the caller.
    let lang = unsafe { &*(attribute as *const LanguageTag<&str>) };

    // SAFETY: our callers guarantee that the selector string is valid UTF-8.
    let range_str = unsafe { selector.as_str_unchecked() };

    extended_filter_matches(lang.as_str(), range_str)
}

/// RFC 4647 section 3.3.2 "Extended Filtering" of language tag `tag` against
/// the extended language range `range`. Empty inputs never match.
fn extended_filter_matches(tag: &str, range: &str) -> bool {
    if tag.is_empty() || range.is_empty() {
        return false;
    }

    // 1. Split both the extended language range and the language tag being
    //    compared into a list of subtags by dividing on the hyphen (%x2D)
    //    character. Two subtags match if either they are the same when
    //    compared case-insensitively or the language range's subtag is the
    //    wildcard '*'.
    let mut range_subtags = range.split('-');
    let mut tag_subtags = tag.split('-');

    // 2. Begin with the first subtag in each list. If the first subtag in
    //    the range does not match the first subtag in the tag, the overall
    //    match fails. Otherwise, move to the next subtag in both the
    //    range and the tag.
    //
    // `split` always yields at least one item, so the `else` arm is not
    // expected to run; it just avoids a panic path inside FFI code.
    let (Some(first_range), Some(first_tag)) = (range_subtags.next(), tag_subtags.next()) else {
        return false;
    };
    if first_range != "*" && !first_range.eq_ignore_ascii_case(first_tag) {
        return false;
    }

    // 3. While there are more subtags left in the language range's list:
    for range_subtag in range_subtags {
        // A. If the subtag currently being examined in the range is the
        //    wildcard ('*'), move to the next subtag in the range and
        //    continue with the loop.
        if range_subtag == "*" {
            continue;
        }

        // Scan forward through the tag's subtags looking for this range subtag.
        loop {
            // B. Else, if there are no more subtags in the language tag's
            //    list, the match fails.
            let Some(tag_subtag) = tag_subtags.next() else {
                return false;
            };

            // C. Else, if the current subtag in the range's list matches the
            //    current subtag in the language tag's list, move to the next
            //    subtag in both lists and continue with the loop.
            if range_subtag.eq_ignore_ascii_case(tag_subtag) {
                break;
            }

            // D. Else, if the language tag's subtag is a "singleton" (a single
            //    letter or digit, which includes the private-use subtag 'x')
            //    the match fails.
            if tag_subtag.len() == 1 {
                return false;
            }

            // E. Else, move to the next subtag in the language tag's list and
            //    continue with the loop (done by the `next()` call above).
        }
    }

    // 4. When the language range's list has no more subtags, the match
    //    succeeds.
    true
}