Source code
Revision control
Copy as Markdown
Other Tools
#[allow(unused, deprecated)]
use std::ascii::AsciiExt;
use std::error::Error;
use std::fmt;
use std::iter::Enumerate;
use std::str::Bytes;
use super::{Mime, Source, ParamSource, Indexed, CHARSET, UTF_8};
#[derive(Debug)]
pub enum ParseError {
MissingSlash,
MissingEqual,
MissingQuote,
InvalidToken {
pos: usize,
byte: u8,
},
}
impl ParseError {
fn s(&self) -> &str {
use self::ParseError::*;
match *self {
MissingSlash => "a slash (/) was missing between the type and subtype",
MissingEqual => "an equals sign (=) was missing between a parameter and its value",
MissingQuote => "a quote (\") was missing from a parameter value",
InvalidToken { .. } => "an invalid token was encountered",
}
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let ParseError::InvalidToken { pos, byte } = *self {
write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
} else {
f.write_str(self.s())
}
}
}
impl Error for ParseError {
// Minimum Rust is 1.15, Error::description was still required then
#[allow(deprecated)]
fn description(&self) -> &str {
self.s()
}
}
pub fn parse(s: &str) -> Result<Mime, ParseError> {
if s == "*/*" {
return Ok(::STAR_STAR);
}
let mut iter = s.bytes().enumerate();
// toplevel
let mut start;
let slash;
loop {
match iter.next() {
Some((_, c)) if is_token(c) => (),
Some((i, b'/')) if i > 0 => {
slash = i;
start = i + 1;
break;
},
None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
})
};
}
// sublevel
let mut plus = None;
loop {
match iter.next() {
Some((i, b'+')) if i > start => {
plus = Some(i);
},
Some((i, b';')) if i > start => {
start = i;
break;
},
Some((_, c)) if is_token(c) => (),
None => {
return Ok(Mime {
source: Source::Dynamic(s.to_ascii_lowercase()),
slash: slash,
plus: plus,
params: ParamSource::None,
});
},
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
})
};
}
// params
let params = params_from_str(s, &mut iter, start)?;
let src = match params {
ParamSource::Utf8(_) => s.to_ascii_lowercase(),
ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
ParamSource::None => {
// Chop off the empty list
s[..start].to_ascii_lowercase()
}
};
Ok(Mime {
source: Source::Dynamic(src),
slash: slash,
plus: plus,
params: params,
})
}
fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
let semicolon = start;
start += 1;
let mut params = ParamSource::None;
'params: while start < s.len() {
let name;
// name
'name: loop {
match iter.next() {
Some((i, b' ')) if i == start => {
start = i + 1;
continue 'params;
},
Some((_, c)) if is_token(c) => (),
Some((i, b'=')) if i > start => {
name = Indexed(start, i);
start = i + 1;
break 'name;
},
None => return Err(ParseError::MissingEqual),
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
}),
}
}
let value;
// values must be restrict-name-char or "anything goes"
let mut is_quoted = false;
'value: loop {
if is_quoted {
match iter.next() {
Some((i, b'"')) if i > start => {
value = Indexed(start, i);
break 'value;
},
Some((_, c)) if is_restricted_quoted_char(c) => (),
None => return Err(ParseError::MissingQuote),
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
}),
}
} else {
match iter.next() {
Some((i, b'"')) if i == start => {
is_quoted = true;
start = i + 1;
},
Some((_, c)) if is_token(c) => (),
Some((i, b';')) if i > start => {
value = Indexed(start, i);
start = i + 1;
break 'value;
}
None => {
value = Indexed(start, s.len());
start = s.len();
break 'value;
},
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
}),
}
}
}
if is_quoted {
'ws: loop {
match iter.next() {
Some((i, b';')) => {
// next param
start = i + 1;
break 'ws;
},
Some((_, b' ')) => {
// skip whitespace
},
None => {
// eof
start = s.len();
break 'ws;
},
Some((pos, byte)) => return Err(ParseError::InvalidToken {
pos: pos,
byte: byte,
}),
}
}
}
match params {
ParamSource::Utf8(i) => {
let i = i + 2;
let charset = Indexed(i, "charset".len() + i);
let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1);
params = ParamSource::Custom(semicolon, vec![
(charset, utf8),
(name, value),
]);
},
ParamSource::Custom(_, ref mut vec) => {
vec.push((name, value));
},
ParamSource::None => {
if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
if UTF_8 == &s[value.0..value.1] {
params = ParamSource::Utf8(semicolon);
continue 'params;
}
}
params = ParamSource::Custom(semicolon, vec![(name, value)]);
},
}
}
Ok(params)
}
fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
let mut owned = s.to_owned();
owned[..semi].make_ascii_lowercase();
for &(ref name, ref value) in params {
owned[name.0..name.1].make_ascii_lowercase();
// Since we just converted this part of the string to lowercase,
// we can skip the `Name == &str` unicase check and do a faster
// memcmp instead.
if &owned[name.0..name.1] == CHARSET.source {
owned[value.0..value.1].make_ascii_lowercase();
}
}
owned
}
//
// > All registered media types MUST be assigned top-level type and
// > subtype names. The combination of these names serves to uniquely
// > identify the media type, and the subtype name facet (or the absence
// > of one) identifies the registration tree. Both top-level type and
// > subtype names are case-insensitive.
// >
// > Type and subtype names MUST conform to the following ABNF:
// >
// > type-name = restricted-name
// > subtype-name = restricted-name
// >
// > restricted-name = restricted-name-first *126restricted-name-chars
// > restricted-name-first = ALPHA / DIGIT
// > restricted-name-chars = ALPHA / DIGIT / "!" / "#" /
// > "$" / "&" / "-" / "^" / "_"
// > restricted-name-chars =/ "." ; Characters before first dot always
// > ; specify a facet name
// > restricted-name-chars =/ "+" ; Characters after last plus always
// > ; specify a structured syntax suffix
//
// > media-type = type "/" subtype *( OWS ";" OWS parameter )
// > type = token
// > subtype = token
// > parameter = token "=" ( token / quoted-string )
//
// Where token is defined as:
//
// > token = 1*tchar
// > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
// > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
//
// So, clearly, ¯\_(Ä_/¯
macro_rules! byte_map {
($($flag:expr,)*) => ([
$($flag != 0,)*
])
}
static TOKEN_MAP: [bool; 256] = byte_map![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
fn is_token(c: u8) -> bool {
TOKEN_MAP[c as usize]
}
fn is_restricted_quoted_char(c: u8) -> bool {
c > 31 && c != 127
}
#[test]
#[allow(warnings)] // ... ranges deprecated
fn test_lookup_tables() {
for (i, &valid) in TOKEN_MAP.iter().enumerate() {
let i = i as u8;
let should = match i {
b'a'...b'z' |
b'A'...b'Z' |
b'0'...b'9' |
b'!' |
b'#' |
b'$' |
b'%' |
b'&' |
b'\'' |
b'*' |
b'+' |
b'-' |
b'.' |
b'^' |
b'_' |
b'`' |
b'|' |
b'~' => true,
_ => false
};
assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
}
}