Source code
Revision control
Copy as Markdown
Other Tools
// Copyright 2013-2016 The rust-url developers.
//
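// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed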
// except according to those terms.
//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
//! https://url.spec.whatwg.org/#application/x-www-form-urlencoded),
//! as used by HTML forms.
//!
//! Converts between a string (such as a URL’s query string)
//! and a sequence of (name, value) pairs.
#![no_std]
// For forwards compatibility
#[cfg(feature = "std")]
extern crate std as _;
extern crate alloc;
#[cfg(not(feature = "alloc"))]
compile_error!("the `alloc` feature must currently be enabled");
use alloc::borrow::{Borrow, Cow, ToOwned};
use alloc::string::String;
use core::str;
use percent_encoding::{percent_decode, percent_encode_byte};
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
/// converted to `[("#first", "%try%")]`.
///
/// Nothing is decoded by this call itself: the returned [`Parse`] only stores
/// `input`, and each pair is decoded when the iterator is advanced.
#[inline]
pub fn parse(input: &[u8]) -> Parse<'_> {
Parse { input }
}
/// The return type of `parse()`.
///
/// An iterator over the decoded `(name, value)` pairs of the input.
#[derive(Copy, Clone)]
pub struct Parse<'a> {
// Bytes not consumed yet; the iterator shrinks this as pairs are yielded.
input: &'a [u8],
}
impl<'a> Iterator for Parse<'a> {
    type Item = (Cow<'a, str>, Cow<'a, str>);

    /// Yield the next decoded `(name, value)` pair, or `None` once the input
    /// is exhausted.
    ///
    /// Pairs are separated by `&`; empty sequences (as in `a=1&&b=2`) are
    /// skipped. Within a sequence the first `=` separates the name from the
    /// value, and a sequence without `=` yields an empty value.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.input.is_empty() {
            let pending = self.input;

            // Cut off everything up to the first `&`; the rest stays queued.
            let (sequence, rest) = match pending.iter().position(|&b| b == b'&') {
                Some(at) => (&pending[..at], &pending[at + 1..]),
                None => (pending, &[][..]),
            };
            self.input = rest;

            if sequence.is_empty() {
                continue;
            }

            // Only the first `=` is a separator; later ones belong to the value.
            let (name, value) = match sequence.iter().position(|&b| b == b'=') {
                Some(at) => (&sequence[..at], &sequence[at + 1..]),
                None => (sequence, &[][..]),
            };
            return Some((decode(name), decode(value)));
        }
        None
    }
}
/// Decode a single name or value.
///
/// `+` is turned into a space, the result is percent-decoded, and the bytes
/// are then converted to text with invalid UTF-8 replaced.
fn decode(input: &[u8]) -> Cow<'_, str> {
    let replaced = replace_plus(input);
    // A borrowed percent-decoding result points into the local `replaced`, so
    // it cannot be returned; in that case nothing was decoded and `replaced`
    // itself already holds the right bytes.
    let bytes: Cow<'_, [u8]> = match percent_decode(&replaced).into() {
        Cow::Owned(decoded) => Cow::Owned(decoded),
        Cow::Borrowed(_) => replaced,
    };
    decode_utf8_lossy(bytes)
}
/// Replace every `b'+'` with `b' '`.
///
/// The input is returned borrowed when it contains no `+`; otherwise a
/// modified copy is returned.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    let mut spaced = input.to_owned();
    spaced
        .iter_mut()
        .filter(|byte| **byte == b'+')
        .for_each(|byte| *byte = b' ');
    Cow::Owned(spaced)
}
impl<'a> Parse<'a> {
/// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
///
/// The returned iterator wraps this one, so it continues from the current position.
pub fn into_owned(self) -> ParseIntoOwned<'a> {
ParseIntoOwned { inner: self }
}
}
/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
///
/// Created by `Parse::into_owned()`.
pub struct ParseIntoOwned<'a> {
// The underlying borrowing iterator whose items are converted to owned strings.
inner: Parse<'a>,
}
impl<'a> Iterator for ParseIntoOwned<'a> {
    type Item = (String, String);

    /// Advance the wrapped `Parse` iterator and convert both halves of the
    /// pair it yields into owned `String`s.
    fn next(&mut self) -> Option<Self::Item> {
        let (name, value) = self.inner.next()?;
        Some((name.into_owned(), value.into_owned()))
    }
}
/// The [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
///
/// Return an iterator of `&str` slices.
///
/// ASCII letters, digits, `*`, `-`, `.` and `_` are passed through unchanged,
/// a space becomes `+`, and every other byte is percent-encoded.
pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
ByteSerialize { bytes: input }
}
/// Return value of `byte_serialize()`.
///
/// An iterator over the serialized `&str` pieces of the input bytes.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
// Input bytes that have not been serialized yet.
bytes: &'a [u8],
}
/// Whether `byte` is emitted as-is by the byte serializer: ASCII letters and
/// digits plus `*`, `-`, `.` and `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
impl<'a> Iterator for ByteSerialize<'a> {
    type Item = &'a str;

    /// Yield the next serialized piece.
    ///
    /// A byte that needs escaping is emitted on its own (`"+"` for a space,
    /// its percent-encoded form otherwise). A run of bytes that need no
    /// escaping is emitted as one slice of the input.
    fn next(&mut self) -> Option<&'a str> {
        let bytes = self.bytes;
        let (&first, tail) = bytes.split_first()?;

        if !byte_serialized_unchanged(first) {
            self.bytes = tail;
            return Some(match first {
                b' ' => "+",
                other => percent_encode_byte(other),
            });
        }

        // Length of the leading run of unchanged bytes: `first` itself plus
        // everything in `tail` before the first byte that needs escaping.
        let run_len = tail
            .iter()
            .position(|&b| !byte_serialized_unchanged(b))
            .map_or(bytes.len(), |offset| offset + 1);
        let (unchanged_slice, remaining) = bytes.split_at(run_len);
        self.bytes = remaining;

        // SAFETY: every byte of `unchanged_slice` was accepted by
        // `byte_serialized_unchanged`, which only accepts ASCII bytes, so the
        // slice is valid UTF-8 and a second validation pass would be wasted.
        Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
    }

    /// A non-empty input yields at least one piece and at most one per byte.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self.bytes.len() {
            0 => (0, Some(0)),
            len => (1, Some(len)),
        }
    }
}
/// The [`application/x-www-form-urlencoded` serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
///
/// Name/value pairs are appended to the `String` exposed by the wrapped `Target`.
pub struct Serializer<'a, T: Target> {
// `None` once `finish()` has taken the target out.
target: Option<T>,
// Byte offset in the target string at which this serializer's pairs begin.
start_position: usize,
// Optional custom encoder applied to names and values before byte serialization.
encoding: EncodingOverride<'a>,
}
/// A destination that a `Serializer` can write into.
pub trait Target {
    /// The value produced by `finish`, and therefore by `Serializer::finish`.
    type Finished;

    /// Borrow the `String` that serialized output is appended to.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and turn it into its finished form.
    fn finish(self) -> Self::Finished;
}
/// An owned `String` is its own target and is handed back unchanged by `finish`.
impl Target for String {
    type Finished = Self;

    fn as_mut_string(&mut self) -> &mut String {
        self
    }

    fn finish(self) -> Self {
        self
    }
}
/// A mutable borrow of a `String` writes into the borrowed string; `finish`
/// returns the same borrow.
impl<'a> Target for &'a mut String {
    type Finished = Self;

    fn as_mut_string(&mut self) -> &mut String {
        // Reborrow through the outer `&mut` to reach the underlying string.
        &mut **self
    }

    fn finish(self) -> Self {
        self
    }
}
impl<'a, T: Target> Serializer<'a, T> {
    /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
    ///
    /// Equivalent to `for_suffix(target, 0)`. If the target is non-empty,
    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
    pub fn new(target: T) -> Self {
        Self::for_suffix(target, 0)
    }

    /// Create a new `application/x-www-form-urlencoded` serializer
    /// for the part of the target that starts at byte offset `start_position`.
    ///
    /// If that suffix is non-empty,
    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
    ///
    /// Panics if `start_position` is greater than the length of the target string.
    pub fn for_suffix(mut target: T, start_position: usize) -> Self {
        assert!(
            target.as_mut_string().len() >= start_position,
            "invalid length {} for target of length {}",
            start_position,
            target.as_mut_string().len()
        );
        Serializer {
            encoding: None,
            start_position,
            target: Some(target),
        }
    }

    /// Remove any existing name/value pair by truncating the target string
    /// back to the start position.
    ///
    /// Panics if called after `.finish()`.
    pub fn clear(&mut self) -> &mut Self {
        let start = self.start_position;
        string(&mut self.target).truncate(start);
        self
    }

    /// Set the character encoding to be used for names and values before percent-encoding.
    pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
        self.encoding = new;
        self
    }

    /// Serialize and append a name/value pair.
    ///
    /// Panics if called after `.finish()`.
    pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
        let (start, encoding) = (self.start_position, self.encoding);
        append_pair(string(&mut self.target), start, encoding, name, value);
        self
    }

    /// Serialize and append a name of parameter without any value.
    ///
    /// Panics if called after `.finish()`.
    pub fn append_key_only(&mut self, name: &str) -> &mut Self {
        let (start, encoding) = (self.start_position, self.encoding);
        append_key_only(string(&mut self.target), start, encoding, name);
        self
    }

    /// Serialize and append a number of name/value pairs.
    ///
    /// This simply appends each pair in turn, exactly as `append_pair` would.
    /// This can be more convenient, so the user doesn’t need to introduce a block
    /// to limit the scope of `Serializer`’s borrow of its string.
    ///
    /// Panics if called after `.finish()`.
    pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
    where
        I: IntoIterator,
        I::Item: Borrow<(K, V)>,
        K: AsRef<str>,
        V: AsRef<str>,
    {
        let (start, encoding) = (self.start_position, self.encoding);
        let out = string(&mut self.target);
        for pair in iter {
            let (name, value) = pair.borrow();
            append_pair(out, start, encoding, name.as_ref(), value.as_ref());
        }
        self
    }

    /// Serialize and append a number of names without values.
    ///
    /// This simply appends each name in turn, exactly as `append_key_only` would.
    /// This can be more convenient, so the user doesn’t need to introduce a block
    /// to limit the scope of `Serializer`’s borrow of its string.
    ///
    /// Panics if called after `.finish()`.
    pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
    where
        I: IntoIterator,
        I::Item: Borrow<K>,
        K: AsRef<str>,
    {
        let (start, encoding) = (self.start_position, self.encoding);
        let out = string(&mut self.target);
        for key in iter {
            let name = key.borrow().as_ref();
            append_key_only(out, start, encoding, name);
        }
        self
    }

    /// If this serializer was constructed with a string, take and return that string.
    ///
    /// ```rust
    /// use form_urlencoded;
    /// let encoded: String = form_urlencoded::Serializer::new(String::new())
    ///     .append_pair("foo", "bar & baz")
    ///     .append_pair("saison", "Été+hiver")
    ///     .finish();
    /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
    /// ```
    ///
    /// Panics if called more than once.
    pub fn finish(&mut self) -> T::Finished {
        let target = self
            .target
            .take()
            .expect("url::form_urlencoded::Serializer double finish");
        target.finish()
    }
}
/// Push a `&` separator when `string` already holds content past
/// `start_position`, i.e. when the next pair is not the first one.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_earlier_content = string.len() > start_position;
    if has_earlier_content {
        string.push('&');
    }
}
/// Borrow the output `String` of a serializer's target.
///
/// Panics if the target is `None`, which is the state left behind by
/// `Serializer::finish`.
fn string<T: Target>(target: &mut Option<T>) -> &mut String {
    match target.as_mut() {
        Some(inner) => inner.as_mut_string(),
        None => panic!("url::form_urlencoded::Serializer finished"),
    }
}
/// Append `name=value` to `string`, with both parts encoded.
///
/// A `&` separator is written first when `string` already has content past
/// `start_position`. `encoding` is forwarded to the encoder for both parts.
fn append_pair(
string: &mut String,
start_position: usize,
encoding: EncodingOverride<'_>,
name: &str,
value: &str,
) {
append_separator_if_needed(string, start_position);
append_encoded(name, string, encoding);
string.push('=');
append_encoded(value, string, encoding);
}
/// Append an encoded `name` to `string` with no `=` and no value.
///
/// A `&` separator is written first when `string` already has content past
/// `start_position`. `encoding` is forwarded to the encoder.
fn append_key_only(
    string: &mut String,
    start_position: usize,
    // The elided lifetime is spelled `<'_>` to match `append_pair` and
    // `append_encoded`.
    encoding: EncodingOverride<'_>,
    name: &str,
) {
    append_separator_if_needed(string, start_position);
    append_encoded(name, string, encoding);
}
/// Encode `s` to bytes (honouring `encoding`), byte-serialize the result and
/// append it to `string`.
fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
    let bytes = encode(encoding, s);
    string.extend(byte_serialize(&bytes));
}
/// Turn `input` into the bytes that will be byte-serialized.
///
/// With an override, the override decides the bytes; without one, the UTF-8
/// bytes of `input` are borrowed as they are.
pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
    match encoding_override {
        Some(encoder) => encoder(input),
        None => Cow::Borrowed(input.as_bytes()),
    }
}
/// Decode `input` as UTF-8, replacing invalid sequences with U+FFFD.
///
/// Borrowed input is handled by `String::from_utf8_lossy` and stays borrowed
/// when it is already valid. Owned input always produces `Cow::Owned`; when
/// it is valid UTF-8 its buffer is reused instead of being copied.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` validates once and hands back the same
            // allocation, so neither a copy nor an `unsafe` block is needed.
            Ok(utf8) => Cow::Owned(utf8),
            // Invalid UTF-8: build the lossy replacement from the rejected
            // bytes, which the error still carries.
            Err(error) => Cow::Owned(String::from_utf8_lossy(error.as_bytes()).into_owned()),
        },
    }
}
/// Optional callback that converts a string into the bytes to be serialized.
///
/// When this is `None`, the UTF-8 bytes of the string are used as they are.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;