lib.rs - mozsearch

comm-central/third_party/rust/litrs/src/lib.rs

Enable keyboard shortcuts

Revision control

Copy as Markdown

Other Tools

HG Web

//! Parsing and inspecting Rust literal tokens.

//!

//! This library offers functionality to parse Rust literals, i.e. tokens in the

//! Rust programming language that represent fixed values. The grammar for

//! those is defined [here][ref].

//!

//! This kind of functionality already exists in the crate `syn`. However, as

//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was

//! built. This crate also offers a bit more flexibility compared to `syn`

//! (only regarding literals, of course).

//!

//!

//! # Quick start

//!

//! | **`StringLit::try_from(tt)?.value()`** |

//! | - |

//!

//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be

//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).

//! Calling `value()` returns the value that is represented by the literal.

//!

//! **Mini Example**

//!

//! ```ignore

//! use proc_macro::TokenStream;

//!

//! #[proc_macro]

//! pub fn foo(input: TokenStream) -> TokenStream {

//!      let first_token = input.into_iter().next().unwrap(); // Do proper error handling!

//!      let string_value = match litrs::StringLit::try_from(first_token) {

//!          Ok(string_lit) => string_lit.value(),

//!          Err(e) => return e.to_compile_error(),

//!      };

//!

//!      // `string_value` is the string value with all escapes resolved.

//!      todo!()

//! }

//! ```

//!

//! # Overview

//!

//! The main types of this library are [`Literal`], representing any kind of

//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a

//! specific kind of literal.

//!

//! There are different ways to obtain such a literal type:

//!

//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,

//!     ParseError>`. For example: [`Literal::parse`] and

//!     [`IntegerLit::parse`].

//!

//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from

//!     the `proc_macro` crate into a `Literal` from this crate.

//!

//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a

//!     `proc_macro::Literal` into a specific literal type of this crate. If

//!     the input is a literal of a different kind, `Err(InvalidToken)` is

//!     returned.

//!

//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a

//!     literal type of this crate. An error is returned if the token tree is

//!     not a literal, or if you are trying to turn it into a specific kind of

//!     literal and the token tree is a different kind of literal.

//!

//! All of the `From` and `TryFrom` conversions also work for reference to

//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is

//! enabled (which it is by default), all these `From` and `TryFrom` impls also

//! exist for the corresponding `proc_macro2` types.

//!

//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.

//! The `TryFrom<TokenTree>` impls check for those two special idents and

//! return a [`BoolLit`] appropriately. For that reason, there is also no

//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`

//! simply cannot represent bool literals.

//!

//!

//! # Examples

//!

//! In a proc-macro:

//!

//! ```ignore

//! use std::convert::TryFrom;

//! use proc_macro::TokenStream;

//! use litrs::FloatLit;

//!

//! #[proc_macro]

//! pub fn foo(input: TokenStream) -> TokenStream {

//!      let mut input = input.into_iter().collect::<Vec<_>>();

//!      if input.len() != 1 {

//!          // Please do proper error handling in your real code!

//!          panic!("expected exactly one token as input");

//!      }

//!      let token = input.remove(0);

//!

//!      match FloatLit::try_from(token) {

//!          Ok(float_lit) => { /* do something */ }

//!          Err(e) => return e.to_compile_error(),

//!      }

//!

//!      // Dummy output

//!      TokenStream::new()

//! }

//! ```

//!

//! Parsing from string:

//!

//! ```

//! use litrs::{FloatLit, Literal};

//!

//! // Parse a specific kind of literal (float in this case):

//! let float_lit = FloatLit::parse("3.14f32");

//! assert!(float_lit.is_ok());

//! assert_eq!(float_lit.unwrap().suffix(), "f32");

//! assert!(FloatLit::parse("'c'").is_err());

//!

//! // Parse any kind of literal. After parsing, you can inspect the literal

//! // and decide what to do in each case.

//! let lit = Literal::parse("0xff80").expect("failed to parse literal");

//! match lit {

//!     Literal::Integer(lit) => { /* ... */ }

//!     Literal::Float(lit) => { /* ... */ }

//!     Literal::Bool(lit) => { /* ... */ }

//!     Literal::Char(lit) => { /* ... */ }

//!     Literal::String(lit) => { /* ... */ }

//!     Literal::Byte(lit) => { /* ... */ }

//!     Literal::ByteString(lit) => { /* ... */ }

//! }

//! ```

//!

//!

//!

//! # Crate features

//!

//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of

//!   `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].

//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the

//!   literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,

//!   only an approximate check (only in ASCII range) is done. If you are

//!   writing a proc macro, you don't need to enable this as the suffix is

//!   already checked by the compiler.

//!

//!

//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals

//!

#![deny(missing_debug_implementations)]

extern crate proc_macro;

#[cfg(test)]

#[macro_use]

mod test_util;

#[cfg(test)]

mod tests;

mod bool;

mod byte;

mod bytestr;

mod char;

mod err;

mod escape;

mod float;

mod impls;

mod integer;

mod parse;

mod string;

use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};

pub use self::{

    bool::BoolLit,

    byte::ByteLit,

    bytestr::ByteStringLit,

    char::CharLit,

    err::{InvalidToken, ParseError},

    float::{FloatLit, FloatType},

    integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},

    string::StringLit,

};

// ==============================================================================================

// ===== `Literal` and type defs

// ==============================================================================================

/// A literal. This is the main type of this library.

///

/// This type is generic over the underlying buffer `B`, which can be `&str` or

/// `String`.

///

/// To create this type, you have to either call [`Literal::parse`] with an

/// input string or use the `From<_>` impls of this type. The impls are only

/// available of the corresponding crate features are enabled (they are enabled

/// by default).

#[derive(Debug, Clone, PartialEq, Eq)]

pub enum Literal<B: Buffer> {

    Bool(BoolLit),

    Integer(IntegerLit<B>),

    Float(FloatLit<B>),

    Char(CharLit<B>),

    String(StringLit<B>),

    Byte(ByteLit<B>),

    ByteString(ByteStringLit<B>),

impl<B: Buffer> Literal<B> {

    /// Parses the given input as a Rust literal.

    pub fn parse(input: B) -> Result<Self, ParseError> {

        parse::parse(input)

    /// Returns the suffix of this literal or `""` if it doesn't have one.

///

    /// Rust token grammar actually allows suffixes for all kinds of tokens.

    /// Most Rust programmer only know the type suffixes for integer and

    /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an

    /// error. But it is possible to pass literals with arbitrary suffixes to

    /// proc macros, for example:

///

    /// ```ignore

    /// some_macro!(3.14f33  16px  '🦊'good_boy  "toph"beifong);

    /// ```

///

    /// Boolean literals, not actually being literals, but idents, cannot have

    /// suffixes and this method always returns `""` for those.

///

    /// There are some edge cases to be aware of:

    /// - Integer suffixes must not start with `e` or `E` as that conflicts with

    ///   the exponent grammar for floats. `0e1` is a float; `0eel` is also

    ///   parsed as a float and results in an error.

    /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a

    ///   suffix von `gh`.

    /// - Suffixes can contain and start with `_`, but for integer and number

    ///   literals, `_` is eagerly parsed as part of the number, so `1_x` has

    ///   the suffix `x`.

    /// - The input `55f32` is regarded as integer literal with suffix `f32`.

///

    /// # Example

///

    /// ```

    /// use litrs::Literal;

///

    /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");

    /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");

    /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");

    /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");

    /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");

    /// ```

    pub fn suffix(&self) -> &str {

        match self {

            Literal::Bool(_) => "",

            Literal::Integer(l) => l.suffix(),

            Literal::Float(l) => l.suffix(),

            Literal::Char(l) => l.suffix(),

            Literal::String(l) => l.suffix(),

            Literal::Byte(l) => l.suffix(),

            Literal::ByteString(l) => l.suffix(),

impl Literal<&str> {

    /// Makes a copy of the underlying buffer and returns the owned version of

    /// `Self`.

    pub fn into_owned(self) -> Literal<String> {

        match self {

            Literal::Bool(l) => Literal::Bool(l.to_owned()),

            Literal::Integer(l) => Literal::Integer(l.to_owned()),

            Literal::Float(l) => Literal::Float(l.to_owned()),

            Literal::Char(l) => Literal::Char(l.to_owned()),

            Literal::String(l) => Literal::String(l.into_owned()),

            Literal::Byte(l) => Literal::Byte(l.to_owned()),

            Literal::ByteString(l) => Literal::ByteString(l.into_owned()),

impl<B: Buffer> fmt::Display for Literal<B> {

    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

        match self {

            Literal::Bool(l) => l.fmt(f),

            Literal::Integer(l) => l.fmt(f),

            Literal::Float(l) => l.fmt(f),

            Literal::Char(l) => l.fmt(f),

            Literal::String(l) => l.fmt(f),

            Literal::Byte(l) => l.fmt(f),

            Literal::ByteString(l) => l.fmt(f),

// ==============================================================================================

// ===== Buffer

// ==============================================================================================

/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.

///

/// This is trait is implementation detail of this library, cannot be

/// implemented in other crates and is not subject to semantic versioning.

/// `litrs` only guarantees that this trait is implemented for `String` and

/// `for<'a> &'a str`.

pub trait Buffer: sealed::Sealed + Deref<Target = str> {

    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.

    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    #[doc(hidden)]

    fn into_cow(self) -> Self::Cow;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.

    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    #[doc(hidden)]

    fn into_byte_cow(self) -> Self::ByteCow;

    /// Cuts away some characters at the beginning and some at the end. Given

    /// range has to be in bounds.

    #[doc(hidden)]

    fn cut(self, range: Range<usize>) -> Self;

mod sealed {

    pub trait Sealed {}

impl<'a> sealed::Sealed for &'a str {}

impl<'a> Buffer for &'a str {

    #[doc(hidden)]

    fn cut(self, range: Range<usize>) -> Self {

        &self[range]

    type Cow = Cow<'a, str>;

    #[doc(hidden)]

    fn into_cow(self) -> Self::Cow {

        self.into()

    type ByteCow = Cow<'a, [u8]>;

    #[doc(hidden)]

    fn into_byte_cow(self) -> Self::ByteCow {

        self.as_bytes().into()

impl sealed::Sealed for String {}

impl Buffer for String {

    #[doc(hidden)]

    fn cut(mut self, range: Range<usize>) -> Self {

        // This is not the most efficient way, but it works. First we cut the

        // end, then the beginning. Note that `drain` also removes the range if

        // the iterator is not consumed.

        self.truncate(range.end);

        self.drain(..range.start);

        self

    type Cow = Cow<'static, str>;

    #[doc(hidden)]

    fn into_cow(self) -> Self::Cow {

        self.into()

    type ByteCow = Cow<'static, [u8]>;

    #[doc(hidden)]

    fn into_byte_cow(self) -> Self::ByteCow {

        self.into_bytes().into()