files.rs - mozsearch

mozilla-central/third_party/rust/codespan-reporting/src/files.rs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

//! Source file support for diagnostic reporting.

//!

//! The main trait defined in this module is the [`Files`] trait, which

//! provides the minimum amount of functionality required for printing [`Diagnostics`]

//! with the [`term::emit`] function.

//!

//! Simple implementations of this trait are implemented:

//!

//! - [`SimpleFile`]: For single-file use-cases

//! - [`SimpleFiles`]: For multi-file use-cases

//!

//! These data structures provide a pretty minimal API, however,

//! so end-users are encouraged to create their own implementations for their

//! own specific use-cases, such as an implementation that accesses the file

//! system directly (and caches the line start locations), or an implementation

//! using an incremental compilation library like [`salsa`].

//!

//! [`term::emit`]: crate::term::emit

//! [`Diagnostics`]: crate::diagnostic::Diagnostic

//! [`Files`]: Files

//! [`SimpleFile`]: SimpleFile

//! [`SimpleFiles`]: SimpleFiles

//!

//! [`salsa`]: https://crates.io/crates/salsa

use std::ops::Range;

/// An enum representing an error that happened while looking up a file or a piece of content in that file.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was requested.
        given: usize,
        /// The maximum byte index, as reported in the error message.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was requested.
        given: usize,
        /// The maximum line index, as reported in the error message.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line does not contain the specified column index.
    ColumnTooLarge {
        /// The column index that was requested.
        given: usize,
        /// The maximum column index, as reported in the error message.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The byte index that was requested.
        given: usize,
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}

impl From<std::io::Error> for Error {

    fn from(err: std::io::Error) -> Error {

        Error::Io(err)

impl std::fmt::Display for Error {

    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

        match self {

            Error::FileMissing => write!(f, "file missing"),

            Error::IndexTooLarge { given, max } => {

                write!(f, "invalid index {}, maximum index is {}", given, max)

            Error::LineTooLarge { given, max } => {

                write!(f, "invalid line {}, maximum line is {}", given, max)

            Error::ColumnTooLarge { given, max } => {

                write!(f, "invalid column {}, maximum column {}", given, max)

            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),

            Error::Io(err) => write!(f, "{}", err),

impl std::error::Error for Error {

    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {

        match &self {

            Error::Io(err) => Some(err),

            _ => None,

/// A minimal interface for accessing source files when rendering diagnostics.

///

/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.

/// This is to workaround the lack of higher kinded lifetime parameters.

/// This can be ignored if this is not needed, however.

pub trait Files<'a> {

    /// A unique identifier for files in the file provider. This will be used

    /// for rendering `diagnostic::Label`s in the corresponding source files.

    type FileId: 'a + Copy + PartialEq;

    /// The user-facing name of a file, to be displayed in diagnostics.

    type Name: 'a + std::fmt::Display;

    /// The source code of a file.

    type Source: 'a + AsRef<str>;

    /// The user-facing name of a file.

    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;

    /// The source code of a file.

    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;

    /// The index of the line at the given byte index.

    /// If the byte index is past the end of the file, returns the maximum line index in the file.

    /// This means that this function only fails if the file is not present.

///

    /// # Note for trait implementors

///

    /// This can be implemented efficiently by performing a binary search over

    /// a list of line starts that was computed by calling the [`line_starts`]

    /// function that is exported from the [`files`] module. It might be useful

    /// to pre-compute and cache these line starts.

///

    /// [`line_starts`]: crate::files::line_starts

    /// [`files`]: crate::files

    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;

    /// The user-facing line number at the given line index.

    /// It is not necessarily checked that the specified line index

    /// is actually in the file.

///

    /// # Note for trait implementors

///

    /// This is usually 1-indexed from the beginning of the file, but

    /// can be useful for implementing something like the

    /// [C preprocessor's `#line` macro][line-macro].

///

    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line

    #[allow(unused_variables)]

    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {

        Ok(line_index + 1)

    /// The user-facing column number at the given line index and byte index.

///

    /// # Note for trait implementors

///

    /// This is usually 1-indexed from the the start of the line.

    /// A default implementation is provided, based on the [`column_index`]

    /// function that is exported from the [`files`] module.

///

    /// [`files`]: crate::files

    /// [`column_index`]: crate::files::column_index

    fn column_number(

        &'a self,

        id: Self::FileId,

        line_index: usize,

        byte_index: usize,

    ) -> Result<usize, Error> {

        let source = self.source(id)?;

        let line_range = self.line_range(id, line_index)?;

        let column_index = column_index(source.as_ref(), line_range, byte_index);

        Ok(column_index + 1)

    /// Convenience method for returning line and column number at the given

    /// byte index in the file.

    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {

        let line_index = self.line_index(id, byte_index)?;

        Ok(Location {

            line_number: self.line_number(id, line_index)?,

            column_number: self.column_number(id, line_index, byte_index)?,

})

    /// The byte range of line in the source of the file.

    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;

/// A user-facing location in a source file.
///
/// Returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}

/// The column index at the given byte index in the source file.
/// This is the number of characters to the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never look past the requested byte, the end of the line, or the end of the source.
    let end_index = byte_index.min(line_range.end).min(source.len());

    // A byte completes a character exactly when the next byte offset is a
    // character boundary, so counting those bytes counts whole characters.
    (line_range.start..end_index)
        .filter(|&offset| source.is_char_boundary(offset + 1))
        .count()
}

/// Return the starting byte index of each line in the source string.
///
/// The first line always starts at byte `0`, so the iterator yields at least
/// one item even for an empty source. Every further item is the byte index
/// directly after a `'\n'`.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // Each newline ends a line, so the next line begins one byte after it.
    let after_newlines = source.match_indices('\n').map(|(newline, _)| newline + 1);

    std::iter::once(0).chain(after_newlines)
}

/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte indices of each line in the source code.
    line_starts: Vec<usize>,
}

impl<Name, Source> SimpleFile<Name, Source>

where

    Name: std::fmt::Display,

    Source: AsRef<str>,

    /// Create a new source file.

    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {

        SimpleFile {

            name,

            line_starts: line_starts(source.as_ref()).collect(),

            source,

    /// Return the name of the file.

    pub fn name(&self) -> &Name {

        &self.name

    /// Return the source of the file.

    pub fn source(&self) -> &Source {

        &self.source

    /// Return the starting byte index of the line with the specified line index.

    /// Convenience method that already generates errors if necessary.

    fn line_start(&self, line_index: usize) -> Result<usize, Error> {

        use std::cmp::Ordering;

        match line_index.cmp(&self.line_starts.len()) {

            Ordering::Less => Ok(self

                .line_starts

                .get(line_index)

                .cloned()

                .expect("failed despite previous check")),

            Ordering::Equal => Ok(self.source.as_ref().len()),

            Ordering::Greater => Err(Error::LineTooLarge {

                given: line_index,

                max: self.line_starts.len() - 1,

}),

impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>

where

    Name: 'a + std::fmt::Display + Clone,

    Source: 'a + AsRef<str>,

    type FileId = ();

    type Name = Name;

    type Source = &'a str;

    fn name(&self, (): ()) -> Result<Name, Error> {

        Ok(self.name.clone())

    fn source(&self, (): ()) -> Result<&str, Error> {

        Ok(self.source.as_ref())

    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {

        Ok(self

            .line_starts

            .binary_search(&byte_index)

            .unwrap_or_else(|next_line| next_line - 1))

    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {

        let line_start = self.line_start(line_index)?;

        let next_line_start = self.line_start(line_index + 1)?;

        Ok(line_start..next_line_start)

/// A file database that can store multiple source files.

///

/// This is useful for simple language tests, but it might be worth creating a

/// custom implementation when a language scales beyond a certain size.

/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.

#[derive(Debug, Clone)]

pub struct SimpleFiles<Name, Source> {

    files: Vec<SimpleFile<Name, Source>>,

impl<Name, Source> SimpleFiles<Name, Source>

where

    Name: std::fmt::Display,

    Source: AsRef<str>,

    /// Create a new files database.

    pub fn new() -> SimpleFiles<Name, Source> {

        SimpleFiles { files: Vec::new() }

    /// Add a file to the database, returning the handle that can be used to

    /// refer to it again.

    pub fn add(&mut self, name: Name, source: Source) -> usize {

        let file_id = self.files.len();

        self.files.push(SimpleFile::new(name, source));

        file_id

    /// Get the file corresponding to the given id.

    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {

        self.files.get(file_id).ok_or(Error::FileMissing)

impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>

where

    Name: 'a + std::fmt::Display + Clone,

    Source: 'a + AsRef<str>,

    type FileId = usize;

    type Name = Name;

    type Source = &'a str;

    fn name(&self, file_id: usize) -> Result<Name, Error> {

        Ok(self.get(file_id)?.name().clone())

    fn source(&self, file_id: usize) -> Result<&str, Error> {

        Ok(self.get(file_id)?.source().as_ref())

    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {

        self.get(file_id)?.line_index((), byte_index)

    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {

        self.get(file_id)?.line_range((), line_index)

#[cfg(test)]
mod test {
    use super::*;

    /// Mixes `\n` and `\r\n` line endings, and includes an empty line.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The cached line starts match the first byte of every line.
    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(
            file.line_starts,
            [
                0,  // "foo\n"
                4,  // "bar\r\n"
                9,  // ""
                10, // "baz"
            ],
        );
    }

    /// Slicing the source by each line range yields the lines, terminators included.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let line_sources = (0..4)
            .map(|line| {
                let line_range = file.line_range((), line).unwrap();
                &file.source[line_range]
            })
            .collect::<Vec<_>>();

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}