rustc_lexer/
cursor.rs

1use std::str::Chars;
2
/// Flag recording whether frontmatter may be accepted.
///
/// The value is handed to `Cursor::new` and stored on the cursor as
/// `frontmatter_allowed`; nothing in this file interprets it, so the exact
/// effect is decided by whichever code reads that field.
///
/// The common traits are derived so the flag can be copied, compared and
/// printed in diagnostics without pattern matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// Frontmatter is permitted.
    Yes,
    /// Frontmatter is not permitted.
    No,
}
7
8/// Peekable iterator over a char sequence.
9///
10/// Next characters can be peeked via `first` method,
11/// and position can be shifted forward via `bump` method.
12pub struct Cursor<'a> {
13    len_remaining: usize,
14    /// Iterator over chars. Slightly faster than a &str.
15    chars: Chars<'a>,
16    pub(crate) frontmatter_allowed: FrontmatterAllowed,
17    #[cfg(debug_assertions)]
18    prev: char,
19}
20
/// Sentinel character (`'\0'`) returned by the peeking methods when the
/// requested position is past the end of the input.
///
/// `'\0'` is also a valid character that can occur in the input itself, so
/// seeing this value is not proof that the input is exhausted; `is_eof` is
/// the reliable check.
pub(crate) const EOF_CHAR: char = '\0';
22
23impl<'a> Cursor<'a> {
24    pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
25        Cursor {
26            len_remaining: input.len(),
27            chars: input.chars(),
28            frontmatter_allowed,
29            #[cfg(debug_assertions)]
30            prev: EOF_CHAR,
31        }
32    }
33
34    pub fn as_str(&self) -> &'a str {
35        self.chars.as_str()
36    }
37
38    /// Returns the last eaten symbol (or `'\0'` in release builds).
39    /// (For debug assertions only.)
40    pub(crate) fn prev(&self) -> char {
41        #[cfg(debug_assertions)]
42        {
43            self.prev
44        }
45
46        #[cfg(not(debug_assertions))]
47        {
48            EOF_CHAR
49        }
50    }
51
52    /// Peeks the next symbol from the input stream without consuming it.
53    /// If requested position doesn't exist, `EOF_CHAR` is returned.
54    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
55    /// it should be checked with `is_eof` method.
56    pub fn first(&self) -> char {
57        // `.next()` optimizes better than `.nth(0)`
58        self.chars.clone().next().unwrap_or(EOF_CHAR)
59    }
60
61    /// Peeks the second symbol from the input stream without consuming it.
62    pub(crate) fn second(&self) -> char {
63        // `.next()` optimizes better than `.nth(1)`
64        let mut iter = self.chars.clone();
65        iter.next();
66        iter.next().unwrap_or(EOF_CHAR)
67    }
68
69    /// Peeks the third symbol from the input stream without consuming it.
70    pub fn third(&self) -> char {
71        // `.next()` optimizes better than `.nth(1)`
72        let mut iter = self.chars.clone();
73        iter.next();
74        iter.next();
75        iter.next().unwrap_or(EOF_CHAR)
76    }
77
78    /// Checks if there is nothing more to consume.
79    pub(crate) fn is_eof(&self) -> bool {
80        self.chars.as_str().is_empty()
81    }
82
83    /// Returns amount of already consumed symbols.
84    pub(crate) fn pos_within_token(&self) -> u32 {
85        (self.len_remaining - self.chars.as_str().len()) as u32
86    }
87
88    /// Resets the number of bytes consumed to 0.
89    pub(crate) fn reset_pos_within_token(&mut self) {
90        self.len_remaining = self.chars.as_str().len();
91    }
92
93    /// Moves to the next character.
94    pub(crate) fn bump(&mut self) -> Option<char> {
95        let c = self.chars.next()?;
96
97        #[cfg(debug_assertions)]
98        {
99            self.prev = c;
100        }
101
102        Some(c)
103    }
104
105    /// Moves to a substring by a number of bytes.
106    pub(crate) fn bump_bytes(&mut self, n: usize) {
107        self.chars = self.as_str()[n..].chars();
108    }
109
110    /// Eats symbols while predicate returns true or until the end of file is reached.
111    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
112        // It was tried making optimized version of this for eg. line comments, but
113        // LLVM can inline all of this and compile it down to fast iteration over bytes.
114        while predicate(self.first()) && !self.is_eof() {
115            self.bump();
116        }
117    }
118
119    pub(crate) fn eat_until(&mut self, byte: u8) {
120        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
121            Some(index) => self.as_str()[index..].chars(),
122            None => "".chars(),
123        }
124    }
125}