// rustc_lexer/cursor.rs

1use std::str::Chars;
2
/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
#[derive(Debug)]
pub struct Cursor<'a> {
    /// Byte length of the unconsumed input as of construction or the most
    /// recent `reset_pos_within_token` call. `pos_within_token` subtracts the
    /// current remaining length from it.
    len_remaining: usize,
    /// Iterator over chars. Slightly faster than a &str.
    chars: Chars<'a>,
    /// Most recently consumed character. The field only exists when debug
    /// assertions are enabled; it starts out as `EOF_CHAR`.
    #[cfg(debug_assertions)]
    prev: char,
}
14
15pub(crate) const EOF_CHAR: char = '\0';
16
17impl<'a> Cursor<'a> {
18    pub fn new(input: &'a str) -> Cursor<'a> {
19        Cursor {
20            len_remaining: input.len(),
21            chars: input.chars(),
22            #[cfg(debug_assertions)]
23            prev: EOF_CHAR,
24        }
25    }
26
27    pub fn as_str(&self) -> &'a str {
28        self.chars.as_str()
29    }
30
31    /// Returns the last eaten symbol (or `'\0'` in release builds).
32    /// (For debug assertions only.)
33    pub(crate) fn prev(&self) -> char {
34        #[cfg(debug_assertions)]
35        {
36            self.prev
37        }
38
39        #[cfg(not(debug_assertions))]
40        {
41            EOF_CHAR
42        }
43    }
44
45    /// Peeks the next symbol from the input stream without consuming it.
46    /// If requested position doesn't exist, `EOF_CHAR` is returned.
47    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
48    /// it should be checked with `is_eof` method.
49    pub fn first(&self) -> char {
50        // `.next()` optimizes better than `.nth(0)`
51        self.chars.clone().next().unwrap_or(EOF_CHAR)
52    }
53
54    /// Peeks the second symbol from the input stream without consuming it.
55    pub(crate) fn second(&self) -> char {
56        // `.next()` optimizes better than `.nth(1)`
57        let mut iter = self.chars.clone();
58        iter.next();
59        iter.next().unwrap_or(EOF_CHAR)
60    }
61
62    /// Peeks the third symbol from the input stream without consuming it.
63    pub fn third(&self) -> char {
64        // `.next()` optimizes better than `.nth(1)`
65        let mut iter = self.chars.clone();
66        iter.next();
67        iter.next();
68        iter.next().unwrap_or(EOF_CHAR)
69    }
70
71    /// Checks if there is nothing more to consume.
72    pub(crate) fn is_eof(&self) -> bool {
73        self.chars.as_str().is_empty()
74    }
75
76    /// Returns amount of already consumed symbols.
77    pub(crate) fn pos_within_token(&self) -> u32 {
78        (self.len_remaining - self.chars.as_str().len()) as u32
79    }
80
81    /// Resets the number of bytes consumed to 0.
82    pub(crate) fn reset_pos_within_token(&mut self) {
83        self.len_remaining = self.chars.as_str().len();
84    }
85
86    /// Moves to the next character.
87    pub(crate) fn bump(&mut self) -> Option<char> {
88        let c = self.chars.next()?;
89
90        #[cfg(debug_assertions)]
91        {
92            self.prev = c;
93        }
94
95        Some(c)
96    }
97
98    /// Eats symbols while predicate returns true or until the end of file is reached.
99    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
100        // It was tried making optimized version of this for eg. line comments, but
101        // LLVM can inline all of this and compile it down to fast iteration over bytes.
102        while predicate(self.first()) && !self.is_eof() {
103            self.bump();
104        }
105    }
106
107    pub(crate) fn eat_until(&mut self, byte: u8) {
108        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
109            Some(index) => self.as_str()[index..].chars(),
110            None => "".chars(),
111        }
112    }
113}