mz_ore/
lex.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Lexing utilities.
17
18/// A cursor over a string with a variety of lexing convenience methods.
19#[derive(Debug)]
20pub struct LexBuf<'a> {
21    buf: &'a str,
22    pos: usize,
23}
24
25impl<'a> LexBuf<'a> {
26    /// Creates a new lexical buffer.
27    ///
28    /// The internal cursor is initialized to point at the start of `buf`.
29    pub fn new(buf: &'a str) -> LexBuf<'a> {
30        LexBuf { buf, pos: 0 }
31    }
32
33    /// Returns the next character in the buffer, if any, without advancing the
34    /// internal cursor.
35    pub fn peek(&self) -> Option<char> {
36        self.buf[self.pos..].chars().next()
37    }
38
39    /// Returns a slice containing the next `n` characters in the buffer.
40    ///
41    /// Returns `None` if there are not at least `n` characters remaining.
42    /// Advances the internal cursor `n` characters, or to the end of the buffer
43    /// if there are less than `n` characters remaining.
44    pub fn next_n(&mut self, n: usize) -> Option<&'a str> {
45        let start = self.pos;
46        for _ in 0..n {
47            self.next()?;
48        }
49        Some(&self.buf[start..self.pos])
50    }
51
52    /// Returns the previous character in the buffer, positioning the internal
53    /// cursor to before the character.
54    ///
55    /// The next call to `LexBuf::next` will return the same character.
56    ///
57    /// # Panics
58    ///
59    /// Panics if `prev` is called when the internal cursor is positioned at
60    /// the beginning of the buffer.
61    pub fn prev(&mut self) -> char {
62        if let Some(c) = self.buf[..self.pos].chars().rev().next() {
63            self.pos -= c.len_utf8();
64            c
65        } else {
66            panic!("LexBuf::prev called on buffer at position 0")
67        }
68    }
69
70    /// Advances the internal cursor past the next character in the buffer if
71    /// the character is `ch`.
72    ///
73    /// Returns whether the cursor advanced.
74    pub fn consume(&mut self, ch: char) -> bool {
75        if self.peek() == Some(ch) {
76            self.next();
77            true
78        } else {
79            false
80        }
81    }
82
83    /// Advances the internal cursor past `s` if it exactly matches the next
84    /// characters in the buffer.
85    ///
86    /// Returns whether the cursor advanced.
87    pub fn consume_str(&mut self, s: &str) -> bool {
88        if self.buf[self.pos..].starts_with(s) {
89            self.pos += s.len();
90            true
91        } else {
92            false
93        }
94    }
95
96    /// Searches the buffer for `delim`, returning the string from the current
97    /// cursor position to the start of `delim`.
98    ///
99    /// Returns `None` if `delim` is not found in the buffer. The internal
100    /// cursor is advanced past the end of `delim`, or to the end of the buffer
101    /// if `delim` is not found.
102    pub fn take_to_delimiter(&mut self, delim: &str) -> Option<&'a str> {
103        if let Some(pos) = self.buf[self.pos..].find(delim) {
104            let s = &self.buf[self.pos..self.pos + pos];
105            self.pos += pos + delim.len();
106            Some(s)
107        } else {
108            self.pos = self.buf.len();
109            None
110        }
111    }
112
113    /// Searches the remaining buffer for the first character to fail to satisfy
114    /// `predicate`, returning the string from the current cursor position to
115    /// the failing character.
116    ///
117    /// Advances the cursor to the character that failed the predicate, or to
118    /// the end of the string if no character failed the predicate.
119    pub fn take_while<P>(&mut self, mut predicate: P) -> &'a str
120    where
121        P: FnMut(char) -> bool,
122    {
123        let pos = self.pos;
124        while let Some(ch) = self.peek() {
125            if predicate(ch) {
126                self.next();
127            } else {
128                break;
129            }
130        }
131        &self.buf[pos..self.pos]
132    }
133
134    /// Reports the current position of the cursor in the buffer.
135    pub fn pos(&self) -> usize {
136        self.pos
137    }
138
139    /// Returns the string that the lexical buffer wraps.
140    ///
141    /// Note that the entire string is returned, regardless of the position of
142    /// the buffer's internal cursor.
143    pub fn inner(&self) -> &'a str {
144        self.buf
145    }
146}
147
148impl<'a> Iterator for LexBuf<'a> {
149    type Item = char;
150
151    /// Returns the next character in the buffer, if any, advancing the internal
152    /// cursor past the character.
153    ///
154    /// It is safe to call `next` after it returns `None`.
155    fn next(&mut self) -> Option<char> {
156        let c = self.peek();
157        if let Some(c) = c {
158            self.pos += c.len_utf8();
159        }
160        c
161    }
162}