mz_ore/lex.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Lexing utilities.
17
/// A forward-moving cursor over a borrowed string, offering helpers commonly
/// needed when writing a lexer by hand.
#[derive(Debug)]
pub struct LexBuf<'a> {
    /// The complete input being lexed.
    buf: &'a str,
    /// Byte offset of the cursor within `buf`.
    pos: usize,
}
24
25impl<'a> LexBuf<'a> {
26 /// Creates a new lexical buffer.
27 ///
28 /// The internal cursor is initialized to point at the start of `buf`.
29 pub fn new(buf: &'a str) -> LexBuf<'a> {
30 LexBuf { buf, pos: 0 }
31 }
32
33 /// Returns the next character in the buffer, if any, without advancing the
34 /// internal cursor.
35 pub fn peek(&self) -> Option<char> {
36 self.buf[self.pos..].chars().next()
37 }
38
39 /// Returns a slice containing the next `n` characters in the buffer.
40 ///
41 /// Returns `None` if there are not at least `n` characters remaining.
42 /// Advances the internal cursor `n` characters, or to the end of the buffer
43 /// if there are less than `n` characters remaining.
44 pub fn next_n(&mut self, n: usize) -> Option<&'a str> {
45 let start = self.pos;
46 for _ in 0..n {
47 self.next()?;
48 }
49 Some(&self.buf[start..self.pos])
50 }
51
52 /// Returns the previous character in the buffer, positioning the internal
53 /// cursor to before the character.
54 ///
55 /// The next call to `LexBuf::next` will return the same character.
56 ///
57 /// # Panics
58 ///
59 /// Panics if `prev` is called when the internal cursor is positioned at
60 /// the beginning of the buffer.
61 pub fn prev(&mut self) -> char {
62 if let Some(c) = self.buf[..self.pos].chars().rev().next() {
63 self.pos -= c.len_utf8();
64 c
65 } else {
66 panic!("LexBuf::prev called on buffer at position 0")
67 }
68 }
69
70 /// Advances the internal cursor past the next character in the buffer if
71 /// the character is `ch`.
72 ///
73 /// Returns whether the cursor advanced.
74 pub fn consume(&mut self, ch: char) -> bool {
75 if self.peek() == Some(ch) {
76 self.next();
77 true
78 } else {
79 false
80 }
81 }
82
83 /// Advances the internal cursor past `s` if it exactly matches the next
84 /// characters in the buffer.
85 ///
86 /// Returns whether the cursor advanced.
87 pub fn consume_str(&mut self, s: &str) -> bool {
88 if self.buf[self.pos..].starts_with(s) {
89 self.pos += s.len();
90 true
91 } else {
92 false
93 }
94 }
95
96 /// Searches the buffer for `delim`, returning the string from the current
97 /// cursor position to the start of `delim`.
98 ///
99 /// Returns `None` if `delim` is not found in the buffer. The internal
100 /// cursor is advanced past the end of `delim`, or to the end of the buffer
101 /// if `delim` is not found.
102 pub fn take_to_delimiter(&mut self, delim: &str) -> Option<&'a str> {
103 if let Some(pos) = self.buf[self.pos..].find(delim) {
104 let s = &self.buf[self.pos..self.pos + pos];
105 self.pos += pos + delim.len();
106 Some(s)
107 } else {
108 self.pos = self.buf.len();
109 None
110 }
111 }
112
113 /// Searches the remaining buffer for the first character to fail to satisfy
114 /// `predicate`, returning the string from the current cursor position to
115 /// the failing character.
116 ///
117 /// Advances the cursor to the character that failed the predicate, or to
118 /// the end of the string if no character failed the predicate.
119 pub fn take_while<P>(&mut self, mut predicate: P) -> &'a str
120 where
121 P: FnMut(char) -> bool,
122 {
123 let pos = self.pos;
124 while let Some(ch) = self.peek() {
125 if predicate(ch) {
126 self.next();
127 } else {
128 break;
129 }
130 }
131 &self.buf[pos..self.pos]
132 }
133
134 /// Reports the current position of the cursor in the buffer.
135 pub fn pos(&self) -> usize {
136 self.pos
137 }
138
139 /// Returns the string that the lexical buffer wraps.
140 ///
141 /// Note that the entire string is returned, regardless of the position of
142 /// the buffer's internal cursor.
143 pub fn inner(&self) -> &'a str {
144 self.buf
145 }
146}
147
148impl<'a> Iterator for LexBuf<'a> {
149 type Item = char;
150
151 /// Returns the next character in the buffer, if any, advancing the internal
152 /// cursor past the character.
153 ///
154 /// It is safe to call `next` after it returns `None`.
155 fn next(&mut self) -> Option<char> {
156 let c = self.peek();
157 if let Some(c) = c {
158 self.pos += c.len_utf8();
159 }
160 c
161 }
162}