1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository, or online at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Lexing utilities.

/// A cursor over a string with a variety of lexing convenience methods.
#[derive(Debug)]
pub struct LexBuf<'a> {
    buf: &'a str,
    pos: usize,
}

impl<'a> LexBuf<'a> {
    /// Creates a new lexical buffer.
    ///
    /// The internal cursor is initialized to point at the start of `buf`.
    pub fn new(buf: &'a str) -> LexBuf<'a> {
        LexBuf { buf, pos: 0 }
    }

    /// Returns the next character in the buffer, if any, without advancing the
    /// internal cursor.
    pub fn peek(&self) -> Option<char> {
        self.buf[self.pos..].chars().next()
    }

    /// Returns a slice containing the next `n` characters in the buffer.
    ///
    /// Returns `None` if there are not at least `n` characters remaining.
    /// Advances the internal cursor `n` characters, or to the end of the buffer
    /// if there are less than `n` characters remaining.
    pub fn next_n(&mut self, n: usize) -> Option<&'a str> {
        let start = self.pos;
        for _ in 0..n {
            self.next()?;
        }
        Some(&self.buf[start..self.pos])
    }

    /// Returns the previous character in the buffer, positioning the internal
    /// cursor to before the character.
    ///
    /// The next call to `LexBuf::next` will return the same character.
    ///
    /// # Panics
    ///
    /// Panics if `prev` is called when the internal cursor is positioned at
    /// the beginning of the buffer.
    pub fn prev(&mut self) -> char {
        if let Some(c) = self.buf[..self.pos].chars().rev().next() {
            self.pos -= c.len_utf8();
            c
        } else {
            panic!("LexBuf::prev called on buffer at position 0")
        }
    }

    /// Advances the internal cursor past the next character in the buffer if
    /// the character is `ch`.
    ///
    /// Returns whether the cursor was advanced.
    pub fn consume(&mut self, ch: char) -> bool {
        if self.peek() == Some(ch) {
            self.next();
            true
        } else {
            false
        }
    }

    /// Searches the buffer for `delim`, returning the string from the current
    /// cursor position to the start of `delim`.
    ///
    /// Returns `None` if `delim` is not found in the buffer. The internal
    /// cursor is advanced past the end of `delim`, or to the end of the buffer
    /// if `delim` is not found.
    pub fn take_to_delimiter(&mut self, delim: &str) -> Option<&'a str> {
        if let Some(pos) = self.buf[self.pos..].find(delim) {
            let s = &self.buf[self.pos..self.pos + pos];
            self.pos += pos + delim.len();
            Some(s)
        } else {
            self.pos = self.buf.len();
            None
        }
    }

    /// Searches the remaining buffer for the first character to fail to satisfy
    /// `predicate`, returning the string from the current cursor position to
    /// the failing character.
    ///
    /// Advances the cursor to the character that failed the predicate, or to
    /// the end of the string if no character failed the predicate.
    pub fn take_while<P>(&mut self, mut predicate: P) -> &'a str
    where
        P: FnMut(char) -> bool,
    {
        let pos = self.pos;
        while let Some(ch) = self.peek() {
            if predicate(ch) {
                self.next();
            } else {
                break;
            }
        }
        &self.buf[pos..self.pos]
    }

    /// Reports the current position of the cursor in the buffer.
    pub fn pos(&self) -> usize {
        self.pos
    }

    /// Returns the string that the lexical buffer wraps.
    ///
    /// Note that the entire string is returned, regardless of the position of
    /// the buffer's internal cursor.
    pub fn inner(&self) -> &'a str {
        &self.buf
    }
}

impl<'a> Iterator for LexBuf<'a> {
    type Item = char;

    /// Returns the next character in the buffer, if any, advancing the internal
    /// cursor past the character.
    ///
    /// It is safe to call `next` after it returns `None`.
    fn next(&mut self) -> Option<char> {
        let c = self.peek();
        if let Some(c) = c {
            self.pos += c.len_utf8();
        }
        c
    }
}