time/format_description/parse/lexer.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
//! Lexer for parsing format descriptions.
use core::iter;
use super::{Location, Span};
/// A token emitted by the lexer. There is no semantic meaning at this stage.
pub(super) enum Token<'a> {
/// A literal string, formatted and parsed as-is.
Literal {
/// The string itself.
value: &'a [u8],
/// Where the string was in the format string.
span: Span,
},
/// An opening or closing bracket. May or may not be the start or end of a component.
Bracket {
/// Whether the bracket is opening or closing.
kind: BracketKind,
/// Where the bracket was in the format string.
location: Location,
},
/// One part of a component. This could be its name, a modifier, or whitespace.
ComponentPart {
/// Whether the part is whitespace or not.
kind: ComponentKind,
/// The part itself.
value: &'a [u8],
/// Where the part was in the format string.
span: Span,
},
}
/// What type of bracket is present.
pub(super) enum BracketKind {
/// An opening bracket: `[`
Opening,
/// A closing bracket: `]`
Closing,
}
/// Indicates whether the component is whitespace or not.
pub(super) enum ComponentKind {
#[allow(clippy::missing_docs_in_private_items)]
Whitespace,
#[allow(clippy::missing_docs_in_private_items)]
NotWhitespace,
}
/// Attach [`Location`] information to each byte in the iterator.
fn attach_location(iter: impl Iterator<Item = u8>) -> impl Iterator<Item = (u8, Location)> {
let mut line = 1;
let mut column = 1;
let mut byte_pos = 0;
iter.map(move |byte| {
let location = Location {
line,
column,
byte: byte_pos,
};
column += 1;
byte_pos += 1;
if byte == b'\n' {
line += 1;
column = 1;
}
(byte, location)
})
}
/// Parse the string into a series of [`Token`]s.
pub(super) fn lex(mut input: &[u8]) -> impl Iterator<Item = Token<'_>> {
let mut depth: u8 = 0;
let mut iter = attach_location(input.iter().copied()).peekable();
let mut second_bracket_location = None;
iter::from_fn(move || {
// There is a flag set to emit the second half of an escaped bracket pair.
if let Some(location) = second_bracket_location.take() {
return Some(Token::Bracket {
kind: BracketKind::Opening,
location,
});
}
Some(match iter.next()? {
(b'[', location) => {
if let Some((_, second_location)) = iter.next_if(|&(byte, _)| byte == b'[') {
// escaped bracket
second_bracket_location = Some(second_location);
input = &input[2..];
} else {
// opening bracket
depth += 1;
input = &input[1..];
}
Token::Bracket {
kind: BracketKind::Opening,
location,
}
}
// closing bracket
(b']', location) if depth > 0 => {
depth -= 1;
input = &input[1..];
Token::Bracket {
kind: BracketKind::Closing,
location,
}
}
// literal
(_, start_location) if depth == 0 => {
let mut bytes = 1;
let mut end_location = start_location;
while let Some((_, location)) = iter.next_if(|&(byte, _)| byte != b'[') {
end_location = location;
bytes += 1;
}
let value = &input[..bytes];
input = &input[bytes..];
Token::Literal {
value,
span: Span::start_end(start_location, end_location),
}
}
// component part
(byte, start_location) => {
let mut bytes = 1;
let mut end_location = start_location;
let is_whitespace = byte.is_ascii_whitespace();
while let Some((_, location)) = iter.next_if(|&(byte, _)| {
byte != b'[' && byte != b']' && is_whitespace == byte.is_ascii_whitespace()
}) {
end_location = location;
bytes += 1;
}
let value = &input[..bytes];
input = &input[bytes..];
Token::ComponentPart {
kind: if is_whitespace {
ComponentKind::Whitespace
} else {
ComponentKind::NotWhitespace
},
value,
span: Span::start_end(start_location, end_location),
}
}
})
})
}