regex_syntax/error.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
use alloc::{
format,
string::{String, ToString},
vec,
vec::Vec,
};
use crate::{ast, hir};
/// The top-level error type of this crate: any error that can be returned by
/// this crate is one of these variants.
///
/// Each variant wraps the error produced by one processing phase. `From`
/// conversions are provided for both `ast::Error` and `hir::Error`, and the
/// `Display` implementation forwards to the wrapped error.
///
/// This error type is marked as `non_exhaustive`. This means that adding a
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
/// An error that occurred while translating concrete syntax into abstract
/// syntax (AST).
Parse(ast::Error),
/// An error that occurred while translating abstract syntax into a high
/// level intermediate representation (HIR).
Translate(hir::Error),
}
impl From<ast::Error> for Error {
fn from(err: ast::Error) -> Error {
Error::Parse(err)
}
}
impl From<hir::Error> for Error {
fn from(err: hir::Error) -> Error {
Error::Translate(err)
}
}
// This impl names `std::error::Error`, so it is only compiled when the `std`
// feature is enabled. The body is empty: every trait method keeps its default
// implementation.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Parse(ref x) => x.fmt(f),
Error::Translate(ref x) => x.fmt(f),
}
}
}
/// A helper type for formatting nice error messages.
///
/// This type is responsible for reporting regex parse errors in a nice human
/// readable format. Most of its complexity is from interspersing notational
/// markers pointing out the position where an error occurred.
///
/// A `Formatter` only borrows from the error it describes (lifetime `'e`).
/// It is built with `From<&ast::Error>` or `From<&hir::Error>` and rendered
/// through its `Display` implementation, which requires `E: Display`.
#[derive(Debug)]
pub struct Formatter<'e, E> {
/// The original regex pattern in which the error occurred.
pattern: &'e str,
/// The error kind. It must impl fmt::Display, since it is written after
/// the `error: ` prefix when the formatter is displayed.
err: &'e E,
/// The primary span of the error.
span: &'e ast::Span,
/// An auxiliary and optional span, in case the error needs to point to
/// two locations (e.g., when reporting a duplicate capture group name).
aux_span: Option<&'e ast::Span>,
}
impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
    /// Borrows everything needed to render `err`: its pattern, kind, primary
    /// span and (if present) auxiliary span.
    fn from(err: &'e ast::Error) -> Self {
        let pattern = err.pattern();
        let kind = err.kind();
        let span = err.span();
        let aux_span = err.auxiliary_span();
        Self { pattern, err: kind, span, aux_span }
    }
}
impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
    /// Borrows the pattern, kind and span of `err`. No auxiliary span is
    /// set for HIR errors.
    fn from(err: &'e hir::Error) -> Self {
        let pattern = err.pattern();
        let kind = err.kind();
        let span = err.span();
        Self { pattern, err: kind, span, aux_span: None }
    }
}
impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
    /// Renders the error report.
    ///
    /// The report always starts with a `regex parse error:` line, followed
    /// by the notated pattern, and ends with `error: <kind>` (no trailing
    /// newline).
    ///
    /// When the pattern itself contains a line break, the notated pattern is
    /// additionally framed by two divider lines of 79 `~` characters, and
    /// every span that stretches over several lines is described in words
    /// (`on line .. through line ..`) since it cannot be marked with carets.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Built exactly once; both report layouts below use the same spans.
        let spans = Spans::from_formatter(self);
        writeln!(f, "regex parse error:")?;

        if !self.pattern.contains('\n') {
            // Single-line pattern: no frame and no multi-line notes.
            write!(f, "{}", spans.notate())?;
            return write!(f, "error: {}", self.err);
        }

        let divider = repeat_char('~', 79);
        writeln!(f, "{}", divider)?;
        write!(f, "{}", spans.notate())?;
        writeln!(f, "{}", divider)?;
        // If we have error spans that cover multiple lines, then we just
        // note the line numbers.
        for span in &spans.multi_line {
            writeln!(
                f,
                "on line {} (column {}) through line {} (column {})",
                span.start.line,
                span.start.column,
                span.end.line,
                // The end of a span is exclusive; report the last covered
                // column instead.
                span.end.column - 1
            )?;
        }
        write!(f, "error: {}", self.err)
    }
}
/// This type represents an arbitrary number of error spans in a way that makes
/// it convenient to notate the regex pattern. ("Notate" means "point out
/// exactly where the error occurred in the regex pattern.")
///
/// Technically, we can only ever have two spans given our current error
/// structure. However, after toiling with a specific algorithm for handling
/// two spans, it became obvious that an algorithm to handle an arbitrary
/// number of spans was actually much simpler.
struct Spans<'p> {
/// The original regex pattern string.
pattern: &'p str,
/// The total width that should be used for line numbers. The width is
/// used for left padding the line numbers for alignment.
///
/// A value of `0` means line numbers should not be displayed. That is,
/// the pattern is itself only one line.
line_number_width: usize,
/// All error spans that occur on a single line. This sequence always has
/// length equivalent to the number of lines in `pattern` (counting the
/// empty line after a trailing `\n`), where the index of the sequence
/// represents a line number, starting at `0`. Since span line numbers are
/// 1-indexed, a span on line `n` is stored at index `n - 1`. The spans
/// in each line are sorted in ascending order.
by_line: Vec<Vec<ast::Span>>,
/// All error spans that stretch over more than one line. That is, the
/// start and end position of the span have different line numbers. The
/// spans are sorted in ascending order.
multi_line: Vec<ast::Span>,
}
impl<'p> Spans<'p> {
/// Build a sequence of spans from a formatter.
fn from_formatter<'e, E: core::fmt::Display>(
fmter: &'p Formatter<'e, E>,
) -> Spans<'p> {
let mut line_count = fmter.pattern.lines().count();
// If the pattern ends with a `\n` literal, then our line count is
// off by one, since a span can occur immediately after the last `\n`,
// which is consider to be an additional line.
if fmter.pattern.ends_with('\n') {
line_count += 1;
}
let line_number_width =
if line_count <= 1 { 0 } else { line_count.to_string().len() };
let mut spans = Spans {
pattern: &fmter.pattern,
line_number_width,
by_line: vec![vec![]; line_count],
multi_line: vec![],
};
spans.add(fmter.span.clone());
if let Some(span) = fmter.aux_span {
spans.add(span.clone());
}
spans
}
/// Add the given span to this sequence, putting it in the right place.
fn add(&mut self, span: ast::Span) {
// This is grossly inefficient since we sort after each add, but right
// now, we only ever add two spans at most.
if span.is_one_line() {
let i = span.start.line - 1; // because lines are 1-indexed
self.by_line[i].push(span);
self.by_line[i].sort();
} else {
self.multi_line.push(span);
self.multi_line.sort();
}
}
/// Notate the pattern string with carents (`^`) pointing at each span
/// location. This only applies to spans that occur within a single line.
fn notate(&self) -> String {
let mut notated = String::new();
for (i, line) in self.pattern.lines().enumerate() {
if self.line_number_width > 0 {
notated.push_str(&self.left_pad_line_number(i + 1));
notated.push_str(": ");
} else {
notated.push_str(" ");
}
notated.push_str(line);
notated.push('\n');
if let Some(notes) = self.notate_line(i) {
notated.push_str(¬es);
notated.push('\n');
}
}
notated
}
/// Return notes for the line indexed at `i` (zero-based). If there are no
/// spans for the given line, then `None` is returned. Otherwise, an
/// appropriately space padded string with correctly positioned `^` is
/// returned, accounting for line numbers.
fn notate_line(&self, i: usize) -> Option<String> {
let spans = &self.by_line[i];
if spans.is_empty() {
return None;
}
let mut notes = String::new();
for _ in 0..self.line_number_padding() {
notes.push(' ');
}
let mut pos = 0;
for span in spans {
for _ in pos..(span.start.column - 1) {
notes.push(' ');
pos += 1;
}
let note_len = span.end.column.saturating_sub(span.start.column);
for _ in 0..core::cmp::max(1, note_len) {
notes.push('^');
pos += 1;
}
}
Some(notes)
}
/// Left pad the given line number with spaces such that it is aligned with
/// other line numbers.
fn left_pad_line_number(&self, n: usize) -> String {
let n = n.to_string();
let pad = self.line_number_width.checked_sub(n.len()).unwrap();
let mut result = repeat_char(' ', pad);
result.push_str(&n);
result
}
/// Return the line number padding beginning at the start of each line of
/// the pattern.
///
/// If the pattern is only one line, then this returns a fixed padding
/// for visual indentation.
fn line_number_padding(&self) -> usize {
if self.line_number_width == 0 {
4
} else {
2 + self.line_number_width
}
}
}
/// Returns a string made of `count` copies of `c` (empty when `count` is 0).
fn repeat_char(c: char, count: usize) -> String {
    let mut repeated = String::new();
    for _ in 0..count {
        repeated.push(c);
    }
    repeated
}
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use crate::ast::parse::Parser;

    /// Asserts that `pattern` fails to parse and that the rendered error is
    /// exactly `expected_msg` with surrounding whitespace trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let result = Parser::new().parse(pattern);
        match result {
            Ok(_) => {
                panic!("regex should not have parsed");
            }
            Err(err) => {
                assert_eq!(err.to_string(), expected_msg.trim());
            }
        }
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        let err = Parser::new().parse("a{\n").unwrap_err();
        // This test checks that the error formatter doesn't panic.
        assert!(!err.to_string().is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        // Whitespace inside the expected message is significant: the pattern
        // line is indented by 4 spaces, and the caret sits under the `[` in
        // column 5, i.e. after 4 + 4 = 8 spaces.
        assert_panic_message(
            r"\\u{[^}]*}",
            r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#,
        );
    }
}