askama_parser/
lib.rs

1#![deny(unreachable_pub)]
2#![deny(elided_lifetimes_in_paths)]
3
4use std::borrow::Cow;
5use std::cell::Cell;
6use std::{fmt, str};
7
8use nom::branch::alt;
9use nom::bytes::complete::{escaped, is_not, tag, take_till};
10use nom::character::complete::{anychar, char, one_of, satisfy};
11use nom::combinator::{cut, eof, map, opt, recognize};
12use nom::error::{Error, ErrorKind, FromExternalError};
13use nom::multi::{many0_count, many1};
14use nom::sequence::{delimited, pair, preceded, terminated, tuple};
15use nom::{error_position, AsChar, InputTakeAtPosition};
16
17pub mod expr;
18pub use expr::Expr;
19pub mod node;
20pub use node::Node;
21#[cfg(test)]
22mod tests;
23
// Private module so that the fields of `Parsed` cannot be touched from the rest of the
// crate; the self-referential borrow below relies on nothing else accessing them.
mod _parsed {
    use std::cmp::PartialEq;
    use std::{fmt, mem};

    use super::node::Node;
    use super::{Ast, ParseError, Syntax};

    /// An owned template source bundled with the AST that was parsed from it.
    ///
    /// The AST borrows from `source`, so this is a self-referential struct. The nodes are
    /// only ever handed out with a lifetime tied to `&self` (see [`Parsed::nodes`]).
    #[derive(Default)]
    pub struct Parsed {
        // `source` must outlive `ast`, so `ast` must be declared before `source`
        // (fields are dropped in declaration order).
        ast: Ast<'static>,
        // Never read directly: it only owns the buffer that `ast` points into.
        #[allow(dead_code)]
        source: String,
    }

    impl Parsed {
        /// Parses `source` with the delimiters in `syntax`, taking ownership of the source.
        ///
        /// # Errors
        /// Returns the [`ParseError`] produced by [`Ast::from_str`]; in that case `source`
        /// is dropped and no borrow of it escapes (the error owns its message).
        pub fn new(source: String, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
            // Self-referential borrowing: `self` will keep the source alive as `String`,
            // internally we will transmute it to `&'static str` to satisfy the compiler.
            // However, we only expose the nodes with a lifetime limited to `self`.
            //
            // SAFETY: the `String`'s heap buffer does not move when `source` is moved into
            // `Self`, nothing in this module mutates `source` afterwards, and `ast` is
            // dropped before `source` because of the field order of `Parsed`.
            let src = unsafe { mem::transmute::<&str, &'static str>(source.as_str()) };
            let ast = Ast::from_str(src, syntax)?;
            Ok(Self { ast, source })
        }

        // The return value's lifetime must be limited to `self` to uphold the unsafe invariant.
        /// Returns the parsed top-level nodes, borrowed for as long as `self` is borrowed.
        pub fn nodes(&self) -> &[Node<'_>] {
            &self.ast.nodes
        }
    }

    // Only the nodes are printed; the raw source is left out (`finish_non_exhaustive`).
    impl fmt::Debug for Parsed {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.debug_struct("Parsed")
                .field("nodes", &self.ast.nodes)
                .finish_non_exhaustive()
        }
    }

    // Equality is defined on the parsed nodes only; the source strings are not compared.
    impl PartialEq for Parsed {
        fn eq(&self, other: &Self) -> bool {
            self.ast.nodes == other.ast.nodes
        }
    }
}
69
70pub use _parsed::Parsed;
71
/// The parsed representation of a template: the list of its top-level nodes, borrowing
/// from the source string with lifetime `'a`.
#[derive(Debug, Default)]
pub struct Ast<'a> {
    /// Top-level nodes in source order, as returned by `Node::many`.
    nodes: Vec<Node<'a>>,
}
76
77impl<'a> Ast<'a> {
78    pub fn from_str(src: &'a str, syntax: &Syntax<'_>) -> Result<Self, ParseError> {
79        let parse = |i: &'a str| Node::many(i, &State::new(syntax));
80        let (input, message) = match terminated(parse, cut(eof))(src) {
81            Ok(("", nodes)) => return Ok(Self { nodes }),
82            Ok(_) => unreachable!("eof() is not eof?"),
83            Err(
84                nom::Err::Error(ErrorContext { input, message, .. })
85                | nom::Err::Failure(ErrorContext { input, message, .. }),
86            ) => (input, message),
87            Err(nom::Err::Incomplete(_)) => return Err(ParseError("parsing incomplete".into())),
88        };
89
90        let offset = src.len() - input.len();
91        let (source_before, source_after) = src.split_at(offset);
92
93        let source_after = match source_after.char_indices().enumerate().take(41).last() {
94            Some((40, (i, _))) => format!("{:?}...", &source_after[..i]),
95            _ => format!("{source_after:?}"),
96        };
97
98        let (row, last_line) = source_before.lines().enumerate().last().unwrap_or_default();
99        let column = last_line.chars().count();
100
101        let msg = format!(
102            "{}problems parsing template source at row {}, column {} near:\n{}",
103            if let Some(message) = message {
104                format!("{message}\n")
105            } else {
106                String::new()
107            },
108            row + 1,
109            column,
110            source_after,
111        );
112
113        Err(ParseError(msg))
114    }
115
116    pub fn nodes(&self) -> &[Node<'a>] {
117        &self.nodes
118    }
119}
120
/// Error returned when a template source cannot be parsed.
///
/// Wraps the fully rendered, human-readable message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError(String);

impl fmt::Display for ParseError {
    /// Forwards to the wrapped message, honoring the formatter's width/fill options.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        fmt::Display::fmt(message, f)
    }
}

impl std::error::Error for ParseError {}
131
/// Result type of the parser functions in this crate: on success the remaining input
/// together with the parsed value `T` (a `&str` slice by default), on failure a `nom`
/// error carrying an [`ErrorContext`].
pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), nom::Err<ErrorContext<'a>>>;
133
/// This type is used to handle `nom` errors and in particular to add custom error messages.
/// It is used to generate `ParseError`.
///
/// It cannot be used to replace `ParseError` because it expects a generic parameter, which
/// would make the experience of `askama`'s users worse (since this generic is only needed
/// for `nom`).
#[derive(Debug)]
pub(crate) struct ErrorContext<'a> {
    /// The input remaining at the position where the error was raised.
    pub(crate) input: &'a str,
    /// Optional custom message; when present it is prepended to the generated
    /// `ParseError` message.
    pub(crate) message: Option<Cow<'static, str>>,
}
144
145impl<'a> nom::error::ParseError<&'a str> for ErrorContext<'a> {
146    fn from_error_kind(input: &'a str, _code: ErrorKind) -> Self {
147        Self {
148            input,
149            message: None,
150        }
151    }
152
153    fn append(_: &'a str, _: ErrorKind, other: Self) -> Self {
154        other
155    }
156}
157
158impl<'a, E: std::fmt::Display> FromExternalError<&'a str, E> for ErrorContext<'a> {
159    fn from_external_error(input: &'a str, _kind: ErrorKind, e: E) -> Self {
160        Self {
161            input,
162            message: Some(Cow::Owned(e.to_string())),
163        }
164    }
165}
166
167impl<'a> ErrorContext<'a> {
168    pub(crate) fn from_err(error: nom::Err<Error<&'a str>>) -> nom::Err<Self> {
169        match error {
170            nom::Err::Incomplete(i) => nom::Err::Incomplete(i),
171            nom::Err::Failure(Error { input, .. }) => nom::Err::Failure(Self {
172                input,
173                message: None,
174            }),
175            nom::Err::Error(Error { input, .. }) => nom::Err::Error(Self {
176                input,
177                message: None,
178            }),
179        }
180    }
181}
182
/// Returns `true` if `c` is one of the whitespace characters recognized by the parser:
/// space, tab, carriage return or line feed.
fn is_ws(c: char) -> bool {
    [' ', '\t', '\r', '\n'].contains(&c)
}

/// Returns `true` if `c` is anything but one of the characters accepted by [`is_ws`].
fn not_ws(c: char) -> bool {
    !is_ws(c)
}
190
191fn ws<'a, O>(
192    inner: impl FnMut(&'a str) -> ParseResult<'a, O>,
193) -> impl FnMut(&'a str) -> ParseResult<'a, O> {
194    delimited(take_till(not_ws), inner, take_till(not_ws))
195}
196
197/// Skips input until `end` was found, but does not consume it.
198/// Returns tuple that would be returned when parsing `end`.
199fn skip_till<'a, O>(
200    end: impl FnMut(&'a str) -> ParseResult<'a, O>,
201) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
202    enum Next<O> {
203        IsEnd(O),
204        NotEnd(char),
205    }
206    let mut next = alt((map(end, Next::IsEnd), map(anychar, Next::NotEnd)));
207    move |start: &'a str| {
208        let mut i = start;
209        loop {
210            let (j, is_end) = next(i)?;
211            match is_end {
212                Next::IsEnd(lookahead) => return Ok((i, (j, lookahead))),
213                Next::NotEnd(_) => i = j,
214            }
215        }
216    }
217}
218
219fn keyword<'a>(k: &'a str) -> impl FnMut(&'a str) -> ParseResult<'_> {
220    move |i: &'a str| -> ParseResult<'a> {
221        let (j, v) = identifier(i)?;
222        if k == v {
223            Ok((j, v))
224        } else {
225            Err(nom::Err::Error(error_position!(i, ErrorKind::Tag)))
226        }
227    }
228}
229
230fn identifier(input: &str) -> ParseResult<'_> {
231    fn start(s: &str) -> ParseResult<'_> {
232        s.split_at_position1_complete(
233            |c| !(c.is_alpha() || c == '_' || c >= '\u{0080}'),
234            nom::error::ErrorKind::Alpha,
235        )
236    }
237
238    fn tail(s: &str) -> ParseResult<'_> {
239        s.split_at_position1_complete(
240            |c| !(c.is_alphanum() || c == '_' || c >= '\u{0080}'),
241            nom::error::ErrorKind::Alpha,
242        )
243    }
244
245    recognize(pair(start, opt(tail)))(input)
246}
247
248fn bool_lit(i: &str) -> ParseResult<'_> {
249    alt((keyword("false"), keyword("true")))(i)
250}
251
/// Recognizes a numeric literal and returns the matched slice.
///
/// Accepted forms, each optionally preceded by `-`:
/// - `0b`, `0o` or `0x` followed by underscore-separated digits of base 2, 8 or 16 and an
///   optional integer type suffix;
/// - underscore-separated decimal digits, optionally followed by an integer suffix, a
///   float suffix, an exponent (with optional fraction before it), or a fraction.
fn num_lit(i: &str) -> ParseResult<'_> {
    // Integer type suffixes such as `u8` or `isize`.
    let integer_suffix = |i| {
        alt((
            tag("i8"),
            tag("i16"),
            tag("i32"),
            tag("i64"),
            tag("i128"),
            tag("isize"),
            tag("u8"),
            tag("u16"),
            tag("u32"),
            tag("u64"),
            tag("u128"),
            tag("usize"),
        ))(i)
    };
    // Float type suffixes.
    let float_suffix = |i| alt((tag("f32"), tag("f64")))(i);

    recognize(tuple((
        opt(char('-')),
        alt((
            // Prefixed integers; tried first so that the leading `0` is not taken as a
            // complete decimal literal. Digits after the prefix may start with `_`.
            recognize(tuple((
                char('0'),
                alt((
                    recognize(tuple((char('b'), separated_digits(2, false)))),
                    recognize(tuple((char('o'), separated_digits(8, false)))),
                    recognize(tuple((char('x'), separated_digits(16, false)))),
                )),
                opt(integer_suffix),
            ))),
            // Decimal integers and floats; the leading digits must not start with `_`.
            recognize(tuple((
                separated_digits(10, true),
                opt(alt((
                    integer_suffix,
                    float_suffix,
                    // Exponent form with optional fraction; tried before the plain
                    // fraction so that e.g. `1.5e3` is consumed as a whole.
                    recognize(tuple((
                        opt(tuple((char('.'), separated_digits(10, true)))),
                        one_of("eE"),
                        opt(one_of("+-")),
                        separated_digits(10, false),
                        opt(float_suffix),
                    ))),
                    // Plain fraction; requires at least one digit after the `.`.
                    recognize(tuple((
                        char('.'),
                        separated_digits(10, true),
                        opt(float_suffix),
                    ))),
                ))),
            ))),
        )),
    )))(i)
}
305
306/// Underscore separated digits of the given base, unless `start` is true this may start
307/// with an underscore.
308fn separated_digits(radix: u32, start: bool) -> impl Fn(&str) -> ParseResult<'_> {
309    move |i| {
310        recognize(tuple((
311            |i| match start {
312                true => Ok((i, 0)),
313                false => many0_count(char('_'))(i),
314            },
315            satisfy(|ch| ch.is_digit(radix)),
316            many0_count(satisfy(|ch| ch == '_' || ch.is_digit(radix))),
317        )))(i)
318    }
319}
320
321fn str_lit(i: &str) -> ParseResult<'_> {
322    let (i, s) = delimited(
323        char('"'),
324        opt(escaped(is_not("\\\""), '\\', anychar)),
325        char('"'),
326    )(i)?;
327    Ok((i, s.unwrap_or_default()))
328}
329
330fn char_lit(i: &str) -> ParseResult<'_> {
331    let (i, s) = delimited(
332        char('\''),
333        opt(escaped(is_not("\\\'"), '\\', anychar)),
334        char('\''),
335    )(i)?;
336    Ok((i, s.unwrap_or_default()))
337}
338
/// Result of `path_or_identifier`: either a path or a plain identifier.
enum PathOrIdentifier<'a> {
    /// The path components in order; an absolute path (leading `::`) starts with an
    /// empty component.
    Path(Vec<&'a str>),
    /// A single identifier that is not treated as a path.
    Identifier(&'a str),
}
343
344fn path_or_identifier(i: &str) -> ParseResult<'_, PathOrIdentifier<'_>> {
345    let root = ws(opt(tag("::")));
346    let tail = opt(many1(preceded(ws(tag("::")), identifier)));
347
348    let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
349    let rest = rest.as_deref().unwrap_or_default();
350
351    // The returned identifier can be assumed to be path if:
352    // - it is an absolute path (starts with `::`), or
353    // - it has multiple components (at least one `::`), or
354    // - the first letter is uppercase
355    match (root, start, rest) {
356        (Some(_), start, tail) => {
357            let mut path = Vec::with_capacity(2 + tail.len());
358            path.push("");
359            path.push(start);
360            path.extend(rest);
361            Ok((i, PathOrIdentifier::Path(path)))
362        }
363        (None, name, []) if name.chars().next().map_or(true, |c| c.is_lowercase()) => {
364            Ok((i, PathOrIdentifier::Identifier(name)))
365        }
366        (None, start, tail) => {
367            let mut path = Vec::with_capacity(1 + tail.len());
368            path.push(start);
369            path.extend(rest);
370            Ok((i, PathOrIdentifier::Path(path)))
371        }
372    }
373}
374
/// Parser state that is passed by shared reference to `Node::many`; the counters use
/// `Cell` so they can be updated through `&self`.
struct State<'a> {
    /// The delimiters used to recognize blocks, expressions and comments.
    syntax: &'a Syntax<'a>,
    /// Number of loops entered via `enter_loop` and not yet left via `leave_loop`.
    loop_depth: Cell<usize>,
    /// Current nesting level, updated by `nest` and `leave`.
    level: Cell<Level>,
}
380
381impl<'a> State<'a> {
382    fn new(syntax: &'a Syntax<'a>) -> State<'a> {
383        State {
384            syntax,
385            loop_depth: Cell::new(0),
386            level: Cell::new(Level::default()),
387        }
388    }
389
390    fn nest<'b>(&self, i: &'b str) -> ParseResult<'b, ()> {
391        let (_, level) = self.level.get().nest(i)?;
392        self.level.set(level);
393        Ok((i, ()))
394    }
395
396    fn leave(&self) {
397        self.level.set(self.level.get().leave());
398    }
399
400    fn tag_block_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
401        tag(self.syntax.block_start)(i)
402    }
403
404    fn tag_block_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
405        tag(self.syntax.block_end)(i)
406    }
407
408    fn tag_comment_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
409        tag(self.syntax.comment_start)(i)
410    }
411
412    fn tag_comment_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
413        tag(self.syntax.comment_end)(i)
414    }
415
416    fn tag_expr_start<'i>(&self, i: &'i str) -> ParseResult<'i> {
417        tag(self.syntax.expr_start)(i)
418    }
419
420    fn tag_expr_end<'i>(&self, i: &'i str) -> ParseResult<'i> {
421        tag(self.syntax.expr_end)(i)
422    }
423
424    fn enter_loop(&self) {
425        self.loop_depth.set(self.loop_depth.get() + 1);
426    }
427
428    fn leave_loop(&self) {
429        self.loop_depth.set(self.loop_depth.get() - 1);
430    }
431
432    fn is_in_loop(&self) -> bool {
433        self.loop_depth.get() > 0
434    }
435}
436
/// The set of delimiters that separate template constructs from literal text.
#[derive(Debug)]
pub struct Syntax<'a> {
    /// Opens a block, `{%` by default.
    pub block_start: &'a str,
    /// Closes a block, `%}` by default.
    pub block_end: &'a str,
    /// Opens an expression, `{{` by default.
    pub expr_start: &'a str,
    /// Closes an expression, `}}` by default.
    pub expr_end: &'a str,
    /// Opens a comment, `{#` by default.
    pub comment_start: &'a str,
    /// Closes a comment, `#}` by default.
    pub comment_end: &'a str,
}

impl Default for Syntax<'static> {
    /// Returns the default delimiters: `{% %}`, `{{ }}` and `{# #}`.
    fn default() -> Self {
        Syntax {
            comment_start: "{#",
            comment_end: "#}",
            expr_start: "{{",
            expr_end: "}}",
            block_start: "{%",
            block_end: "%}",
        }
    }
}
459
/// Nesting depth counter; starts at 0 and is bounded by `Level::MAX_DEPTH`.
#[derive(Clone, Copy, Default)]
pub(crate) struct Level(u8);
462
463impl Level {
464    fn nest(self, i: &str) -> ParseResult<'_, Level> {
465        if self.0 >= Self::MAX_DEPTH {
466            return Err(ErrorContext::from_err(nom::Err::Failure(error_position!(
467                i,
468                ErrorKind::TooLarge
469            ))));
470        }
471
472        Ok((i, Level(self.0 + 1)))
473    }
474
475    fn leave(&self) -> Self {
476        Level(self.0 - 1)
477    }
478
479    const MAX_DEPTH: u8 = 128;
480}