pest_meta/optimizer/
mod.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoÈ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Different optimizations for pest's ASTs.
11
12use crate::ast::*;
13use std::collections::HashMap;
14
15#[cfg(test)]
16macro_rules! box_tree {
17    ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18      $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19    );
20    ($expr:expr) => ($expr);
21}
22
23mod concatenator;
24mod factorizer;
25mod lister;
26mod restorer;
27mod rotater;
28mod skipper;
29mod unroller;
30
31/// Takes pest's ASTs and optimizes them
32pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33    let map = to_hash_map(&rules);
34    let optimized: Vec<OptimizedRule> = rules
35        .into_iter()
36        .map(rotater::rotate)
37        .map(|rule| skipper::skip(rule, &map))
38        .map(unroller::unroll)
39        .map(concatenator::concatenate)
40        .map(factorizer::factor)
41        .map(lister::list)
42        .map(rule_to_optimized_rule)
43        .collect();
44
45    let optimized_map = to_optimized_hash_map(&optimized);
46    optimized
47        .into_iter()
48        .map(|rule| restorer::restore_on_err(rule, &optimized_map))
49        .collect()
50}
51
52fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
53    fn to_optimized(expr: Expr) -> OptimizedExpr {
54        match expr {
55            Expr::Str(string) => OptimizedExpr::Str(string),
56            Expr::Insens(string) => OptimizedExpr::Insens(string),
57            Expr::Range(start, end) => OptimizedExpr::Range(start, end),
58            Expr::Ident(ident) => OptimizedExpr::Ident(ident),
59            Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
60            Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
61            Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
62            Expr::Seq(lhs, rhs) => {
63                OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
64            }
65            Expr::Choice(lhs, rhs) => {
66                OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
67            }
68            Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
69            Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
70            Expr::Skip(strings) => OptimizedExpr::Skip(strings),
71            Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
72            #[cfg(feature = "grammar-extras")]
73            Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
74            #[cfg(feature = "grammar-extras")]
75            Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
76            #[cfg(not(feature = "grammar-extras"))]
77            Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
78            Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
79                unreachable!("No valid transformation to OptimizedRule")
80            }
81        }
82    }
83
84    OptimizedRule {
85        name: rule.name,
86        ty: rule.ty,
87        expr: to_optimized(rule.expr),
88    }
89}
90
91macro_rules! to_hash_map {
92    ($func_name:ident, $rule:ty, $expr:ty) => {
93        fn $func_name(rules: &[$rule]) -> HashMap<String, $expr> {
94            rules
95                .iter()
96                .map(|r| (r.name.clone(), r.expr.clone()))
97                .collect()
98        }
99    };
100}
101to_hash_map!(to_hash_map, Rule, Expr);
102to_hash_map!(to_optimized_hash_map, OptimizedRule, OptimizedExpr);
103
104/// The optimized version of the pest AST's `Rule`.
105#[derive(Clone, Debug, Eq, PartialEq)]
106pub struct OptimizedRule {
107    /// The name of the rule.
108    pub name: String,
109    /// The type of the rule.
110    pub ty: RuleType,
111    /// The optimized expression of the rule.
112    pub expr: OptimizedExpr,
113}
114
115/// The optimized version of the pest AST's `Expr`.
116///
117/// # Warning: Semantic Versioning
118/// There may be non-breaking changes to the meta-grammar
119/// between minor versions. Those non-breaking changes, however,
120/// may translate into semver-breaking changes due to the additional variants
121/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
122/// future (e.g. by increasing MSRV and non_exhaustive annotations).
123#[derive(Clone, Debug, Eq, PartialEq)]
124pub enum OptimizedExpr {
125    /// Matches an exact string, e.g. `"a"`
126    Str(String),
127    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
128    Insens(String),
129    /// Matches one character in the range, e.g. `'a'..'z'`
130    Range(String, String),
131    /// Matches the rule with the given name, e.g. `a`
132    Ident(String),
133    /// Matches a custom part of the stack, e.g. `PEEK[..]`
134    PeekSlice(i32, Option<i32>),
135    /// Positive lookahead; matches expression without making progress, e.g. `&e`
136    PosPred(Box<OptimizedExpr>),
137    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
138    NegPred(Box<OptimizedExpr>),
139    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
140    Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
141    /// Matches either of two expressions, e.g. `e1 | e2`
142    Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
143    /// Optionally matches an expression, e.g. `e?`
144    Opt(Box<OptimizedExpr>),
145    /// Matches an expression zero or more times, e.g. `e*`
146    Rep(Box<OptimizedExpr>),
147    /// Matches an expression one or more times, e.g. `e+`
148    #[cfg(feature = "grammar-extras")]
149    RepOnce(Box<OptimizedExpr>),
150    /// Continues to match expressions until one of the strings in the `Vec` is found
151    Skip(Vec<String>),
152    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
153    Push(Box<OptimizedExpr>),
154    /// Matches an expression and assigns a label to it, e.g. #label = exp
155    #[cfg(feature = "grammar-extras")]
156    NodeTag(Box<OptimizedExpr>, String),
157    /// Restores an expression's checkpoint
158    RestoreOnErr(Box<OptimizedExpr>),
159}
160
161impl OptimizedExpr {
162    /// Returns a top-down iterator over the `OptimizedExpr`.
163    pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
164        OptimizedExprTopDownIterator::new(self)
165    }
166
167    /// Applies `f` to the `OptimizedExpr` top-down.
168    pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
169    where
170        F: FnMut(OptimizedExpr) -> OptimizedExpr,
171    {
172        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
173        where
174            F: FnMut(OptimizedExpr) -> OptimizedExpr,
175        {
176            let expr = f(expr);
177
178            match expr {
179                OptimizedExpr::PosPred(expr) => {
180                    let mapped = Box::new(map_internal(*expr, f));
181                    OptimizedExpr::PosPred(mapped)
182                }
183                OptimizedExpr::NegPred(expr) => {
184                    let mapped = Box::new(map_internal(*expr, f));
185                    OptimizedExpr::NegPred(mapped)
186                }
187                OptimizedExpr::Seq(lhs, rhs) => {
188                    let mapped_lhs = Box::new(map_internal(*lhs, f));
189                    let mapped_rhs = Box::new(map_internal(*rhs, f));
190                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
191                }
192                OptimizedExpr::Choice(lhs, rhs) => {
193                    let mapped_lhs = Box::new(map_internal(*lhs, f));
194                    let mapped_rhs = Box::new(map_internal(*rhs, f));
195                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
196                }
197                OptimizedExpr::Rep(expr) => {
198                    let mapped = Box::new(map_internal(*expr, f));
199                    OptimizedExpr::Rep(mapped)
200                }
201                OptimizedExpr::Opt(expr) => {
202                    let mapped = Box::new(map_internal(*expr, f));
203                    OptimizedExpr::Opt(mapped)
204                }
205                OptimizedExpr::Push(expr) => {
206                    let mapped = Box::new(map_internal(*expr, f));
207                    OptimizedExpr::Push(mapped)
208                }
209                expr => expr,
210            }
211        }
212
213        map_internal(self, &mut f)
214    }
215
216    /// Applies `f` to the `OptimizedExpr` bottom-up.
217    pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
218    where
219        F: FnMut(OptimizedExpr) -> OptimizedExpr,
220    {
221        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
222        where
223            F: FnMut(OptimizedExpr) -> OptimizedExpr,
224        {
225            let mapped = match expr {
226                OptimizedExpr::PosPred(expr) => {
227                    let mapped = Box::new(map_internal(*expr, f));
228                    OptimizedExpr::PosPred(mapped)
229                }
230                OptimizedExpr::NegPred(expr) => {
231                    let mapped = Box::new(map_internal(*expr, f));
232                    OptimizedExpr::NegPred(mapped)
233                }
234                OptimizedExpr::Seq(lhs, rhs) => {
235                    let mapped_lhs = Box::new(map_internal(*lhs, f));
236                    let mapped_rhs = Box::new(map_internal(*rhs, f));
237                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
238                }
239                OptimizedExpr::Choice(lhs, rhs) => {
240                    let mapped_lhs = Box::new(map_internal(*lhs, f));
241                    let mapped_rhs = Box::new(map_internal(*rhs, f));
242                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
243                }
244                OptimizedExpr::Rep(expr) => {
245                    let mapped = Box::new(map_internal(*expr, f));
246                    OptimizedExpr::Rep(mapped)
247                }
248                OptimizedExpr::Opt(expr) => {
249                    let mapped = Box::new(map_internal(*expr, f));
250                    OptimizedExpr::Opt(mapped)
251                }
252                OptimizedExpr::Push(expr) => {
253                    let mapped = Box::new(map_internal(*expr, f));
254                    OptimizedExpr::Push(mapped)
255                }
256                expr => expr,
257            };
258
259            f(mapped)
260        }
261
262        map_internal(self, &mut f)
263    }
264}
265
266impl core::fmt::Display for OptimizedExpr {
267    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
268        match self {
269            OptimizedExpr::Str(s) => write!(f, "{:?}", s),
270            OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
271            OptimizedExpr::Range(start, end) => {
272                let start = start.chars().next().expect("Empty range start.");
273                let end = end.chars().next().expect("Empty range end.");
274                write!(f, "({:?}..{:?})", start, end)
275            }
276            OptimizedExpr::Ident(id) => write!(f, "{}", id),
277            OptimizedExpr::PeekSlice(start, end) => match end {
278                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
279                None => write!(f, "PEEK[{}..]", start),
280            },
281            OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
282            OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
283            OptimizedExpr::Seq(lhs, rhs) => {
284                let mut nodes = Vec::new();
285                nodes.push(lhs);
286                let mut current = rhs;
287                while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
288                    nodes.push(lhs);
289                    current = rhs;
290                }
291                nodes.push(current);
292                let sequence = nodes
293                    .iter()
294                    .map(|node| format!("{}", node))
295                    .collect::<Vec<_>>()
296                    .join(" ~ ");
297                write!(f, "({})", sequence)
298            }
299            OptimizedExpr::Choice(lhs, rhs) => {
300                let mut nodes = Vec::new();
301                nodes.push(lhs);
302                let mut current = rhs;
303                while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
304                    nodes.push(lhs);
305                    current = rhs;
306                }
307                nodes.push(current);
308                let sequence = nodes
309                    .iter()
310                    .map(|node| format!("{}", node))
311                    .collect::<Vec<_>>()
312                    .join(" | ");
313                write!(f, "({})", sequence)
314            }
315            OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
316            OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
317            #[cfg(feature = "grammar-extras")]
318            OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
319            OptimizedExpr::Skip(strings) => {
320                let strings = strings
321                    .iter()
322                    .map(|s| format!("{:?}", s))
323                    .collect::<Vec<_>>()
324                    .join(" | ");
325                write!(f, "(!({}) ~ ANY)*", strings)
326            }
327            OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
328            #[cfg(feature = "grammar-extras")]
329            OptimizedExpr::NodeTag(expr, tag) => {
330                write!(f, "(#{} = {})", tag, expr)
331            }
332            OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
333        }
334    }
335}
336
337/// A top-down iterator over an `OptimizedExpr`.
338pub struct OptimizedExprTopDownIterator {
339    current: Option<OptimizedExpr>,
340    next: Option<OptimizedExpr>,
341    right_branches: Vec<OptimizedExpr>,
342}
343
344impl OptimizedExprTopDownIterator {
345    /// Creates a new top down iterator from an `OptimizedExpr`.
346    pub fn new(expr: &OptimizedExpr) -> Self {
347        let mut iter = OptimizedExprTopDownIterator {
348            current: None,
349            next: None,
350            right_branches: vec![],
351        };
352        iter.iterate_expr(expr.clone());
353        iter
354    }
355
356    fn iterate_expr(&mut self, expr: OptimizedExpr) {
357        self.current = Some(expr.clone());
358        match expr {
359            OptimizedExpr::Seq(lhs, rhs) => {
360                self.right_branches.push(*rhs);
361                self.next = Some(*lhs);
362            }
363            OptimizedExpr::Choice(lhs, rhs) => {
364                self.right_branches.push(*rhs);
365                self.next = Some(*lhs);
366            }
367            OptimizedExpr::PosPred(expr)
368            | OptimizedExpr::NegPred(expr)
369            | OptimizedExpr::Rep(expr)
370            | OptimizedExpr::Opt(expr)
371            | OptimizedExpr::Push(expr) => {
372                self.next = Some(*expr);
373            }
374            _ => {
375                self.next = None;
376            }
377        }
378    }
379}
380
381impl Iterator for OptimizedExprTopDownIterator {
382    type Item = OptimizedExpr;
383
384    fn next(&mut self) -> Option<Self::Item> {
385        let result = self.current.take();
386
387        if let Some(expr) = self.next.take() {
388            self.iterate_expr(expr);
389        } else if let Some(expr) = self.right_branches.pop() {
390            self.iterate_expr(expr);
391        }
392
393        result
394    }
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400
401    #[test]
402    fn rotate() {
403        let rules = {
404            use crate::ast::Expr::*;
405            vec![Rule {
406                name: "rule".to_owned(),
407                ty: RuleType::Normal,
408                expr: box_tree!(Choice(
409                    Choice(
410                        Choice(Str(String::from("a")), Str(String::from("b"))),
411                        Str(String::from("c"))
412                    ),
413                    Str(String::from("d"))
414                )),
415            }]
416        };
417        let rotated = {
418            use crate::optimizer::OptimizedExpr::*;
419            vec![OptimizedRule {
420                name: "rule".to_owned(),
421                ty: RuleType::Normal,
422                expr: box_tree!(Choice(
423                    Str(String::from("a")),
424                    Choice(
425                        Str(String::from("b")),
426                        Choice(Str(String::from("c")), Str(String::from("d")))
427                    )
428                )),
429            }]
430        };
431
432        assert_eq!(optimize(rules), rotated);
433    }
434
435    #[test]
436    fn skip() {
437        let rules = {
438            use crate::ast::Expr::*;
439            vec![Rule {
440                name: "rule".to_owned(),
441                ty: RuleType::Atomic,
442                expr: box_tree!(Rep(Seq(
443                    NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
444                    Ident(String::from("ANY"))
445                ))),
446            }]
447        };
448        let skipped = vec![OptimizedRule {
449            name: "rule".to_owned(),
450            ty: RuleType::Atomic,
451            expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
452        }];
453
454        assert_eq!(optimize(rules), skipped);
455    }
456
457    #[test]
458    fn concat_strings() {
459        let rules = {
460            use crate::ast::Expr::*;
461            vec![Rule {
462                name: "rule".to_owned(),
463                ty: RuleType::Atomic,
464                expr: box_tree!(Seq(
465                    Seq(Str(String::from("a")), Str(String::from("b"))),
466                    Seq(Str(String::from("c")), Str(String::from("d")))
467                )),
468            }]
469        };
470        let concatenated = vec![OptimizedRule {
471            name: "rule".to_owned(),
472            ty: RuleType::Atomic,
473            expr: OptimizedExpr::Str(String::from("abcd")),
474        }];
475
476        assert_eq!(optimize(rules), concatenated);
477    }
478
479    #[test]
480    fn unroll_loop_exact() {
481        let rules = vec![Rule {
482            name: "rule".to_owned(),
483            ty: RuleType::Atomic,
484            expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
485        }];
486        let unrolled = {
487            use crate::optimizer::OptimizedExpr::*;
488            vec![OptimizedRule {
489                name: "rule".to_owned(),
490                ty: RuleType::Atomic,
491                expr: box_tree!(Seq(
492                    Ident(String::from("a")),
493                    Seq(Ident(String::from("a")), Ident(String::from("a")))
494                )),
495            }]
496        };
497
498        assert_eq!(optimize(rules), unrolled);
499    }
500
501    #[test]
502    fn unroll_loop_max() {
503        let rules = vec![Rule {
504            name: "rule".to_owned(),
505            ty: RuleType::Atomic,
506            expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
507        }];
508        let unrolled = {
509            use crate::optimizer::OptimizedExpr::*;
510            vec![OptimizedRule {
511                name: "rule".to_owned(),
512                ty: RuleType::Atomic,
513                expr: box_tree!(Seq(
514                    Opt(Str(String::from("a"))),
515                    Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
516                )),
517            }]
518        };
519
520        assert_eq!(optimize(rules), unrolled);
521    }
522
523    #[test]
524    fn unroll_loop_min() {
525        let rules = vec![Rule {
526            name: "rule".to_owned(),
527            ty: RuleType::Atomic,
528            expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
529        }];
530        let unrolled = {
531            use crate::optimizer::OptimizedExpr::*;
532            vec![OptimizedRule {
533                name: "rule".to_owned(),
534                ty: RuleType::Atomic,
535                expr: box_tree!(Seq(
536                    Str(String::from("a")),
537                    Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
538                )),
539            }]
540        };
541
542        assert_eq!(optimize(rules), unrolled);
543    }
544
545    #[test]
546    fn unroll_loop_min_max() {
547        let rules = vec![Rule {
548            name: "rule".to_owned(),
549            ty: RuleType::Atomic,
550            expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
551        }];
552        let unrolled = {
553            use crate::optimizer::OptimizedExpr::*;
554            vec![OptimizedRule {
555                name: "rule".to_owned(),
556                ty: RuleType::Atomic,
557                /* TODO possible room for improvement here:
558                 * if the sequences were rolled out in the opposite
559                 * order, we could further optimize the strings
560                 * in cases like this.
561                Str(String::from(("aa")),
562                Opt(Str(String::from("a")))
563                */
564                expr: box_tree!(Seq(
565                    Str(String::from("a")),
566                    Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
567                )),
568            }]
569        };
570
571        assert_eq!(optimize(rules), unrolled);
572    }
573
574    #[test]
575    fn concat_insensitive_strings() {
576        let rules = {
577            use crate::ast::Expr::*;
578            vec![Rule {
579                name: "rule".to_owned(),
580                ty: RuleType::Atomic,
581                expr: box_tree!(Seq(
582                    Seq(Insens(String::from("a")), Insens(String::from("b"))),
583                    Seq(Insens(String::from("c")), Insens(String::from("d")))
584                )),
585            }]
586        };
587        let concatenated = vec![OptimizedRule {
588            name: "rule".to_owned(),
589            ty: RuleType::Atomic,
590            expr: OptimizedExpr::Insens(String::from("abcd")),
591        }];
592
593        assert_eq!(optimize(rules), concatenated);
594    }
595
596    #[test]
597    fn long_common_sequence() {
598        let rules = {
599            use crate::ast::Expr::*;
600            vec![Rule {
601                name: "rule".to_owned(),
602                ty: RuleType::Silent,
603                expr: box_tree!(Choice(
604                    Seq(
605                        Ident(String::from("a")),
606                        Seq(Ident(String::from("b")), Ident(String::from("c")))
607                    ),
608                    Seq(
609                        Seq(Ident(String::from("a")), Ident(String::from("b"))),
610                        Ident(String::from("d"))
611                    )
612                )),
613            }]
614        };
615        let optimized = {
616            use crate::optimizer::OptimizedExpr::*;
617            vec![OptimizedRule {
618                name: "rule".to_owned(),
619                ty: RuleType::Silent,
620                expr: box_tree!(Seq(
621                    Ident(String::from("a")),
622                    Seq(
623                        Ident(String::from("b")),
624                        Choice(Ident(String::from("c")), Ident(String::from("d")))
625                    )
626                )),
627            }]
628        };
629
630        assert_eq!(optimize(rules), optimized);
631    }
632
633    #[test]
634    fn short_common_sequence() {
635        let rules = {
636            use crate::ast::Expr::*;
637            vec![Rule {
638                name: "rule".to_owned(),
639                ty: RuleType::Atomic,
640                expr: box_tree!(Choice(
641                    Seq(Ident(String::from("a")), Ident(String::from("b"))),
642                    Ident(String::from("a"))
643                )),
644            }]
645        };
646        let optimized = {
647            use crate::optimizer::OptimizedExpr::*;
648            vec![OptimizedRule {
649                name: "rule".to_owned(),
650                ty: RuleType::Atomic,
651                expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
652            }]
653        };
654
655        assert_eq!(optimize(rules), optimized);
656    }
657
658    #[test]
659    fn impossible_common_sequence() {
660        let rules = {
661            use crate::ast::Expr::*;
662            vec![Rule {
663                name: "rule".to_owned(),
664                ty: RuleType::Silent,
665                expr: box_tree!(Choice(
666                    Ident(String::from("a")),
667                    Seq(Ident(String::from("a")), Ident(String::from("b")))
668                )),
669            }]
670        };
671        let optimized = {
672            use crate::optimizer::OptimizedExpr::*;
673            vec![OptimizedRule {
674                name: "rule".to_owned(),
675                ty: RuleType::Silent,
676                expr: box_tree!(Ident(String::from("a"))),
677            }]
678        };
679
680        assert_eq!(optimize(rules), optimized);
681    }
682
683    #[test]
684    fn lister() {
685        let rules = {
686            use crate::ast::Expr::*;
687            vec![Rule {
688                name: "rule".to_owned(),
689                ty: RuleType::Silent,
690                expr: box_tree!(Seq(
691                    Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
692                    Ident(String::from("a"))
693                )),
694            }]
695        };
696        let optimized = {
697            use crate::optimizer::OptimizedExpr::*;
698            vec![OptimizedRule {
699                name: "rule".to_owned(),
700                ty: RuleType::Silent,
701                expr: box_tree!(Seq(
702                    Ident(String::from("a")),
703                    Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
704                )),
705            }]
706        };
707
708        assert_eq!(optimize(rules), optimized);
709    }
710
711    mod display {
712        use super::super::*;
713        /// In previous implementation of Display for OptimizedExpr
714        /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
715        /// Str("\n") will be displayed as
716        /// "
717        /// "
718        ///
719        /// It will not break the compilation in normal use.
720        ///
721        /// But when I use it in automatically generating documents,
722        /// it will quite confusing and we'll be unable to distinguish \n and \r.
723        ///
724        /// And `cargo expand` will emit codes that can't be compiled,
725        /// for it expand `#[doc("...")]` to `/// ...`,
726        /// and when the document comment breaks the line,
727        /// it will be expanded into wrong codes.
728        #[test]
729        fn control_character() {
730            assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
731            assert_eq!(
732                OptimizedExpr::Insens("\n".to_owned()).to_string(),
733                "^\"\\n\"",
734            );
735            assert_eq!(
736                OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
737                "('\\n'..'\\r')",
738            );
739            assert_eq!(
740                OptimizedExpr::Skip(vec![
741                    "\n".to_owned(),
742                    "\r".to_owned(),
743                    "\n\r".to_owned(),
744                    "\0".to_owned(),
745                ])
746                .to_string(),
747                r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
748            );
749
750            assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
751        }
752
753        #[test]
754        fn str() {
755            assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
756        }
757
758        #[test]
759        fn insens() {
760            assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
761        }
762
763        #[test]
764        fn range() {
765            assert_eq!(
766                OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
767                r#"('a'..'z')"#,
768            );
769        }
770
771        #[test]
772        fn ident() {
773            assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
774        }
775
776        #[test]
777        fn peek_slice() {
778            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
779            assert_eq!(
780                OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
781                "PEEK[0..-1]",
782            );
783            assert_eq!(
784                OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
785                "PEEK[2..3]",
786            );
787            assert_eq!(
788                OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
789                "PEEK[2..-1]",
790            );
791            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
792        }
793
794        #[test]
795        fn pos_pred() {
796            assert_eq!(
797                OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
798                    OptimizedExpr::Ident("a".to_owned()),
799                ))))
800                .to_string(),
801                "&!a",
802            );
803            assert_eq!(
804                OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
805                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
806                        "a".to_owned(),
807                    )))),
808                    Box::new(OptimizedExpr::Str("a".to_owned())),
809                )))
810                .to_string(),
811                r#"&(a* | "a")"#,
812            );
813            assert_eq!(
814                OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
815                    OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
816                ))))
817                .to_string(),
818                "&!a",
819            );
820        }
821
822        #[test]
823        fn neg_pred() {
824            assert_eq!(
825                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
826                r#"!e"#,
827            );
828            assert_eq!(
829                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
830                    Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
831                        "a".to_owned(),
832                    )))),
833                    Box::new(OptimizedExpr::Str("a".to_owned())),
834                )))
835                .to_string(),
836                r#"!(PUSH(a) | "a")"#,
837            );
838        }
839
840        #[test]
841        fn seq() {
842            assert_eq!(
843                OptimizedExpr::Seq(
844                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
845                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
846                )
847                .to_string(),
848                r#"(e1 ~ e2)"#,
849            );
850            assert_eq!(
851                Expr::Seq(
852                    Box::new(Expr::Ident("e1".to_owned())),
853                    Box::new(Expr::Seq(
854                        Box::new(Expr::Ident("e2".to_owned())),
855                        Box::new(Expr::Ident("e3".to_owned())),
856                    )),
857                )
858                .to_string(),
859                "(e1 ~ e2 ~ e3)",
860            );
861            assert_eq!(
862                Expr::Seq(
863                    Box::new(Expr::Ident("e1".to_owned())),
864                    Box::new(Expr::Seq(
865                        Box::new(Expr::Ident("e2".to_owned())),
866                        Box::new(Expr::Seq(
867                            Box::new(Expr::Ident("e3".to_owned())),
868                            Box::new(Expr::Ident("e4".to_owned())),
869                        )),
870                    )),
871                )
872                .to_string(),
873                "(e1 ~ e2 ~ e3 ~ e4)",
874            );
875            assert_eq!(
876                Expr::Seq(
877                    Box::new(Expr::Ident("e1".to_owned())),
878                    Box::new(Expr::Choice(
879                        Box::new(Expr::Ident("e2".to_owned())),
880                        Box::new(Expr::Seq(
881                            Box::new(Expr::Ident("e3".to_owned())),
882                            Box::new(Expr::Ident("e4".to_owned())),
883                        )),
884                    )),
885                )
886                .to_string(),
887                "(e1 ~ (e2 | (e3 ~ e4)))",
888            );
889            assert_eq!(
890                Expr::Seq(
891                    Box::new(Expr::Ident("e1".to_owned())),
892                    Box::new(Expr::Seq(
893                        Box::new(Expr::Ident("e2".to_owned())),
894                        Box::new(Expr::Choice(
895                            Box::new(Expr::Ident("e3".to_owned())),
896                            Box::new(Expr::Ident("e4".to_owned())),
897                        )),
898                    )),
899                )
900                .to_string(),
901                "(e1 ~ e2 ~ (e3 | e4))",
902            );
903            assert_eq!(
904                OptimizedExpr::Seq(
905                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
906                        "a".to_owned(),
907                    )))),
908                    Box::new(OptimizedExpr::Seq(
909                        Box::new(OptimizedExpr::Ident("b".to_owned())),
910                        Box::new(OptimizedExpr::Insens("c".to_owned())),
911                    )),
912                )
913                .to_string(),
914                r#"("a"* ~ b ~ ^"c")"#,
915            );
916            assert_eq!(
917                OptimizedExpr::Seq(
918                    Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
919                        "a".to_owned(),
920                        "z".to_owned(),
921                    )))),
922                    Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
923                        Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
924                    )))),
925                )
926                .to_string(),
927                "(&('a'..'z') ~ !('A'..'Z')?)",
928            );
929        }
930
931        #[test]
932        fn choice() {
933            assert_eq!(
934                OptimizedExpr::Choice(
935                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
936                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
937                )
938                .to_string(),
939                r#"(e1 | e2)"#,
940            );
941            assert_eq!(
942                Expr::Choice(
943                    Box::new(Expr::Ident("e1".to_owned())),
944                    Box::new(Expr::Choice(
945                        Box::new(Expr::Ident("e2".to_owned())),
946                        Box::new(Expr::Ident("e3".to_owned())),
947                    )),
948                )
949                .to_string(),
950                "(e1 | e2 | e3)",
951            );
952            assert_eq!(
953                Expr::Choice(
954                    Box::new(Expr::Ident("e1".to_owned())),
955                    Box::new(Expr::Choice(
956                        Box::new(Expr::Ident("e2".to_owned())),
957                        Box::new(Expr::Choice(
958                            Box::new(Expr::Ident("e3".to_owned())),
959                            Box::new(Expr::Ident("e4".to_owned())),
960                        )),
961                    )),
962                )
963                .to_string(),
964                "(e1 | e2 | e3 | e4)",
965            );
966            assert_eq!(
967                Expr::Choice(
968                    Box::new(Expr::Ident("e1".to_owned())),
969                    Box::new(Expr::Seq(
970                        Box::new(Expr::Ident("e2".to_owned())),
971                        Box::new(Expr::Choice(
972                            Box::new(Expr::Ident("e3".to_owned())),
973                            Box::new(Expr::Ident("e4".to_owned())),
974                        )),
975                    )),
976                )
977                .to_string(),
978                "(e1 | (e2 ~ (e3 | e4)))",
979            );
980            assert_eq!(
981                OptimizedExpr::Choice(
982                    Box::new(OptimizedExpr::Str("a".to_owned())),
983                    Box::new(OptimizedExpr::Choice(
984                        Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
985                            "b".to_owned(),
986                        )))),
987                        Box::new(OptimizedExpr::Insens("c".to_owned())),
988                    )),
989                )
990                .to_string(),
991                r#"("a" | PUSH(b) | ^"c")"#,
992            );
993        }
994
995        #[test]
996        fn opt() {
997            assert_eq!(
998                OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
999                "e?",
1000            );
1001        }
1002
1003        #[test]
1004        fn rep() {
1005            assert_eq!(
1006                OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1007                "x*",
1008            );
1009            assert_eq!(
1010                OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1011                    "0".to_owned(),
1012                    "9".to_owned(),
1013                )))
1014                .to_string(),
1015                "('0'..'9')*",
1016            );
1017        }
1018
1019        #[test]
1020        #[cfg(feature = "grammar-extras")]
1021        fn rep_once() {
1022            assert_eq!(
1023                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1024                "e+",
1025            );
1026            assert_eq!(
1027                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1028                    "0".to_owned(),
1029                    "9".to_owned(),
1030                )))
1031                .to_string(),
1032                "('0'..'9')+",
1033            );
1034        }
1035
1036        #[test]
1037        fn skip() {
1038            assert_eq!(
1039                OptimizedExpr::Skip(
1040                    ["a", "bc"]
1041                        .into_iter()
1042                        .map(|s| s.to_owned())
1043                        .collect::<Vec<_>>(),
1044                )
1045                .to_string(),
1046                r#"(!("a" | "bc") ~ ANY)*"#,
1047            );
1048        }
1049
1050        #[test]
1051        fn inline_skip() {
1052            use crate::ast::Expr::*;
1053            let rules = vec![
1054                Rule {
1055                    name: "inline".to_owned(),
1056                    ty: RuleType::Atomic,
1057                    expr: Str("a".to_owned()),
1058                },
1059                Rule {
1060                    name: "skip".to_owned(),
1061                    ty: RuleType::Atomic,
1062                    expr: box_tree!(Rep(Seq(
1063                        NegPred(Choice(
1064                            Ident(String::from("inline")),
1065                            Str(String::from("b"))
1066                        )),
1067                        Ident("ANY".to_owned())
1068                    ))),
1069                },
1070            ];
1071            let map = to_hash_map(&rules);
1072            let rule = skipper::skip(rules[1].clone(), &map);
1073            assert!(matches!(rule, Rule { expr: Skip(..), .. }));
1074            let choices = match rule.expr {
1075                Skip(choices) => choices,
1076                _ => unreachable!(),
1077            };
1078            assert_eq!(choices, vec!["a".to_owned(), "b".to_owned()]);
1079        }
1080
1081        #[test]
1082        fn push() {
1083            assert_eq!(
1084                OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1085                "PUSH(e)",
1086            );
1087        }
1088
1089        #[test]
1090        #[cfg(feature = "grammar-extras")]
1091        fn node_tag() {
1092            assert_eq!(
1093                OptimizedExpr::NodeTag(
1094                    Box::new(OptimizedExpr::Ident("expr".to_owned())),
1095                    "label".to_owned(),
1096                )
1097                .to_string(),
1098                r#"(#label = expr)"#,
1099            );
1100            assert_eq!(
1101                OptimizedExpr::NodeTag(
1102                    Box::new(OptimizedExpr::Ident("x".to_owned())),
1103                    "X".to_owned(),
1104                )
1105                .to_string(),
1106                r#"(#X = x)"#,
1107            );
1108            assert_eq!(
1109                OptimizedExpr::NodeTag(
1110                    Box::new(OptimizedExpr::Seq(
1111                        Box::new(OptimizedExpr::Ident("x".to_owned())),
1112                        Box::new(OptimizedExpr::Str("y".to_owned())),
1113                    )),
1114                    "X".to_owned(),
1115                )
1116                .to_string(),
1117                r#"(#X = (x ~ "y"))"#,
1118            );
1119        }
1120
1121        #[test]
1122        fn restore_on_err() {
1123            assert_eq!(
1124                OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1125                    .to_string(),
1126                "e",
1127            );
1128        }
1129    }
1130}