fancy_regex/
analyze.rs
1use alloc::string::String;
24use alloc::vec::Vec;
25use core::cmp::min;
26
27use bit_set::BitSet;
28
29use crate::parse::ExprTree;
30use crate::{CompileError, Error, Expr, Result};
31
32#[derive(Debug)]
33pub struct Info<'a> {
34 pub(crate) start_group: usize,
35 pub(crate) end_group: usize,
36 pub(crate) min_size: usize,
37 pub(crate) const_size: bool,
38 pub(crate) hard: bool,
39 pub(crate) expr: &'a Expr,
40 pub(crate) children: Vec<Info<'a>>,
41}
42
43impl<'a> Info<'a> {
44 pub(crate) fn is_literal(&self) -> bool {
45 match *self.expr {
46 Expr::Literal { casei, .. } => !casei,
47 Expr::Concat(_) => self.children.iter().all(|child| child.is_literal()),
48 _ => false,
49 }
50 }
51
52 pub(crate) fn push_literal(&self, buf: &mut String) {
53 match *self.expr {
54 Expr::Literal { ref val, .. } => buf.push_str(val),
56 Expr::Concat(_) => {
57 for child in &self.children {
58 child.push_literal(buf);
59 }
60 }
61 _ => panic!("push_literal called on non-literal"),
62 }
63 }
64}
65
66struct Analyzer<'a> {
67 backrefs: &'a BitSet,
68 group_ix: usize,
69}
70
71impl<'a> Analyzer<'a> {
72 fn visit(&mut self, expr: &'a Expr) -> Result<Info<'a>> {
73 let start_group = self.group_ix;
74 let mut children = Vec::new();
75 let mut min_size = 0;
76 let mut const_size = false;
77 let mut hard = false;
78 match *expr {
79 Expr::Assertion(assertion) if assertion.is_hard() => {
80 const_size = true;
81 hard = true;
82 }
83 Expr::Empty | Expr::Assertion(_) => {
84 const_size = true;
85 }
86 Expr::Any { .. } => {
87 min_size = 1;
88 const_size = true;
89 }
90 Expr::Literal { ref val, casei } => {
91 min_size = 1;
93 const_size = literal_const_size(val, casei);
94 }
95 Expr::Concat(ref v) => {
96 const_size = true;
97 for child in v {
98 let child_info = self.visit(child)?;
99 min_size += child_info.min_size;
100 const_size &= child_info.const_size;
101 hard |= child_info.hard;
102 children.push(child_info);
103 }
104 }
105 Expr::Alt(ref v) => {
106 let child_info = self.visit(&v[0])?;
107 min_size = child_info.min_size;
108 const_size = child_info.const_size;
109 hard = child_info.hard;
110 children.push(child_info);
111 for child in &v[1..] {
112 let child_info = self.visit(child)?;
113 const_size &= child_info.const_size && min_size == child_info.min_size;
114 min_size = min(min_size, child_info.min_size);
115 hard |= child_info.hard;
116 children.push(child_info);
117 }
118 }
119 Expr::Group(ref child) => {
120 let group = self.group_ix;
121 self.group_ix += 1;
122 let child_info = self.visit(child)?;
123 min_size = child_info.min_size;
124 const_size = child_info.const_size;
125 hard = child_info.hard | self.backrefs.contains(group);
129 children.push(child_info);
130 }
131 Expr::LookAround(ref child, _) => {
132 let child_info = self.visit(child)?;
133 const_size = true;
135 hard = true;
136 children.push(child_info);
137 }
138 Expr::Repeat {
139 ref child, lo, hi, ..
140 } => {
141 let child_info = self.visit(child)?;
142 min_size = child_info.min_size * lo;
143 const_size = child_info.const_size && lo == hi;
144 hard = child_info.hard;
145 children.push(child_info);
146 }
147 Expr::Delegate { size, .. } => {
148 min_size = size;
150 const_size = true;
151 }
152 Expr::Backref(group) => {
153 if group >= self.group_ix {
154 return Err(Error::CompileError(CompileError::InvalidBackref));
155 }
156 hard = true;
157 }
158 Expr::AtomicGroup(ref child) => {
159 let child_info = self.visit(child)?;
160 min_size = child_info.min_size;
161 const_size = child_info.const_size;
162 hard = true; children.push(child_info);
164 }
165 Expr::KeepOut => {
166 hard = true;
167 const_size = true;
168 }
169 Expr::ContinueFromPreviousMatchEnd => {
170 hard = true;
171 const_size = true;
172 }
173 Expr::BackrefExistsCondition(group) => {
174 if group >= self.group_ix {
175 return Err(Error::CompileError(CompileError::InvalidBackref));
176 }
177 hard = true;
178 const_size = true;
179 }
180 Expr::Conditional {
181 ref condition,
182 ref true_branch,
183 ref false_branch,
184 } => {
185 hard = true;
186
187 let child_info_condition = self.visit(condition)?;
188 let child_info_truth = self.visit(true_branch)?;
189 let child_info_false = self.visit(false_branch)?;
190
191 min_size = child_info_condition.min_size
192 + min(child_info_truth.min_size, child_info_false.min_size);
193 const_size = child_info_condition.const_size
194 && child_info_truth.const_size
195 && child_info_false.const_size
196 && child_info_condition.min_size + child_info_truth.min_size == child_info_false.min_size;
198
199 children.push(child_info_condition);
200 children.push(child_info_truth);
201 children.push(child_info_false);
202 }
203 };
204
205 Ok(Info {
206 expr,
207 children,
208 start_group,
209 end_group: self.group_ix,
210 min_size,
211 const_size,
212 hard,
213 })
214 }
215}
216
217fn literal_const_size(_: &str, _: bool) -> bool {
218 true
222}
223
224pub fn analyze<'a>(tree: &'a ExprTree) -> Result<Info<'a>> {
226 let mut analyzer = Analyzer {
227 backrefs: &tree.backrefs,
228 group_ix: 0,
229 };
230
231 analyzer.visit(&tree.expr)
232}
233
234#[cfg(test)]
235mod tests {
236 use super::analyze;
237 use crate::Expr;
239
240 #[test]
255 fn invalid_backref_1() {
256 assert!(analyze(&Expr::parse_tree(".\\0").unwrap()).is_err());
257 }
258
259 #[test]
260 fn invalid_backref_2() {
261 assert!(analyze(&Expr::parse_tree("(.\\1)").unwrap()).is_err());
262 }
263
264 #[test]
265 fn invalid_backref_3() {
266 assert!(analyze(&Expr::parse_tree("\\1(.)").unwrap()).is_err());
267 }
268
269 #[test]
270 fn is_literal() {
271 let tree = Expr::parse_tree("abc").unwrap();
272 let info = analyze(&tree).unwrap();
273 assert_eq!(info.is_literal(), true);
274 }
275
276 #[test]
277 fn is_literal_with_repeat() {
278 let tree = Expr::parse_tree("abc*").unwrap();
279 let info = analyze(&tree).unwrap();
280 assert_eq!(info.is_literal(), false);
281 }
282}