pest/iterators/pair.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::format;
11use alloc::rc::Rc;
12#[cfg(feature = "pretty-print")]
13use alloc::string::String;
14use alloc::vec::Vec;
15use core::borrow::Borrow;
16use core::fmt;
17use core::hash::{Hash, Hasher};
18use core::ptr;
19use core::str;
20
21#[cfg(feature = "pretty-print")]
22use serde::ser::SerializeStruct;
23
24use super::line_index::LineIndex;
25use super::pairs::{self, Pairs};
26use super::queueable_token::QueueableToken;
27use super::tokens::{self, Tokens};
28use crate::span::{self, Span};
29use crate::RuleType;
30
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// Cloning a `Pair` clones its `Rc` handles; the token queue itself is shared, not copied.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// Shared queue of tokens that this `Pair` indexes into.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    /// Input string that the tokens' `input_pos` values are offsets into.
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
    /// Shared line index used by `line_col` to translate a position into `(line, col)`.
    line_index: Rc<LineIndex>,
}
47
/// Builds a [`Pair`] from its parts without performing any validation.
///
/// * `queue` - shared token queue the pair indexes into.
/// * `input` - input string the token positions refer to.
/// * `line_index` - shared line index used for `line_col` lookups.
/// * `start` - index into `queue` of the pair's first token. The accessors on `Pair` hit
///   `unreachable!()` if the token at this index is not a `QueueableToken::Start`.
pub fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'i, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}
61
62impl<'i, R: RuleType> Pair<'i, R> {
63    /// Returns the `Rule` of the `Pair`.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// # use std::rc::Rc;
69    /// # use pest;
70    /// # #[allow(non_camel_case_types)]
71    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
72    /// enum Rule {
73    ///     a
74    /// }
75    ///
76    /// let input = "";
77    /// let pair = pest::state(input, |state| {
78    ///     // generating Token pair with Rule::a ...
79    /// #     state.rule(Rule::a, |s| Ok(s))
80    /// }).unwrap().next().unwrap();
81    ///
82    /// assert_eq!(pair.as_rule(), Rule::a);
83    /// ```
84    #[inline]
85    pub fn as_rule(&self) -> R {
86        match self.queue[self.pair()] {
87            QueueableToken::End { rule, .. } => rule,
88            _ => unreachable!(),
89        }
90    }
91
92    /// Captures a slice from the `&str` defined by the token `Pair`.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// # use std::rc::Rc;
98    /// # use pest;
99    /// # #[allow(non_camel_case_types)]
100    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
101    /// enum Rule {
102    ///     ab
103    /// }
104    ///
105    /// let input = "ab";
106    /// let pair = pest::state(input, |state| {
107    ///     // generating Token pair with Rule::ab ...
108    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
109    /// }).unwrap().next().unwrap();
110    ///
111    /// assert_eq!(pair.as_str(), "ab");
112    /// ```
113    #[inline]
114    pub fn as_str(&self) -> &'i str {
115        let start = self.pos(self.start);
116        let end = self.pos(self.pair());
117
118        // Generated positions always come from Positions and are UTF-8 borders.
119        &self.input[start..end]
120    }
121
    /// Returns the input string of the `Pair`.
    ///
    /// This is the whole source string the `Pair` was created from, not only the part the
    /// `Pair` covers; use [`as_str`](#method.as_str) for the covered slice.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// // Example: Get input string from a Pair
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// assert_eq!(input, pair.get_input());
    /// ```
    pub fn get_input(&self) -> &'i str {
        self.input
    }
153
    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// Deprecated: this simply forwards to [`as_span`](#method.as_span), which yields the same
    /// `Span` without consuming the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }
180
181    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// # use std::rc::Rc;
187    /// # use pest;
188    /// # #[allow(non_camel_case_types)]
189    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
190    /// enum Rule {
191    ///     ab
192    /// }
193    ///
194    /// let input = "ab";
195    /// let pair = pest::state(input, |state| {
196    ///     // generating Token pair with Rule::ab ...
197    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
198    /// }).unwrap().next().unwrap();
199    ///
200    /// assert_eq!(pair.as_span().as_str(), "ab");
201    /// ```
202    #[inline]
203    pub fn as_span(&self) -> Span<'i> {
204        let start = self.pos(self.start);
205        let end = self.pos(self.pair());
206
207        span::Span::new_internal(self.input, start, end)
208    }
209
210    /// Get current node tag
211    #[inline]
212    pub fn as_node_tag(&self) -> Option<&str> {
213        match &self.queue[self.pair()] {
214            QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
215            _ => None,
216        }
217    }
218
219    /// Returns the inner `Pairs` between the `Pair`, consuming it.
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// # use std::rc::Rc;
225    /// # use pest;
226    /// # #[allow(non_camel_case_types)]
227    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
228    /// enum Rule {
229    ///     a
230    /// }
231    ///
232    /// let input = "";
233    /// let pair = pest::state(input, |state| {
234    ///     // generating Token pair with Rule::a ...
235    /// #     state.rule(Rule::a, |s| Ok(s))
236    /// }).unwrap().next().unwrap();
237    ///
238    /// assert!(pair.into_inner().next().is_none());
239    /// ```
240    #[inline]
241    pub fn into_inner(self) -> Pairs<'i, R> {
242        let pair = self.pair();
243
244        pairs::new(
245            self.queue,
246            self.input,
247            Some(self.line_index),
248            self.start + 1,
249            pair,
250        )
251    }
252
253    /// Returns the `Tokens` for the `Pair`.
254    ///
255    /// # Examples
256    ///
257    /// ```
258    /// # use std::rc::Rc;
259    /// # use pest;
260    /// # #[allow(non_camel_case_types)]
261    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
262    /// enum Rule {
263    ///     a
264    /// }
265    ///
266    /// let input = "";
267    /// let pair = pest::state(input, |state| {
268    ///     // generating Token pair with Rule::a ...
269    /// #     state.rule(Rule::a, |s| Ok(s))
270    /// }).unwrap().next().unwrap();
271    /// let tokens: Vec<_> = pair.tokens().collect();
272    ///
273    /// assert_eq!(tokens.len(), 2);
274    /// ```
275    #[inline]
276    pub fn tokens(self) -> Tokens<'i, R> {
277        let end = self.pair();
278
279        tokens::new(self.queue, self.input, self.start, end + 1)
280    }
281
    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
    ///
    /// Only available with the `pretty-print` feature. The output is produced by
    /// `serde_json::to_string_pretty` from this type's `Serialize` implementation.
    ///
    /// # Panics
    ///
    /// Panics with "Failed to pretty-print Pair to json." if serialization returns an error.
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }
288
289    /// Returns the `line`, `col` of this pair start.
290    pub fn line_col(&self) -> (usize, usize) {
291        let pos = self.pos(self.start);
292        self.line_index.line_col(self.input, pos)
293    }
294
295    fn pair(&self) -> usize {
296        match self.queue[self.start] {
297            QueueableToken::Start {
298                end_token_index, ..
299            } => end_token_index,
300            _ => unreachable!(),
301        }
302    }
303
304    fn pos(&self, index: usize) -> usize {
305        match self.queue[index] {
306            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
307                input_pos
308            }
309        }
310    }
311}
312
313impl<'i, R: RuleType> Pairs<'i, R> {
314    /// Create a new `Pairs` iterator containing just the single `Pair`.
315    pub fn single(pair: Pair<'i, R>) -> Self {
316        let end = pair.pair();
317        pairs::new(
318            pair.queue,
319            pair.input,
320            Some(pair.line_index),
321            pair.start,
322            end,
323        )
324    }
325}
326
327impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
328    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329        let pair = &mut f.debug_struct("Pair");
330        pair.field("rule", &self.as_rule());
331        // In order not to break compatibility
332        if let Some(s) = self.as_node_tag() {
333            pair.field("node_tag", &s);
334        }
335        pair.field("span", &self.as_span())
336            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
337            .finish()
338    }
339}
340
341impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
342    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
343        let rule = self.as_rule();
344        let start = self.pos(self.start);
345        let end = self.pos(self.pair());
346        let mut pairs = self.clone().into_inner().peekable();
347
348        if pairs.peek().is_none() {
349            write!(f, "{:?}({}, {})", rule, start, end)
350        } else {
351            write!(
352                f,
353                "{:?}({}, {}, [{}])",
354                rule,
355                start,
356                end,
357                pairs
358                    .map(|pair| format!("{}", pair))
359                    .collect::<Vec<_>>()
360                    .join(", ")
361            )
362        }
363    }
364}
365
366impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
367    fn eq(&self, other: &Pair<'i, R>) -> bool {
368        Rc::ptr_eq(&self.queue, &other.queue)
369            && ptr::eq(self.input, other.input)
370            && self.start == other.start
371    }
372}
373
// Marker impl: the `PartialEq` impl for `Pair` compares pointers and an index only.
impl<'i, R: Eq> Eq for Pair<'i, R> {}
375
376impl<'i, R: Hash> Hash for Pair<'i, R> {
377    fn hash<H: Hasher>(&self, state: &mut H) {
378        (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
379        (self.input as *const str).hash(state);
380        self.start.hash(state);
381    }
382}
383
/// Serializes a pair as a struct with three fields: `pos` (start and end positions), `rule`
/// (the rule's `Debug` rendering) and `inner` (the matched text for a leaf pair, otherwise
/// the inner pairs).
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let bounds = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut out = serializer.serialize_struct("Pairs", 3)?;
        out.serialize_field("pos", &bounds)?;
        out.serialize_field("rule", &rule_name)?;

        match children.peek() {
            // Leaf pair: there is nothing nested, so emit the matched text instead.
            None => out.serialize_field("inner", &self.as_str())?,
            Some(_) => out.serialize_field("inner", &children)?,
        }

        out.end()
    }
}
408
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// Pins the exact pretty-printed JSON that `Pair::to_json` produces for the first pair
    /// parsed from "abcde".
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    /// `into_inner` must cover only the nested pair's tokens, not the outer pair's own.
    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    /// `get_input` returns the whole string that was parsed.
    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}
462}