xml/
parser.rs

1// RustyXML
2// Copyright 2013-2016 RustyXML developers
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9//
10// The parser herein is derived from OFXMLParser as included with
11// ObjFW, Copyright (c) 2008-2013 Jonathan Schleifer.
12// Permission to license this derived work under MIT license has been granted by ObjFW's author.
13
14use super::{unescape, EndTag, StartTag};
15use std::collections::{HashMap, VecDeque};
16use std::error::Error;
17use std::fmt;
18use std::iter::Iterator;
19use std::mem;
20
21#[derive(PartialEq, Eq, Debug)]
22/// Events returned by the `Parser`
23pub enum Event {
24    /// Event indicating processing information was found
25    PI(String),
26    /// Event indicating a start tag was found
27    ElementStart(StartTag),
28    /// Event indicating a end tag was found
29    ElementEnd(EndTag),
30    /// Event indicating character data was found
31    Characters(String),
32    /// Event indicating CDATA was found
33    CDATA(String),
34    /// Event indicating a comment was found
35    Comment(String),
36}
37
/// The error reported by the parser when it reads erroneous XML
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_copy_implementations)]
pub struct ParserError {
    /// Line on which the error was detected
    pub line: u32,
    /// Column at which the error was detected
    pub col: u32,
    /// Static message describing the kind of error
    pub msg: &'static str,
}
49
50impl Error for ParserError {
51    fn description(&self) -> &str {
52        self.msg
53    }
54}
55
56impl fmt::Display for ParserError {
57    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58        write!(
59            f,
60            "Parse error; Line: {}, Column: {}, Reason: {}",
61            self.line, self.col, self.msg,
62        )
63    }
64}
65
/// States of the event based parser.
///
/// `Parser::parse_character` hands every input character to the handler
/// method belonging to the current state.
enum State {
    /// Outside of any tag or other construct; character data is accumulated
    OutsideTag,
    /// Directly after a `<`
    TagOpened,
    /// Inside a processing instruction (`<?` ... `?>`)
    InProcessingInstructions,
    /// Reading the name of a start tag
    InTagName,
    /// Reading the name of an end tag
    InCloseTagName,
    /// Inside a start tag, after its name and between attributes
    InTag,
    /// Reading an attribute name
    InAttrName,
    /// Reading an attribute value up to its closing delimiter
    InAttrValue,
    /// After the `=` of an attribute, waiting for the opening `"` or `'`
    ExpectDelimiter,
    /// After the `/` of an empty-element tag; only `>` is accepted
    ExpectClose,
    /// After the name of an end tag; only whitespace or `>` is accepted
    ExpectSpaceOrClose,
    /// Directly after `<!`
    InExclamationMark,
    /// Matching the `CDATA[` that has to follow `<![`
    InCDATAOpening,
    /// Inside a CDATA section, looking for the closing `]]>`
    InCDATA,
    /// After `<!-`, waiting for the second `-`
    InCommentOpening,
    /// Inside a comment
    InComment1,
    /// After `--` inside a comment; only `>` is accepted
    InComment2,
    /// Inside a `<!DOCTYPE` declaration
    InDoctype,
}
87
88/// A streaming XML parser
89///
90/// Data is fed to the parser using the `feed_str()` method.
91/// The `Event`s, and `ParserError`s generated while parsing the string
92/// can be requested by iterating over the parser
93///
94/// ~~~
95/// use xml::Parser;
96///
97/// let mut p = Parser::new();
98/// p.feed_str("<a href='http://rust-lang.org'>Rust</a>");
99/// for event in p {
100///     match event {
101///        // [...]
102///        _ => ()
103///     }
104/// }
105/// ~~~
106pub struct Parser {
107    line: u32,
108    col: u32,
109    has_error: bool,
110    data: VecDeque<char>,
111    buf: String,
112    namespaces: Vec<HashMap<String, String>>,
113    attributes: Vec<(String, Option<String>, String)>,
114    st: State,
115    name: Option<(Option<String>, String)>,
116    attr: Option<(Option<String>, String)>,
117    delim: Option<char>,
118    level: u8,
119}
120
121impl Parser {
122    /// Returns a new `Parser`
123    pub fn new() -> Parser {
124        let mut ns = HashMap::with_capacity(2);
125        // Add standard namespaces
126        ns.insert(
127            "xml".to_owned(),
128            "http://www.w3.org/XML/1998/namespace".to_owned(),
129        );
130        ns.insert(
131            "xmlns".to_owned(),
132            "http://www.w3.org/2000/xmlns/".to_owned(),
133        );
134
135        Parser {
136            line: 1,
137            col: 0,
138            has_error: false,
139            data: VecDeque::with_capacity(4096),
140            buf: String::new(),
141            namespaces: vec![ns],
142            attributes: Vec::new(),
143            st: State::OutsideTag,
144            name: None,
145            attr: None,
146            delim: None,
147            level: 0,
148        }
149    }
150
151    /// Feeds a string slice to the parser
152    pub fn feed_str(&mut self, data: &str) {
153        self.data.extend(data.chars());
154    }
155}
156
157impl Iterator for Parser {
158    type Item = Result<Event, ParserError>;
159
160    fn next(&mut self) -> Option<Result<Event, ParserError>> {
161        if self.has_error {
162            return None;
163        }
164
165        loop {
166            let c = match self.data.pop_front() {
167                Some(c) => c,
168                None => return None,
169            };
170
171            if c == '\n' {
172                self.line += 1;
173                self.col = 0;
174            } else {
175                self.col += 1;
176            }
177
178            match self.parse_character(c) {
179                Ok(None) => continue,
180                Ok(Some(event)) => {
181                    return Some(Ok(event));
182                }
183                Err(e) => {
184                    self.has_error = true;
185                    return Some(Err(e));
186                }
187            }
188        }
189    }
190}
191
#[inline]
// Splits a QName at its first ':' into (Prefix, LocalPart).
// A name without ':' has no prefix and is returned unchanged as LocalPart.
fn parse_qname(mut qname: String) -> (Option<String>, String) {
    match qname.find(':') {
        None => (None, qname),
        Some(colon) => {
            let local = qname[colon + 1..].to_owned();
            // Cut off the ':' and everything behind it, the rest is the prefix
            qname.truncate(colon);
            (Some(qname), local)
        }
    }
}
203
204fn unescape_owned(input: String) -> Result<String, String> {
205    if input.find('&').is_none() {
206        Ok(input)
207    } else {
208        unescape(&input)
209    }
210}
211
212impl Parser {
213    // Get the namespace currently bound to a prefix.
214    // Bindings are stored as a stack of HashMaps, we start searching in the top most HashMap
215    // and traverse down until the prefix is found.
216    fn namespace_for_prefix(&self, prefix: &str) -> Option<String> {
217        for ns in self.namespaces.iter().rev() {
218            if let Some(namespace) = ns.get(prefix) {
219                if namespace.is_empty() {
220                    return None;
221                }
222                return Some(namespace.clone());
223            }
224        }
225        None
226    }
227
228    fn take_buf(&mut self) -> String {
229        self.buf.split_off(0)
230    }
231
232    fn error(&self, msg: &'static str) -> Result<Option<Event>, ParserError> {
233        Err(ParserError {
234            line: self.line,
235            col: self.col,
236            msg,
237        })
238    }
239
240    fn parse_character(&mut self, c: char) -> Result<Option<Event>, ParserError> {
241        // println(fmt!("Now in state: %?", self.st));
242        match self.st {
243            State::OutsideTag => self.outside_tag(c),
244            State::TagOpened => self.tag_opened(c),
245            State::InProcessingInstructions => self.in_processing_instructions(c),
246            State::InTagName => self.in_tag_name(c),
247            State::InCloseTagName => self.in_close_tag_name(c),
248            State::InTag => self.in_tag(c),
249            State::InAttrName => self.in_attr_name(c),
250            State::InAttrValue => self.in_attr_value(c),
251            State::ExpectDelimiter => self.expect_delimiter(c),
252            State::ExpectClose => self.expect_close(c),
253            State::ExpectSpaceOrClose => self.expect_space_or_close(c),
254            State::InExclamationMark => self.in_exclamation_mark(c),
255            State::InCDATAOpening => self.in_cdata_opening(c),
256            State::InCDATA => self.in_cdata(c),
257            State::InCommentOpening => self.in_comment_opening(c),
258            State::InComment1 => self.in_comment1(c),
259            State::InComment2 => self.in_comment2(c),
260            State::InDoctype => self.in_doctype(c),
261        }
262    }
263
264    // Outside any tag, or other construct
265    // '<' => TagOpened, producing Event::Characters
266    fn outside_tag(&mut self, c: char) -> Result<Option<Event>, ParserError> {
267        match c {
268            '<' if self.buf.is_empty() => self.st = State::TagOpened,
269            '<' => {
270                self.st = State::TagOpened;
271                let buf = match unescape_owned(self.take_buf()) {
272                    Ok(unescaped) => unescaped,
273                    Err(_) => return self.error("Found invalid entity"),
274                };
275                return Ok(Some(Event::Characters(buf)));
276            }
277            _ => self.buf.push(c),
278        }
279        Ok(None)
280    }
281
282    // Character following a '<', starting a tag or other construct
283    // '?' => InProcessingInstructions
284    // '!' => InExclamationMark
285    // '/' => InCloseTagName
286    //  _  => InTagName
287    fn tag_opened(&mut self, c: char) -> Result<Option<Event>, ParserError> {
288        self.st = match c {
289            '?' => State::InProcessingInstructions,
290            '!' => State::InExclamationMark,
291            '/' => State::InCloseTagName,
292            _ => {
293                self.buf.push(c);
294                State::InTagName
295            }
296        };
297        Ok(None)
298    }
299
300    // Inside a processing instruction
301    // '?' '>' => OutsideTag, producing PI
302    fn in_processing_instructions(&mut self, c: char) -> Result<Option<Event>, ParserError> {
303        match c {
304            '?' => {
305                self.level = 1;
306                self.buf.push(c);
307            }
308            '>' if self.level == 1 => {
309                self.level = 0;
310                self.st = State::OutsideTag;
311                let _ = self.buf.pop();
312                let buf = self.take_buf();
313                return Ok(Some(Event::PI(buf)));
314            }
315            _ => self.buf.push(c),
316        }
317        Ok(None)
318    }
319
320    // Inside a tag name (opening tag)
321    // '/' => ExpectClose, producing Event::ElementStart
322    // '>' => OutsideTag, producing Event::ElementStart
323    // ' ' or '\t' or '\r' or '\n' => InTag
324    fn in_tag_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
325        match c {
326            '/' | '>' => {
327                let (prefix, name) = parse_qname(self.take_buf());
328                let ns = match prefix {
329                    None => self.namespace_for_prefix(""),
330                    Some(ref pre) => match self.namespace_for_prefix(&pre) {
331                        None => return self.error("Unbound namespace prefix in tag name"),
332                        ns => ns,
333                    },
334                };
335
336                self.namespaces.push(HashMap::new());
337                self.st = if c == '/' {
338                    self.name = Some((prefix.clone(), name.clone()));
339                    State::ExpectClose
340                } else {
341                    State::OutsideTag
342                };
343
344                return Ok(Some(Event::ElementStart(StartTag {
345                    name,
346                    ns,
347                    prefix,
348                    attributes: HashMap::new(),
349                })));
350            }
351            ' ' | '\t' | '\r' | '\n' => {
352                self.namespaces.push(HashMap::new());
353                self.name = Some(parse_qname(self.take_buf()));
354                self.st = State::InTag;
355            }
356            _ => self.buf.push(c),
357        }
358        Ok(None)
359    }
360
361    // Inside a tag name (closing tag)
362    // '>' => OutsideTag, producing ElementEnd
363    // ' ' or '\t' or '\r' or '\n' => ExpectSpaceOrClose, producing ElementEnd
364    fn in_close_tag_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
365        match c {
366            ' ' | '\t' | '\r' | '\n' | '>' => {
367                let (prefix, name) = parse_qname(self.take_buf());
368
369                let ns = match prefix {
370                    None => self.namespace_for_prefix(""),
371                    Some(ref pre) => match self.namespace_for_prefix(&pre) {
372                        None => return self.error("Unbound namespace prefix in tag name"),
373                        ns => ns,
374                    },
375                };
376
377                self.namespaces.pop();
378                self.st = if c == '>' {
379                    State::OutsideTag
380                } else {
381                    State::ExpectSpaceOrClose
382                };
383
384                Ok(Some(Event::ElementEnd(EndTag { name, ns, prefix })))
385            }
386            _ => {
387                self.buf.push(c);
388                Ok(None)
389            }
390        }
391    }
392
393    // Inside a tag, parsing attributes
394    // '/' => ExpectClose, producing StartTag
395    // '>' => OutsideTag, producing StartTag
396    // ' ' or '\t' or '\r' or '\n' => InAttrName
397    fn in_tag(&mut self, c: char) -> Result<Option<Event>, ParserError> {
398        match c {
399            '/' | '>' => {
400                let attributes = mem::replace(&mut self.attributes, Vec::new());
401                let (prefix, name) = self
402                    .name
403                    .take()
404                    .expect("Internal error: No element name set");
405                let ns = match prefix {
406                    None => self.namespace_for_prefix(""),
407                    Some(ref pre) => match self.namespace_for_prefix(&pre) {
408                        None => return self.error("Unbound namespace prefix in tag name"),
409                        ns => ns,
410                    },
411                };
412
413                let mut attributes_map: HashMap<(String, Option<String>), String> = HashMap::new();
414
415                // At this point attribute namespaces are really just prefixes,
416                // map them to the actual namespace
417                for (name, ns, value) in attributes {
418                    let ns = match ns {
419                        None => None,
420                        Some(ref prefix) => match self.namespace_for_prefix(&prefix) {
421                            None => {
422                                return self.error("Unbound namespace prefix in attribute name")
423                            }
424                            ns => ns,
425                        },
426                    };
427                    if attributes_map.insert((name, ns), value).is_some() {
428                        return self.error("Duplicate attribute");
429                    }
430                }
431
432                self.st = if c == '/' {
433                    self.name = Some((prefix.clone(), name.clone()));
434                    State::ExpectClose
435                } else {
436                    State::OutsideTag
437                };
438
439                return Ok(Some(Event::ElementStart(StartTag {
440                    name,
441                    ns,
442                    prefix,
443                    attributes: attributes_map,
444                })));
445            }
446            ' ' | '\t' | '\r' | '\n' => (),
447            _ => {
448                self.buf.push(c);
449                self.st = State::InAttrName;
450            }
451        }
452        Ok(None)
453    }
454
455    // Inside an attribute name
456    // '=' => ExpectDelimiter
457    fn in_attr_name(&mut self, c: char) -> Result<Option<Event>, ParserError> {
458        match c {
459            '=' => {
460                self.level = 0;
461                self.attr = Some(parse_qname(self.take_buf()));
462                self.st = State::ExpectDelimiter;
463            }
464            ' ' | '\t' | '\r' | '\n' => self.level = 1,
465            _ if self.level == 0 => self.buf.push(c),
466            _ => return self.error("Space occured in attribute name"),
467        }
468        Ok(None)
469    }
470
471    // Inside an attribute value
472    // delimiter => InTag, adds attribute
473    fn in_attr_value(&mut self, c: char) -> Result<Option<Event>, ParserError> {
474        if c == self
475            .delim
476            .expect("Internal error: In attribute value, but no delimiter set")
477        {
478            self.delim = None;
479            self.st = State::InTag;
480            let attr = self.attr.take();
481            let (prefix, name) =
482                attr.expect("Internal error: In attribute value, but no attribute name set");
483            let value = match unescape_owned(self.take_buf()) {
484                Ok(unescaped) => unescaped,
485                Err(_) => return self.error("Found invalid entity"),
486            };
487
488            let last = self
489                .namespaces
490                .last_mut()
491                .expect("Internal error: Empty namespace stack");
492            match prefix {
493                None if name == "xmlns" => {
494                    last.insert(String::new(), value.clone());
495                }
496                Some(ref prefix) if prefix == "xmlns" => {
497                    last.insert(name.clone(), value.clone());
498                }
499                _ => (),
500            }
501
502            self.attributes.push((name, prefix, value));
503        } else {
504            self.buf.push(c);
505        }
506        Ok(None)
507    }
508
509    // Looking for an attribute value delimiter
510    // '"' or '\'' => InAttrValue, sets delimiter
511    fn expect_delimiter(&mut self, c: char) -> Result<Option<Event>, ParserError> {
512        match c {
513            '"' | '\'' => {
514                self.delim = Some(c);
515                self.st = State::InAttrValue;
516            }
517            ' ' | '\t' | '\r' | '\n' => (),
518            _ => return self.error("Attribute value not enclosed in ' or \""),
519        }
520        Ok(None)
521    }
522
523    // Expect closing '>' of an empty-element tag (no whitespace allowed)
524    // '>' => OutsideTag
525    fn expect_close(&mut self, c: char) -> Result<Option<Event>, ParserError> {
526        match c {
527            '>' => {
528                self.st = State::OutsideTag;
529                let (prefix, name) = self
530                    .name
531                    .take()
532                    .expect("Internal error: No element name set");
533                let ns = match prefix {
534                    None => self.namespace_for_prefix(""),
535                    Some(ref pre) => match self.namespace_for_prefix(&pre) {
536                        None => return self.error("Unbound namespace prefix in tag name"),
537                        ns => ns,
538                    },
539                };
540                self.namespaces.pop();
541                Ok(Some(Event::ElementEnd(EndTag { name, ns, prefix })))
542            }
543            _ => self.error("Expected '>' to close tag"),
544        }
545    }
546
547    // Expect closing '>' of a start tag
548    // '>' => OutsideTag
549    fn expect_space_or_close(&mut self, c: char) -> Result<Option<Event>, ParserError> {
550        match c {
551            ' ' | '\t' | '\r' | '\n' => Ok(None),
552            '>' => {
553                self.st = State::OutsideTag;
554                Ok(None)
555            }
556            _ => self.error("Expected '>' to close tag, or LWS"),
557        }
558    }
559
560    // After an '!' trying to determine the type of the following construct
561    // '-' => InCommentOpening
562    // '[' => InCDATAOpening
563    // 'D' => InDoctype
564    fn in_exclamation_mark(&mut self, c: char) -> Result<Option<Event>, ParserError> {
565        self.st = match c {
566            '-' => State::InCommentOpening,
567            '[' => State::InCDATAOpening,
568            'D' => State::InDoctype,
569            _ => return self.error("Malformed XML"),
570        };
571        Ok(None)
572    }
573
574    // Opening sequence of Event::CDATA
575    // 'C' 'D' 'A' 'T' 'A' '[' => InCDATA
576    fn in_cdata_opening(&mut self, c: char) -> Result<Option<Event>, ParserError> {
577        static CDATA_PATTERN: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
578        if c == CDATA_PATTERN[self.level as usize] {
579            self.level += 1;
580        } else {
581            return self.error("Invalid CDATA opening sequence");
582        }
583
584        if self.level == 6 {
585            self.level = 0;
586            self.st = State::InCDATA;
587        }
588        Ok(None)
589    }
590
591    // Inside CDATA
592    // ']' ']' '>' => OutsideTag, producing Event::CDATA
593    fn in_cdata(&mut self, c: char) -> Result<Option<Event>, ParserError> {
594        match c {
595            ']' => {
596                self.buf.push(c);
597                self.level += 1;
598            }
599            '>' if self.level >= 2 => {
600                self.st = State::OutsideTag;
601                self.level = 0;
602                let len = self.buf.len();
603                self.buf.truncate(len - 2);
604                let buf = self.take_buf();
605                return Ok(Some(Event::CDATA(buf)));
606            }
607            _ => {
608                self.buf.push(c);
609                self.level = 0;
610            }
611        }
612        Ok(None)
613    }
614
615    // Opening sequence of a comment
616    // '-' => InComment1
617    fn in_comment_opening(&mut self, c: char) -> Result<Option<Event>, ParserError> {
618        if c == '-' {
619            self.st = State::InComment1;
620            self.level = 0;
621            Ok(None)
622        } else {
623            self.error("Expected 2nd '-' to start comment")
624        }
625    }
626
627    // Inside a comment
628    // '-' '-' => InComment2
629    fn in_comment1(&mut self, c: char) -> Result<Option<Event>, ParserError> {
630        if c == '-' {
631            self.level += 1;
632        } else {
633            self.level = 0;
634        }
635
636        if self.level == 2 {
637            self.level = 0;
638            self.st = State::InComment2;
639        }
640
641        self.buf.push(c);
642
643        Ok(None)
644    }
645
646    // Closing a comment
647    // '>' => OutsideTag, producing Comment
648    fn in_comment2(&mut self, c: char) -> Result<Option<Event>, ParserError> {
649        if c != '>' {
650            self.error("No more than one adjacent '-' allowed in a comment")
651        } else {
652            self.st = State::OutsideTag;
653            let len = self.buf.len();
654            self.buf.truncate(len - 2);
655            let buf = self.take_buf();
656            Ok(Some(Event::Comment(buf)))
657        }
658    }
659
660    // Inside a doctype
661    // '>' after appropriate opening => OutsideTag
662    fn in_doctype(&mut self, c: char) -> Result<Option<Event>, ParserError> {
663        static DOCTYPE_PATTERN: [char; 6] = ['O', 'C', 'T', 'Y', 'P', 'E'];
664        match self.level {
665            0..=5 => {
666                if c == DOCTYPE_PATTERN[self.level as usize] {
667                    self.level += 1;
668                } else {
669                    return self.error("Invalid DOCTYPE");
670                }
671            }
672            6 => {
673                match c {
674                    ' ' | '\t' | '\r' | '\n' => (),
675                    _ => return self.error("Invalid DOCTYPE"),
676                }
677                self.level += 1;
678            }
679            _ if c == '>' => {
680                self.level = 0;
681                self.st = State::OutsideTag;
682            }
683            _ => (),
684        }
685        Ok(None)
686    }
687}
688
#[cfg(test)]
mod parser_tests {
    use std::collections::HashMap;

    use super::super::{EndTag, Event, ParserError, StartTag};
    use super::Parser;

    // Feeds `xml` to a fresh parser and collects everything it yields
    fn parse_all(xml: &str) -> Vec<Result<Event, ParserError>> {
        let mut parser = Parser::new();
        parser.feed_str(xml);
        parser.collect()
    }

    // Expected result for a start tag without prefix, namespace and attributes
    fn plain_start(name: &str) -> Result<Event, ParserError> {
        Ok(Event::ElementStart(StartTag {
            name: name.to_owned(),
            ns: None,
            prefix: None,
            attributes: HashMap::new(),
        }))
    }

    // Expected result for an end tag without prefix and namespace
    fn plain_end(name: &str) -> Result<Event, ParserError> {
        Ok(Event::ElementEnd(EndTag {
            name: name.to_owned(),
            ns: None,
            prefix: None,
        }))
    }

    #[test]
    fn test_start_tag() {
        assert_eq!(parse_all("<a>"), vec![plain_start("a")]);
    }

    #[test]
    fn test_end_tag() {
        assert_eq!(parse_all("</a>"), vec![plain_end("a")]);
    }

    #[test]
    fn test_self_closing_with_space() {
        assert_eq!(
            parse_all("<register />"),
            vec![plain_start("register"), plain_end("register")],
        );
    }

    #[test]
    fn test_self_closing_without_space() {
        assert_eq!(
            parse_all("<register/>"),
            vec![plain_start("register"), plain_end("register")],
        );
    }

    #[test]
    fn test_self_closing_namespace() {
        // The declaration itself is reported as attribute in the xmlns namespace
        let mut attributes: HashMap<(String, Option<String>), String> = HashMap::new();
        attributes.insert(
            (
                "foo".to_owned(),
                Some("http://www.w3.org/2000/xmlns/".to_owned()),
            ),
            "urn:foo".to_owned(),
        );

        assert_eq!(
            parse_all("<foo:a xmlns:foo='urn:foo'/>"),
            vec![
                Ok(Event::ElementStart(StartTag {
                    name: "a".to_owned(),
                    ns: Some("urn:foo".to_owned()),
                    prefix: Some("foo".to_owned()),
                    attributes,
                })),
                Ok(Event::ElementEnd(EndTag {
                    name: "a".to_owned(),
                    ns: Some("urn:foo".to_owned()),
                    prefix: Some("foo".to_owned()),
                })),
            ],
        );
    }

    #[test]
    fn test_pi() {
        assert_eq!(
            parse_all("<?xml version='1.0' encoding='utf-8'?>"),
            vec![Ok(Event::PI(
                "xml version='1.0' encoding='utf-8'".to_owned()
            ))],
        );
    }

    #[test]
    fn test_comment() {
        assert_eq!(
            parse_all("<!--Nothing to see-->"),
            vec![Ok(Event::Comment("Nothing to see".to_owned()))],
        );
    }

    #[test]
    fn test_cdata() {
        assert_eq!(
            parse_all("<![CDATA[<html><head><title>x</title></head><body/></html>]]>"),
            vec![Ok(Event::CDATA(
                "<html><head><title>x</title></head><body/></html>".to_owned()
            ))],
        );
    }

    #[test]
    fn test_characters() {
        let events = parse_all("<text>Hello World, it&apos;s a nice day</text>");
        // Start tag, text, end tag; only the text is looked at
        assert_eq!(events.len(), 3);
        assert_eq!(
            events[1],
            Ok(Event::Characters("Hello World, it's a nice day".to_owned())),
        );
    }

    #[test]
    fn test_doctype() {
        // A doctype is skipped without producing any event
        assert!(parse_all("<!DOCTYPE html>").is_empty());
    }
}