xml/
lib.rs

1// RustyXML
2// Copyright 2013-2016 RustyXML developers
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10#![crate_name = "xml"]
11#![crate_type = "lib"]
12#![forbid(non_camel_case_types)]
13#![warn(missing_docs)]
14// Required for benchmarks
15#![cfg_attr(feature = "bench", feature(test))]
16
17/*!
18 * An XML parsing library
19 */
20
21pub use crate::element::ChildElements;
22pub use crate::element::Element;
23pub use crate::element_builder::BuilderError;
24pub use crate::element_builder::ElementBuilder;
25pub use crate::parser::Event;
26pub use crate::parser::Parser;
27pub use crate::parser::ParserError;
28
29use std::char;
30use std::collections::HashMap;
31use std::fmt;
32
33mod element;
34mod element_builder;
35mod parser;
36
37// General functions
38
39#[inline]
40/// Escapes ', ", &, <, and > with the appropriate XML entities.
41pub fn escape(input: &str) -> String {
42    let mut result = String::with_capacity(input.len());
43
44    for c in input.chars() {
45        match c {
46            '&' => result.push_str("&amp;"),
47            '<' => result.push_str("&lt;"),
48            '>' => result.push_str("&gt;"),
49            '\'' => result.push_str("&apos;"),
50            '"' => result.push_str("&quot;"),
51            o => result.push(o),
52        }
53    }
54    result
55}
56
57#[inline]
58/// Unescapes all valid XML entities in a string.
59/// Returns the first invalid entity on failure.
60pub fn unescape(input: &str) -> Result<String, String> {
61    let mut result = String::with_capacity(input.len());
62
63    let mut it = input.split('&');
64
65    // Push everything before the first '&'
66    if let Some(sub) = it.next() {
67        result.push_str(sub);
68    }
69
70    for sub in it {
71        match sub.find(';') {
72            Some(idx) => {
73                let ent = &sub[..idx];
74                match ent {
75                    "quot" => result.push('"'),
76                    "apos" => result.push('\''),
77                    "gt" => result.push('>'),
78                    "lt" => result.push('<'),
79                    "amp" => result.push('&'),
80                    ent => {
81                        let val = if ent.starts_with("#x") {
82                            u32::from_str_radix(&ent[2..], 16).ok()
83                        } else if ent.starts_with('#') {
84                            u32::from_str_radix(&ent[1..], 10).ok()
85                        } else {
86                            None
87                        };
88                        match val.and_then(char::from_u32) {
89                            Some(c) => result.push(c),
90                            None => return Err(format!("&{};", ent)),
91                        }
92                    }
93                }
94                result.push_str(&sub[idx + 1..]);
95            }
96            None => return Err("&".to_owned() + sub),
97        }
98    }
99    Ok(result)
100}
101
102// General types
103#[derive(Clone, PartialEq, Debug)]
104/// An Enum describing a XML Node
105pub enum Xml {
106    /// An XML Element
107    ElementNode(Element),
108    /// Character Data
109    CharacterNode(String),
110    /// CDATA
111    CDATANode(String),
112    /// A XML Comment
113    CommentNode(String),
114    /// Processing Information
115    PINode(String),
116}
117
118#[derive(PartialEq, Eq, Debug)]
119/// Structure describing an opening tag
120pub struct StartTag {
121    /// The tag's name
122    pub name: String,
123    /// The tag's namespace
124    pub ns: Option<String>,
125    /// The tag's prefix
126    pub prefix: Option<String>,
127    /// The tag's attributes
128    pub attributes: HashMap<(String, Option<String>), String>,
129}
130
131#[derive(PartialEq, Eq, Debug)]
132/// Structure describing a closing tag
133pub struct EndTag {
134    /// The tag's name
135    pub name: String,
136    /// The tag's namespace
137    pub ns: Option<String>,
138    /// The tag's prefix
139    pub prefix: Option<String>,
140}
141
142impl fmt::Display for Xml {
143    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
144        match *self {
145            Xml::ElementNode(ref elem) => elem.fmt(f),
146            Xml::CharacterNode(ref data) => write!(f, "{}", escape(&data)),
147            Xml::CDATANode(ref data) => write!(f, "<![CDATA[{}]]>", &data),
148            Xml::CommentNode(ref data) => write!(f, "<!--{}-->", &data),
149            Xml::PINode(ref data) => write!(f, "<?{}?>", &data),
150        }
151    }
152}
153
154#[cfg(test)]
155mod lib_tests {
156    use super::{escape, unescape, Element, Xml};
157
158    #[test]
159    fn test_escape() {
160        let esc = escape("&<>'\"");
161        assert_eq!(esc, "&amp;&lt;&gt;&apos;&quot;");
162    }
163
164    #[test]
165    fn test_unescape() {
166        let unesc = unescape("&amp;lt;&lt;&gt;&apos;&quot;&#x201c;&#x201d;&#38;&#34;");
167        assert_eq!(
168            unesc.as_ref().map(|x| &x[..]),
169            Ok("&lt;<>'\"\u{201c}\u{201d}&\""),
170        );
171    }
172
173    #[test]
174    fn test_unescape_invalid() {
175        let unesc = unescape("&amp;&nbsp;");
176        assert_eq!(unesc.as_ref().map_err(|x| &x[..]), Err("&nbsp;"));
177    }
178
179    #[test]
180    fn test_show_element() {
181        let elem = Element::new("a".to_owned(), None, vec![]);
182        assert_eq!(format!("{}", elem), "<a/>");
183
184        let elem = Element::new(
185            "a".to_owned(),
186            None,
187            vec![("href".to_owned(), None, "http://rust-lang.org".to_owned())],
188        );
189        assert_eq!(format!("{}", elem), "<a href='http://rust-lang.org'/>");
190
191        let mut elem = Element::new("a".to_owned(), None, vec![]);
192        elem.tag(Element::new("b".to_owned(), None, vec![]));
193        assert_eq!(format!("{}", elem), "<a><b/></a>");
194
195        let mut elem = Element::new(
196            "a".to_owned(),
197            None,
198            vec![("href".to_owned(), None, "http://rust-lang.org".to_owned())],
199        );
200        elem.tag(Element::new("b".to_owned(), None, vec![]));
201        assert_eq!(
202            format!("{}", elem),
203            "<a href='http://rust-lang.org'><b/></a>",
204        );
205    }
206
207    #[test]
208    fn test_show_element_xmlns() {
209        let elem: Element = "<a xmlns='urn:test'/>".parse().unwrap();
210        assert_eq!(format!("{}", elem), "<a xmlns='urn:test'/>");
211
212        let elem: Element = "<a xmlns='urn:test'><b xmlns='urn:toast'/></a>"
213            .parse()
214            .unwrap();
215        assert_eq!(
216            format!("{}", elem),
217            "<a xmlns='urn:test'><b xmlns='urn:toast'/></a>",
218        );
219
220        let elem = Element::new(
221            "a".to_owned(),
222            Some("urn:test".to_owned()),
223            vec![("href".to_owned(), None, "http://rust-lang.org".to_owned())],
224        );
225        assert_eq!(
226            format!("{}", elem),
227            "<a xmlns='urn:test' href='http://rust-lang.org'/>",
228        );
229    }
230
231    #[test]
232    fn test_show_characters() {
233        let chars = Xml::CharacterNode("some text".to_owned());
234        assert_eq!(format!("{}", chars), "some text");
235    }
236
237    #[test]
238    fn test_show_cdata() {
239        let chars = Xml::CDATANode("some text".to_owned());
240        assert_eq!(format!("{}", chars), "<![CDATA[some text]]>");
241    }
242
243    #[test]
244    fn test_show_comment() {
245        let chars = Xml::CommentNode("some text".to_owned());
246        assert_eq!(format!("{}", chars), "<!--some text-->");
247    }
248
249    #[test]
250    fn test_show_pi() {
251        let chars = Xml::PINode("xml version='1.0'".to_owned());
252        assert_eq!(format!("{}", chars), "<?xml version='1.0'?>");
253    }
254
255    #[test]
256    fn test_content_str() {
257        let mut elem = Element::new("a".to_owned(), None, vec![]);
258        elem.pi("processing information".to_owned())
259            .cdata("<hello/>".to_owned())
260            .tag_stay(Element::new("b".to_owned(), None, vec![]))
261            .text("World".to_owned())
262            .comment("Nothing to see".to_owned());
263        assert_eq!(elem.content_str(), "<hello/>World");
264    }
265}
266
267#[cfg(test)]
268#[cfg(feature = "bench")]
269mod lib_bench {
270    extern crate test;
271
272    use self::test::Bencher;
273    use super::{escape, unescape};
274    use std::iter::repeat;
275
276    #[bench]
277    fn bench_escape(bh: &mut Bencher) {
278        let input: String = repeat("&<>'\"").take(100).collect();
279        bh.iter(|| escape(&input));
280        bh.bytes = input.len() as u64;
281    }
282
283    #[bench]
284    fn bench_unescape(bh: &mut Bencher) {
285        let input: String = repeat("&amp;&lt;&gt;&apos;&quot;").take(50).collect();
286        bh.iter(|| unescape(&input));
287        bh.bytes = input.len() as u64;
288    }
289}