quick_xml/de/mod.rs
1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//! - [Basics](#basics)
19//! - [Optional attributes and elements](#optional-attributes-and-elements)
20//! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Composition Rules](#composition-rules)
23//! - [Enum Representations](#enum-representations)
24//! - [Normal enum variant](#normal-enum-variant)
25//! - [`$text` enum variant](#text-enum-variant)
26//! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
27//! - [`$text`](#text)
28//! - [`$value`](#value)
29//! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
30//! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
31//! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
32//! - [Frequently Used Patterns](#frequently-used-patterns)
33//! - [`<element>` lists](#element-lists)
34//! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
35//! - [Internally Tagged Enums](#internally-tagged-enums)
36//!
37//!
38//!
39//! Mapping XML to Rust types
40//! =========================
41//!
42//! Type names are never considered when deserializing, so you can name your
43//! types as you wish. Other general rules:
44//! - `struct` field name could be represented in XML only as an attribute name
45//! or an element name;
46//! - `enum` variant name could be represented in XML only as an attribute name
47//! or an element name;
48//! - the unit struct, unit type `()` and unit enum variant can be deserialized
49//! from any valid XML content:
50//! - attribute and element names;
51//! - attribute and element values;
52//! - text or CDATA content (including mixed text and CDATA content).
53//!
54//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
55//!
56//! NOTE: All tests are marked with an `ignore` option, even though they do
57//! compile. This is because rustdoc marks such blocks with an information
58//! icon unlike `no_run` blocks.
59//!
60//! </div>
61//!
62//! <table>
63//! <thead>
64//! <tr><th colspan="2">
65//!
66//! ## Basics
67//!
68//! </th></tr>
69//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
70//! </thead>
71//! <tbody style="vertical-align:top;">
72//! <tr>
73//! <td>
74//! Content of attributes and text / CDATA content of elements (including mixed
75//! text and CDATA content):
76//!
77//! ```xml
78//! <... ...="content" />
79//! ```
80//! ```xml
81//! <...>content</...>
82//! ```
83//! ```xml
84//! <...><![CDATA[content]]></...>
85//! ```
86//! ```xml
87//! <...>text<![CDATA[cdata]]>text</...>
88//! ```
89//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
90//! </td>
91//! <td>
92//!
93//! You can use any type that can be deserialized from an `&str`, for example:
94//! - [`String`] and [`&str`]
95//! - [`Cow<str>`]
96//! - [`u32`], [`f32`] and other numeric types
97//! - `enum`s, like
98//! ```
99//! # use pretty_assertions::assert_eq;
100//! # use serde::Deserialize;
101//! # #[derive(Debug, PartialEq)]
102//! #[derive(Deserialize)]
103//! enum Language {
104//! Rust,
105//! Cpp,
106//! #[serde(other)]
107//! Other,
108//! }
109//! # #[derive(Debug, PartialEq, Deserialize)]
110//! # struct X { #[serde(rename = "$text")] x: Language }
111//! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
112//! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
113//! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
114//! ```
115//!
116//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
117//!
118//! NOTE: deserialization to non-owned types (i.e. borrow from the input),
119//! such as `&str`, is possible only if you parse document in the UTF-8
120//! encoding and content does not contain entity references such as `&amp;`,
121//! or character references such as `&#xD;`, as well as text content represented
122//! by one piece of [text] or [CDATA] element.
123//! </div>
124//! <!-- TODO: document an error type returned -->
125//!
126//! [text]: Event::Text
127//! [CDATA]: Event::CData
128//! </td>
129//! </tr>
130//! <!-- 2 ===================================================================================== -->
131//! <tr>
132//! <td>
133//!
134//! Content of attributes and text / CDATA content of elements (including mixed
135//! text and CDATA content), which represents a space-delimited list, as
136//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
137//!
138//! ```xml
139//! <... ...="element1 element2 ..." />
140//! ```
141//! ```xml
142//! <...>
143//! element1
144//! element2
145//! ...
146//! </...>
147//! ```
148//! ```xml
149//! <...><![CDATA[
150//! element1
151//! element2
152//! ...
153//! ]]></...>
154//! ```
155//!
156//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
157//! </td>
158//! <td>
159//!
160//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
161//!
162//! ```
163//! type List = Vec<u32>;
164//! ```
165//!
166//! See the next row to learn where in your struct definition you should
167//! use that type.
168//!
169//! According to the XML Schema specification, the delimiter between elements is one
170//! or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
171//!
172//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
173//!
174//! NOTE: according to the XML Schema restrictions, you cannot escape those
175//! white-space characters, so list elements will _never_ contain them.
176//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
177//! values which look like identifiers in many languages, for example, `item`,
178//! `some_item` or `some-item`, so that shouldn't be a problem.
179//!
180//! NOTE: according to the XML Schema specification, list elements can be
181//! delimited only by spaces. Other delimiters (for example, commas) are not
182//! allowed.
183//!
184//! </div>
185//!
186//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
187//! </td>
188//! </tr>
189//! <!-- 3 ===================================================================================== -->
190//! <tr>
191//! <td>
192//! A typical XML with attributes. The root tag name does not matter:
193//!
194//! ```xml
195//! <any-tag one="..." two="..."/>
196//! ```
197//! </td>
198//! <td>
199//!
200//! A structure where each XML attribute is mapped to a field with a name
201//! starting with `@`. Because Rust identifiers do not permit the `@` character,
202//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
203//! The name of the struct itself does not matter:
204//!
205//! ```
206//! # use serde::Deserialize;
207//! # type T = ();
208//! # type U = ();
209//! // Get both attributes
210//! # #[derive(Debug, PartialEq)]
211//! #[derive(Deserialize)]
212//! struct AnyName {
213//! #[serde(rename = "@one")]
214//! one: T,
215//!
216//! #[serde(rename = "@two")]
217//! two: U,
218//! }
219//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
220//! ```
221//! ```
222//! # use serde::Deserialize;
223//! # type T = ();
224//! // Get only the one attribute, ignore the other
225//! # #[derive(Debug, PartialEq)]
226//! #[derive(Deserialize)]
227//! struct AnyName {
228//! #[serde(rename = "@one")]
229//! one: T,
230//! }
231//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
232//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
234//! ```
235//! ```
236//! # use serde::Deserialize;
237//! // Ignore all attributes
238//! // You can also use the `()` type (unit type)
239//! # #[derive(Debug, PartialEq)]
240//! #[derive(Deserialize)]
241//! struct AnyName;
242//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
243//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
245//! ```
246//!
247//! All these structs can be used to deserialize from an XML on the
248//! left side depending on amount of information that you want to get.
249//! Of course, you can combine them with elements extractor structs (see below).
250//!
251//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
252//!
253//! NOTE: XML allows you to have an attribute and an element with the same name
254//! inside the one element. quick-xml deals with that by prepending a `@` prefix
255//! to the name of attributes.
256//! </div>
257//! </td>
258//! </tr>
259//! <!-- 4 ===================================================================================== -->
260//! <tr>
261//! <td>
262//! A typical XML with child elements. The root tag name does not matter:
263//!
264//! ```xml
265//! <any-tag>
266//! <one>...</one>
267//! <two>...</two>
268//! </any-tag>
269//! ```
270//! </td>
271//! <td>
272//! A structure where each XML child element is mapped to the field.
273//! Each element name becomes a name of field. The name of the struct itself
274//! does not matter:
275//!
276//! ```
277//! # use serde::Deserialize;
278//! # type T = ();
279//! # type U = ();
280//! // Get both elements
281//! # #[derive(Debug, PartialEq)]
282//! #[derive(Deserialize)]
283//! struct AnyName {
284//! one: T,
285//! two: U,
286//! }
287//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
288//! #
289//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
290//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
291//! ```
292//! ```
293//! # use serde::Deserialize;
294//! # type T = ();
295//! // Get only the one element, ignore the other
296//! # #[derive(Debug, PartialEq)]
297//! #[derive(Deserialize)]
298//! struct AnyName {
299//! one: T,
300//! }
301//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
303//! ```
304//! ```
305//! # use serde::Deserialize;
306//! // Ignore all elements
307//! // You can also use the `()` type (unit type)
308//! # #[derive(Debug, PartialEq)]
309//! #[derive(Deserialize)]
310//! struct AnyName;
311//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
312//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
315//! ```
316//!
317//! All these structs can be used to deserialize from an XML on the
318//! left side depending on amount of information that you want to get.
319//! Of course, you can combine them with attributes extractor structs (see above).
320//!
321//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
322//!
323//! NOTE: XML allows you to have an attribute and an element with the same name
324//! inside the one element. quick-xml deals with that by prepending a `@` prefix
325//! to the name of attributes.
326//! </div>
327//! </td>
328//! </tr>
329//! <!-- 5 ===================================================================================== -->
330//! <tr>
331//! <td>
332//! An XML with an attribute and a child element named equally:
333//!
334//! ```xml
335//! <any-tag field="...">
336//! <field>...</field>
337//! </any-tag>
338//! ```
339//! </td>
340//! <td>
341//!
342//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
343//! for an attribute:
344//!
345//! ```
346//! # use pretty_assertions::assert_eq;
347//! # use serde::Deserialize;
348//! # type T = ();
349//! # type U = ();
350//! # #[derive(Debug, PartialEq)]
351//! #[derive(Deserialize)]
352//! struct AnyName {
353//! #[serde(rename = "@field")]
354//! attribute: T,
355//! field: U,
356//! }
357//! # assert_eq!(
358//! # AnyName { attribute: (), field: () },
359//! # quick_xml::de::from_str(r#"
360//! # <any-tag field="...">
361//! # <field>...</field>
362//! # </any-tag>
363//! # "#).unwrap(),
364//! # );
365//! ```
366//! </td>
367//! </tr>
368//! <!-- ======================================================================================= -->
369//! <tr><th colspan="2">
370//!
371//! ## Optional attributes and elements
372//!
373//! </th></tr>
374//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
375//! <!-- 6 ===================================================================================== -->
376//! <tr>
377//! <td>
378//! An optional XML attribute that you want to capture.
379//! The root tag name does not matter:
380//!
381//! ```xml
382//! <any-tag optional="..."/>
383//! ```
384//! ```xml
385//! <any-tag/>
386//! ```
387//! </td>
388//! <td>
389//!
390//! A structure with an optional field, renamed according to the requirements
391//! for attributes:
392//!
393//! ```
394//! # use pretty_assertions::assert_eq;
395//! # use serde::Deserialize;
396//! # type T = ();
397//! # #[derive(Debug, PartialEq)]
398//! #[derive(Deserialize)]
399//! struct AnyName {
400//! #[serde(rename = "@optional")]
401//! optional: Option<T>,
402//! }
403//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
404//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
405//! ```
406//! When the XML attribute is present, type `T` will be deserialized from
407//! an attribute value (which is a string). Note, that if `T = String` or other
408//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
409//! represents the missed attribute:
410//! ```xml
411//! <any-tag optional="..."/><!-- Some("...") -->
412//! <any-tag optional=""/> <!-- Some("") -->
413//! <any-tag/> <!-- None -->
414//! ```
415//! </td>
416//! </tr>
417//! <!-- 7 ===================================================================================== -->
418//! <tr>
419//! <td>
420//! An optional XML element that you want to capture.
421//! The root tag name does not matter:
422//!
423//! ```xml
424//! <any-tag>
425//! <optional>...</optional>
426//! </any-tag>
427//! ```
428//! ```xml
429//! <any-tag>
430//! <optional/>
431//! </any-tag>
432//! ```
433//! ```xml
434//! <any-tag/>
435//! ```
436//! </td>
437//! <td>
438//!
439//! A structure with an optional field:
440//!
441//! ```
442//! # use pretty_assertions::assert_eq;
443//! # use serde::Deserialize;
444//! # type T = ();
445//! # #[derive(Debug, PartialEq)]
446//! #[derive(Deserialize)]
447//! struct AnyName {
448//! optional: Option<T>,
449//! }
450//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
451//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
452//! ```
453//! When the XML element is present, type `T` will be deserialized from an
454//! element (which is a string or a multi-mapping -- i.e. mapping which can have
455//! duplicated keys).
456//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
457//!
458//! Currently some edge cases exist, as described in issue [#497].
459//! </div>
460//! </td>
461//! </tr>
462//! <!-- ======================================================================================= -->
463//! <tr><th colspan="2">
464//!
465//! ## Choices (`xs:choice` XML Schema type)
466//!
467//! </th></tr>
468//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
469//! <!-- 8 ===================================================================================== -->
470//! <tr>
471//! <td>
472//! An XML with different root tag names, as well as text / CDATA content:
473//!
474//! ```xml
475//! <one field1="...">...</one>
476//! ```
477//! ```xml
478//! <two>
479//! <field2>...</field2>
480//! </two>
481//! ```
482//! ```xml
483//! Text <![CDATA[or (mixed)
484//! CDATA]]> content
485//! ```
486//! </td>
487//! <td>
488//!
489//! An enum where each variant has the name of a possible root tag. The name of
490//! the enum itself does not matter.
491//!
492//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
493//!
494//! All these structs can be used to deserialize from any XML on the
495//! left side depending on amount of information that you want to get:
496//!
497//! ```
498//! # use pretty_assertions::assert_eq;
499//! # use serde::Deserialize;
500//! # type T = ();
501//! # type U = ();
502//! # #[derive(Debug, PartialEq)]
503//! #[derive(Deserialize)]
504//! #[serde(rename_all = "snake_case")]
505//! enum AnyName {
506//! One { #[serde(rename = "@field1")] field1: T },
507//! Two { field2: U },
508//!
509//! /// Use unit variant, if you do not care of a content.
510//! /// You can use tuple variant if you want to parse
511//! /// textual content as an xs:list.
512//! /// Struct variants are not supported and will return
513//! /// Err(Unsupported)
514//! #[serde(rename = "$text")]
515//! Text(String),
516//! }
517//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
518//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
519//! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
520//! ```
521//! ```
522//! # use pretty_assertions::assert_eq;
523//! # use serde::Deserialize;
524//! # type T = ();
525//! # #[derive(Debug, PartialEq)]
526//! #[derive(Deserialize)]
527//! struct Two {
528//! field2: T,
529//! }
530//! # #[derive(Debug, PartialEq)]
531//! #[derive(Deserialize)]
532//! #[serde(rename_all = "snake_case")]
533//! enum AnyName {
534//! // `field1` content discarded
535//! One,
536//! Two(Two),
537//! #[serde(rename = "$text")]
538//! Text,
539//! }
540//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
541//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
542//! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
543//! ```
544//! ```
545//! # use pretty_assertions::assert_eq;
546//! # use serde::Deserialize;
547//! # #[derive(Debug, PartialEq)]
548//! #[derive(Deserialize)]
549//! #[serde(rename_all = "snake_case")]
550//! enum AnyName {
551//! One,
552//! // the <two> and textual content will be mapped to this
553//! #[serde(other)]
554//! Other,
555//! }
556//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
557//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
558//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
559//! ```
560//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
561//!
562//! NOTE: You should have variants for all possible tag names in your enum
563//! or have an `#[serde(other)]` variant.
564//! <!-- TODO: document an error type if that requirement is violated -->
565//! </div>
566//! </td>
567//! </tr>
568//! <!-- 9 ===================================================================================== -->
569//! <tr>
570//! <td>
571//!
572//! `<xs:choice>` embedded in the other element, and at the same time you want
573//! to get access to other attributes that can appear in the same container
574//! (`<any-tag>`). Also this case can be described, as if you want to choose
575//! Rust enum variant based on a tag name:
576//!
577//! ```xml
578//! <any-tag field="...">
579//! <one>...</one>
580//! </any-tag>
581//! ```
582//! ```xml
583//! <any-tag field="...">
584//! <two>...</two>
585//! </any-tag>
586//! ```
587//! ```xml
588//! <any-tag field="...">
589//! Text <![CDATA[or (mixed)
590//! CDATA]]> content
591//! </any-tag>
592//! ```
593//! </td>
594//! <td>
595//!
596//! A structure with a field which type is an `enum`.
597//!
598//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
599//!
600//! Names of the enum, struct, and struct field with `Choice` type do not matter:
601//!
602//! ```
603//! # use pretty_assertions::assert_eq;
604//! # use serde::Deserialize;
605//! # type T = ();
606//! # #[derive(Debug, PartialEq)]
607//! #[derive(Deserialize)]
608//! #[serde(rename_all = "snake_case")]
609//! enum Choice {
610//! One,
611//! Two,
612//!
613//! /// Use unit variant, if you do not care of a content.
614//! /// You can use tuple variant if you want to parse
615//! /// textual content as an xs:list.
616//! /// Struct variants are not supported and will return
617//! /// Err(Unsupported)
618//! #[serde(rename = "$text")]
619//! Text(String),
620//! }
621//! # #[derive(Debug, PartialEq)]
622//! #[derive(Deserialize)]
623//! struct AnyName {
624//! #[serde(rename = "@field")]
625//! field: T,
626//!
627//! #[serde(rename = "$value")]
628//! any_name: Choice,
629//! }
630//! # assert_eq!(
631//! # AnyName { field: (), any_name: Choice::One },
632//! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
633//! # );
634//! # assert_eq!(
635//! # AnyName { field: (), any_name: Choice::Two },
636//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
637//! # );
638//! # assert_eq!(
639//! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
640//! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
641//! # );
642//! ```
643//! </td>
644//! </tr>
645//! <!-- 10 ==================================================================================== -->
646//! <tr>
647//! <td>
648//!
649//! `<xs:choice>` embedded in the other element, and at the same time you want
650//! to get access to other elements that can appear in the same container
651//! (`<any-tag>`). Also this case can be described, as if you want to choose
652//! Rust enum variant based on a tag name:
653//!
654//! ```xml
655//! <any-tag>
656//! <field>...</field>
657//! <one>...</one>
658//! </any-tag>
659//! ```
660//! ```xml
661//! <any-tag>
662//! <two>...</two>
663//! <field>...</field>
664//! </any-tag>
665//! ```
666//! </td>
667//! <td>
668//!
669//! A structure with a field which type is an `enum`.
670//!
671//! Names of the enum, struct, and struct field with `Choice` type do not matter:
672//!
673//! ```
674//! # use pretty_assertions::assert_eq;
675//! # use serde::Deserialize;
676//! # type T = ();
677//! # #[derive(Debug, PartialEq)]
678//! #[derive(Deserialize)]
679//! #[serde(rename_all = "snake_case")]
680//! enum Choice {
681//! One,
682//! Two,
683//! }
684//! # #[derive(Debug, PartialEq)]
685//! #[derive(Deserialize)]
686//! struct AnyName {
687//! field: T,
688//!
689//! #[serde(rename = "$value")]
690//! any_name: Choice,
691//! }
692//! # assert_eq!(
693//! # AnyName { field: (), any_name: Choice::One },
694//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
695//! # );
696//! # assert_eq!(
697//! # AnyName { field: (), any_name: Choice::Two },
698//! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
699//! # );
700//! ```
701//!
702//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
703//!
704//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
705//! variant, element `<field>` will be mapped to the `field` and not to the enum
706//! variant.
707//! </div>
708//!
709//! </td>
710//! </tr>
711//! <!-- 11 ==================================================================================== -->
712//! <tr>
713//! <td>
714//!
715//! `<xs:choice>` encapsulated in other element with a fixed name:
716//!
717//! ```xml
718//! <any-tag field="...">
719//! <choice>
720//! <one>...</one>
721//! </choice>
722//! </any-tag>
723//! ```
724//! ```xml
725//! <any-tag field="...">
726//! <choice>
727//! <two>...</two>
728//! </choice>
729//! </any-tag>
730//! ```
731//! </td>
732//! <td>
733//!
734//! A structure with a field of an intermediate type with one field of `enum` type.
735//! Actually, this example is not necessary, because you can construct it by yourself
736//! using the composition rules that were described above. However the XML construction
737//! described here is very common, so it is shown explicitly.
738//!
739//! Names of the enum and struct do not matter:
740//!
741//! ```
742//! # use pretty_assertions::assert_eq;
743//! # use serde::Deserialize;
744//! # type T = ();
745//! # #[derive(Debug, PartialEq)]
746//! #[derive(Deserialize)]
747//! #[serde(rename_all = "snake_case")]
748//! enum Choice {
749//! One,
750//! Two,
751//! }
752//! # #[derive(Debug, PartialEq)]
753//! #[derive(Deserialize)]
754//! struct Holder {
755//! #[serde(rename = "$value")]
756//! any_name: Choice,
757//! }
758//! # #[derive(Debug, PartialEq)]
759//! #[derive(Deserialize)]
760//! struct AnyName {
761//! #[serde(rename = "@field")]
762//! field: T,
763//!
764//! choice: Holder,
765//! }
766//! # assert_eq!(
767//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
768//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
769//! # );
770//! # assert_eq!(
771//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
772//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
773//! # );
774//! ```
775//! </td>
776//! </tr>
777//! <!-- 12 ==================================================================================== -->
778//! <tr>
779//! <td>
780//!
781//! `<xs:choice>` encapsulated in other element with a fixed name:
782//!
783//! ```xml
784//! <any-tag>
785//! <field>...</field>
786//! <choice>
787//! <one>...</one>
788//! </choice>
789//! </any-tag>
790//! ```
791//! ```xml
792//! <any-tag>
793//! <choice>
794//! <two>...</two>
795//! </choice>
796//! <field>...</field>
797//! </any-tag>
798//! ```
799//! </td>
800//! <td>
801//!
802//! A structure with a field of an intermediate type with one field of `enum` type.
803//! Actually, this example is not necessary, because you can construct it by yourself
804//! using the composition rules that were described above. However the XML construction
805//! described here is very common, so it is shown explicitly.
806//!
807//! Names of the enum and struct do not matter:
808//!
809//! ```
810//! # use pretty_assertions::assert_eq;
811//! # use serde::Deserialize;
812//! # type T = ();
813//! # #[derive(Debug, PartialEq)]
814//! #[derive(Deserialize)]
815//! #[serde(rename_all = "snake_case")]
816//! enum Choice {
817//! One,
818//! Two,
819//! }
820//! # #[derive(Debug, PartialEq)]
821//! #[derive(Deserialize)]
822//! struct Holder {
823//! #[serde(rename = "$value")]
824//! any_name: Choice,
825//! }
826//! # #[derive(Debug, PartialEq)]
827//! #[derive(Deserialize)]
828//! struct AnyName {
829//! field: T,
830//!
831//! choice: Holder,
832//! }
833//! # assert_eq!(
834//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
835//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
836//! # );
837//! # assert_eq!(
838//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
839//! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
840//! # );
841//! ```
842//! </td>
843//! </tr>
844//! <!-- ======================================================================================== -->
845//! <tr><th colspan="2">
846//!
847//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
848//!
849//! </th></tr>
850//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
851//! <!-- 13 ==================================================================================== -->
852//! <tr>
853//! <td>
854//! A sequence inside of a tag without a dedicated name:
855//!
856//! ```xml
857//! <any-tag/>
858//! ```
859//! ```xml
860//! <any-tag>
861//! <item/>
862//! </any-tag>
863//! ```
864//! ```xml
865//! <any-tag>
866//! <item/>
867//! <item/>
868//! <item/>
869//! </any-tag>
870//! ```
871//! </td>
872//! <td>
873//!
874//! A structure with a field which is a sequence type, for example, [`Vec`].
875//! Because XML syntax does not distinguish between empty sequences and missed
876//! elements, we should indicate that on the Rust side, because serde will require
877//! that field `item` exists. You can do that in two possible ways:
878//!
879//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
880//! ```
881//! # use pretty_assertions::assert_eq;
882//! # use serde::Deserialize;
883//! # type Item = ();
884//! # #[derive(Debug, PartialEq)]
885//! #[derive(Deserialize)]
886//! struct AnyName {
887//! #[serde(default)]
888//! item: Vec<Item>,
889//! }
890//! # assert_eq!(
891//! # AnyName { item: vec![] },
892//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
893//! # );
894//! # assert_eq!(
895//! # AnyName { item: vec![()] },
896//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
897//! # );
898//! # assert_eq!(
899//! # AnyName { item: vec![(), (), ()] },
900//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
901//! # );
902//! ```
903//!
904//! Use the [`Option`]. In that case the inner array will always contain at least one
905//! element after deserialization:
906//! ```ignore
907//! # use pretty_assertions::assert_eq;
908//! # use serde::Deserialize;
909//! # type Item = ();
910//! # #[derive(Debug, PartialEq)]
911//! #[derive(Deserialize)]
912//! struct AnyName {
913//! item: Option<Vec<Item>>,
914//! }
915//! # assert_eq!(
916//! # AnyName { item: None },
917//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
918//! # );
919//! # assert_eq!(
920//! # AnyName { item: Some(vec![()]) },
921//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
922//! # );
923//! # assert_eq!(
924//! # AnyName { item: Some(vec![(), (), ()]) },
925//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
926//! # );
927//! ```
928//!
929//! See also [Frequently Used Patterns](#element-lists).
930//!
931//! [field]: https://serde.rs/field-attrs.html#default
932//! [struct]: https://serde.rs/container-attrs.html#default
933//! </td>
934//! </tr>
935//! <!-- 14 ==================================================================================== -->
936//! <tr>
937//! <td>
938//! A sequence with a strict order, probably with mixed content
939//! (text / CDATA and tags):
940//!
941//! ```xml
942//! <one>...</one>
943//! text
944//! <![CDATA[cdata]]>
945//! <two>...</two>
946//! <one>...</one>
947//! ```
948//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
949//!
950//! NOTE: this is just an example for showing mapping. XML does not allow
951//! multiple root tags -- you should wrap the sequence into a tag.
952//! </div>
953//! </td>
954//! <td>
955//!
956//! All elements are mapped to a heterogeneous sequential type: tuple or named tuple.
957//! Each element of the tuple should be able to be deserialized from the nested
958//! element content (`...`), except the enum types which would be deserialized
959//! from the full element (`<one>...</one>`), so they could use the element name
960//! to choose the right variant:
961//!
962//! ```
963//! # use pretty_assertions::assert_eq;
964//! # use serde::Deserialize;
965//! # type One = ();
966//! # type Two = ();
967//! # /*
968//! type One = ...;
969//! type Two = ...;
970//! # */
971//! # #[derive(Debug, PartialEq)]
972//! #[derive(Deserialize)]
973//! struct AnyName(One, String, Two, One);
974//! # assert_eq!(
975//! # AnyName((), "text cdata".into(), (), ()),
976//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
977//! # );
978//! ```
979//! ```
980//! # use pretty_assertions::assert_eq;
981//! # use serde::Deserialize;
982//! # #[derive(Debug, PartialEq)]
983//! #[derive(Deserialize)]
984//! #[serde(rename_all = "snake_case")]
985//! enum Choice {
986//! One,
987//! }
988//! # type Two = ();
989//! # /*
990//! type Two = ...;
991//! # */
992//! type AnyName = (Choice, String, Two, Choice);
993//! # assert_eq!(
994//! # (Choice::One, "text cdata".to_string(), (), Choice::One),
995//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
996//! # );
997//! ```
998//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
999//!
1000//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1001//! so you cannot have two adjacent string types in your sequence.
1002//!
1003//! NOTE: In the case that the list might contain tags that are overlapped with
1004//! tags that do not correspond to the list you should add the feature [`overlapped-lists`].
1005//! </div>
1006//! </td>
1007//! </tr>
1008//! <!-- 15 ==================================================================================== -->
1009//! <tr>
1010//! <td>
1011//! A sequence with a non-strict order, probably with a mixed content
1012//! (text / CDATA and tags).
1013//!
1014//! ```xml
1015//! <one>...</one>
1016//! text
1017//! <![CDATA[cdata]]>
1018//! <two>...</two>
1019//! <one>...</one>
1020//! ```
1021//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1022//!
1023//! NOTE: this is just an example for showing mapping. XML does not allow
1024//! multiple root tags -- you should wrap the sequence into a tag.
1025//! </div>
1026//! </td>
1027//! <td>
1028//! A homogeneous sequence of elements with a fixed or dynamic size:
1029//!
1030//! ```
1031//! # use pretty_assertions::assert_eq;
1032//! # use serde::Deserialize;
1033//! # #[derive(Debug, PartialEq)]
1034//! #[derive(Deserialize)]
1035//! #[serde(rename_all = "snake_case")]
1036//! enum Choice {
1037//! One,
1038//! Two,
1039//! #[serde(other)]
1040//! Other,
1041//! }
1042//! type AnyName = [Choice; 4];
1043//! # assert_eq!(
1044//! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
1045//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1046//! # );
1047//! ```
1048//! ```
1049//! # use pretty_assertions::assert_eq;
1050//! # use serde::Deserialize;
1051//! # #[derive(Debug, PartialEq)]
1052//! #[derive(Deserialize)]
1053//! #[serde(rename_all = "snake_case")]
1054//! enum Choice {
1055//! One,
1056//! Two,
1057//! #[serde(rename = "$text")]
1058//! Other(String),
1059//! }
1060//! type AnyName = Vec<Choice>;
1061//! # assert_eq!(
1062//! # vec![
1063//! # Choice::One,
1064//! # Choice::Other("text cdata".into()),
1065//! # Choice::Two,
1066//! # Choice::One,
1067//! # ],
1068//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1069//! # );
1070//! ```
1071//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1072//!
1073//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1074//! so you cannot have two adjacent string types in your sequence.
1075//! </div>
1076//! </td>
1077//! </tr>
1078//! <!-- 16 ==================================================================================== -->
1079//! <tr>
1080//! <td>
1081//! A sequence with a strict order, probably with a mixed content,
1082//! (text and tags) inside of the other element:
1083//!
1084//! ```xml
1085//! <any-tag attribute="...">
1086//! <one>...</one>
1087//! text
1088//! <![CDATA[cdata]]>
1089//! <two>...</two>
1090//! <one>...</one>
1091//! </any-tag>
1092//! ```
1093//! </td>
1094//! <td>
1095//!
1096//! A structure where all child elements are mapped to one field which has
1097//! a heterogeneous sequential type: tuple or named tuple. Each element of the
1098//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1099//!
1100//! You MUST specify `#[serde(rename = "$value")]` on that field:
1101//!
1102//! ```
1103//! # use pretty_assertions::assert_eq;
1104//! # use serde::Deserialize;
1105//! # type One = ();
1106//! # type Two = ();
1107//! # /*
1108//! type One = ...;
1109//! type Two = ...;
1110//! # */
1111//!
1112//! # #[derive(Debug, PartialEq)]
1113//! #[derive(Deserialize)]
1114//! struct AnyName {
1115//! #[serde(rename = "@attribute")]
1116//! # attribute: (),
1117//! # /*
1118//! attribute: ...,
1119//! # */
1120//! // Not (yet?) supported by serde
1121//! // https://github.com/serde-rs/serde/issues/1905
1122//! // #[serde(flatten)]
1123//! #[serde(rename = "$value")]
1124//! any_name: (One, String, Two, One),
1125//! }
1126//! # assert_eq!(
1127//! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1128//! # quick_xml::de::from_str("\
1129//! # <any-tag attribute='...'>\
1130//! # <one>...</one>\
1131//! # text \
1132//! # <![CDATA[cdata]]>\
1133//! # <two>...</two>\
1134//! # <one>...</one>\
1135//! # </any-tag>"
1136//! # ).unwrap(),
1137//! # );
1138//! ```
1139//! ```
1140//! # use pretty_assertions::assert_eq;
1141//! # use serde::Deserialize;
1142//! # type One = ();
1143//! # type Two = ();
1144//! # /*
1145//! type One = ...;
1146//! type Two = ...;
1147//! # */
1148//!
1149//! # #[derive(Debug, PartialEq)]
1150//! #[derive(Deserialize)]
1151//! struct NamedTuple(One, String, Two, One);
1152//!
1153//! # #[derive(Debug, PartialEq)]
1154//! #[derive(Deserialize)]
1155//! struct AnyName {
1156//! #[serde(rename = "@attribute")]
1157//! # attribute: (),
1158//! # /*
1159//! attribute: ...,
1160//! # */
1161//! // Not (yet?) supported by serde
1162//! // https://github.com/serde-rs/serde/issues/1905
1163//! // #[serde(flatten)]
1164//! #[serde(rename = "$value")]
1165//! any_name: NamedTuple,
1166//! }
1167//! # assert_eq!(
1168//! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1169//! # quick_xml::de::from_str("\
1170//! # <any-tag attribute='...'>\
1171//! # <one>...</one>\
1172//! # text \
1173//! # <![CDATA[cdata]]>\
1174//! # <two>...</two>\
1175//! # <one>...</one>\
1176//! # </any-tag>"
1177//! # ).unwrap(),
1178//! # );
1179//! ```
1180//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1181//!
1182//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1183//! so you cannot have two adjacent string types in your sequence.
1184//! </div>
1185//! </td>
1186//! </tr>
1187//! <!-- 17 ==================================================================================== -->
1188//! <tr>
1189//! <td>
1190//! A sequence with a non-strict order, probably with a mixed content
1191//! (text / CDATA and tags) inside of the other element:
1192//!
1193//! ```xml
1194//! <any-tag>
1195//! <one>...</one>
1196//! text
1197//! <![CDATA[cdata]]>
1198//! <two>...</two>
1199//! <one>...</one>
1200//! </any-tag>
1201//! ```
1202//! </td>
1203//! <td>
1204//!
1205//! A structure where all child elements are mapped to one field which has
1206//! a homogeneous sequential type: array-like container. A container type `T`
1207//! should be able to be deserialized from the nested element content (`...`),
1208//! except if it is an enum type which would be deserialized from the full
1209//! element (`<one>...</one>`).
1210//!
1211//! You MUST specify `#[serde(rename = "$value")]` on that field:
1212//!
1213//! ```
1214//! # use pretty_assertions::assert_eq;
1215//! # use serde::Deserialize;
1216//! # #[derive(Debug, PartialEq)]
1217//! #[derive(Deserialize)]
1218//! #[serde(rename_all = "snake_case")]
1219//! enum Choice {
1220//! One,
1221//! Two,
1222//! #[serde(rename = "$text")]
1223//! Other(String),
1224//! }
1225//! # #[derive(Debug, PartialEq)]
1226//! #[derive(Deserialize)]
1227//! struct AnyName {
1228//! #[serde(rename = "@attribute")]
1229//! # attribute: (),
1230//! # /*
1231//! attribute: ...,
1232//! # */
1233//! // Not (yet?) supported by serde
1234//! // https://github.com/serde-rs/serde/issues/1905
1235//! // #[serde(flatten)]
1236//! #[serde(rename = "$value")]
1237//! any_name: [Choice; 4],
1238//! }
1239//! # assert_eq!(
1240//! # AnyName { attribute: (), any_name: [
1241//! # Choice::One,
1242//! # Choice::Other("text cdata".into()),
1243//! # Choice::Two,
1244//! # Choice::One,
1245//! # ] },
1246//! # quick_xml::de::from_str("\
1247//! # <any-tag attribute='...'>\
1248//! # <one>...</one>\
1249//! # text \
1250//! # <![CDATA[cdata]]>\
1251//! # <two>...</two>\
1252//! # <one>...</one>\
1253//! # </any-tag>"
1254//! # ).unwrap(),
1255//! # );
1256//! ```
1257//! ```
1258//! # use pretty_assertions::assert_eq;
1259//! # use serde::Deserialize;
1260//! # #[derive(Debug, PartialEq)]
1261//! #[derive(Deserialize)]
1262//! #[serde(rename_all = "snake_case")]
1263//! enum Choice {
1264//! One,
1265//! Two,
1266//! #[serde(rename = "$text")]
1267//! Other(String),
1268//! }
1269//! # #[derive(Debug, PartialEq)]
1270//! #[derive(Deserialize)]
1271//! struct AnyName {
1272//! #[serde(rename = "@attribute")]
1273//! # attribute: (),
1274//! # /*
1275//! attribute: ...,
1276//! # */
1277//! // Not (yet?) supported by serde
1278//! // https://github.com/serde-rs/serde/issues/1905
1279//! // #[serde(flatten)]
1280//! #[serde(rename = "$value")]
1281//! any_name: Vec<Choice>,
1282//! }
1283//! # assert_eq!(
1284//! # AnyName { attribute: (), any_name: vec![
1285//! # Choice::One,
1286//! # Choice::Other("text cdata".into()),
1287//! # Choice::Two,
1288//! # Choice::One,
1289//! # ] },
1290//! # quick_xml::de::from_str("\
1291//! # <any-tag attribute='...'>\
1292//! # <one>...</one>\
1293//! # text \
1294//! # <![CDATA[cdata]]>\
1295//! # <two>...</two>\
1296//! # <one>...</one>\
1297//! # </any-tag>"
1298//! # ).unwrap(),
1299//! # );
1300//! ```
1301//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1302//!
1303//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1304//! so you cannot have two adjacent string types in your sequence.
1305//! </div>
1306//! </td>
1307//! </tr>
1308//! </tbody>
1309//! </table>
1310//!
1311//!
1312//!
1313//! Composition Rules
1314//! =================
1315//!
1316//! The XML format is very different from other formats supported by `serde`.
1317//! One such difference is how data in the serialized form is related to
1318//! the Rust type. Usually each byte in the data can be associated only with
1319//! one field in the data structure. However, XML is an exception.
1320//!
1321//! For example, take this XML:
1322//!
1323//! ```xml
1324//! <any>
1325//! <key attr="value"/>
1326//! </any>
1327//! ```
1328//!
1329//! and try to deserialize it to the struct `AnyName`:
1330//!
1331//! ```no_run
1332//! # use serde::Deserialize;
1333//! #[derive(Deserialize)]
1334//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1335//! // Used data: ^^^^^^^^^^^^^^^^^^^
1336//! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
1337//! // Used data: ^^^^^^^^^^^^
1338//! }
1339//! #[derive(Deserialize)]
1340//! struct Inner {
1341//! #[serde(rename = "@attr")]
1342//! attr: String, // String calls `deserialize_string` on `value`
1343//! // Used data: ^^^^^
1344//! }
1345//! ```
1346//!
1347//! The comments show which methods of a [`Deserializer`] are called by each struct's
1348//! `deserialize` method and which input they see. **Used data** shows what
1349//! content is actually used for deserializing. As you can see, the name of the inner
1350//! `<key>` tag is used both as a map key / outer struct field name and as part
1351//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1352//! by it).
1353//!
1354//!
1355//!
1356//! Enum Representations
1357//! ====================
1358//!
1359//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1360//! `$value` fields. A normal representation is compatible with serde's adjacent
1361//! and internal tags feature -- tags for adjacently and internally tagged enums
1362//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1363//! using [`Deserializer::deserialize_enum`].
1364//!
1365//! Use these simple rules to remember how an enum would be represented in XML:
1366//! - In a `$value` field the representation is always the same as the top-level representation;
1367//! - In a `$text` field the representation is always the same as in a normal field,
1368//! but the surrounding tags with the field name are removed;
1369//! - In a normal field the representation always contains a tag with the field name.
1370//!
1371//! Normal enum variant
1372//! -------------------
1373//!
1374//! To model an `xs:choice` XML construct use `$value` field.
1375//! To model a top-level `xs:choice` just use the enum type.
1376//!
1377//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1378//! |-------|-----------------------------------------|---------------------|---------------------|
1379//! |Unit |`<Unit/>` |`<field>Unit</field>`|`Unit` |
1380//! |Newtype|`<Newtype>42</Newtype>` |Err(Unsupported) |Err(Unsupported) |
1381//! |Tuple |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Unsupported) |Err(Unsupported) |
1382//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Unsupported) |Err(Unsupported) |
1383//!
1384//! `$text` enum variant
1385//! --------------------
1386//!
1387//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1388//! |-------|-----------------------------------------|---------------------|---------------------|
1389//! |Unit |_(empty)_ |`<field/>` |_(empty)_ |
1390//! |Newtype|`42` |Err(Unsupported) [^1]|Err(Unsupported) [^2]|
1391//! |Tuple |`42 answer` |Err(Unsupported) [^3]|Err(Unsupported) [^4]|
1392//! |Struct |Err(Unsupported) |Err(Unsupported) |Err(Unsupported) |
1393//!
1394//! [^1]: If this were serialized as `<field>42</field>`, deserialization would be ambiguous,
1395//! because it would clash with the `Unit` representation in a normal field.
1396//!
1397//! [^2]: If this were serialized as `42`, deserialization would be ambiguous,
1398//! because it would clash with the `Unit` representation in a `$text` field.
1399//!
1400//! [^3]: If this were serialized as `<field>42 answer</field>`, deserialization would be ambiguous,
1401//! because it would clash with the `Unit` representation in a normal field.
1402//!
1403//! [^4]: If this were serialized as `42 answer`, deserialization would be ambiguous,
1404//! because it would clash with the `Unit` representation in a `$text` field.
1405//!
1406//!
1407//!
1408//! Difference between `$text` and `$value` special names
1409//! =====================================================
1410//!
1411//! quick-xml supports two special names for fields -- `$text` and `$value`.
1412//! Although they may seem the same, there is a distinction. Two different
1413//! names are required mostly for serialization, because quick-xml should know
1414//! how you want to serialize certain constructs, which could be represented
1415//! through XML in multiple different ways.
1416//!
1417//! The only difference is in how complex types and sequences are serialized.
1418//! If you doubt which one you should select, begin with [`$value`](#value).
1419//!
1420//! ## `$text`
1421//! `$text` is used when you want to write your XML as text or CDATA content.
1422//! More formally, a field with that name represents a simple type definition with
1423//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1424//! as described in the [specification].
1425//!
1426//! As a result, not all types of such fields can be serialized. Only serialization
1427//! of the following types is supported:
1428//! - all primitive types (strings, numbers, booleans)
1429//! - unit variants of enumerations (serializes to a name of a variant)
1430//! - newtypes (delegates serialization to inner type)
1431//! - [`Option`] of above (`None` serializes to nothing)
1432//! - sequences (including tuples and tuple variants of enumerations) of above,
1433//! excluding `None` and empty string elements (because it will not be possible
1434//! to deserialize them back). The elements are separated by space(s)
1435//! - unit type `()` and unit structs (serializes to nothing)
1436//!
1437//! Complex types, such as structs and maps, are not supported in this field.
1438//! If you want them, you should use `$value`.
1439//!
1440//! Sequences are serialized to a space-delimited string, that is why only certain
1441//! types are allowed in this mode:
1442//!
1443//! ```
1444//! # use serde::{Deserialize, Serialize};
1445//! # use quick_xml::de::from_str;
1446//! # use quick_xml::se::to_string;
1447//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1448//! struct AnyName {
1449//! #[serde(rename = "$text")]
1450//! field: Vec<usize>,
1451//! }
1452//!
1453//! let obj = AnyName { field: vec![1, 2, 3] };
1454//! let xml = to_string(&obj).unwrap();
1455//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1456//!
1457//! let object: AnyName = from_str(&xml).unwrap();
1458//! assert_eq!(object, obj);
1459//! ```
1460//!
1461//! ## `$value`
1462//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1463//!
1464//! NOTE: a name `#content` would better explain the purpose of that field,
1465//! but `$value` is used for compatibility with other XML serde crates, which
1466//! use that name. This will allow you to switch XML crates more smoothly if required.
1467//! </div>
1468//!
1469//! Representation of primitive types in `$value` does not differ from their
1470//! representation in `$text` field. The difference is how sequences are serialized.
1471//! `$value` serializes each sequence item as a separate XML element. The name
1472//! of that element is taken from serialized type, and because only `enum`s provide
1473//! such name (their variant name), only they should be used for such fields.
1474//!
1475//! `$value` fields do not support `struct` types with fields, the serialization
1476//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1477//! type `()` serialize to nothing and can be deserialized from any content.
1478//!
1479//! Serialization and deserialization of a `$value` field are performed as usual, except
1480//! that the name for an XML element will be given by the serialized type, instead of
1481//! the field. This allows serializing enumerated types, where the variant is encoded
1482//! as a tag name, and so represents an XSD `xs:choice` schema by a Rust `enum`.
1483//!
1484//! In the example below, `field` will be serialized as `<field>A</field>`, because elements
1485//! get their names from the field name and the variant name is written as text. The variant
1486//! cannot be deserialized from the elements `<A/>`, `<B/>` or `<C/>`, because `AnyName` looks only for `<field>`:
1487//!
1488//! ```
1489//! # use serde::{Deserialize, Serialize};
1490//! # use pretty_assertions::assert_eq;
1491//! # #[derive(PartialEq, Debug)]
1492//! #[derive(Deserialize, Serialize)]
1493//! enum Enum { A, B, C }
1494//!
1495//! # #[derive(PartialEq, Debug)]
1496//! #[derive(Deserialize, Serialize)]
1497//! struct AnyName {
1498//! // <field>A</field>, <field>B</field>, or <field>C</field>
1499//! field: Enum,
1500//! }
1501//! # assert_eq!(
1502//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1503//! # "<AnyName><field>A</field></AnyName>",
1504//! # );
1505//! # assert_eq!(
1506//! # AnyName { field: Enum::B },
1507//! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1508//! # );
1509//! ```
1510//!
1511//! If you rename field to `$value`, then `field` would be serialized as `<A/>`,
1512//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1513//! deserialize it from the same elements:
1514//!
1515//! ```
1516//! # use serde::{Deserialize, Serialize};
1517//! # use pretty_assertions::assert_eq;
1518//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1519//! # enum Enum { A, B, C }
1520//! #
1521//! # #[derive(PartialEq, Debug)]
1522//! #[derive(Deserialize, Serialize)]
1523//! struct AnyName {
1524//! // <A/>, <B/> or <C/>
1525//! #[serde(rename = "$value")]
1526//! field: Enum,
1527//! }
1528//! # assert_eq!(
1529//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1530//! # "<AnyName><A/></AnyName>",
1531//! # );
1532//! # assert_eq!(
1533//! # AnyName { field: Enum::B },
1534//! # quick_xml::de::from_str("<root><B/></root>").unwrap(),
1535//! # );
1536//! ```
1537//!
1538//! ### Primitives and sequences of primitives
1539//!
1540//! Sequences are serialized to a list of elements. Note that types that do not
1541//! produce their own tag (i.e. primitives) are written as is, without delimiters:
1542//!
1543//! ```
1544//! # use serde::{Deserialize, Serialize};
1545//! # use pretty_assertions::assert_eq;
1546//! # use quick_xml::de::from_str;
1547//! # use quick_xml::se::to_string;
1548//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1549//! struct AnyName {
1550//! #[serde(rename = "$value")]
1551//! field: Vec<usize>,
1552//! }
1553//!
1554//! let obj = AnyName { field: vec![1, 2, 3] };
1555//! let xml = to_string(&obj).unwrap();
1556//! // Note that types that do not produce their own tag are written as is!
1557//! assert_eq!(xml, "<AnyName>123</AnyName>");
1558//!
1559//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1560//! assert_eq!(object, AnyName { field: vec![123] });
1561//!
1562//! // `1 2 3` is mapped to a single `usize` element
1563//! // It is impossible to deserialize list of primitives to such field
1564//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1565//! ```
1566//!
1567//! A particular case of that example is a string `$value` field, which probably
1568//! would be the most used example of that attribute:
1569//!
1570//! ```
1571//! # use serde::{Deserialize, Serialize};
1572//! # use pretty_assertions::assert_eq;
1573//! # use quick_xml::de::from_str;
1574//! # use quick_xml::se::to_string;
1575//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1576//! struct AnyName {
1577//! #[serde(rename = "$value")]
1578//! field: String,
1579//! }
1580//!
1581//! let obj = AnyName { field: "content".to_string() };
1582//! let xml = to_string(&obj).unwrap();
1583//! assert_eq!(xml, "<AnyName>content</AnyName>");
1584//! ```
1585//!
1586//! ### Structs and sequences of structs
1587//!
1588//! Note that structures do not have a serializable name either (the name of the
1589//! type is never used), so it is impossible to serialize a non-unit struct or
1590//! a sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1591//! are serialized as an empty string, because units themselves serialize
1592//! to nothing:
1593//!
1594//! ```
1595//! # use serde::{Deserialize, Serialize};
1596//! # use pretty_assertions::assert_eq;
1597//! # use quick_xml::de::from_str;
1598//! # use quick_xml::se::to_string;
1599//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1600//! struct Unit;
1601//!
1602//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1603//! struct AnyName {
1604//! // #[serde(default)] is required for deserialization of empty lists
1605//! // This is a general note, not related to $value
1606//! #[serde(rename = "$value", default)]
1607//! field: Vec<Unit>,
1608//! }
1609//!
1610//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1611//! let xml = to_string(&obj).unwrap();
1612//! assert_eq!(xml, "<AnyName/>");
1613//!
1614//! let object: AnyName = from_str("<AnyName/>").unwrap();
1615//! assert_eq!(object, AnyName { field: vec![] });
1616//!
1617//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1618//! assert_eq!(object, AnyName { field: vec![] });
1619//!
1620//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1621//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1622//! ```
1623//!
1624//! ### Enums and sequences of enums
1625//!
1626//! Enumerations use the variant name as an element name:
1627//!
1628//! ```
1629//! # use serde::{Deserialize, Serialize};
1630//! # use pretty_assertions::assert_eq;
1631//! # use quick_xml::de::from_str;
1632//! # use quick_xml::se::to_string;
1633//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1634//! struct AnyName {
1635//! #[serde(rename = "$value")]
1636//! field: Vec<Enum>,
1637//! }
1638//!
1639//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1640//! enum Enum { A, B, C }
1641//!
1642//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1643//! let xml = to_string(&obj).unwrap();
1644//! assert_eq!(
1645//! xml,
1646//! "<AnyName>\
1647//! <A/>\
1648//! <B/>\
1649//! <C/>\
1650//! </AnyName>"
1651//! );
1652//!
1653//! let object: AnyName = from_str(&xml).unwrap();
1654//! assert_eq!(object, obj);
1655//! ```
1656//!
1657//! ----------------------------------------------------------------------------
1658//!
1659//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1660//! that is not enforced, so you can theoretically have both, but you should
1661//! avoid that.
1662//!
1663//!
1664//!
1665//! Frequently Used Patterns
1666//! ========================
1667//!
1668//! Some XML constructs are used so frequently that it is worth documenting the recommended
1669//! way to represent them in Rust. The sections below describe them.
1670//!
1671//! `<element>` lists
1672//! -----------------
1673//! Many XML formats wrap lists of elements in an additional container,
1674//! although this is not required by the XML rules:
1675//!
1676//! ```xml
1677//! <root>
1678//! <field1/>
1679//! <field2/>
1680//! <list><!-- Container -->
1681//! <element/>
1682//! <element/>
1683//! <element/>
1684//! </list>
1685//! <field3/>
1686//! </root>
1687//! ```
1688//! In this case, there is a great desire to describe this XML in this way:
1689//! ```
1690//! /// Represents <element/>
1691//! type Element = ();
1692//!
1693//! /// Represents <root>...</root>
1694//! struct AnyName {
1695//! // Incorrect
1696//! list: Vec<Element>,
1697//! }
1698//! ```
1699//! This will not work, because potentially `<list>` element can have attributes
1700//! and other elements inside. You should define the struct for the `<list>`
1701//! explicitly, as you do that in the XSD for that XML:
1702//! ```
1703//! /// Represents <element/>
1704//! type Element = ();
1705//!
1706//! /// Represents <root>...</root>
1707//! struct AnyName {
1708//! // Correct
1709//! list: List,
1710//! }
1711//! /// Represents <list>...</list>
1712//! struct List {
1713//! element: Vec<Element>,
1714//! }
1715//! ```
1716//!
1717//! If you want to simplify your API, you could write a simple function for unwrapping
1718//! inner list and apply it via [`deserialize_with`]:
1719//!
1720//! ```
1721//! # use pretty_assertions::assert_eq;
1722//! use quick_xml::de::from_str;
1723//! use serde::{Deserialize, Deserializer};
1724//!
1725//! /// Represents <element/>
1726//! type Element = ();
1727//!
1728//! /// Represents <root>...</root>
1729//! #[derive(Deserialize, Debug, PartialEq)]
1730//! struct AnyName {
1731//! #[serde(deserialize_with = "unwrap_list")]
1732//! list: Vec<Element>,
1733//! }
1734//!
1735//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1736//! where
1737//! D: Deserializer<'de>,
1738//! {
1739//! /// Represents <list>...</list>
1740//! #[derive(Deserialize)]
1741//! struct List {
1742//! // default allows empty list
1743//! #[serde(default)]
1744//! element: Vec<Element>,
1745//! }
1746//! Ok(List::deserialize(deserializer)?.element)
1747//! }
1748//!
1749//! assert_eq!(
1750//! AnyName { list: vec![(), (), ()] },
1751//! from_str("
1752//! <root>
1753//! <list>
1754//! <element/>
1755//! <element/>
1756//! <element/>
1757//! </list>
1758//! </root>
1759//! ").unwrap(),
1760//! );
1761//! ```
1762//!
1763//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1764//!
1765//! Overlapped (Out-of-Order) Elements
1766//! ----------------------------------
1767//! In the case that the list might contain tags that are overlapped with
1768//! tags that do not correspond to the list (this is a usual case in XML
1769//! documents) like this:
1770//! ```xml
1771//! <any-name>
1772//! <item/>
1773//! <another-item/>
1774//! <item/>
1775//! <item/>
1776//! </any-name>
1777//! ```
1778//! you should enable the [`overlapped-lists`] feature to make it possible
1779//! to deserialize this to:
1780//! ```no_run
1781//! # use serde::Deserialize;
1782//! #[derive(Deserialize)]
1783//! #[serde(rename_all = "kebab-case")]
1784//! struct AnyName {
1785//! item: Vec<()>,
1786//! another_item: (),
1787//! }
1788//! ```
1789//!
1790//!
1791//! Internally Tagged Enums
1792//! -----------------------
1793//! [Tagged enums] are currently not supported because of an issue in the Serde
1794//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
1795//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
1796//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]`
1797//! or implementing [`Deserialize`], but this can get very tedious very fast for
1798//! files with large amounts of tagged enums. To help with this issue quick-xml
1799//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
1800//! macro documentation for details.
1801//!
1802//!
1803//! [`overlapped-lists`]: ../index.html#overlapped-lists
1804//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1805//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1806//! [#497]: https://github.com/tafia/quick-xml/issues/497
1807//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1808//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1809//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1810//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1811//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1812//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1813//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1814
1815// Macros should be defined before the modules that use them
1816// Also, macros should be imported before using them
1817use serde::serde_if_integer128;
1818
/// Generates a single `deserialize_*` method which reads the value as a string,
/// parses it with `.parse()` and passes the parsed value to the visitor.
///
/// - `$method`: name of the generated method (for example, `deserialize_i8`);
/// - `$visit`: visitor method that receives the parsed value (for example, `visit_i8`);
/// - `$mut`: optional token that is inserted before `self` in the signature.
macro_rules! deserialize_type {
    ($method:ident => $visit:ident, $($mut:tt)?) => {
        fn $method<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // The string is parsed as is: valid integer representations
            // cannot be escaped, so there is no need to unescape it
            let raw = self.read_string()?;
            let value = raw.parse()?;
            visitor.$visit(value)
        }
    };
}
1831
/// Generates the deserialization methods for scalar types (numbers, booleans,
/// characters, strings, byte arrays and identifiers) together with the methods
/// that simply forward to another `deserialize_*` method.
///
/// The optional `$mut` token is inserted before `self` in the signature of
/// every generated method that calls `self.read_string()`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads the value as a string and converts it with `str2bool`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            str2bool(&self.read_string()?, visitor)
        }

        /// A character is deserialized the same way as a [string](#method.deserialize_str).
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Reads the value as a string. Borrowed content is given to the visitor
        /// as a borrowed string, owned content as an owned `String`.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// An owned string is deserialized the same way as a [non-owned](#method.deserialize_str) one.
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Always returns [`DeError::Unsupported`]
        fn deserialize_bytes<V>(self, _visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            Err(DeError::Unsupported("binary data content is not supported by XML format".into()))
        }

        /// Delegates to [`deserialize_bytes`](#method.deserialize_bytes).
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is deserialized the same way as an [unnamed unit](#method.deserialize_unit);
        /// the name is ignored.
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// A tuple is deserialized the same way as a [sequence](#method.deserialize_seq);
        /// the length is ignored.
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is deserialized the same way as an [unnamed tuple](#method.deserialize_tuple);
        /// the name is ignored.
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Delegates to [`deserialize_struct`](#method.deserialize_struct),
        /// passing an empty name and an empty list of fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// An identifier is deserialized the same way as a [string](#method.deserialize_str).
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Delegates to [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
1967
1968mod key;
1969mod map;
1970mod resolver;
1971mod simple_type;
1972mod text;
1973mod var;
1974
1975pub use crate::errors::serialize::DeError;
1976pub use resolver::{EntityResolver, NoEntityResolver};
1977
1978use crate::{
1979 de::map::ElementMapAccess,
1980 encoding::Decoder,
1981 errors::Error,
1982 events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
1983 name::QName,
1984 reader::Reader,
1985};
1986use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
1987use std::borrow::Cow;
1988#[cfg(feature = "overlapped-lists")]
1989use std::collections::VecDeque;
1990use std::io::BufRead;
1991use std::mem::replace;
1992#[cfg(feature = "overlapped-lists")]
1993use std::num::NonZeroUsize;
1994use std::ops::Deref;
1995
/// Special name (`$text`) for data represented by a text node or a CDATA node.
/// XML markup is not expected in such data.
pub(crate) const TEXT_KEY: &str = "$text";
/// Special name (`$value`) for data represented by any XML markup inside.
pub(crate) const VALUE_KEY: &str = "$value";
2000
/// Text content obtained by decoding and joining a run of adjacent [`Text`]
/// and [`CData`] events. Events belong to the same run when they follow each
/// other directly or are separated only by [`Comment`] or [`PI`] events
/// (any number of them).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Debug, Eq, PartialEq)]
pub struct Text<'a> {
    text: Cow<'a, str>,
}

impl<'a> Deref for Text<'a> {
    type Target = str;

    /// Gives access to the content as a string slice.
    #[inline]
    fn deref(&self) -> &str {
        &self.text
    }
}

impl<'a> From<&'a str> for Text<'a> {
    /// Wraps the string slice without copying it.
    #[inline]
    fn from(text: &'a str) -> Self {
        let text = Cow::Borrowed(text);
        Text { text }
    }
}
2031
2032////////////////////////////////////////////////////////////////////////////////////////////////////
2033
2034/// Simplified event which contains only these variants that used by deserializer
2035#[derive(Debug, PartialEq, Eq)]
2036pub enum DeEvent<'a> {
2037 /// Start tag (with attributes) `<tag attr="value">`.
2038 Start(BytesStart<'a>),
2039 /// End tag `</tag>`.
2040 End(BytesEnd<'a>),
2041 /// Decoded and concatenated content of consequent [`Text`] and [`CData`]
2042 /// events. _Consequent_ means that events should follow each other or be
2043 /// delimited only by (any count of) [`Comment`] or [`PI`] events.
2044 ///
2045 /// [`Text`]: Event::Text
2046 /// [`CData`]: Event::CData
2047 /// [`Comment`]: Event::Comment
2048 /// [`PI`]: Event::PI
2049 Text(Text<'a>),
2050 /// End of XML document.
2051 Eof,
2052}
2053
2054////////////////////////////////////////////////////////////////////////////////////////////////////
2055
2056/// Simplified event which contains only these variants that used by deserializer,
2057/// but [`Text`] events not yet fully processed.
2058///
2059/// [`Text`] events should be trimmed if they does not surrounded by the other
2060/// [`Text`] or [`CData`] events. This event contains intermediate state of [`Text`]
2061/// event, where they are trimmed from the start, but not from the end. To trim
2062/// end spaces we should lookahead by one deserializer event (i. e. skip all
2063/// comments and processing instructions).
2064///
2065/// [`Text`]: Event::Text
2066/// [`CData`]: Event::CData
2067#[derive(Debug, PartialEq, Eq)]
2068pub enum PayloadEvent<'a> {
2069 /// Start tag (with attributes) `<tag attr="value">`.
2070 Start(BytesStart<'a>),
2071 /// End tag `</tag>`.
2072 End(BytesEnd<'a>),
2073 /// Escaped character data between tags.
2074 Text(BytesText<'a>),
2075 /// Unescaped character data stored in `<![CDATA[...]]>`.
2076 CData(BytesCData<'a>),
2077 /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
2078 DocType(BytesText<'a>),
2079 /// End of XML document.
2080 Eof,
2081}
2082
2083impl<'a> PayloadEvent<'a> {
2084 /// Ensures that all data is owned to extend the object's lifetime if necessary.
2085 #[inline]
2086 fn into_owned(self) -> PayloadEvent<'static> {
2087 match self {
2088 PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2089 PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2090 PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2091 PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2092 PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2093 PayloadEvent::Eof => PayloadEvent::Eof,
2094 }
2095 }
2096}
2097
2098/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
2099/// [`PayloadEvent::Text`] events, that followed by any event except
2100/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end.
2101struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = NoEntityResolver> {
2102 /// A source of low-level XML events
2103 reader: R,
2104 /// Intermediate event, that could be returned by the next call to `next()`.
2105 /// If that is the `Text` event then leading spaces already trimmed, but
2106 /// trailing spaces is not. Before the event will be returned, trimming of
2107 /// the spaces could be necessary
2108 lookahead: Result<PayloadEvent<'i>, DeError>,
2109
2110 /// Used to resolve unknown entities that would otherwise cause the parser
2111 /// to return an [`EscapeError::UnrecognizedSymbol`] error.
2112 ///
2113 /// [`EscapeError::UnrecognizedSymbol`]: crate::escape::EscapeError::UnrecognizedSymbol
2114 entity_resolver: E,
2115}
2116
2117impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2118 fn new(mut reader: R, entity_resolver: E) -> Self {
2119 // Lookahead by one event immediately, so we do not need to check in the
2120 // loop if we need lookahead or not
2121 let lookahead = reader.next();
2122
2123 Self {
2124 reader,
2125 lookahead,
2126 entity_resolver,
2127 }
2128 }
2129
2130 /// Returns `true` if all events was consumed
2131 fn is_empty(&self) -> bool {
2132 matches!(self.lookahead, Ok(PayloadEvent::Eof))
2133 }
2134
2135 /// Read next event and put it in lookahead, return the current lookahead
2136 #[inline(always)]
2137 fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2138 replace(&mut self.lookahead, self.reader.next())
2139 }
2140
2141 #[inline(always)]
2142 fn need_trim_end(&self) -> bool {
2143 // If next event is a text or CDATA, we should not trim trailing spaces
2144 !matches!(
2145 self.lookahead,
2146 Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2147 )
2148 }
2149
2150 /// Read all consequent [`Text`] and [`CData`] events until non-text event
2151 /// occurs. Content of all events would be appended to `result` and returned
2152 /// as [`DeEvent::Text`].
2153 ///
2154 /// [`Text`]: PayloadEvent::Text
2155 /// [`CData`]: PayloadEvent::CData
2156 fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2157 loop {
2158 match self.lookahead {
2159 Ok(PayloadEvent::Text(_) | PayloadEvent::CData(_)) => {
2160 let text = self.next_text()?;
2161
2162 let mut s = result.into_owned();
2163 s += &text;
2164 result = Cow::Owned(s);
2165 }
2166 _ => break,
2167 }
2168 }
2169 Ok(DeEvent::Text(Text { text: result }))
2170 }
2171
2172 /// Read one text event, panics if current event is not a text event
2173 ///
2174 /// |Event |XML |Handling
2175 /// |-----------------------|---------------------------|----------------------------------------
2176 /// |[`PayloadEvent::Start`]|`<tag>...</tag>` |Possible panic (unreachable)
2177 /// |[`PayloadEvent::End`] |`</any-tag>` |Possible panic (unreachable)
2178 /// |[`PayloadEvent::Text`] |`text content` |Unescapes `text content` and returns it
2179 /// |[`PayloadEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged
2180 /// |[`PayloadEvent::Eof`] | |Possible panic (unreachable)
2181 #[inline(always)]
2182 fn next_text(&mut self) -> Result<Cow<'i, str>, DeError> {
2183 match self.next_impl()? {
2184 PayloadEvent::Text(mut e) => {
2185 if self.need_trim_end() {
2186 e.inplace_trim_end();
2187 }
2188 Ok(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2189 }
2190 PayloadEvent::CData(e) => Ok(e.decode()?),
2191
2192 // SAFETY: this method is called only when we peeked Text or CData
2193 _ => unreachable!("Only `Text` and `CData` events can come here"),
2194 }
2195 }
2196
2197 /// Return an input-borrowing event.
2198 fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2199 loop {
2200 return match self.next_impl()? {
2201 PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2202 PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2203 PayloadEvent::Text(mut e) => {
2204 if self.need_trim_end() && e.inplace_trim_end() {
2205 continue;
2206 }
2207 self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2208 }
2209 PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2210 PayloadEvent::DocType(e) => {
2211 self.entity_resolver
2212 .capture(e)
2213 .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2214 continue;
2215 }
2216 PayloadEvent::Eof => Ok(DeEvent::Eof),
2217 };
2218 }
2219 }
2220
2221 #[inline]
2222 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2223 match self.lookahead {
2224 // We pre-read event with the same name that is required to be skipped.
2225 // First call of `read_to_end` will end out pre-read event, the second
2226 // will consume other events
2227 Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2228 let result1 = self.reader.read_to_end(name);
2229 let result2 = self.reader.read_to_end(name);
2230
2231 // In case of error `next_impl` returns `Eof`
2232 let _ = self.next_impl();
2233 result1?;
2234 result2?;
2235 }
2236 // We pre-read event with the same name that is required to be skipped.
2237 // Because this is end event, we already consume the whole tree, so
2238 // nothing to do, just update lookahead
2239 Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2240 let _ = self.next_impl();
2241 }
2242 Ok(_) => {
2243 let result = self.reader.read_to_end(name);
2244
2245 // In case of error `next_impl` returns `Eof`
2246 let _ = self.next_impl();
2247 result?;
2248 }
2249 // Read next lookahead event, unpack error from the current lookahead
2250 Err(_) => {
2251 self.next_impl()?;
2252 }
2253 }
2254 Ok(())
2255 }
2256
2257 #[inline]
2258 fn decoder(&self) -> Decoder {
2259 self.reader.decoder()
2260 }
2261}
2262
2263////////////////////////////////////////////////////////////////////////////////////////////////////
2264
2265/// Deserialize an instance of type `T` from a string of XML text.
2266pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2267where
2268 T: Deserialize<'de>,
2269{
2270 let mut de = Deserializer::from_str(s);
2271 T::deserialize(&mut de)
2272}
2273
2274/// Deserialize from a reader. This method will do internal copies of data
2275/// readed from `reader`. If you want have a `&str` input and want to borrow
2276/// as much as possible, use [`from_str`].
2277pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2278where
2279 R: BufRead,
2280 T: DeserializeOwned,
2281{
2282 let mut de = Deserializer::from_reader(reader);
2283 T::deserialize(&mut de)
2284}
2285
2286// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
2287// valid boolean representations are only "true", "false", "1", and "0"
2288fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
2289where
2290 V: de::Visitor<'de>,
2291{
2292 match value {
2293 "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
2294 visitor.visit_bool(true)
2295 }
2296 "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
2297 visitor.visit_bool(false)
2298 }
2299 _ => Err(DeError::InvalidBoolean(value.into())),
2300 }
2301}
2302
2303fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
2304where
2305 V: Visitor<'de>,
2306{
2307 #[cfg(feature = "encoding")]
2308 {
2309 let value = decoder.decode(value)?;
2310 // No need to unescape because valid boolean representations cannot be escaped
2311 str2bool(value.as_ref(), visitor)
2312 }
2313
2314 #[cfg(not(feature = "encoding"))]
2315 {
2316 // No need to unescape because valid boolean representations cannot be escaped
2317 match value {
2318 b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
2319 visitor.visit_bool(true)
2320 }
2321 b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
2322 visitor.visit_bool(false)
2323 }
2324 e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
2325 }
2326 }
2327}
2328
2329////////////////////////////////////////////////////////////////////////////////////////////////////
2330
2331/// A structure that deserializes XML into Rust values.
2332pub struct Deserializer<'de, R, E: EntityResolver = NoEntityResolver>
2333where
2334 R: XmlRead<'de>,
2335{
2336 /// An XML reader that streams events into this deserializer
2337 reader: XmlReader<'de, R, E>,
2338
2339 /// When deserializing sequences sometimes we have to skip unwanted events.
2340 /// That events should be stored and then replayed. This is a replay buffer,
2341 /// that streams events while not empty. When it exhausted, events will
2342 /// requested from [`Self::reader`].
2343 #[cfg(feature = "overlapped-lists")]
2344 read: VecDeque<DeEvent<'de>>,
2345 /// When deserializing sequences sometimes we have to skip events, because XML
2346 /// is tolerant to elements order and even if in the XSD order is strictly
2347 /// specified (using `xs:sequence`) most of XML parsers allows order violations.
2348 /// That means, that elements, forming a sequence, could be overlapped with
2349 /// other elements, do not related to that sequence.
2350 ///
2351 /// In order to support this, deserializer will scan events and skip unwanted
2352 /// events, store them here. After call [`Self::start_replay()`] all events
2353 /// moved from this to [`Self::read`].
2354 #[cfg(feature = "overlapped-lists")]
2355 write: VecDeque<DeEvent<'de>>,
2356 /// Maximum number of events that can be skipped when processing sequences
2357 /// that occur out-of-order. This field is used to prevent potential
2358 /// denial-of-service (DoS) attacks which could cause infinite memory
2359 /// consumption when parsing a very large amount of XML into a sequence field.
2360 #[cfg(feature = "overlapped-lists")]
2361 limit: Option<NonZeroUsize>,
2362
2363 #[cfg(not(feature = "overlapped-lists"))]
2364 peek: Option<DeEvent<'de>>,
2365}
2366
2367impl<'de, R, E> Deserializer<'de, R, E>
2368where
2369 R: XmlRead<'de>,
2370 E: EntityResolver,
2371{
2372 /// Create an XML deserializer from one of the possible quick_xml input sources.
2373 ///
2374 /// Typically it is more convenient to use one of these methods instead:
2375 ///
2376 /// - [`Deserializer::from_str`]
2377 /// - [`Deserializer::from_reader`]
2378 fn new(reader: R, entity_resolver: E) -> Self {
2379 Self {
2380 reader: XmlReader::new(reader, entity_resolver),
2381
2382 #[cfg(feature = "overlapped-lists")]
2383 read: VecDeque::new(),
2384 #[cfg(feature = "overlapped-lists")]
2385 write: VecDeque::new(),
2386 #[cfg(feature = "overlapped-lists")]
2387 limit: None,
2388
2389 #[cfg(not(feature = "overlapped-lists"))]
2390 peek: None,
2391 }
2392 }
2393
2394 /// Returns `true` if all events was consumed.
2395 pub fn is_empty(&self) -> bool {
2396 #[cfg(feature = "overlapped-lists")]
2397 if self.read.is_empty() {
2398 return self.reader.is_empty();
2399 }
2400 #[cfg(not(feature = "overlapped-lists"))]
2401 if self.peek.is_none() {
2402 return self.reader.is_empty();
2403 }
2404 false
2405 }
2406
2407 /// Set the maximum number of events that could be skipped during deserialization
2408 /// of sequences.
2409 ///
2410 /// If `<element>` contains more than specified nested elements, `$text` or
2411 /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
2412 /// deserialization of sequence field (any type that uses [`deserialize_seq`]
2413 /// for the deserialization, for example, `Vec<T>`).
2414 ///
2415 /// This method can be used to prevent a [DoS] attack and infinite memory
2416 /// consumption when parsing a very large XML to a sequence field.
2417 ///
2418 /// It is strongly recommended to set limit to some value when you parse data
2419 /// from untrusted sources. You should choose a value that your typical XMLs
2420 /// can have _between_ different elements that corresponds to the same sequence.
2421 ///
2422 /// # Examples
2423 ///
2424 /// Let's imagine, that we deserialize such structure:
2425 /// ```
2426 /// struct List {
2427 /// item: Vec<()>,
2428 /// }
2429 /// ```
2430 ///
2431 /// The XML that we try to parse look like this:
2432 /// ```xml
2433 /// <any-name>
2434 /// <item/>
2435 /// <!-- Bufferization starts at this point -->
2436 /// <another-item>
2437 /// <some-element>with text</some-element>
2438 /// <yet-another-element/>
2439 /// </another-item>
2440 /// <!-- Buffer will be emptied at this point; 7 events were buffered -->
2441 /// <item/>
2442 /// <!-- There is nothing to buffer, because elements follows each other -->
2443 /// <item/>
2444 /// </any-name>
2445 /// ```
2446 ///
2447 /// There, when we deserialize the `item` field, we need to buffer 7 events,
2448 /// before we can deserialize the second `<item/>`:
2449 ///
2450 /// - `<another-item>`
2451 /// - `<some-element>`
2452 /// - `$text(with text)`
2453 /// - `</some-element>`
2454 /// - `<yet-another-element/>` (virtual start event)
2455 /// - `<yet-another-element/>` (virtual end event)
2456 /// - `</another-item>`
2457 ///
2458 /// Note, that `<yet-another-element/>` internally represented as 2 events:
2459 /// one for the start tag and one for the end tag. In the future this can be
2460 /// eliminated, but for now we use [auto-expanding feature] of a reader,
2461 /// because this simplifies deserializer code.
2462 ///
2463 /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
2464 /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
2465 /// [auto-expanding feature]: Reader::expand_empty_elements
2466 #[cfg(feature = "overlapped-lists")]
2467 pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
2468 self.limit = limit;
2469 self
2470 }
2471
2472 #[cfg(feature = "overlapped-lists")]
2473 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2474 if self.read.is_empty() {
2475 self.read.push_front(self.reader.next()?);
2476 }
2477 if let Some(event) = self.read.front() {
2478 return Ok(event);
2479 }
2480 // SAFETY: `self.read` was filled in the code above.
2481 // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2482 // if unsafe code will be allowed
2483 unreachable!()
2484 }
2485 #[cfg(not(feature = "overlapped-lists"))]
2486 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2487 if self.peek.is_none() {
2488 self.peek = Some(self.reader.next()?);
2489 }
2490 match self.peek.as_ref() {
2491 Some(v) => Ok(v),
2492 // SAFETY: a `None` variant for `self.peek` would have been replaced
2493 // by a `Some` variant in the code above.
2494 // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2495 // if unsafe code will be allowed
2496 None => unreachable!(),
2497 }
2498 }
2499
2500 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2501 // Replay skipped or peeked events
2502 #[cfg(feature = "overlapped-lists")]
2503 if let Some(event) = self.read.pop_front() {
2504 return Ok(event);
2505 }
2506 #[cfg(not(feature = "overlapped-lists"))]
2507 if let Some(e) = self.peek.take() {
2508 return Ok(e);
2509 }
2510 self.reader.next()
2511 }
2512
2513 /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
2514 /// should be replayed after calling [`Self::start_replay()`].
2515 #[cfg(feature = "overlapped-lists")]
2516 #[inline]
2517 #[must_use = "returned checkpoint should be used in `start_replay`"]
2518 fn skip_checkpoint(&self) -> usize {
2519 self.write.len()
2520 }
2521
2522 /// Extracts XML tree of events from and stores them in the skipped events
2523 /// buffer from which they can be retrieved later. You MUST call
2524 /// [`Self::start_replay()`] after calling this to give access to the skipped
2525 /// events and release internal buffers.
2526 #[cfg(feature = "overlapped-lists")]
2527 fn skip(&mut self) -> Result<(), DeError> {
2528 let event = self.next()?;
2529 self.skip_event(event)?;
2530 match self.write.back() {
2531 // Skip all subtree, if we skip a start event
2532 Some(DeEvent::Start(e)) => {
2533 let end = e.name().as_ref().to_owned();
2534 let mut depth = 0;
2535 loop {
2536 let event = self.next()?;
2537 match event {
2538 DeEvent::Start(ref e) if e.name().as_ref() == end => {
2539 self.skip_event(event)?;
2540 depth += 1;
2541 }
2542 DeEvent::End(ref e) if e.name().as_ref() == end => {
2543 self.skip_event(event)?;
2544 if depth == 0 {
2545 break;
2546 }
2547 depth -= 1;
2548 }
2549 DeEvent::Eof => {
2550 self.skip_event(event)?;
2551 break;
2552 }
2553 _ => self.skip_event(event)?,
2554 }
2555 }
2556 }
2557 _ => (),
2558 }
2559 Ok(())
2560 }
2561
2562 #[cfg(feature = "overlapped-lists")]
2563 #[inline]
2564 fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2565 if let Some(max) = self.limit {
2566 if self.write.len() >= max.get() {
2567 return Err(DeError::TooManyEvents(max));
2568 }
2569 }
2570 self.write.push_back(event);
2571 Ok(())
2572 }
2573
2574 /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2575 /// skip buffer to [`Self::read`] buffer.
2576 ///
2577 /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2578 /// return events that was skipped previously by calling [`Self::skip()`],
2579 /// and only when all that events will be consumed, the deserializer starts
2580 /// to drain events from underlying reader.
2581 ///
2582 /// This method MUST be called if any number of [`Self::skip()`] was called
2583 /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2584 #[cfg(feature = "overlapped-lists")]
2585 fn start_replay(&mut self, checkpoint: usize) {
2586 if checkpoint == 0 {
2587 self.write.append(&mut self.read);
2588 std::mem::swap(&mut self.read, &mut self.write);
2589 } else {
2590 let mut read = self.write.split_off(checkpoint);
2591 read.append(&mut self.read);
2592 self.read = read;
2593 }
2594 }
2595
2596 #[inline]
2597 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
2598 self.read_string_impl(true)
2599 }
2600
2601 /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2602 /// events, merge them into one string. If there are no such events, returns
2603 /// an empty string.
2604 ///
2605 /// If `allow_start` is `false`, then only text events are consumed, for other
2606 /// events an error is returned (see table below).
2607 ///
2608 /// If `allow_start` is `true`, then two or three events are expected:
2609 /// - [`DeEvent::Start`];
2610 /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2611 /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2612 ///
2613 /// Corresponding events are consumed.
2614 ///
2615 /// # Handling events
2616 ///
2617 /// The table below shows how events is handled by this method:
2618 ///
2619 /// |Event |XML |Handling
2620 /// |------------------|---------------------------|----------------------------------------
2621 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2622 /// |[`DeEvent::End`] |`</any-tag>` |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
2623 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2624 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2625 ///
2626 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2627 ///
2628 /// |Event |XML |Handling
2629 /// |------------------|---------------------------|----------------------------------------------------------------------------------
2630 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2631 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
2632 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2633 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2634 ///
2635 /// [`Text`]: Event::Text
2636 /// [`CData`]: Event::CData
2637 fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2638 match self.next()? {
2639 DeEvent::Text(e) => Ok(e.text),
2640 // allow one nested level
2641 DeEvent::Start(_) if allow_start => self.read_text(),
2642 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2643 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2644 DeEvent::Eof => Err(DeError::UnexpectedEof),
2645 }
2646 }
2647 /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2648 /// [`DeEvent::End`] event.
2649 fn read_text(&mut self) -> Result<Cow<'de, str>, DeError> {
2650 match self.next()? {
2651 DeEvent::Text(e) => match self.next()? {
2652 // The matching tag name is guaranteed by the reader
2653 DeEvent::End(_) => Ok(e.text),
2654 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2655 DeEvent::Text(_) => unreachable!(),
2656 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2657 DeEvent::Eof => Err(DeError::UnexpectedEof),
2658 },
2659 // We can get End event in case of `<tag></tag>` or `<tag/>` input
2660 // Return empty text in that case
2661 // The matching tag name is guaranteed by the reader
2662 DeEvent::End(_) => Ok("".into()),
2663 DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2664 DeEvent::Eof => Err(DeError::UnexpectedEof),
2665 }
2666 }
2667
2668 /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
2669 /// dropped. This method should be called after [`Self::next()`]
2670 #[cfg(feature = "overlapped-lists")]
2671 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2672 let mut depth = 0;
2673 loop {
2674 match self.read.pop_front() {
2675 Some(DeEvent::Start(e)) if e.name() == name => {
2676 depth += 1;
2677 }
2678 Some(DeEvent::End(e)) if e.name() == name => {
2679 if depth == 0 {
2680 break;
2681 }
2682 depth -= 1;
2683 }
2684
2685 // Drop all other skipped events
2686 Some(_) => continue,
2687
2688 // If we do not have skipped events, use effective reading that will
2689 // not allocate memory for events
2690 None => {
2691 // We should close all opened tags, because we could buffer
2692 // Start events, but not the corresponding End events. So we
2693 // keep reading events until we exit all nested tags.
2694 // `read_to_end()` will return an error if an Eof was encountered
2695 // preliminary (in case of malformed XML).
2696 //
2697 // <tag><tag></tag></tag>
2698 // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
2699 // ^^^^^^ - read by the first call of `self.reader.read_to_end()`
2700 // ^^^^^^ - read by the second call of `self.reader.read_to_end()`
2701 loop {
2702 self.reader.read_to_end(name)?;
2703 if depth == 0 {
2704 break;
2705 }
2706 depth -= 1;
2707 }
2708 break;
2709 }
2710 }
2711 }
2712 Ok(())
2713 }
2714 #[cfg(not(feature = "overlapped-lists"))]
2715 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2716 // First one might be in self.peek
2717 match self.next()? {
2718 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2719 DeEvent::End(e) if e.name() == name => return Ok(()),
2720 _ => (),
2721 }
2722 self.reader.read_to_end(name)
2723 }
2724}
2725
2726impl<'de> Deserializer<'de, SliceReader<'de>> {
2727 /// Create new deserializer that will borrow data from the specified string.
2728 ///
2729 /// Deserializer created with this method will not resolve custom entities.
2730 #[allow(clippy::should_implement_trait)]
2731 pub fn from_str(source: &'de str) -> Self {
2732 Self::from_str_with_resolver(source, NoEntityResolver)
2733 }
2734}
2735
2736impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2737where
2738 E: EntityResolver,
2739{
2740 /// Create new deserializer that will borrow data from the specified string
2741 /// and use specified entity resolver.
2742 pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2743 let mut reader = Reader::from_str(source);
2744 reader.expand_empty_elements(true);
2745
2746 Self::new(
2747 SliceReader {
2748 reader,
2749 start_trimmer: StartTrimmer::default(),
2750 },
2751 entity_resolver,
2752 )
2753 }
2754}
2755
2756impl<'de, R> Deserializer<'de, IoReader<R>>
2757where
2758 R: BufRead,
2759{
2760 /// Create new deserializer that will copy data from the specified reader
2761 /// into internal buffer.
2762 ///
2763 /// If you already have a string use [`Self::from_str`] instead, because it
2764 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2765 /// UTF-8, you can decode it first before using [`from_str`].
2766 ///
2767 /// Deserializer created with this method will not resolve custom entities.
2768 pub fn from_reader(reader: R) -> Self {
2769 Self::with_resolver(reader, NoEntityResolver)
2770 }
2771}
2772
2773impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2774where
2775 R: BufRead,
2776 E: EntityResolver,
2777{
2778 /// Create new deserializer that will copy data from the specified reader
2779 /// into internal buffer and use specified entity resolver.
2780 ///
2781 /// If you already have a string use [`Self::from_str`] instead, because it
2782 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2783 /// UTF-8, you can decode it first before using [`from_str`].
2784 pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2785 let mut reader = Reader::from_reader(reader);
2786 reader.expand_empty_elements(true);
2787
2788 Self::new(
2789 IoReader {
2790 reader,
2791 start_trimmer: StartTrimmer::default(),
2792 buf: Vec::new(),
2793 },
2794 entity_resolver,
2795 )
2796 }
2797}
2798
2799impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2800where
2801 R: XmlRead<'de>,
2802 E: EntityResolver,
2803{
2804 type Error = DeError;
2805
2806 deserialize_primitives!();
2807
2808 fn deserialize_struct<V>(
2809 self,
2810 _name: &'static str,
2811 fields: &'static [&'static str],
2812 visitor: V,
2813 ) -> Result<V::Value, DeError>
2814 where
2815 V: Visitor<'de>,
2816 {
2817 match self.next()? {
2818 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2819 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2820 DeEvent::Text(_) => Err(DeError::ExpectedStart),
2821 DeEvent::Eof => Err(DeError::UnexpectedEof),
2822 }
2823 }
2824
2825 /// Unit represented in XML as a `xs:element` or text/CDATA content.
2826 /// Any content inside `xs:element` is ignored and skipped.
2827 ///
2828 /// Produces unit struct from any of following inputs:
2829 /// - any `<tag ...>...</tag>`
2830 /// - any `<tag .../>`
2831 /// - any consequent text / CDATA content (can consist of several parts
2832 /// delimited by comments and processing instructions)
2833 ///
2834 /// # Events handling
2835 ///
2836 /// |Event |XML |Handling
2837 /// |------------------|---------------------------|-------------------------------------------
2838 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2839 /// |[`DeEvent::End`] |`</tag>` |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
2840 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2841 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2842 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2843 where
2844 V: Visitor<'de>,
2845 {
2846 match self.next()? {
2847 DeEvent::Start(s) => {
2848 self.read_to_end(s.name())?;
2849 visitor.visit_unit()
2850 }
2851 DeEvent::Text(_) => visitor.visit_unit(),
2852 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
2853 DeEvent::Eof => Err(DeError::UnexpectedEof),
2854 }
2855 }
2856
2857 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2858 /// with the same deserializer.
2859 fn deserialize_newtype_struct<V>(
2860 self,
2861 _name: &'static str,
2862 visitor: V,
2863 ) -> Result<V::Value, DeError>
2864 where
2865 V: Visitor<'de>,
2866 {
2867 visitor.visit_newtype_struct(self)
2868 }
2869
2870 fn deserialize_enum<V>(
2871 self,
2872 _name: &'static str,
2873 _variants: &'static [&'static str],
2874 visitor: V,
2875 ) -> Result<V::Value, DeError>
2876 where
2877 V: Visitor<'de>,
2878 {
2879 visitor.visit_enum(var::EnumAccess::new(self))
2880 }
2881
2882 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2883 where
2884 V: Visitor<'de>,
2885 {
2886 visitor.visit_seq(self)
2887 }
2888
2889 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2890 where
2891 V: Visitor<'de>,
2892 {
2893 match self.peek()? {
2894 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
2895 DeEvent::Eof => visitor.visit_none(),
2896 _ => visitor.visit_some(self),
2897 }
2898 }
2899
2900 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2901 where
2902 V: Visitor<'de>,
2903 {
2904 match self.peek()? {
2905 DeEvent::Text(_) => self.deserialize_str(visitor),
2906 _ => self.deserialize_map(visitor),
2907 }
2908 }
2909}
2910
2911/// An accessor to sequence elements forming a value for top-level sequence of XML
2912/// elements.
2913///
2914/// Technically, multiple top-level elements violates XML rule of only one top-level
2915/// element, but we consider this as several concatenated XML documents.
2916impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2917where
2918 R: XmlRead<'de>,
2919 E: EntityResolver,
2920{
2921 type Error = DeError;
2922
2923 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2924 where
2925 T: DeserializeSeed<'de>,
2926 {
2927 match self.peek()? {
2928 DeEvent::Eof => {
2929 // We need to consume event in order to self.is_empty() worked
2930 self.next()?;
2931 Ok(None)
2932 }
2933
2934 // Start(tag), End(tag), Text
2935 _ => seed.deserialize(&mut **self).map(Some),
2936 }
2937 }
2938}
2939
2940////////////////////////////////////////////////////////////////////////////////////////////////////
2941
2942/// Helper struct that contains a state for an algorithm of converting events
2943/// from raw events to semi-trimmed events that is independent from a way of
2944/// events reading.
2945struct StartTrimmer {
2946 /// If `true`, then leading whitespace will be removed from next returned
2947 /// [`Event::Text`]. This field is set to `true` after reading each event
2948 /// except [`Event::Text`] and [`Event::CData`], so [`Event::Text`] events
2949 /// read right after them does not trimmed.
2950 trim_start: bool,
2951}
2952
2953impl StartTrimmer {
2954 /// Converts raw reader's event into a payload event.
2955 /// Returns `None`, if event should be skipped.
2956 #[inline(always)]
2957 fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
2958 let (event, trim_next_event) = match event {
2959 Event::DocType(e) => (PayloadEvent::DocType(e), true),
2960 Event::Start(e) => (PayloadEvent::Start(e), true),
2961 Event::End(e) => (PayloadEvent::End(e), true),
2962 Event::Eof => (PayloadEvent::Eof, true),
2963
2964 // Do not trim next text event after Text or CDATA event
2965 Event::CData(e) => (PayloadEvent::CData(e), false),
2966 Event::Text(mut e) => {
2967 // If event is empty after trimming, skip it
2968 if self.trim_start && e.inplace_trim_start() {
2969 return None;
2970 }
2971 (PayloadEvent::Text(e), false)
2972 }
2973
2974 _ => return None,
2975 };
2976 self.trim_start = trim_next_event;
2977 Some(event)
2978 }
2979}
2980
2981impl Default for StartTrimmer {
2982 #[inline]
2983 fn default() -> Self {
2984 Self { trim_start: true }
2985 }
2986}
2987
2988////////////////////////////////////////////////////////////////////////////////////////////////////
2989
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait. It only abstracts over the
/// [borrowing](SliceReader) and [copying](IoReader) data sources, so that the
/// deserializer code can be shared between them.
pub trait XmlRead<'i> {
    /// Returns the next payload event, which may borrow from the input for `'i`.
    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;

    /// Skips until the end element with the given `name` is found. Unlike
    /// `next()` it will not allocate when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;
}
3007
3008/// XML input source that reads from a std::io input stream.
3009///
3010/// You cannot create it, it is created automatically when you call
3011/// [`Deserializer::from_reader`]
3012pub struct IoReader<R: BufRead> {
3013 reader: Reader<R>,
3014 start_trimmer: StartTrimmer,
3015 buf: Vec<u8>,
3016}
3017
3018impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3019 fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3020 loop {
3021 self.buf.clear();
3022
3023 let event = self.reader.read_event_into(&mut self.buf)?;
3024 if let Some(event) = self.start_trimmer.trim(event) {
3025 return Ok(event.into_owned());
3026 }
3027 }
3028 }
3029
3030 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3031 match self.reader.read_to_end_into(name, &mut self.buf) {
3032 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
3033 Err(e) => Err(e.into()),
3034 Ok(_) => Ok(()),
3035 }
3036 }
3037
3038 fn decoder(&self) -> Decoder {
3039 self.reader.decoder()
3040 }
3041}
3042
3043/// XML input source that reads from a slice of bytes and can borrow from it.
3044///
3045/// You cannot create it, it is created automatically when you call
3046/// [`Deserializer::from_str`].
3047pub struct SliceReader<'de> {
3048 reader: Reader<&'de [u8]>,
3049 start_trimmer: StartTrimmer,
3050}
3051
3052impl<'de> XmlRead<'de> for SliceReader<'de> {
3053 fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3054 loop {
3055 let event = self.reader.read_event()?;
3056 if let Some(event) = self.start_trimmer.trim(event) {
3057 return Ok(event);
3058 }
3059 }
3060 }
3061
3062 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3063 match self.reader.read_to_end(name) {
3064 Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
3065 Err(e) => Err(e.into()),
3066 Ok(_) => Ok(()),
3067 }
3068 }
3069
3070 fn decoder(&self) -> Decoder {
3071 self.reader.decoder()
3072 }
3073}
3074
3075#[cfg(test)]
3076mod tests {
3077 use super::*;
3078 use pretty_assertions::assert_eq;
3079
3080 fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
3081 dbg!(source);
3082 Deserializer::from_str(source)
3083 }
3084
3085 #[cfg(feature = "overlapped-lists")]
3086 mod skip {
3087 use super::*;
3088 use crate::de::DeEvent::*;
3089 use crate::events::BytesEnd;
3090 use pretty_assertions::assert_eq;
3091
3092 /// Checks that `peek()` and `read()` behaves correctly after `skip()`
3093 #[test]
3094 fn read_and_peek() {
3095 let mut de = make_de(
3096 r#"
3097 <root>
3098 <inner>
3099 text
3100 <inner/>
3101 </inner>
3102 <next/>
3103 <target/>
3104 </root>
3105 "#,
3106 );
3107
3108 // Initial conditions - both are empty
3109 assert_eq!(de.read, vec![]);
3110 assert_eq!(de.write, vec![]);
3111
3112 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3113 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));
3114
3115 // Mark that start_replay() should begin replay from this point
3116 let checkpoint = de.skip_checkpoint();
3117 assert_eq!(checkpoint, 0);
3118
3119 // Should skip first <inner> tree
3120 de.skip().unwrap();
3121 assert_eq!(de.read, vec![]);
3122 assert_eq!(
3123 de.write,
3124 vec![
3125 Start(BytesStart::new("inner")),
3126 Text("text".into()),
3127 Start(BytesStart::new("inner")),
3128 End(BytesEnd::new("inner")),
3129 End(BytesEnd::new("inner")),
3130 ]
3131 );
3132
3133 // Consume <next/>. Now unconsumed XML looks like:
3134 //
3135 // <inner>
3136 // text
3137 // <inner/>
3138 // </inner>
3139 // <target/>
3140 // </root>
3141 assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
3142 assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));
3143
3144 // We finish writing. Next call to `next()` should start replay that messages:
3145 //
3146 // <inner>
3147 // text
3148 // <inner/>
3149 // </inner>
3150 //
3151 // and after that stream that messages:
3152 //
3153 // <target/>
3154 // </root>
3155 de.start_replay(checkpoint);
3156 assert_eq!(
3157 de.read,
3158 vec![
3159 Start(BytesStart::new("inner")),
3160 Text("text".into()),
3161 Start(BytesStart::new("inner")),
3162 End(BytesEnd::new("inner")),
3163 End(BytesEnd::new("inner")),
3164 ]
3165 );
3166 assert_eq!(de.write, vec![]);
3167 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3168
3169 // Mark that start_replay() should begin replay from this point
3170 let checkpoint = de.skip_checkpoint();
3171 assert_eq!(checkpoint, 0);
3172
3173 // Skip `$text` node and consume <inner/> after it
3174 de.skip().unwrap();
3175 assert_eq!(
3176 de.read,
3177 vec![
3178 Start(BytesStart::new("inner")),
3179 End(BytesEnd::new("inner")),
3180 End(BytesEnd::new("inner")),
3181 ]
3182 );
3183 assert_eq!(
3184 de.write,
3185 vec![
3186 // This comment here to keep the same formatting of both arrays
3187 // otherwise rustfmt suggest one-line it
3188 Text("text".into()),
3189 ]
3190 );
3191
3192 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3193 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3194
3195 // We finish writing. Next call to `next()` should start replay messages:
3196 //
3197 // text
3198 // </inner>
3199 //
3200 // and after that stream that messages:
3201 //
3202 // <target/>
3203 // </root>
3204 de.start_replay(checkpoint);
3205 assert_eq!(
3206 de.read,
3207 vec![
3208 // This comment here to keep the same formatting as others
3209 // otherwise rustfmt suggest one-line it
3210 Text("text".into()),
3211 End(BytesEnd::new("inner")),
3212 ]
3213 );
3214 assert_eq!(de.write, vec![]);
3215 assert_eq!(de.next().unwrap(), Text("text".into()));
3216 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3217 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3218 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
3219 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3220 assert_eq!(de.next().unwrap(), Eof);
3221 }
3222
3223 /// Checks that `read_to_end()` behaves correctly after `skip()`
3224 #[test]
3225 fn read_to_end() {
3226 let mut de = make_de(
3227 r#"
3228 <root>
3229 <skip>
3230 text
3231 <skip/>
3232 </skip>
3233 <target>
3234 <target/>
3235 </target>
3236 </root>
3237 "#,
3238 );
3239
3240 // Initial conditions - both are empty
3241 assert_eq!(de.read, vec![]);
3242 assert_eq!(de.write, vec![]);
3243
3244 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3245
3246 // Mark that start_replay() should begin replay from this point
3247 let checkpoint = de.skip_checkpoint();
3248 assert_eq!(checkpoint, 0);
3249
3250 // Skip the <skip> tree
3251 de.skip().unwrap();
3252 assert_eq!(de.read, vec![]);
3253 assert_eq!(
3254 de.write,
3255 vec![
3256 Start(BytesStart::new("skip")),
3257 Text("text".into()),
3258 Start(BytesStart::new("skip")),
3259 End(BytesEnd::new("skip")),
3260 End(BytesEnd::new("skip")),
3261 ]
3262 );
3263
3264 // Drop all events that represents <target> tree. Now unconsumed XML looks like:
3265 //
3266 // <skip>
3267 // text
3268 // <skip/>
3269 // </skip>
3270 // </root>
3271 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3272 de.read_to_end(QName(b"target")).unwrap();
3273 assert_eq!(de.read, vec![]);
3274 assert_eq!(
3275 de.write,
3276 vec![
3277 Start(BytesStart::new("skip")),
3278 Text("text".into()),
3279 Start(BytesStart::new("skip")),
3280 End(BytesEnd::new("skip")),
3281 End(BytesEnd::new("skip")),
3282 ]
3283 );
3284
3285 // We finish writing. Next call to `next()` should start replay that messages:
3286 //
3287 // <skip>
3288 // text
3289 // <skip/>
3290 // </skip>
3291 //
3292 // and after that stream that messages:
3293 //
3294 // </root>
3295 de.start_replay(checkpoint);
3296 assert_eq!(
3297 de.read,
3298 vec![
3299 Start(BytesStart::new("skip")),
3300 Text("text".into()),
3301 Start(BytesStart::new("skip")),
3302 End(BytesEnd::new("skip")),
3303 End(BytesEnd::new("skip")),
3304 ]
3305 );
3306 assert_eq!(de.write, vec![]);
3307
3308 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
3309 de.read_to_end(QName(b"skip")).unwrap();
3310
3311 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3312 assert_eq!(de.next().unwrap(), Eof);
3313 }
3314
3315 /// Checks that replay replayes only part of events
3316 /// Test for https://github.com/tafia/quick-xml/issues/435
3317 #[test]
3318 fn partial_replay() {
3319 let mut de = make_de(
3320 r#"
3321 <root>
3322 <skipped-1/>
3323 <skipped-2/>
3324 <inner>
3325 <skipped-3/>
3326 <skipped-4/>
3327 <target-2/>
3328 </inner>
3329 <target-1/>
3330 </root>
3331 "#,
3332 );
3333
3334 // Initial conditions - both are empty
3335 assert_eq!(de.read, vec![]);
3336 assert_eq!(de.write, vec![]);
3337
3338 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3339
3340 // start_replay() should start replay from this point
3341 let checkpoint1 = de.skip_checkpoint();
3342 assert_eq!(checkpoint1, 0);
3343
3344 // Should skip first and second <skipped-N/> elements
3345 de.skip().unwrap(); // skipped-1
3346 de.skip().unwrap(); // skipped-2
3347 assert_eq!(de.read, vec![]);
3348 assert_eq!(
3349 de.write,
3350 vec![
3351 Start(BytesStart::new("skipped-1")),
3352 End(BytesEnd::new("skipped-1")),
3353 Start(BytesStart::new("skipped-2")),
3354 End(BytesEnd::new("skipped-2")),
3355 ]
3356 );
3357
3358 ////////////////////////////////////////////////////////////////////////////////////////
3359
3360 assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3361 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
3362 assert_eq!(
3363 de.read,
3364 vec![
3365 // This comment here to keep the same formatting of both arrays
3366 // otherwise rustfmt suggest one-line it
3367 Start(BytesStart::new("skipped-3")),
3368 ]
3369 );
3370 assert_eq!(
3371 de.write,
3372 vec![
3373 Start(BytesStart::new("skipped-1")),
3374 End(BytesEnd::new("skipped-1")),
3375 Start(BytesStart::new("skipped-2")),
3376 End(BytesEnd::new("skipped-2")),
3377 ]
3378 );
3379
3380 // start_replay() should start replay from this point
3381 let checkpoint2 = de.skip_checkpoint();
3382 assert_eq!(checkpoint2, 4);
3383
3384 // Should skip third and forth <skipped-N/> elements
3385 de.skip().unwrap(); // skipped-3
3386 de.skip().unwrap(); // skipped-4
3387 assert_eq!(de.read, vec![]);
3388 assert_eq!(
3389 de.write,
3390 vec![
3391 // checkpoint 1
3392 Start(BytesStart::new("skipped-1")),
3393 End(BytesEnd::new("skipped-1")),
3394 Start(BytesStart::new("skipped-2")),
3395 End(BytesEnd::new("skipped-2")),
3396 // checkpoint 2
3397 Start(BytesStart::new("skipped-3")),
3398 End(BytesEnd::new("skipped-3")),
3399 Start(BytesStart::new("skipped-4")),
3400 End(BytesEnd::new("skipped-4")),
3401 ]
3402 );
3403 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
3404 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
3405 assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
3406 assert_eq!(
3407 de.read,
3408 vec![
3409 // This comment here to keep the same formatting of both arrays
3410 // otherwise rustfmt suggest one-line it
3411 End(BytesEnd::new("inner")),
3412 ]
3413 );
3414 assert_eq!(
3415 de.write,
3416 vec![
3417 // checkpoint 1
3418 Start(BytesStart::new("skipped-1")),
3419 End(BytesEnd::new("skipped-1")),
3420 Start(BytesStart::new("skipped-2")),
3421 End(BytesEnd::new("skipped-2")),
3422 // checkpoint 2
3423 Start(BytesStart::new("skipped-3")),
3424 End(BytesEnd::new("skipped-3")),
3425 Start(BytesStart::new("skipped-4")),
3426 End(BytesEnd::new("skipped-4")),
3427 ]
3428 );
3429
3430 // Start replay events from checkpoint 2
3431 de.start_replay(checkpoint2);
3432 assert_eq!(
3433 de.read,
3434 vec![
3435 Start(BytesStart::new("skipped-3")),
3436 End(BytesEnd::new("skipped-3")),
3437 Start(BytesStart::new("skipped-4")),
3438 End(BytesEnd::new("skipped-4")),
3439 End(BytesEnd::new("inner")),
3440 ]
3441 );
3442 assert_eq!(
3443 de.write,
3444 vec![
3445 Start(BytesStart::new("skipped-1")),
3446 End(BytesEnd::new("skipped-1")),
3447 Start(BytesStart::new("skipped-2")),
3448 End(BytesEnd::new("skipped-2")),
3449 ]
3450 );
3451
3452 // Replayed events
3453 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
3454 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
3455 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
3456 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));
3457
3458 assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3459 assert_eq!(de.read, vec![]);
3460 assert_eq!(
3461 de.write,
3462 vec![
3463 Start(BytesStart::new("skipped-1")),
3464 End(BytesEnd::new("skipped-1")),
3465 Start(BytesStart::new("skipped-2")),
3466 End(BytesEnd::new("skipped-2")),
3467 ]
3468 );
3469
3470 ////////////////////////////////////////////////////////////////////////////////////////
3471
3472 // New events
3473 assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
3474 assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));
3475
3476 assert_eq!(de.read, vec![]);
3477 assert_eq!(
3478 de.write,
3479 vec![
3480 Start(BytesStart::new("skipped-1")),
3481 End(BytesEnd::new("skipped-1")),
3482 Start(BytesStart::new("skipped-2")),
3483 End(BytesEnd::new("skipped-2")),
3484 ]
3485 );
3486
3487 // Start replay events from checkpoint 1
3488 de.start_replay(checkpoint1);
3489 assert_eq!(
3490 de.read,
3491 vec![
3492 Start(BytesStart::new("skipped-1")),
3493 End(BytesEnd::new("skipped-1")),
3494 Start(BytesStart::new("skipped-2")),
3495 End(BytesEnd::new("skipped-2")),
3496 ]
3497 );
3498 assert_eq!(de.write, vec![]);
3499
3500 // Replayed events
3501 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
3502 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
3503 assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
3504 assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));
3505
3506 assert_eq!(de.read, vec![]);
3507 assert_eq!(de.write, vec![]);
3508
3509 // New events
3510 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3511 assert_eq!(de.next().unwrap(), Eof);
3512 }
3513
3514 /// Checks that limiting buffer size works correctly
3515 #[test]
3516 fn limit() {
3517 use serde::Deserialize;
3518
3519 #[derive(Debug, Deserialize)]
3520 #[allow(unused)]
3521 struct List {
3522 item: Vec<()>,
3523 }
3524
3525 let mut de = make_de(
3526 r#"
3527 <any-name>
3528 <item/>
3529 <another-item>
3530 <some-element>with text</some-element>
3531 <yet-another-element/>
3532 </another-item>
3533 <item/>
3534 <item/>
3535 </any-name>
3536 "#,
3537 );
3538 de.event_buffer_size(NonZeroUsize::new(3));
3539
3540 match List::deserialize(&mut de) {
3541 Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
3542 e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e),
3543 }
3544 }
3545
3546 /// Without handling Eof in `skip` this test failed with memory allocation
3547 #[test]
3548 fn invalid_xml() {
3549 use crate::de::DeEvent::*;
3550
3551 let mut de = make_de("<root>");
3552
3553 // Cache all events
3554 let checkpoint = de.skip_checkpoint();
3555 de.skip().unwrap();
3556 de.start_replay(checkpoint);
3557 assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
3558 }
3559 }
3560
3561 mod read_to_end {
3562 use super::*;
3563 use crate::de::DeEvent::*;
3564 use pretty_assertions::assert_eq;
3565
3566 #[test]
3567 fn complex() {
3568 let mut de = make_de(
3569 r#"
3570 <root>
3571 <tag a="1"><tag>text</tag>content</tag>
3572 <tag a="2"><![CDATA[cdata content]]></tag>
3573 <self-closed/>
3574 </root>
3575 "#,
3576 );
3577
3578 assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3579
3580 assert_eq!(
3581 de.next().unwrap(),
3582 Start(BytesStart::from_content(r#"tag a="1""#, 3))
3583 );
3584 assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3585
3586 assert_eq!(
3587 de.next().unwrap(),
3588 Start(BytesStart::from_content(r#"tag a="2""#, 3))
3589 );
3590 assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3591 assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3592
3593 assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3594 assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3595
3596 assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3597 assert_eq!(de.next().unwrap(), Eof);
3598 }
3599
3600 #[test]
3601 fn invalid_xml1() {
3602 let mut de = make_de("<tag><tag></tag>");
3603
3604 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3605 assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3606
3607 match de.read_to_end(QName(b"tag")) {
3608 Err(DeError::UnexpectedEof) => (),
3609 x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
3610 }
3611 assert_eq!(de.next().unwrap(), Eof);
3612 }
3613
3614 #[test]
3615 fn invalid_xml2() {
3616 let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
3617
3618 assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3619 assert_eq!(de.peek().unwrap(), &Text("".into()));
3620
3621 match de.read_to_end(QName(b"tag")) {
3622 Err(DeError::UnexpectedEof) => (),
3623 x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
3624 }
3625 assert_eq!(de.next().unwrap(), Eof);
3626 }
3627 }
3628
3629 #[test]
3630 fn borrowing_reader_parity() {
3631 let s = r#"
3632 <item name="hello" source="world.rs">Some text</item>
3633 <item2/>
3634 <item3 value="world" />
3635 "#;
3636
3637 let mut reader1 = IoReader {
3638 reader: Reader::from_reader(s.as_bytes()),
3639 start_trimmer: StartTrimmer::default(),
3640 buf: Vec::new(),
3641 };
3642 let mut reader2 = SliceReader {
3643 reader: Reader::from_str(s),
3644 start_trimmer: StartTrimmer::default(),
3645 };
3646
3647 loop {
3648 let event1 = reader1.next().unwrap();
3649 let event2 = reader2.next().unwrap();
3650
3651 if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) {
3652 break;
3653 }
3654
3655 assert_eq!(event1, event2);
3656 }
3657 }
3658
3659 #[test]
3660 fn borrowing_reader_events() {
3661 let s = r#"
3662 <item name="hello" source="world.rs">Some text</item>
3663 <item2></item2>
3664 <item3/>
3665 <item4 value="world" />
3666 "#;
3667
3668 let mut reader = SliceReader {
3669 reader: Reader::from_str(s),
3670 start_trimmer: StartTrimmer::default(),
3671 };
3672
3673 reader.reader.expand_empty_elements(true);
3674
3675 let mut events = Vec::new();
3676
3677 loop {
3678 let event = reader.next().unwrap();
3679 if let PayloadEvent::Eof = event {
3680 break;
3681 }
3682 events.push(event);
3683 }
3684
3685 use crate::de::PayloadEvent::*;
3686
3687 assert_eq!(
3688 events,
3689 vec![
3690 Start(BytesStart::from_content(
3691 r#"item name="hello" source="world.rs""#,
3692 4
3693 )),
3694 Text(BytesText::from_escaped("Some text")),
3695 End(BytesEnd::new("item")),
3696 Start(BytesStart::from_content("item2", 5)),
3697 End(BytesEnd::new("item2")),
3698 Start(BytesStart::from_content("item3", 5)),
3699 End(BytesEnd::new("item3")),
3700 Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
3701 End(BytesEnd::new("item4")),
3702 ]
3703 )
3704 }
3705
3706 /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
3707 /// because parser reports error early
3708 #[test]
3709 fn read_string() {
3710 match from_str::<String>(r#"</root>"#) {
3711 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
3712 assert_eq!(expected, "");
3713 assert_eq!(found, "root");
3714 }
3715 x => panic!(
3716 r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
3717 x
3718 ),
3719 }
3720
3721 let s: String = from_str(r#"<root></root>"#).unwrap();
3722 assert_eq!(s, "");
3723
3724 match from_str::<String>(r#"<root></other>"#) {
3725 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
3726 assert_eq!(expected, "root");
3727 assert_eq!(found, "other");
3728 }
3729 x => panic!(
3730 r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
3731 x
3732 ),
3733 }
3734 }
3735
    /// Tests for https://github.com/tafia/quick-xml/issues/474.
    ///
    /// These tests ensure that comments and processing instructions are ignored
    /// and that they can split one logical string into pieces.
3740 mod merge_text {
3741 use super::*;
3742 use pretty_assertions::assert_eq;
3743
3744 #[test]
3745 fn text() {
3746 let mut de = make_de("text");
3747 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3748 }
3749
3750 #[test]
3751 fn cdata() {
3752 let mut de = make_de("<![CDATA[cdata]]>");
3753 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
3754 }
3755
3756 #[test]
3757 fn text_and_cdata() {
3758 let mut de = make_de("text and <![CDATA[cdata]]>");
3759 assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
3760 }
3761
3762 #[test]
3763 fn text_and_empty_cdata() {
3764 let mut de = make_de("text and <![CDATA[]]>");
3765 assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
3766 }
3767
3768 #[test]
3769 fn cdata_and_text() {
3770 let mut de = make_de("<![CDATA[cdata]]> and text");
3771 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
3772 }
3773
3774 #[test]
3775 fn empty_cdata_and_text() {
3776 let mut de = make_de("<![CDATA[]]> and text");
3777 assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
3778 }
3779
3780 #[test]
3781 fn cdata_and_cdata() {
3782 let mut de = make_de(
3783 "\
3784 <![CDATA[cdata]]]]>\
3785 <![CDATA[>cdata]]>\
3786 ",
3787 );
3788 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3789 }
3790
3791 mod comment_between {
3792 use super::*;
3793 use pretty_assertions::assert_eq;
3794
3795 #[test]
3796 fn text() {
3797 let mut de = make_de(
3798 "\
3799 text \
3800 <!--comment 1--><!--comment 2--> \
3801 text\
3802 ",
3803 );
3804 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3805 }
3806
3807 #[test]
3808 fn cdata() {
3809 let mut de = make_de(
3810 "\
3811 <![CDATA[cdata]]]]>\
3812 <!--comment 1--><!--comment 2-->\
3813 <![CDATA[>cdata]]>\
3814 ",
3815 );
3816 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3817 }
3818
3819 #[test]
3820 fn text_and_cdata() {
3821 let mut de = make_de(
3822 "\
3823 text \
3824 <!--comment 1--><!--comment 2-->\
3825 <![CDATA[ cdata]]>\
3826 ",
3827 );
3828 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3829 }
3830
3831 #[test]
3832 fn text_and_empty_cdata() {
3833 let mut de = make_de(
3834 "\
3835 text \
3836 <!--comment 1--><!--comment 2-->\
3837 <![CDATA[]]>\
3838 ",
3839 );
3840 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3841 }
3842
3843 #[test]
3844 fn cdata_and_text() {
3845 let mut de = make_de(
3846 "\
3847 <![CDATA[cdata ]]>\
3848 <!--comment 1--><!--comment 2--> \
3849 text \
3850 ",
3851 );
3852 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3853 }
3854
3855 #[test]
3856 fn empty_cdata_and_text() {
3857 let mut de = make_de(
3858 "\
3859 <![CDATA[]]>\
3860 <!--comment 1--><!--comment 2--> \
3861 text \
3862 ",
3863 );
3864 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
3865 }
3866
3867 #[test]
3868 fn cdata_and_cdata() {
3869 let mut de = make_de(
3870 "\
3871 <![CDATA[cdata]]]>\
3872 <!--comment 1--><!--comment 2-->\
3873 <![CDATA[]>cdata]]>\
3874 ",
3875 );
3876 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3877 }
3878 }
3879
3880 mod pi_between {
3881 use super::*;
3882 use pretty_assertions::assert_eq;
3883
3884 #[test]
3885 fn text() {
3886 let mut de = make_de(
3887 "\
3888 text \
3889 <?pi 1?><?pi 2?> \
3890 text\
3891 ",
3892 );
3893 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3894 }
3895
3896 #[test]
3897 fn cdata() {
3898 let mut de = make_de(
3899 "\
3900 <![CDATA[cdata]]]]>\
3901 <?pi 1?><?pi 2?>\
3902 <![CDATA[>cdata]]>\
3903 ",
3904 );
3905 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3906 }
3907
3908 #[test]
3909 fn text_and_cdata() {
3910 let mut de = make_de(
3911 "\
3912 text \
3913 <?pi 1?><?pi 2?>\
3914 <![CDATA[ cdata]]>\
3915 ",
3916 );
3917 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3918 }
3919
3920 #[test]
3921 fn text_and_empty_cdata() {
3922 let mut de = make_de(
3923 "\
3924 text \
3925 <?pi 1?><?pi 2?>\
3926 <![CDATA[]]>\
3927 ",
3928 );
3929 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3930 }
3931
3932 #[test]
3933 fn cdata_and_text() {
3934 let mut de = make_de(
3935 "\
3936 <![CDATA[cdata ]]>\
3937 <?pi 1?><?pi 2?> \
3938 text \
3939 ",
3940 );
3941 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3942 }
3943
3944 #[test]
3945 fn empty_cdata_and_text() {
3946 let mut de = make_de(
3947 "\
3948 <![CDATA[]]>\
3949 <?pi 1?><?pi 2?> \
3950 text \
3951 ",
3952 );
3953 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
3954 }
3955
3956 #[test]
3957 fn cdata_and_cdata() {
3958 let mut de = make_de(
3959 "\
3960 <![CDATA[cdata]]]>\
3961 <?pi 1?><?pi 2?>\
3962 <![CDATA[]>cdata]]>\
3963 ",
3964 );
3965 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3966 }
3967 }
3968 }
3969
3970 /// Tests for https://github.com/tafia/quick-xml/issues/474.
3971 ///
/// These tests ensure that any combination of payload data is processed
/// as expected.
3974 mod triples {
3975 use super::*;
3976 use pretty_assertions::assert_eq;
3977
3978 mod start {
3979 use super::*;
3980
3981 /// <tag1><tag2>...
3982 mod start {
3983 use super::*;
3984 use pretty_assertions::assert_eq;
3985
3986 #[test]
3987 fn start() {
3988 let mut de = make_de("<tag1><tag2><tag3>");
3989 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
3990 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
3991 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
3992 assert_eq!(de.next().unwrap(), DeEvent::Eof);
3993 }
3994
3995 /// Not matching end tag will result to error
3996 #[test]
3997 fn end() {
3998 let mut de = make_de("<tag1><tag2></tag2>");
3999 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4000 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4001 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4002 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4003 }
4004
4005 #[test]
4006 fn text() {
4007 let mut de = make_de("<tag1><tag2> text ");
4008 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4009 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4010 // Text is trimmed from both sides
4011 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4012 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4013 }
4014
4015 #[test]
4016 fn cdata() {
4017 let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4018 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4019 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4020 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4021 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4022 }
4023
4024 #[test]
4025 fn eof() {
4026 let mut de = make_de("<tag1><tag2>");
4027 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4028 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4029 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4030 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4031 }
4032 }
4033
4034 /// <tag></tag>...
4035 mod end {
4036 use super::*;
4037 use pretty_assertions::assert_eq;
4038
4039 #[test]
4040 fn start() {
4041 let mut de = make_de("<tag></tag><tag2>");
4042 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4043 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4044 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4045 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4046 }
4047
4048 #[test]
4049 fn end() {
4050 let mut de = make_de("<tag></tag></tag2>");
4051 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4052 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4053 match de.next() {
4054 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4055 assert_eq!(expected, "");
4056 assert_eq!(found, "tag2");
4057 }
4058 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag2' }})`, but got {:?}", x),
4059 }
4060 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4061 }
4062
4063 #[test]
4064 fn text() {
4065 let mut de = make_de("<tag></tag> text ");
4066 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4067 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4068 // Text is trimmed from both sides
4069 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4070 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4071 }
4072
4073 #[test]
4074 fn cdata() {
4075 let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4076 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4077 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4078 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4079 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4080 }
4081
4082 #[test]
4083 fn eof() {
4084 let mut de = make_de("<tag></tag>");
4085 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4086 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4087 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4088 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4089 }
4090 }
4091
4092 /// <tag> text ...
4093 mod text {
4094 use super::*;
4095 use pretty_assertions::assert_eq;
4096
4097 #[test]
4098 fn start() {
4099 let mut de = make_de("<tag> text <tag2>");
4100 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4101 // Text is trimmed from both sides
4102 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4103 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4104 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4105 }
4106
4107 #[test]
4108 fn end() {
4109 let mut de = make_de("<tag> text </tag>");
4110 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4111 // Text is trimmed from both sides
4112 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4113 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4114 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4115 }
4116
4117 // start::text::text has no difference from start::text
4118
4119 #[test]
4120 fn cdata() {
4121 let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4122 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4123 // Text is trimmed from the start
4124 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4125 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4126 }
4127
4128 #[test]
4129 fn eof() {
4130 let mut de = make_de("<tag> text ");
4131 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4132 // Text is trimmed from both sides
4133 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4134 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4135 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4136 }
4137 }
4138
4139 /// <tag><![CDATA[ cdata ]]>...
4140 mod cdata {
4141 use super::*;
4142 use pretty_assertions::assert_eq;
4143
4144 #[test]
4145 fn start() {
4146 let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4147 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4148 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4149 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4150 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4151 }
4152
4153 #[test]
4154 fn end() {
4155 let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4156 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4157 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4158 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4159 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4160 }
4161
4162 #[test]
4163 fn text() {
4164 let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4165 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4166 // Text is trimmed from the end
4167 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4168 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4169 }
4170
4171 #[test]
4172 fn cdata() {
4173 let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4174 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4175 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4176 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4177 }
4178
4179 #[test]
4180 fn eof() {
4181 let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4182 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4183 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4184 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4185 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4186 }
4187 }
4188 }
4189
/// Input that begins with an end tag always produces an error: no element
/// is open, so the expected name in the mismatch is empty.
#[test]
fn end() {
    let mut reader = make_de("</tag>");
    let (expected, found) = match reader.next() {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            (expected, found)
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    };
    assert_eq!(expected, "");
    assert_eq!(found, "tag");
    assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}
4203
4204 mod text {
4205 use super::*;
4206 use pretty_assertions::assert_eq;
4207
4208 mod start {
4209 use super::*;
4210 use pretty_assertions::assert_eq;
4211
4212 #[test]
4213 fn start() {
4214 let mut de = make_de(" text <tag1><tag2>");
4215 // Text is trimmed from both sides
4216 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4217 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4218 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4219 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4220 }
4221
4222 /// Not matching end tag will result in error
4223 #[test]
4224 fn end() {
4225 let mut de = make_de(" text <tag></tag>");
4226 // Text is trimmed from both sides
4227 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4228 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4229 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4230 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4231 }
4232
4233 #[test]
4234 fn text() {
4235 let mut de = make_de(" text <tag> text2 ");
4236 // Text is trimmed from both sides
4237 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4238 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4239 // Text is trimmed from both sides
4240 assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4241 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4242 }
4243
4244 #[test]
4245 fn cdata() {
4246 let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4247 // Text is trimmed from both sides
4248 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4249 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4250 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4251 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4252 }
4253
4254 #[test]
4255 fn eof() {
4256 // Text is trimmed from both sides
4257 let mut de = make_de(" text <tag>");
4258 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4259 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4260 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4261 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4262 }
4263 }
4264
/// An end tag that has no corresponding start tag always produces an error,
/// even when text precedes it.
#[test]
fn end() {
    let mut reader = make_de(" text </tag>");
    // Both the leading and the trailing space are trimmed
    assert_eq!(reader.next().unwrap(), DeEvent::Text("text".into()));
    let (expected, found) = match reader.next() {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            (expected, found)
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    };
    assert_eq!(expected, "");
    assert_eq!(found, "tag");
    assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}
4280
4281 // text::text::something is equivalent to text::something
4282
4283 mod cdata {
4284 use super::*;
4285 use pretty_assertions::assert_eq;
4286
4287 #[test]
4288 fn start() {
4289 let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4290 // Text is trimmed from the start
4291 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4292 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4293 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4294 }
4295
4296 #[test]
4297 fn end() {
4298 let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4299 // Text is trimmed from the start
4300 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4301 match de.next() {
4302 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4303 assert_eq!(expected, "");
4304 assert_eq!(found, "tag");
4305 }
4306 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4307 }
4308 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4309 }
4310
4311 #[test]
4312 fn text() {
4313 let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4314 // Text is trimmed from the start and from the end
4315 assert_eq!(
4316 de.next().unwrap(),
4317 DeEvent::Text("text cdata text2".into())
4318 );
4319 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4320 }
4321
4322 #[test]
4323 fn cdata() {
4324 let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4325 // Text is trimmed from the start
4326 assert_eq!(
4327 de.next().unwrap(),
4328 DeEvent::Text("text cdata cdata2 ".into())
4329 );
4330 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4331 }
4332
4333 #[test]
4334 fn eof() {
4335 let mut de = make_de(" text <![CDATA[ cdata ]]>");
4336 // Text is trimmed from the start
4337 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4338 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4339 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4340 }
4341 }
4342 }
4343
4344 mod cdata {
4345 use super::*;
4346 use pretty_assertions::assert_eq;
4347
4348 mod start {
4349 use super::*;
4350 use pretty_assertions::assert_eq;
4351
4352 #[test]
4353 fn start() {
4354 let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4355 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4356 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4357 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4358 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4359 }
4360
4361 /// Not matching end tag will result in error
4362 #[test]
4363 fn end() {
4364 let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4365 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4366 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4367 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4368 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4369 }
4370
4371 #[test]
4372 fn text() {
4373 let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4374 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4375 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4376 // Text is trimmed from both sides
4377 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4378 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4379 }
4380
4381 #[test]
4382 fn cdata() {
4383 let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4384 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4385 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4386 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4387 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4388 }
4389
4390 #[test]
4391 fn eof() {
4392 let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4393 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4394 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4395 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4396 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4397 }
4398 }
4399
/// An end tag that has no corresponding start tag always produces an error,
/// even when CDATA precedes it.
#[test]
fn end() {
    let mut reader = make_de("<![CDATA[ cdata ]]></tag>");
    assert_eq!(reader.next().unwrap(), DeEvent::Text(" cdata ".into()));
    let (expected, found) = match reader.next() {
        Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
            (expected, found)
        }
        other => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", other),
    };
    assert_eq!(expected, "");
    assert_eq!(found, "tag");
    assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}
4414
4415 mod text {
4416 use super::*;
4417 use pretty_assertions::assert_eq;
4418
4419 #[test]
4420 fn start() {
4421 let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4422 // Text is trimmed from the end
4423 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4424 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4425 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4426 }
4427
4428 #[test]
4429 fn end() {
4430 let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4431 // Text is trimmed from the end
4432 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4433 match de.next() {
4434 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4435 assert_eq!(expected, "");
4436 assert_eq!(found, "tag");
4437 }
4438 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4439 }
4440 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4441 }
4442
4443 // cdata::text::text is equivalent to cdata::text
4444
4445 #[test]
4446 fn cdata() {
4447 let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4448 assert_eq!(
4449 de.next().unwrap(),
4450 DeEvent::Text(" cdata text cdata2 ".into())
4451 );
4452 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4453 }
4454
4455 #[test]
4456 fn eof() {
4457 let mut de = make_de("<![CDATA[ cdata ]]> text ");
4458 // Text is trimmed from the end
4459 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4460 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4461 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4462 }
4463 }
4464
4465 mod cdata {
4466 use super::*;
4467 use pretty_assertions::assert_eq;
4468
4469 #[test]
4470 fn start() {
4471 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4472 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4473 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4474 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4475 }
4476
4477 #[test]
4478 fn end() {
4479 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4480 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4481 match de.next() {
4482 Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
4483 assert_eq!(expected, "");
4484 assert_eq!(found, "tag");
4485 }
4486 x => panic!("Expected `InvalidXml(EndEventMismatch {{ expected = '', found = 'tag' }})`, but got {:?}", x),
4487 }
4488 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4489 }
4490
4491 #[test]
4492 fn text() {
4493 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4494 // Text is trimmed from the end
4495 assert_eq!(
4496 de.next().unwrap(),
4497 DeEvent::Text(" cdata cdata2 text".into())
4498 );
4499 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4500 }
4501
4502 #[test]
4503 fn cdata() {
4504 let mut de =
4505 make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4506 assert_eq!(
4507 de.next().unwrap(),
4508 DeEvent::Text(" cdata cdata2 cdata3 ".into())
4509 );
4510 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4511 }
4512
4513 #[test]
4514 fn eof() {
4515 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4516 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4517 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4518 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4519 }
4520 }
4521 }
4522 }
4523}