quick_xml/de/map.rs
1//! Serde `Deserializer` module
2
3use crate::{
4 de::key::QNameDeserializer,
5 de::resolver::EntityResolver,
6 de::simple_type::SimpleTypeDeserializer,
7 de::text::TextDeserializer,
8 de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9 encoding::Decoder,
10 errors::serialize::DeError,
11 events::attributes::IterState,
12 events::BytesStart,
13 name::QName,
14};
15use serde::de::value::BorrowedStrDeserializer;
16use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
17use serde::serde_if_integer128;
18use std::borrow::Cow;
19use std::ops::Range;
20
/// Defines a source that should be used to deserialize a value in the next call
/// to [`next_value_seed()`](MapAccess::next_value_seed)
#[derive(Debug, PartialEq)]
enum ValueSource {
    /// Source is not specified, because [`next_key_seed()`] has not been called yet.
    /// This is the initial state and also the state after deserializing a value
    /// (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state returns a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// Next value should be deserialized from an attribute value; the value is
    /// located at the specified span of the start tag buffer.
    Attribute(Range<usize>),
    /// Value should be deserialized from the text content of the XML node, which
    /// is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// Next value should be deserialized from an element with any name, except
    /// elements with a name matching one of the struct fields. The corresponding
    /// tag name will always be associated with a field with name [`VALUE_KEY`].
    ///
    /// This state is set when a call to [`peek()`] returns a [`Start`] event whose
    /// [`name()`] is not listed in the [list of known fields] (which for a struct
    /// is a list of field names, and for a map is an empty list), _and_ the
    /// struct has a field with the special name [`VALUE_KEY`].
    ///
    /// This state is also set when a text event is peeked and the struct has
    /// a field with the special name [`VALUE_KEY`].
    ///
    /// When in this state, the next event returned by [`next()`] will be a [`Start`],
    /// which represents both a key and a value. The value is deserialized from
    /// the whole element, and how it is done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in this state.
    ///
    /// Because in this state any encountered `<tag>` is mapped to the [`VALUE_KEY`]
    /// field, it is possible to use the tag name as an enum discriminator, so `enum`s
    /// can be deserialized from such XMLs:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///   variant1,
    ///   variant2,
    /// }
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   field: Enum,
    /// }
    /// ```
    ///
    /// That is possible because the value deserializer has access to the full content
    /// of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node, including
    /// the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Text`]: Self::Text
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// Next value should be deserialized from an element with a dedicated name.
    /// If the deserialized type is a sequence, then that sequence will collect all
    /// elements with the same name until it is filled. If not all elements
    /// are consumed, the rest will be ignored.
    ///
    /// This state is set when a call to [`peek()`] returns a [`Start`] event whose
    /// [`name()`] represents a field name. That name will be deserialized as a key.
    ///
    /// When in this state, the next event returned by [`next()`] will be a [`Start`],
    /// which represents both a key and a value. The value is deserialized from
    /// the whole element, and how it is done is determined by the value deserializer.
    /// The [`ElementMapAccess`] does not consume any events in this state.
    ///
    /// An illustration below shows what data is used to deserialize key and value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer will have access to the full content of a `<key>`
    /// node (including the tag name), it will not get much benefit from that,
    /// because the tag name will always be fixed for a given map field (equal to the
    /// field name). So, if the field type is an `enum`, it cannot select its
    /// variant based on the tag name. If that is needed, then the [`Content`] variant
    /// of this enum should be used. Such usage is enabled by annotating a struct
    /// field as a "content" field, which is implemented by giving the field the special
    /// [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
149
150////////////////////////////////////////////////////////////////////////////////////////////////////
151
/// A deserializer that extracts map-like structures from an XML. This deserializer
/// represents one XML tag:
///
/// ```xml
/// <tag>...</tag>
/// ```
///
/// The name of this tag is stored in the [`Self::start`] property.
///
/// # Lifetimes
///
/// - `'de` lifetime represents a buffer, from which deserialized values can
///   borrow their data. Depending on the underlying reader, this can be an
///   internal buffer of the deserializer (i.e. the deserializer itself) or an input
///   (in that case it is possible to approach zero-copy deserialization).
///
/// - `'d` lifetime represents a parent deserializer, which could own the data
///   buffer.
pub(crate) struct ElementMapAccess<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Tag -- owner of attributes
    start: BytesStart<'de>,
    /// Parent deserializer from which the events following the start tag are
    /// peeked and read.
    de: &'d mut Deserializer<'de, R, E>,
    /// State of the iterator over attributes. Contains the next position in the
    /// inner `start` slice, from which the next attribute should be parsed.
    iter: IterState,
    /// Current state of the accessor that determines what the next call to API
    /// methods should return.
    source: ValueSource,
    /// List of field names of the struct. It is empty for maps
    fields: &'static [&'static str],
    /// If `true`, then the deserialized struct has a field with a special name:
    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
    /// of an XML node, including tag name:
    ///
    /// ```xml
    /// <tag>value for VALUE_KEY field</tag>
    /// ```
    has_value_field: bool,
}
195
196impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
197where
198 R: XmlRead<'de>,
199 E: EntityResolver,
200{
201 /// Create a new ElementMapAccess
202 pub fn new(
203 de: &'d mut Deserializer<'de, R, E>,
204 start: BytesStart<'de>,
205 fields: &'static [&'static str],
206 ) -> Result<Self, DeError> {
207 Ok(Self {
208 de,
209 iter: IterState::new(start.name().as_ref().len(), false),
210 start,
211 source: ValueSource::Unknown,
212 fields,
213 has_value_field: fields.contains(&VALUE_KEY),
214 })
215 }
216}
217
218impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
219where
220 R: XmlRead<'de>,
221 E: EntityResolver,
222{
223 type Error = DeError;
224
225 fn next_key_seed<K: DeserializeSeed<'de>>(
226 &mut self,
227 seed: K,
228 ) -> Result<Option<K::Value>, Self::Error> {
229 debug_assert_eq!(self.source, ValueSource::Unknown);
230
231 // FIXME: There error positions counted from the start of tag name - need global position
232 let slice = &self.start.buf;
233 let decoder = self.de.reader.decoder();
234
235 if let Some(a) = self.iter.next(slice).transpose()? {
236 // try getting map from attributes (key= "value")
237 let (key, value) = a.into();
238 self.source = ValueSource::Attribute(value.unwrap_or_default());
239
240 let de = QNameDeserializer::from_attr(QName(&slice[key]), decoder)?;
241 seed.deserialize(de).map(Some)
242 } else {
243 // try getting from events (<key>value</key>)
244 match self.de.peek()? {
245 // We shouldn't have both `$value` and `$text` fields in the same
246 // struct, so if we have `$value` field, the we should deserialize
247 // text content to `$value`
248 DeEvent::Text(_) if self.has_value_field => {
249 self.source = ValueSource::Content;
250 // Deserialize `key` from special attribute name which means
251 // that value should be taken from the text content of the
252 // XML node
253 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
254 seed.deserialize(de).map(Some)
255 }
256 DeEvent::Text(_) => {
257 self.source = ValueSource::Text;
258 // Deserialize `key` from special attribute name which means
259 // that value should be taken from the text content of the
260 // XML node
261 let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
262 seed.deserialize(de).map(Some)
263 }
264 // Used to deserialize collections of enums, like:
265 // <root>
266 // <A/>
267 // <B/>
268 // <C/>
269 // </root>
270 //
271 // into
272 //
273 // enum Enum { A, B, С }
274 // struct Root {
275 // #[serde(rename = "$value")]
276 // items: Vec<Enum>,
277 // }
278 // TODO: This should be handled by #[serde(flatten)]
279 // See https://github.com/serde-rs/serde/issues/1905
280 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? => {
281 self.source = ValueSource::Content;
282
283 let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
284 seed.deserialize(de).map(Some)
285 }
286 DeEvent::Start(e) => {
287 self.source = ValueSource::Nested;
288
289 let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?;
290 seed.deserialize(de).map(Some)
291 }
292 // Stop iteration after reaching a closing tag
293 // The matching tag name is guaranteed by the reader if our
294 // deserializer implementation is correct
295 DeEvent::End(e) => {
296 debug_assert_eq!(self.start.name(), e.name());
297 // Consume End
298 self.de.next()?;
299 Ok(None)
300 }
301 // We cannot get `Eof` legally, because we always inside of the
302 // opened tag `self.start`
303 DeEvent::Eof => Err(DeError::UnexpectedEof),
304 }
305 }
306 }
307
308 fn next_value_seed<K: DeserializeSeed<'de>>(
309 &mut self,
310 seed: K,
311 ) -> Result<K::Value, Self::Error> {
312 match std::mem::replace(&mut self.source, ValueSource::Unknown) {
313 ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
314 &self.start.buf,
315 value,
316 true,
317 self.de.reader.decoder(),
318 )),
319 // This arm processes the following XML shape:
320 // <any-tag>
321 // text value
322 // </any-tag>
323 // The whole map represented by an `<any-tag>` element, the map key
324 // is implicit and equals to the `TEXT_KEY` constant, and the value
325 // is a `Text` event (the value deserializer will see that event)
326 // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
327 ValueSource::Text => match self.de.next()? {
328 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
329 // SAFETY: We set `Text` only when we seen `Text`
330 _ => unreachable!(),
331 },
332 // This arm processes the following XML shape:
333 // <any-tag>
334 // <any>...</any>
335 // </any-tag>
336 // The whole map represented by an `<any-tag>` element, the map key
337 // is implicit and equals to the `VALUE_KEY` constant, and the value
338 // is a `Start` event (the value deserializer will see that event)
339 ValueSource::Content => seed.deserialize(MapValueDeserializer {
340 map: self,
341 fixed_name: false,
342 }),
343 // This arm processes the following XML shape:
344 // <any-tag>
345 // <tag>...</tag>
346 // </any-tag>
347 // The whole map represented by an `<any-tag>` element, the map key
348 // is a `tag`, and the value is a `Start` event (the value deserializer
349 // will see that event)
350 ValueSource::Nested => seed.deserialize(MapValueDeserializer {
351 map: self,
352 fixed_name: true,
353 }),
354 ValueSource::Unknown => Err(DeError::KeyNotRead),
355 }
356 }
357}
358
359////////////////////////////////////////////////////////////////////////////////////////////////////
360
/// A deserializer for a value of map or struct. That deserializer processes
/// events for primitive types and sequences slightly differently than
/// a [`Deserializer`].
///
/// This deserializer is used to deserialize two kinds of fields:
/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
///   that case field [`Self::fixed_name`] is `true`;
/// - the special `$value` field which represents any tag or a textual content
///   in the XML which would be found in the document, in that case field
///   [`Self::fixed_name`] is `false`.
///
/// This deserializer can see two kinds of events at the start:
/// - [`DeEvent::Text`]
/// - [`DeEvent::Start`]
///
/// which represent two possible variants of items:
/// ```xml
/// <item>A tag item</item>
/// A text item
/// <yet another="tag item"/>
/// ```
///
/// This deserializer is very similar to an [`ElementDeserializer`]. The only difference
/// is in the `deserialize_seq` method. This deserializer will act as an iterator
/// over tags / text within its parent tag, whereas the [`ElementDeserializer`]
/// will represent sequences as an `xs:list`.
///
/// This deserializer processes items as following:
/// - primitives (numbers, booleans, strings, characters) are deserialized either
///   from a text content, or unwrapped from one level of a tag. So, `123` and
///   `<int>123</int>` both can be deserialized into a `u32`;
/// - `Option`:
///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer,
///     including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole text or element subtree;
/// - newtype structs are deserialized by forwarding deserialization of the inner type
///   with the same deserializer;
/// - sequences, tuples and tuple structs are deserialized by iterating within the
///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - in case of a [`DeEvent::Text`] event the text content is deserialized as
///     a `$text` variant. Enum content is deserialized from the text using
///     [`SimpleTypeDeserializer`];
///   - in case of a [`DeEvent::Start`] event the tag name is deserialized as
///     an enum tag, and the content inside is deserialized as an enum content.
///     Depending on a variant kind deserialization is performed as:
///     - unit variants: consuming text content or a subtree;
///     - newtype variants: forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct MapValueDeserializer<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this deserializer. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Whether this deserializer was created for deserialization from an element
    /// with a fixed name, or elements with different names or even text are allowed.
    ///
    /// If this field is `true`, we process the `<tag>` element in the following XML shape:
    ///
    /// ```xml
    /// <any-tag>
    ///   <tag>...</tag>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`,
    /// and the value starts with a `Start("tag")` event (the value deserializer will
    /// see that event first) and extends to the matching `End("tag")` event.
    /// In order to deserialize primitives (such as `usize`) we need to allow
    /// looking inside one level of tags, so the
    ///
    /// ```xml
    /// <tag>42</tag>
    /// ```
    ///
    /// could be deserialized into `42usize` without problems, and at the same time
    ///
    /// ```xml
    /// <tag>
    ///   <key1/>
    ///   <key2/>
    ///   <!--...-->
    /// </tag>
    /// ```
    /// could be deserialized to a struct.
    ///
    /// If this field is `false`, we process one of the following XML shapes:
    ///
    /// ```xml
    /// <any-tag>
    ///   text value
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <![CDATA[cdata value]]>
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///   <any>...</any>
    /// </any-tag>
    /// ```
    ///
    /// The whole map is represented by an `<any-tag>` element, the map key is
    /// implicit and equal to the [`VALUE_KEY`] constant, and the value is
    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
    /// those events). In the first two cases the value of this field does not matter
    /// (because we already see the textual event and there is no reason to look
    /// "inside" something), but in the last case the primitives should raise
    /// a deserialization error, because that means that you are trying to deserialize
    /// the following struct:
    ///
    /// ```ignore
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   any_name: String,
    /// }
    /// ```
    /// which means that `any_name` should get the content of the `<any-tag>` element.
    ///
    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
    /// but those fields should be explicitly marked that they want to get any
    /// possible markup as a `String`, and that mark is different from marking them
    /// as accepting "text content", which is what `$text` currently means.
    ///
    /// [`Text`]: DeEvent::Text
    /// [`Start`]: DeEvent::Start
    fixed_name: bool,
}
501
502impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
503where
504 R: XmlRead<'de>,
505 E: EntityResolver,
506{
507 /// Returns a next string as concatenated content of consequent [`Text`] and
508 /// [`CData`] events, used inside [`deserialize_primitives!()`].
509 ///
510 /// [`Text`]: crate::events::Event::Text
511 /// [`CData`]: crate::events::Event::CData
512 #[inline]
513 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
514 // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
515 self.map.de.read_string_impl(self.fixed_name)
516 }
517}
518
519impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
520where
521 R: XmlRead<'de>,
522 E: EntityResolver,
523{
524 type Error = DeError;
525
526 deserialize_primitives!(mut);
527
528 #[inline]
529 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
530 where
531 V: Visitor<'de>,
532 {
533 self.map.de.deserialize_unit(visitor)
534 }
535
536 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
537 where
538 V: Visitor<'de>,
539 {
540 match self.map.de.peek()? {
541 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
542 _ => visitor.visit_some(self),
543 }
544 }
545
546 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
547 /// with the same deserializer.
548 fn deserialize_newtype_struct<V>(
549 self,
550 _name: &'static str,
551 visitor: V,
552 ) -> Result<V::Value, Self::Error>
553 where
554 V: Visitor<'de>,
555 {
556 visitor.visit_newtype_struct(self)
557 }
558
559 /// Deserializes each `<tag>` in
560 /// ```xml
561 /// <any-tag>
562 /// <tag>...</tag>
563 /// <tag>...</tag>
564 /// <tag>...</tag>
565 /// </any-tag>
566 /// ```
567 /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
568 /// and a `<tag>` is a sequential field of that map.
569 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
570 where
571 V: Visitor<'de>,
572 {
573 let filter = if self.fixed_name {
574 match self.map.de.peek()? {
575 // Clone is cheap if event borrows from the input
576 DeEvent::Start(e) => TagFilter::Include(e.clone()),
577 // SAFETY: we use that deserializer with `fixed_name == true`
578 // only from the `ElementMapAccess::next_value_seed` and only when we
579 // peeked `Start` event
580 _ => unreachable!(),
581 }
582 } else {
583 TagFilter::Exclude(self.map.fields)
584 };
585 visitor.visit_seq(MapValueSeqAccess {
586 #[cfg(feature = "overlapped-lists")]
587 checkpoint: self.map.de.skip_checkpoint(),
588
589 map: self.map,
590 filter,
591 })
592 }
593
594 #[inline]
595 fn deserialize_struct<V>(
596 self,
597 name: &'static str,
598 fields: &'static [&'static str],
599 visitor: V,
600 ) -> Result<V::Value, Self::Error>
601 where
602 V: Visitor<'de>,
603 {
604 self.map.de.deserialize_struct(name, fields, visitor)
605 }
606
607 fn deserialize_enum<V>(
608 self,
609 _name: &'static str,
610 _variants: &'static [&'static str],
611 visitor: V,
612 ) -> Result<V::Value, Self::Error>
613 where
614 V: Visitor<'de>,
615 {
616 if self.fixed_name {
617 match self.map.de.next()? {
618 // Handles <field>UnitEnumVariant</field>
619 DeEvent::Start(_) => {
620 // skip <field>, read text after it and ensure that it is ended by </field>
621 let text = self.map.de.read_text()?;
622 if text.is_empty() {
623 // Map empty text (<field/>) to a special `$text` variant
624 visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
625 } else {
626 visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
627 }
628 }
629 // SAFETY: we use that deserializer with `fixed_name == true`
630 // only from the `MapAccess::next_value_seed` and only when we
631 // peeked `Start` event
632 _ => unreachable!(),
633 }
634 } else {
635 visitor.visit_enum(self)
636 }
637 }
638
639 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
640 where
641 V: Visitor<'de>,
642 {
643 match self.map.de.peek()? {
644 DeEvent::Text(_) => self.deserialize_str(visitor),
645 _ => self.deserialize_map(visitor),
646 }
647 }
648}
649
650impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
651where
652 R: XmlRead<'de>,
653 E: EntityResolver,
654{
655 type Error = DeError;
656 type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
657
658 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
659 where
660 V: DeserializeSeed<'de>,
661 {
662 let decoder = self.map.de.reader.decoder();
663 let (name, is_text) = match self.map.de.peek()? {
664 DeEvent::Start(e) => (
665 seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
666 false,
667 ),
668 DeEvent::Text(_) => (
669 seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
670 true,
671 ),
672 DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().into_inner().to_vec())),
673 DeEvent::Eof => return Err(DeError::UnexpectedEof),
674 };
675 Ok((
676 name,
677 MapValueVariantAccess {
678 map: self.map,
679 is_text,
680 },
681 ))
682 }
683}
684
/// An accessor to the content of an enum variant whose discriminator was
/// already determined by [`MapValueDeserializer`] from the peeked event.
/// The event itself has not been consumed when this accessor is created.
struct MapValueVariantAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Access to the map that created this enum accessor. Gives access to the
    /// context, such as the list of fields, that the current map knows about.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// `true` if the variant should be deserialized from a textual content
    /// and `false` if from a tag
    is_text: bool,
}
697
698impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
699where
700 R: XmlRead<'de>,
701 E: EntityResolver,
702{
703 type Error = DeError;
704
705 fn unit_variant(self) -> Result<(), Self::Error> {
706 match self.map.de.next()? {
707 // Consume subtree
708 DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
709 // Does not needed to deserialize using SimpleTypeDeserializer, because
710 // it returns `()` when `deserialize_unit()` is requested
711 DeEvent::Text(_) => Ok(()),
712 // SAFETY: the other events are filtered in `variant_seed()`
713 _ => unreachable!("Only `Start` or `Text` events are possible here"),
714 }
715 }
716
717 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
718 where
719 T: DeserializeSeed<'de>,
720 {
721 if self.is_text {
722 match self.map.de.next()? {
723 DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
724 // SAFETY: the other events are filtered in `variant_seed()`
725 _ => unreachable!("Only `Text` events are possible here"),
726 }
727 } else {
728 seed.deserialize(MapValueDeserializer {
729 map: self.map,
730 // Because element name already was either mapped to a field name,
731 // or to a variant name, we should not treat it as variable
732 fixed_name: true,
733 })
734 }
735 }
736
737 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
738 where
739 V: Visitor<'de>,
740 {
741 if self.is_text {
742 match self.map.de.next()? {
743 DeEvent::Text(e) => {
744 SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
745 }
746 // SAFETY: the other events are filtered in `variant_seed()`
747 _ => unreachable!("Only `Text` events are possible here"),
748 }
749 } else {
750 MapValueDeserializer {
751 map: self.map,
752 // Because element name already was either mapped to a field name,
753 // or to a variant name, we should not treat it as variable
754 fixed_name: true,
755 }
756 .deserialize_tuple(len, visitor)
757 }
758 }
759
760 fn struct_variant<V>(
761 self,
762 fields: &'static [&'static str],
763 visitor: V,
764 ) -> Result<V::Value, Self::Error>
765 where
766 V: Visitor<'de>,
767 {
768 match self.map.de.next()? {
769 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)?),
770 DeEvent::Text(e) => {
771 SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
772 }
773 // SAFETY: the other events are filtered in `variant_seed()`
774 _ => unreachable!("Only `Start` or `Text` events are possible here"),
775 }
776 }
777}
778
779////////////////////////////////////////////////////////////////////////////////////////////////////
780
781/// Check if tag `start` is included in the `fields` list. `decoder` is used to
782/// get a string representation of a tag.
783///
784/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
785fn not_in(
786 fields: &'static [&'static str],
787 start: &BytesStart,
788 decoder: Decoder,
789) -> Result<bool, DeError> {
790 let tag = decoder.decode(start.name().into_inner())?;
791
792 Ok(fields.iter().all(|&field| field != tag.as_ref()))
793}
794
/// A filter that determines what tags should form a sequence.
///
/// There are two types of sequences:
/// - a sequence where each element is represented by tags with the same name
/// - a sequence where each element can have a different tag
///
/// The first variant could represent a collection of structs, the second --
/// a collection of enum variants.
///
/// In the second case we don't know what tag name should be expected as a
/// sequence element, so we accept any element. Since the sequences are flattened
/// into maps, we skip elements which have dedicated fields in a struct by using an
/// `Exclude` filter that filters out elements with names matching field names
/// from the struct.
///
/// # Lifetimes
///
/// `'de` represents a lifetime of the XML input, when the filter stores the
/// dedicated tag name
#[derive(Debug)]
enum TagFilter<'de> {
    /// A `SeqAccess` interested only in tags with the specified name to deserialize
    /// an XML like this:
    ///
    /// ```xml
    /// <...>
    ///   <tag/>
    ///   <tag/>
    ///   <tag/>
    ///   ...
    /// </...>
    /// ```
    ///
    /// The tag name is stored inside (`b"tag"` for that example)
    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
    /// A `SeqAccess` interested in tags with any name, except explicitly listed.
    /// Excluded tags are used as struct field names and therefore should not
    /// fall into a `$value` category
    Exclude(&'static [&'static str]),
}
835
836impl<'de> TagFilter<'de> {
837 fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
838 match self {
839 Self::Include(n) => Ok(n.name() == start.name()),
840 Self::Exclude(fields) => not_in(fields, start, decoder),
841 }
842 }
843}
844
845////////////////////////////////////////////////////////////////////////////////////////////////////
846
/// An accessor to sequence elements forming a value for a struct field.
/// Technically, this sequence is flattened out into the structure, and sequence
/// elements are overlapped with other fields of the structure. Each call to
/// [`Self::next_element_seed`] consumes the next sub-tree or a consequent list
/// of [`Text`] and [`CData`] events.
///
/// ```xml
/// <>
///   ...
///   <item>The is the one item</item>
///   This is <![CDATA[one another]]> item<!-- even when--> it splitted by comments
///   <tag>...and that is the third!</tag>
///   ...
/// </>
/// ```
///
/// Depending on [`Self::filter`], only some of those possible constructs would be
/// an element.
///
/// [`Text`]: crate::events::Event::Text
/// [`CData`]: crate::events::Event::CData
struct MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Accessor to the map that created this accessor and to the deserializer for
    /// the sequence items.
    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
    /// Filter that determines whether a tag is a part of this sequence.
    ///
    /// When feature [`overlapped-lists`] is not activated, iteration will stop
    /// when a tag that does not pass this filter is found.
    ///
    /// When feature [`overlapped-lists`] is activated, all tags that do not pass
    /// this check will be skipped.
    ///
    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
    filter: TagFilter<'de>,

    /// Checkpoint after which all skipped events should be returned. All events
    /// that were skipped before creating this checkpoint will still stay buffered
    /// and will not be returned
    #[cfg(feature = "overlapped-lists")]
    checkpoint: usize,
}
893
#[cfg(feature = "overlapped-lists")]
impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// When the sequence accessor goes away, asks the parent deserializer to
    /// start replaying from the checkpoint taken when this accessor was created.
    fn drop(&mut self) {
        let checkpoint = self.checkpoint;
        self.map.de.start_replay(checkpoint);
    }
}
904
905impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
906where
907 R: XmlRead<'de>,
908 E: EntityResolver,
909{
910 type Error = DeError;
911
912 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
913 where
914 T: DeserializeSeed<'de>,
915 {
916 let decoder = self.map.de.reader.decoder();
917 loop {
918 break match self.map.de.peek()? {
919 // If we see a tag that we not interested, skip it
920 #[cfg(feature = "overlapped-lists")]
921 DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => {
922 self.map.de.skip()?;
923 continue;
924 }
925 // Stop iteration when list elements ends
926 #[cfg(not(feature = "overlapped-lists"))]
927 DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
928
929 // Stop iteration after reaching a closing tag
930 DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
931 // This is a unmatched closing tag, so the XML is invalid
932 DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
933 // We cannot get `Eof` legally, because we always inside of the
934 // opened tag `self.map.start`
935 DeEvent::Eof => Err(DeError::UnexpectedEof),
936
937 DeEvent::Text(_) => match self.map.de.next()? {
938 DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
939 // SAFETY: we just checked that the next event is Text
940 _ => unreachable!(),
941 },
942 DeEvent::Start(_) => match self.map.de.next()? {
943 DeEvent::Start(start) => seed
944 .deserialize(ElementDeserializer {
945 start,
946 de: self.map.de,
947 })
948 .map(Some),
949 // SAFETY: we just checked that the next event is Start
950 _ => unreachable!(),
951 },
952 };
953 }
954 }
955}
956
957////////////////////////////////////////////////////////////////////////////////////////////////////
958
/// A deserializer for a single tag item of a mixed sequence of tags and text.
///
/// This deserializer is very similar to a [`MapValueDeserializer`] (when it
/// processes the [`DeEvent::Start`] event). The only difference is in the
/// [`deserialize_seq`] method. This deserializer will perform deserialization
/// from the textual content between start and end events, whereas the
/// [`MapValueDeserializer`] will iterate over tags / text within its parent tag.
///
/// This deserializer processes items as follows:
/// - numbers are parsed from a text content between tags using [`FromStr`]. So,
///   `<int>123</int>` can be deserialized into an `u32`;
/// - booleans are converted from a text content between tags according to the XML
///   [specification]:
///   - `"true"` and `"1"` are converted to `true`;
///   - `"false"` and `"0"` are converted to `false`;
/// - strings are returned as a text content between tags;
/// - characters are also returned as strings. If the string contains more than
///   one character or is empty, it is the responsibility of a type to return an error;
/// - `Option` is always deserialized as `Some` using the same deserializer,
///   including `<tag/>` or `<tag></tag>`;
/// - units (`()`) and unit structs consume the whole element subtree;
/// - newtype structs forward deserialization to the inner type using the same
///   deserializer;
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
///   (this is the difference): text content between tags is passed to
///   [`SimpleTypeDeserializer`];
/// - structs and maps are deserialized using a new instance of [`ElementMapAccess`];
/// - enums:
///   - the variant name is deserialized using [`QNameDeserializer`] from the element name;
///   - the content is deserialized using the same deserializer:
///     - unit variants: consume a subtree and return `()`;
///     - newtype variants forward deserialization to the inner type using
///       this deserializer;
///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
///     - struct variants: call [`deserialize_struct`] of this deserializer.
///
/// [`deserialize_seq`]: #method.deserialize_seq
/// [`FromStr`]: std::str::FromStr
/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
/// [`deserialize_tuple`]: #method.deserialize_tuple
/// [`deserialize_struct`]: #method.deserialize_struct
struct ElementDeserializer<'de, 'd, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// Start tag of the element being deserialized. Its name is used to find
    /// the end of the element and to deserialize an enum variant name.
    start: BytesStart<'de>,
    /// Deserializer from which the content of the element is read.
    de: &'d mut Deserializer<'de, R, E>,
}
1008
1009impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1010where
1011 R: XmlRead<'de>,
1012 E: EntityResolver,
1013{
1014 /// Returns a next string as concatenated content of consequent [`Text`] and
1015 /// [`CData`] events, used inside [`deserialize_primitives!()`].
1016 ///
1017 /// [`Text`]: crate::events::Event::Text
1018 /// [`CData`]: crate::events::Event::CData
1019 #[inline]
1020 fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1021 self.de.read_text()
1022 }
1023}
1024
1025impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1026where
1027 R: XmlRead<'de>,
1028 E: EntityResolver,
1029{
1030 type Error = DeError;
1031
1032 deserialize_primitives!(mut);
1033
1034 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1035 where
1036 V: Visitor<'de>,
1037 {
1038 // Consume subtree
1039 self.de.read_to_end(self.start.name())?;
1040 visitor.visit_unit()
1041 }
1042
1043 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1044 where
1045 V: Visitor<'de>,
1046 {
1047 visitor.visit_some(self)
1048 }
1049
1050 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1051 /// with this deserializer.
1052 fn deserialize_newtype_struct<V>(
1053 self,
1054 _name: &'static str,
1055 visitor: V,
1056 ) -> Result<V::Value, Self::Error>
1057 where
1058 V: Visitor<'de>,
1059 {
1060 visitor.visit_newtype_struct(self)
1061 }
1062
1063 /// This method deserializes a sequence inside of element that itself is a
1064 /// sequence element:
1065 ///
1066 /// ```xml
1067 /// <>
1068 /// ...
1069 /// <self>inner sequence</self>
1070 /// <self>inner sequence</self>
1071 /// <self>inner sequence</self>
1072 /// ...
1073 /// </>
1074 /// ```
1075 fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1076 where
1077 V: Visitor<'de>,
1078 {
1079 let text = self.read_string()?;
1080 SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1081 }
1082
1083 fn deserialize_struct<V>(
1084 self,
1085 _name: &'static str,
1086 fields: &'static [&'static str],
1087 visitor: V,
1088 ) -> Result<V::Value, Self::Error>
1089 where
1090 V: Visitor<'de>,
1091 {
1092 visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)?)
1093 }
1094
1095 fn deserialize_enum<V>(
1096 self,
1097 _name: &'static str,
1098 _variants: &'static [&'static str],
1099 visitor: V,
1100 ) -> Result<V::Value, Self::Error>
1101 where
1102 V: Visitor<'de>,
1103 {
1104 visitor.visit_enum(self)
1105 }
1106
1107 #[inline]
1108 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1109 where
1110 V: Visitor<'de>,
1111 {
1112 self.deserialize_map(visitor)
1113 }
1114}
1115
1116impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1117where
1118 R: XmlRead<'de>,
1119 E: EntityResolver,
1120{
1121 type Error = DeError;
1122 type Variant = Self;
1123
1124 fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1125 where
1126 V: DeserializeSeed<'de>,
1127 {
1128 let name = seed.deserialize(QNameDeserializer::from_elem(
1129 self.start.raw_name(),
1130 self.de.reader.decoder(),
1131 )?)?;
1132 Ok((name, self))
1133 }
1134}
1135
1136impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1137where
1138 R: XmlRead<'de>,
1139 E: EntityResolver,
1140{
1141 type Error = DeError;
1142
1143 fn unit_variant(self) -> Result<(), Self::Error> {
1144 // Consume subtree
1145 self.de.read_to_end(self.start.name())
1146 }
1147
1148 fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1149 where
1150 T: DeserializeSeed<'de>,
1151 {
1152 seed.deserialize(self)
1153 }
1154
1155 #[inline]
1156 fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1157 where
1158 V: Visitor<'de>,
1159 {
1160 self.deserialize_tuple(len, visitor)
1161 }
1162
1163 #[inline]
1164 fn struct_variant<V>(
1165 self,
1166 fields: &'static [&'static str],
1167 visitor: V,
1168 ) -> Result<V::Value, Self::Error>
1169 where
1170 V: Visitor<'de>,
1171 {
1172 self.deserialize_struct("", fields, visitor)
1173 }
1174}
1175
1176////////////////////////////////////////////////////////////////////////////////////////////////////
1177
/// Checks `not_in` against a `<tag>` start tag: it must report `true` for an
/// empty list and for a list without `"tag"`, and `false` for a list that
/// contains `"tag"`.
#[test]
fn test_not_in() {
    let tag = BytesStart::new("tag");

    // Nothing is listed, so the tag cannot be in the list
    assert!(not_in(&[], &tag, Decoder::utf8()).unwrap());
    // The tag name is absent from a non-empty list
    assert!(not_in(&["no", "such", "tags"], &tag, Decoder::utf8()).unwrap());
    // The tag name is present in the list
    assert!(!not_in(&["some", "tag", "included"], &tag, Decoder::utf8()).unwrap());
}