quick_xml/de/
map.rs

1//! Serde `Deserializer` module
2
3use crate::{
4    de::key::QNameDeserializer,
5    de::resolver::EntityResolver,
6    de::simple_type::SimpleTypeDeserializer,
7    de::text::TextDeserializer,
8    de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY},
9    encoding::Decoder,
10    errors::serialize::DeError,
11    events::attributes::IterState,
12    events::BytesStart,
13    name::QName,
14};
15use serde::de::value::BorrowedStrDeserializer;
16use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor};
17use serde::serde_if_integer128;
18use std::borrow::Cow;
19use std::ops::Range;
20
/// Defines the source that should be used to deserialize a value in the next
/// call to [`next_value_seed()`](MapAccess::next_value_seed).
///
/// All payloads are `Eq`, so the full equivalence relation is derived together
/// with `PartialEq`.
#[derive(Debug, PartialEq, Eq)]
enum ValueSource {
    /// The source is not specified, because [`next_key_seed()`] has not been
    /// called yet. This is the initial state and also the state after a value
    /// has been deserialized (after a call of [`next_value_seed()`]).
    ///
    /// An attempt to call [`next_value_seed()`] while the accessor is in this
    /// state returns a [`DeError::KeyNotRead`] error.
    ///
    /// [`next_key_seed()`]: MapAccess::next_key_seed
    /// [`next_value_seed()`]: MapAccess::next_value_seed
    Unknown,
    /// The next value should be deserialized from an attribute value; the value
    /// is located at the specified span.
    Attribute(Range<usize>),
    /// The value should be deserialized from the text content of the XML node,
    /// which is represented either by an ordinary text node, or by a CDATA node:
    ///
    /// ```xml
    /// <any-tag>
    ///     <key>text content</key>
    /// <!--     ^^^^^^^^^^^^ - this will be used to deserialize map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <key><![CDATA[cdata content]]></key>
    /// <!--              ^^^^^^^^^^^^^ - this will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    Text,
    /// The next value should be deserialized from an element with any name,
    /// except elements with a name matching one of the struct fields. The
    /// corresponding tag name will always be associated with the field named
    /// [`VALUE_KEY`].
    ///
    /// This state is set when a call to [`peek()`] returns a [`Start`] event
    /// whose [`name()`] is not listed in the [list of known fields] (which for
    /// a struct is the list of field names, and for a map is an empty list),
    /// _and_ the struct has a field with the special name [`VALUE_KEY`].
    ///
    /// When in this state, the next event returned by [`next()`] will be
    /// a [`Start`], which represents both a key and a value. The value is
    /// deserialized from the whole element, and how that is done is determined
    /// by the value deserializer. The [`ElementMapAccess`] does not consume any
    /// events in this state.
    ///
    /// Because in this state any encountered `<tag>` is mapped to the
    /// [`VALUE_KEY`] field, it is possible to use the tag name as an enum
    /// discriminator, so `enum`s can be deserialized from such XML:
    ///
    /// ```xml
    /// <any-tag>
    ///     <variant1>...</variant1>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant1 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    /// ```xml
    /// <any-tag>
    ///     <variant2>...</variant2>
    /// <!-- ~~~~~~~~               - this data will determine that this is Enum::variant2 -->
    /// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// both can be deserialized into
    ///
    /// ```ignore
    /// enum Enum {
    ///   variant1,
    ///   variant2,
    /// }
    /// struct AnyName {
    ///   #[serde(rename = "$value")]
    ///   field: Enum,
    /// }
    /// ```
    ///
    /// That is possible because the value deserializer has access to the full
    /// content of a `<variant1>...</variant1>` or `<variant2>...</variant2>`
    /// node, including the tag name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [list of known fields]: ElementMapAccess::fields
    Content,
    /// The next value should be deserialized from an element with a dedicated
    /// name. If the deserialized type is a sequence, then that sequence will
    /// collect all elements with the same name until it is filled. If not all
    /// elements are consumed, the rest will be ignored.
    ///
    /// This state is set when a call to [`peek()`] returns a [`Start`] event
    /// whose [`name()`] represents a field name. That name will be deserialized
    /// as a key.
    ///
    /// When in this state, the next event returned by [`next()`] will be
    /// a [`Start`], which represents both a key and a value. The value is
    /// deserialized from the whole element, and how that is done is determined
    /// by the value deserializer. The [`ElementMapAccess`] does not consume any
    /// events in this state.
    ///
    /// The illustration below shows what data is used to deserialize the key
    /// and the value:
    /// ```xml
    /// <any-tag>
    ///     <key>...</key>
    /// <!-- ~~~           - this data will be used to deserialize a map key -->
    /// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
    /// </any-tag>
    /// ```
    ///
    /// Although the value deserializer has access to the full content of
    /// a `<key>` node (including the tag name), it does not benefit much from
    /// that, because the tag name is always fixed for a given map field (equal
    /// to the field name). So, if the field type is an `enum`, it cannot select
    /// its variant based on the tag name. If that is needed, then the
    /// [`Content`] variant of this enum should be used. Such usage is enabled
    /// by annotating a struct field as a "content" field, which is implemented
    /// by giving the field the special [`VALUE_KEY`] name.
    ///
    /// [`Start`]: DeEvent::Start
    /// [`peek()`]: Deserializer::peek()
    /// [`next()`]: Deserializer::next()
    /// [`name()`]: BytesStart::name()
    /// [`Content`]: Self::Content
    Nested,
}
149
150////////////////////////////////////////////////////////////////////////////////////////////////////
151
152/// A deserializer that extracts map-like structures from an XML. This deserializer
153/// represents a one XML tag:
154///
155/// ```xml
156/// <tag>...</tag>
157/// ```
158///
159/// Name of this tag is stored in a [`Self::start`] property.
160///
161/// # Lifetimes
162///
163/// - `'de` lifetime represents a buffer, from which deserialized values can
164///   borrow their data. Depending on the underlying reader, there can be an
165///   internal buffer of deserializer (i.e. deserializer itself) or an input
166///   (in that case it is possible to approach zero-copy deserialization).
167///
168/// - `'d` lifetime represents a parent deserializer, which could own the data
169///   buffer.
170pub(crate) struct ElementMapAccess<'de, 'd, R, E>
171where
172    R: XmlRead<'de>,
173    E: EntityResolver,
174{
175    /// Tag -- owner of attributes
176    start: BytesStart<'de>,
177    de: &'d mut Deserializer<'de, R, E>,
178    /// State of the iterator over attributes. Contains the next position in the
179    /// inner `start` slice, from which next attribute should be parsed.
180    iter: IterState,
181    /// Current state of the accessor that determines what next call to API
182    /// methods should return.
183    source: ValueSource,
184    /// List of field names of the struct. It is empty for maps
185    fields: &'static [&'static str],
186    /// If `true`, then the deserialized struct has a field with a special name:
187    /// [`VALUE_KEY`]. That field should be deserialized from the whole content
188    /// of an XML node, including tag name:
189    ///
190    /// ```xml
191    /// <tag>value for VALUE_KEY field<tag>
192    /// ```
193    has_value_field: bool,
194}
195
196impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E>
197where
198    R: XmlRead<'de>,
199    E: EntityResolver,
200{
201    /// Create a new ElementMapAccess
202    pub fn new(
203        de: &'d mut Deserializer<'de, R, E>,
204        start: BytesStart<'de>,
205        fields: &'static [&'static str],
206    ) -> Result<Self, DeError> {
207        Ok(Self {
208            de,
209            iter: IterState::new(start.name().as_ref().len(), false),
210            start,
211            source: ValueSource::Unknown,
212            fields,
213            has_value_field: fields.contains(&VALUE_KEY),
214        })
215    }
216}
217
218impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E>
219where
220    R: XmlRead<'de>,
221    E: EntityResolver,
222{
223    type Error = DeError;
224
225    fn next_key_seed<K: DeserializeSeed<'de>>(
226        &mut self,
227        seed: K,
228    ) -> Result<Option<K::Value>, Self::Error> {
229        debug_assert_eq!(self.source, ValueSource::Unknown);
230
231        // FIXME: There error positions counted from the start of tag name - need global position
232        let slice = &self.start.buf;
233        let decoder = self.de.reader.decoder();
234
235        if let Some(a) = self.iter.next(slice).transpose()? {
236            // try getting map from attributes (key= "value")
237            let (key, value) = a.into();
238            self.source = ValueSource::Attribute(value.unwrap_or_default());
239
240            let de = QNameDeserializer::from_attr(QName(&slice[key]), decoder)?;
241            seed.deserialize(de).map(Some)
242        } else {
243            // try getting from events (<key>value</key>)
244            match self.de.peek()? {
245                // We shouldn't have both `$value` and `$text` fields in the same
246                // struct, so if we have `$value` field, the we should deserialize
247                // text content to `$value`
248                DeEvent::Text(_) if self.has_value_field => {
249                    self.source = ValueSource::Content;
250                    // Deserialize `key` from special attribute name which means
251                    // that value should be taken from the text content of the
252                    // XML node
253                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
254                    seed.deserialize(de).map(Some)
255                }
256                DeEvent::Text(_) => {
257                    self.source = ValueSource::Text;
258                    // Deserialize `key` from special attribute name which means
259                    // that value should be taken from the text content of the
260                    // XML node
261                    let de = BorrowedStrDeserializer::<DeError>::new(TEXT_KEY);
262                    seed.deserialize(de).map(Some)
263                }
264                // Used to deserialize collections of enums, like:
265                // <root>
266                //   <A/>
267                //   <B/>
268                //   <C/>
269                // </root>
270                //
271                // into
272                //
273                // enum Enum { A, B, С }
274                // struct Root {
275                //     #[serde(rename = "$value")]
276                //     items: Vec<Enum>,
277                // }
278                // TODO: This should be handled by #[serde(flatten)]
279                // See https://github.com/serde-rs/serde/issues/1905
280                DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? => {
281                    self.source = ValueSource::Content;
282
283                    let de = BorrowedStrDeserializer::<DeError>::new(VALUE_KEY);
284                    seed.deserialize(de).map(Some)
285                }
286                DeEvent::Start(e) => {
287                    self.source = ValueSource::Nested;
288
289                    let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?;
290                    seed.deserialize(de).map(Some)
291                }
292                // Stop iteration after reaching a closing tag
293                // The matching tag name is guaranteed by the reader if our
294                // deserializer implementation is correct
295                DeEvent::End(e) => {
296                    debug_assert_eq!(self.start.name(), e.name());
297                    // Consume End
298                    self.de.next()?;
299                    Ok(None)
300                }
301                // We cannot get `Eof` legally, because we always inside of the
302                // opened tag `self.start`
303                DeEvent::Eof => Err(DeError::UnexpectedEof),
304            }
305        }
306    }
307
308    fn next_value_seed<K: DeserializeSeed<'de>>(
309        &mut self,
310        seed: K,
311    ) -> Result<K::Value, Self::Error> {
312        match std::mem::replace(&mut self.source, ValueSource::Unknown) {
313            ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
314                &self.start.buf,
315                value,
316                true,
317                self.de.reader.decoder(),
318            )),
319            // This arm processes the following XML shape:
320            // <any-tag>
321            //   text value
322            // </any-tag>
323            // The whole map represented by an `<any-tag>` element, the map key
324            // is implicit and equals to the `TEXT_KEY` constant, and the value
325            // is a `Text` event (the value deserializer will see that event)
326            // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
327            ValueSource::Text => match self.de.next()? {
328                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
329                // SAFETY: We set `Text` only when we seen `Text`
330                _ => unreachable!(),
331            },
332            // This arm processes the following XML shape:
333            // <any-tag>
334            //   <any>...</any>
335            // </any-tag>
336            // The whole map represented by an `<any-tag>` element, the map key
337            // is implicit and equals to the `VALUE_KEY` constant, and the value
338            // is a `Start` event (the value deserializer will see that event)
339            ValueSource::Content => seed.deserialize(MapValueDeserializer {
340                map: self,
341                fixed_name: false,
342            }),
343            // This arm processes the following XML shape:
344            // <any-tag>
345            //   <tag>...</tag>
346            // </any-tag>
347            // The whole map represented by an `<any-tag>` element, the map key
348            // is a `tag`, and the value is a `Start` event (the value deserializer
349            // will see that event)
350            ValueSource::Nested => seed.deserialize(MapValueDeserializer {
351                map: self,
352                fixed_name: true,
353            }),
354            ValueSource::Unknown => Err(DeError::KeyNotRead),
355        }
356    }
357}
358
359////////////////////////////////////////////////////////////////////////////////////////////////////
360
361/// A deserializer for a value of map or struct. That deserializer slightly
362/// differently processes events for a primitive types and sequences than
363/// a [`Deserializer`].
364///
365/// This deserializer used to deserialize two kinds of fields:
366/// - usual fields with a dedicated name, such as `field_one` or `field_two`, in
367///   that case field [`Self::fixed_name`] is `true`;
368/// - the special `$value` field which represents any tag or a textual content
369///   in the XML which would be found in the document, in that case field
370///   [`Self::fixed_name`] is `false`.
371///
372/// This deserializer can see two kind of events at the start:
373/// - [`DeEvent::Text`]
374/// - [`DeEvent::Start`]
375///
376/// which represents two possible variants of items:
377/// ```xml
378/// <item>A tag item</item>
379/// A text item
380/// <yet another="tag item"/>
381/// ```
382///
383/// This deserializer are very similar to a [`ElementDeserializer`]. The only difference
384/// in the `deserialize_seq` method. This deserializer will act as an iterator
385/// over tags / text within it's parent tag, whereas the [`ElementDeserializer`]
386/// will represent sequences as an `xs:list`.
387///
388/// This deserializer processes items as following:
389/// - primitives (numbers, booleans, strings, characters) are deserialized either
390///   from a text content, or unwrapped from a one level of a tag. So, `123` and
391///   `<int>123</int>` both can be deserialized into an `u32`;
392/// - `Option`:
393///   - empty text of [`DeEvent::Text`] is deserialized as `None`;
394///   - everything else are deserialized as `Some` using the same deserializer,
395///     including `<tag/>` or `<tag></tag>`;
396/// - units (`()`) and unit structs consumes the whole text or element subtree;
397/// - newtype structs are deserialized by forwarding deserialization of inner type
398///   with the same deserializer;
399/// - sequences, tuples and tuple structs are deserialized by iterating within the
400///   parent tag and deserializing each tag or text content using [`ElementDeserializer`];
401/// - structs and maps are deserialized using new instance of [`ElementMapAccess`];
402/// - enums:
403///   - in case of [`DeEvent::Text`] event the text content is deserialized as
404///     a `$text` variant. Enum content is deserialized from the text using
405///     [`SimpleTypeDeserializer`];
406///   - in case of [`DeEvent::Start`] event the tag name is deserialized as
407///     an enum tag, and the content inside are deserialized as an enum content.
408///     Depending on a variant kind deserialization is performed as:
409///     - unit variants: consuming text content or a subtree;
410///     - newtype variants: forward deserialization to the inner type using
411///       this deserializer;
412///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
413///     - struct variants: call [`deserialize_struct`] of this deserializer.
414///
415/// [`deserialize_tuple`]: #method.deserialize_tuple
416/// [`deserialize_struct`]: #method.deserialize_struct
417struct MapValueDeserializer<'de, 'd, 'm, R, E>
418where
419    R: XmlRead<'de>,
420    E: EntityResolver,
421{
422    /// Access to the map that created this deserializer. Gives access to the
423    /// context, such as list of fields, that current map known about.
424    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
425    /// Whether this deserializer was created for deserialization from an element
426    /// with fixed name, or the elements with different names or even text are allowed.
427    ///
428    /// If this field is `true`, we process `<tag>` element in the following XML shape:
429    ///
430    /// ```xml
431    /// <any-tag>
432    ///   <tag>...</tag>
433    /// </any-tag>
434    /// ```
435    ///
436    /// The whole map represented by an `<any-tag>` element, the map key is a `tag`,
437    /// and the value starts with is a `Start("tag")` (the value deserializer will
438    /// see that event first) and extended to the matching `End("tag")` event.
439    /// In order to deserialize primitives (such as `usize`) we need to allow to
440    /// look inside the one levels of tags, so the
441    ///
442    /// ```xml
443    /// <tag>42<tag>
444    /// ```
445    ///
446    /// could be deserialized into `42usize` without problems, and at the same time
447    ///
448    /// ```xml
449    /// <tag>
450    ///   <key1/>
451    ///   <key2/>
452    ///   <!--...-->
453    /// <tag>
454    /// ```
455    /// could be deserialized to a struct.
456    ///
457    /// If this field is `false`, we processes the one of following XML shapes:
458    ///
459    /// ```xml
460    /// <any-tag>
461    ///   text value
462    /// </any-tag>
463    /// ```
464    /// ```xml
465    /// <any-tag>
466    ///   <![CDATA[cdata value]]>
467    /// </any-tag>
468    /// ```
469    /// ```xml
470    /// <any-tag>
471    ///   <any>...</any>
472    /// </any-tag>
473    /// ```
474    ///
475    /// The whole map represented by an `<any-tag>` element, the map key is
476    /// implicit and equals to the [`VALUE_KEY`] constant, and the value is
477    /// a [`Text`], or a [`Start`] event (the value deserializer will see one of
478    /// those events). In the first two cases the value of this field do not matter
479    /// (because we already see the textual event and there no reasons to look
480    /// "inside" something), but in the last case the primitives should raise
481    /// a deserialization error, because that means that you trying to deserialize
482    /// the following struct:
483    ///
484    /// ```ignore
485    /// struct AnyName {
486    ///   #[serde(rename = "$value")]
487    ///   any_name: String,
488    /// }
489    /// ```
490    /// which means that `any_name` should get a content of the `<any-tag>` element.
491    ///
492    /// Changing this can be valuable for <https://github.com/tafia/quick-xml/issues/383>,
493    /// but those fields should be explicitly marked that they want to get any
494    /// possible markup as a `String` and that mark is different from marking them
495    /// as accepting "text content" which the currently `$text` means.
496    ///
497    /// [`Text`]: DeEvent::Text
498    /// [`Start`]: DeEvent::Start
499    fixed_name: bool,
500}
501
502impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E>
503where
504    R: XmlRead<'de>,
505    E: EntityResolver,
506{
507    /// Returns a next string as concatenated content of consequent [`Text`] and
508    /// [`CData`] events, used inside [`deserialize_primitives!()`].
509    ///
510    /// [`Text`]: crate::events::Event::Text
511    /// [`CData`]: crate::events::Event::CData
512    #[inline]
513    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
514        // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483
515        self.map.de.read_string_impl(self.fixed_name)
516    }
517}
518
519impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
520where
521    R: XmlRead<'de>,
522    E: EntityResolver,
523{
524    type Error = DeError;
525
526    deserialize_primitives!(mut);
527
528    #[inline]
529    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
530    where
531        V: Visitor<'de>,
532    {
533        self.map.de.deserialize_unit(visitor)
534    }
535
536    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
537    where
538        V: Visitor<'de>,
539    {
540        match self.map.de.peek()? {
541            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
542            _ => visitor.visit_some(self),
543        }
544    }
545
546    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
547    /// with the same deserializer.
548    fn deserialize_newtype_struct<V>(
549        self,
550        _name: &'static str,
551        visitor: V,
552    ) -> Result<V::Value, Self::Error>
553    where
554        V: Visitor<'de>,
555    {
556        visitor.visit_newtype_struct(self)
557    }
558
559    /// Deserializes each `<tag>` in
560    /// ```xml
561    /// <any-tag>
562    ///   <tag>...</tag>
563    ///   <tag>...</tag>
564    ///   <tag>...</tag>
565    /// </any-tag>
566    /// ```
567    /// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
568    /// and a `<tag>` is a sequential field of that map.
569    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
570    where
571        V: Visitor<'de>,
572    {
573        let filter = if self.fixed_name {
574            match self.map.de.peek()? {
575                // Clone is cheap if event borrows from the input
576                DeEvent::Start(e) => TagFilter::Include(e.clone()),
577                // SAFETY: we use that deserializer with `fixed_name == true`
578                // only from the `ElementMapAccess::next_value_seed` and only when we
579                // peeked `Start` event
580                _ => unreachable!(),
581            }
582        } else {
583            TagFilter::Exclude(self.map.fields)
584        };
585        visitor.visit_seq(MapValueSeqAccess {
586            #[cfg(feature = "overlapped-lists")]
587            checkpoint: self.map.de.skip_checkpoint(),
588
589            map: self.map,
590            filter,
591        })
592    }
593
594    #[inline]
595    fn deserialize_struct<V>(
596        self,
597        name: &'static str,
598        fields: &'static [&'static str],
599        visitor: V,
600    ) -> Result<V::Value, Self::Error>
601    where
602        V: Visitor<'de>,
603    {
604        self.map.de.deserialize_struct(name, fields, visitor)
605    }
606
607    fn deserialize_enum<V>(
608        self,
609        _name: &'static str,
610        _variants: &'static [&'static str],
611        visitor: V,
612    ) -> Result<V::Value, Self::Error>
613    where
614        V: Visitor<'de>,
615    {
616        if self.fixed_name {
617            match self.map.de.next()? {
618                // Handles <field>UnitEnumVariant</field>
619                DeEvent::Start(_) => {
620                    // skip <field>, read text after it and ensure that it is ended by </field>
621                    let text = self.map.de.read_text()?;
622                    if text.is_empty() {
623                        // Map empty text (<field/>) to a special `$text` variant
624                        visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into()))
625                    } else {
626                        visitor.visit_enum(SimpleTypeDeserializer::from_text(text))
627                    }
628                }
629                // SAFETY: we use that deserializer with `fixed_name == true`
630                // only from the `MapAccess::next_value_seed` and only when we
631                // peeked `Start` event
632                _ => unreachable!(),
633            }
634        } else {
635            visitor.visit_enum(self)
636        }
637    }
638
639    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
640    where
641        V: Visitor<'de>,
642    {
643        match self.map.de.peek()? {
644            DeEvent::Text(_) => self.deserialize_str(visitor),
645            _ => self.deserialize_map(visitor),
646        }
647    }
648}
649
650impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E>
651where
652    R: XmlRead<'de>,
653    E: EntityResolver,
654{
655    type Error = DeError;
656    type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>;
657
658    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
659    where
660        V: DeserializeSeed<'de>,
661    {
662        let decoder = self.map.de.reader.decoder();
663        let (name, is_text) = match self.map.de.peek()? {
664            DeEvent::Start(e) => (
665                seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?,
666                false,
667            ),
668            DeEvent::Text(_) => (
669                seed.deserialize(BorrowedStrDeserializer::<DeError>::new(TEXT_KEY))?,
670                true,
671            ),
672            DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().into_inner().to_vec())),
673            DeEvent::Eof => return Err(DeError::UnexpectedEof),
674        };
675        Ok((
676            name,
677            MapValueVariantAccess {
678                map: self.map,
679                is_text,
680            },
681        ))
682    }
683}
684
685struct MapValueVariantAccess<'de, 'd, 'm, R, E>
686where
687    R: XmlRead<'de>,
688    E: EntityResolver,
689{
690    /// Access to the map that created this enum accessor. Gives access to the
691    /// context, such as list of fields, that current map known about.
692    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
693    /// `true` if variant should be deserialized from a textual content
694    /// and `false` if from tag
695    is_text: bool,
696}
697
698impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E>
699where
700    R: XmlRead<'de>,
701    E: EntityResolver,
702{
703    type Error = DeError;
704
705    fn unit_variant(self) -> Result<(), Self::Error> {
706        match self.map.de.next()? {
707            // Consume subtree
708            DeEvent::Start(e) => self.map.de.read_to_end(e.name()),
709            // Does not needed to deserialize using SimpleTypeDeserializer, because
710            // it returns `()` when `deserialize_unit()` is requested
711            DeEvent::Text(_) => Ok(()),
712            // SAFETY: the other events are filtered in `variant_seed()`
713            _ => unreachable!("Only `Start` or `Text` events are possible here"),
714        }
715    }
716
717    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
718    where
719        T: DeserializeSeed<'de>,
720    {
721        if self.is_text {
722            match self.map.de.next()? {
723                DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
724                // SAFETY: the other events are filtered in `variant_seed()`
725                _ => unreachable!("Only `Text` events are possible here"),
726            }
727        } else {
728            seed.deserialize(MapValueDeserializer {
729                map: self.map,
730                // Because element name already was either mapped to a field name,
731                // or to a variant name, we should not treat it as variable
732                fixed_name: true,
733            })
734        }
735    }
736
737    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
738    where
739        V: Visitor<'de>,
740    {
741        if self.is_text {
742            match self.map.de.next()? {
743                DeEvent::Text(e) => {
744                    SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor)
745                }
746                // SAFETY: the other events are filtered in `variant_seed()`
747                _ => unreachable!("Only `Text` events are possible here"),
748            }
749        } else {
750            MapValueDeserializer {
751                map: self.map,
752                // Because element name already was either mapped to a field name,
753                // or to a variant name, we should not treat it as variable
754                fixed_name: true,
755            }
756            .deserialize_tuple(len, visitor)
757        }
758    }
759
760    fn struct_variant<V>(
761        self,
762        fields: &'static [&'static str],
763        visitor: V,
764    ) -> Result<V::Value, Self::Error>
765    where
766        V: Visitor<'de>,
767    {
768        match self.map.de.next()? {
769            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)?),
770            DeEvent::Text(e) => {
771                SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor)
772            }
773            // SAFETY: the other events are filtered in `variant_seed()`
774            _ => unreachable!("Only `Start` or `Text` events are possible here"),
775        }
776    }
777}
778
779////////////////////////////////////////////////////////////////////////////////////////////////////
780
781/// Check if tag `start` is included in the `fields` list. `decoder` is used to
782/// get a string representation of a tag.
783///
784/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
785fn not_in(
786    fields: &'static [&'static str],
787    start: &BytesStart,
788    decoder: Decoder,
789) -> Result<bool, DeError> {
790    let tag = decoder.decode(start.name().into_inner())?;
791
792    Ok(fields.iter().all(|&field| field != tag.as_ref()))
793}
794
795/// A filter that determines, what tags should form a sequence.
796///
797/// There are two types of sequences:
798/// - sequence where each element represented by tags with the same name
799/// - sequence where each element can have a different tag
800///
801/// The first variant could represent a collection of structs, the second --
802/// a collection of enum variants.
803///
804/// In the second case we don't know what tag name should be expected as a
805/// sequence element, so we accept any element. Since the sequence are flattened
806/// into maps, we skip elements which have dedicated fields in a struct by using an
807/// `Exclude` filter that filters out elements with names matching field names
808/// from the struct.
809///
810/// # Lifetimes
811///
812/// `'de` represents a lifetime of the XML input, when filter stores the
813/// dedicated tag name
814#[derive(Debug)]
815enum TagFilter<'de> {
816    /// A `SeqAccess` interested only in tags with specified name to deserialize
817    /// an XML like this:
818    ///
819    /// ```xml
820    /// <...>
821    ///   <tag/>
822    ///   <tag/>
823    ///   <tag/>
824    ///   ...
825    /// </...>
826    /// ```
827    ///
828    /// The tag name is stored inside (`b"tag"` for that example)
829    Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
830    /// A `SeqAccess` interested in tags with any name, except explicitly listed.
831    /// Excluded tags are used as struct field names and therefore should not
832    /// fall into a `$value` category
833    Exclude(&'static [&'static str]),
834}
835
836impl<'de> TagFilter<'de> {
837    fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result<bool, DeError> {
838        match self {
839            Self::Include(n) => Ok(n.name() == start.name()),
840            Self::Exclude(fields) => not_in(fields, start, decoder),
841        }
842    }
843}
844
845////////////////////////////////////////////////////////////////////////////////////////////////////
846
847/// An accessor to sequence elements forming a value for struct field.
848/// Technically, this sequence is flattened out into structure and sequence
849/// elements are overlapped with other fields of a structure. Each call to
850/// [`Self::next_element_seed`] consumes a next sub-tree or consequent list
851/// of [`Text`] and [`CData`] events.
852///
853/// ```xml
854/// <>
855///   ...
856///   <item>The is the one item</item>
857///   This is <![CDATA[one another]]> item<!-- even when--> it splitted by comments
858///   <tag>...and that is the third!</tag>
859///   ...
860/// </>
861/// ```
862///
863/// Depending on [`Self::filter`], only some of that possible constructs would be
864/// an element.
865///
866/// [`Text`]: crate::events::Event::Text
867/// [`CData`]: crate::events::Event::CData
868struct MapValueSeqAccess<'de, 'd, 'm, R, E>
869where
870    R: XmlRead<'de>,
871    E: EntityResolver,
872{
873    /// Accessor to a map that creates this accessor and to a deserializer for
874    /// a sequence items.
875    map: &'m mut ElementMapAccess<'de, 'd, R, E>,
876    /// Filter that determines whether a tag is a part of this sequence.
877    ///
878    /// When feature [`overlapped-lists`] is not activated, iteration will stop
879    /// when found a tag that does not pass this filter.
880    ///
881    /// When feature [`overlapped-lists`] is activated, all tags, that not pass
882    /// this check, will be skipped.
883    ///
884    /// [`overlapped-lists`]: ../../index.html#overlapped-lists
885    filter: TagFilter<'de>,
886
887    /// Checkpoint after which all skipped events should be returned. All events,
888    /// that was skipped before creating this checkpoint, will still stay buffered
889    /// and will not be returned
890    #[cfg(feature = "overlapped-lists")]
891    checkpoint: usize,
892}
893
#[cfg(feature = "overlapped-lists")]
impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    /// When the sequence accessor goes away, asks the underlying deserializer
    /// to start a replay from the checkpoint remembered by this accessor.
    fn drop(&mut self) {
        let checkpoint = self.checkpoint;
        self.map.de.start_replay(checkpoint);
    }
}
904
905impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E>
906where
907    R: XmlRead<'de>,
908    E: EntityResolver,
909{
910    type Error = DeError;
911
912    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
913    where
914        T: DeserializeSeed<'de>,
915    {
916        let decoder = self.map.de.reader.decoder();
917        loop {
918            break match self.map.de.peek()? {
919                // If we see a tag that we not interested, skip it
920                #[cfg(feature = "overlapped-lists")]
921                DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => {
922                    self.map.de.skip()?;
923                    continue;
924                }
925                // Stop iteration when list elements ends
926                #[cfg(not(feature = "overlapped-lists"))]
927                DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
928
929                // Stop iteration after reaching a closing tag
930                DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
931                // This is a unmatched closing tag, so the XML is invalid
932                DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
933                // We cannot get `Eof` legally, because we always inside of the
934                // opened tag `self.map.start`
935                DeEvent::Eof => Err(DeError::UnexpectedEof),
936
937                DeEvent::Text(_) => match self.map.de.next()? {
938                    DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some),
939                    // SAFETY: we just checked that the next event is Text
940                    _ => unreachable!(),
941                },
942                DeEvent::Start(_) => match self.map.de.next()? {
943                    DeEvent::Start(start) => seed
944                        .deserialize(ElementDeserializer {
945                            start,
946                            de: self.map.de,
947                        })
948                        .map(Some),
949                    // SAFETY: we just checked that the next event is Start
950                    _ => unreachable!(),
951                },
952            };
953        }
954    }
955}
956
957////////////////////////////////////////////////////////////////////////////////////////////////////
958
959/// A deserializer for a single tag item of a mixed sequence of tags and text.
960///
961/// This deserializer are very similar to a [`MapValueDeserializer`] (when it
962/// processes the [`DeEvent::Start`] event). The only difference in the
963/// [`deserialize_seq`] method. This deserializer will perform deserialization
964/// from the textual content between start and end events, whereas the
965/// [`MapValueDeserializer`] will iterate over tags / text within it's parent tag.
966///
967/// This deserializer processes items as following:
968/// - numbers are parsed from a text content between tags using [`FromStr`]. So,
969///   `<int>123</int>` can be deserialized into an `u32`;
970/// - booleans converted from a text content between tags according to the XML
971///   [specification]:
972///   - `"true"` and `"1"` converted to `true`;
973///   - `"false"` and `"0"` converted to `false`;
974/// - strings returned as a text content between tags;
975/// - characters also returned as strings. If string contain more than one character
976///   or empty, it is responsibility of a type to return an error;
977/// - `Option` are always deserialized as `Some` using the same deserializer,
978///   including `<tag/>` or `<tag></tag>`;
979/// - units (`()`) and unit structs consumes the whole element subtree;
980/// - newtype structs forwards deserialization to the inner type using
981///   [`SimpleTypeDeserializer`];
982/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
983///   (this is the difference): text content between tags is passed to
984///   [`SimpleTypeDeserializer`];
985/// - structs and maps are deserialized using new instance of [`ElementMapAccess`];
986/// - enums:
987///   - the variant name is deserialized using [`QNameDeserializer`] from the element name;
988///   - the content is deserialized using the same deserializer:
989///     - unit variants: consuming a subtree and return `()`;
990///     - newtype variants forwards deserialization to the inner type using
991///       this deserializer;
992///     - tuple variants: call [`deserialize_tuple`] of this deserializer;
993///     - struct variants: call [`deserialize_struct`] of this deserializer.
994///
995/// [`deserialize_seq`]: #method.deserialize_seq
996/// [`FromStr`]: std::str::FromStr
997/// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean
998/// [`deserialize_tuple`]: #method.deserialize_tuple
999/// [`deserialize_struct`]: #method.deserialize_struct
1000struct ElementDeserializer<'de, 'd, R, E>
1001where
1002    R: XmlRead<'de>,
1003    E: EntityResolver,
1004{
1005    start: BytesStart<'de>,
1006    de: &'d mut Deserializer<'de, R, E>,
1007}
1008
1009impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E>
1010where
1011    R: XmlRead<'de>,
1012    E: EntityResolver,
1013{
1014    /// Returns a next string as concatenated content of consequent [`Text`] and
1015    /// [`CData`] events, used inside [`deserialize_primitives!()`].
1016    ///
1017    /// [`Text`]: crate::events::Event::Text
1018    /// [`CData`]: crate::events::Event::CData
1019    #[inline]
1020    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
1021        self.de.read_text()
1022    }
1023}
1024
1025impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E>
1026where
1027    R: XmlRead<'de>,
1028    E: EntityResolver,
1029{
1030    type Error = DeError;
1031
1032    deserialize_primitives!(mut);
1033
1034    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1035    where
1036        V: Visitor<'de>,
1037    {
1038        // Consume subtree
1039        self.de.read_to_end(self.start.name())?;
1040        visitor.visit_unit()
1041    }
1042
1043    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1044    where
1045        V: Visitor<'de>,
1046    {
1047        visitor.visit_some(self)
1048    }
1049
1050    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
1051    /// with this deserializer.
1052    fn deserialize_newtype_struct<V>(
1053        self,
1054        _name: &'static str,
1055        visitor: V,
1056    ) -> Result<V::Value, Self::Error>
1057    where
1058        V: Visitor<'de>,
1059    {
1060        visitor.visit_newtype_struct(self)
1061    }
1062
1063    /// This method deserializes a sequence inside of element that itself is a
1064    /// sequence element:
1065    ///
1066    /// ```xml
1067    /// <>
1068    ///   ...
1069    ///   <self>inner sequence</self>
1070    ///   <self>inner sequence</self>
1071    ///   <self>inner sequence</self>
1072    ///   ...
1073    /// </>
1074    /// ```
1075    fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value, Self::Error>
1076    where
1077        V: Visitor<'de>,
1078    {
1079        let text = self.read_string()?;
1080        SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor)
1081    }
1082
1083    fn deserialize_struct<V>(
1084        self,
1085        _name: &'static str,
1086        fields: &'static [&'static str],
1087        visitor: V,
1088    ) -> Result<V::Value, Self::Error>
1089    where
1090        V: Visitor<'de>,
1091    {
1092        visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)?)
1093    }
1094
1095    fn deserialize_enum<V>(
1096        self,
1097        _name: &'static str,
1098        _variants: &'static [&'static str],
1099        visitor: V,
1100    ) -> Result<V::Value, Self::Error>
1101    where
1102        V: Visitor<'de>,
1103    {
1104        visitor.visit_enum(self)
1105    }
1106
1107    #[inline]
1108    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1109    where
1110        V: Visitor<'de>,
1111    {
1112        self.deserialize_map(visitor)
1113    }
1114}
1115
1116impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1117where
1118    R: XmlRead<'de>,
1119    E: EntityResolver,
1120{
1121    type Error = DeError;
1122    type Variant = Self;
1123
1124    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
1125    where
1126        V: DeserializeSeed<'de>,
1127    {
1128        let name = seed.deserialize(QNameDeserializer::from_elem(
1129            self.start.raw_name(),
1130            self.de.reader.decoder(),
1131        )?)?;
1132        Ok((name, self))
1133    }
1134}
1135
1136impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E>
1137where
1138    R: XmlRead<'de>,
1139    E: EntityResolver,
1140{
1141    type Error = DeError;
1142
1143    fn unit_variant(self) -> Result<(), Self::Error> {
1144        // Consume subtree
1145        self.de.read_to_end(self.start.name())
1146    }
1147
1148    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
1149    where
1150        T: DeserializeSeed<'de>,
1151    {
1152        seed.deserialize(self)
1153    }
1154
1155    #[inline]
1156    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
1157    where
1158        V: Visitor<'de>,
1159    {
1160        self.deserialize_tuple(len, visitor)
1161    }
1162
1163    #[inline]
1164    fn struct_variant<V>(
1165        self,
1166        fields: &'static [&'static str],
1167        visitor: V,
1168    ) -> Result<V::Value, Self::Error>
1169    where
1170        V: Visitor<'de>,
1171    {
1172        self.deserialize_struct("", fields, visitor)
1173    }
1174}
1175
1176////////////////////////////////////////////////////////////////////////////////////////////////////
1177
/// Checks `not_in` against an empty list, a list without the tag name and
/// a list that contains the tag name.
#[test]
fn test_not_in() {
    let tag = BytesStart::new("tag");
    let absent_from =
        |fields: &'static [&'static str]| not_in(fields, &tag, Decoder::utf8()).unwrap();

    // Nothing is listed, so the tag cannot be in the list
    assert!(absent_from(&[]));
    // The list has entries, but none of them is `tag`
    assert!(absent_from(&["no", "such", "tags"]));
    // `tag` is one of the listed names
    assert!(!absent_from(&["some", "tag", "included"]));
}