quick_xml/de/
key.rs

1use crate::de::str2bool;
2use crate::encoding::Decoder;
3use crate::errors::serialize::DeError;
4use crate::name::QName;
5use crate::utils::CowRef;
6use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
7use serde::{forward_to_deserialize_any, serde_if_integer128};
8use std::borrow::Cow;
9
/// Generates a `Deserializer` method named `$name` that parses the stored
/// name as a number and passes the parsed value to the visitor's `$visit_fn`
/// method.
///
/// The target numeric type is inferred from the signature of `$visit_fn`;
/// a parse failure is converted into `Self::Error` by the `?` operator.
macro_rules! deserialize_num {
    ($name:ident, $visit_fn:ident) => {
        fn $name<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
            let number = self.name.parse()?;
            visitor.$visit_fn(number)
        }
    };
}
20
21/// Decodes raw bytes using the deserializer encoding.
22/// The method will borrow if encoding is UTF-8 compatible and `name` contains
23/// only UTF-8 compatible characters (usually only ASCII characters).
24#[inline]
25fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
26    let local = name.local_name();
27    Ok(decoder.decode(local.into_inner())?)
28}
29
/// A deserializer for xml names of elements and attributes.
///
/// Used for deserializing values from:
/// - attribute names (`<... name="..." ...>`)
/// - element names (`<name>...</name>`)
///
/// Converts a name to an identifier string using the following rules:
///
/// - if it is an [`attribute`] name, put `@` in front of the identifier
/// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name
///   to the identifier
/// - put the decoded [`local_name()`] of a name to the identifier
///
/// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding`
/// (where `[]` means optional element).
///
/// The deserializer also supports deserializing names as other primitive types:
/// - numbers
/// - booleans
/// - unit (`()`) and unit structs
/// - unit variants of the enumerations
///
/// Because `serde` does not define on which side type conversion should be
/// performed, and because the [`Deserialize`] implementations for those
/// primitives in serde do not accept strings, the deserializer performs the
/// conversion by itself.
///
/// The deserializer is able to deserialize unit and unit structs, but any name
/// will be converted to the same unit instance. This is an asymmetry with the
/// serializer, which is not able to serialize those types, because empty names
/// are impossible in XML.
///
/// `deserialize_any()` returns the same result as `deserialize_identifier()`.
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'d`: lifetime of a deserializer that holds a buffer with content of events
///
/// [`attribute`]: Self::from_attr
/// [`local_name()`]: QName::local_name
/// [`Deserialize`]: serde::Deserialize
pub struct QNameDeserializer<'i, 'd> {
    /// The already decoded identifier, as produced by [`Self::from_attr`]
    /// (prefixed with `@`) or [`Self::from_elem`].
    name: CowRef<'i, 'd, str>,
}
75
76impl<'i, 'd> QNameDeserializer<'i, 'd> {
77    /// Creates deserializer from name of an attribute
78    pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result<Self, DeError> {
79        // https://github.com/tafia/quick-xml/issues/537
80        // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx`
81        let field = if name.as_namespace_binding().is_some() {
82            decoder.decode(name.into_inner())?
83        } else {
84            decode_name(name, decoder)?
85        };
86
87        Ok(Self {
88            name: CowRef::Owned(format!("@{field}")),
89        })
90    }
91
92    /// Creates deserializer from name of an element
93    pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result<Self, DeError> {
94        let local = match name {
95            CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? {
96                Cow::Borrowed(borrowed) => CowRef::Input(borrowed),
97                Cow::Owned(owned) => CowRef::Owned(owned),
98            },
99            CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? {
100                Cow::Borrowed(borrowed) => CowRef::Slice(borrowed),
101                Cow::Owned(owned) => CowRef::Owned(owned),
102            },
103            CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? {
104                // SAFETY: Because result is borrowed, no changes was done
105                // and we can safely unwrap here
106                Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()),
107                Cow::Owned(owned) => CowRef::Owned(owned),
108            },
109        };
110
111        Ok(Self { name: local })
112    }
113}
114
115impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> {
116    type Error = DeError;
117
118    forward_to_deserialize_any! {
119        char str string
120        bytes byte_buf
121        seq tuple tuple_struct
122        map struct
123        ignored_any
124    }
125
126    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
127    /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
128    /// and `"0"`. But this method also handles following:
129    ///
130    /// |`bool` |XML content
131    /// |-------|-------------------------------------------------------------
132    /// |`true` |`"True"`,  `"TRUE"`,  `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
133    /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`,  `"NO"`,  `"no"`,  `"n"`
134    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
135    where
136        V: Visitor<'de>,
137    {
138        str2bool(self.name.as_ref(), visitor)
139    }
140
141    deserialize_num!(deserialize_i8, visit_i8);
142    deserialize_num!(deserialize_i16, visit_i16);
143    deserialize_num!(deserialize_i32, visit_i32);
144    deserialize_num!(deserialize_i64, visit_i64);
145
146    deserialize_num!(deserialize_u8, visit_u8);
147    deserialize_num!(deserialize_u16, visit_u16);
148    deserialize_num!(deserialize_u32, visit_u32);
149    deserialize_num!(deserialize_u64, visit_u64);
150
151    serde_if_integer128! {
152        deserialize_num!(deserialize_i128, visit_i128);
153        deserialize_num!(deserialize_u128, visit_u128);
154    }
155
156    deserialize_num!(deserialize_f32, visit_f32);
157    deserialize_num!(deserialize_f64, visit_f64);
158
159    /// Calls [`Visitor::visit_unit`]
160    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
161    where
162        V: Visitor<'de>,
163    {
164        visitor.visit_unit()
165    }
166
167    /// Forwards deserialization to the [`Self::deserialize_unit`]
168    fn deserialize_unit_struct<V>(
169        self,
170        _name: &'static str,
171        visitor: V,
172    ) -> Result<V::Value, Self::Error>
173    where
174        V: Visitor<'de>,
175    {
176        self.deserialize_unit(visitor)
177    }
178
179    /// Forwards deserialization to the [`Self::deserialize_identifier`]
180    #[inline]
181    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
182    where
183        V: Visitor<'de>,
184    {
185        self.deserialize_identifier(visitor)
186    }
187
188    /// If `name` is an empty string then calls [`Visitor::visit_none`],
189    /// otherwise calls [`Visitor::visit_some`] with itself
190    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
191    where
192        V: Visitor<'de>,
193    {
194        if self.name.is_empty() {
195            visitor.visit_none()
196        } else {
197            visitor.visit_some(self)
198        }
199    }
200
201    fn deserialize_newtype_struct<V>(
202        self,
203        _name: &'static str,
204        visitor: V,
205    ) -> Result<V::Value, Self::Error>
206    where
207        V: Visitor<'de>,
208    {
209        visitor.visit_newtype_struct(self)
210    }
211
212    /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8
213    /// compatible encoded characters and represents an element name and
214    /// a [`Visitor::visit_string`] in all other cases.
215    ///
216    /// [`name`]: Self::name
217    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
218    where
219        V: Visitor<'de>,
220    {
221        match self.name {
222            CowRef::Input(name) => visitor.visit_borrowed_str(name),
223            CowRef::Slice(name) => visitor.visit_str(name),
224            CowRef::Owned(name) => visitor.visit_string(name),
225        }
226    }
227
228    fn deserialize_enum<V>(
229        self,
230        _name: &str,
231        _variants: &'static [&'static str],
232        visitor: V,
233    ) -> Result<V::Value, Self::Error>
234    where
235        V: Visitor<'de>,
236    {
237        visitor.visit_enum(self)
238    }
239}
240
241impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> {
242    type Error = DeError;
243    type Variant = QNameUnitOnly;
244
245    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
246    where
247        V: DeserializeSeed<'de>,
248    {
249        let name = seed.deserialize(self)?;
250        Ok((name, QNameUnitOnly))
251    }
252}
253
254////////////////////////////////////////////////////////////////////////////////////////////////////
255
256/// Deserializer of variant data, that supports only unit variants.
257/// Attempt to deserialize newtype, tuple or struct variant will return a
258/// [`DeError::Unsupported`] error.
259pub struct QNameUnitOnly;
260impl<'de> VariantAccess<'de> for QNameUnitOnly {
261    type Error = DeError;
262
263    #[inline]
264    fn unit_variant(self) -> Result<(), DeError> {
265        Ok(())
266    }
267
268    fn newtype_variant_seed<T>(self, _seed: T) -> Result<T::Value, DeError>
269    where
270        T: DeserializeSeed<'de>,
271    {
272        Err(DeError::Unsupported(
273            "enum newtype variants are not supported as an XML names".into(),
274        ))
275    }
276
277    fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, DeError>
278    where
279        V: Visitor<'de>,
280    {
281        Err(DeError::Unsupported(
282            "enum tuple variants are not supported as an XML names".into(),
283        ))
284    }
285
286    fn struct_variant<V>(
287        self,
288        _fields: &'static [&'static str],
289        _visitor: V,
290    ) -> Result<V::Value, DeError>
291    where
292        V: Visitor<'de>,
293    {
294        Err(DeError::Unsupported(
295            "enum struct variants are not supported as an XML names".into(),
296        ))
297    }
298}
299
300////////////////////////////////////////////////////////////////////////////////////////////////////
301
#[cfg(test)]
mod tests {
    use super::*;
    use crate::se::key::QNameSerializer;
    use crate::utils::{ByteBuf, Bytes};
    use pretty_assertions::assert_eq;
    use serde::de::IgnoredAny;
    use serde::{Deserialize, Serialize};
    use std::collections::HashMap;

    // Fixture types, one for each kind of type that serde distinguishes.

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Unit;

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Newtype(String);

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Tuple((), ());

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    struct Struct {
        key: String,
        val: usize,
    }

    #[derive(Debug, Deserialize, Serialize, PartialEq)]
    enum Enum {
        Unit,
        // Variant that is selected by an attribute-style (`@`-prefixed) name
        #[serde(rename = "@Attr")]
        Attr,
        Newtype(String),
        Tuple(String, usize),
        Struct {
            key: String,
            val: usize,
        },
    }

    /// Identifier enum, deserialized through `deserialize_identifier`
    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(field_identifier)]
    enum Id {
        Field,
    }

    /// Wrapper around [`IgnoredAny`] that can be used in `assert_eq!`
    #[derive(Debug, Deserialize)]
    #[serde(transparent)]
    struct Any(IgnoredAny);
    impl PartialEq for Any {
        // Every `Any` is considered equal to every other `Any`, so the
        // assertion only checks that deserialization succeeded
        fn eq(&self, _other: &Any) -> bool {
            true
        }
    }

    /// Checks that given `$input` is successfully deserialized into given
    /// `$result`. No serialization roundtrip is performed, so this macro is
    /// used for types that cannot be serialized back to the same name.
    macro_rules! deserialized_to_only {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);
            }
        };
    }

    /// Checks that given `$input` is successfully deserialized into given
    /// `$result` and that serializing the result with `QNameSerializer`
    /// gives `$input` back.
    macro_rules! deserialized_to {
        ($name:ident: $type:ty = $input:literal => $result:expr) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let data: $type = Deserialize::deserialize(de).unwrap();

                assert_eq!(data, $result);

                // Roundtrip to ensure that serializer corresponds to deserializer
                assert_eq!(
                    data.serialize(QNameSerializer {
                        writer: String::new()
                    })
                    .unwrap(),
                    $input
                );
            }
        };
    }

    /// Checks that an attempt to deserialize given `$input` as a `$type` results
    /// in a deserialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let de = QNameDeserializer {
                    name: CowRef::Input($input),
                };
                let err = <$type as Deserialize>::deserialize(de).unwrap_err();

                match err {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    _ => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        err
                    ),
                }
            }
        };
    }

    deserialized_to!(false_: bool = "false" => false);
    deserialized_to!(true_: bool  = "true" => true);

    deserialized_to!(i8_:  i8  = "-2" => -2);
    deserialized_to!(i16_: i16 = "-2" => -2);
    deserialized_to!(i32_: i32 = "-2" => -2);
    deserialized_to!(i64_: i64 = "-2" => -2);

    deserialized_to!(u8_:  u8  = "3" => 3);
    deserialized_to!(u16_: u16 = "3" => 3);
    deserialized_to!(u32_: u32 = "3" => 3);
    deserialized_to!(u64_: u64 = "3" => 3);

    serde_if_integer128! {
        deserialized_to!(i128_: i128 = "-2" => -2);
        deserialized_to!(u128_: u128 = "2" => 2);
    }

    deserialized_to!(f32_: f32 = "1.23" => 1.23);
    deserialized_to!(f64_: f64 = "1.23" => 1.23);

    // Names are not unescaped: `&lt;` stays a four-character string
    deserialized_to!(char_unescaped: char = "h" => 'h');
    err!(char_escaped: char = "&lt;"
        => Custom("invalid value: string \"&lt;\", expected a character"));

    deserialized_to!(string: String = "&lt;escaped&#x20;string" => "&lt;escaped&#x20;string");
    deserialized_to!(borrowed_str: &str = "name" => "name");

    // A name is always offered to the visitor as a string, never as bytes
    err!(byte_buf: ByteBuf = "&lt;escaped&#x20;string"
        => Custom("invalid type: string \"&lt;escaped&#x20;string\", expected byte data"));
    err!(borrowed_bytes: Bytes = "name"
        => Custom("invalid type: string \"name\", expected borrowed bytes"));

    deserialized_to!(option_none: Option<String> = "" => None);
    deserialized_to!(option_some: Option<String> = "name" => Some("name".into()));

    // Unit structs cannot be represented in any meaningful way, but it is
    // meaningful to use them as a placeholder when we want to deserialize
    // _something_
    deserialized_to_only!(unit: () = "anything" => ());
    deserialized_to_only!(unit_struct: Unit = "anything" => Unit);

    deserialized_to!(newtype: Newtype = "&lt;escaped&#x20;string" => Newtype("&lt;escaped&#x20;string".into()));

    // Compound types cannot be built from a single name
    err!(seq: Vec<()> = "name"
        => Custom("invalid type: string \"name\", expected a sequence"));
    err!(tuple: ((), ()) = "name"
        => Custom("invalid type: string \"name\", expected a tuple of size 2"));
    err!(tuple_struct: Tuple = "name"
        => Custom("invalid type: string \"name\", expected tuple struct Tuple"));

    err!(map: HashMap<(), ()> = "name"
        => Custom("invalid type: string \"name\", expected a map"));
    err!(struct_: Struct = "name"
        => Custom("invalid type: string \"name\", expected struct Struct"));

    // Only unit variants of enums are supported
    deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit);
    deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr);
    err!(enum_newtype: Enum = "Newtype"
        => Unsupported("enum newtype variants are not supported as an XML names"));
    err!(enum_tuple: Enum = "Tuple"
        => Unsupported("enum tuple variants are not supported as an XML names"));
    err!(enum_struct: Enum = "Struct"
        => Unsupported("enum struct variants are not supported as an XML names"));

    // Field identifiers cannot be serialized, and IgnoredAny represents
    // _something_ which is not concrete
    deserialized_to_only!(identifier: Id = "Field" => Id::Field);
    deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny));
}