quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::ops::Deref;
45use std::str::from_utf8;
46
47use crate::encoding::Decoder;
48use crate::errors::{Error, Result};
49use crate::escape::{escape, partial_escape, unescape_with};
50use crate::name::{LocalName, QName};
51use crate::reader::is_whitespace;
52use crate::utils::write_cow_string;
53#[cfg(feature = "serialize")]
54use crate::utils::CowRef;
55use attributes::{Attribute, Attributes};
56use std::mem::replace;
57
/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// The tag name and the raw attribute text share a single buffer: the first
/// `name_len` bytes are the name and everything after them is the attribute
/// part of the tag.
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (name followed by attributes), before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
}
75
76impl<'a> BytesStart<'a> {
77    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
78    #[inline]
79    pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
80        BytesStart {
81            buf: Cow::Borrowed(content),
82            name_len,
83        }
84    }
85
86    /// Creates a new `BytesStart` from the given name.
87    ///
88    /// # Warning
89    ///
90    /// `name` must be a valid name.
91    #[inline]
92    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
93        let buf = str_cow_to_bytes(name);
94        BytesStart {
95            name_len: buf.len(),
96            buf,
97        }
98    }
99
100    /// Creates a new `BytesStart` from the given content (name + attributes).
101    ///
102    /// # Warning
103    ///
104    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
105    /// must be correctly-formed attributes. Neither are checked, it is possible
106    /// to generate invalid XML if `content` or `name_len` are incorrect.
107    #[inline]
108    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
109        BytesStart {
110            buf: str_cow_to_bytes(content),
111            name_len,
112        }
113    }
114
115    /// Converts the event into an owned event.
116    pub fn into_owned(self) -> BytesStart<'static> {
117        BytesStart {
118            buf: Cow::Owned(self.buf.into_owned()),
119            name_len: self.name_len,
120        }
121    }
122
123    /// Converts the event into an owned event without taking ownership of Event
124    pub fn to_owned(&self) -> BytesStart<'static> {
125        BytesStart {
126            buf: Cow::Owned(self.buf.clone().into_owned()),
127            name_len: self.name_len,
128        }
129    }
130
131    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
132    ///
133    /// # Example
134    ///
135    /// ```
136    /// use quick_xml::events::{BytesStart, Event};
137    /// # use quick_xml::writer::Writer;
138    /// # use quick_xml::Error;
139    ///
140    /// struct SomeStruct<'a> {
141    ///     attrs: BytesStart<'a>,
142    ///     // ...
143    /// }
144    /// # impl<'a> SomeStruct<'a> {
145    /// # fn example(&self) -> Result<(), Error> {
146    /// # let mut writer = Writer::new(Vec::new());
147    ///
148    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
149    /// // ...
150    /// writer.write_event(Event::End(self.attrs.to_end()))?;
151    /// # Ok(())
152    /// # }}
153    /// ```
154    ///
155    /// [`to_end`]: Self::to_end
156    pub fn borrow(&self) -> BytesStart {
157        BytesStart {
158            buf: Cow::Borrowed(&self.buf),
159            name_len: self.name_len,
160        }
161    }
162
163    /// Creates new paired close tag
164    pub fn to_end(&self) -> BytesEnd {
165        BytesEnd::wrap(self.name().into_inner().into())
166    }
167
168    /// Gets the undecoded raw tag name, as present in the input stream.
169    #[inline]
170    pub fn name(&self) -> QName {
171        QName(&self.buf[..self.name_len])
172    }
173
174    /// Gets the undecoded raw local tag name (excluding namespace) as present
175    /// in the input stream.
176    ///
177    /// All content up to and including the first `:` character is removed from the tag name.
178    #[inline]
179    pub fn local_name(&self) -> LocalName {
180        self.name().into()
181    }
182
183    /// Edit the name of the BytesStart in-place
184    ///
185    /// # Warning
186    ///
187    /// `name` must be a valid name.
188    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
189        let bytes = self.buf.to_mut();
190        bytes.splice(..self.name_len, name.iter().cloned());
191        self.name_len = name.len();
192        self
193    }
194
195    /// Gets the undecoded raw tag name, as present in the input stream, which
196    /// is borrowed either to the input, or to the event.
197    ///
198    /// # Lifetimes
199    ///
200    /// - `'a`: Lifetime of the input data from which this event is borrow
201    /// - `'e`: Lifetime of the concrete event instance
202    // TODO: We should made this is a part of public API, but with safe wrapped for a name
203    #[cfg(feature = "serialize")]
204    pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
205        match self.buf {
206            Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
207            Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
208        }
209    }
210}
211
212/// Attribute-related methods
213impl<'a> BytesStart<'a> {
214    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
215    ///
216    /// The yielded items must be convertible to [`Attribute`] using `Into`.
217    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
218    where
219        I: IntoIterator,
220        I::Item: Into<Attribute<'b>>,
221    {
222        self.extend_attributes(attributes);
223        self
224    }
225
226    /// Add additional attributes to this tag using an iterator.
227    ///
228    /// The yielded items must be convertible to [`Attribute`] using `Into`.
229    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
230    where
231        I: IntoIterator,
232        I::Item: Into<Attribute<'b>>,
233    {
234        for attr in attributes {
235            self.push_attribute(attr);
236        }
237        self
238    }
239
240    /// Adds an attribute to this element.
241    pub fn push_attribute<'b, A>(&mut self, attr: A)
242    where
243        A: Into<Attribute<'b>>,
244    {
245        let a = attr.into();
246        let bytes = self.buf.to_mut();
247        bytes.push(b' ');
248        bytes.extend_from_slice(a.key.as_ref());
249        bytes.extend_from_slice(b"=\"");
250        bytes.extend_from_slice(a.value.as_ref());
251        bytes.push(b'"');
252    }
253
254    /// Remove all attributes from the ByteStart
255    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
256        self.buf.to_mut().truncate(self.name_len);
257        self
258    }
259
260    /// Returns an iterator over the attributes of this tag.
261    pub fn attributes(&self) -> Attributes {
262        Attributes::wrap(&self.buf, self.name_len, false)
263    }
264
265    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
266    pub fn html_attributes(&self) -> Attributes {
267        Attributes::wrap(&self.buf, self.name_len, true)
268    }
269
270    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
271    /// including the whitespace after the tag name if there is any.
272    #[inline]
273    pub fn attributes_raw(&self) -> &[u8] {
274        &self.buf[self.name_len..]
275    }
276
277    /// Try to get an attribute
278    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
279        &'a self,
280        attr_name: N,
281    ) -> Result<Option<Attribute<'a>>> {
282        for a in self.attributes().with_checks(false) {
283            let a = a?;
284            if a.key.as_ref() == attr_name.as_ref() {
285                return Ok(Some(a));
286            }
287        }
288        Ok(None)
289    }
290}
291
292impl<'a> Debug for BytesStart<'a> {
293    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
294        write!(f, "BytesStart {{ buf: ")?;
295        write_cow_string(f, &self.buf)?;
296        write!(f, ", name_len: {} }}", self.name_len)
297    }
298}
299
300impl<'a> Deref for BytesStart<'a> {
301    type Target = [u8];
302
303    fn deref(&self) -> &[u8] {
304        &self.buf
305    }
306}
307
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and an
    /// arbitrary list of attributes; any other name is rejected.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let is_valid = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !is_valid {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut start = Self::new(name);
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        start.extend_attributes(attrs);
        Ok(start)
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
324////////////////////////////////////////////////////////////////////////////////////////////////////
325
/// An XML declaration (`Event::Decl`).
///
/// The declaration is stored as a [`BytesStart`], so its pseudo-attributes
/// (`version`, `encoding`, `standalone`) are read with the ordinary
/// attribute machinery.
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// Raw content of the declaration, handled as a start tag
    content: BytesStart<'a>,
}
333
334impl<'a> BytesDecl<'a> {
335    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
336    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
337    /// attribute.
338    ///
339    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
340    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
341    /// the double quote character is not allowed in any of the attribute values.
342    pub fn new(
343        version: &str,
344        encoding: Option<&str>,
345        standalone: Option<&str>,
346    ) -> BytesDecl<'static> {
347        // Compute length of the buffer based on supplied attributes
348        // ' encoding=""'   => 12
349        let encoding_attr_len = if let Some(xs) = encoding {
350            12 + xs.len()
351        } else {
352            0
353        };
354        // ' standalone=""' => 14
355        let standalone_attr_len = if let Some(xs) = standalone {
356            14 + xs.len()
357        } else {
358            0
359        };
360        // 'xml version=""' => 14
361        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
362
363        buf.push_str("xml version=\"");
364        buf.push_str(version);
365
366        if let Some(encoding_val) = encoding {
367            buf.push_str("\" encoding=\"");
368            buf.push_str(encoding_val);
369        }
370
371        if let Some(standalone_val) = standalone {
372            buf.push_str("\" standalone=\"");
373            buf.push_str(standalone_val);
374        }
375        buf.push('"');
376
377        BytesDecl {
378            content: BytesStart::from_content(buf, 3),
379        }
380    }
381
382    /// Creates a `BytesDecl` from a `BytesStart`
383    pub fn from_start(start: BytesStart<'a>) -> Self {
384        Self { content: start }
385    }
386
387    /// Gets xml version, excluding quotes (`'` or `"`).
388    ///
389    /// According to the [grammar], the version *must* be the first thing in the declaration.
390    /// This method tries to extract the first thing in the declaration and return it.
391    /// In case of multiple attributes value of the first one is returned.
392    ///
393    /// If version is missed in the declaration, or the first thing is not a version,
394    /// [`Error::XmlDeclWithoutVersion`] will be returned.
395    ///
396    /// # Examples
397    ///
398    /// ```
399    /// use quick_xml::Error;
400    /// use quick_xml::events::{BytesDecl, BytesStart};
401    ///
402    /// // <?xml version='1.1'?>
403    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
404    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
405    ///
406    /// // <?xml version='1.0' version='1.1'?>
407    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
408    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
409    ///
410    /// // <?xml encoding='utf-8'?>
411    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
412    /// match decl.version() {
413    ///     Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
414    ///     _ => assert!(false),
415    /// }
416    ///
417    /// // <?xml encoding='utf-8' version='1.1'?>
418    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
419    /// match decl.version() {
420    ///     Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
421    ///     _ => assert!(false),
422    /// }
423    ///
424    /// // <?xml?>
425    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
426    /// match decl.version() {
427    ///     Err(Error::XmlDeclWithoutVersion(None)) => {},
428    ///     _ => assert!(false),
429    /// }
430    /// ```
431    ///
432    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
433    pub fn version(&self) -> Result<Cow<[u8]>> {
434        // The version *must* be the first thing in the declaration.
435        match self.content.attributes().with_checks(false).next() {
436            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
437            // first attribute was not "version"
438            Some(Ok(a)) => {
439                let found = from_utf8(a.key.as_ref())?.to_string();
440                Err(Error::XmlDeclWithoutVersion(Some(found)))
441            }
442            // error parsing attributes
443            Some(Err(e)) => Err(e.into()),
444            // no attributes
445            None => Err(Error::XmlDeclWithoutVersion(None)),
446        }
447    }
448
449    /// Gets xml encoding, excluding quotes (`'` or `"`).
450    ///
451    /// Although according to the [grammar] encoding must appear before `"standalone"`
452    /// and after `"version"`, this method does not check that. The first occurrence
453    /// of the attribute will be returned even if there are several. Also, method does
454    /// not restrict symbols that can forming the encoding, so the returned encoding
455    /// name may not correspond to the grammar.
456    ///
457    /// # Examples
458    ///
459    /// ```
460    /// use std::borrow::Cow;
461    /// use quick_xml::Error;
462    /// use quick_xml::events::{BytesDecl, BytesStart};
463    ///
464    /// // <?xml version='1.1'?>
465    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
466    /// assert!(decl.encoding().is_none());
467    ///
468    /// // <?xml encoding='utf-8'?>
469    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
470    /// match decl.encoding() {
471    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
472    ///     _ => assert!(false),
473    /// }
474    ///
475    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
476    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
477    /// match decl.encoding() {
478    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
479    ///     _ => assert!(false),
480    /// }
481    /// ```
482    ///
483    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
484    pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
485        self.content
486            .try_get_attribute("encoding")
487            .map(|a| a.map(|a| a.value))
488            .transpose()
489    }
490
491    /// Gets xml standalone, excluding quotes (`'` or `"`).
492    ///
493    /// Although according to the [grammar] standalone flag must appear after `"version"`
494    /// and `"encoding"`, this method does not check that. The first occurrence of the
495    /// attribute will be returned even if there are several. Also, method does not
496    /// restrict symbols that can forming the value, so the returned flag name may not
497    /// correspond to the grammar.
498    ///
499    /// # Examples
500    ///
501    /// ```
502    /// use std::borrow::Cow;
503    /// use quick_xml::Error;
504    /// use quick_xml::events::{BytesDecl, BytesStart};
505    ///
506    /// // <?xml version='1.1'?>
507    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
508    /// assert!(decl.standalone().is_none());
509    ///
510    /// // <?xml standalone='yes'?>
511    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
512    /// match decl.standalone() {
513    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
514    ///     _ => assert!(false),
515    /// }
516    ///
517    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
518    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
519    /// match decl.standalone() {
520    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
521    ///     _ => assert!(false),
522    /// }
523    /// ```
524    ///
525    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
526    pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
527        self.content
528            .try_get_attribute("standalone")
529            .map(|a| a.map(|a| a.value))
530            .transpose()
531    }
532
533    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
534    /// algorithm.
535    ///
536    /// If encoding in not known, or `encoding` key was not found, returns `None`.
537    /// In case of duplicated `encoding` key, encoding, corresponding to the first
538    /// one, is returned.
539    #[cfg(feature = "encoding")]
540    pub fn encoder(&self) -> Option<&'static Encoding> {
541        self.encoding()
542            .and_then(|e| e.ok())
543            .and_then(|e| Encoding::for_label(&e))
544    }
545
546    /// Converts the event into an owned event.
547    pub fn into_owned(self) -> BytesDecl<'static> {
548        BytesDecl {
549            content: self.content.into_owned(),
550        }
551    }
552
553    /// Converts the event into a borrowed event.
554    #[inline]
555    pub fn borrow(&self) -> BytesDecl {
556        BytesDecl {
557            content: self.content.borrow(),
558        }
559    }
560}
561
562impl<'a> Deref for BytesDecl<'a> {
563    type Target = [u8];
564
565    fn deref(&self) -> &[u8] {
566        &self.content
567    }
568}
569
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version and optional
    /// encoding and standalone values, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
584
585////////////////////////////////////////////////////////////////////////////////////////////////////
586
/// Closing tag data (`Event::End`): holds the raw, undecoded name of the
/// element being closed.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw tag name, either borrowed or owned
    name: Cow<'a, [u8]>,
}
592
593impl<'a> BytesEnd<'a> {
594    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
595    #[inline]
596    pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
597        BytesEnd { name }
598    }
599
600    /// Creates a new `BytesEnd` borrowing a slice.
601    ///
602    /// # Warning
603    ///
604    /// `name` must be a valid name.
605    #[inline]
606    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
607        Self::wrap(str_cow_to_bytes(name))
608    }
609
610    /// Converts the event into an owned event.
611    pub fn into_owned(self) -> BytesEnd<'static> {
612        BytesEnd {
613            name: Cow::Owned(self.name.into_owned()),
614        }
615    }
616
617    /// Converts the event into a borrowed event.
618    #[inline]
619    pub fn borrow(&self) -> BytesEnd {
620        BytesEnd {
621            name: Cow::Borrowed(&self.name),
622        }
623    }
624
625    /// Gets the undecoded raw tag name, as present in the input stream.
626    #[inline]
627    pub fn name(&self) -> QName {
628        QName(&self.name)
629    }
630
631    /// Gets the undecoded raw local tag name (excluding namespace) as present
632    /// in the input stream.
633    ///
634    /// All content up to and including the first `:` character is removed from the tag name.
635    #[inline]
636    pub fn local_name(&self) -> LocalName {
637        self.name().into()
638    }
639}
640
641impl<'a> Debug for BytesEnd<'a> {
642    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
643        write!(f, "BytesEnd {{ name: ")?;
644        write_cow_string(f, &self.name)?;
645        write!(f, " }}")
646    }
647}
648
649impl<'a> Deref for BytesEnd<'a> {
650    type Target = [u8];
651
652    fn deref(&self) -> &[u8] {
653        &self.name
654    }
655}
656
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
666
667////////////////////////////////////////////////////////////////////////////////////////////////////
668
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is kept in escaped form as well.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
680
681impl<'a> BytesText<'a> {
682    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
683    #[inline]
684    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
685        Self {
686            content: content.into(),
687            decoder,
688        }
689    }
690
691    /// Creates a new `BytesText` from an escaped string.
692    #[inline]
693    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
694        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
695    }
696
697    /// Creates a new `BytesText` from a string. The string is expected not to
698    /// be escaped.
699    #[inline]
700    pub fn new(content: &'a str) -> Self {
701        Self::from_escaped(escape(content))
702    }
703
704    /// Ensures that all data is owned to extend the object's lifetime if
705    /// necessary.
706    #[inline]
707    pub fn into_owned(self) -> BytesText<'static> {
708        BytesText {
709            content: self.content.into_owned().into(),
710            decoder: self.decoder,
711        }
712    }
713
714    /// Extracts the inner `Cow` from the `BytesText` event container.
715    #[inline]
716    pub fn into_inner(self) -> Cow<'a, [u8]> {
717        self.content
718    }
719
720    /// Converts the event into a borrowed event.
721    #[inline]
722    pub fn borrow(&self) -> BytesText {
723        BytesText {
724            content: Cow::Borrowed(&self.content),
725            decoder: self.decoder,
726        }
727    }
728
729    /// Decodes then unescapes the content of the event.
730    ///
731    /// This will allocate if the value contains any escape sequences or in
732    /// non-UTF-8 encoding.
733    pub fn unescape(&self) -> Result<Cow<'a, str>> {
734        self.unescape_with(|_| None)
735    }
736
737    /// Decodes then unescapes the content of the event with custom entities.
738    ///
739    /// This will allocate if the value contains any escape sequences or in
740    /// non-UTF-8 encoding.
741    pub fn unescape_with<'entity>(
742        &self,
743        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
744    ) -> Result<Cow<'a, str>> {
745        let decoded = match &self.content {
746            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
747            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
748            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
749        };
750
751        match unescape_with(&decoded, resolve_entity)? {
752            // Because result is borrowed, no replacements was done and we can use original string
753            Cow::Borrowed(_) => Ok(decoded),
754            Cow::Owned(s) => Ok(s.into()),
755        }
756    }
757
758    /// Removes leading XML whitespace bytes from text content.
759    ///
760    /// Returns `true` if content is empty after that
761    pub fn inplace_trim_start(&mut self) -> bool {
762        self.content = trim_cow(
763            replace(&mut self.content, Cow::Borrowed(b"")),
764            trim_xml_start,
765        );
766        self.content.is_empty()
767    }
768
769    /// Removes trailing XML whitespace bytes from text content.
770    ///
771    /// Returns `true` if content is empty after that
772    pub fn inplace_trim_end(&mut self) -> bool {
773        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
774        self.content.is_empty()
775    }
776}
777
778impl<'a> Debug for BytesText<'a> {
779    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
780        write!(f, "BytesText {{ content: ")?;
781        write_cow_string(f, &self.content)?;
782        write!(f, " }}")
783    }
784}
785
786impl<'a> Deref for BytesText<'a> {
787    type Target = [u8];
788
789    fn deref(&self) -> &[u8] {
790        &self.content
791    }
792}
793
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists solely
    /// of alphanumeric characters (the empty string is accepted).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|c| !c.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
808
809////////////////////////////////////////////////////////////////////////////////////////////////////
810
/// CDATA content contains unescaped data from the reader. If you want to write it as text,
/// [convert](Self::escape) it to [`BytesText`]
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section, stored in the encoding
    /// described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
819
820impl<'a> BytesCData<'a> {
821    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
822    #[inline]
823    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
824        Self {
825            content: content.into(),
826            decoder,
827        }
828    }
829
830    /// Creates a new `BytesCData` from a string.
831    ///
832    /// # Warning
833    ///
834    /// `content` must not contain the `]]>` sequence.
835    #[inline]
836    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
837        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
838    }
839
840    /// Ensures that all data is owned to extend the object's lifetime if
841    /// necessary.
842    #[inline]
843    pub fn into_owned(self) -> BytesCData<'static> {
844        BytesCData {
845            content: self.content.into_owned().into(),
846            decoder: self.decoder,
847        }
848    }
849
850    /// Extracts the inner `Cow` from the `BytesCData` event container.
851    #[inline]
852    pub fn into_inner(self) -> Cow<'a, [u8]> {
853        self.content
854    }
855
856    /// Converts the event into a borrowed event.
857    #[inline]
858    pub fn borrow(&self) -> BytesCData {
859        BytesCData {
860            content: Cow::Borrowed(&self.content),
861            decoder: self.decoder,
862        }
863    }
864
865    /// Converts this CDATA content to an escaped version, that can be written
866    /// as an usual text in XML.
867    ///
868    /// This function performs following replacements:
869    ///
870    /// | Character | Replacement
871    /// |-----------|------------
872    /// | `<`       | `&lt;`
873    /// | `>`       | `&gt;`
874    /// | `&`       | `&amp;`
875    /// | `'`       | `&apos;`
876    /// | `"`       | `&quot;`
877    pub fn escape(self) -> Result<BytesText<'a>> {
878        let decoded = self.decode()?;
879        Ok(BytesText::wrap(
880            match escape(&decoded) {
881                // Because result is borrowed, no replacements was done and we can use original content
882                Cow::Borrowed(_) => self.content,
883                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
884            },
885            Decoder::utf8(),
886        ))
887    }
888
889    /// Converts this CDATA content to an escaped version, that can be written
890    /// as an usual text in XML.
891    ///
892    /// In XML text content, it is allowed (though not recommended) to leave
893    /// the quote special characters `"` and `'` unescaped.
894    ///
895    /// This function performs following replacements:
896    ///
897    /// | Character | Replacement
898    /// |-----------|------------
899    /// | `<`       | `&lt;`
900    /// | `>`       | `&gt;`
901    /// | `&`       | `&amp;`
902    pub fn partial_escape(self) -> Result<BytesText<'a>> {
903        let decoded = self.decode()?;
904        Ok(BytesText::wrap(
905            match partial_escape(&decoded) {
906                // Because result is borrowed, no replacements was done and we can use original content
907                Cow::Borrowed(_) => self.content,
908                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
909            },
910            Decoder::utf8(),
911        ))
912    }
913
914    /// Gets content of this text buffer in the specified encoding
915    pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
916        Ok(match &self.content {
917            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
918            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
919            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
920        })
921    }
922}
923
924impl<'a> Debug for BytesCData<'a> {
925    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
926        write!(f, "BytesCData {{ content: ")?;
927        write_cow_string(f, &self.content)?;
928        write!(f, " }}")
929    }
930}
931
932impl<'a> Deref for BytesCData<'a> {
933    type Target = [u8];
934
935    fn deref(&self) -> &[u8] {
936        &self.content
937    }
938}
939
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds a CDATA event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// Reports the size hint of the `&str` the event is built from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
949
950////////////////////////////////////////////////////////////////////////////////////////////////////
951
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) wraps a container with the data of
/// the corresponding piece of XML. Several variants share a container type:
/// `Start` and `Empty` both hold a [`BytesStart`], while `Text`, `Comment`,
/// `PI` and `DocType` all hold a [`BytesText`].
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    ///
    /// Holds the same container type as [`Start`](Self::Start).
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document. This variant carries no data.
    Eof,
}
979
980impl<'a> Event<'a> {
981    /// Converts the event to an owned version, untied to the lifetime of
982    /// buffer used when reading but incurring a new, separate allocation.
983    pub fn into_owned(self) -> Event<'static> {
984        match self {
985            Event::Start(e) => Event::Start(e.into_owned()),
986            Event::End(e) => Event::End(e.into_owned()),
987            Event::Empty(e) => Event::Empty(e.into_owned()),
988            Event::Text(e) => Event::Text(e.into_owned()),
989            Event::Comment(e) => Event::Comment(e.into_owned()),
990            Event::CData(e) => Event::CData(e.into_owned()),
991            Event::Decl(e) => Event::Decl(e.into_owned()),
992            Event::PI(e) => Event::PI(e.into_owned()),
993            Event::DocType(e) => Event::DocType(e.into_owned()),
994            Event::Eof => Event::Eof,
995        }
996    }
997
998    /// Converts the event into a borrowed event.
999    #[inline]
1000    pub fn borrow(&self) -> Event {
1001        match self {
1002            Event::Start(e) => Event::Start(e.borrow()),
1003            Event::End(e) => Event::End(e.borrow()),
1004            Event::Empty(e) => Event::Empty(e.borrow()),
1005            Event::Text(e) => Event::Text(e.borrow()),
1006            Event::Comment(e) => Event::Comment(e.borrow()),
1007            Event::CData(e) => Event::CData(e.borrow()),
1008            Event::Decl(e) => Event::Decl(e.borrow()),
1009            Event::PI(e) => Event::PI(e.borrow()),
1010            Event::DocType(e) => Event::DocType(e.borrow()),
1011            Event::Eof => Event::Eof,
1012        }
1013    }
1014}
1015
1016impl<'a> Deref for Event<'a> {
1017    type Target = [u8];
1018
1019    fn deref(&self) -> &[u8] {
1020        match *self {
1021            Event::Start(ref e) | Event::Empty(ref e) => e,
1022            Event::End(ref e) => e,
1023            Event::Text(ref e) => e,
1024            Event::Decl(ref e) => e,
1025            Event::PI(ref e) => e,
1026            Event::CData(ref e) => e,
1027            Event::Comment(ref e) => e,
1028            Event::DocType(ref e) => e,
1029            Event::Eof => &[],
1030        }
1031    }
1032}
1033
1034impl<'a> AsRef<Event<'a>> for Event<'a> {
1035    fn as_ref(&self) -> &Event<'a> {
1036        self
1037    }
1038}
1039
1040////////////////////////////////////////////////////////////////////////////////////////////////////
1041
/// Reinterprets string content as its underlying bytes without copying:
/// a borrowed string stays borrowed and an owned string stays owned.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1049
1050/// Returns a byte slice with leading XML whitespace bytes removed.
1051///
1052/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1053const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
1054    // Note: A pattern matching based approach (instead of indexing) allows
1055    // making the function const.
1056    while let [first, rest @ ..] = bytes {
1057        if is_whitespace(*first) {
1058            bytes = rest;
1059        } else {
1060            break;
1061        }
1062    }
1063    bytes
1064}
1065
1066/// Returns a byte slice with trailing XML whitespace bytes removed.
1067///
1068/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1069const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
1070    // Note: A pattern matching based approach (instead of indexing) allows
1071    // making the function const.
1072    while let [rest @ .., last] = bytes {
1073        if is_whitespace(*last) {
1074            bytes = rest;
1075        } else {
1076            break;
1077        }
1078    }
1079    bytes
1080}
1081
/// Applies `trim` to the bytes held by `value`, keeping the kind of the
/// `Cow`: borrowed input yields a borrowed result, owned input an owned one.
///
/// For owned input the original buffer is reused when `trim` returns a slice
/// of the same length; otherwise the trimmed bytes are copied into a new
/// buffer.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let buffer = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => buffer,
    };

    let trimmed = trim(&buffer);
    if trimmed.len() == buffer.len() {
        Cow::Owned(buffer)
    } else {
        Cow::Owned(trimmed.to_vec())
    }
}
1097
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were pushed before.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // The attribute is stored as ` x="a"`, i.e. 6 more bytes.
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // Only the name shrinks: 1 byte of name + 6 bytes of attributes.
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));

        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}