1use crate::unescape::unescape;
7use std::borrow::Cow;
8use std::convert::TryFrom;
9use std::error::Error;
10use std::fmt::{Display, Formatter};
11use xmlparser::{ElementEnd, Token, Tokenizer};
12
/// Nesting depth of a token within the document.
///
/// The start tag of the root element is reported at depth `0`.
pub type Depth = usize;
14
/// Internal classification of the failures an [`XmlDecodeError`] can represent.
#[derive(Debug)]
enum XmlDecodeErrorKind {
    /// The underlying `xmlparser` tokenizer rejected the input.
    InvalidXml(xmlparser::Error),
    /// An escape sequence could not be decoded; `esc` is echoed in the error message.
    InvalidEscape { esc: String },
    /// A free-form error message supplied by the caller.
    Custom(Cow<'static, str>),
    /// Any other error, kept as the source of the decode error.
    Unhandled(Box<dyn std::error::Error + Send + Sync + 'static>),
}
25
/// Error returned when decoding XML fails.
///
/// The concrete failure kind is private; use the `Display` output for a message and
/// `Error::source` for the wrapped cause, when there is one.
#[derive(Debug)]
pub struct XmlDecodeError {
    // What went wrong; never exposed directly to callers.
    kind: XmlDecodeErrorKind,
}
30
31impl Display for XmlDecodeError {
32 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
33 match &self.kind {
34 XmlDecodeErrorKind::InvalidXml(_) => write!(f, "XML parse error"),
35 XmlDecodeErrorKind::InvalidEscape { esc } => write!(f, "invalid XML escape: {}", esc),
36 XmlDecodeErrorKind::Custom(msg) => write!(f, "error parsing XML: {}", msg),
37 XmlDecodeErrorKind::Unhandled(_) => write!(f, "error parsing XML"),
38 }
39 }
40}
41
42impl Error for XmlDecodeError {
43 fn source(&self) -> Option<&(dyn Error + 'static)> {
44 match &self.kind {
45 XmlDecodeErrorKind::InvalidXml(source) => Some(source as _),
46 XmlDecodeErrorKind::Unhandled(source) => Some(source.as_ref() as _),
47 XmlDecodeErrorKind::InvalidEscape { .. } | XmlDecodeErrorKind::Custom(..) => None,
48 }
49 }
50}
51
52impl XmlDecodeError {
53 pub(crate) fn invalid_xml(error: xmlparser::Error) -> Self {
54 Self {
55 kind: XmlDecodeErrorKind::InvalidXml(error),
56 }
57 }
58
59 pub(crate) fn invalid_escape(esc: impl Into<String>) -> Self {
60 Self {
61 kind: XmlDecodeErrorKind::InvalidEscape { esc: esc.into() },
62 }
63 }
64
65 pub fn custom(msg: impl Into<Cow<'static, str>>) -> Self {
66 Self {
67 kind: XmlDecodeErrorKind::Custom(msg.into()),
68 }
69 }
70
71 pub fn unhandled(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
72 Self {
73 kind: XmlDecodeErrorKind::Unhandled(error.into()),
74 }
75 }
76}
77
/// A qualified XML name, split into its namespace prefix and local part.
///
/// `prefix` is the text before the colon and `local` the text after it.
#[derive(PartialEq, Debug)]
pub struct Name<'a> {
    pub prefix: &'a str,
    pub local: &'a str,
}

impl Name<'_> {
    /// Checks whether this name matches `tag_name`.
    ///
    /// `tag_name` is either `local` or `prefix:local`. Without a colon only the local
    /// part is compared and the prefix is ignored; with a colon, the text before the
    /// first colon must equal `prefix` and the remainder must equal `local`.
    pub fn matches(&self, tag_name: &str) -> bool {
        match tag_name.split_once(':') {
            Some((wanted_prefix, wanted_local)) => {
                self.prefix == wanted_prefix && self.local == wanted_local
            }
            None => self.local == tag_name,
        }
    }
}
98
/// A single attribute of a start tag.
#[derive(Debug, PartialEq)]
pub struct Attr<'a> {
    // Qualified attribute name (prefix + local part).
    name: Name<'a>,
    // Attribute value; borrowed from the input or owned.
    value: Cow<'a, str>,
}
105
/// The start tag of an element: its name, its attributes and where it sits in the document.
#[derive(Debug, PartialEq)]
pub struct StartEl<'a> {
    // Qualified element name.
    name: Name<'a>,
    // Attributes in the order they were pushed.
    attributes: Vec<Attr<'a>>,
    // `true` for a self-closing tag (`<a/>`), which has no separate end tag.
    closed: bool,
    // Depth at which the start tag was reported.
    depth: Depth,
}
113
114impl<'a> StartEl<'a> {
122 pub fn depth(&self) -> Depth {
123 self.depth
124 }
125
126 fn new(local: &'a str, prefix: &'a str, depth: Depth) -> Self {
127 Self {
128 name: Name { prefix, local },
129 attributes: vec![],
130 closed: false,
131 depth,
132 }
133 }
134
135 pub fn attr<'b>(&'b self, key: &'b str) -> Option<&'b str> {
139 self.attributes
140 .iter()
141 .find(|attr| attr.name.matches(key))
142 .map(|attr| attr.value.as_ref())
143 }
144
145 pub fn matches(&self, pat: &str) -> bool {
148 self.name.matches(pat)
149 }
150
151 pub fn local(&self) -> &str {
158 self.name.local
159 }
160
161 pub fn prefix(&self) -> &str {
167 self.name.prefix
168 }
169
170 fn end_el(&self, el: ElementEnd<'_>, depth: Depth) -> bool {
172 if depth != self.depth {
173 return false;
174 }
175 match el {
176 ElementEnd::Open => false,
177 ElementEnd::Close(prefix, local) => {
178 prefix.as_str() == self.name.prefix && local.as_str() == self.name.local
179 }
180 ElementEnd::Empty => false,
181 }
182 }
183}
184
/// An XML document being decoded.
///
/// Wraps an `xmlparser` tokenizer and keeps track of the current element depth
/// as tokens are consumed.
pub struct Document<'a> {
    // Source of raw tokens.
    tokenizer: Tokenizer<'a>,
    // Number of elements currently open.
    depth: Depth,
}
193
194impl<'a> TryFrom<&'a [u8]> for Document<'a> {
195 type Error = XmlDecodeError;
196
197 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
198 Ok(Document::new(
199 std::str::from_utf8(value).map_err(XmlDecodeError::unhandled)?,
200 ))
201 }
202}
203
204impl<'inp> Document<'inp> {
205 pub fn new(doc: &'inp str) -> Self {
206 Document {
207 tokenizer: Tokenizer::from(doc),
208 depth: 0,
209 }
210 }
211
212 pub fn next_start_element<'a>(&'a mut self) -> Option<StartEl<'inp>> {
228 next_start_element(self)
229 }
230
231 pub fn root_element<'a>(&'a mut self) -> Result<ScopedDecoder<'inp, 'a>, XmlDecodeError> {
233 let start_el = self
234 .next_start_element()
235 .ok_or_else(|| XmlDecodeError::custom("no root element"))?;
236 Ok(ScopedDecoder {
237 doc: self,
238 start_el,
239 terminated: false,
240 })
241 }
242
243 pub fn scoped_to<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
249 ScopedDecoder {
250 doc: self,
251 start_el,
252 terminated: false,
253 }
254 }
255}
256
/// Newtype around a raw `xmlparser` token; the inner token is private to this module.
#[derive(Debug)]
pub struct XmlToken<'inp>(Token<'inp>);
261
262impl<'inp> Iterator for Document<'inp> {
273 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
274 fn next<'a>(&'a mut self) -> Option<Result<(XmlToken<'inp>, Depth), XmlDecodeError>> {
275 let tok = self.tokenizer.next()?;
276 let tok = match tok {
277 Err(e) => return Some(Err(XmlDecodeError::invalid_xml(e))),
278 Ok(tok) => tok,
279 };
280 match tok {
282 Token::ElementEnd {
283 end: ElementEnd::Close(_, _),
284 ..
285 } => {
286 self.depth -= 1;
287 }
288 Token::ElementEnd {
289 end: ElementEnd::Empty,
290 ..
291 } => self.depth -= 1,
292 t @ Token::ElementStart { .. } => {
293 self.depth += 1;
294 return Some(Ok((XmlToken(t), self.depth - 1)));
297 }
298 _ => {}
299 }
300 Some(Ok((XmlToken(tok), self.depth)))
301 }
302}
303
/// A view over a [`Document`] limited to the contents of a single element.
///
/// Iterating yields tokens until the end tag matching `start_el` is reached.
pub struct ScopedDecoder<'inp, 'a> {
    // The document being consumed; borrowed mutably for the life of the scope.
    doc: &'a mut Document<'inp>,
    // The start tag that opened this scope.
    start_el: StartEl<'inp>,
    // Set once the scope has ended; no further tokens are read after that.
    terminated: bool,
}
313
314impl Drop for ScopedDecoder<'_, '_> {
317 fn drop(&mut self) {
318 for _ in self {}
319 }
320}
321
322impl<'inp> ScopedDecoder<'inp, '_> {
323 pub fn start_el<'a>(&'a self) -> &'a StartEl<'inp> {
325 &self.start_el
326 }
327
328 pub fn next_tag<'a>(&'a mut self) -> Option<ScopedDecoder<'inp, 'a>> {
343 let next_tag = next_start_element(self)?;
344 Some(self.nested_decoder(next_tag))
345 }
346
347 fn nested_decoder<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
348 ScopedDecoder {
349 doc: self.doc,
350 start_el,
351 terminated: false,
352 }
353 }
354}
355
356impl<'inp, 'a> Iterator for ScopedDecoder<'inp, 'a> {
357 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
358
359 fn next(&mut self) -> Option<Self::Item> {
360 if self.start_el.closed {
361 self.terminated = true;
362 }
363 if self.terminated {
364 return None;
365 }
366 let (tok, depth) = match self.doc.next() {
367 Some(Ok((tok, depth))) => (tok, depth),
368 other => return other,
369 };
370
371 match tok.0 {
372 Token::ElementEnd { end, .. } if self.start_el.end_el(end, depth) => {
373 self.terminated = true;
374 return None;
375 }
376 _ => {}
377 }
378 Some(Ok((tok, depth)))
379 }
380}
381
382fn next_start_element<'a, 'inp>(
384 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
385) -> Option<StartEl<'inp>> {
386 let mut out = StartEl::new("", "", 0);
387 loop {
388 match tokens.next()? {
389 Ok((XmlToken(Token::ElementStart { local, prefix, .. }), depth)) => {
390 out.name.local = local.as_str();
391 out.name.prefix = prefix.as_str();
392 out.depth = depth;
393 }
394 Ok((
395 XmlToken(Token::Attribute {
396 prefix,
397 local,
398 value,
399 ..
400 }),
401 _,
402 )) => out.attributes.push(Attr {
403 name: Name {
404 local: local.as_str(),
405 prefix: prefix.as_str(),
406 },
407 value: unescape(value.as_str()).ok()?,
408 }),
409 Ok((
410 XmlToken(Token::ElementEnd {
411 end: ElementEnd::Open,
412 ..
413 }),
414 _,
415 )) => break,
416 Ok((
417 XmlToken(Token::ElementEnd {
418 end: ElementEnd::Empty,
419 ..
420 }),
421 _,
422 )) => {
423 out.closed = true;
424 break;
425 }
426 _ => {}
427 }
428 }
429 Some(out)
430}
431
432pub fn try_data<'a, 'inp>(
437 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
438) -> Result<Cow<'inp, str>, XmlDecodeError> {
439 loop {
440 match tokens.next().map(|opt| opt.map(|opt| opt.0)) {
441 None => return Ok(Cow::Borrowed("")),
442 Some(Ok(XmlToken(Token::Text { text }))) => return unescape(text.as_str()),
443 Some(Ok(e @ XmlToken(Token::ElementStart { .. }))) => {
444 return Err(XmlDecodeError::custom(format!(
445 "looking for a data element, found: {:?}",
446 e
447 )))
448 }
449 Some(Err(e)) => return Err(e),
450 _ => {}
451 }
452 }
453}
454
#[cfg(test)]
mod test {
    use crate::decode::{try_data, Attr, Depth, Document, Name, StartEl};

    /// Builds the `StartEl` expected for a self-closing tag.
    fn closed<'a>(local: &'a str, prefix: &'a str, depth: Depth) -> StartEl<'a> {
        let mut s = StartEl::new(local, prefix, depth);
        s.closed = true;
        s
    }

    #[test]
    fn scoped_tokens() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().expect("valid document");
        assert_eq!(root.start_el().local(), "Response");
        assert_eq!(root.next_tag().expect("tag exists").start_el().local(), "A");
        assert!(root.next_tag().is_none());
    }

    #[test]
    fn handle_depth_properly() {
        // The inner `</Response>` must not be mistaken for the end of the root scope.
        let xml = r#"<Response><Response></Response><A/></Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid document");
        assert_eq!(
            scoped.next_tag().unwrap().start_el(),
            &StartEl::new("Response", "", 1)
        );
        let closed_a = closed("A", "", 1);
        assert_eq!(scoped.next_tag().unwrap().start_el(), &closed_a);
        assert!(scoped.next_tag().is_none())
    }

    #[test]
    fn self_closing() {
        let xml = r#"<Response/>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid doc");
        assert!(scoped.start_el.closed);
        assert!(scoped.next_tag().is_none())
    }

    #[test]
    fn terminate_scope() {
        let xml = r#"<Response><Struct><A></A><Also/></Struct><More/></Response>"#;
        let mut doc = Document::new(xml);
        let mut response_iter = doc.root_element().expect("valid doc");
        let mut struct_iter = response_iter.next_tag().unwrap();
        assert_eq!(
            struct_iter.next_tag().as_ref().map(|t| t.start_el()),
            Some(&StartEl::new("A", "", 2))
        );
        // Dropping the nested decoder must skip the rest of `<Struct>`.
        drop(struct_iter);
        assert_eq!(
            response_iter.next_tag().unwrap().start_el(),
            &closed("More", "", 1)
        );
    }

    #[test]
    fn read_data_invalid() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        try_data(&mut resp).expect_err("no data");
    }

    #[test]
    fn read_data() {
        let xml = r#"<Response>hello</Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), "hello");
    }

    #[test]
    fn read_data_whitespace() {
        let xml = r#"<Response> hello </Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), " hello ");
    }

    #[test]
    fn ignore_insignificant_whitespace() {
        let xml = r#"<Response> <A> </A> </Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        let mut a = resp.next_tag().expect("should be a");
        let data = try_data(&mut a).expect("valid");
        assert_eq!(data, " ");
    }

    #[test]
    fn read_attributes() {
        let xml = r#"<Response xsi:type="CanonicalUser">hello</Response>"#;
        let mut tokenizer = Document::new(xml);
        let root = tokenizer.root_element().unwrap();

        assert_eq!(
            root.start_el().attributes,
            vec![Attr {
                name: Name {
                    prefix: "xsi",
                    local: "type"
                },
                value: "CanonicalUser".into()
            }]
        )
    }

    #[test]
    fn unescape_data() {
        // The fixture must contain *escaped* entities: a literal `"` inside the
        // attribute value or a bare `>` would make the input malformed, and the
        // point of the test is that the decoder turns the entities back into
        // `"` and `>`.
        let xml = r#"<Response key="&quot;hey&quot;&gt;">&gt;</Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        assert_eq!(try_data(&mut root).unwrap(), ">");
        assert_eq!(root.start_el().attr("key"), Some("\"hey\">"));
    }

    #[test]
    fn nested_self_closer() {
        let xml = r#"<XmlListsInputOutput>
            <stringList/>
            <stringSet></stringSet>
        </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut string_list = root.next_tag().unwrap();
        assert_eq!(string_list.start_el(), &closed("stringList", "", 1));
        assert!(string_list.next_tag().is_none());
        drop(string_list);
        assert_eq!(
            root.next_tag().unwrap().start_el(),
            &StartEl::new("stringSet", "", 1)
        );
    }

    #[test]
    fn confusing_nested_same_name_tag() {
        // Only the direct children of the root are expected; the `<c/>`, `<b>` and
        // `<here/>` nested inside the outer `<b>` must be skipped.
        let root_tags = &["a", "b", "c", "d"];
        let xml = r#"<XmlListsInputOutput>
            <a/>
            <b>
              <c/>
              <b></b>
              <here/>
            </b>
            <c></c>
            <d>more</d>
        </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut cmp = vec![];
        while let Some(tag) = root.next_tag() {
            cmp.push(tag.start_el().local().to_owned());
        }
        assert_eq!(root_tags, cmp.as_slice());
    }
}