// aws_smithy_xml/unescape.rs
use crate::decode::XmlDecodeError;
use std::borrow::Cow;
/// Unescape XML-encoded text.
///
/// Resolves the five predefined entities (`&lt;`, `&gt;`, `&amp;`, `&quot;`, `&apos;`) as well
/// as decimal (`&#NN;`) and hexadecimal (`&#xHH;`) character references.
///
/// Input that contains no `&` is returned as `Cow::Borrowed` without allocating; any other
/// successful result is `Cow::Owned`.
///
/// # Errors
///
/// Returns an `XmlDecodeError` built with `invalid_escape` when:
/// - an `&` is not followed by a terminating `;`,
/// - the name between `&` and `;` is neither a predefined entity nor starts with `#`,
/// - a numeric reference is empty or contains anything other than digits of its base
///   (so sign-prefixed forms such as `&#+65;` are rejected), or
/// - the numeric value is not a valid `char` (for example a surrogate code point).
pub(crate) fn unescape(s: &str) -> Result<Cow<'_, str>, XmlDecodeError> {
    // Fast path: nothing to decode, hand the input back untouched.
    if !s.contains('&') {
        return Ok(Cow::Borrowed(s));
    }
    let mut res = String::with_capacity(s.len());
    let mut sections = s.split('&');
    // Everything before the first `&` is literal text.
    if let Some(prefix) = sections.next() {
        res.push_str(prefix);
    }
    // Every remaining section begins immediately after an `&`, so it has to look like
    // `<entity>;<literal text>`.
    for section in sections {
        match section.find(';') {
            Some(idx) => {
                let entity = &section[..idx];
                match entity {
                    "lt" => res.push('<'),
                    "gt" => res.push('>'),
                    "amp" => res.push('&'),
                    "quot" => res.push('"'),
                    "apos" => res.push('\''),
                    entity => {
                        let (digits, radix) = if let Some(hex) = entity.strip_prefix("#x") {
                            (hex, 16)
                        } else if let Some(dec) = entity.strip_prefix('#') {
                            (dec, 10)
                        } else {
                            return Err(XmlDecodeError::invalid_escape(entity));
                        };
                        // `u32::from_str_radix` tolerates a leading `+`, which is not part of
                        // a character reference, so require pure digits before trusting it.
                        let only_digits = digits.chars().all(|c| c.is_digit(radix));
                        let char_code = u32::from_str_radix(digits, radix)
                            .ok()
                            .filter(|_| only_digits)
                            .ok_or_else(|| {
                                XmlDecodeError::invalid_escape(format!(
                                    "expected numeric escape in base {}; got: {}",
                                    radix, &digits
                                ))
                            })?;
                        let chr = std::char::from_u32(char_code).ok_or_else(|| {
                            XmlDecodeError::invalid_escape(format!(
                                "invalid char code: {}",
                                char_code
                            ))
                        })?;
                        res.push(chr);
                    }
                }
                // Text after the `;` is literal up to the next `&`.
                res.push_str(&section[idx + 1..])
            }
            None => return Err(XmlDecodeError::invalid_escape("unterminated pattern")),
        }
    }
    Ok(Cow::Owned(res))
}
#[cfg(test)]
mod test {
    use crate::unescape::unescape;
    use std::borrow::Cow;

    // The five predefined entities decode to their literal characters.
    #[test]
    fn basic_unescape() {
        assert_eq!(
            unescape("&lt; &gt; &apos; &quot; &amp;").unwrap(),
            "< > ' \" &"
        );
        assert_eq!(
            unescape("Since a &gt; b, b is less than a").unwrap(),
            "Since a > b, b is less than a"
        );
    }

    // Input without `&` must come back borrowed, including non-ASCII text.
    #[test]
    fn no_need_to_escape() {
        assert_eq!(unescape("hello 🍕!").unwrap(), Cow::Borrowed("hello 🍕!"));
    }

    // Adjacent entities, trailing `;` characters, and a doubly-escaped entity (which must be
    // decoded exactly once).
    #[test]
    fn complex_unescape() {
        assert_eq!(
            unescape("a&lt;b&gt;c&quot;d&apos;e&amp;f;;").unwrap(),
            "a<b>c\"d'e&f;;"
        );
        assert_eq!(unescape("&amp;lt;").unwrap(), "&lt;")
    }

    // Decimal character references for line feed and carriage return.
    #[test]
    fn newline_encoding() {
        assert_eq!(unescape("&#10;").unwrap(), "\n");
        assert_eq!(unescape("&#13;").unwrap(), "\r");
    }

    // Hexadecimal character references for the end-of-line characters; they must be decoded
    // verbatim, without any normalization.
    #[test]
    fn xml_eol_encoding() {
        assert_eq!(unescape("&#xA; &#xA;").unwrap(), "\n \n");
        assert_eq!(
            unescape("a&#xD;&#xA; b&#xA; c&#xD;").unwrap(),
            "a\r\n b\n c\r"
        );
        assert_eq!(
            unescape("a&#xD;&#x85; b&#x85;").unwrap(),
            "a\r\u{0085} b\u{0085}"
        );
        assert_eq!(
            unescape("a&#xD;&#x2028; b&#x85; c&#x2028;").unwrap(),
            "a\r\u{2028} b\u{0085} c\u{2028}"
        );
    }

    // Malformed references must be reported as errors rather than passed through.
    #[test]
    fn invalid_escapes() {
        unescape("&lte;").expect_err("lte does not make a ≤");
        unescape("&lt").expect_err("unterminated escape sequence");
        unescape("&#Q1234;").expect_err("Q does not began a numeric sequence");
        unescape("&#3.14;").expect_err("decimal escape");
        unescape("&#xZZ").expect_err("Z is not hex");
        unescape("here is a & but without an escape sequence...").expect_err("naked &");
    }

    use proptest::prelude::*;
    proptest! {
        // Arbitrary input must never panic, and anything containing `&` can only produce an
        // owned string or an error (never the borrowed fast path).
        #[test]
        fn no_panics(s: String) {
            let unescaped = unescape(&s);
            if s.contains('&') {
                assert!(
                    matches!(unescaped, Ok(Cow::Owned(_)) | Err(_))
                );
            }
        }
    }
}