// fancy_regex/expand.rs
use crate::parse::{parse_decimal, parse_id};
use crate::{Captures, CompileError, Error, ParseError, Regex};
use std::borrow::Cow;
use std::io;
use std::mem;
/// A set of options for expanding a template string using the contents
/// of capture groups.
///
/// The fields describe the template syntax: which character starts a
/// substitution and how a group name following it is delimited.
#[derive(Debug)]
pub struct Expander {
/// Character that introduces a substitution; written twice in a row it
/// stands for one literal occurrence of itself.
sub_char: char,
/// Delimiter that opens a delimited group name after `sub_char`.
open: &'static str,
/// Delimiter that closes a delimited group name.
close: &'static str,
/// Whether a group name may follow `sub_char` directly, without the
/// `open`/`close` delimiters.
allow_undelimited_name: bool,
}
impl Default for Expander {
/// Returns the default expander used by [`Captures::expand`].
///
/// [`Captures::expand`]: struct.Captures.html#expand
fn default() -> Self {
Expander {
sub_char: '$',
open: "{",
close: "}",
allow_undelimited_name: true,
}
}
}
impl Expander {
/// Creates an expander that understands Python-compatible template syntax.
///
/// With this expander, every `\num` or `\g<name>` in a template refers to
/// the capture group `num` or `name`:
///
/// - `num` must be an integer corresponding to the index of the capture
///   group.
/// - `name` may be an integer corresponding to the index of the capture
///   group (counted by order of opening parenthesis, where `\0` is the
///   entire match), or a name (consisting of letters, digits or
///   underscores) corresponding to a named capture group.
///
/// If `num` or `name` isn't a valid capture group (whether the name doesn't
/// exist or isn't a valid index), it is replaced with the empty string.
///
/// The longest possible number is used, e.g. `\10` looks up capture group 10
/// and not capture group 1 followed by a literal 0.
///
/// To write a literal `\`, use `\\`.
pub fn python() -> Expander {
    Expander {
        allow_undelimited_name: false,
        close: ">",
        open: "g<",
        sub_char: '\\',
    }
}
/// Checks `template` for errors. The following conditions are checked for:
///
/// - A reference to a numbered group that does not exist in `regex`
/// - A reference to a numbered group (other than 0) when `regex` contains named groups
/// - A reference to a named group that does not occur in `regex`
/// - An opening group name delimiter without a closing delimiter
/// - Using an empty string as a group name
pub fn check(&self, template: &str, regex: &Regex) -> crate::Result<()> {
let on_group_num = |num| {
if num == 0 {
Ok(())
} else if !regex.named_groups.is_empty() {
Err(Error::CompileError(CompileError::NamedBackrefOnly))
} else if num < regex.captures_len() {
Ok(())
} else {
Err(Error::CompileError(CompileError::InvalidBackref))
}
};
self.exec(template, |step| match step {
Step::Char(_) => Ok(()),
Step::GroupName(name) => {
if regex.named_groups.contains_key(name) {
Ok(())
} else if let Ok(num) = name.parse() {
on_group_num(num)
} else {
Err(Error::CompileError(CompileError::InvalidBackref))
}
}
Step::GroupNum(num) => on_group_num(num),
Step::Error => Err(Error::ParseError(
0,
ParseError::GeneralParseError(
"parse error in template while expanding".to_string(),
),
)),
})
}
/// Escapes the substitution character in `text` so it appears literally
/// in the output of `expansion`.
///
/// ```
/// assert_eq!(
/// fancy_regex::Expander::default().escape("Has a literal $ sign."),
/// "Has a literal $$ sign.",
/// );
/// ```
pub fn escape<'a>(&self, text: &'a str) -> Cow<'a, str> {
if text.contains(self.sub_char) {
let mut quoted = String::with_capacity(self.sub_char.len_utf8() * 2);
quoted.push(self.sub_char);
quoted.push(self.sub_char);
Cow::Owned(text.replace(self.sub_char, "ed))
} else {
Cow::Borrowed(text)
}
}
// Deprecated former name of `escape`; forwards `text` to it unchanged and
// returns its result.
#[doc(hidden)]
#[deprecated(since = "0.4.0", note = "Use `escape` instead.")]
pub fn quote<'a>(&self, text: &'a str) -> Cow<'a, str> {
self.escape(text)
}
/// Expands the template string `template` using the syntax defined
/// by this expander and the values of capture groups from `captures`.
pub fn expansion(&self, template: &str, captures: &Captures<'_>) -> String {
let mut cursor = io::Cursor::new(Vec::with_capacity(template.len()));
self.write_expansion(&mut cursor, template, captures)
.expect("expansion succeeded");
String::from_utf8(cursor.into_inner()).expect("expansion is UTF-8")
}
/// Appends the expansion produced by `expansion` to `dst`. Potentially more efficient
/// than calling `expansion` directly and appending to an existing string.
pub fn append_expansion(&self, dst: &mut String, template: &str, captures: &Captures<'_>) {
let pos = dst.len();
let mut cursor = io::Cursor::new(mem::replace(dst, String::new()).into_bytes());
cursor.set_position(pos as u64);
self.write_expansion(&mut cursor, template, captures)
.expect("expansion succeeded");
*dst = String::from_utf8(cursor.into_inner()).expect("expansion is UTF-8");
}
/// Writes the expansion produced by `expansion` to `dst`. Potentially more efficient
/// than calling `expansion` directly and writing the result.
pub fn write_expansion(
&self,
mut dst: impl io::Write,
template: &str,
captures: &Captures<'_>,
) -> io::Result<()> {
self.exec(template, |step| match step {
Step::Char(c) => write!(dst, "{}", c),
Step::GroupName(name) => {
if let Some(m) = captures.name(name) {
write!(dst, "{}", m.as_str())
} else if let Some(m) = name.parse().ok().and_then(|num| captures.get(num)) {
write!(dst, "{}", m.as_str())
} else {
Ok(())
}
}
Step::GroupNum(num) => {
if let Some(m) = captures.get(num) {
write!(dst, "{}", m.as_str())
} else {
Ok(())
}
}
Step::Error => Ok(()),
})
}
/// Scans `template` and reports each piece to `f` as a `Step`, stopping at
/// the first error `f` returns.
///
/// Characters other than the substitution character are reported as
/// `Step::Char`. After a substitution character, the rest of the template
/// is tried, in order, as:
///
/// 1. a second substitution character, reported as a literal `Step::Char`;
/// 2. a group name between `open` and `close`, or, when
///    `allow_undelimited_name` is set, a name without delimiters,
///    reported as `Step::GroupName`;
/// 3. a decimal number, reported as `Step::GroupNum`.
///
/// If none of these applies, `Step::Error` is reported, followed by the
/// substitution character itself as a `Step::Char`.
fn exec<'t, E>(
    &self,
    template: &'t str,
    mut f: impl FnMut(Step<'t>) -> Result<(), E>,
) -> Result<(), E> {
    debug_assert!(!self.open.is_empty());
    debug_assert!(!self.close.is_empty());
    let mut iter = template.chars();
    while let Some(c) = iter.next() {
        if c != self.sub_char {
            f(Step::Char(c))?;
            continue;
        }
        // Everything after the substitution character that was just consumed.
        let tail = iter.as_str();
        // `skip` is the number of bytes of `tail` consumed by this reference.
        let skip = if tail.starts_with(self.sub_char) {
            f(Step::Char(self.sub_char))?;
            // Skip the whole escaped character, not a fixed single byte, so
            // the slice below always lands on a character boundary.
            self.sub_char.len_utf8()
        } else if let Some((id, skip)) =
            parse_id(tail, self.open, self.close).or_else(|| {
                if self.allow_undelimited_name {
                    parse_id(tail, "", "")
                } else {
                    None
                }
            })
        {
            f(Step::GroupName(id))?;
            skip
        } else if let Some((skip, num)) = parse_decimal(tail, 0) {
            f(Step::GroupNum(num))?;
            skip
        } else {
            f(Step::Error)?;
            f(Step::Char(self.sub_char))?;
            0
        };
        // Resume scanning right after the consumed reference.
        iter = tail[skip..].chars();
    }
    Ok(())
}
}
/// One piece of a template, as reported by `Expander::exec` to its callback.
enum Step<'a> {
/// A literal character to pass through (including an escaped
/// substitution character).
Char(char),
/// A group reference whose identifier was parsed by `parse_id`; it
/// borrows from the template string.
GroupName(&'a str),
/// A group reference parsed by `parse_decimal`.
GroupNum(usize),
/// A substitution character that was not followed by anything `exec`
/// could parse; `exec` then reports the substitution character itself
/// as a `Char`.
Error,
}