use crate::std_facade::{Box, Cow, String, ToOwned, Vec};
use core::fmt;
use core::mem;
use core::ops::RangeInclusive;
use core::u32;
use regex_syntax::hir::{
self, Hir,
HirKind::*,
Literal::*,
RepetitionKind::{self, *},
RepetitionRange::*,
};
use regex_syntax::{Error as ParseError, Parser};
use crate::bool;
use crate::char;
use crate::collection::{size_range, vec, SizeRange};
use crate::strategy::*;
use crate::test_runner::*;
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StringParam(&'static str);
impl From<StringParam> for &'static str {
fn from(x: StringParam) -> Self {
x.0
}
}
impl From<&'static str> for StringParam {
fn from(x: &'static str) -> Self {
StringParam(x)
}
}
impl Default for StringParam {
fn default() -> Self {
StringParam("\\PC*")
}
}
#[allow(bare_trait_objects)]
mod error_container {
use super::*;
quick_error! {
#[derive(Debug)]
pub enum Error {
RegexSyntax(err: ParseError) {
from()
source(err)
display("{}", err)
}
UnsupportedRegex(message: &'static str) {
display("{}", message)
}
}
}
}
pub use self::error_container::Error;
opaque_strategy_wrapper! {
#[derive(Debug)]
pub struct RegexGeneratorStrategy[<T>][where T : fmt::Debug]
(SBoxedStrategy<T>) -> RegexGeneratorValueTree<T>;
pub struct RegexGeneratorValueTree[<T>][where T : fmt::Debug]
(Box<dyn ValueTree<Value = T>>) -> T;
}
impl Strategy for str {
type Tree = RegexGeneratorValueTree<String>;
type Value = String;
fn new_tree(&self, runner: &mut TestRunner) -> NewTree<Self> {
string_regex(self).unwrap().new_tree(runner)
}
}
type ParseResult<T> = Result<RegexGeneratorStrategy<T>, Error>;
#[doc(hidden)]
pub trait StrategyFromRegex: Sized + fmt::Debug {
type Strategy: Strategy<Value = Self>;
fn from_regex(regex: &str) -> Self::Strategy;
}
impl StrategyFromRegex for String {
type Strategy = RegexGeneratorStrategy<Self>;
fn from_regex(regex: &str) -> Self::Strategy {
string_regex(regex).unwrap()
}
}
impl StrategyFromRegex for Vec<u8> {
type Strategy = RegexGeneratorStrategy<Self>;
fn from_regex(regex: &str) -> Self::Strategy {
bytes_regex(regex).unwrap()
}
}
pub fn string_regex(regex: &str) -> ParseResult<String> {
string_regex_parsed(®ex_to_hir(regex)?)
}
pub fn string_regex_parsed(expr: &Hir) -> ParseResult<String> {
bytes_regex_parsed(expr)
.map(|v| {
v.prop_map(|bytes| {
String::from_utf8(bytes).expect("non-utf8 string")
})
.sboxed()
})
.map(RegexGeneratorStrategy)
}
pub fn bytes_regex(regex: &str) -> ParseResult<Vec<u8>> {
bytes_regex_parsed(®ex_to_hir(regex)?)
}
pub fn bytes_regex_parsed(expr: &Hir) -> ParseResult<Vec<u8>> {
match expr.kind() {
Empty => Ok(Just(vec![]).sboxed()),
Literal(lit) => Ok(Just(match lit {
Unicode(scalar) => to_bytes(*scalar),
Byte(byte) => vec![*byte],
})
.sboxed()),
Class(class) => Ok(match class {
hir::Class::Unicode(class) => {
unicode_class_strategy(class).prop_map(to_bytes).sboxed()
}
hir::Class::Bytes(class) => {
let subs = class.iter().map(|r| r.start()..=r.end());
Union::new(subs).prop_map(|b| vec![b]).sboxed()
}
}),
Repetition(rep) => Ok(vec(
bytes_regex_parsed(&rep.hir)?,
to_range(rep.kind.clone())?,
)
.prop_map(|parts| {
parts.into_iter().fold(vec![], |mut acc, child| {
acc.extend(child);
acc
})
})
.sboxed()),
Group(group) => bytes_regex_parsed(&group.hir).map(|v| v.0),
Concat(subs) => {
let subs = ConcatIter {
iter: subs.iter(),
buf: vec![],
next: None,
};
let ext = |(mut lhs, rhs): (Vec<_>, _)| {
lhs.extend(rhs);
lhs
};
Ok(subs
.fold(Ok(None), |accum: Result<_, Error>, rhs| {
Ok(match accum? {
None => Some(rhs?.sboxed()),
Some(accum) => {
Some((accum, rhs?).prop_map(ext).sboxed())
}
})
})?
.unwrap_or_else(|| Just(vec![]).sboxed()))
}
Alternation(subs) => {
Ok(Union::try_new(subs.iter().map(bytes_regex_parsed))?.sboxed())
}
Anchor(_) => {
unsupported("line/text anchors not supported for string generation")
}
WordBoundary(_) => unsupported(
"word boundary tests not supported for string generation",
),
}
.map(RegexGeneratorStrategy)
}
fn unicode_class_strategy(
class: &hir::ClassUnicode,
) -> char::CharStrategy<'static> {
static NONL_RANGES: &[RangeInclusive<char>] = &[
'\x00'..='\x09',
'\x0B'..=::core::char::MAX,
'\x0B'..=::core::char::MAX,
'\x0B'..=::core::char::MAX,
'\x0B'..=::core::char::MAX,
'\x0B'..=::core::char::MAX,
];
let dotnnl = |x: &hir::ClassUnicodeRange, y: &hir::ClassUnicodeRange| {
x.start() == '\0'
&& x.end() == '\x09'
&& y.start() == '\x0B'
&& y.end() == '\u{10FFFF}'
};
char::ranges(match class.ranges() {
[x, y] if dotnnl(x, y) || dotnnl(y, x) => Cow::Borrowed(NONL_RANGES),
_ => Cow::Owned(class.iter().map(|r| r.start()..=r.end()).collect()),
})
}
struct ConcatIter<'a, I> {
buf: Vec<u8>,
iter: I,
next: Option<&'a Hir>,
}
fn flush_lit_buf<I>(
it: &mut ConcatIter<'_, I>,
) -> Option<ParseResult<Vec<u8>>> {
Some(Ok(RegexGeneratorStrategy(
Just(mem::replace(&mut it.buf, vec![])).sboxed(),
)))
}
impl<'a, I: Iterator<Item = &'a Hir>> Iterator for ConcatIter<'a, I> {
type Item = ParseResult<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(next) = self.next.take() {
return Some(bytes_regex_parsed(next));
}
while let Some(next) = self.iter.next() {
match next.kind() {
Literal(Unicode(scalar)) => self.buf.extend(to_bytes(*scalar)),
Literal(Byte(byte)) => self.buf.push(*byte),
_ => {
return if !self.buf.is_empty() {
self.next = Some(next);
flush_lit_buf(self)
} else {
Some(bytes_regex_parsed(next))
};
}
}
}
if !self.buf.is_empty() {
flush_lit_buf(self)
} else {
self.next.take().map(bytes_regex_parsed)
}
}
}
fn to_range(kind: RepetitionKind) -> Result<SizeRange, Error> {
Ok(match kind {
ZeroOrOne => size_range(0..=1),
ZeroOrMore => size_range(0..=32),
OneOrMore => size_range(1..=32),
Range(range) => match range {
Exactly(count) if u32::MAX == count => {
return unsupported(
"Cannot have repetition of exactly u32::MAX",
)
}
Exactly(count) => size_range(count as usize),
AtLeast(min) => {
let max = if min < u32::MAX as u32 / 2 {
min as usize * 2
} else {
u32::MAX as usize
};
size_range((min as usize)..max)
}
Bounded(_, max) if u32::MAX == max => {
return unsupported("Cannot have repetition max of u32::MAX")
}
Bounded(min, max) => size_range((min as usize)..(max as usize + 1)),
},
})
}
fn to_bytes(khar: char) -> Vec<u8> {
let mut buf = [0u8; 4];
khar.encode_utf8(&mut buf).as_bytes().to_owned()
}
fn regex_to_hir(pattern: &str) -> Result<Hir, Error> {
Ok(Parser::new().parse(pattern)?)
}
fn unsupported<T>(error: &'static str) -> Result<T, Error> {
Err(Error::UnsupportedRegex(error))
}
#[cfg(test)]
mod test {
use std::collections::HashSet;
use regex::Regex;
use super::*;
fn do_test(
pattern: &str,
min_distinct: usize,
max_distinct: usize,
iterations: usize,
) {
let generated = generate_values_matching_regex(pattern, iterations);
assert!(
generated.len() >= min_distinct,
"Expected to generate at least {} strings, but only \
generated {}",
min_distinct,
generated.len()
);
assert!(
generated.len() <= max_distinct,
"Expected to generate at most {} strings, but \
generated {}",
max_distinct,
generated.len()
);
}
fn generate_values_matching_regex(
pattern: &str,
iterations: usize,
) -> HashSet<String> {
let rx = Regex::new(pattern).unwrap();
let mut generated = HashSet::new();
let strategy = string_regex(pattern).unwrap();
let mut runner = TestRunner::deterministic();
for _ in 0..iterations {
let mut value = strategy.new_tree(&mut runner).unwrap();
loop {
let s = value.current();
let ok = if let Some(matsch) = rx.find(&s) {
0 == matsch.start() && s.len() == matsch.end()
} else {
false
};
if !ok {
panic!(
"Generated string {:?} which does not match {:?}",
s, pattern
);
}
generated.insert(s);
if !value.simplify() {
break;
}
}
}
generated
}
#[test]
fn test_case_insensitive_produces_all_available_values() {
let mut expected: HashSet<String> = HashSet::new();
expected.insert("a".into());
expected.insert("b".into());
expected.insert("A".into());
expected.insert("B".into());
assert_eq!(generate_values_matching_regex("(?i:a|B)", 64), expected);
}
#[test]
fn test_literal() {
do_test("foo", 1, 1, 8);
}
#[test]
fn test_casei_literal() {
do_test("(?i:fOo)", 8, 8, 64);
}
#[test]
fn test_alternation() {
do_test("foo|bar|baz", 3, 3, 16);
}
#[test]
fn test_repitition() {
do_test("a{0,8}", 9, 9, 64);
}
#[test]
fn test_question() {
do_test("a?", 2, 2, 16);
}
#[test]
fn test_star() {
do_test("a*", 33, 33, 256);
}
#[test]
fn test_plus() {
do_test("a+", 32, 32, 256);
}
#[test]
fn test_n_to_range() {
do_test("a{4,}", 4, 4, 64);
}
#[test]
fn test_concatenation() {
do_test("(foo|bar)(xyzzy|plugh)", 4, 4, 32);
}
#[test]
fn test_ascii_class() {
do_test("[[:digit:]]", 10, 10, 256);
}
#[test]
fn test_unicode_class() {
do_test("\\p{Greek}", 24, 512, 256);
}
#[test]
fn test_dot() {
do_test(".", 200, 65536, 256);
}
#[test]
fn test_dot_s() {
do_test("(?s).", 200, 65536, 256);
}
#[test]
fn test_backslash_d_plus() {
do_test("\\d+", 1, 65536, 256);
}
fn assert_send_and_sync<T: Send + Sync>(_: T) {}
#[test]
fn regex_strategy_is_send_and_sync() {
assert_send_and_sync(string_regex(".").unwrap());
}
macro_rules! consistent {
($name:ident, $value:expr) => {
#[test]
fn $name() {
test_generates_matching_strings($value);
}
};
}
fn test_generates_matching_strings(pattern: &str) {
use std::time;
let mut runner = TestRunner::default();
let start = time::Instant::now();
if let Ok(strategy) = string_regex(pattern) {
let rx = Regex::new(pattern).unwrap();
for _ in 0..1000 {
let mut val = strategy.new_tree(&mut runner).unwrap();
for _ in 0..1000 {
let s = val.current();
assert!(
rx.is_match(&s),
"Produced string {:?}, which does not match {:?}",
s,
pattern
);
if !val.simplify() {
break;
}
}
if start.elapsed().as_secs() > 10 {
break;
}
}
}
}
include!("regex-contrib/crates_regex.rs");
}