datadriven/
lib.rs

1use std::collections::{HashMap, VecDeque};
2use std::env;
3use std::fmt::Write;
4use std::fs;
5use std::path::PathBuf;
6use std::result::Result;
7use std::str::FromStr;
8use thiserror::Error;
9
10#[cfg(feature = "async")]
11use futures::future::Future;
12
13#[derive(Error, Debug)]
14pub enum DataDrivenError {
15    #[error("parsing: {0}")]
16    Parse(String),
17    #[error("reading files: {0}")]
18    Io(std::io::Error),
19    #[error("{filename}:{line}: {inner}")]
20    WithContext {
21        line: usize,
22        filename: String,
23        inner: Box<DataDrivenError>,
24    },
25    #[error("argument: {0}")]
26    Argument(String),
27    #[error("didn't use all arguments: {0:?}")]
28    DidntUseAllArguments(Vec<String>),
29}
30
31impl DataDrivenError {
32    fn with_line(self, line: usize) -> Self {
33        match self {
34            DataDrivenError::WithContext {
35                filename, inner, ..
36            } => DataDrivenError::WithContext {
37                line,
38                filename,
39                inner,
40            },
41            e => DataDrivenError::WithContext {
42                line,
43                filename: Default::default(),
44                inner: Box::new(e),
45            },
46        }
47    }
48
49    fn with_filename(self, filename: String) -> Self {
50        match self {
51            DataDrivenError::WithContext { line, inner, .. } => DataDrivenError::WithContext {
52                line,
53                filename,
54                inner,
55            },
56            e => DataDrivenError::WithContext {
57                line: Default::default(),
58                filename,
59                inner: Box::new(e),
60            },
61        }
62    }
63}
64
/// A value that a test closure can return: something that either yields the
/// output text of a test case or an error explaining why it could not.
pub trait TestCaseResult {
    /// The error reported when no output could be produced.
    type Err: std::fmt::Debug + std::fmt::Display;

    /// Converts `self` into the produced output, or the error that prevented it.
    fn result(self) -> Result<String, Self::Err>;
}

/// An uninhabited type, used as the error of results that cannot fail.
#[derive(Debug)]
pub enum Never {}

impl std::fmt::Display for Never {
    fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Never` has no variants, so there is never a value to format.
        match *self {}
    }
}

/// A plain `String` is always a successful result.
impl TestCaseResult for String {
    type Err = Never;

    fn result(self) -> Result<String, Never> {
        Result::Ok(self)
    }
}

/// A `Result` passes its error through unchanged and converts its success
/// value into a `String`.
impl<S: Into<String>, E: std::fmt::Display + std::fmt::Debug> TestCaseResult for Result<S, E> {
    type Err = E;

    fn result(self) -> Result<String, E> {
        self.map(Into::into)
    }
}
96
97/// A single test case within a file.
98#[derive(Debug, Clone)]
99pub struct TestCase {
100    /// The header for a test that denotes what kind of test is being run.
101    pub directive: String,
102    /// Any arguments that have been declared after the directive.
103    pub args: HashMap<String, Vec<String>>,
104    /// The input to the test.
105    pub input: String,
106
107    directive_line: String,
108    expected: String,
109    line_number: usize,
110}
111
112impl TestCase {
113    /// Extract the given flag from the test case, removing it. Fails if there
114    /// are any arguments for the value. Returns true if the flag was present.
115    pub fn take_flag(&mut self, arg: &str) -> Result<bool, DataDrivenError> {
116        let contents = self.args.remove(arg);
117        Ok(if let Some(args) = contents {
118            if !args.is_empty() {
119                Err(DataDrivenError::Argument(format!(
120                    "must be no arguments to take_flag, {} had {}",
121                    arg,
122                    args.len(),
123                )))?;
124            }
125            true
126        } else {
127            false
128        })
129    }
130
131    /// Extract the given arg from the test case, removing it. Fails if there
132    /// isn't exactly one argument for the value.
133    pub fn take_arg<T>(&mut self, arg: &str) -> Result<T, DataDrivenError>
134    where
135        T: FromStr,
136        <T as std::str::FromStr>::Err: std::error::Error + Send + Sync + 'static,
137    {
138        let result = self.try_take_arg(arg)?;
139        if let Some(result) = result {
140            Ok(result)
141        } else {
142            Err(DataDrivenError::Argument(format!(
143                "no argument named {}",
144                arg
145            )))
146        }
147    }
148
149    /// Extract the given arg from the test case, removing it if it exists.
150    pub fn try_take_arg<T>(&mut self, arg: &str) -> Result<Option<T>, DataDrivenError>
151    where
152        T: FromStr,
153        <T as std::str::FromStr>::Err: std::error::Error + Send + Sync + 'static,
154    {
155        let contents = self.args.remove(arg);
156        Ok(if let Some(args) = contents {
157            match args.len() {
158                0 => None,
159                1 => Some(
160                    args[0]
161                        .parse()
162                        .map_err(|e| DataDrivenError::Argument(format!("couldn't parse: {}", e)))?,
163                ),
164                _ => Err(DataDrivenError::Argument(format!(
165                    "must be exactly one argument to take_arg, {} had {}",
166                    arg,
167                    args.len(),
168                )))?,
169            }
170        } else {
171            None
172        })
173    }
174
175    /// Extract the given args from the test case, removing it. Returns an error
176    /// if the argument was not present at all.
177    pub fn take_args<T>(&mut self, arg: &str) -> Result<Vec<T>, DataDrivenError>
178    where
179        T: FromStr,
180        <T as std::str::FromStr>::Err: std::error::Error + Send + Sync + 'static,
181    {
182        let result = self
183            .try_take_args(arg)
184            .map_err(|e| DataDrivenError::Argument(format!("couldn't parse: {}", e)))?;
185        if let Some(result) = result {
186            Ok(result)
187        } else {
188            Err(DataDrivenError::Argument(format!(
189                "no argument named {}",
190                arg
191            )))
192        }
193    }
194
195    /// Extract the given args from the test case, removing it.
196    pub fn try_take_args<T>(&mut self, arg: &str) -> Result<Option<Vec<T>>, DataDrivenError>
197    where
198        T: FromStr,
199        <T as std::str::FromStr>::Err: std::error::Error + Send + 'static,
200    {
201        let contents = self.args.remove(arg);
202        Ok(if let Some(args) = contents {
203            Some(
204                args.into_iter()
205                    .map(|a| {
206                        a.parse()
207                            .map_err(|e| DataDrivenError::Parse(format!("couldn't parse: {}", e)))
208                    })
209                    .collect::<Result<Vec<T>, DataDrivenError>>()?,
210            )
211        } else {
212            None
213        })
214    }
215
216    // Returns an error if there are any arguments that haven't been used.
217    pub fn expect_empty(&self) -> Result<(), DataDrivenError> {
218        if self.args.is_empty() {
219            Ok(())
220        } else {
221            Err(DataDrivenError::DidntUseAllArguments(
222                self.args.keys().cloned().collect::<Vec<_>>(),
223            ))
224        }
225    }
226}
227
228/// Walk a directory for test files and run each one as a test.
229pub fn walk<F>(dir: &str, f: F)
230where
231    F: FnMut(&mut TestFile),
232{
233    walk_exclusive(dir, f, |_| false);
234}
235
236/// The same as `walk` but accepts an additional matcher to exclude matching files from being
237/// tested.
238pub fn walk_exclusive<F, M>(dir: &str, mut f: F, exclusion_matcher: M)
239where
240    F: FnMut(&mut TestFile),
241    M: Fn(&TestFile) -> bool,
242{
243    let mut file_prefix = PathBuf::from(dir);
244    if let Ok(p) = env::var("RUN") {
245        file_prefix = file_prefix.join(p);
246    }
247
248    // Accumulate failures until the end since Rust doesn't let us "fail but keep going" in a test.
249    let mut failures = Vec::new();
250
251    let mut run = |file| {
252        let mut tf = TestFile::new(&file).unwrap();
253        if exclusion_matcher(&tf) {
254            return;
255        }
256        f(&mut tf);
257        if let Some(fail) = tf.failure {
258            failures.push(fail);
259        }
260    };
261
262    if file_prefix.is_dir() {
263        for file in test_files(PathBuf::from(dir)).unwrap() {
264            run(file);
265        }
266    } else if file_prefix.exists() {
267        run(file_prefix);
268    }
269
270    if !failures.is_empty() {
271        let mut msg = String::new();
272        for f in failures {
273            msg.push_str(&f);
274            msg.push('\n');
275        }
276        panic!("{}", msg);
277    }
278}
279
// Reports whether a file name looks like an editor artifact or hidden file:
// .XXX, XXX~ or #XXX#.
fn should_ignore_file(name: &str) -> bool {
    let hidden = name.starts_with('.');
    let backup = name.ends_with('~');
    let autosave = name.starts_with('#') && name.ends_with('#');
    hidden || backup || autosave
}
284
285// Extracts all the non-directory children of dir. Not defensive against cycles!
286fn test_files(dir: PathBuf) -> Result<Vec<PathBuf>, DataDrivenError> {
287    let mut q = VecDeque::new();
288    q.push_back(dir);
289    let mut res = vec![];
290    while let Some(hd) = q.pop_front() {
291        for entry in fs::read_dir(hd).map_err(DataDrivenError::Io)? {
292            let path = entry.map_err(DataDrivenError::Io)?.path();
293            if path.is_dir() {
294                q.push_back(path);
295            } else if !should_ignore_file(path.file_name().unwrap().to_str().unwrap()) {
296                res.push(path);
297            }
298        }
299    }
300    Ok(res)
301}
302
303/// Parses a directive line of the form
304/// <directive> {arg={<value>|(<value>[,<value>]*)}}*
305/// Examples:
306///   hello                 => directive: "hello", no arguments
307///   hello world           => directive: "hello", world=[]
308///   hello world=foo       => directive: "hello", world=[foo]
309///   hello world=(foo,bar) => directive: "hello", world=[foo,bar]
310struct DirectiveParser {
311    chars: Vec<char>,
312    idx: usize,
313}
314
315impl DirectiveParser {
316    fn new(s: &str) -> Self {
317        DirectiveParser {
318            chars: s.chars().collect(),
319            idx: 0,
320        }
321    }
322
323    // Consume characters until we reach the end of the directive or hit a non-whitespace
324    // character.
325    fn munch(&mut self) {
326        while self.idx < self.chars.len() && self.chars[self.idx].is_ascii_whitespace() {
327            self.idx += 1;
328        }
329    }
330
331    fn peek(&mut self) -> Option<char> {
332        if self.idx >= self.chars.len() {
333            None
334        } else {
335            Some(self.chars[self.idx])
336        }
337    }
338
339    // If the next char is `ch`, consume it and return true. Otherwise, return false.
340    fn eat(&mut self, ch: char) -> bool {
341        if self.idx < self.chars.len() && self.chars[self.idx] == ch {
342            self.idx += 1;
343            true
344        } else {
345            false
346        }
347    }
348
349    fn is_wordchar(ch: char) -> bool {
350        ch.is_alphanumeric() || ch == '-' || ch == '_' || ch == '.'
351    }
352
353    fn parse_word(&mut self, context: &str) -> Result<String, DataDrivenError> {
354        let start = self.idx;
355        while self.peek().map_or(false, Self::is_wordchar) {
356            self.idx += 1;
357        }
358        if self.idx == start {
359            match self.peek() {
360                Some(ch) => Err(DataDrivenError::Parse(format!(
361                    "expected {}, got {}",
362                    context, ch
363                ))),
364                None => Err(DataDrivenError::Parse(format!(
365                    "expected {} but directive line ended",
366                    context
367                ))),
368            }?
369        }
370        let result = self.chars[start..self.idx].iter().collect();
371        self.munch();
372        Ok(result)
373    }
374
375    fn at_end(&self) -> bool {
376        self.idx >= self.chars.len()
377    }
378
379    fn parse_arg(&mut self) -> Result<(String, Vec<String>), DataDrivenError> {
380        let name = self.parse_word("argument name")?;
381        let vals = self.parse_vals()?;
382        Ok((name, vals))
383    }
384
385    // Parses an argument value, including the leading `=`.
386    fn parse_vals(&mut self) -> Result<Vec<String>, DataDrivenError> {
387        if !self.eat('=') {
388            return Ok(Vec::new());
389        }
390        self.munch();
391        if !self.eat('(') {
392            // If there's no leading paren, we parse a single argument as a singleton list.
393            return Ok(vec![self.parse_word("argument value")?]);
394        }
395        self.munch();
396        let mut vals = Vec::new();
397        while self.peek() != Some(')') {
398            vals.push(self.parse_word("argument value")?);
399            if !self.eat(',') {
400                break;
401            }
402            self.munch();
403        }
404        match self.peek() {
405            Some(')') => Ok(()),
406            Some(ch) => Err(DataDrivenError::Parse(format!(
407                "expected ',' or ')', got '{}'",
408                ch,
409            ))),
410            None => Err(DataDrivenError::Parse(
411                "expected ',' or '', but directive line ended".into(),
412            )),
413        }?;
414        self.idx += 1;
415        self.munch();
416        Ok(vals)
417    }
418
419    fn parse_directive(
420        &mut self,
421    ) -> Result<(String, HashMap<String, Vec<String>>), DataDrivenError> {
422        self.munch();
423        let directive = self.parse_word("directive")?;
424        let mut args = HashMap::new();
425        while !self.at_end() {
426            let (arg_name, arg_vals) = self.parse_arg()?;
427            if args.contains_key(&arg_name) {
428                Err(DataDrivenError::Parse(format!(
429                    "duplicate argument: {}",
430                    arg_name
431                )))?;
432            }
433            args.insert(arg_name, arg_vals);
434        }
435        Ok((directive, args))
436    }
437}
438
439// A stanza is some logical chunk of a test file. We need to remember the comments and not just
440// skip over them since we need to reproduce them when we rewrite.
441#[derive(Debug, Clone)]
442enum Stanza {
443    Test(TestCase),
444    Comment(String),
445}
446
447#[derive(Debug, Clone)]
448pub struct TestFile {
449    stanzas: Vec<Stanza>,
450
451    /// The name of the file
452    pub filename: String,
453
454    // failure gets set if a test failed during execution. We can't just return an error when that
455    // happens, since the user is calling `run` from a closure, so we have to buffer up a failure
456    // to be processed later (by `walk`).
457    failure: Option<String>,
458}
459
// Writes the output `s` of a test case to `w`, preceded by the separator form it needs:
//
// - output without a trailing newline is wrapped in double separators, closing with
//   `---- (no newline)`;
// - output containing a blank (empty or whitespace-only) line is wrapped in double
//   separators, because in the single-separator form a blank line ends the expected output;
// - anything else is written after a single `----` line.
//
// Panics if writing to `w` fails.
fn write_result<W>(w: &mut W, s: String)
where
    W: Write,
{
    let (open, close) = if !s.ends_with('\n') {
        ("----\n----\n", "\n----\n---- (no newline)\n")
    } else if s.lines().any(|line| line.trim().is_empty()) {
        ("----\n----\n", "----\n----\n")
    } else {
        ("----\n", "")
    };
    w.write_str(open).unwrap();
    w.write_str(&s).unwrap();
    w.write_str(close).unwrap();
}
477
478impl TestFile {
479    fn new(filename: &PathBuf) -> Result<Self, DataDrivenError> {
480        let contents = fs::read_to_string(filename).map_err(DataDrivenError::Io)?;
481        let stanzas =
482            Self::parse(&contents).map_err(|e| e.with_filename(filename.display().to_string()))?;
483        Ok(TestFile {
484            stanzas,
485            filename: filename.to_string_lossy().to_string(),
486            failure: None,
487        })
488    }
489
490    /// Run each test in this file in sequence by calling `f` on it. If any test fails, execution
491    /// halts. If the REWRITE environment variable is set, it will rewrite each file as it
492    /// processes it.
493    pub fn run<F, R>(&mut self, f: F)
494    where
495        F: FnMut(&mut TestCase) -> R,
496        R: TestCaseResult,
497    {
498        match env::var("REWRITE") {
499            Ok(_) => self.run_rewrite(f),
500            Err(_) => self.run_normal(f),
501        }
502    }
503
504    fn run_normal<F, R>(&mut self, mut f: F)
505    where
506        F: FnMut(&mut TestCase) -> R,
507        R: TestCaseResult,
508    {
509        for stanza in &mut self.stanzas {
510            if let Stanza::Test(case) = stanza {
511                let result = f(case);
512                match result.result() {
513                    Ok(result) => {
514                        if result != case.expected {
515                            self.failure = Some(format!(
516                                "failure:\n{}:{}:\n{}\nexpected:\n{}\nactual:\n{}",
517                                self.filename, case.line_number, case.input, case.expected, result
518                            ));
519                            // Yeah, ok, we're done here.
520                            break;
521                        }
522                    }
523                    Err(err) => {
524                        self.failure = Some(format!(
525                            "failure:\n{}:{}:\n{}\n{}",
526                            self.filename, case.line_number, case.input, err
527                        ));
528                    }
529                }
530            }
531        }
532    }
533
534    fn run_rewrite<F, R>(&mut self, mut f: F)
535    where
536        F: FnMut(&mut TestCase) -> R,
537        R: TestCaseResult,
538    {
539        let mut s = String::new();
540        for stanza in &mut self.stanzas {
541            match stanza {
542                Stanza::Test(case) => {
543                    s.push_str(&case.directive_line);
544                    s.push('\n');
545                    s.push_str(&case.input);
546                    write_result(&mut s, f(case).result().unwrap());
547                }
548                Stanza::Comment(c) => {
549                    s.push_str(c.as_str());
550                    s.push('\n');
551                }
552            }
553        }
554        // TODO(justin): surface these errors somehow?
555        fs::write(&self.filename, s).unwrap();
556    }
557
558    fn parse(f: &str) -> Result<Vec<Stanza>, DataDrivenError> {
559        let mut stanzas = vec![];
560        let lines: Vec<&str> = f.lines().collect();
561        let mut i = 0;
562        while i < lines.len() {
563            // TODO(justin): hacky implementation of comments
564            let line = lines[i]
565                .chars()
566                .take_while(|c| *c != '#')
567                .collect::<String>();
568
569            if line.trim() == "" {
570                stanzas.push(Stanza::Comment(lines[i].to_string()));
571                i += 1;
572                continue;
573            }
574
575            // Lines in text files are traditionally one-indexed.
576            let line_number = i + 1;
577
578            let mut parser = DirectiveParser::new(&line);
579            let directive_line = lines[i].to_string();
580            let (directive, args) = parser
581                .parse_directive()
582                .map_err(|e| e.with_line(line_number))?;
583
584            i += 1;
585            let mut input = String::new();
586            // Slurp up everything as the input until we hit a ----
587            while i < lines.len() && lines[i] != "----" {
588                input.push_str(lines[i]);
589                input.push('\n');
590                i += 1;
591            }
592            i += 1;
593            // If there is a second ----, we are in blank-line mode.
594            let blank_mode = i < lines.len() && lines[i] == "----";
595            if blank_mode {
596                i += 1;
597            }
598
599            // Then slurp up the expected.
600            let mut expected = String::new();
601            while i < lines.len() {
602                if blank_mode {
603                    if i + 1 >= lines.len() {
604                        Err(DataDrivenError::Parse(format!(
605                            "unclosed double-separator block for test case starting at line {}",
606                            line_number,
607                        )))?;
608                    }
609                    if i + 1 < lines.len() && lines[i] == "----" {
610                        if lines[i + 1] == "----" {
611                            i += 2;
612                            break;
613                        } else if lines[i + 1] == "---- (no newline)" {
614                            i += 2;
615                            if expected.ends_with('\n') {
616                                expected.pop().expect("should be nonempty.");
617                            }
618                            break;
619                        }
620                    }
621                } else if lines[i].trim() == "" {
622                    break;
623                }
624                expected.push_str(lines[i]);
625                expected.push('\n');
626                i += 1;
627            }
628
629            stanzas.push(Stanza::Test(TestCase {
630                directive_line,
631                directive: directive.to_string(),
632                input,
633                args,
634                expected,
635                line_number,
636            }));
637            i += 1;
638            if i < lines.len() {
639                stanzas.push(Stanza::Comment("".to_string()));
640            }
641        }
642
643        Ok(stanzas)
644    }
645}
646
647fn file_list(dir: &str) -> Vec<PathBuf> {
648    let mut file_prefix = PathBuf::from(dir);
649    if let Ok(p) = env::var("RUN") {
650        file_prefix = file_prefix.join(p);
651    }
652
653    if file_prefix.is_dir() {
654        test_files(PathBuf::from(dir)).unwrap()
655    } else if file_prefix.exists() {
656        vec![file_prefix]
657    } else {
658        vec![]
659    }
660}
661
/// The async equivalent of `walk`. Must return the passed `TestFile`.
///
/// This is `walk_async_exclusive` with a matcher that never excludes a file.
#[cfg(feature = "async")]
pub async fn walk_async<F, T>(dir: &str, f: F)
where
    T: Future<Output = TestFile>,
    F: FnMut(TestFile) -> T,
{
    walk_async_exclusive(dir, f, |_: &TestFile| false).await;
}
671
/// The same as `walk_async` but accepts an additional matcher to exclude matching files from being
/// tested.
///
/// # Panics
/// Panics if a test file cannot be read or parsed. After all files have been run, panics with
/// the collected messages if any file recorded a failure.
#[cfg(feature = "async")]
pub async fn walk_async_exclusive<F, T, M>(dir: &str, mut f: F, exclusion_matcher: M)
where
    F: FnMut(TestFile) -> T,
    T: Future<Output = TestFile>,
    M: Fn(&TestFile) -> bool,
{
    // Failures are gathered and reported together at the end, since a test cannot fail and
    // then carry on.
    let mut failures: Vec<String> = Vec::new();
    for path in file_list(dir) {
        let parsed = TestFile::new(&path).unwrap();
        if exclusion_matcher(&parsed) {
            continue;
        }
        let finished = f(parsed).await;
        failures.extend(finished.failure);
    }

    if failures.is_empty() {
        return;
    }
    let msg: String = failures
        .iter()
        .map(|failure| format!("{}\n", failure))
        .collect();
    panic!("{}", msg);
}
703
#[cfg(feature = "async")]
impl TestFile {
    /// The async equivalent of `run`.
    ///
    /// Each test case is handed to `f` by value, and the stanzas are drained from this
    /// `TestFile` as they are processed.
    pub async fn run_async<F, T>(&mut self, f: F)
    where
        F: FnMut(TestCase) -> T,
        T: Future<Output = String>,
    {
        if env::var("REWRITE").is_ok() {
            self.run_rewrite_async(f).await
        } else {
            self.run_normal_async(f).await
        }
    }

    // Runs every test case and compares its output with the expected output from the file.
    // The first mismatch is recorded in `self.failure` and stops the run.
    async fn run_normal_async<F, T>(&mut self, mut f: F)
    where
        F: FnMut(TestCase) -> T,
        T: Future<Output = String>,
    {
        for stanza in self.stanzas.drain(..) {
            let case = match stanza {
                Stanza::Test(case) => case,
                Stanza::Comment(_) => continue,
            };
            // `f` takes ownership of the case, so copy what the failure message needs first.
            let line_number = case.line_number;
            let input = case.input.clone();
            let expected = case.expected.clone();

            let actual = f(case).await;
            if actual == expected {
                continue;
            }
            self.failure = Some(format!(
                "failure:\n{}:{}:\n{}\nexpected:\n{}\nactual:\n{}",
                self.filename, line_number, input, expected, actual
            ));
            // Yeah, ok, we're done here.
            break;
        }
    }

    // Runs every test case and writes the file back out with each expected output replaced
    // by what the test actually produced. Comment stanzas are copied through unchanged.
    async fn run_rewrite_async<F, T>(&mut self, mut f: F)
    where
        F: FnMut(TestCase) -> T,
        T: Future<Output = String>,
    {
        let mut rewritten = String::new();
        for stanza in self.stanzas.drain(..) {
            match stanza {
                Stanza::Comment(text) => {
                    rewritten.push_str(&text);
                    rewritten.push('\n');
                }
                Stanza::Test(case) => {
                    rewritten.push_str(&case.directive_line);
                    rewritten.push('\n');
                    rewritten.push_str(&case.input);
                    let output = f(case).await;
                    write_result(&mut rewritten, output);
                }
            }
        }
        // TODO(justin): surface these errors somehow?
        fs::write(&self.filename, rewritten).unwrap();
    }
}
767
#[cfg(test)]
mod tests {
    use super::*;

    // That's dogfooding baby! The directive parser is tested with the data-driven files under
    // `tests/parsing`: each case's input is parsed as a directive line and the outcome is
    // printed, with arguments sorted by name so the output is stable.
    #[test]
    fn parse_directive() {
        walk("tests/parsing", |file| {
            file.run(|case| -> String {
                let parsed = DirectiveParser::new(case.input.trim()).parse_directive();
                let (directive, args) = match parsed {
                    Ok(parts) => parts,
                    Err(err) => return format!("error: {}\n", err),
                };
                let mut sorted_args: Vec<(String, Vec<String>)> = args.into_iter().collect();
                sorted_args.sort_by(|(a, _), (b, _)| a.cmp(b));
                format!("directive: {}\nargs: {:?}\n", directive, sorted_args)
            });
        });
    }
}
788}