difflib/
differ.rs

1use sequencematcher::SequenceMatcher;
2use std::cmp;
3use utils::{count_leading, str_with_similar_chars};
4
5#[derive(Default)]
6pub struct Differ {
7    pub line_junk: Option<fn(&&str) -> bool>,
8    pub char_junk: Option<fn(&char) -> bool>,
9}
10
11impl Differ {
12    pub fn new() -> Differ {
13        Differ {
14            line_junk: None,
15            char_junk: None,
16        }
17    }
18
19    pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> {
20        let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
21        matcher.set_is_junk(self.line_junk);
22        let mut res = Vec::new();
23        for opcode in matcher.get_opcodes() {
24            let mut gen = Vec::new();
25            match opcode.tag.as_ref() {
26                "replace" => {
27                    gen = self.fancy_replace(
28                        first_sequence,
29                        opcode.first_start,
30                        opcode.first_end,
31                        second_sequence,
32                        opcode.second_start,
33                        opcode.second_end,
34                    )
35                }
36                "delete" => {
37                    gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end)
38                }
39                "insert" => {
40                    gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end)
41                }
42                "equal" => {
43                    gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end)
44                }
45                _ => {}
46            }
47            for i in gen {
48                res.push(i);
49            }
50        }
51        res
52    }
53
54    fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> {
55        let mut res = Vec::new();
56        for i in start..end {
57            if let Some(s) = sequence.get(i) {
58                res.push(format!("{} {}", tag, s))
59            }
60        }
61        res
62    }
63
64    fn plain_replace(
65        &self,
66        first_sequence: &[&str],
67        first_start: usize,
68        first_end: usize,
69        second_sequence: &[&str],
70        second_start: usize,
71        second_end: usize,
72    ) -> Vec<String> {
73        if !(first_start < first_end && second_start < second_end) {
74            return Vec::new();
75        }
76        let (mut first, second) = if second_end - second_start < first_end - first_start {
77            (
78                self.dump("+", second_sequence, second_start, second_end),
79                self.dump("-", first_sequence, first_start, first_end),
80            )
81        } else {
82            (
83                self.dump("-", first_sequence, first_start, first_end),
84                self.dump("+", second_sequence, second_start, second_end),
85            )
86        };
87        for s in second {
88            first.push(s);
89        }
90        first
91    }
92
93    fn fancy_replace(
94        &self,
95        first_sequence: &[&str],
96        first_start: usize,
97        first_end: usize,
98        second_sequence: &[&str],
99        second_start: usize,
100        second_end: usize,
101    ) -> Vec<String> {
102        let mut res = Vec::new();
103        let (mut best_ratio, cutoff) = (0.74, 0.75);
104        let (mut best_i, mut best_j) = (0, 0);
105        let mut eqi: Option<usize> = None;
106        let mut eqj: Option<usize> = None;
107        for (j, second_sequence_str) in second_sequence
108            .iter()
109            .enumerate()
110            .take(second_end)
111            .skip(second_start)
112        {
113            for (i, first_sequence_str) in first_sequence
114                .iter()
115                .enumerate()
116                .take(second_end)
117                .skip(second_start)
118            {
119                if first_sequence_str == second_sequence_str {
120                    if eqi.is_none() {
121                        eqi = Some(i);
122                        eqj = Some(j);
123                    }
124                    continue;
125                }
126                let (first_sequence_chars, second_sequence_chars) = (
127                    first_sequence_str.chars().collect::<Vec<char>>(),
128                    second_sequence_str.chars().collect::<Vec<char>>(),
129                );
130                let mut cruncher =
131                    SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars);
132                cruncher.set_is_junk(self.char_junk);
133                if cruncher.ratio() > best_ratio {
134                    best_ratio = cruncher.ratio();
135                    best_i = i;
136                    best_j = j;
137                }
138            }
139        }
140        if best_ratio < cutoff {
141            if eqi.is_none() {
142                res.extend(
143                    self.plain_replace(
144                        first_sequence,
145                        first_start,
146                        first_end,
147                        second_sequence,
148                        second_start,
149                        second_end,
150                    ).iter()
151                        .cloned(),
152                );
153                return res;
154            }
155            best_i = eqi.unwrap();
156            best_j = eqj.unwrap();
157        } else {
158            eqi = None;
159        }
160        res.extend(
161            self.fancy_helper(
162                first_sequence,
163                first_start,
164                best_i,
165                second_sequence,
166                second_start,
167                best_j,
168            ).iter()
169                .cloned(),
170        );
171        let first_element = &first_sequence[best_i];
172        let second_element = &second_sequence[best_j];
173        if eqi.is_none() {
174            let (mut first_tag, mut second_tag) = (String::new(), String::new());
175            let first_element_chars: Vec<char> = first_element.chars().collect();
176            let second_element_chars: Vec<char> = second_element.chars().collect();
177            let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars);
178            cruncher.set_is_junk(self.char_junk);
179            for opcode in &cruncher.get_opcodes() {
180                let (first_length, second_length) = (
181                    opcode.first_end - opcode.first_start,
182                    opcode.second_end - opcode.second_start,
183                );
184                match opcode.tag.as_ref() {
185                    "replace" => {
186                        first_tag.push_str(&str_with_similar_chars('^', first_length));
187                        second_tag.push_str(&str_with_similar_chars('^', second_length));
188                    }
189                    "delete" => {
190                        first_tag.push_str(&str_with_similar_chars('-', first_length));
191                    }
192                    "insert" => {
193                        second_tag.push_str(&str_with_similar_chars('+', second_length));
194                    }
195                    "equal" => {
196                        first_tag.push_str(&str_with_similar_chars(' ', first_length));
197                        second_tag.push_str(&str_with_similar_chars(' ', second_length));
198                    }
199                    _ => {}
200                }
201            }
202            res.extend(
203                self.qformat(&first_element, &second_element, &first_tag, &second_tag)
204                    .iter()
205                    .cloned(),
206            );
207        } else {
208            let mut s = String::from("  ");
209            s.push_str(&first_element);
210            res.push(s);
211        }
212        res.extend(
213            self.fancy_helper(
214                first_sequence,
215                best_i + 1,
216                first_end,
217                second_sequence,
218                best_j + 1,
219                second_end,
220            ).iter()
221                .cloned(),
222        );
223        res
224    }
225
226    fn fancy_helper(
227        &self,
228        first_sequence: &[&str],
229        first_start: usize,
230        first_end: usize,
231        second_sequence: &[&str],
232        second_start: usize,
233        second_end: usize,
234    ) -> Vec<String> {
235        let mut res = Vec::new();
236        if first_start < first_end {
237            if second_start < second_end {
238                res = self.fancy_replace(
239                    first_sequence,
240                    first_start,
241                    first_end,
242                    second_sequence,
243                    second_start,
244                    second_end,
245                );
246            } else {
247                res = self.dump("-", first_sequence, first_start, first_end);
248            }
249        } else if second_start < second_end {
250            res = self.dump("+", second_sequence, second_start, second_end);
251        }
252        res
253    }
254
255    fn qformat(
256        &self,
257        first_line: &str,
258        second_line: &str,
259        first_tags: &str,
260        second_tags: &str,
261    ) -> Vec<String> {
262        let mut res = Vec::new();
263        let mut first_tags = first_tags;
264        let mut second_tags = second_tags;
265        let mut common = cmp::min(
266            count_leading(first_line, '\t'),
267            count_leading(second_line, '\t'),
268        );
269        common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
270        common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
271        first_tags = first_tags.split_at(common).1.trim_right();
272        second_tags = second_tags.split_at(common).1.trim_right();
273        let mut s = format!("- {}", first_line);
274        res.push(s);
275        if first_tags != "" {
276            s = format!("? {}{}\n", str_with_similar_chars('\t', common), first_tags);
277            res.push(s);
278        }
279        s = format!("+ {}", second_line);
280        res.push(s);
281        if second_tags != "" {
282            s = format!(
283                "? {}{}\n",
284                str_with_similar_chars('\t', common),
285                second_tags
286            );
287            res.push(s);
288        }
289        res
290    }
291
292    pub fn restore(delta: &[String], which: usize) -> Vec<String> {
293        if !(which == 1 || which == 2) {
294            panic!("Second parameter must be 1 or 2");
295        }
296        let mut res = Vec::new();
297        let tag = if which == 1 { "- " } else { "+ " }.to_string();
298        let prefixes = vec![tag, "  ".to_string()];
299        for line in delta {
300            for prefix in &prefixes {
301                if line.starts_with(prefix) {
302                    res.push(line.split_at(2).1.to_string());
303                }
304            }
305        }
306        res
307    }
308}
309
310#[test]
311fn test_fancy_replace() {
312    let differ = Differ::new();
313    let result = differ
314        .fancy_replace(&vec!["abcDefghiJkl\n"], 0, 1, &vec!["abcdefGhijkl\n"], 0, 1)
315        .join("");
316    assert_eq!(
317        result,
318        "- abcDefghiJkl\n?    ^  ^  ^\n+ abcdefGhijkl\n?    ^  ^  ^\n"
319    );
320}
321
322#[test]
323fn test_qformat() {
324    let differ = Differ::new();
325    let result = differ.qformat(
326        "\tabcDefghiJkl\n",
327        "\tabcdefGhijkl\n",
328        "  ^ ^  ^      ",
329        "  ^ ^  ^      ",
330    );
331    assert_eq!(
332        result,
333        vec![
334            "- \tabcDefghiJkl\n",
335            "? \t ^ ^  ^\n",
336            "+ \tabcdefGhijkl\n",
337            "? \t ^ ^  ^\n",
338        ]
339    );
340}