console/
ansi.rs

1use std::{
2    borrow::Cow,
3    iter::{FusedIterator, Peekable},
4    str::CharIndices,
5};
6
/// States of the hand-written DFA that recognises one ANSI escape code.
///
/// The edges between the states are defined by `State::transition`; the per-variant notes
/// below summarise how each state is reached according to that table.
#[derive(Clone, Copy, Debug)]
enum State {
    /// Nothing consumed yet. Only `\u{1b}` or `\u{9b}` leads out of this state.
    Start,
    /// The introducer character has been consumed.
    S1,
    /// Introducer followed by exactly one `(` or `)`.
    S2,
    /// Introducer, one paren, then a digit in `0..=2`. Accepting; every input traps from here.
    S3,
    /// Consuming prefix characters (`[`, `(`, `)`, `#`, `;`, `?`) before any digit.
    S4,
    /// First digit of a numeric parameter. Accepting.
    S5,
    /// Second consecutive digit. Accepting.
    S6,
    /// Third consecutive digit. Accepting.
    S7,
    /// Fourth consecutive digit. Accepting.
    S8,
    /// Fifth consecutive digit. Accepting; every input traps from here.
    S9,
    /// A `;` separator seen after at least one digit.
    S10,
    /// A terminating character was consumed. Accepting; every input traps from here.
    S11,
    /// Dead state: the input can no longer extend a match.
    Trap,
}
23
24impl Default for State {
25    fn default() -> Self {
26        Self::Start
27    }
28}
29
30impl State {
31    fn is_final(&self) -> bool {
32        #[allow(clippy::match_like_matches_macro)]
33        match self {
34            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
35            _ => false,
36        }
37    }
38
39    fn is_trapped(&self) -> bool {
40        #[allow(clippy::match_like_matches_macro)]
41        match self {
42            Self::Trap => true,
43            _ => false,
44        }
45    }
46
47    fn transition(&mut self, c: char) {
48        *self = match c {
49            '\u{1b}' | '\u{9b}' => match self {
50                Self::Start => Self::S1,
51                _ => Self::Trap,
52            },
53            '(' | ')' => match self {
54                Self::S1 => Self::S2,
55                Self::S2 | Self::S4 => Self::S4,
56                _ => Self::Trap,
57            },
58            ';' => match self {
59                Self::S1 | Self::S2 | Self::S4 => Self::S4,
60                Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
61                _ => Self::Trap,
62            },
63
64            '[' | '#' | '?' => match self {
65                Self::S1 | Self::S2 | Self::S4 => Self::S4,
66                _ => Self::Trap,
67            },
68            '0'..='2' => match self {
69                Self::S1 | Self::S4 => Self::S5,
70                Self::S2 => Self::S3,
71                Self::S5 => Self::S6,
72                Self::S6 => Self::S7,
73                Self::S7 => Self::S8,
74                Self::S8 => Self::S9,
75                Self::S10 => Self::S5,
76                _ => Self::Trap,
77            },
78            '3'..='9' => match self {
79                Self::S1 | Self::S4 => Self::S5,
80                Self::S2 => Self::S5,
81                Self::S5 => Self::S6,
82                Self::S6 => Self::S7,
83                Self::S7 => Self::S8,
84                Self::S8 => Self::S9,
85                Self::S10 => Self::S5,
86                _ => Self::Trap,
87            },
88            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
89                match self {
90                    Self::S1
91                    | Self::S2
92                    | Self::S4
93                    | Self::S5
94                    | Self::S6
95                    | Self::S7
96                    | Self::S8
97                    | Self::S10 => Self::S11,
98                    _ => Self::Trap,
99                }
100            }
101            _ => Self::Trap,
102        };
103    }
104}
105
/// Iterator state for walking over every ANSI escape code contained in a string slice.
#[derive(Debug)]
struct Matches<'a> {
    /// The complete text being scanned.
    s: &'a str,
    /// Peekable cursor over the characters of `s` together with their byte offsets.
    it: Peekable<CharIndices<'a>>,
}
111
112impl<'a> Matches<'a> {
113    fn new(s: &'a str) -> Self {
114        let it = s.char_indices().peekable();
115        Self { s, it }
116    }
117}
118
/// A single escape code located inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The whole string the code was found in.
    text: &'a str,
    /// Byte offset of the first byte of the code within `text`.
    start: usize,
    /// Byte offset one past the last byte of the code within `text`.
    end: usize,
}
125
126impl<'a> Match<'a> {
127    #[inline]
128    pub(crate) fn as_str(&self) -> &'a str {
129        &self.text[self.start..self.end]
130    }
131}
132
133impl<'a> Iterator for Matches<'a> {
134    type Item = Match<'a>;
135
136    fn next(&mut self) -> Option<Self::Item> {
137        find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
138            text: self.s,
139            start,
140            end,
141        })
142    }
143}
144
// Once the character cursor is exhausted, `find_ansi_code_exclusive` bails out at its first
// `it.peek()?` on every later call, so `next` keeps returning `None`.
impl FusedIterator for Matches<'_> {}
146
147fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
148    'outer: loop {
149        if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
150            let start = *start;
151            let mut state = State::default();
152            let mut maybe_end = None;
153
154            loop {
155                let item = it.peek();
156
157                if let Some((idx, c)) = item {
158                    state.transition(*c);
159
160                    if state.is_final() {
161                        maybe_end = Some(*idx);
162                    }
163                }
164
165                // The match is greedy so run till we hit the trap state no matter what. A valid
166                // match is just one that was final at some point
167                if state.is_trapped() || item.is_none() {
168                    match maybe_end {
169                        Some(end) => {
170                            // All possible final characters are a single byte so it's safe to make
171                            // the end exclusive by just adding one
172                            return Some((start, end + 1));
173                        }
174                        // The character we are peeking right now might be the start of a match so
175                        // we want to continue the loop without popping off that char
176                        None => continue 'outer,
177                    }
178                }
179
180                it.next();
181            }
182        }
183
184        it.next();
185    }
186}
187
188/// Helper function to strip ansi codes.
189pub fn strip_ansi_codes(s: &str) -> Cow<str> {
190    let mut char_it = s.char_indices().peekable();
191    match find_ansi_code_exclusive(&mut char_it) {
192        Some(_) => {
193            let stripped: String = AnsiCodeIterator::new(s)
194                .filter_map(|(text, is_ansi)| if is_ansi { None } else { Some(text) })
195                .collect();
196            Cow::Owned(stripped)
197        }
198        None => Cow::Borrowed(s),
199    }
200}
201
202/// An iterator over ansi codes in a string.
203///
204/// This type can be used to scan over ansi codes in a string.
205/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
206/// the original string and `is_ansi` indicates if the slice contains
207/// ansi codes or string values.
208pub struct AnsiCodeIterator<'a> {
209    s: &'a str,
210    pending_item: Option<(&'a str, bool)>,
211    last_idx: usize,
212    cur_idx: usize,
213    iter: Matches<'a>,
214}
215
216impl<'a> AnsiCodeIterator<'a> {
217    /// Creates a new ansi code iterator.
218    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
219        AnsiCodeIterator {
220            s,
221            pending_item: None,
222            last_idx: 0,
223            cur_idx: 0,
224            iter: Matches::new(s),
225        }
226    }
227
228    /// Returns the string slice up to the current match.
229    pub fn current_slice(&self) -> &str {
230        &self.s[..self.cur_idx]
231    }
232
233    /// Returns the string slice from the current match to the end.
234    pub fn rest_slice(&self) -> &str {
235        &self.s[self.cur_idx..]
236    }
237}
238
239impl<'a> Iterator for AnsiCodeIterator<'a> {
240    type Item = (&'a str, bool);
241
242    fn next(&mut self) -> Option<(&'a str, bool)> {
243        if let Some(pending_item) = self.pending_item.take() {
244            self.cur_idx += pending_item.0.len();
245            Some(pending_item)
246        } else if let Some(m) = self.iter.next() {
247            let s = &self.s[self.last_idx..m.start];
248            self.last_idx = m.end;
249            if s.is_empty() {
250                self.cur_idx = m.end;
251                Some((m.as_str(), true))
252            } else {
253                self.cur_idx = m.start;
254                self.pending_item = Some((m.as_str(), true));
255                Some((s, false))
256            }
257        } else if self.last_idx < self.s.len() {
258            let rv = &self.s[self.last_idx..];
259            self.cur_idx = self.s.len();
260            self.last_idx = self.s.len();
261            Some((rv, false))
262        } else {
263            None
264        }
265    }
266}
267
// After `next` has returned `None` there is no pending item, the inner matcher yields nothing
// more and `last_idx` is no longer below `s.len()`, so every later call returns `None` too.
impl FusedIterator for AnsiCodeIterator<'_> {}
269
#[cfg(test)]
mod tests {
    use super::*;

    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets `assert_eq!` compare a list of regex matches against a list of dfa matches. Two
    // matches are considered equal when they cover the same byte range.
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        // Random strings sprinkled with escape introducers must produce the same matches from
        // the dfa as from the old regex.
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    // Exhaustively compares the dfa with the old regex on every string of up to six bytes
    // drawn from a small alphabet.
    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        // Entry point of the recursion: starts from an empty buffer.
        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        // Appends `len` more bytes to `chunk` in every possible way and checks each result.
        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                // Byte sequences that are not valid UTF-8 are skipped
                if let Ok(s) = std::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    // Compares the dfa with the old regex on a recorded log file read from `tests/data`.
    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    // Drives the state machine by hand: a complete code ends in a final state and one more
    // character traps it.
    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    // An introducer directly followed by another introducer must not hide the code started
    // by the second one.
    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    // A paren right after the introducer may be followed by any terminating character.
    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    // Five digits after the introducer are matched as a single code.
    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    // Codes of the `ESC ( 0` / `ESC ) B` form are split from the surrounding text.
    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
    }

    // Walks a styled string and checks the yielded chunks together with `current_slice` and
    // `rest_slice` after every step.
    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    // Same walk as above for a string that starts with two codes in a row.
    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}