hex_literal/
comments.rs

1//! Provides an Iterator<Item=u8> decorator that uses a finite state machine to exclude comments
2//! from a string in linear time and constant space.
3
4use std::iter::Peekable;
5
6pub(crate) trait Exclude: Sized + Iterator<Item = u8> {
7    fn exclude_comments(self) -> ExcludingComments<Self>;
8}
9
10impl<T: Iterator<Item = u8>> Exclude for T {
11    fn exclude_comments(self) -> ExcludingComments<T> {
12        ExcludingComments::new_from_iter(self)
13    }
14}
15
/// Iterator adaptor that yields the bytes of the wrapped iterator with `//` line
/// comments and `/* */` block comments removed.
///
/// The newline that terminates a line comment is still yielded.
pub(crate) struct ExcludingComments<I: Iterator<Item = u8>> {
    /// Current state of the comment removal machine.
    state: State,
    /// Source of bytes. Only `next` is called on it in this block; the `Peekable`
    /// wrapper is kept so the field type stays as it was.
    iter: Peekable<I>,
}

impl<I: Iterator<Item = u8>> Iterator for ExcludingComments<I> {
    type Item = u8;

    /// Returns the next byte that is not part of a comment.
    ///
    /// # Panics
    ///
    /// Panics when the input ends inside a block comment, or directly after a
    /// `/` that did not start a comment. Also panics when a `/` is followed by
    /// anything other than `/` or `*`.
    fn next(&mut self) -> Option<Self::Item> {
        let next_byte = self.next_byte();
        if next_byte.is_none() {
            // The input is exhausted: make sure it did not stop mid-construct.
            // The match is exhaustive so a new state forces a decision here.
            match self.state {
                State::BlockComment | State::PotentialBlockCommentEnd => {
                    panic!("block comment not terminated with */")
                }
                State::PotentialComment => panic!("encountered isolated `/`"),
                State::Normal | State::LineComment => {}
            }
        }
        next_byte
    }
}

/// States of the comment removal machine.
///
/// ```text
/// Normal                   --'/'---> PotentialComment
/// Normal                   --other-> Normal (byte is emitted)
/// PotentialComment         --'/'---> LineComment
/// PotentialComment         --'*'---> BlockComment
/// PotentialComment         --other-> panic (isolated `/`)
/// LineComment              --'\n'--> Normal (newline is emitted)
/// LineComment              --other-> LineComment
/// BlockComment             --'*'---> PotentialBlockCommentEnd
/// BlockComment             --other-> BlockComment
/// PotentialBlockCommentEnd --'/'---> Normal
/// PotentialBlockCommentEnd --'*'---> PotentialBlockCommentEnd
/// PotentialBlockCommentEnd --other-> BlockComment
/// ```
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum State {
    /// Outside of any comment; bytes are passed through.
    Normal,
    /// A `/` was seen in `Normal`; the next byte decides the comment kind.
    PotentialComment,
    /// Inside a `//` comment, until the next `\n`.
    LineComment,
    /// Inside a `/* */` comment.
    BlockComment,
    /// Inside a block comment, directly after a `*`.
    PotentialBlockCommentEnd,
}

impl<I: Iterator<Item = u8>> ExcludingComments<I> {
    /// Creates the adaptor around `iter`, starting in [`State::Normal`].
    fn new_from_iter(iter: I) -> Self {
        Self {
            state: State::Normal,
            iter: iter.peekable(),
        }
    }

    /// Advances the state machine until a byte outside of a comment is found.
    ///
    /// Returns `None` as soon as the underlying iterator is exhausted, leaving
    /// `self.state` untouched so the caller can detect unterminated constructs.
    ///
    /// # Panics
    ///
    /// Panics if a `/` is followed by a byte other than `/` or `*`.
    fn next_byte(&mut self) -> Option<u8> {
        loop {
            let next = self.iter.next()?;
            self.state = match (self.state, next) {
                (State::Normal, b'/') => State::PotentialComment,
                (State::Normal, _) => return Some(next),
                (State::PotentialComment, b'/') => State::LineComment,
                (State::PotentialComment, b'*') => State::BlockComment,
                (State::PotentialComment, _) => panic!("encountered isolated `/`"),
                (State::LineComment, b'\n') => {
                    // The newline ends the comment but is itself kept.
                    self.state = State::Normal;
                    return Some(b'\n');
                }
                (State::LineComment, _) => State::LineComment,
                (State::BlockComment, b'*') => State::PotentialBlockCommentEnd,
                (State::BlockComment, _) => State::BlockComment,
                (State::PotentialBlockCommentEnd, b'/') => State::Normal,
                // A run of asterisks can still be closed by a following `/`
                // (e.g. `/***/` or `/* text **/`), so stay in this state instead
                // of falling back to `BlockComment`.
                (State::PotentialBlockCommentEnd, b'*') => State::PotentialBlockCommentEnd,
                (State::PotentialBlockCommentEnd, _) => State::BlockComment,
            };
        }
    }
}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds the bytes of `input` through the comment-excluding adaptor and
    /// collects the surviving bytes back into a `String`, one `char` per byte.
    fn exclude_comments(input: &str) -> String {
        input
            .bytes()
            .exclude_comments()
            .map(char::from)
            .collect()
    }

    #[test]
    fn empty() {
        let stripped = exclude_comments("");
        assert!(stripped.is_empty());
    }

    #[test]
    fn single_char() {
        let stripped = exclude_comments("0");
        assert_eq!(stripped, "0");
    }

    #[test]
    fn two_chars() {
        let stripped = exclude_comments("ab");
        assert_eq!(stripped, "ab");
    }

    #[test]
    fn comment() {
        let stripped = exclude_comments("ab//cd");
        assert_eq!(stripped, "ab");
    }

    #[test]
    fn comments_are_ended_by_new_line() {
        let stripped = exclude_comments("ab//comment\nde");
        assert_eq!(stripped, "ab\nde");
    }

    #[test]
    fn new_lines_without_comments() {
        let stripped = exclude_comments("ab\nde");
        assert_eq!(stripped, "ab\nde");
    }

    #[test]
    #[should_panic]
    fn panic_on_single_slash() {
        exclude_comments("ab/cd");
    }

    #[test]
    fn line_comments_on_multiple_lines() {
        let input = concat!(
            "\n",
            "line 1 //comment 1\n",
            "line 2 // comment 2 // comment 3\n",
            "line 3\n",
            "line 4 // comment 4",
        );
        // The space in front of each comment is outside of it and must survive.
        let expected = concat!(
            "\n",
            "line 1 \n",
            "line 2 \n",
            "line 3\n",
            "line 4 ",
        );
        assert_eq!(exclude_comments(input), expected);
    }

    #[test]
    fn block_comment() {
        let stripped = exclude_comments("ab/*comment*/12");
        assert_eq!(stripped, "ab12");
    }

    #[test]
    fn empty_block_comment() {
        let stripped = exclude_comments("ab/**/12");
        assert_eq!(stripped, "ab12");
    }

    #[test]
    fn block_comment_with_asterisk_and_slash_inside() {
        let stripped = exclude_comments("ab/*false * asterisk and / */12");
        assert_eq!(stripped, "ab12");
    }

    #[test]
    fn block_comment_within_line_comment() {
        let stripped = exclude_comments("ab// /*comment*/12");
        assert_eq!(stripped, "ab");
    }

    #[test]
    #[should_panic(expected = "block comment not terminated with */")]
    fn block_comment_not_terminated() {
        exclude_comments("ab /*comment");
    }

    #[test]
    #[should_panic(expected = "block comment not terminated with */")]
    fn block_comment_not_completely_terminated() {
        exclude_comments("ab /*comment*");
    }

    #[test]
    fn block_and_line_comments_on_multiple_lines() {
        let input = concat!(
            "\n",
            "line 1 /* comment 1 */\n",
            "line /* comment 2 */2 // line comment 1\n",
            "line 3 /* some comments\n",
            "over multiple lines\n",
            "*/\n",
            "line 4 /* more multiline comments\n",
            "* with leading\n",
            "* asterisks\n",
            "*/end// line comment 2",
        );
        let expected = concat!(
            "\n",
            "line 1 \n",
            "line 2 \n",
            "line 3 \n",
            "line 4 end",
        );
        assert_eq!(exclude_comments(input), expected);
    }
}