insta/content/yaml/vendored/
scanner.rs

1use std::collections::VecDeque;
2use std::error::Error;
3use std::{char, fmt};
4
/// Character encoding reported in the `StreamStart` token.
///
/// UTF-8 is the only variant defined here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8 encoded input.
    Utf8,
}
9
/// Presentation style attached to a `TokenType::Scalar`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    /// Unquoted scalar.
    Plain,
    /// Scalar introduced by `'`.
    SingleQuoted,
    /// Scalar introduced by `"`.
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    ///
    /// NOTE(review): presumably meant "Folded"; the spelling is part of the
    /// public name, so it is kept unchanged here.
    Foled,
}
19
/// A position in the input being scanned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    /// Number of characters consumed so far.
    index: usize,
    /// Line number; a fresh scanner starts at 1.
    line: usize,
    /// Column within the line, counted from 0.
    col: usize,
}
26
27impl Marker {
28    fn new(index: usize, line: usize, col: usize) -> Marker {
29        Marker { index, line, col }
30    }
31}
32
33#[derive(Clone, PartialEq, Debug, Eq)]
34pub struct ScanError {
35    mark: Marker,
36    info: String,
37}
38
39impl ScanError {
40    pub fn new(loc: Marker, info: &str) -> ScanError {
41        ScanError {
42            mark: loc,
43            info: info.to_owned(),
44        }
45    }
46}
47
48impl Error for ScanError {
49    fn description(&self) -> &str {
50        self.info.as_ref()
51    }
52
53    fn cause(&self) -> Option<&dyn Error> {
54        None
55    }
56}
57
58impl fmt::Display for ScanError {
59    // col starts from 0
60    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
61        write!(
62            formatter,
63            "{} at line {} column {}",
64            self.info,
65            self.mark.line,
66            self.mark.col + 1
67        )
68    }
69}
70
71#[derive(Clone, PartialEq, Debug, Eq)]
72pub enum TokenType {
73    StreamStart(TEncoding),
74    StreamEnd,
75    /// major, minor
76    VersionDirective(u32, u32),
77    /// handle, prefix
78    TagDirective(String, String),
79    DocumentStart,
80    DocumentEnd,
81    BlockSequenceStart,
82    BlockMappingStart,
83    BlockEnd,
84    FlowSequenceStart,
85    FlowSequenceEnd,
86    FlowMappingStart,
87    FlowMappingEnd,
88    BlockEntry,
89    FlowEntry,
90    Key,
91    Value,
92    Alias(String),
93    Anchor(String),
94    /// handle, suffix
95    Tag(String, String),
96    Scalar(TScalarStyle, String),
97}
98
99#[derive(Clone, PartialEq, Debug, Eq)]
100pub struct Token(pub Marker, pub TokenType);
101
102#[derive(Clone, PartialEq, Debug, Eq)]
103struct SimpleKey {
104    possible: bool,
105    required: bool,
106    token_number: usize,
107    mark: Marker,
108}
109
110impl SimpleKey {
111    fn new(mark: Marker) -> SimpleKey {
112        SimpleKey {
113            possible: false,
114            required: false,
115            token_number: 0,
116            mark,
117        }
118    }
119}
120
121#[derive(Debug)]
122pub struct Scanner<T> {
123    rdr: T,
124    mark: Marker,
125    tokens: VecDeque<Token>,
126    buffer: VecDeque<char>,
127    error: Option<ScanError>,
128
129    stream_start_produced: bool,
130    stream_end_produced: bool,
131    adjacent_value_allowed_at: usize,
132    simple_key_allowed: bool,
133    simple_keys: Vec<SimpleKey>,
134    indent: isize,
135    indents: Vec<isize>,
136    flow_level: u8,
137    tokens_parsed: usize,
138    token_available: bool,
139}
140
141impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
142    type Item = Token;
143    fn next(&mut self) -> Option<Token> {
144        if self.error.is_some() {
145            return None;
146        }
147        match self.next_token() {
148            Ok(tok) => tok,
149            Err(e) => {
150                self.error = Some(e);
151                None
152            }
153        }
154    }
155}
156
/// True for the NUL character, which the scanner uses as its end-of-input
/// sentinel.
#[inline]
fn is_z(c: char) -> bool {
    matches!(c, '\0')
}
161
/// True for a line feed or a carriage return.
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
166
167#[inline]
168fn is_breakz(c: char) -> bool {
169    is_break(c) || is_z(c)
170}
171
/// True for a space or a tab.
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
176
177#[inline]
178fn is_blankz(c: char) -> bool {
179    is_blank(c) || is_breakz(c)
180}
181
/// True for the ASCII digits `0` through `9`.
#[inline]
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
186
/// True for ASCII letters and digits, `_`, and `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
191
/// True for an ASCII hexadecimal digit in either case.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
196
/// Numeric value (0 to 15) of a hexadecimal digit.
///
/// # Panics
///
/// Panics if `c` is not a hexadecimal digit; callers check with `is_hex`
/// first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
206
/// True for the flow-collection indicators `,`, `[`, `]`, `{`, and `}`.
#[inline]
fn is_flow(c: char) -> bool {
    ",[]{}".contains(c)
}
211
212pub type ScanResult = Result<(), ScanError>;
213
214impl<T: Iterator<Item = char>> Scanner<T> {
215    /// Creates the YAML tokenizer.
216    pub fn new(rdr: T) -> Scanner<T> {
217        Scanner {
218            rdr,
219            buffer: VecDeque::new(),
220            mark: Marker::new(0, 1, 0),
221            tokens: VecDeque::new(),
222            error: None,
223
224            stream_start_produced: false,
225            stream_end_produced: false,
226            adjacent_value_allowed_at: 0,
227            simple_key_allowed: true,
228            simple_keys: Vec::new(),
229            indent: -1,
230            indents: Vec::new(),
231            flow_level: 0,
232            tokens_parsed: 0,
233            token_available: false,
234        }
235    }
236    #[inline]
237    pub fn get_error(&self) -> Option<ScanError> {
238        self.error.clone()
239    }
240
241    #[inline]
242    fn lookahead(&mut self, count: usize) {
243        if self.buffer.len() >= count {
244            return;
245        }
246        for _ in 0..(count - self.buffer.len()) {
247            self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
248        }
249    }
250    #[inline]
251    fn skip(&mut self) {
252        let c = self.buffer.pop_front().unwrap();
253
254        self.mark.index += 1;
255        if c == '\n' {
256            self.mark.line += 1;
257            self.mark.col = 0;
258        } else {
259            self.mark.col += 1;
260        }
261    }
262    #[inline]
263    fn skip_line(&mut self) {
264        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
265            self.skip();
266            self.skip();
267        } else if is_break(self.buffer[0]) {
268            self.skip();
269        }
270    }
271    #[inline]
272    fn ch(&self) -> char {
273        self.buffer[0]
274    }
275    #[inline]
276    fn ch_is(&self, c: char) -> bool {
277        self.buffer[0] == c
278    }
279    #[allow(dead_code)]
280    #[inline]
281    fn eof(&self) -> bool {
282        self.ch_is('\0')
283    }
284    #[inline]
285    pub fn stream_started(&self) -> bool {
286        self.stream_start_produced
287    }
288    #[inline]
289    pub fn stream_ended(&self) -> bool {
290        self.stream_end_produced
291    }
292    #[inline]
293    pub fn mark(&self) -> Marker {
294        self.mark
295    }
296    #[inline]
297    fn read_break(&mut self, s: &mut String) {
298        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
299            s.push('\n');
300            self.skip();
301            self.skip();
302        } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
303            s.push('\n');
304            self.skip();
305        } else {
306            unreachable!();
307        }
308    }
309    fn insert_token(&mut self, pos: usize, tok: Token) {
310        let old_len = self.tokens.len();
311        assert!(pos <= old_len);
312        self.tokens.push_back(tok);
313        for i in 0..old_len - pos {
314            self.tokens.swap(old_len - i, old_len - i - 1);
315        }
316    }
317    fn allow_simple_key(&mut self) {
318        self.simple_key_allowed = true;
319    }
320    fn disallow_simple_key(&mut self) {
321        self.simple_key_allowed = false;
322    }
323
324    pub fn fetch_next_token(&mut self) -> ScanResult {
325        self.lookahead(1);
326        // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
327
328        if !self.stream_start_produced {
329            self.fetch_stream_start();
330            return Ok(());
331        }
332        self.skip_to_next_token();
333
334        self.stale_simple_keys()?;
335
336        let mark = self.mark;
337        self.unroll_indent(mark.col as isize);
338
339        self.lookahead(4);
340
341        if is_z(self.ch()) {
342            self.fetch_stream_end()?;
343            return Ok(());
344        }
345
346        // Is it a directive?
347        if self.mark.col == 0 && self.ch_is('%') {
348            return self.fetch_directive();
349        }
350
351        if self.mark.col == 0
352            && self.buffer[0] == '-'
353            && self.buffer[1] == '-'
354            && self.buffer[2] == '-'
355            && is_blankz(self.buffer[3])
356        {
357            self.fetch_document_indicator(TokenType::DocumentStart)?;
358            return Ok(());
359        }
360
361        if self.mark.col == 0
362            && self.buffer[0] == '.'
363            && self.buffer[1] == '.'
364            && self.buffer[2] == '.'
365            && is_blankz(self.buffer[3])
366        {
367            self.fetch_document_indicator(TokenType::DocumentEnd)?;
368            return Ok(());
369        }
370
371        let c = self.buffer[0];
372        let nc = self.buffer[1];
373        match c {
374            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
375            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
376            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
377            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
378            ',' => self.fetch_flow_entry(),
379            '-' if is_blankz(nc) => self.fetch_block_entry(),
380            '?' if is_blankz(nc) => self.fetch_key(),
381            ':' if is_blankz(nc)
382                || (self.flow_level > 0
383                    && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
384            {
385                self.fetch_value()
386            }
387            // Is it an alias?
388            '*' => self.fetch_anchor(true),
389            // Is it an anchor?
390            '&' => self.fetch_anchor(false),
391            '!' => self.fetch_tag(),
392            // Is it a literal scalar?
393            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
394            // Is it a folded scalar?
395            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
396            '\'' => self.fetch_flow_scalar(true),
397            '"' => self.fetch_flow_scalar(false),
398            // plain scalar
399            '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
400            ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
401            '%' | '@' | '`' => Err(ScanError::new(
402                self.mark,
403                &format!("unexpected character: `{}'", c),
404            )),
405            _ => self.fetch_plain_scalar(),
406        }
407    }
408
409    pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
410        if self.stream_end_produced {
411            return Ok(None);
412        }
413
414        if !self.token_available {
415            self.fetch_more_tokens()?;
416        }
417        let t = self.tokens.pop_front().unwrap();
418        self.token_available = false;
419        self.tokens_parsed += 1;
420
421        if let TokenType::StreamEnd = t.1 {
422            self.stream_end_produced = true;
423        }
424        Ok(Some(t))
425    }
426
427    pub fn fetch_more_tokens(&mut self) -> ScanResult {
428        let mut need_more;
429        loop {
430            need_more = false;
431            if self.tokens.is_empty() {
432                need_more = true;
433            } else {
434                self.stale_simple_keys()?;
435                for sk in &self.simple_keys {
436                    if sk.possible && sk.token_number == self.tokens_parsed {
437                        need_more = true;
438                        break;
439                    }
440                }
441            }
442
443            if !need_more {
444                break;
445            }
446            self.fetch_next_token()?;
447        }
448        self.token_available = true;
449
450        Ok(())
451    }
452
453    fn stale_simple_keys(&mut self) -> ScanResult {
454        for sk in &mut self.simple_keys {
455            if sk.possible
456                && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
457            {
458                if sk.required {
459                    return Err(ScanError::new(self.mark, "simple key expect ':'"));
460                }
461                sk.possible = false;
462            }
463        }
464        Ok(())
465    }
466
467    fn skip_to_next_token(&mut self) {
468        loop {
469            self.lookahead(1);
470            // TODO(chenyh) BOM
471            match self.ch() {
472                ' ' => self.skip(),
473                '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
474                '\n' | '\r' => {
475                    self.lookahead(2);
476                    self.skip_line();
477                    if self.flow_level == 0 {
478                        self.allow_simple_key();
479                    }
480                }
481                '#' => {
482                    while !is_breakz(self.ch()) {
483                        self.skip();
484                        self.lookahead(1);
485                    }
486                }
487                _ => break,
488            }
489        }
490    }
491
492    fn fetch_stream_start(&mut self) {
493        let mark = self.mark;
494        self.indent = -1;
495        self.stream_start_produced = true;
496        self.allow_simple_key();
497        self.tokens
498            .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
499        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
500    }
501
502    fn fetch_stream_end(&mut self) -> ScanResult {
503        // force new line
504        if self.mark.col != 0 {
505            self.mark.col = 0;
506            self.mark.line += 1;
507        }
508
509        self.unroll_indent(-1);
510        self.remove_simple_key()?;
511        self.disallow_simple_key();
512
513        self.tokens
514            .push_back(Token(self.mark, TokenType::StreamEnd));
515        Ok(())
516    }
517
518    fn fetch_directive(&mut self) -> ScanResult {
519        self.unroll_indent(-1);
520        self.remove_simple_key()?;
521
522        self.disallow_simple_key();
523
524        let tok = self.scan_directive()?;
525
526        self.tokens.push_back(tok);
527
528        Ok(())
529    }
530
531    fn scan_directive(&mut self) -> Result<Token, ScanError> {
532        let start_mark = self.mark;
533        self.skip();
534
535        let name = self.scan_directive_name()?;
536        let tok = match name.as_ref() {
537            "YAML" => self.scan_version_directive_value(&start_mark)?,
538            "TAG" => self.scan_tag_directive_value(&start_mark)?,
539            // XXX This should be a warning instead of an error
540            _ => {
541                // skip current line
542                self.lookahead(1);
543                while !is_breakz(self.ch()) {
544                    self.skip();
545                    self.lookahead(1);
546                }
547                // XXX return an empty TagDirective token
548                Token(
549                    start_mark,
550                    TokenType::TagDirective(String::new(), String::new()),
551                )
552                // return Err(ScanError::new(start_mark,
553                //     "while scanning a directive, found unknown directive name"))
554            }
555        };
556        self.lookahead(1);
557
558        while is_blank(self.ch()) {
559            self.skip();
560            self.lookahead(1);
561        }
562
563        if self.ch() == '#' {
564            while !is_breakz(self.ch()) {
565                self.skip();
566                self.lookahead(1);
567            }
568        }
569
570        if !is_breakz(self.ch()) {
571            return Err(ScanError::new(
572                start_mark,
573                "while scanning a directive, did not find expected comment or line break",
574            ));
575        }
576
577        // Eat a line break
578        if is_break(self.ch()) {
579            self.lookahead(2);
580            self.skip_line();
581        }
582
583        Ok(tok)
584    }
585
586    fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
587        self.lookahead(1);
588
589        while is_blank(self.ch()) {
590            self.skip();
591            self.lookahead(1);
592        }
593
594        let major = self.scan_version_directive_number(mark)?;
595
596        if self.ch() != '.' {
597            return Err(ScanError::new(
598                *mark,
599                "while scanning a YAML directive, did not find expected digit or '.' character",
600            ));
601        }
602
603        self.skip();
604
605        let minor = self.scan_version_directive_number(mark)?;
606
607        Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
608    }
609
610    fn scan_directive_name(&mut self) -> Result<String, ScanError> {
611        let start_mark = self.mark;
612        let mut string = String::new();
613        self.lookahead(1);
614        while is_alpha(self.ch()) {
615            string.push(self.ch());
616            self.skip();
617            self.lookahead(1);
618        }
619
620        if string.is_empty() {
621            return Err(ScanError::new(
622                start_mark,
623                "while scanning a directive, could not find expected directive name",
624            ));
625        }
626
627        if !is_blankz(self.ch()) {
628            return Err(ScanError::new(
629                start_mark,
630                "while scanning a directive, found unexpected non-alphabetical character",
631            ));
632        }
633
634        Ok(string)
635    }
636
637    fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
638        let mut val = 0u32;
639        let mut length = 0usize;
640        self.lookahead(1);
641        while is_digit(self.ch()) {
642            if length + 1 > 9 {
643                return Err(ScanError::new(
644                    *mark,
645                    "while scanning a YAML directive, found extremely long version number",
646                ));
647            }
648            length += 1;
649            val = val * 10 + ((self.ch() as u32) - ('0' as u32));
650            self.skip();
651            self.lookahead(1);
652        }
653
654        if length == 0 {
655            return Err(ScanError::new(
656                *mark,
657                "while scanning a YAML directive, did not find expected version number",
658            ));
659        }
660
661        Ok(val)
662    }
663
664    fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
665        self.lookahead(1);
666        /* Eat whitespaces. */
667        while is_blank(self.ch()) {
668            self.skip();
669            self.lookahead(1);
670        }
671        let handle = self.scan_tag_handle(true, mark)?;
672
673        self.lookahead(1);
674        /* Eat whitespaces. */
675        while is_blank(self.ch()) {
676            self.skip();
677            self.lookahead(1);
678        }
679
680        let is_secondary = handle == "!!";
681        let prefix = self.scan_tag_uri(true, is_secondary, "", mark)?;
682
683        self.lookahead(1);
684
685        if is_blankz(self.ch()) {
686            Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
687        } else {
688            Err(ScanError::new(
689                *mark,
690                "while scanning TAG, did not find expected whitespace or line break",
691            ))
692        }
693    }
694
695    fn fetch_tag(&mut self) -> ScanResult {
696        self.save_simple_key()?;
697        self.disallow_simple_key();
698
699        let tok = self.scan_tag()?;
700        self.tokens.push_back(tok);
701        Ok(())
702    }
703
704    fn scan_tag(&mut self) -> Result<Token, ScanError> {
705        let start_mark = self.mark;
706        let mut handle = String::new();
707        let mut suffix;
708        let mut secondary = false;
709
710        // Check if the tag is in the canonical form (verbatim).
711        self.lookahead(2);
712
713        if self.buffer[1] == '<' {
714            // Eat '!<'
715            self.skip();
716            self.skip();
717            suffix = self.scan_tag_uri(false, false, "", &start_mark)?;
718
719            if self.ch() != '>' {
720                return Err(ScanError::new(
721                    start_mark,
722                    "while scanning a tag, did not find the expected '>'",
723                ));
724            }
725
726            self.skip();
727        } else {
728            // The tag has either the '!suffix' or the '!handle!suffix'
729            handle = self.scan_tag_handle(false, &start_mark)?;
730            // Check if it is, indeed, handle.
731            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
732                if handle == "!!" {
733                    secondary = true;
734                }
735                suffix = self.scan_tag_uri(false, secondary, "", &start_mark)?;
736            } else {
737                suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
738                handle = "!".to_string();
739                // A special case: the '!' tag.  Set the handle to '' and the
740                // suffix to '!'.
741                if suffix.is_empty() {
742                    handle.clear();
743                    suffix = "!".to_owned();
744                }
745            }
746        }
747
748        self.lookahead(1);
749        if is_blankz(self.ch()) {
750            // XXX: ex 7.2, an empty scalar can follow a secondary tag
751            Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
752        } else {
753            Err(ScanError::new(
754                start_mark,
755                "while scanning a tag, did not find expected whitespace or line break",
756            ))
757        }
758    }
759
760    fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
761        let mut string = String::new();
762        self.lookahead(1);
763        if self.ch() != '!' {
764            return Err(ScanError::new(
765                *mark,
766                "while scanning a tag, did not find expected '!'",
767            ));
768        }
769
770        string.push(self.ch());
771        self.skip();
772
773        self.lookahead(1);
774        while is_alpha(self.ch()) {
775            string.push(self.ch());
776            self.skip();
777            self.lookahead(1);
778        }
779
780        // Check if the trailing character is '!' and copy it.
781        if self.ch() == '!' {
782            string.push(self.ch());
783            self.skip();
784        } else if directive && string != "!" {
785            // It's either the '!' tag or not really a tag handle.  If it's a %TAG
786            // directive, it's an error.  If it's a tag token, it must be a part of
787            // URI.
788            return Err(ScanError::new(
789                *mark,
790                "while parsing a tag directive, did not find expected '!'",
791            ));
792        }
793        Ok(string)
794    }
795
796    fn scan_tag_uri(
797        &mut self,
798        directive: bool,
799        _is_secondary: bool,
800        head: &str,
801        mark: &Marker,
802    ) -> Result<String, ScanError> {
803        let mut length = head.len();
804        let mut string = String::new();
805
806        // Copy the head if needed.
807        // Note that we don't copy the leading '!' character.
808        if length > 1 {
809            string.extend(head.chars().skip(1));
810        }
811
812        self.lookahead(1);
813        /*
814         * The set of characters that may appear in URI is as follows:
815         *
816         *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
817         *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
818         *      '%'.
819         */
820        while match self.ch() {
821            ';' | '/' | '?' | ':' | '@' | '&' => true,
822            '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
823            '%' => true,
824            c if is_alpha(c) => true,
825            _ => false,
826        } {
827            // Check if it is a URI-escape sequence.
828            if self.ch() == '%' {
829                string.push(self.scan_uri_escapes(directive, mark)?);
830            } else {
831                string.push(self.ch());
832                self.skip();
833            }
834
835            length += 1;
836            self.lookahead(1);
837        }
838
839        if length == 0 {
840            return Err(ScanError::new(
841                *mark,
842                "while parsing a tag, did not find expected tag URI",
843            ));
844        }
845
846        Ok(string)
847    }
848
849    fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
850        let mut width = 0usize;
851        let mut code = 0u32;
852        loop {
853            self.lookahead(3);
854
855            if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
856                return Err(ScanError::new(
857                    *mark,
858                    "while parsing a tag, did not find URI escaped octet",
859                ));
860            }
861
862            let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
863            if width == 0 {
864                width = match octet {
865                    _ if octet & 0x80 == 0x00 => 1,
866                    _ if octet & 0xE0 == 0xC0 => 2,
867                    _ if octet & 0xF0 == 0xE0 => 3,
868                    _ if octet & 0xF8 == 0xF0 => 4,
869                    _ => {
870                        return Err(ScanError::new(
871                            *mark,
872                            "while parsing a tag, found an incorrect leading UTF-8 octet",
873                        ));
874                    }
875                };
876                code = octet;
877            } else {
878                if octet & 0xc0 != 0x80 {
879                    return Err(ScanError::new(
880                        *mark,
881                        "while parsing a tag, found an incorrect trailing UTF-8 octet",
882                    ));
883                }
884                code = (code << 8) + octet;
885            }
886
887            self.skip();
888            self.skip();
889            self.skip();
890
891            width -= 1;
892            if width == 0 {
893                break;
894            }
895        }
896
897        match char::from_u32(code) {
898            Some(ch) => Ok(ch),
899            None => Err(ScanError::new(
900                *mark,
901                "while parsing a tag, found an invalid UTF-8 codepoint",
902            )),
903        }
904    }
905
906    fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
907        self.save_simple_key()?;
908        self.disallow_simple_key();
909
910        let tok = self.scan_anchor(alias)?;
911
912        self.tokens.push_back(tok);
913
914        Ok(())
915    }
916
917    fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
918        let mut string = String::new();
919        let start_mark = self.mark;
920
921        self.skip();
922        self.lookahead(1);
923        while is_alpha(self.ch()) {
924            string.push(self.ch());
925            self.skip();
926            self.lookahead(1);
927        }
928
929        if string.is_empty()
930            || match self.ch() {
931                c if is_blankz(c) => false,
932                '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
933                _ => true,
934            }
935        {
936            return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
937        }
938
939        if alias {
940            Ok(Token(start_mark, TokenType::Alias(string)))
941        } else {
942            Ok(Token(start_mark, TokenType::Anchor(string)))
943        }
944    }
945
946    fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
947        // The indicators '[' and '{' may start a simple key.
948        self.save_simple_key()?;
949
950        self.increase_flow_level()?;
951
952        self.allow_simple_key();
953
954        let start_mark = self.mark;
955        self.skip();
956
957        self.tokens.push_back(Token(start_mark, tok));
958        Ok(())
959    }
960
961    fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
962        self.remove_simple_key()?;
963        self.decrease_flow_level();
964
965        self.disallow_simple_key();
966
967        let start_mark = self.mark;
968        self.skip();
969
970        self.tokens.push_back(Token(start_mark, tok));
971        Ok(())
972    }
973
974    fn fetch_flow_entry(&mut self) -> ScanResult {
975        self.remove_simple_key()?;
976        self.allow_simple_key();
977
978        let start_mark = self.mark;
979        self.skip();
980
981        self.tokens
982            .push_back(Token(start_mark, TokenType::FlowEntry));
983        Ok(())
984    }
985
986    fn increase_flow_level(&mut self) -> ScanResult {
987        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
988        self.flow_level = self
989            .flow_level
990            .checked_add(1)
991            .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
992        Ok(())
993    }
994    fn decrease_flow_level(&mut self) {
995        if self.flow_level > 0 {
996            self.flow_level -= 1;
997            self.simple_keys.pop().unwrap();
998        }
999    }
1000
1001    fn fetch_block_entry(&mut self) -> ScanResult {
1002        if self.flow_level == 0 {
1003            // Check if we are allowed to start a new entry.
1004            if !self.simple_key_allowed {
1005                return Err(ScanError::new(
1006                    self.mark,
1007                    "block sequence entries are not allowed in this context",
1008                ));
1009            }
1010
1011            let mark = self.mark;
1012            // generate BLOCK-SEQUENCE-START if indented
1013            self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1014        } else {
1015            // - * only allowed in block
1016            return Err(ScanError::new(
1017                self.mark,
1018                r#""-" is only valid inside a block"#,
1019            ));
1020        }
1021        self.remove_simple_key()?;
1022        self.allow_simple_key();
1023
1024        let start_mark = self.mark;
1025        self.skip();
1026
1027        self.tokens
1028            .push_back(Token(start_mark, TokenType::BlockEntry));
1029        Ok(())
1030    }
1031
1032    fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1033        self.unroll_indent(-1);
1034        self.remove_simple_key()?;
1035        self.disallow_simple_key();
1036
1037        let mark = self.mark;
1038
1039        self.skip();
1040        self.skip();
1041        self.skip();
1042
1043        self.tokens.push_back(Token(mark, t));
1044        Ok(())
1045    }
1046
1047    fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1048        self.save_simple_key()?;
1049        self.allow_simple_key();
1050        let tok = self.scan_block_scalar(literal)?;
1051
1052        self.tokens.push_back(tok);
1053        Ok(())
1054    }
1055
    /// Scans a block scalar, starting at its `|` or `>` indicator, and
    /// returns it as one `Scalar` token.
    ///
    /// The header may carry a chomping indicator (`+` or `-`) and a one-digit
    /// indentation indicator (`1`-`9`) in either order, followed by optional
    /// blanks and an optional `#` comment up to the end of the line.  The
    /// content lines are then collected.  When `literal` is `false`, the line
    /// break between two consecutive lines that both start with a non-blank
    /// character is folded: it becomes a single space if no empty lines
    /// follow it, and is dropped (keeping only the extra breaks) otherwise.
    ///
    /// `literal == true` yields `TScalarStyle::Literal`, otherwise the token
    /// carries `TScalarStyle::Foled`.
    ///
    /// # Errors
    /// Returns a `ScanError` when the indentation indicator is `0`, when the
    /// header line contains anything other than blanks or a comment, or when
    /// `block_scalar_breaks` fails.
    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
        // Position of the indicator; used for header errors only (it is
        // shadowed further down once the content start is known).
        let start_mark = self.mark;
        // Chomping mode: 1 for `+`, -1 for `-`, 0 when no indicator is given.
        let mut chomping: i32 = 0;
        // Explicit indentation indicator; 0 means "not given".
        let mut increment: usize = 0;
        // Content indentation column; 0 means "detect from the content".
        let mut indent: usize = 0;
        // Whether the line currently being read starts with a blank.
        let mut trailing_blank: bool;
        // Whether the previously read line started with a blank.
        let mut leading_blank: bool = false;

        let mut string = String::new();
        // The break that ended the previous content line.
        let mut leading_break = String::new();
        // Breaks of the empty lines that followed `leading_break`.
        let mut trailing_breaks = String::new();

        // skip '|' or '>'
        self.skip();
        self.lookahead(1);

        // Header form 1: chomping indicator, then optional indentation digit.
        if self.ch() == '+' || self.ch() == '-' {
            if self.ch() == '+' {
                chomping = 1;
            } else {
                chomping = -1;
            }
            self.skip();
            self.lookahead(1);
            if is_digit(self.ch()) {
                if self.ch() == '0' {
                    return Err(ScanError::new(
                        start_mark,
                        "while scanning a block scalar, found an indentation indicator equal to 0",
                    ));
                }
                increment = (self.ch() as usize) - ('0' as usize);
                self.skip();
            }
        // Header form 2: indentation digit, then optional chomping indicator.
        } else if is_digit(self.ch()) {
            if self.ch() == '0' {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }

            increment = (self.ch() as usize) - ('0' as usize);
            self.skip();
            self.lookahead(1);
            if self.ch() == '+' || self.ch() == '-' {
                if self.ch() == '+' {
                    chomping = 1;
                } else {
                    chomping = -1;
                }
                self.skip();
            }
        }

        // Eat whitespaces and comments to the end of the line.
        self.lookahead(1);

        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }

        if self.ch() == '#' {
            while !is_breakz(self.ch()) {
                self.skip();
                self.lookahead(1);
            }
        }

        // Check if we are at the end of the line.
        if !is_breakz(self.ch()) {
            return Err(ScanError::new(
                start_mark,
                "while scanning a block scalar, did not find expected comment or line break",
            ));
        }

        // Consume the line break that ends the header line, if there is one.
        if is_break(self.ch()) {
            self.lookahead(2);
            self.skip_line();
        }

        // An explicit indentation indicator is relative to the enclosing
        // indentation level when there is one (`self.indent >= 0`).
        if increment > 0 {
            indent = if self.indent >= 0 {
                (self.indent + increment as isize) as usize
            } else {
                increment
            }
        }
        // Scan the leading line breaks and determine the indentation level if needed.
        self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;

        self.lookahead(1);

        // From here on the mark refers to the start of the content; this is
        // the position stored in the returned token.
        let start_mark = self.mark;

        // Read content lines for as long as they sit exactly at `indent`.
        while self.mark.col == indent && !is_z(self.ch()) {
            // We are at the beginning of a non-empty line.
            trailing_blank = is_blank(self.ch());
            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
                // Folded style: the single break between two "normal" lines
                // becomes a space, unless empty lines follow it.
                if trailing_breaks.is_empty() {
                    string.push(' ');
                }
                leading_break.clear();
            } else {
                // Keep the break as it is.
                string.push_str(&leading_break);
                leading_break.clear();
            }

            string.push_str(&trailing_breaks);
            trailing_breaks.clear();

            leading_blank = is_blank(self.ch());

            // Copy the rest of the line verbatim.
            while !is_breakz(self.ch()) {
                string.push(self.ch());
                self.skip();
                self.lookahead(1);
            }
            // break on EOF
            if is_z(self.ch()) {
                break;
            }

            self.lookahead(2);
            self.read_break(&mut leading_break);

            // Eat the following indentation spaces and line breaks.
            self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
        }

        // Chomp the tail.
        // Unless the `-` indicator was given, the final line break is kept.
        if chomping != -1 {
            string.push_str(&leading_break);
        }

        // Only the `+` indicator keeps the trailing empty lines as well.
        if chomping == 1 {
            string.push_str(&trailing_breaks);
        }

        if literal {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::Literal, string),
            ))
        } else {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::Foled, string),
            ))
        }
    }
1209
1210    fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1211        let mut max_indent = 0;
1212        loop {
1213            self.lookahead(1);
1214            while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
1215                self.skip();
1216                self.lookahead(1);
1217            }
1218
1219            if self.mark.col > max_indent {
1220                max_indent = self.mark.col;
1221            }
1222
1223            // Check for a tab character messing the indentation.
1224            if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
1225                return Err(ScanError::new(self.mark,
1226                        "while scanning a block scalar, found a tab character where an indentation space is expected"));
1227            }
1228
1229            if !is_break(self.ch()) {
1230                break;
1231            }
1232
1233            self.lookahead(2);
1234            // Consume the line break.
1235            self.read_break(breaks);
1236        }
1237
1238        if *indent == 0 {
1239            *indent = max_indent;
1240            if *indent < (self.indent + 1) as usize {
1241                *indent = (self.indent + 1) as usize;
1242            }
1243            if *indent < 1 {
1244                *indent = 1;
1245            }
1246        }
1247        Ok(())
1248    }
1249
1250    fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1251        self.save_simple_key()?;
1252        self.disallow_simple_key();
1253
1254        let tok = self.scan_flow_scalar(single)?;
1255
1256        // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
1257        // YAML allows the following value to be specified adjacent to the “:”.
1258        self.adjacent_value_allowed_at = self.mark.index;
1259
1260        self.tokens.push_back(tok);
1261        Ok(())
1262    }
1263
1264    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1265        let start_mark = self.mark;
1266
1267        let mut string = String::new();
1268        let mut leading_break = String::new();
1269        let mut trailing_breaks = String::new();
1270        let mut whitespaces = String::new();
1271        let mut leading_blanks;
1272
1273        /* Eat the left quote. */
1274        self.skip();
1275
1276        loop {
1277            /* Check for a document indicator. */
1278            self.lookahead(4);
1279
1280            if self.mark.col == 0
1281                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1282                    || ((self.buffer[0] == '.')
1283                        && (self.buffer[1] == '.')
1284                        && (self.buffer[2] == '.')))
1285                && is_blankz(self.buffer[3])
1286            {
1287                return Err(ScanError::new(
1288                    start_mark,
1289                    "while scanning a quoted scalar, found unexpected document indicator",
1290                ));
1291            }
1292
1293            if is_z(self.ch()) {
1294                return Err(ScanError::new(
1295                    start_mark,
1296                    "while scanning a quoted scalar, found unexpected end of stream",
1297                ));
1298            }
1299
1300            self.lookahead(2);
1301
1302            leading_blanks = false;
1303            // Consume non-blank characters.
1304
1305            while !is_blankz(self.ch()) {
1306                match self.ch() {
1307                    // Check for an escaped single quote.
1308                    '\'' if self.buffer[1] == '\'' && single => {
1309                        string.push('\'');
1310                        self.skip();
1311                        self.skip();
1312                    }
1313                    // Check for the right quote.
1314                    '\'' if single => break,
1315                    '"' if !single => break,
1316                    // Check for an escaped line break.
1317                    '\\' if !single && is_break(self.buffer[1]) => {
1318                        self.lookahead(3);
1319                        self.skip();
1320                        self.skip_line();
1321                        leading_blanks = true;
1322                        break;
1323                    }
1324                    // Check for an escape sequence.
1325                    '\\' if !single => {
1326                        let mut code_length = 0usize;
1327                        match self.buffer[1] {
1328                            '0' => string.push('\0'),
1329                            'a' => string.push('\x07'),
1330                            'b' => string.push('\x08'),
1331                            't' | '\t' => string.push('\t'),
1332                            'n' => string.push('\n'),
1333                            'v' => string.push('\x0b'),
1334                            'f' => string.push('\x0c'),
1335                            'r' => string.push('\x0d'),
1336                            'e' => string.push('\x1b'),
1337                            ' ' => string.push('\x20'),
1338                            '"' => string.push('"'),
1339                            '\'' => string.push('\''),
1340                            '\\' => string.push('\\'),
1341                            // NEL (#x85)
1342                            'N' => string.push(char::from_u32(0x85).unwrap()),
1343                            // #xA0
1344                            '_' => string.push(char::from_u32(0xA0).unwrap()),
1345                            // LS (#x2028)
1346                            'L' => string.push(char::from_u32(0x2028).unwrap()),
1347                            // PS (#x2029)
1348                            'P' => string.push(char::from_u32(0x2029).unwrap()),
1349                            'x' => code_length = 2,
1350                            'u' => code_length = 4,
1351                            'U' => code_length = 8,
1352                            _ => {
1353                                return Err(ScanError::new(
1354                                    start_mark,
1355                                    "while parsing a quoted scalar, found unknown escape character",
1356                                ))
1357                            }
1358                        }
1359                        self.skip();
1360                        self.skip();
1361                        // Consume an arbitrary escape code.
1362                        if code_length > 0 {
1363                            self.lookahead(code_length);
1364                            let mut value = 0u32;
1365                            for i in 0..code_length {
1366                                if !is_hex(self.buffer[i]) {
1367                                    return Err(ScanError::new(start_mark,
1368                                        "while parsing a quoted scalar, did not find expected hexadecimal number"));
1369                                }
1370                                value = (value << 4) + as_hex(self.buffer[i]);
1371                            }
1372
1373                            let ch = match char::from_u32(value) {
1374                                Some(v) => v,
1375                                None => {
1376                                    return Err(ScanError::new(start_mark,
1377                                        "while parsing a quoted scalar, found invalid Unicode character escape code"));
1378                                }
1379                            };
1380                            string.push(ch);
1381
1382                            for _ in 0..code_length {
1383                                self.skip();
1384                            }
1385                        }
1386                    }
1387                    c => {
1388                        string.push(c);
1389                        self.skip();
1390                    }
1391                }
1392                self.lookahead(2);
1393            }
1394            self.lookahead(1);
1395            match self.ch() {
1396                '\'' if single => break,
1397                '"' if !single => break,
1398                _ => {}
1399            }
1400
1401            // Consume blank characters.
1402            while is_blank(self.ch()) || is_break(self.ch()) {
1403                if is_blank(self.ch()) {
1404                    // Consume a space or a tab character.
1405                    if leading_blanks {
1406                        self.skip();
1407                    } else {
1408                        whitespaces.push(self.ch());
1409                        self.skip();
1410                    }
1411                } else {
1412                    self.lookahead(2);
1413                    // Check if it is a first line break.
1414                    if leading_blanks {
1415                        self.read_break(&mut trailing_breaks);
1416                    } else {
1417                        whitespaces.clear();
1418                        self.read_break(&mut leading_break);
1419                        leading_blanks = true;
1420                    }
1421                }
1422                self.lookahead(1);
1423            }
1424            // Join the whitespaces or fold line breaks.
1425            if leading_blanks {
1426                if leading_break.is_empty() {
1427                    string.push_str(&leading_break);
1428                    string.push_str(&trailing_breaks);
1429                    trailing_breaks.clear();
1430                    leading_break.clear();
1431                } else {
1432                    if trailing_breaks.is_empty() {
1433                        string.push(' ');
1434                    } else {
1435                        string.push_str(&trailing_breaks);
1436                        trailing_breaks.clear();
1437                    }
1438                    leading_break.clear();
1439                }
1440            } else {
1441                string.push_str(&whitespaces);
1442                whitespaces.clear();
1443            }
1444        } // loop
1445
1446        // Eat the right quote.
1447        self.skip();
1448
1449        if single {
1450            Ok(Token(
1451                start_mark,
1452                TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1453            ))
1454        } else {
1455            Ok(Token(
1456                start_mark,
1457                TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1458            ))
1459        }
1460    }
1461
1462    fn fetch_plain_scalar(&mut self) -> ScanResult {
1463        self.save_simple_key()?;
1464        self.disallow_simple_key();
1465
1466        let tok = self.scan_plain_scalar()?;
1467
1468        self.tokens.push_back(tok);
1469        Ok(())
1470    }
1471
    /// Scans a plain (unquoted) scalar starting at the current position and
    /// returns it as a `Scalar` token with `TScalarStyle::Plain`.
    ///
    /// The scalar ends at a document indicator in column 0, at a `#` found at
    /// the start of the scalar or after blanks, at a `:` followed by a
    /// character accepted by `is_blankz` (or, inside a flow collection, by
    /// `is_flow`), at `,`, `[`, `]`, `{` or `}` inside a flow collection, or,
    /// in block context, at a line indented less than the enclosing
    /// indentation plus one.  Line breaks inside the scalar are folded: a
    /// single break becomes a space, additional breaks are kept.  Blanks at
    /// the end of the scalar are not part of the value.
    ///
    /// If at least one line break was consumed after the last character of
    /// the scalar, simple keys are allowed again on return.
    ///
    /// # Errors
    /// Returns a `ScanError` (at the start of the scalar) when a tab is found
    /// in the indentation of a continuation line.
    fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
        // Minimum column a continuation line must have in block context.
        let indent = self.indent + 1;
        let start_mark = self.mark;

        let mut string = String::new();
        // First line break of the current run of breaks.
        let mut leading_break = String::new();
        // Every further line break of that run.
        let mut trailing_breaks = String::new();
        // Blanks seen since the last non-blank character on this line.
        let mut whitespaces = String::new();
        // True once a line break has been consumed in the current run of
        // blanks and nothing has been appended to `string` since.
        let mut leading_blanks = false;

        loop {
            /* Check for a document indicator. */
            self.lookahead(4);

            if self.mark.col == 0
                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
                    || ((self.buffer[0] == '.')
                        && (self.buffer[1] == '.')
                        && (self.buffer[2] == '.')))
                && is_blankz(self.buffer[3])
            {
                break;
            }

            // A '#' here is either the first character or follows blanks, so
            // it starts a comment and ends the scalar.
            if self.ch() == '#' {
                break;
            }
            // Consume one run of non-blank characters.
            while !is_blankz(self.ch()) {
                // indicators can end a plain scalar, see 7.3.3. Plain Style
                match self.ch() {
                    ':' if is_blankz(self.buffer[1])
                        || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
                    {
                        break;
                    }
                    ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
                    _ => {}
                }

                // Now that the scalar is known to continue, flush the blanks
                // or folded line breaks collected before this character.
                if leading_blanks || !whitespaces.is_empty() {
                    if leading_blanks {
                        if leading_break.is_empty() {
                            string.push_str(&leading_break);
                            string.push_str(&trailing_breaks);
                            trailing_breaks.clear();
                            leading_break.clear();
                        } else {
                            // A single break folds into a space; extra breaks
                            // are kept and replace that space.
                            if trailing_breaks.is_empty() {
                                string.push(' ');
                            } else {
                                string.push_str(&trailing_breaks);
                                trailing_breaks.clear();
                            }
                            leading_break.clear();
                        }
                        leading_blanks = false;
                    } else {
                        string.push_str(&whitespaces);
                        whitespaces.clear();
                    }
                }

                string.push(self.ch());
                self.skip();
                self.lookahead(2);
            }
            // is the end?
            if !(is_blank(self.ch()) || is_break(self.ch())) {
                break;
            }
            self.lookahead(1);

            // Consume the blanks and line breaks that follow.
            while is_blank(self.ch()) || is_break(self.ch()) {
                if is_blank(self.ch()) {
                    // A tab inside the indentation of a continuation line is
                    // rejected.
                    if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
                        return Err(ScanError::new(
                            start_mark,
                            "while scanning a plain scalar, found a tab",
                        ));
                    }

                    // Blanks after a line break are dropped; blanks before
                    // one are buffered until it is known what follows.
                    if leading_blanks {
                        self.skip();
                    } else {
                        whitespaces.push(self.ch());
                        self.skip();
                    }
                } else {
                    self.lookahead(2);
                    // Check if it is a first line break
                    if leading_blanks {
                        self.read_break(&mut trailing_breaks);
                    } else {
                        // Blanks in front of a line break are discarded.
                        whitespaces.clear();
                        self.read_break(&mut leading_break);
                        leading_blanks = true;
                    }
                }
                self.lookahead(1);
            }

            // check indentation level
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        // The scanner now sits on a new line, where a simple key may start.
        if leading_blanks {
            self.allow_simple_key();
        }

        Ok(Token(
            start_mark,
            TokenType::Scalar(TScalarStyle::Plain, string),
        ))
    }
1588
1589    fn fetch_key(&mut self) -> ScanResult {
1590        let start_mark = self.mark;
1591        if self.flow_level == 0 {
1592            // Check if we are allowed to start a new key (not necessarily simple).
1593            if !self.simple_key_allowed {
1594                return Err(ScanError::new(
1595                    self.mark,
1596                    "mapping keys are not allowed in this context",
1597                ));
1598            }
1599            self.roll_indent(
1600                start_mark.col,
1601                None,
1602                TokenType::BlockMappingStart,
1603                start_mark,
1604            );
1605        }
1606
1607        self.remove_simple_key()?;
1608
1609        if self.flow_level == 0 {
1610            self.allow_simple_key();
1611        } else {
1612            self.disallow_simple_key();
1613        }
1614
1615        self.skip();
1616        self.tokens.push_back(Token(start_mark, TokenType::Key));
1617        Ok(())
1618    }
1619
1620    fn fetch_value(&mut self) -> ScanResult {
1621        let sk = self.simple_keys.last().unwrap().clone();
1622        let start_mark = self.mark;
1623        if sk.possible {
1624            // insert simple key
1625            let tok = Token(sk.mark, TokenType::Key);
1626            let tokens_parsed = self.tokens_parsed;
1627            self.insert_token(sk.token_number - tokens_parsed, tok);
1628
1629            // Add the BLOCK-MAPPING-START token if needed.
1630            self.roll_indent(
1631                sk.mark.col,
1632                Some(sk.token_number),
1633                TokenType::BlockMappingStart,
1634                start_mark,
1635            );
1636
1637            self.simple_keys.last_mut().unwrap().possible = false;
1638            self.disallow_simple_key();
1639        } else {
1640            // The ':' indicator follows a complex key.
1641            if self.flow_level == 0 {
1642                if !self.simple_key_allowed {
1643                    return Err(ScanError::new(
1644                        start_mark,
1645                        "mapping values are not allowed in this context",
1646                    ));
1647                }
1648
1649                self.roll_indent(
1650                    start_mark.col,
1651                    None,
1652                    TokenType::BlockMappingStart,
1653                    start_mark,
1654                );
1655            }
1656
1657            if self.flow_level == 0 {
1658                self.allow_simple_key();
1659            } else {
1660                self.disallow_simple_key();
1661            }
1662        }
1663        self.skip();
1664        self.tokens.push_back(Token(start_mark, TokenType::Value));
1665
1666        Ok(())
1667    }
1668
1669    fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1670        if self.flow_level > 0 {
1671            return;
1672        }
1673
1674        if self.indent < col as isize {
1675            self.indents.push(self.indent);
1676            self.indent = col as isize;
1677            let tokens_parsed = self.tokens_parsed;
1678            match number {
1679                Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1680                None => self.tokens.push_back(Token(mark, tok)),
1681            }
1682        }
1683    }
1684
1685    fn unroll_indent(&mut self, col: isize) {
1686        if self.flow_level > 0 {
1687            return;
1688        }
1689        while self.indent > col {
1690            self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1691            self.indent = self.indents.pop().unwrap();
1692        }
1693    }
1694
1695    fn save_simple_key(&mut self) -> Result<(), ScanError> {
1696        let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
1697        if self.simple_key_allowed {
1698            let mut sk = SimpleKey::new(self.mark);
1699            sk.possible = true;
1700            sk.required = required;
1701            sk.token_number = self.tokens_parsed + self.tokens.len();
1702
1703            self.remove_simple_key()?;
1704
1705            self.simple_keys.pop();
1706            self.simple_keys.push(sk);
1707        }
1708        Ok(())
1709    }
1710
1711    fn remove_simple_key(&mut self) -> ScanResult {
1712        let last = self.simple_keys.last_mut().unwrap();
1713        if last.possible && last.required {
1714            return Err(ScanError::new(self.mark, "simple key expected"));
1715        }
1716
1717        last.possible = false;
1718        Ok(())
1719    }
1720}
1721
1722#[cfg(test)]
1723mod test {
1724    use super::TokenType::*;
1725    use super::*;
1726
    // Pulls the next token from the scanner `$p` and panics unless its token
    // type (tuple field `.1`) matches the pattern `$tk`.  Also panics, via
    // `unwrap`, when the scanner yields no further token.
    macro_rules! next {
        ($p:ident, $tk:pat) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                $tk => {}
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }
1736
    // Pulls the next token from the scanner `$p` and asserts that it is a
    // `Scalar` whose style equals `$tk` and whose text equals `$v`.  Panics on
    // any other token type, or when the scanner yields no further token.
    macro_rules! next_scalar {
        ($p:ident, $tk:expr, $v:expr) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                Scalar(style, ref v) => {
                    assert_eq!(style, $tk);
                    assert_eq!(v, $v);
                }
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }
1749
    // Asserts that the scanner `$p` is exhausted, i.e. that `next()` returns
    // `None`.
    macro_rules! end {
        ($p:ident) => {{
            assert_eq!($p.next(), None);
        }};
    }
1755    /// test cases in libyaml scanner.c
1756    #[test]
1757    fn test_empty() {
1758        let s = "";
1759        let mut p = Scanner::new(s.chars());
1760        next!(p, StreamStart(..));
1761        next!(p, StreamEnd);
1762        end!(p);
1763    }
1764
1765    #[test]
1766    fn test_scalar() {
1767        let s = "a scalar";
1768        let mut p = Scanner::new(s.chars());
1769        next!(p, StreamStart(..));
1770        next!(p, Scalar(TScalarStyle::Plain, _));
1771        next!(p, StreamEnd);
1772        end!(p);
1773    }
1774
1775    #[test]
1776    fn test_explicit_scalar() {
1777        let s = "---
1778'a scalar'
1779...
1780";
1781        let mut p = Scanner::new(s.chars());
1782        next!(p, StreamStart(..));
1783        next!(p, DocumentStart);
1784        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1785        next!(p, DocumentEnd);
1786        next!(p, StreamEnd);
1787        end!(p);
1788    }
1789
1790    #[test]
1791    fn test_multiple_documents() {
1792        let s = "
1793'a scalar'
1794---
1795'a scalar'
1796---
1797'a scalar'
1798";
1799        let mut p = Scanner::new(s.chars());
1800        next!(p, StreamStart(..));
1801        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1802        next!(p, DocumentStart);
1803        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1804        next!(p, DocumentStart);
1805        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
1806        next!(p, StreamEnd);
1807        end!(p);
1808    }
1809
1810    #[test]
1811    fn test_a_flow_sequence() {
1812        let s = "[item 1, item 2, item 3]";
1813        let mut p = Scanner::new(s.chars());
1814        next!(p, StreamStart(..));
1815        next!(p, FlowSequenceStart);
1816        next_scalar!(p, TScalarStyle::Plain, "item 1");
1817        next!(p, FlowEntry);
1818        next!(p, Scalar(TScalarStyle::Plain, _));
1819        next!(p, FlowEntry);
1820        next!(p, Scalar(TScalarStyle::Plain, _));
1821        next!(p, FlowSequenceEnd);
1822        next!(p, StreamEnd);
1823        end!(p);
1824    }
1825
1826    #[test]
1827    fn test_a_flow_mapping() {
1828        let s = "
1829{
1830    a simple key: a value, # Note that the KEY token is produced.
1831    ? a complex key: another value,
1832}
1833";
1834        let mut p = Scanner::new(s.chars());
1835        next!(p, StreamStart(..));
1836        next!(p, FlowMappingStart);
1837        next!(p, Key);
1838        next!(p, Scalar(TScalarStyle::Plain, _));
1839        next!(p, Value);
1840        next!(p, Scalar(TScalarStyle::Plain, _));
1841        next!(p, FlowEntry);
1842        next!(p, Key);
1843        next_scalar!(p, TScalarStyle::Plain, "a complex key");
1844        next!(p, Value);
1845        next!(p, Scalar(TScalarStyle::Plain, _));
1846        next!(p, FlowEntry);
1847        next!(p, FlowMappingEnd);
1848        next!(p, StreamEnd);
1849        end!(p);
1850    }
1851
1852    #[test]
1853    fn test_block_sequences() {
1854        let s = "
1855- item 1
1856- item 2
1857-
1858  - item 3.1
1859  - item 3.2
1860-
1861  key 1: value 1
1862  key 2: value 2
1863";
1864        let mut p = Scanner::new(s.chars());
1865        next!(p, StreamStart(..));
1866        next!(p, BlockSequenceStart);
1867        next!(p, BlockEntry);
1868        next_scalar!(p, TScalarStyle::Plain, "item 1");
1869        next!(p, BlockEntry);
1870        next_scalar!(p, TScalarStyle::Plain, "item 2");
1871        next!(p, BlockEntry);
1872        next!(p, BlockSequenceStart);
1873        next!(p, BlockEntry);
1874        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
1875        next!(p, BlockEntry);
1876        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
1877        next!(p, BlockEnd);
1878        next!(p, BlockEntry);
1879        next!(p, BlockMappingStart);
1880        next!(p, Key);
1881        next_scalar!(p, TScalarStyle::Plain, "key 1");
1882        next!(p, Value);
1883        next_scalar!(p, TScalarStyle::Plain, "value 1");
1884        next!(p, Key);
1885        next_scalar!(p, TScalarStyle::Plain, "key 2");
1886        next!(p, Value);
1887        next_scalar!(p, TScalarStyle::Plain, "value 2");
1888        next!(p, BlockEnd);
1889        next!(p, BlockEnd);
1890        next!(p, StreamEnd);
1891        end!(p);
1892    }
1893
/// Scans a block mapping that mixes simple keys, an explicit (`?`) key, a
/// nested mapping and a nested sequence.
///
/// Besides the token kinds, every scalar's style and text is asserted, in line
/// with the sibling tests that use `next_scalar!`. A scanner change that alters
/// scalar contents therefore fails here instead of passing unnoticed.
#[test]
fn test_block_mappings() {
    let s = "
a simple key: a value   # The KEY token is produced here.
? a complex key
: another value
a mapping:
  key 1: value 1
  key 2: value 2
a sequence:
  - item 1
  - item 2
";
    let mut p = Scanner::new(s.chars());
    next!(p, StreamStart(..));
    next!(p, BlockMappingStart);

    // `a simple key: a value   # ...` -- the assertion pins that the trailing
    // comment and the blanks before it are not part of the value scalar.
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "a simple key");
    next!(p, Value);
    next_scalar!(p, TScalarStyle::Plain, "a value");

    // Explicit `? key` / `: value` pair.
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "a complex key");
    next!(p, Value);
    next_scalar!(p, TScalarStyle::Plain, "another value");

    // `a mapping:` followed by an indented nested mapping.
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "a mapping");
    next!(p, Value); // libyaml comment seems to be wrong
    next!(p, BlockMappingStart);
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "key 1");
    next!(p, Value);
    next_scalar!(p, TScalarStyle::Plain, "value 1");
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "key 2");
    next!(p, Value);
    next_scalar!(p, TScalarStyle::Plain, "value 2");
    next!(p, BlockEnd);

    // `a sequence:` followed by an indented nested sequence.
    next!(p, Key);
    next_scalar!(p, TScalarStyle::Plain, "a sequence");
    next!(p, Value);
    next!(p, BlockSequenceStart);
    next!(p, BlockEntry);
    next_scalar!(p, TScalarStyle::Plain, "item 1");
    next!(p, BlockEntry);
    next_scalar!(p, TScalarStyle::Plain, "item 2");
    next!(p, BlockEnd);

    // Close the outer mapping and the stream.
    next!(p, BlockEnd);
    next!(p, StreamEnd);
    end!(p);
}
1944
/// A sequence written at the same indentation as its parent mapping key.
///
/// The expected token stream has `BlockEntry` tokens directly after `Value`,
/// with no `BlockSequenceStart`, and a single `BlockEnd` before `StreamEnd`.
#[test]
fn test_no_block_sequence_start() {
    let yaml = "
key:
- item 1
- item 2
";
    let mut scanner = Scanner::new(yaml.chars());

    next!(scanner, StreamStart(..));
    next!(scanner, BlockMappingStart);

    // key:
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key");
    next!(scanner, Value);

    // The two unindented entries.
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 1");
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 2");

    next!(scanner, BlockEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
1966
/// A top-level block sequence whose three entries are, in order, a nested
/// sequence, a mapping with simple keys, and a mapping with an explicit
/// (`?` / `:`) key.
#[test]
fn test_collections_in_sequence() {
    let yaml = "
- - item 1
  - item 2
- key 1: value 1
  key 2: value 2
- ? complex key
  : complex value
";
    let mut scanner = Scanner::new(yaml.chars());

    next!(scanner, StreamStart(..));
    next!(scanner, BlockSequenceStart);

    // First entry: nested sequence.
    next!(scanner, BlockEntry);
    next!(scanner, BlockSequenceStart);
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 1");
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 2");
    next!(scanner, BlockEnd);

    // Second entry: mapping with two simple keys.
    next!(scanner, BlockEntry);
    next!(scanner, BlockMappingStart);
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key 1");
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "value 1");
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key 2");
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "value 2");
    next!(scanner, BlockEnd);

    // Third entry: mapping with an explicit key.
    next!(scanner, BlockEntry);
    next!(scanner, BlockMappingStart);
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "complex key");
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "complex value");
    next!(scanner, BlockEnd);

    // Close the outer sequence and the stream.
    next!(scanner, BlockEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
2009
/// A top-level block mapping with explicit (`?` / `:`) keys whose values are a
/// nested sequence and a nested mapping.
#[test]
fn test_collections_in_mapping() {
    let yaml = "
? a sequence
: - item 1
  - item 2
? a mapping
: key 1: value 1
  key 2: value 2
";
    let mut scanner = Scanner::new(yaml.chars());

    next!(scanner, StreamStart(..));
    next!(scanner, BlockMappingStart);

    // First pair: the value is a sequence.
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "a sequence");
    next!(scanner, Value);
    next!(scanner, BlockSequenceStart);
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 1");
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "item 2");
    next!(scanner, BlockEnd);

    // Second pair: the value is a mapping.
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "a mapping");
    next!(scanner, Value);
    next!(scanner, BlockMappingStart);
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key 1");
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "value 1");
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "key 2");
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "value 2");
    next!(scanner, BlockEnd);

    // Close the outer mapping and the stream.
    next!(scanner, BlockEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
2049
/// Flow mapping with one entry that has a key but no value (`? foo :`) and one
/// that has a value but no key (`: bar`).
///
/// The expected stream contains a `Value` token right after the first
/// `FlowEntry`, with no `Key` token in front of it.
#[test]
fn test_spec_ex7_3() {
    let yaml = "
{
    ? foo :,
    : bar,
}
";
    let mut scanner = Scanner::new(yaml.chars());

    next!(scanner, StreamStart(..));
    next!(scanner, FlowMappingStart);

    // ? foo :,
    next!(scanner, Key);
    next_scalar!(scanner, TScalarStyle::Plain, "foo");
    next!(scanner, Value);
    next!(scanner, FlowEntry);

    // : bar,
    next!(scanner, Value);
    next_scalar!(scanner, TScalarStyle::Plain, "bar");
    next!(scanner, FlowEntry);

    next!(scanner, FlowMappingEnd);
    next!(scanner, StreamEnd);
    end!(scanner);
}
2072
/// Inside a flow mapping, a value that starts with `:` or `?` and is directly
/// followed by a non-space character is expected to scan as one plain scalar.
#[test]
fn test_plain_scalar_starting_with_indicators_in_flow() {
    // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
    // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
    // character if followed by a non-space “safe” character, as this causes no ambiguity."

    // (input document, expected text of the value scalar)
    let cases = [("{a: :b}", ":b"), ("{a: ?b}", "?b")];

    for &(input, value) in cases.iter() {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, value);
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2103
/// In block context, a document consisting only of `:a` or `?a` is expected to
/// scan as a single plain scalar holding the whole input.
#[test]
fn test_plain_scalar_starting_with_indicators_in_block() {
    for &input in [":a", "?a"].iter() {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        // The entire input is the scalar's text.
        next_scalar!(scanner, TScalarStyle::Plain, input);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2120
/// In block context, `:` and `,` inside an unspaced plain scalar are expected
/// to stay part of the scalar: each input scans as one plain scalar holding the
/// whole input.
#[test]
fn test_plain_scalar_containing_indicators_in_block() {
    for &input in ["a:,b", ":,b"].iter() {
        let mut scanner = Scanner::new(input.chars());
        next!(scanner, StreamStart(..));
        // The entire input is the scalar's text.
        next_scalar!(scanner, TScalarStyle::Plain, input);
        next!(scanner, StreamEnd);
        end!(scanner);
    }
}
2137
/// Scans a document whose lines end in `\r\n` and checks that the token stream
/// is a document start followed by a two-entry block sequence.
#[test]
fn test_scanner_cr() {
    let mut scanner = Scanner::new("---\r\n- tok1\r\n- tok2".chars());

    next!(scanner, StreamStart(..));
    next!(scanner, DocumentStart);

    // The two sequence entries, one per CRLF-terminated line.
    next!(scanner, BlockSequenceStart);
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "tok1");
    next!(scanner, BlockEntry);
    next_scalar!(scanner, TScalarStyle::Plain, "tok2");
    next!(scanner, BlockEnd);

    next!(scanner, StreamEnd);
    end!(scanner);
}
2153
/// Placeholder: this test has no assertions yet and passes trivially.
#[test]
fn test_uri() {
    // TODO: not implemented.
}
2158
/// Placeholder: this test has no assertions yet and passes trivially.
#[test]
fn test_uri_escapes() {
    // TODO: not implemented.
}
2163}