use crate::content::yaml::vendored::scanner::*;
use std::collections::HashMap;
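/// States of the parser's state machine. `Parser::states` is the stack of
/// suspended states; `Parser::state` is the state that handles the next token.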
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    FlowMappingEmptyValue,
    End,
}
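/// Events emitted while parsing a YAML stream. The `usize` payloads are anchor
/// IDs (0 means "no anchor"); `Scalar` carries the value, its style, its anchor
/// ID, and an optional tag token.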
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event {
    StreamStart,
    StreamEnd,
    DocumentStart,
    DocumentEnd,
    Alias(usize),
    Scalar(String, TScalarStyle, usize, Option<TokenType>),
    SequenceStart(usize),
    SequenceEnd,
    MappingStart(usize),
    MappingEnd,
}

impl Event {
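    /// A plain `~` scalar, YAML's canonical form of null, used for omitted values.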
    fn empty_scalar() -> Event {
        Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None)
    }

    fn empty_scalar_with_anchor(anchor: usize, tag: Option<TokenType>) -> Event {
        Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, tag)
    }
}
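/// Pull parser that turns the scanner's token stream into `Event`s, resolving
/// anchors to numeric IDs along the way.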
#[derive(Debug)]
pub struct Parser<T> {
    scanner: Scanner<T>,
    states: Vec<State>,
    state: State,
    token: Option<Token>,
    current: Option<(Event, Marker)>,
    anchors: HashMap<String, usize>,
    anchor_id: usize,
}
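/// Receiver for parse events that does not care about source positions.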
pub trait EventReceiver {
    fn on_event(&mut self, ev: Event);
}
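/// Receiver for parse events together with the `Marker` (source position) at
/// which each event was produced. Every `EventReceiver` gets this for free via
/// the blanket impl below.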
pub trait MarkedEventReceiver {
    fn on_event(&mut self, ev: Event, _mark: Marker);
}

impl<R: EventReceiver> MarkedEventReceiver for R {
    fn on_event(&mut self, ev: Event, _mark: Marker) {
        self.on_event(ev)
    }
}
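/// Result of a single parsing step: the event and where it occurred, or a scan error.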
pub type ParseResult = Result<(Event, Marker), ScanError>;
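// Example usage (a sketch; `CollectingReceiver` is a hypothetical receiver type
// defined here only for illustration):
//
//     struct CollectingReceiver(Vec<Event>);
//
//     impl EventReceiver for CollectingReceiver {
//         fn on_event(&mut self, ev: Event) {
//             self.0.push(ev);
//         }
//     }
//
//     let mut parser = Parser::new("foo: [1, 2]".chars());
//     let mut recv = CollectingReceiver(Vec::new());
//     parser.load(&mut recv, false).unwrap();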
impl<T: Iterator<Item = char>> Parser<T> {
    pub fn new(src: T) -> Parser<T> {
        Parser {
            scanner: Scanner::new(src),
            states: Vec::new(),
            state: State::StreamStart,
            token: None,
            current: None,

            anchors: HashMap::new(),
            anchor_id: 1,
        }
    }

    pub fn next(&mut self) -> ParseResult {
        match self.current {
            None => self.parse(),
            Some(_) => Ok(self.current.take().unwrap()),
        }
    }
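    /// Returns the buffered token, scanning a new one from the source if needed.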
    fn peek_token(&mut self) -> Result<&Token, ScanError> {
        match self.token {
            None => {
                self.token = Some(self.scan_next_token()?);
                Ok(self.token.as_ref().unwrap())
            }
            Some(ref tok) => Ok(tok),
        }
    }

    fn scan_next_token(&mut self) -> Result<Token, ScanError> {
        let token = self.scanner.next();
        match token {
            None => match self.scanner.get_error() {
                None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")),
                Some(e) => Err(e),
            },
            Some(tok) => Ok(tok),
        }
    }

    fn fetch_token(&mut self) -> Token {
        self.token
            .take()
            .expect("fetch_token needs to be preceded by peek_token")
    }

    fn skip(&mut self) {
        self.token = None;
    }

    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap()
    }

    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }
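    /// Runs one step of the state machine, or reports `StreamEnd` once the
    /// parser has reached its final state.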
    fn parse(&mut self) -> ParseResult {
        if self.state == State::End {
            return Ok((Event::StreamEnd, self.scanner.mark()));
        }
        let (ev, mark) = self.state_machine()?;
        Ok((ev, mark))
    }
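    /// Drives the parser to completion, forwarding every event to `recv`. With
    /// `multi` set, all documents in the stream are loaded; otherwise only the
    /// first one is.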
    pub fn load<R: MarkedEventReceiver>(
        &mut self,
        recv: &mut R,
        multi: bool,
    ) -> Result<(), ScanError> {
        if !self.scanner.stream_started() {
            let (ev, mark) = self.next()?;
            assert_eq!(ev, Event::StreamStart);
            recv.on_event(ev, mark);
        }

        if self.scanner.stream_ended() {
            recv.on_event(Event::StreamEnd, self.scanner.mark());
            return Ok(());
        }
        loop {
            let (ev, mark) = self.next()?;
            if ev == Event::StreamEnd {
                recv.on_event(ev, mark);
                return Ok(());
            }
            self.anchors.clear();
            self.load_document(ev, mark, recv)?;
            if !multi {
                break;
            }
        }
        Ok(())
    }

    fn load_document<R: MarkedEventReceiver>(
        &mut self,
        first_ev: Event,
        mark: Marker,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        assert_eq!(first_ev, Event::DocumentStart);
        recv.on_event(first_ev, mark);

        let (ev, mark) = self.next()?;
        self.load_node(ev, mark, recv)?;

        let (ev, mark) = self.next()?;
        assert_eq!(ev, Event::DocumentEnd);
        recv.on_event(ev, mark);

        Ok(())
    }

    fn load_node<R: MarkedEventReceiver>(
        &mut self,
        first_ev: Event,
        mark: Marker,
        recv: &mut R,
    ) -> Result<(), ScanError> {
        match first_ev {
            Event::Alias(..) | Event::Scalar(..) => {
                recv.on_event(first_ev, mark);
                Ok(())
            }
            Event::SequenceStart(_) => {
                recv.on_event(first_ev, mark);
                self.load_sequence(recv)
            }
            Event::MappingStart(_) => {
                recv.on_event(first_ev, mark);
                self.load_mapping(recv)
            }
            _ => {
                println!("UNREACHABLE EVENT: {:?}", first_ev);
                unreachable!();
            }
        }
    }

    fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
        let (mut key_ev, mut key_mark) = self.next()?;
        while key_ev != Event::MappingEnd {
            self.load_node(key_ev, key_mark, recv)?;

            let (ev, mark) = self.next()?;
            self.load_node(ev, mark, recv)?;

            let (ev, mark) = self.next()?;
            key_ev = ev;
            key_mark = mark;
        }
        recv.on_event(key_ev, key_mark);
        Ok(())
    }

    fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
        let (mut ev, mut mark) = self.next()?;
        while ev != Event::SequenceEnd {
            self.load_node(ev, mark, recv)?;

            let (next_ev, next_mark) = self.next()?;
            ev = next_ev;
            mark = next_mark;
        }
        recv.on_event(ev, mark);
        Ok(())
    }
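    /// Dispatches to the handler for the current state; each handler emits one
    /// event and decides the next state.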
    fn state_machine(&mut self) -> ParseResult {
        match self.state {
            State::StreamStart => self.stream_start(),

            State::ImplicitDocumentStart => self.document_start(true),
            State::DocumentStart => self.document_start(false),
            State::DocumentContent => self.document_content(),
            State::DocumentEnd => self.document_end(),

            State::BlockNode => self.parse_node(true, false),
            State::BlockMappingFirstKey => self.block_mapping_key(true),
            State::BlockMappingKey => self.block_mapping_key(false),
            State::BlockMappingValue => self.block_mapping_value(),

            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
            State::BlockSequenceEntry => self.block_sequence_entry(false),

            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
            State::FlowSequenceEntry => self.flow_sequence_entry(false),

            State::FlowMappingFirstKey => self.flow_mapping_key(true),
            State::FlowMappingKey => self.flow_mapping_key(false),
            State::FlowMappingValue => self.flow_mapping_value(false),

            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),

            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
            State::FlowMappingEmptyValue => self.flow_mapping_value(true),

            State::End => unreachable!(),
        }
    }

    fn stream_start(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(mark, TokenType::StreamStart(_)) => {
                self.state = State::ImplicitDocumentStart;
                self.skip();
                Ok((Event::StreamStart, mark))
            }
            Token(mark, _) => Err(ScanError::new(mark, "did not find expected <stream-start>")),
        }
    }

    fn document_start(&mut self, implicit: bool) -> ParseResult {
        if !implicit {
            while let TokenType::DocumentEnd = self.peek_token()?.1 {
                self.skip();
            }
        }

        match *self.peek_token()? {
            Token(mark, TokenType::StreamEnd) => {
                self.state = State::End;
                self.skip();
                Ok((Event::StreamEnd, mark))
            }
            Token(_, TokenType::VersionDirective(..))
            | Token(_, TokenType::TagDirective(..))
            | Token(_, TokenType::DocumentStart) => self._explicit_document_start(),
            Token(mark, _) if implicit => {
                self.parser_process_directives()?;
                self.push_state(State::DocumentEnd);
                self.state = State::BlockNode;
                Ok((Event::DocumentStart, mark))
            }
            _ => self._explicit_document_start(),
        }
    }

    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
        loop {
            match self.peek_token()?.1 {
                TokenType::VersionDirective(_, _) => {}
                TokenType::TagDirective(..) => {}
                _ => break,
            }
            self.skip();
        }
        Ok(())
    }

    fn _explicit_document_start(&mut self) -> ParseResult {
        self.parser_process_directives()?;
        match *self.peek_token()? {
            Token(mark, TokenType::DocumentStart) => {
                self.push_state(State::DocumentEnd);
                self.state = State::DocumentContent;
                self.skip();
                Ok((Event::DocumentStart, mark))
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "did not find expected <document start>",
            )),
        }
    }

    fn document_content(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(mark, TokenType::VersionDirective(..))
            | Token(mark, TokenType::TagDirective(..))
            | Token(mark, TokenType::DocumentStart)
            | Token(mark, TokenType::DocumentEnd)
            | Token(mark, TokenType::StreamEnd) => {
                self.pop_state();
                Ok((Event::empty_scalar(), mark))
            }
            _ => self.parse_node(true, false),
        }
    }

    fn document_end(&mut self) -> ParseResult {
        let mut _implicit = true;
        let marker: Marker = match *self.peek_token()? {
            Token(mark, TokenType::DocumentEnd) => {
                self.skip();
                _implicit = false;
                mark
            }
            Token(mark, _) => mark,
        };

        self.state = State::DocumentStart;
        Ok((Event::DocumentEnd, marker))
    }
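    /// Assigns the next numeric ID to `name`. IDs start at 1, so 0 can be used
    /// in events to mean "no anchor".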
    fn register_anchor(&mut self, name: String, _: &Marker) -> Result<usize, ScanError> {
        let new_id = self.anchor_id;
        self.anchor_id += 1;
        self.anchors.insert(name, new_id);
        Ok(new_id)
    }
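    /// Parses a single node. `block` allows block collections at this position;
    /// `indentless_sequence` allows a `-` entry that is not indented relative to
    /// its parent mapping key.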
    fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
        let mut anchor_id = 0;
        let mut tag = None;
        match *self.peek_token()? {
            Token(_, TokenType::Alias(_)) => {
                self.pop_state();
                if let Token(mark, TokenType::Alias(name)) = self.fetch_token() {
                    match self.anchors.get(&name) {
                        None => {
                            return Err(ScanError::new(
                                mark,
                                "while parsing node, found unknown anchor",
                            ))
                        }
                        Some(id) => return Ok((Event::Alias(*id), mark)),
                    }
                } else {
                    unreachable!()
                }
            }
            Token(_, TokenType::Anchor(_)) => {
                if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                    anchor_id = self.register_anchor(name, &mark)?;
                    if let TokenType::Tag(..) = self.peek_token()?.1 {
                        if let tg @ TokenType::Tag(..) = self.fetch_token().1 {
                            tag = Some(tg);
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            Token(_, TokenType::Tag(..)) => {
                if let tg @ TokenType::Tag(..) = self.fetch_token().1 {
                    tag = Some(tg);
                    if let TokenType::Anchor(_) = self.peek_token()?.1 {
                        if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
                            anchor_id = self.register_anchor(name, &mark)?;
                        } else {
                            unreachable!()
                        }
                    }
                } else {
                    unreachable!()
                }
            }
            _ => {}
        }
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry) if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id), mark))
            }
            Token(_, TokenType::Scalar(..)) => {
                self.pop_state();
                if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
                    Ok((Event::Scalar(v, style, anchor_id, tag), mark))
                } else {
                    unreachable!()
                }
            }
            Token(mark, TokenType::FlowSequenceStart) => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id), mark))
            }
            Token(mark, TokenType::FlowMappingStart) => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id), mark))
            }
            Token(mark, TokenType::BlockSequenceStart) if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id), mark))
            }
            Token(mark, TokenType::BlockMappingStart) if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id), mark))
            }
            Token(mark, _) if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "while parsing a node, did not find expected node content",
            )),
        }
    }

    fn block_mapping_key(&mut self, first: bool) -> ParseResult {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(_, TokenType::Key) => {
                self.skip();
                match *self.peek_token()? {
                    Token(mark, TokenType::Key)
                    | Token(mark, TokenType::Value)
                    | Token(mark, TokenType::BlockEnd) => {
                        self.state = State::BlockMappingValue;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::BlockMappingValue);
                        self.parse_node(true, true)
                    }
                }
            }
            Token(mark, TokenType::Value) => {
                self.state = State::BlockMappingValue;
                Ok((Event::empty_scalar(), mark))
            }
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::MappingEnd, mark))
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "while parsing a block mapping, did not find expected key",
            )),
        }
    }

    fn block_mapping_value(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::Value) => {
                self.skip();
                match *self.peek_token()? {
                    Token(mark, TokenType::Key)
                    | Token(mark, TokenType::Value)
                    | Token(mark, TokenType::BlockEnd) => {
                        self.state = State::BlockMappingKey;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::BlockMappingKey);
                        self.parse_node(true, true)
                    }
                }
            }
            Token(mark, _) => {
                self.state = State::BlockMappingKey;
                Ok((Event::empty_scalar(), mark))
            }
        }
    }

    fn flow_mapping_key(&mut self, first: bool) -> ParseResult {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        let marker: Marker = {
            match *self.peek_token()? {
                Token(mark, TokenType::FlowMappingEnd) => mark,
                Token(mark, _) => {
                    if !first {
                        match *self.peek_token()? {
                            Token(_, TokenType::FlowEntry) => self.skip(),
                            Token(mark, _) => {
                                return Err(ScanError::new(
                                    mark,
                                    "while parsing a flow mapping, did not find expected ',' or '}'",
                                ))
                            }
                        }
                    }

                    match *self.peek_token()? {
                        Token(_, TokenType::Key) => {
                            self.skip();
                            match *self.peek_token()? {
                                Token(mark, TokenType::Value)
                                | Token(mark, TokenType::FlowEntry)
                                | Token(mark, TokenType::FlowMappingEnd) => {
                                    self.state = State::FlowMappingValue;
                                    return Ok((Event::empty_scalar(), mark));
                                }
                                _ => {
                                    self.push_state(State::FlowMappingValue);
                                    return self.parse_node(false, false);
                                }
                            }
                        }
                        Token(marker, TokenType::Value) => {
                            self.state = State::FlowMappingValue;
                            return Ok((Event::empty_scalar(), marker));
                        }
                        Token(_, TokenType::FlowMappingEnd) => (),
                        _ => {
                            self.push_state(State::FlowMappingEmptyValue);
                            return self.parse_node(false, false);
                        }
                    }

                    mark
                }
            }
        };

        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, marker))
    }

    fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
        let mark: Marker = {
            if empty {
                let Token(mark, _) = *self.peek_token()?;
                self.state = State::FlowMappingKey;
                return Ok((Event::empty_scalar(), mark));
            } else {
                match *self.peek_token()? {
                    Token(marker, TokenType::Value) => {
                        self.skip();
                        match self.peek_token()?.1 {
                            TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
                            _ => {
                                self.push_state(State::FlowMappingKey);
                                return self.parse_node(false, false);
                            }
                        }
                        marker
                    }
                    Token(marker, _) => marker,
                }
            }
        };

        self.state = State::FlowMappingKey;
        Ok((Event::empty_scalar(), mark))
    }

    fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, mark));
            }
            Token(_, TokenType::FlowEntry) if !first => {
                self.skip();
            }
            Token(mark, _) if !first => {
                return Err(ScanError::new(
                    mark,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
            _ => {}
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(mark, TokenType::Key) => {
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }

    fn indentless_sequence_entry(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::BlockEntry) => (),
            Token(mark, _) => {
                self.pop_state();
                return Ok((Event::SequenceEnd, mark));
            }
        }
        self.skip();
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry)
            | Token(mark, TokenType::Key)
            | Token(mark, TokenType::Value)
            | Token(mark, TokenType::BlockEnd) => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::empty_scalar(), mark))
            }
            _ => {
                self.push_state(State::IndentlessSequenceEntry);
                self.parse_node(true, false)
            }
        }
    }

    fn block_sequence_entry(&mut self, first: bool) -> ParseResult {
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(_, TokenType::BlockEntry) => {
                self.skip();
                match *self.peek_token()? {
                    Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => {
                        self.state = State::BlockSequenceEntry;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::BlockSequenceEntry);
                        self.parse_node(true, false)
                    }
                }
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "while parsing a block collection, did not find expected '-' indicator",
            )),
        }
    }

    fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(mark, TokenType::Value)
            | Token(mark, TokenType::FlowEntry)
            | Token(mark, TokenType::FlowSequenceEnd) => {
                self.skip();
                self.state = State::FlowSequenceEntryMappingValue;
                Ok((Event::empty_scalar(), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntryMappingValue);
                self.parse_node(false, false)
            }
        }
    }

    fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::Value) => {
                self.skip();
                self.state = State::FlowSequenceEntryMappingValue;
                match *self.peek_token()? {
                    Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => {
                        self.state = State::FlowSequenceEntryMappingEnd;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::FlowSequenceEntryMappingEnd);
                        self.parse_node(false, false)
                    }
                }
            }
            Token(mark, _) => {
                self.state = State::FlowSequenceEntryMappingEnd;
                Ok((Event::empty_scalar(), mark))
            }
        }
    }

    fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
        self.state = State::FlowSequenceEntry;
        Ok((Event::MappingEnd, self.scanner.mark()))
    }
}