xmlparser/
stream.rs
1use core::char;
2use core::cmp;
3use core::ops::Range;
4use core::str;
5
6use crate::{
7 StreamError,
8 StrSpan,
9 TextPos,
10 XmlByteExt,
11 XmlCharExt,
12};
13
14type Result<T> = ::core::result::Result<T, StreamError>;
15
16
/// A reference parsed from the stream: the `&...;` construct.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Reference<'a> {
    /// A named entity reference, holding the entity name.
    ///
    /// The names `quot`, `amp`, `apos`, `lt` and `gt` are never reported here;
    /// the stream resolves them to [`Reference::Char`].
    Entity(&'a str),

    /// A single character, produced either by a numeric character reference
    /// (`&#...;` / `&#x...;`) or by one of the predefined entity names.
    Char(char),
}
30
31
32#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
34pub struct Stream<'a> {
35 pos: usize,
36 end: usize,
37 span: StrSpan<'a>,
38}
39
40impl<'a> From<&'a str> for Stream<'a> {
41 #[inline]
42 fn from(text: &'a str) -> Self {
43 Stream {
44 pos: 0,
45 end: text.len(),
46 span: text.into(),
47 }
48 }
49}
50
51impl<'a> From<StrSpan<'a>> for Stream<'a> {
52 #[inline]
53 fn from(span: StrSpan<'a>) -> Self {
54 Stream {
55 pos: 0,
56 end: span.as_str().len(),
57 span,
58 }
59 }
60}
61
62impl<'a> Stream<'a> {
63 #[inline]
65 pub fn from_substr(text: &'a str, fragment: Range<usize>) -> Self {
66 Stream {
67 pos: fragment.start,
68 end: fragment.end,
69 span: text.into(),
70 }
71 }
72
73 #[inline]
75 pub fn span(&self) -> StrSpan<'a> {
76 self.span
77 }
78
79 #[inline]
81 pub fn pos(&self) -> usize {
82 self.pos
83 }
84
85 #[inline]
89 pub fn jump_to_end(&mut self) {
90 self.pos = self.end;
91 }
92
93 #[inline]
105 pub fn at_end(&self) -> bool {
106 self.pos >= self.end
107 }
108
109 #[inline]
115 pub fn curr_byte(&self) -> Result<u8> {
116 if self.at_end() {
117 return Err(StreamError::UnexpectedEndOfStream);
118 }
119
120 Ok(self.curr_byte_unchecked())
121 }
122
123 #[inline]
129 pub fn curr_byte_unchecked(&self) -> u8 {
130 self.span.as_bytes()[self.pos]
131 }
132
133 #[inline]
139 pub fn next_byte(&self) -> Result<u8> {
140 if self.pos + 1 >= self.end {
141 return Err(StreamError::UnexpectedEndOfStream);
142 }
143
144 Ok(self.span.as_bytes()[self.pos + 1])
145 }
146
147 #[inline]
159 pub fn advance(&mut self, n: usize) {
160 debug_assert!(self.pos + n <= self.end);
161 self.pos += n;
162 }
163
164 #[inline]
179 pub fn starts_with(&self, text: &[u8]) -> bool {
180 self.span.as_bytes()[self.pos..self.end].starts_with(text)
181 }
182
183 pub fn consume_byte(&mut self, c: u8) -> Result<()> {
202 let curr = self.curr_byte()?;
203 if curr != c {
204 return Err(StreamError::InvalidChar(curr, c, self.gen_text_pos()));
205 }
206
207 self.advance(1);
208 Ok(())
209 }
210
211 pub fn try_consume_byte(&mut self, c: u8) -> bool {
215 match self.curr_byte() {
216 Ok(b) if b == c => {
217 self.advance(1);
218 true
219 }
220 _ => false,
221 }
222 }
223
224 pub fn skip_string(&mut self, text: &'static [u8]) -> Result<()> {
230 if !self.starts_with(text) {
231 let pos = self.gen_text_pos();
232
233 let expected = str::from_utf8(text).unwrap();
235
236 return Err(StreamError::InvalidString(expected, pos));
237 }
238
239 self.advance(text.len());
240 Ok(())
241 }
242
243 #[inline]
247 pub fn consume_bytes<F>(&mut self, f: F) -> StrSpan<'a>
248 where F: Fn(&Stream, u8) -> bool
249 {
250 let start = self.pos;
251 self.skip_bytes(f);
252 self.slice_back(start)
253 }
254
255 pub fn skip_bytes<F>(&mut self, f: F)
257 where F: Fn(&Stream, u8) -> bool
258 {
259 while !self.at_end() && f(self, self.curr_byte_unchecked()) {
260 self.advance(1);
261 }
262 }
263
264 #[inline]
268 pub fn consume_chars<F>(&mut self, f: F) -> Result<StrSpan<'a>>
269 where F: Fn(&Stream, char) -> bool
270 {
271 let start = self.pos;
272 self.skip_chars(f)?;
273 Ok(self.slice_back(start))
274 }
275
276 #[inline]
278 pub fn skip_chars<F>(&mut self, f: F) -> Result<()>
279 where F: Fn(&Stream, char) -> bool
280 {
281 for c in self.chars() {
282 if !c.is_xml_char() {
283 return Err(StreamError::NonXmlChar(c, self.gen_text_pos()));
284 } else if f(self, c) {
285 self.advance(c.len_utf8());
286 } else {
287 break;
288 }
289 }
290
291 Ok(())
292 }
293
294 #[inline]
295 pub(crate) fn chars(&self) -> str::Chars<'a> {
296 self.span.as_str()[self.pos..self.end].chars()
297 }
298
299 #[inline]
301 pub fn slice_back(&self, pos: usize) -> StrSpan<'a> {
302 self.span.slice_region(pos, self.pos)
303 }
304
305 #[inline]
307 pub fn slice_tail(&self) -> StrSpan<'a> {
308 self.span.slice_region(self.pos, self.end)
309 }
310
311 #[inline]
315 pub fn skip_spaces(&mut self) {
316 while !self.at_end() && self.curr_byte_unchecked().is_xml_space() {
317 self.advance(1);
318 }
319 }
320
321 #[inline]
323 pub fn starts_with_space(&self) -> bool {
324 !self.at_end() && self.curr_byte_unchecked().is_xml_space()
325 }
326
327 pub fn consume_spaces(&mut self) -> Result<()> {
337 if self.at_end() {
338 return Err(StreamError::UnexpectedEndOfStream);
339 }
340
341 if !self.starts_with_space() {
342 return Err(StreamError::InvalidSpace(self.curr_byte_unchecked(), self.gen_text_pos()));
343 }
344
345 self.skip_spaces();
346 Ok(())
347 }
348
349 pub fn try_consume_reference(&mut self) -> Option<Reference<'a>> {
353 let start = self.pos();
354
355 let mut s = self.clone();
357 match s.consume_reference() {
358 Ok(r) => {
359 self.advance(s.pos() - start);
362 Some(r)
363 }
364 Err(_) => {
365 None
366 }
367 }
368 }
369
370 pub fn consume_reference(&mut self) -> Result<Reference<'a>> {
378 self._consume_reference().map_err(|_| StreamError::InvalidReference)
379 }
380
381 #[inline(never)]
382 fn _consume_reference(&mut self) -> Result<Reference<'a>> {
383 if !self.try_consume_byte(b'&') {
384 return Err(StreamError::InvalidReference);
385 }
386
387 let reference = if self.try_consume_byte(b'#') {
388 let (value, radix) = if self.try_consume_byte(b'x') {
389 let value = self.consume_bytes(|_, c| c.is_xml_hex_digit()).as_str();
390 (value, 16)
391 } else {
392 let value = self.consume_bytes(|_, c| c.is_xml_digit()).as_str();
393 (value, 10)
394 };
395
396 let n = u32::from_str_radix(value, radix).map_err(|_| StreamError::InvalidReference)?;
397
398 let c = char::from_u32(n).unwrap_or('\u{FFFD}');
399 if !c.is_xml_char() {
400 return Err(StreamError::InvalidReference);
401 }
402
403 Reference::Char(c)
404 } else {
405 let name = self.consume_name()?;
406 match name.as_str() {
407 "quot" => Reference::Char('"'),
408 "amp" => Reference::Char('&'),
409 "apos" => Reference::Char('\''),
410 "lt" => Reference::Char('<'),
411 "gt" => Reference::Char('>'),
412 _ => Reference::Entity(name.as_str()),
413 }
414 };
415
416 self.consume_byte(b';')?;
417
418 Ok(reference)
419 }
420
421 pub fn consume_name(&mut self) -> Result<StrSpan<'a>> {
430 let start = self.pos();
431 self.skip_name()?;
432
433 let name = self.slice_back(start);
434 if name.is_empty() {
435 return Err(StreamError::InvalidName);
436 }
437
438 Ok(name)
439 }
440
441 pub fn skip_name(&mut self) -> Result<()> {
449 let mut iter = self.chars();
450 if let Some(c) = iter.next() {
451 if c.is_xml_name_start() {
452 self.advance(c.len_utf8());
453 } else {
454 return Err(StreamError::InvalidName);
455 }
456 }
457
458 for c in iter {
459 if c.is_xml_name() {
460 self.advance(c.len_utf8());
461 } else {
462 break;
463 }
464 }
465
466 Ok(())
467 }
468
469 #[inline(never)]
477 pub fn consume_qname(&mut self) -> Result<(StrSpan<'a>, StrSpan<'a>)> {
478 let start = self.pos();
479
480 let mut splitter = None;
481
482 while !self.at_end() {
483 let b = self.curr_byte_unchecked();
485 if b < 128 {
486 if b == b':' {
487 if splitter.is_none() {
488 splitter = Some(self.pos());
489 self.advance(1);
490 } else {
491 return Err(StreamError::InvalidName);
493 }
494 } else if b.is_xml_name() {
495 self.advance(1);
496 } else {
497 break;
498 }
499 } else {
500 match self.chars().nth(0) {
502 Some(c) if c.is_xml_name() => {
503 self.advance(c.len_utf8());
504 }
505 _ => break,
506 }
507 }
508 }
509
510 let (prefix, local) = if let Some(splitter) = splitter {
511 let prefix = self.span().slice_region(start, splitter);
512 let local = self.slice_back(splitter + 1);
513 (prefix, local)
514 } else {
515 let local = self.slice_back(start);
516 ("".into(), local)
517 };
518
519 if let Some(c) = prefix.as_str().chars().nth(0) {
521 if !c.is_xml_name_start() {
522 return Err(StreamError::InvalidName);
523 }
524 }
525
526 if let Some(c) = local.as_str().chars().nth(0) {
528 if !c.is_xml_name_start() {
529 return Err(StreamError::InvalidName);
530 }
531 } else {
532 return Err(StreamError::InvalidName);
534 }
535
536 Ok((prefix, local))
537 }
538
539 pub fn consume_eq(&mut self) -> Result<()> {
548 self.skip_spaces();
549 self.consume_byte(b'=')?;
550 self.skip_spaces();
551
552 Ok(())
553 }
554
555 pub fn consume_quote(&mut self) -> Result<u8> {
564 let c = self.curr_byte()?;
565 if c == b'\'' || c == b'"' {
566 self.advance(1);
567 Ok(c)
568 } else {
569 Err(StreamError::InvalidQuote(c, self.gen_text_pos()))
570 }
571 }
572
573 #[inline(never)]
577 pub fn gen_text_pos(&self) -> TextPos {
578 let text = self.span.as_str();
579 let end = self.pos;
580
581 let row = Self::calc_curr_row(text, end);
582 let col = Self::calc_curr_col(text, end);
583 TextPos::new(row, col)
584 }
585
586 #[inline(never)]
599 pub fn gen_text_pos_from(&self, pos: usize) -> TextPos {
600 let mut s = self.clone();
601 s.pos = cmp::min(pos, s.span.as_str().len());
602 s.gen_text_pos()
603 }
604
/// Returns the 1-based row of the byte offset `end` inside `text`.
///
/// The row is one plus the number of `\n` bytes located before `end`.
///
/// `end` is clamped to `text.len()`. This runs while an error is being
/// reported, so an out-of-range cursor must not turn that error into a
/// slice-index panic.
fn calc_curr_row(text: &str, end: usize) -> u32 {
    let end = end.min(text.len());

    text.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .fold(1, |row, _| row + 1)
}
615
/// Returns the 1-based column of the byte offset `end` inside `text`.
///
/// The column is one plus the number of characters between the previous
/// `\n` (or the start of the text) and `end`.
///
/// `end` is clamped to `text.len()`, and an offset that falls inside a
/// multi-byte character is moved back to the start of that character, which
/// yields the column of the character containing the offset. This runs while
/// an error is being reported, so a string-slicing panic here would replace
/// the real error.
fn calc_curr_col(text: &str, end: usize) -> u32 {
    let mut end = end.min(text.len());
    // Offset 0 is always a char boundary, so this loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    text[..end]
        .chars()
        .rev()
        .take_while(|&ch| ch != '\n')
        .fold(1, |col, _| col + 1)
}
628}