csv_async/byte_record.rs
1use std::cmp;
2use std::fmt;
3use std::iter::FromIterator;
4use std::ops::{self, Range};
5use std::result;
6
7#[cfg(feature = "with_serde")]
8use serde::de::Deserialize;
9
10#[cfg(feature = "with_serde")]
11use crate::deserializer::deserialize_byte_record;
12#[cfg(feature = "with_serde")]
13use crate::error::Result;
14use crate::error::{new_utf8_error, Utf8Error};
15use crate::string_record::StringRecord;
16
/// A single CSV record stored as raw bytes.
///
/// A byte record permits reading or writing CSV rows that are not UTF-8.
/// In general, you should prefer using a
/// [`StringRecord`](struct.StringRecord.html)
/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
/// it.
///
/// If you are using the Serde (de)serialization APIs, then you probably never
/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
/// are some circumstances in which you might need to use a raw record type
/// while still using Serde. For example, if you need to deserialize possibly
/// invalid UTF-8 fields, then you'll need to first read your record into a
/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
/// reason for using the raw record de-serialization APIs is if you're using
/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
///
/// Two `ByteRecord`s are compared on the basis of their field data. Any
/// position information associated with the records is ignored.
// `Eq` is derived, while `PartialEq` is written by hand (see the impl in this
// file) so that only field data takes part in the comparison. The actual
// record state lives behind a `Box` in `ByteRecordInner`.
#[derive(Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
38
39impl PartialEq for ByteRecord {
40 fn eq(&self, other: &ByteRecord) -> bool {
41 if self.len() != other.len() {
42 return false;
43 }
44 self.iter().zip(other.iter()).all(|e| e.0 == e.1)
45 }
46}
47
48impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
49 fn eq(&self, other: &Vec<T>) -> bool {
50 self.iter_eq(other)
51 }
52}
53
54impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
55 fn eq(&self, other: &Vec<T>) -> bool {
56 self.iter_eq(other)
57 }
58}
59
60impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
61 fn eq(&self, other: &[T]) -> bool {
62 self.iter_eq(other)
63 }
64}
65
66impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
67 fn eq(&self, other: &[T]) -> bool {
68 self.iter_eq(other)
69 }
70}
71
72impl fmt::Debug for ByteRecord {
73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 write!(f, "ByteRecord(")?;
75 f.debug_list()
76 .entries(self.iter().map(crate::debug::Bytes))
77 .finish()?;
78 write!(f, ")")?;
79 Ok(())
80 }
81}
82
/// The inner portion of a byte record.
///
/// We use this memory layout so that moving a `ByteRecord` only requires
/// moving a single pointer. The optimization is dubious at best, but does
/// seem to result in slightly better numbers in microbenchmarks. Methinks this
/// may heavily depend on the underlying allocator.
///
/// Note that the `PartialEq` derived here compares all three members
/// (including `pos`), whereas `ByteRecord` implements its own comparison.
#[derive(Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// Only the prefix up to `bounds.end()` holds field data; the vector is
    /// zero-filled when it is created or grown.
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}
98
99impl Default for ByteRecord {
100 #[inline]
101 fn default() -> ByteRecord {
102 ByteRecord::new()
103 }
104}
105
106impl Clone for ByteRecord {
107 /// Clone this record, but only copy `fields` up to the end of bounds.
108 /// This is useful when one wants to copy a record, but not necessarily any
109 /// excess capacity in that record.
110 #[inline]
111 fn clone(&self) -> ByteRecord {
112 let mut br = ByteRecord::new();
113 br.0.pos = self.0.pos.clone();
114 br.0.bounds = self.0.bounds.clone();
115 br.0.fields = self.0.fields[..self.0.bounds.end()].to_vec();
116 br
117 }
118}
119
120impl ByteRecord {
121 /// Create a new empty `ByteRecord`.
122 ///
123 /// Note that you may find the `ByteRecord::from` constructor more
124 /// convenient, which is provided by an impl on the `From` trait.
125 ///
126 /// # Example: create an empty record
127 ///
128 /// ```
129 /// use csv_async::ByteRecord;
130 ///
131 /// let record = ByteRecord::new();
132 /// assert_eq!(record.len(), 0);
133 /// ```
134 ///
135 /// # Example: initialize a record from a `Vec`
136 ///
137 /// ```
138 /// use csv_async::ByteRecord;
139 ///
140 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
141 /// assert_eq!(record.len(), 3);
142 /// ```
143 #[inline]
144 pub fn new() -> ByteRecord {
145 ByteRecord::with_capacity(0, 0)
146 }
147
148 /// Create a new empty `ByteRecord` with the given capacity settings.
149 ///
150 /// `buffer` refers to the capacity of the buffer used to store the
151 /// actual row contents. `fields` refers to the number of fields one
152 /// might expect to store.
153 #[inline]
154 pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
155 ByteRecord(Box::new(ByteRecordInner {
156 pos: None,
157 fields: vec![0; buffer],
158 bounds: Bounds::with_capacity(fields),
159 }))
160 }
161
162 /// Returns an iterator over all fields in this record.
163 ///
164 /// # Example
165 ///
166 /// This example shows how to iterate over each field in a `ByteRecord`.
167 ///
168 /// ```
169 /// use csv_async::ByteRecord;
170 ///
171 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
172 /// for field in record.iter() {
173 /// assert!(field == b"a" || field == b"b" || field == b"c");
174 /// }
175 /// ```
176 #[inline]
177 pub fn iter(&self) -> ByteRecordIter {
178 self.into_iter()
179 }
180
181 /// Return the field at index `i`.
182 ///
183 /// If no field at index `i` exists, then this returns `None`.
184 ///
185 /// # Example
186 ///
187 /// ```
188 /// use csv_async::ByteRecord;
189 ///
190 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
191 /// assert_eq!(record.get(1), Some(&b"b"[..]));
192 /// assert_eq!(record.get(3), None);
193 /// ```
194 #[inline]
195 pub fn get(&self, i: usize) -> Option<&[u8]> {
196 self.0.bounds.get(i).map(|range| &self.0.fields[range])
197 }
198
199 /// Returns true if and only if this record is empty.
200 ///
201 /// # Example
202 ///
203 /// ```
204 /// use csv_async::ByteRecord;
205 ///
206 /// assert!(ByteRecord::new().is_empty());
207 /// ```
208 #[inline]
209 pub fn is_empty(&self) -> bool {
210 self.len() == 0
211 }
212
213 /// Returns the number of fields in this record.
214 ///
215 /// # Example
216 ///
217 /// ```
218 /// use csv_async::ByteRecord;
219 ///
220 /// let record = ByteRecord::from(vec!["a", "b", "c"]);
221 /// assert_eq!(record.len(), 3);
222 /// ```
223 #[inline]
224 pub fn len(&self) -> usize {
225 self.0.bounds.len()
226 }
227
228 /// Truncate this record to `n` fields.
229 ///
230 /// If `n` is greater than the number of fields in this record, then this
231 /// has no effect.
232 ///
233 /// # Example
234 ///
235 /// ```
236 /// use csv_async::ByteRecord;
237 ///
238 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
239 /// assert_eq!(record.len(), 3);
240 /// record.truncate(1);
241 /// assert_eq!(record.len(), 1);
242 /// assert_eq!(record, vec!["a"]);
243 /// ```
244 #[inline]
245 pub fn truncate(&mut self, n: usize) {
246 if n <= self.len() {
247 self.0.bounds.len = n;
248 }
249 }
250
251 /// Clear this record so that it has zero fields.
252 ///
253 /// This is equivalent to calling `truncate(0)`.
254 ///
255 /// Note that it is not necessary to clear the record to reuse it with
256 /// the CSV reader.
257 ///
258 /// # Example
259 ///
260 /// ```
261 /// use csv_async::ByteRecord;
262 ///
263 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
264 /// assert_eq!(record.len(), 3);
265 /// record.clear();
266 /// assert_eq!(record.len(), 0);
267 /// ```
268 #[inline]
269 pub fn clear(&mut self) {
270 self.truncate(0);
271 }
272
273 /// Trim the fields of this record so that leading and trailing whitespace
274 /// is removed.
275 ///
276 /// This method uses the ASCII definition of whitespace. That is, only
277 /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
278 ///
279 /// # Example
280 ///
281 /// ```
282 /// use csv_async::ByteRecord;
283 ///
284 /// let mut record = ByteRecord::from(vec![
285 /// " ", "\tfoo", "bar ", "b a z",
286 /// ]);
287 /// record.trim();
288 /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
289 /// ```
290 pub fn trim(&mut self) {
291 let length = self.len();
292 if length == 0 {
293 return;
294 }
295 // TODO: We could likely do this in place, but for now, we allocate.
296 let mut trimmed =
297 ByteRecord::with_capacity(self.as_slice().len(), self.len());
298 trimmed.set_position(self.position().cloned());
299 for field in self.iter() {
300 trimmed.push_field(trim_ascii(field));
301 }
302 *self = trimmed;
303 }
304
305 /// Add a new field to this record.
306 ///
307 /// # Example
308 ///
309 /// ```
310 /// use csv_async::ByteRecord;
311 ///
312 /// let mut record = ByteRecord::new();
313 /// record.push_field(b"foo");
314 /// assert_eq!(&record[0], b"foo");
315 /// ```
316 #[inline]
317 pub fn push_field(&mut self, field: &[u8]) {
318 let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
319 while e > self.0.fields.len() {
320 self.expand_fields();
321 }
322 self.0.fields[s..e].copy_from_slice(field);
323 self.0.bounds.add(e);
324 }
325
326 /// Return the position of this record, if available.
327 ///
328 /// # Example
329 ///
330 /// ```
331 /// use std::error::Error;
332 /// use futures::stream::{self, StreamExt};
333 /// use csv_async::{ByteRecord, AsyncReaderBuilder};
334 ///
335 /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); }
336 /// async fn example() -> Result<(), Box<dyn Error>> {
337 /// let mut record = ByteRecord::new();
338 /// let mut rdr = AsyncReaderBuilder::new()
339 /// .has_headers(false)
340 /// .create_reader("a,b,c\nx,y,z".as_bytes());
341 ///
342 /// assert!(rdr.read_byte_record(&mut record).await?);
343 /// {
344 /// let pos = record.position().expect("a record position");
345 /// assert_eq!(pos.byte(), 0);
346 /// assert_eq!(pos.line(), 1);
347 /// assert_eq!(pos.record(), 0);
348 /// }
349 ///
350 /// assert!(rdr.read_byte_record(&mut record).await?);
351 /// {
352 /// let pos = record.position().expect("a record position");
353 /// assert_eq!(pos.byte(), 6);
354 /// assert_eq!(pos.line(), 2);
355 /// assert_eq!(pos.record(), 1);
356 /// }
357 ///
358 /// // Finish the CSV reader for good measure.
359 /// assert!(!rdr.read_byte_record(&mut record).await?);
360 /// Ok(())
361 /// }
362 /// ```
363 #[inline]
364 pub fn position(&self) -> Option<&Position> {
365 self.0.pos.as_ref()
366 }
367
368 /// Set the position of this record.
369 ///
370 /// # Example
371 ///
372 /// ```
373 /// use csv_async::{ByteRecord, Position};
374 ///
375 /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
376 /// let mut pos = Position::new();
377 /// pos.set_byte(100);
378 /// pos.set_line(4);
379 /// pos.set_record(2);
380 ///
381 /// record.set_position(Some(pos.clone()));
382 /// assert_eq!(record.position(), Some(&pos));
383 /// ```
384 #[inline]
385 pub fn set_position(&mut self, pos: Option<Position>) {
386 self.0.pos = pos;
387 }
388
389 /// Return the start and end position of a field in this record.
390 ///
391 /// If no such field exists at the given index, then return `None`.
392 ///
393 /// The range returned can be used with the slice returned by `as_slice`.
394 ///
395 /// # Example
396 ///
397 /// ```
398 /// use csv_async::ByteRecord;
399 ///
400 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
401 /// let range = record.range(1).expect("a record range");
402 /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
403 /// ```
404 #[inline]
405 pub fn range(&self, i: usize) -> Option<Range<usize>> {
406 self.0.bounds.get(i)
407 }
408
409 /// Return the entire row as a single byte slice. The slice returned stores
410 /// all fields contiguously. The boundaries of each field can be determined
411 /// via the `range` method.
412 ///
413 /// # Example
414 ///
415 /// ```
416 /// use csv_async::ByteRecord;
417 ///
418 /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
419 /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
420 /// ```
421 #[inline]
422 pub fn as_slice(&self) -> &[u8] {
423 &self.0.fields[..self.0.bounds.end()]
424 }
425
426 /// Retrieve the underlying parts of a byte record.
427 #[inline]
428 pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
429 let inner = &mut *self.0;
430 (&mut inner.fields, &mut inner.bounds.ends)
431 }
432
433 /// Set the number of fields in the given record record.
434 #[inline]
435 pub(crate) fn set_len(&mut self, len: usize) {
436 self.0.bounds.len = len;
437 }
438
439 /// Expand the capacity for storing fields.
440 #[inline]
441 pub(crate) fn expand_fields(&mut self) {
442 let new_len = self.0.fields.len().checked_mul(2).unwrap();
443 self.0.fields.resize(cmp::max(4, new_len), 0);
444 }
445
446 /// Expand the capacity for storing field ending positions.
447 #[inline]
448 pub(crate) fn expand_ends(&mut self) {
449 self.0.bounds.expand();
450 }
451
452 /// Validate the given record as UTF-8.
453 ///
454 /// If it's not UTF-8, return an error.
455 #[inline]
456 pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
457 // If the entire buffer is ASCII, then we have nothing to fear.
458 if self.0.fields[..self.0.bounds.end()].is_ascii() {
459 return Ok(());
460 }
461 // Otherwise, we must check each field individually to ensure that
462 // it's valid UTF-8.
463 for (i, field) in self.iter().enumerate() {
464 if let Err(err) = std::str::from_utf8(field) {
465 return Err(new_utf8_error(i, err.valid_up_to()));
466 }
467 }
468 Ok(())
469 }
470
471 /// Compare the given byte record with the iterator of fields for equality.
472 pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
473 where
474 I: IntoIterator<Item = T>,
475 T: AsRef<[u8]>,
476 {
477 let mut it_record = self.iter();
478 let mut it_other = other.into_iter();
479 loop {
480 match (it_record.next(), it_other.next()) {
481 (None, None) => return true,
482 (None, Some(_)) | (Some(_), None) => return false,
483 (Some(x), Some(y)) => {
484 if x != y.as_ref() {
485 return false;
486 }
487 }
488 }
489 }
490 }
491 /// Deserialize this record.
492 ///
493 /// The `D` type parameter refers to the type that this record should be
494 /// deserialized into. The `'de` lifetime refers to the lifetime of the
495 /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
496 /// that borrow field data from this record.
497 ///
498 /// An optional `headers` parameter permits deserializing into a struct
499 /// based on its field names (corresponding to header values) rather than
500 /// the order in which the fields are defined.
501 ///
502 /// # Example: without headers
503 ///
504 /// This shows how to deserialize a single row into a struct based on the
505 /// order in which fields occur. This example also shows how to borrow
506 /// fields from the `ByteRecord`, which results in zero allocation
507 /// deserialization.
508 ///
509 /// ```
510 /// use std::error::Error;
511 ///
512 /// use csv_async::ByteRecord;
513 /// use serde::Deserialize;
514 ///
515 /// #[derive(Deserialize)]
516 /// struct Row<'a> {
517 /// city: &'a str,
518 /// country: &'a str,
519 /// population: u64,
520 /// }
521 ///
522 /// # fn main() { example().unwrap() }
523 /// fn example() -> Result<(), Box<dyn Error>> {
524 /// let record = ByteRecord::from(vec![
525 /// "Boston", "United States", "4628910",
526 /// ]);
527 ///
528 /// let row: Row = record.deserialize(None)?;
529 /// assert_eq!(row.city, "Boston");
530 /// assert_eq!(row.country, "United States");
531 /// assert_eq!(row.population, 4628910);
532 /// Ok(())
533 /// }
534 /// ```
535 ///
536 /// # Example: with headers
537 ///
538 /// This example is like the previous one, but shows how to deserialize
539 /// into a struct based on the struct's field names. For this to work,
540 /// you must provide a header row.
541 ///
542 /// This example also shows that you can deserialize into owned data
543 /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
544 ///
545 /// ```
546 /// use std::error::Error;
547 ///
548 /// use csv_async::ByteRecord;
549 /// use serde::Deserialize;
550 ///
551 /// #[derive(Deserialize)]
552 /// struct Row {
553 /// city: String,
554 /// country: String,
555 /// population: u64,
556 /// }
557 ///
558 /// # fn main() { example().unwrap() }
559 /// fn example() -> Result<(), Box<dyn Error>> {
560 /// // Notice that the fields are not in the same order
561 /// // as the fields in the struct!
562 /// let header = ByteRecord::from(vec![
563 /// "country", "city", "population",
564 /// ]);
565 /// let record = ByteRecord::from(vec![
566 /// "United States", "Boston", "4628910",
567 /// ]);
568 ///
569 /// let row: Row = record.deserialize(Some(&header))?;
570 /// assert_eq!(row.city, "Boston");
571 /// assert_eq!(row.country, "United States");
572 /// assert_eq!(row.population, 4628910);
573 /// Ok(())
574 /// }
575 /// ```
576 #[cfg(feature = "with_serde")]
577 pub fn deserialize<'de, D: Deserialize<'de>>(
578 &'de self,
579 headers: Option<&'de ByteRecord>,
580 ) -> Result<D> {
581 deserialize_byte_record(self, headers)
582 }
583}
584
/// A location inside CSV data.
///
/// Positions are used when reporting errors in CSV data. Every position
/// records the byte offset, the line number and the record index at which
/// the error occurred.
///
/// Byte offsets and record indices are counted from `0`, while line numbers
/// are counted from `1`.
///
/// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    byte: u64,
    line: u64,
    record: u64,
}

impl Position {
    /// Create a position that points at the very start of the data: byte
    /// offset `0`, line `1`, record index `0`.
    #[inline]
    pub fn new() -> Position {
        Position { byte: 0, line: 1, record: 0 }
    }

    /// The byte offset of this position, counted from `0`.
    #[inline]
    pub fn byte(&self) -> u64 {
        let Position { byte, .. } = *self;
        byte
    }

    /// The line number of this position, counted from `1`.
    #[inline]
    pub fn line(&self) -> u64 {
        let Position { line, .. } = *self;
        line
    }

    /// The record index of this position; the first record has index `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        let Position { record, .. } = *self;
        record
    }

    /// Update the byte offset and return `self` so calls can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        *self = Position { byte, ..self.clone() };
        self
    }

    /// Update the line number and return `self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics when `line` is less than `1`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        assert!(line > 0);
        *self = Position { line, ..self.clone() };
        self
    }

    /// Update the record index and return `self` so calls can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        *self = Position { record, ..self.clone() };
        self
    }
}

impl Default for Position {
    /// Same as [`Position::new`]: the start of the data.
    fn default() -> Self {
        Position::new()
    }
}
653
/// Bookkeeping for where each field of a single record ends.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Technically, we could drop this field and maintain an invariant that
    /// `ends.len()` is always the number of fields, but doing that efficiently
    /// requires attention to safety. We play it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    /// Empty bounds with no preallocated storage.
    #[inline]
    fn default() -> Bounds {
        Bounds { ends: Vec::new(), len: 0 }
    }
}

impl Bounds {
    /// Create empty bounds with room for the ending positions of `capacity`
    /// fields. The slots are zero-filled and do not count as fields.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        let ends = vec![0; capacity];
        Bounds { ends, len: 0 }
    }

    /// The byte range of field `i`, or `None` when there is no such field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first one starts
        // at offset zero.
        let start = match i {
            0 => 0,
            _ => self.ends.get(i - 1).copied().unwrap_or(0),
        };
        Some(start..end)
    }

    /// The ending positions of all fields currently in use.
    #[inline]
    fn ends(&self) -> &[usize] {
        let (used, _spare) = self.ends.split_at(self.len);
        used
    }

    /// The ending position of the final field.
    ///
    /// Yields `0` when there are no fields.
    #[inline]
    fn end(&self) -> usize {
        match self.ends().last() {
            Some(&last) => last,
            None => 0,
        }
    }

    /// The number of fields tracked by these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Grow the storage for ending positions.
    ///
    /// The storage doubles in length (with a floor of 4 slots) and the new
    /// slots are zero-filled. Panics if doubling overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        let target = cmp::max(4, doubled);
        self.ends.resize(target, 0);
    }

    /// Record a new field that ends at `pos`.
    #[inline]
    fn add(&mut self, pos: usize) {
        let slot = self.len;
        if slot >= self.ends.len() {
            self.expand();
        }
        self.ends[slot] = pos;
        self.len = slot + 1;
    }
}
736
737impl ops::Index<usize> for ByteRecord {
738 type Output = [u8];
739 #[inline]
740 fn index(&self, i: usize) -> &[u8] {
741 self.get(i).unwrap()
742 }
743}
744
745impl From<StringRecord> for ByteRecord {
746 #[inline]
747 fn from(record: StringRecord) -> ByteRecord {
748 record.into_byte_record()
749 }
750}
751
752impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
753 #[inline]
754 fn from(xs: Vec<T>) -> ByteRecord {
755 ByteRecord::from_iter(&xs)
756 }
757}
758
759impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
760 #[inline]
761 fn from(xs: &'a [T]) -> ByteRecord {
762 ByteRecord::from_iter(xs)
763 }
764}
765
766impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
767 #[inline]
768 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
769 let mut record = ByteRecord::new();
770 record.extend(iter);
771 record
772 }
773}
774
775impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
776 #[inline]
777 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
778 for x in iter {
779 self.push_field(x.as_ref());
780 }
781 }
782}
783
/// A double-ended iterator over the fields in a byte record.
///
/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
/// is being iterated over.
///
/// Forward and reverse iteration keep separate cursors; the iterator is
/// exhausted once `i_forward` and `i_reverse` meet.
#[derive(Clone)]
pub struct ByteRecordIter<'r> {
    /// The record we are iterating over.
    r: &'r ByteRecord,
    /// The starting index of the previous field. (For reverse iteration.)
    ///
    /// Starts out as the total length of the record's field data.
    last_start: usize,
    /// The ending index of the previous field. (For forward iteration.)
    ///
    /// Starts out as `0`.
    last_end: usize,
    /// The index of forward iteration.
    i_forward: usize,
    /// The index of reverse iteration.
    i_reverse: usize,
}
801
802impl<'r> IntoIterator for &'r ByteRecord {
803 type IntoIter = ByteRecordIter<'r>;
804 type Item = &'r [u8];
805
806 #[inline]
807 fn into_iter(self) -> ByteRecordIter<'r> {
808 ByteRecordIter {
809 r: self,
810 last_start: self.as_slice().len(),
811 last_end: 0,
812 i_forward: 0,
813 i_reverse: self.len(),
814 }
815 }
816}
817
// The `Iterator` impl's `size_hint` reports the same value for its lower and
// upper bound, which is what `ExactSizeIterator` relies on.
impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
819
820impl<'r> Iterator for ByteRecordIter<'r> {
821 type Item = &'r [u8];
822
823 #[inline]
824 fn next(&mut self) -> Option<&'r [u8]> {
825 if self.i_forward == self.i_reverse {
826 None
827 } else {
828 let start = self.last_end;
829 let end = self.r.0.bounds.ends()[self.i_forward];
830 self.i_forward += 1;
831 self.last_end = end;
832 Some(&self.r.0.fields[start..end])
833 }
834 }
835
836 #[inline]
837 fn size_hint(&self) -> (usize, Option<usize>) {
838 let x = self.i_reverse - self.i_forward;
839 (x, Some(x))
840 }
841
842 #[inline]
843 fn count(self) -> usize {
844 self.len()
845 }
846}
847
848impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
849 #[inline]
850 fn next_back(&mut self) -> Option<&'r [u8]> {
851 if self.i_forward == self.i_reverse {
852 None
853 } else {
854 self.i_reverse -= 1;
855 let start = self
856 .i_reverse
857 .checked_sub(1)
858 .map(|i| self.r.0.bounds.ends()[i])
859 .unwrap_or(0);
860 let end = self.last_start;
861 self.last_start = start;
862 Some(&self.r.0.fields[start..end])
863 }
864 }
865}
866
/// Returns true for the bytes that `ByteRecord::trim` documents as
/// trimmable: the class `[\t\n\v\f\r ]`.
///
/// `u8::is_ascii_whitespace` on its own is not enough, because it leaves out
/// vertical tab (`\v`, byte `0x0B`); that byte is therefore checked
/// explicitly.
fn is_trimmable_byte(byte: u8) -> bool {
    byte.is_ascii_whitespace() || byte == b'\x0b'
}

/// Strip ASCII whitespace (`[\t\n\v\f\r ]`) from both ends of `bytes`.
///
/// Whitespace in the interior of the slice is kept. The result is a
/// subslice of the input; nothing is allocated.
fn trim_ascii(bytes: &[u8]) -> &[u8] {
    trim_ascii_start(trim_ascii_end(bytes))
}

/// Strip ASCII whitespace (`[\t\n\v\f\r ]`) from the front of `bytes`.
///
/// Returns an empty slice when every byte is whitespace.
fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
    let start = bytes
        .iter()
        .position(|&byte| !is_trimmable_byte(byte))
        .unwrap_or(bytes.len());
    &bytes[start..]
}

/// Strip ASCII whitespace (`[\t\n\v\f\r ]`) from the back of `bytes`.
///
/// Returns an empty slice when every byte is whitespace.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    let end = bytes
        .iter()
        .rposition(|&byte| !is_trimmable_byte(byte))
        .map_or(0, |last_kept| last_kept + 1);
    &bytes[..end]
}
892
// Unit tests for `ByteRecord`: field storage and lookup, trimming, UTF-8
// validation when converting to a `StringRecord`, iteration, and equality.
#[cfg(test)]
mod tests {
    use crate::string_record::StringRecord;

    use super::ByteRecord;

    /// Shorthand for viewing a string literal as a byte slice.
    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }

    // A single pushed field is retrievable; later indices are absent.
    #[test]
    fn record_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // Two pushed fields keep their order and boundaries.
    #[test]
    fn record_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("quux")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // A new record has no fields at all.
    #[test]
    fn empty_record() {
        let rec = ByteRecord::new();

        assert_eq!(rec.len(), 0);
        assert_eq!(rec.get(0), None);
        assert_eq!(rec.get(1), None);
    }

    // A field made up of whitespace only trims down to the empty field.
    #[test]
    fn trim_whitespace_only() {
        let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Leading whitespace is removed, for one field and for several.
    #[test]
    fn trim_front() {
        let mut rec = ByteRecord::from(vec![b" abc"]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc"), b(" xyz")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trailing whitespace is removed, for one field and for several.
    #[test]
    fn trim_back() {
        let mut rec = ByteRecord::from(vec![b"abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b("abc "), b("xyz ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Whitespace on both sides is removed, for one field and for several.
    #[test]
    fn trim_both() {
        let mut rec = ByteRecord::from(vec![b" abc "]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));

        let mut rec = ByteRecord::from(vec![b(" abc "), b(" xyz ")]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("abc")));
        assert_eq!(rec.get(1), Some(b("xyz")));
    }

    // Trimming a record with a single empty field must not panic.
    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        let mut rec = ByteRecord::from(vec![b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
    }

    // Trimming a record with several empty fields must not panic.
    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        let mut rec = ByteRecord::from(vec![b"", b""]);
        rec.trim();
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
    }

    // Trimming a record without any fields must not panic.
    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        let mut rec = ByteRecord::new();
        rec.trim();
        assert_eq!(rec.as_slice().len(), 0);
    }

    // An empty field still counts as a field.
    #[test]
    fn empty_field_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");

        assert_eq!(rec.len(), 1);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), None);
        assert_eq!(rec.get(2), None);
    }

    // Consecutive empty fields are tracked individually.
    #[test]
    fn empty_field_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"");
        rec.push_field(b"");

        assert_eq!(rec.len(), 2);
        assert_eq!(rec.get(0), Some(b("")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), None);
        assert_eq!(rec.get(3), None);
    }

    // An empty field between two non-empty ones keeps all boundaries intact.
    #[test]
    fn empty_surround_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");

        assert_eq!(rec.len(), 3);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), None);
        assert_eq!(rec.get(4), None);
    }

    // Same as above, with an additional trailing empty field.
    #[test]
    fn empty_surround_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"");
        rec.push_field(b"quux");
        rec.push_field(b"");

        assert_eq!(rec.len(), 4);
        assert_eq!(rec.get(0), Some(b("foo")));
        assert_eq!(rec.get(1), Some(b("")));
        assert_eq!(rec.get(2), Some(b("quux")));
        assert_eq!(rec.get(3), Some(b("")));
        assert_eq!(rec.get(4), None);
        assert_eq!(rec.get(5), None);
    }

    // The UTF-8 error reports the offending field and the valid prefix
    // length within that field.
    #[test]
    fn utf8_error_1() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"foo");
        rec.push_field(b"b\xFFar");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 1);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // Invalid byte at the very start of the only field.
    #[test]
    fn utf8_error_2() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // Invalid byte after one valid byte in the only field.
    #[test]
    fn utf8_error_3() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // Invalid byte at the end of the last of several fields.
    #[test]
    fn utf8_error_4() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"xyz\xFF");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 3);
    }

    // Invalid byte at the start of the last of several fields.
    #[test]
    fn utf8_error_5() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a");
        rec.push_field(b"b");
        rec.push_field(b"c");
        rec.push_field(b"d");
        rec.push_field(b"\xFFxyz");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 4);
        assert_eq!(err.utf8_error().valid_up_to(), 0);
    }

    // This tests a tricky case where a single field on its own isn't valid
    // UTF-8, but the concatenation of all fields is.
    #[test]
    fn utf8_error_6() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"a\xc9");
        rec.push_field(b"\x91b");

        let err = StringRecord::from_byte_record(rec).unwrap_err();
        assert_eq!(err.utf8_error().field(), 0);
        assert_eq!(err.utf8_error().valid_up_to(), 1);
    }

    // This tests that we can always clear a `ByteRecord` and get a guaranteed
    // successful conversion to UTF-8. This permits reusing the allocation.
    #[test]
    fn utf8_clear_ok() {
        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        assert!(StringRecord::from_byte_record(rec).is_err());

        let mut rec = ByteRecord::new();
        rec.push_field(b"\xFF");
        rec.clear();
        assert!(StringRecord::from_byte_record(rec).is_ok());
    }

    // Forward iteration yields the fields in insertion order.
    #[test]
    fn iter() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> =
            rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
        assert_eq!(data, got);
    }

    // Reverse iteration yields the fields in reverse insertion order.
    #[test]
    fn iter_reverse() {
        let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(&*data);
        let got: Vec<&str> = rec
            .iter()
            .rev()
            .map(|x| ::std::str::from_utf8(x).unwrap())
            .collect();
        data.reverse();
        assert_eq!(data, got);
    }

    // Mixing `next` and `next_back` consumes from both ends until the two
    // cursors meet, after which both directions yield `None`.
    #[test]
    fn iter_forward_and_reverse() {
        let data = vec!["foo", "bar", "baz", "quux", "wat"];
        let rec = ByteRecord::from(data);
        let mut it = rec.iter();

        assert_eq!(it.next_back(), Some(b("wat")));
        assert_eq!(it.next(), Some(b("foo")));
        assert_eq!(it.next(), Some(b("bar")));
        assert_eq!(it.next_back(), Some(b("quux")));
        assert_eq!(it.next(), Some(b("baz")));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);
    }

    // Check that record equality respects field boundaries.
    //
    // Regression test for #138.
    #[test]
    fn eq_field_boundaries() {
        let test1 = ByteRecord::from(vec!["12", "34"]);
        let test2 = ByteRecord::from(vec!["123", "4"]);

        assert_ne!(test1, test2);
    }

    // Check that record equality respects number of fields.
    //
    // Regression test for #138.
    #[test]
    fn eq_record_len() {
        let test1 = ByteRecord::from(vec!["12", "34", "56"]);
        let test2 = ByteRecord::from(vec!["12", "34"]);
        assert_ne!(test1, test2);
    }
}
1200}