csv_async/
byte_record.rs

1use std::cmp;
2use std::fmt;
3use std::iter::FromIterator;
4use std::ops::{self, Range};
5use std::result;
6
7#[cfg(feature = "with_serde")]
8use serde::de::Deserialize;
9
10#[cfg(feature = "with_serde")]
11use crate::deserializer::deserialize_byte_record;
12#[cfg(feature = "with_serde")]
13use crate::error::Result;
14use crate::error::{new_utf8_error, Utf8Error};
15use crate::string_record::StringRecord;
16
17/// A single CSV record stored as raw bytes.
18///
19/// A byte record permits reading or writing CSV rows that are not UTF-8.
20/// In general, you should prefer using a
21/// [`StringRecord`](struct.StringRecord.html)
22/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
23/// it.
24///
25/// If you are using the Serde (de)serialization APIs, then you probably never
26/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
27/// are some circumstances in which you might need to use a raw record type
28/// while still using Serde. For example, if you need to deserialize possibly
29/// invalid UTF-8 fields, then you'll need to first read your record into a
30/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
31/// reason for using the raw record de-serialization APIs is if you're using
32/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
33///
34/// Two `ByteRecord`s are compared on the basis of their field data. Any
35/// position information associated with the records is ignored.
36#[derive(Eq)]
37pub struct ByteRecord(Box<ByteRecordInner>);
38
39impl PartialEq for ByteRecord {
40    fn eq(&self, other: &ByteRecord) -> bool {
41        if self.len() != other.len() {
42            return false;
43        }
44        self.iter().zip(other.iter()).all(|e| e.0 == e.1)
45    }
46}
47
48impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
49    fn eq(&self, other: &Vec<T>) -> bool {
50        self.iter_eq(other)
51    }
52}
53
54impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
55    fn eq(&self, other: &Vec<T>) -> bool {
56        self.iter_eq(other)
57    }
58}
59
60impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
61    fn eq(&self, other: &[T]) -> bool {
62        self.iter_eq(other)
63    }
64}
65
66impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
67    fn eq(&self, other: &[T]) -> bool {
68        self.iter_eq(other)
69    }
70}
71
72impl fmt::Debug for ByteRecord {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        write!(f, "ByteRecord(")?;
75        f.debug_list()
76            .entries(self.iter().map(crate::debug::Bytes))
77            .finish()?;
78        write!(f, ")")?;
79        Ok(())
80    }
81}
82
83/// The inner portion of a byte record.
84///
85/// We use this memory layout so that moving a `ByteRecord` only requires
86/// moving a single pointer. The optimization is dubious at best, but does
87/// seem to result in slightly better numbers in microbenchmarks. Methinks this
88/// may heavily depend on the underlying allocator.
89#[derive(Debug, Eq, PartialEq)]
90struct ByteRecordInner {
91    /// The position of this byte record.
92    pos: Option<Position>,
93    /// All fields in this record, stored contiguously.
94    fields: Vec<u8>,
95    /// The number of and location of each field in this record.
96    bounds: Bounds,
97}
98
99impl Default for ByteRecord {
100    #[inline]
101    fn default() -> ByteRecord {
102        ByteRecord::new()
103    }
104}
105
106impl Clone for ByteRecord {
107    /// Clone this record, but only copy `fields` up to the end of bounds. 
108    /// This is useful when one wants to copy a record, but not necessarily any
109    /// excess capacity in that record.
110    #[inline]
111    fn clone(&self) -> ByteRecord {
112        let mut br = ByteRecord::new();
113        br.0.pos = self.0.pos.clone();
114        br.0.bounds = self.0.bounds.clone();
115        br.0.fields = self.0.fields[..self.0.bounds.end()].to_vec();
116        br
117    }
118}
119
120impl ByteRecord {
121    /// Create a new empty `ByteRecord`.
122    ///
123    /// Note that you may find the `ByteRecord::from` constructor more
124    /// convenient, which is provided by an impl on the `From` trait.
125    ///
126    /// # Example: create an empty record
127    ///
128    /// ```
129    /// use csv_async::ByteRecord;
130    ///
131    /// let record = ByteRecord::new();
132    /// assert_eq!(record.len(), 0);
133    /// ```
134    ///
135    /// # Example: initialize a record from a `Vec`
136    ///
137    /// ```
138    /// use csv_async::ByteRecord;
139    ///
140    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
141    /// assert_eq!(record.len(), 3);
142    /// ```
143    #[inline]
144    pub fn new() -> ByteRecord {
145        ByteRecord::with_capacity(0, 0)
146    }
147
148    /// Create a new empty `ByteRecord` with the given capacity settings.
149    ///
150    /// `buffer` refers to the capacity of the buffer used to store the
151    /// actual row contents. `fields` refers to the number of fields one
152    /// might expect to store.
153    #[inline]
154    pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
155        ByteRecord(Box::new(ByteRecordInner {
156            pos: None,
157            fields: vec![0; buffer],
158            bounds: Bounds::with_capacity(fields),
159        }))
160    }
161
162    /// Returns an iterator over all fields in this record.
163    ///
164    /// # Example
165    ///
166    /// This example shows how to iterate over each field in a `ByteRecord`.
167    ///
168    /// ```
169    /// use csv_async::ByteRecord;
170    ///
171    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
172    /// for field in record.iter() {
173    ///     assert!(field == b"a" || field == b"b" || field == b"c");
174    /// }
175    /// ```
176    #[inline]
177    pub fn iter(&self) -> ByteRecordIter {
178        self.into_iter()
179    }
180
181    /// Return the field at index `i`.
182    ///
183    /// If no field at index `i` exists, then this returns `None`.
184    ///
185    /// # Example
186    ///
187    /// ```
188    /// use csv_async::ByteRecord;
189    ///
190    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
191    /// assert_eq!(record.get(1), Some(&b"b"[..]));
192    /// assert_eq!(record.get(3), None);
193    /// ```
194    #[inline]
195    pub fn get(&self, i: usize) -> Option<&[u8]> {
196        self.0.bounds.get(i).map(|range| &self.0.fields[range])
197    }
198
199    /// Returns true if and only if this record is empty.
200    ///
201    /// # Example
202    ///
203    /// ```
204    /// use csv_async::ByteRecord;
205    ///
206    /// assert!(ByteRecord::new().is_empty());
207    /// ```
208    #[inline]
209    pub fn is_empty(&self) -> bool {
210        self.len() == 0
211    }
212
213    /// Returns the number of fields in this record.
214    ///
215    /// # Example
216    ///
217    /// ```
218    /// use csv_async::ByteRecord;
219    ///
220    /// let record = ByteRecord::from(vec!["a", "b", "c"]);
221    /// assert_eq!(record.len(), 3);
222    /// ```
223    #[inline]
224    pub fn len(&self) -> usize {
225        self.0.bounds.len()
226    }
227
228    /// Truncate this record to `n` fields.
229    ///
230    /// If `n` is greater than the number of fields in this record, then this
231    /// has no effect.
232    ///
233    /// # Example
234    ///
235    /// ```
236    /// use csv_async::ByteRecord;
237    ///
238    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
239    /// assert_eq!(record.len(), 3);
240    /// record.truncate(1);
241    /// assert_eq!(record.len(), 1);
242    /// assert_eq!(record, vec!["a"]);
243    /// ```
244    #[inline]
245    pub fn truncate(&mut self, n: usize) {
246        if n <= self.len() {
247            self.0.bounds.len = n;
248        }
249    }
250
251    /// Clear this record so that it has zero fields.
252    ///
253    /// This is equivalent to calling `truncate(0)`.
254    ///
255    /// Note that it is not necessary to clear the record to reuse it with
256    /// the CSV reader.
257    ///
258    /// # Example
259    ///
260    /// ```
261    /// use csv_async::ByteRecord;
262    ///
263    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
264    /// assert_eq!(record.len(), 3);
265    /// record.clear();
266    /// assert_eq!(record.len(), 0);
267    /// ```
268    #[inline]
269    pub fn clear(&mut self) {
270        self.truncate(0);
271    }
272
273    /// Trim the fields of this record so that leading and trailing whitespace
274    /// is removed.
275    ///
276    /// This method uses the ASCII definition of whitespace. That is, only
277    /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
278    ///
279    /// # Example
280    ///
281    /// ```
282    /// use csv_async::ByteRecord;
283    ///
284    /// let mut record = ByteRecord::from(vec![
285    ///     "  ", "\tfoo", "bar  ", "b a z",
286    /// ]);
287    /// record.trim();
288    /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
289    /// ```
290    pub fn trim(&mut self) {
291        let length = self.len();
292        if length == 0 {
293            return;
294        }
295        // TODO: We could likely do this in place, but for now, we allocate.
296        let mut trimmed =
297            ByteRecord::with_capacity(self.as_slice().len(), self.len());
298        trimmed.set_position(self.position().cloned());
299        for field in self.iter() {
300            trimmed.push_field(trim_ascii(field));
301        }
302        *self = trimmed;
303    }
304
305    /// Add a new field to this record.
306    ///
307    /// # Example
308    ///
309    /// ```
310    /// use csv_async::ByteRecord;
311    ///
312    /// let mut record = ByteRecord::new();
313    /// record.push_field(b"foo");
314    /// assert_eq!(&record[0], b"foo");
315    /// ```
316    #[inline]
317    pub fn push_field(&mut self, field: &[u8]) {
318        let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
319        while e > self.0.fields.len() {
320            self.expand_fields();
321        }
322        self.0.fields[s..e].copy_from_slice(field);
323        self.0.bounds.add(e);
324    }
325
326    /// Return the position of this record, if available.
327    ///
328    /// # Example
329    ///
330    /// ```
331    /// use std::error::Error;
332    /// use futures::stream::{self, StreamExt};
333    /// use csv_async::{ByteRecord, AsyncReaderBuilder};
334    ///
335    /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); }
336    /// async fn example() -> Result<(), Box<dyn Error>> {
337    ///     let mut record = ByteRecord::new();
338    ///     let mut rdr = AsyncReaderBuilder::new()
339    ///         .has_headers(false)
340    ///         .create_reader("a,b,c\nx,y,z".as_bytes());
341    ///
342    ///     assert!(rdr.read_byte_record(&mut record).await?);
343    ///     {
344    ///         let pos = record.position().expect("a record position");
345    ///         assert_eq!(pos.byte(), 0);
346    ///         assert_eq!(pos.line(), 1);
347    ///         assert_eq!(pos.record(), 0);
348    ///     }
349    ///
350    ///     assert!(rdr.read_byte_record(&mut record).await?);
351    ///     {
352    ///         let pos = record.position().expect("a record position");
353    ///         assert_eq!(pos.byte(), 6);
354    ///         assert_eq!(pos.line(), 2);
355    ///         assert_eq!(pos.record(), 1);
356    ///     }
357    ///
358    ///     // Finish the CSV reader for good measure.
359    ///     assert!(!rdr.read_byte_record(&mut record).await?);
360    ///     Ok(())
361    /// }
362    /// ```
363    #[inline]
364    pub fn position(&self) -> Option<&Position> {
365        self.0.pos.as_ref()
366    }
367
368    /// Set the position of this record.
369    ///
370    /// # Example
371    ///
372    /// ```
373    /// use csv_async::{ByteRecord, Position};
374    ///
375    /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
376    /// let mut pos = Position::new();
377    /// pos.set_byte(100);
378    /// pos.set_line(4);
379    /// pos.set_record(2);
380    ///
381    /// record.set_position(Some(pos.clone()));
382    /// assert_eq!(record.position(), Some(&pos));
383    /// ```
384    #[inline]
385    pub fn set_position(&mut self, pos: Option<Position>) {
386        self.0.pos = pos;
387    }
388
389    /// Return the start and end position of a field in this record.
390    ///
391    /// If no such field exists at the given index, then return `None`.
392    ///
393    /// The range returned can be used with the slice returned by `as_slice`.
394    ///
395    /// # Example
396    ///
397    /// ```
398    /// use csv_async::ByteRecord;
399    ///
400    /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
401    /// let range = record.range(1).expect("a record range");
402    /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
403    /// ```
404    #[inline]
405    pub fn range(&self, i: usize) -> Option<Range<usize>> {
406        self.0.bounds.get(i)
407    }
408
409    /// Return the entire row as a single byte slice. The slice returned stores
410    /// all fields contiguously. The boundaries of each field can be determined
411    /// via the `range` method.
412    ///
413    /// # Example
414    ///
415    /// ```
416    /// use csv_async::ByteRecord;
417    ///
418    /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
419    /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
420    /// ```
421    #[inline]
422    pub fn as_slice(&self) -> &[u8] {
423        &self.0.fields[..self.0.bounds.end()]
424    }
425
426    /// Retrieve the underlying parts of a byte record.
427    #[inline]
428    pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
429        let inner = &mut *self.0;
430        (&mut inner.fields, &mut inner.bounds.ends)
431    }
432
433    /// Set the number of fields in the given record record.
434    #[inline]
435    pub(crate) fn set_len(&mut self, len: usize) {
436        self.0.bounds.len = len;
437    }
438
439    /// Expand the capacity for storing fields.
440    #[inline]
441    pub(crate) fn expand_fields(&mut self) {
442        let new_len = self.0.fields.len().checked_mul(2).unwrap();
443        self.0.fields.resize(cmp::max(4, new_len), 0);
444    }
445
446    /// Expand the capacity for storing field ending positions.
447    #[inline]
448    pub(crate) fn expand_ends(&mut self) {
449        self.0.bounds.expand();
450    }
451
452    /// Validate the given record as UTF-8.
453    ///
454    /// If it's not UTF-8, return an error.
455    #[inline]
456    pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
457        // If the entire buffer is ASCII, then we have nothing to fear.
458        if self.0.fields[..self.0.bounds.end()].is_ascii() {
459            return Ok(());
460        }
461        // Otherwise, we must check each field individually to ensure that
462        // it's valid UTF-8.
463        for (i, field) in self.iter().enumerate() {
464            if let Err(err) = std::str::from_utf8(field) {
465                return Err(new_utf8_error(i, err.valid_up_to()));
466            }
467        }
468        Ok(())
469    }
470
471    /// Compare the given byte record with the iterator of fields for equality.
472    pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
473    where
474        I: IntoIterator<Item = T>,
475        T: AsRef<[u8]>,
476    {
477        let mut it_record = self.iter();
478        let mut it_other = other.into_iter();
479        loop {
480            match (it_record.next(), it_other.next()) {
481                (None, None) => return true,
482                (None, Some(_)) | (Some(_), None) => return false,
483                (Some(x), Some(y)) => {
484                    if x != y.as_ref() {
485                        return false;
486                    }
487                }
488            }
489        }
490    }
491    /// Deserialize this record.
492    ///
493    /// The `D` type parameter refers to the type that this record should be
494    /// deserialized into. The `'de` lifetime refers to the lifetime of the
495    /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
496    /// that borrow field data from this record.
497    ///
498    /// An optional `headers` parameter permits deserializing into a struct
499    /// based on its field names (corresponding to header values) rather than
500    /// the order in which the fields are defined.
501    ///
502    /// # Example: without headers
503    ///
504    /// This shows how to deserialize a single row into a struct based on the
505    /// order in which fields occur. This example also shows how to borrow
506    /// fields from the `ByteRecord`, which results in zero allocation
507    /// deserialization.
508    ///
509    /// ```
510    /// use std::error::Error;
511    ///
512    /// use csv_async::ByteRecord;
513    /// use serde::Deserialize;
514    ///
515    /// #[derive(Deserialize)]
516    /// struct Row<'a> {
517    ///     city: &'a str,
518    ///     country: &'a str,
519    ///     population: u64,
520    /// }
521    ///
522    /// # fn main() { example().unwrap() }
523    /// fn example() -> Result<(), Box<dyn Error>> {
524    ///     let record = ByteRecord::from(vec![
525    ///         "Boston", "United States", "4628910",
526    ///     ]);
527    ///
528    ///     let row: Row = record.deserialize(None)?;
529    ///     assert_eq!(row.city, "Boston");
530    ///     assert_eq!(row.country, "United States");
531    ///     assert_eq!(row.population, 4628910);
532    ///     Ok(())
533    /// }
534    /// ```
535    ///
536    /// # Example: with headers
537    ///
538    /// This example is like the previous one, but shows how to deserialize
539    /// into a struct based on the struct's field names. For this to work,
540    /// you must provide a header row.
541    ///
542    /// This example also shows that you can deserialize into owned data
543    /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
544    ///
545    /// ```
546    /// use std::error::Error;
547    ///
548    /// use csv_async::ByteRecord;
549    /// use serde::Deserialize;
550    ///
551    /// #[derive(Deserialize)]
552    /// struct Row {
553    ///     city: String,
554    ///     country: String,
555    ///     population: u64,
556    /// }
557    ///
558    /// # fn main() { example().unwrap() }
559    /// fn example() -> Result<(), Box<dyn Error>> {
560    ///     // Notice that the fields are not in the same order
561    ///     // as the fields in the struct!
562    ///     let header = ByteRecord::from(vec![
563    ///         "country", "city", "population",
564    ///     ]);
565    ///     let record = ByteRecord::from(vec![
566    ///         "United States", "Boston", "4628910",
567    ///     ]);
568    ///
569    ///     let row: Row = record.deserialize(Some(&header))?;
570    ///     assert_eq!(row.city, "Boston");
571    ///     assert_eq!(row.country, "United States");
572    ///     assert_eq!(row.population, 4628910);
573    ///     Ok(())
574    /// }
575    /// ```
576    #[cfg(feature = "with_serde")]
577    pub fn deserialize<'de, D: Deserialize<'de>>(
578        &'de self,
579        headers: Option<&'de ByteRecord>,
580    ) -> Result<D> {
581        deserialize_byte_record(self, headers)
582    }
583}
584
/// A location within CSV data.
///
/// Positions are used when reporting errors in CSV data. Each one carries a
/// byte offset, a line number and a record index.
///
/// Byte offsets and record indices are counted from `0`; line numbers are
/// counted from `1`.
///
/// A CSV reader will automatically assign the position of each record.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Position {
    byte: u64,
    line: u64,
    record: u64,
}

impl Position {
    /// Returns a position at the very start of the data: byte `0`, line `1`,
    /// record `0`.
    #[inline]
    pub fn new() -> Position {
        Position { byte: 0, line: 1, record: 0 }
    }

    /// The byte offset of this position. The first byte is at offset `0`.
    #[inline]
    pub fn byte(&self) -> u64 {
        self.byte
    }

    /// The line number of this position. The first line is line `1`.
    #[inline]
    pub fn line(&self) -> u64 {
        self.line
    }

    /// The record index of this position. The first record has index `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        self.record
    }

    /// Replace the byte offset, returning `self` so calls can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Self {
        self.byte = byte;
        self
    }

    /// Replace the line number, returning `self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics if `line` is `0`, since line numbers start at `1`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Self {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Replace the record index, returning `self` so calls can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Self {
        self.record = record;
        self
    }
}

impl Default for Position {
    /// Same as `Position::new`.
    fn default() -> Self {
        Position::new()
    }
}
653
/// Field boundaries for a single record.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Bounds {
    /// For each field, the offset just past its last byte.
    ends: Vec<usize>,
    /// How many entries of `ends` describe real fields.
    ///
    /// `ends.len()` could in principle serve as the field count, but keeping
    /// that invariant efficiently requires attention to safety. A separate
    /// counter plays it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    /// Bounds with no fields and no reserved slots.
    #[inline]
    fn default() -> Bounds {
        Bounds::with_capacity(0)
    }
}

impl Bounds {
    /// Create bounds with `capacity` zeroed end slots and a field count of
    /// zero.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        let ends = vec![0; capacity];
        Bounds { ends, len: 0 }
    }

    /// Returns the half-open byte range of field `i`, or `None` when `i` is
    /// not a valid field index.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where the previous one ended; the first field
        // starts at offset 0.
        let start = if i == 0 {
            0
        } else {
            self.ends.get(i - 1).copied().unwrap_or(0)
        };
        Some(Range { start, end })
    }

    /// Returns the end offsets of the fields that are in use.
    #[inline]
    fn ends(&self) -> &[usize] {
        let count = self.len;
        &self.ends[..count]
    }

    /// Returns the end offset of the final field, or `0` when there are no
    /// fields.
    #[inline]
    fn end(&self) -> usize {
        match self.ends() {
            [.., last] => *last,
            [] => 0,
        }
    }

    /// Returns how many fields these bounds describe.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Grow the storage for end offsets: double it (panicking on overflow),
    /// with a minimum of 4 slots. New slots are zeroed.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        let target = cmp::max(4, doubled);
        self.ends.resize(target, 0);
    }

    /// Record one more field that ends at offset `pos`, growing the storage
    /// first if every slot is taken.
    #[inline]
    fn add(&mut self, pos: usize) {
        if self.ends.len() <= self.len {
            self.expand();
        }
        let slot = self.len;
        self.ends[slot] = pos;
        self.len = slot + 1;
    }
}
736
737impl ops::Index<usize> for ByteRecord {
738    type Output = [u8];
739    #[inline]
740    fn index(&self, i: usize) -> &[u8] {
741        self.get(i).unwrap()
742    }
743}
744
745impl From<StringRecord> for ByteRecord {
746    #[inline]
747    fn from(record: StringRecord) -> ByteRecord {
748        record.into_byte_record()
749    }
750}
751
752impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
753    #[inline]
754    fn from(xs: Vec<T>) -> ByteRecord {
755        ByteRecord::from_iter(&xs)
756    }
757}
758
759impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
760    #[inline]
761    fn from(xs: &'a [T]) -> ByteRecord {
762        ByteRecord::from_iter(xs)
763    }
764}
765
766impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
767    #[inline]
768    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
769        let mut record = ByteRecord::new();
770        record.extend(iter);
771        record
772    }
773}
774
775impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
776    #[inline]
777    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
778        for x in iter {
779            self.push_field(x.as_ref());
780        }
781    }
782}
783
784/// A double-ended iterator over the fields in a byte record.
785///
786/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
787/// is being iterated over.
788#[derive(Clone)]
789pub struct ByteRecordIter<'r> {
790    /// The record we are iterating over.
791    r: &'r ByteRecord,
792    /// The starting index of the previous field. (For reverse iteration.)
793    last_start: usize,
794    /// The ending index of the previous field. (For forward iteration.)
795    last_end: usize,
796    /// The index of forward iteration.
797    i_forward: usize,
798    /// The index of reverse iteration.
799    i_reverse: usize,
800}
801
802impl<'r> IntoIterator for &'r ByteRecord {
803    type IntoIter = ByteRecordIter<'r>;
804    type Item = &'r [u8];
805
806    #[inline]
807    fn into_iter(self) -> ByteRecordIter<'r> {
808        ByteRecordIter {
809            r: self,
810            last_start: self.as_slice().len(),
811            last_end: 0,
812            i_forward: 0,
813            i_reverse: self.len(),
814        }
815    }
816}
817
818impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
819
820impl<'r> Iterator for ByteRecordIter<'r> {
821    type Item = &'r [u8];
822
823    #[inline]
824    fn next(&mut self) -> Option<&'r [u8]> {
825        if self.i_forward == self.i_reverse {
826            None
827        } else {
828            let start = self.last_end;
829            let end = self.r.0.bounds.ends()[self.i_forward];
830            self.i_forward += 1;
831            self.last_end = end;
832            Some(&self.r.0.fields[start..end])
833        }
834    }
835
836    #[inline]
837    fn size_hint(&self) -> (usize, Option<usize>) {
838        let x = self.i_reverse - self.i_forward;
839        (x, Some(x))
840    }
841
842    #[inline]
843    fn count(self) -> usize {
844        self.len()
845    }
846}
847
848impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
849    #[inline]
850    fn next_back(&mut self) -> Option<&'r [u8]> {
851        if self.i_forward == self.i_reverse {
852            None
853        } else {
854            self.i_reverse -= 1;
855            let start = self
856                .i_reverse
857                .checked_sub(1)
858                .map(|i| self.r.0.bounds.ends()[i])
859                .unwrap_or(0);
860            let end = self.last_start;
861            self.last_start = start;
862            Some(&self.r.0.fields[start..end])
863        }
864    }
865}
866
/// Returns true for the bytes that record trimming removes: the ASCII
/// whitespace class `[\t\n\v\f\r ]`.
///
/// `u8::is_ascii_whitespace` on its own is not enough here because it does
/// not treat vertical tab (`0x0B`) as whitespace, while the documented
/// trimming class includes `\v`.
fn is_trimmable_ascii(byte: u8) -> bool {
    byte.is_ascii_whitespace() || byte == 0x0B
}

/// Returns `bytes` with leading and trailing ASCII whitespace removed.
///
/// The result is a sub-slice of the input; nothing is copied.
fn trim_ascii(bytes: &[u8]) -> &[u8] {
    trim_ascii_start(trim_ascii_end(bytes))
}

/// Returns `bytes` with leading ASCII whitespace removed.
///
/// If every byte is whitespace (or the input is empty), the result is empty.
fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
    let start = bytes
        .iter()
        .position(|&b| !is_trimmable_ascii(b))
        .unwrap_or(bytes.len());
    &bytes[start..]
}

/// Returns `bytes` with trailing ASCII whitespace removed.
///
/// If every byte is whitespace (or the input is empty), the result is empty.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    let end = bytes
        .iter()
        .rposition(|&b| !is_trimmable_ascii(b))
        .map_or(0, |last| last + 1);
    &bytes[..end]
}
892
893#[cfg(test)]
894mod tests {
895    use crate::string_record::StringRecord;
896
897    use super::ByteRecord;
898
899    fn b(s: &str) -> &[u8] {
900        s.as_bytes()
901    }
902
903    #[test]
904    fn record_1() {
905        let mut rec = ByteRecord::new();
906        rec.push_field(b"foo");
907
908        assert_eq!(rec.len(), 1);
909        assert_eq!(rec.get(0), Some(b("foo")));
910        assert_eq!(rec.get(1), None);
911        assert_eq!(rec.get(2), None);
912    }
913
914    #[test]
915    fn record_2() {
916        let mut rec = ByteRecord::new();
917        rec.push_field(b"foo");
918        rec.push_field(b"quux");
919
920        assert_eq!(rec.len(), 2);
921        assert_eq!(rec.get(0), Some(b("foo")));
922        assert_eq!(rec.get(1), Some(b("quux")));
923        assert_eq!(rec.get(2), None);
924        assert_eq!(rec.get(3), None);
925    }
926
927    #[test]
928    fn empty_record() {
929        let rec = ByteRecord::new();
930
931        assert_eq!(rec.len(), 0);
932        assert_eq!(rec.get(0), None);
933        assert_eq!(rec.get(1), None);
934    }
935
936    #[test]
937    fn trim_whitespace_only() {
938        let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
939        rec.trim();
940        assert_eq!(rec.get(0), Some(b("")));
941    }
942
943    #[test]
944    fn trim_front() {
945        let mut rec = ByteRecord::from(vec![b" abc"]);
946        rec.trim();
947        assert_eq!(rec.get(0), Some(b("abc")));
948
949        let mut rec = ByteRecord::from(vec![b(" abc"), b("  xyz")]);
950        rec.trim();
951        assert_eq!(rec.get(0), Some(b("abc")));
952        assert_eq!(rec.get(1), Some(b("xyz")));
953    }
954
955    #[test]
956    fn trim_back() {
957        let mut rec = ByteRecord::from(vec![b"abc "]);
958        rec.trim();
959        assert_eq!(rec.get(0), Some(b("abc")));
960
961        let mut rec = ByteRecord::from(vec![b("abc "), b("xyz  ")]);
962        rec.trim();
963        assert_eq!(rec.get(0), Some(b("abc")));
964        assert_eq!(rec.get(1), Some(b("xyz")));
965    }
966
967    #[test]
968    fn trim_both() {
969        let mut rec = ByteRecord::from(vec![b" abc "]);
970        rec.trim();
971        assert_eq!(rec.get(0), Some(b("abc")));
972
973        let mut rec = ByteRecord::from(vec![b(" abc "), b("  xyz  ")]);
974        rec.trim();
975        assert_eq!(rec.get(0), Some(b("abc")));
976        assert_eq!(rec.get(1), Some(b("xyz")));
977    }
978
979    #[test]
980    fn trim_does_not_panic_on_empty_records_1() {
981        let mut rec = ByteRecord::from(vec![b""]);
982        rec.trim();
983        assert_eq!(rec.get(0), Some(b("")));
984    }
985
986    #[test]
987    fn trim_does_not_panic_on_empty_records_2() {
988        let mut rec = ByteRecord::from(vec![b"", b""]);
989        rec.trim();
990        assert_eq!(rec.get(0), Some(b("")));
991        assert_eq!(rec.get(1), Some(b("")));
992    }
993
994    #[test]
995    fn trim_does_not_panic_on_empty_records_3() {
996        let mut rec = ByteRecord::new();
997        rec.trim();
998        assert_eq!(rec.as_slice().len(), 0);
999    }
1000
1001    #[test]
1002    fn empty_field_1() {
1003        let mut rec = ByteRecord::new();
1004        rec.push_field(b"");
1005
1006        assert_eq!(rec.len(), 1);
1007        assert_eq!(rec.get(0), Some(b("")));
1008        assert_eq!(rec.get(1), None);
1009        assert_eq!(rec.get(2), None);
1010    }
1011
1012    #[test]
1013    fn empty_field_2() {
1014        let mut rec = ByteRecord::new();
1015        rec.push_field(b"");
1016        rec.push_field(b"");
1017
1018        assert_eq!(rec.len(), 2);
1019        assert_eq!(rec.get(0), Some(b("")));
1020        assert_eq!(rec.get(1), Some(b("")));
1021        assert_eq!(rec.get(2), None);
1022        assert_eq!(rec.get(3), None);
1023    }
1024
1025    #[test]
1026    fn empty_surround_1() {
1027        let mut rec = ByteRecord::new();
1028        rec.push_field(b"foo");
1029        rec.push_field(b"");
1030        rec.push_field(b"quux");
1031
1032        assert_eq!(rec.len(), 3);
1033        assert_eq!(rec.get(0), Some(b("foo")));
1034        assert_eq!(rec.get(1), Some(b("")));
1035        assert_eq!(rec.get(2), Some(b("quux")));
1036        assert_eq!(rec.get(3), None);
1037        assert_eq!(rec.get(4), None);
1038    }
1039
1040    #[test]
1041    fn empty_surround_2() {
1042        let mut rec = ByteRecord::new();
1043        rec.push_field(b"foo");
1044        rec.push_field(b"");
1045        rec.push_field(b"quux");
1046        rec.push_field(b"");
1047
1048        assert_eq!(rec.len(), 4);
1049        assert_eq!(rec.get(0), Some(b("foo")));
1050        assert_eq!(rec.get(1), Some(b("")));
1051        assert_eq!(rec.get(2), Some(b("quux")));
1052        assert_eq!(rec.get(3), Some(b("")));
1053        assert_eq!(rec.get(4), None);
1054        assert_eq!(rec.get(5), None);
1055    }
1056
1057    #[test]
1058    fn utf8_error_1() {
1059        let mut rec = ByteRecord::new();
1060        rec.push_field(b"foo");
1061        rec.push_field(b"b\xFFar");
1062
1063        let err = StringRecord::from_byte_record(rec).unwrap_err();
1064        assert_eq!(err.utf8_error().field(), 1);
1065        assert_eq!(err.utf8_error().valid_up_to(), 1);
1066    }
1067
1068    #[test]
1069    fn utf8_error_2() {
1070        let mut rec = ByteRecord::new();
1071        rec.push_field(b"\xFF");
1072
1073        let err = StringRecord::from_byte_record(rec).unwrap_err();
1074        assert_eq!(err.utf8_error().field(), 0);
1075        assert_eq!(err.utf8_error().valid_up_to(), 0);
1076    }
1077
1078    #[test]
1079    fn utf8_error_3() {
1080        let mut rec = ByteRecord::new();
1081        rec.push_field(b"a\xFF");
1082
1083        let err = StringRecord::from_byte_record(rec).unwrap_err();
1084        assert_eq!(err.utf8_error().field(), 0);
1085        assert_eq!(err.utf8_error().valid_up_to(), 1);
1086    }
1087
1088    #[test]
1089    fn utf8_error_4() {
1090        let mut rec = ByteRecord::new();
1091        rec.push_field(b"a");
1092        rec.push_field(b"b");
1093        rec.push_field(b"c");
1094        rec.push_field(b"d");
1095        rec.push_field(b"xyz\xFF");
1096
1097        let err = StringRecord::from_byte_record(rec).unwrap_err();
1098        assert_eq!(err.utf8_error().field(), 4);
1099        assert_eq!(err.utf8_error().valid_up_to(), 3);
1100    }
1101
1102    #[test]
1103    fn utf8_error_5() {
1104        let mut rec = ByteRecord::new();
1105        rec.push_field(b"a");
1106        rec.push_field(b"b");
1107        rec.push_field(b"c");
1108        rec.push_field(b"d");
1109        rec.push_field(b"\xFFxyz");
1110
1111        let err = StringRecord::from_byte_record(rec).unwrap_err();
1112        assert_eq!(err.utf8_error().field(), 4);
1113        assert_eq!(err.utf8_error().valid_up_to(), 0);
1114    }
1115
1116    // This tests a tricky case where a single field on its own isn't valid
1117    // UTF-8, but the concatenation of all fields is.
1118    #[test]
1119    fn utf8_error_6() {
1120        let mut rec = ByteRecord::new();
1121        rec.push_field(b"a\xc9");
1122        rec.push_field(b"\x91b");
1123
1124        let err = StringRecord::from_byte_record(rec).unwrap_err();
1125        assert_eq!(err.utf8_error().field(), 0);
1126        assert_eq!(err.utf8_error().valid_up_to(), 1);
1127    }
1128
1129    // This tests that we can always clear a `ByteRecord` and get a guaranteed
1130    // successful conversion to UTF-8. This permits reusing the allocation.
1131    #[test]
1132    fn utf8_clear_ok() {
1133        let mut rec = ByteRecord::new();
1134        rec.push_field(b"\xFF");
1135        assert!(StringRecord::from_byte_record(rec).is_err());
1136
1137        let mut rec = ByteRecord::new();
1138        rec.push_field(b"\xFF");
1139        rec.clear();
1140        assert!(StringRecord::from_byte_record(rec).is_ok());
1141    }
1142
1143    #[test]
1144    fn iter() {
1145        let data = vec!["foo", "bar", "baz", "quux", "wat"];
1146        let rec = ByteRecord::from(&*data);
1147        let got: Vec<&str> =
1148            rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
1149        assert_eq!(data, got);
1150    }
1151
1152    #[test]
1153    fn iter_reverse() {
1154        let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
1155        let rec = ByteRecord::from(&*data);
1156        let got: Vec<&str> = rec
1157            .iter()
1158            .rev()
1159            .map(|x| ::std::str::from_utf8(x).unwrap())
1160            .collect();
1161        data.reverse();
1162        assert_eq!(data, got);
1163    }
1164
1165    #[test]
1166    fn iter_forward_and_reverse() {
1167        let data = vec!["foo", "bar", "baz", "quux", "wat"];
1168        let rec = ByteRecord::from(data);
1169        let mut it = rec.iter();
1170
1171        assert_eq!(it.next_back(), Some(b("wat")));
1172        assert_eq!(it.next(), Some(b("foo")));
1173        assert_eq!(it.next(), Some(b("bar")));
1174        assert_eq!(it.next_back(), Some(b("quux")));
1175        assert_eq!(it.next(), Some(b("baz")));
1176        assert_eq!(it.next_back(), None);
1177        assert_eq!(it.next(), None);
1178    }
1179
1180    // Check that record equality respects field boundaries.
1181    //
1182    // Regression test for #138.
1183    #[test]
1184    fn eq_field_boundaries() {
1185        let test1 = ByteRecord::from(vec!["12", "34"]);
1186        let test2 = ByteRecord::from(vec!["123", "4"]);
1187
1188        assert_ne!(test1, test2);
1189    }
1190
1191    // Check that record equality respects number of fields.
1192    //
1193    // Regression test for #138.
1194    #[test]
1195    fn eq_record_len() {
1196        let test1 = ByteRecord::from(vec!["12", "34", "56"]);
1197        let test2 = ByteRecord::from(vec!["12", "34"]);
1198        assert_ne!(test1, test2);
1199    }
1200}