csv_async/
string_record.rs

Help
1use std::fmt;
2use std::iter::FromIterator;
3use std::ops::{self, Range};
4use std::result;
5use std::str;
6
7#[cfg(not(feature = "tokio"))]
8use futures::io;
9
10#[cfg(feature = "tokio")]
11use tokio::io;
12
13#[cfg(feature = "with_serde")]
14use serde::de::Deserialize;
15
16use crate::async_readers::AsyncReaderImpl;
17use crate::byte_record::{ByteRecord, ByteRecordIter, Position};
18#[cfg(feature = "with_serde")]
19use crate::deserializer::deserialize_string_record;
20use crate::error::{Error, ErrorKind, FromUtf8Error, Result};
21
22/// A single CSV record stored as valid UTF-8 bytes.
23///
24/// A string record permits reading or writing CSV rows that are valid UTF-8.
25/// If string records are used to read CSV data that is not valid UTF-8, then
26/// the CSV reader will return an invalid UTF-8 error. If you do need to read
27/// possibly invalid UTF-8 data, then you should prefer using a
28/// [`ByteRecord`](struct.ByteRecord.html),
29/// since it makes no assumptions about UTF-8.
30///
31/// If you are using the Serde (de)serialization APIs, then you probably never
32/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
33/// are some circumstances in which you might need to use a raw record type
34/// while still using Serde. For example, if you need to deserialize possibly
35/// invalid UTF-8 fields, then you'll need to first read your record into a
36/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
37/// reason for using the raw record deserialization APIs is if you're using
38/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
39///
40/// Two `StringRecord`s are compared on the basis of their field data. Any
41/// position information associated with the records is ignored.
42#[derive(Clone, Eq)]
43pub struct StringRecord(ByteRecord);
44
45impl PartialEq for StringRecord {
46    fn eq(&self, other: &StringRecord) -> bool {
47        self.0.iter_eq(&other.0)
48    }
49}
50
51impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for StringRecord {
52    fn eq(&self, other: &Vec<T>) -> bool {
53        self.0.iter_eq(other)
54    }
55}
56
57impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a StringRecord {
58    fn eq(&self, other: &Vec<T>) -> bool {
59        self.0.iter_eq(other)
60    }
61}
62
63impl<T: AsRef<[u8]>> PartialEq<[T]> for StringRecord {
64    fn eq(&self, other: &[T]) -> bool {
65        self.0.iter_eq(other)
66    }
67}
68
69impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a StringRecord {
70    fn eq(&self, other: &[T]) -> bool {
71        self.0.iter_eq(other)
72    }
73}
74
75impl fmt::Debug for StringRecord {
76    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77        let fields: Vec<&str> = self.iter().collect();
78        write!(f, "StringRecord({:?})", fields)
79    }
80}
81
82impl Default for StringRecord {
83    #[inline]
84    fn default() -> StringRecord {
85        StringRecord::new()
86    }
87}
88
89impl StringRecord {
90    /// Create a new empty `StringRecord`.
91    ///
92    /// Note that you may find the `StringRecord::from` constructor more
93    /// convenient, which is provided by an impl on the `From` trait.
94    ///
95    /// # Example: create an empty record
96    ///
97    /// ```
98    /// use csv_async::StringRecord;
99    ///
100    /// let record = StringRecord::new();
101    /// assert_eq!(record.len(), 0);
102    /// ```
103    ///
104    /// # Example: initialize a record from a `Vec`
105    ///
106    /// ```
107    /// use csv_async::StringRecord;
108    ///
109    /// let record = StringRecord::from(vec!["a", "b", "c"]);
110    /// assert_eq!(record.len(), 3);
111    /// ```
112    #[inline]
113    pub fn new() -> StringRecord {
114        StringRecord(ByteRecord::new())
115    }
116
117    /// Create a new empty `StringRecord` with the given capacity.
118    ///
119    /// `buffer` refers to the capacity of the buffer used to store the
120    /// actual row contents. `fields` refers to the number of fields one
121    /// might expect to store.
122    #[inline]
123    pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord {
124        StringRecord(ByteRecord::with_capacity(buffer, fields))
125    }
126
127    /// Create a new `StringRecord` from a `ByteRecord`.
128    ///
129    /// Note that this does UTF-8 validation. If the given `ByteRecord` does
130    /// not contain valid UTF-8, then this returns an error. The error includes
131    /// the UTF-8 error and the original `ByteRecord`.
132    ///
133    /// # Example: valid UTF-8
134    ///
135    /// ```
136    /// use std::error::Error;
137    /// use csv_async::{ByteRecord, StringRecord};
138    ///
139    /// # fn main() { example().unwrap(); }
140    /// fn example() -> Result<(), Box<dyn Error>> {
141    ///     let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
142    ///     let str_record = StringRecord::from_byte_record(byte_record)?;
143    ///     assert_eq!(str_record.len(), 3);
144    ///     Ok(())
145    /// }
146    /// ```
147    ///
148    /// # Example: invalid UTF-8
149    ///
150    /// ```
151    /// use csv_async::{ByteRecord, StringRecord};
152    ///
153    /// let byte_record = ByteRecord::from(vec![
154    ///     &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
155    /// ]);
156    /// let err = StringRecord::from_byte_record(byte_record).unwrap_err();
157    /// assert_eq!(err.utf8_error().field(), 1);
158    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
159    /// ```
160    #[inline]
161    pub fn from_byte_record(
162        record: ByteRecord,
163    ) -> result::Result<StringRecord, FromUtf8Error> {
164        match record.validate() {
165            Ok(()) => Ok(StringRecord(record)),
166            Err(err) => Err(FromUtf8Error::new(record, err)),
167        }
168    }
169
170    /// Lossily create a new `StringRecord` from a `ByteRecord`.
171    ///
172    /// This is like `StringRecord::from_byte_record`, except all invalid UTF-8
173    /// sequences are replaced with the `U+FFFD REPLACEMENT CHARACTER`, which
174    /// looks like this: �.
175    ///
176    /// # Example: valid UTF-8
177    ///
178    /// ```
179    /// use csv_async::{ByteRecord, StringRecord};
180    ///
181    /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
182    /// let str_record = StringRecord::from_byte_record_lossy(byte_record);
183    /// assert_eq!(str_record.len(), 3);
184    /// ```
185    ///
186    /// # Example: invalid UTF-8
187    ///
188    /// ```
189    /// use csv_async::{ByteRecord, StringRecord};
190    ///
191    /// let byte_record = ByteRecord::from(vec![
192    ///     &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
193    /// ]);
194    /// let str_record = StringRecord::from_byte_record_lossy(byte_record);
195    /// assert_eq!(&str_record[0], "quux");
196    /// assert_eq!(&str_record[1], "foo�bar");
197    /// assert_eq!(&str_record[2], "c");
198    /// ```
199    #[inline]
200    pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord {
201        // If the record is valid UTF-8, then take the easy path.
202        if let Ok(()) = record.validate() {
203            return StringRecord(record);
204        }
205        // TODO: We can be faster here. Not sure if it's worth it.
206        let mut str_record =
207            StringRecord::with_capacity(record.as_slice().len(), record.len());
208        for field in &record {
209            str_record.push_field(&String::from_utf8_lossy(field));
210        }
211        str_record
212    }
213
214    /// Returns an iterator over all fields in this record.
215    ///
216    /// # Example
217    ///
218    /// This example shows how to iterate over each field in a `StringRecord`.
219    ///
220    /// ```
221    /// use csv_async::StringRecord;
222    ///
223    /// let record = StringRecord::from(vec!["a", "b", "c"]);
224    /// for field in record.iter() {
225    ///     assert!(field == "a" || field == "b" || field == "c");
226    /// }
227    /// ```
228    #[inline]
229    pub fn iter(&self) -> StringRecordIter {
230        self.into_iter()
231    }
232
233    /// Return the field at zero-based index `i`.
234    ///
235    /// If no field at index `i` exists, then this returns `None`.
236    ///
237    /// # Example
238    ///
239    /// ```
240    /// use csv_async::StringRecord;
241    ///
242    /// let record = StringRecord::from(vec!["a", "b", "c"]);
243    /// assert_eq!(record.get(1), Some("b"));
244    /// assert_eq!(record.get(3), None);
245    /// ```
246    #[inline]
247    pub fn get(&self, i: usize) -> Option<&str> {
248        self.0.get(i).map(|bytes| {
249            debug_assert!(str::from_utf8(bytes).is_ok());
250            // This is safe because we guarantee that all string records
251            // have a valid UTF-8 buffer. It's also safe because we
252            // individually check each field for valid UTF-8.
253            unsafe { str::from_utf8_unchecked(bytes) }
254        })
255    }
256
257    /// Returns true if and only if this record is empty.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// use csv_async::StringRecord;
263    ///
264    /// assert!(StringRecord::new().is_empty());
265    /// ```
266    #[inline]
267    pub fn is_empty(&self) -> bool {
268        self.len() == 0
269    }
270
271    /// Returns the number of fields in this record.
272    ///
273    /// # Example
274    ///
275    /// ```
276    /// use csv_async::StringRecord;
277    ///
278    /// let record = StringRecord::from(vec!["a", "b", "c"]);
279    /// assert_eq!(record.len(), 3);
280    /// ```
281    #[inline]
282    pub fn len(&self) -> usize {
283        self.0.len()
284    }
285
286    /// Truncate this record to `n` fields.
287    ///
288    /// If `n` is greater than the number of fields in this record, then this
289    /// has no effect.
290    ///
291    /// # Example
292    ///
293    /// ```
294    /// use csv_async::StringRecord;
295    ///
296    /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
297    /// assert_eq!(record.len(), 3);
298    /// record.truncate(1);
299    /// assert_eq!(record.len(), 1);
300    /// assert_eq!(record, vec!["a"]);
301    /// ```
302    #[inline]
303    pub fn truncate(&mut self, n: usize) {
304        self.0.truncate(n);
305    }
306
307    /// Clear this record so that it has zero fields.
308    ///
309    /// Note that it is not necessary to clear the record to reuse it with
310    /// the CSV reader.
311    ///
312    /// # Example
313    ///
314    /// ```
315    /// use csv_async::StringRecord;
316    ///
317    /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
318    /// assert_eq!(record.len(), 3);
319    /// record.clear();
320    /// assert_eq!(record.len(), 0);
321    /// ```
322    #[inline]
323    pub fn clear(&mut self) {
324        self.0.clear();
325    }
326
327    /// Trim the fields of this record so that leading and trailing whitespace
328    /// is removed.
329    ///
330    /// This method uses the Unicode definition of whitespace.
331    ///
332    /// # Example
333    ///
334    /// ```
335    /// use csv_async::StringRecord;
336    ///
337    /// let mut record = StringRecord::from(vec![
338    ///     "  ", "\u{3000}\tfoo ", "bar  ", "b a z",
339    /// ]);
340    /// record.trim();
341    /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
342    /// ```
343    pub fn trim(&mut self) {
344        let length = self.len();
345        if length == 0 {
346            return;
347        }
348        // TODO: We could likely do this in place, but for now, we allocate.
349        let mut trimmed =
350            StringRecord::with_capacity(self.as_slice().len(), self.len());
351        trimmed.set_position(self.position().cloned());
352        for field in &*self {
353            trimmed.push_field(field.trim());
354        }
355        *self = trimmed;
356    }
357
358    /// Add a new field to this record.
359    ///
360    /// # Example
361    ///
362    /// ```
363    /// use csv_async::StringRecord;
364    ///
365    /// let mut record = StringRecord::new();
366    /// record.push_field("foo");
367    /// assert_eq!(&record[0], "foo");
368    /// ```
369    #[inline]
370    pub fn push_field(&mut self, field: &str) {
371        self.0.push_field(field.as_bytes());
372    }
373
374    /// Return the position of this record, if available.
375    ///
376    /// # Example
377    ///
378    /// ```
379    /// use std::error::Error;
380    /// use futures::stream::{self, StreamExt};
381    /// use csv_async::{StringRecord, AsyncReaderBuilder};
382    ///
383    /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); }
384    /// async fn example() -> Result<(), Box<dyn Error>> {
385    ///     let mut record = StringRecord::new();
386    ///     let mut rdr = AsyncReaderBuilder::new()
387    ///         .has_headers(false)
388    ///         .create_reader("a,b,c\nx,y,z".as_bytes()
389    ///         );
390    ///     assert!(rdr.read_record(&mut record).await?);
391    ///     {
392    ///         let pos = record.position().expect("a record position");
393    ///         assert_eq!(pos.byte(), 0);
394    ///         assert_eq!(pos.line(), 1);
395    ///         assert_eq!(pos.record(), 0);
396    ///     }
397    ///
398    ///     assert!(rdr.read_record(&mut record).await?);
399    ///     {
400    ///         let pos = record.position().expect("a record position");
401    ///         assert_eq!(pos.byte(), 6);
402    ///         assert_eq!(pos.line(), 2);
403    ///         assert_eq!(pos.record(), 1);
404    ///     }
405    ///
406    ///     // Finish the CSV reader for good measure.
407    ///     assert!(!rdr.read_record(&mut record).await?);
408    ///     Ok(())
409    /// }
410    /// ```
411    #[inline]
412    pub fn position(&self) -> Option<&Position> {
413        self.0.position()
414    }
415
416    /// Set the position of this record.
417    ///
418    /// # Example
419    ///
420    /// ```
421    /// use csv_async::{StringRecord, Position};
422    ///
423    /// let mut record = StringRecord::from(vec!["a", "b", "c"]);
424    /// let mut pos = Position::new();
425    /// pos.set_byte(100);
426    /// pos.set_line(4);
427    /// pos.set_record(2);
428    ///
429    /// record.set_position(Some(pos.clone()));
430    /// assert_eq!(record.position(), Some(&pos));
431    /// ```
432    #[inline]
433    pub fn set_position(&mut self, pos: Option<Position>) {
434        self.0.set_position(pos);
435    }
436
437    /// Return the start and end position of a field in this record.
438    ///
439    /// If no such field exists at the given index, then return `None`.
440    ///
441    /// The range returned can be used with the slice returned by `as_slice`.
442    /// Namely, the range returned is guaranteed to start and end at valid
443    /// UTF-8 sequence boundaries.
444    ///
445    /// # Example
446    ///
447    /// ```
448    /// use csv_async::StringRecord;
449    ///
450    /// let record = StringRecord::from(vec!["foo", "quux", "z"]);
451    /// let range = record.range(1).expect("a record range");
452    /// assert_eq!(&record.as_slice()[range], "quux");
453    /// ```
454    #[inline]
455    pub fn range(&self, i: usize) -> Option<Range<usize>> {
456        self.0.range(i)
457    }
458
459    /// Return the entire row as a single string slice. The slice returned
460    /// stores all fields contiguously. The boundaries of each field can be
461    /// determined via the `range` method.
462    ///
463    /// # Example
464    ///
465    /// ```
466    /// use csv_async::StringRecord;
467    ///
468    /// let record = StringRecord::from(vec!["foo", "quux", "z"]);
469    /// assert_eq!(record.as_slice(), "fooquuxz");
470    /// ```
471    #[inline]
472    pub fn as_slice(&self) -> &str {
473        debug_assert!(str::from_utf8(self.0.as_slice()).is_ok());
474        // This is safe because we guarantee that each field is valid UTF-8.
475        // If each field is valid UTF-8, then the entire buffer (up to the end
476        // of the last field) must also be valid UTF-8.
477        unsafe { str::from_utf8_unchecked(self.0.as_slice()) }
478    }
479
480    /// Return a reference to this record's raw
481    /// [`ByteRecord`](struct.ByteRecord.html).
482    ///
483    /// # Example
484    ///
485    /// ```
486    /// use csv_async::StringRecord;
487    ///
488    /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
489    /// let byte_record = str_record.as_byte_record();
490    /// assert_eq!(&byte_record[2], b"c");
491    /// ```
492    #[inline]
493    pub fn as_byte_record(&self) -> &ByteRecord {
494        &self.0
495    }
496
497    /// Convert this `StringRecord` into a
498    /// [`ByteRecord`](struct.ByteRecord.html).
499    ///
500    /// # Example
501    ///
502    /// ```
503    /// use csv_async::StringRecord;
504    ///
505    /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
506    /// let byte_record = str_record.into_byte_record();
507    /// assert_eq!(&byte_record[2], b"c");
508    /// ```
509    ///
510    /// Note that this can also be achieved using the `From` impl:
511    ///
512    /// ```
513    /// use csv_async::{ByteRecord, StringRecord};
514    ///
515    /// // Using ByteRecord::from...
516    /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
517    /// assert_eq!(ByteRecord::from(str_record).len(), 3);
518    ///
519    /// // Using StringRecord::into...
520    /// let str_record = StringRecord::from(vec!["a", "b", "c"]);
521    /// let byte_record: ByteRecord = str_record.into();
522    /// assert_eq!(byte_record.len(), 3);
523    /// ```
524    #[inline]
525    pub fn into_byte_record(self) -> ByteRecord {
526        self.0
527    }
528
529    /// Deserialize this record.
530    ///
531    /// The `D` type parameter refers to the type that this record should be
532    /// deserialized into. The `'de` lifetime refers to the lifetime of the
533    /// `StringRecord`. The `'de` lifetime permits deserializing into structs
534    /// that borrow field data from this record.
535    ///
536    /// An optional `headers` parameter permits deserializing into a struct
537    /// based on its field names (corresponding to header values) rather than
538    /// the order in which the fields are defined.
539    ///
540    /// # Example: without headers
541    ///
542    /// This shows how to deserialize a single row into a struct based on the
543    /// order in which fields occur. This example also shows how to borrow
544    /// fields from the `StringRecord`, which results in zero allocation
545    /// deserialization.
546    ///
547    /// ```
548    /// use std::error::Error;
549    ///
550    /// use csv_async::StringRecord;
551    /// use serde::Deserialize;
552    ///
553    /// #[derive(Deserialize)]
554    /// struct Row<'a> {
555    ///     city: &'a str,
556    ///     country: &'a str,
557    ///     population: u64,
558    /// }
559    ///
560    /// # fn main() { example().unwrap() }
561    /// fn example() -> Result<(), Box<dyn Error>> {
562    ///     let record = StringRecord::from(vec![
563    ///         "Boston", "United States", "4628910",
564    ///     ]);
565    ///
566    ///     let row: Row = record.deserialize(None)?;
567    ///     assert_eq!(row.city, "Boston");
568    ///     assert_eq!(row.country, "United States");
569    ///     assert_eq!(row.population, 4628910);
570    ///     Ok(())
571    /// }
572    /// ```
573    ///
574    /// # Example: with headers
575    ///
576    /// This example is like the previous one, but shows how to deserialize
577    /// into a struct based on the struct's field names. For this to work,
578    /// you must provide a header row.
579    ///
580    /// This example also shows that you can deserialize into owned data
581    /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
582    ///
583    /// ```
584    /// use std::error::Error;
585    ///
586    /// use csv_async::StringRecord;
587    /// use serde::Deserialize;
588    ///
589    /// #[derive(Deserialize)]
590    /// struct Row {
591    ///     city: String,
592    ///     country: String,
593    ///     population: u64,
594    /// }
595    ///
596    /// # fn main() { example().unwrap() }
597    /// fn example() -> Result<(), Box<dyn Error>> {
598    ///     // Notice that the fields are not in the same order
599    ///     // as the fields in the struct!
600    ///     let header = StringRecord::from(vec![
601    ///         "country", "city", "population",
602    ///     ]);
603    ///     let record = StringRecord::from(vec![
604    ///         "United States", "Boston", "4628910",
605    ///     ]);
606    ///
607    ///     let row: Row = record.deserialize(Some(&header))?;
608    ///     assert_eq!(row.city, "Boston");
609    ///     assert_eq!(row.country, "United States");
610    ///     assert_eq!(row.population, 4628910);
611    ///     Ok(())
612    /// }
613    /// ```
614    #[cfg(feature = "with_serde")]
615    pub fn deserialize<'de, D: Deserialize<'de>>(
616        &'de self,
617        headers: Option<&'de StringRecord>,
618    ) -> Result<D> {
619        deserialize_string_record(self, headers)
620    }
621    
622    /// A safe function for reading CSV data into a `StringRecord`.
623    ///
624    /// This relies on the internal representation of `StringRecord`.
625    #[inline(always)]
626    pub(crate) async fn read<R: io::AsyncRead + std::marker::Unpin>(
627        &mut self,
628        rdr: &mut AsyncReaderImpl<R>,
629    ) -> Result<bool> {
630        // SAFETY: This code is critical to upholding the safety of other code
631        // blocks in this module. Namely, after calling `read_byte_record`,
632        // it is possible for `record` to contain invalid UTF-8. We check for
633        // this in the `validate` method, and if it does have invalid UTF-8, we
634        // clear the record. (It is bad for `record` to contain invalid UTF-8
635        // because other accessor methods, like `get`, assume that every field
636        // is valid UTF-8.)
637        let pos = rdr.position().clone();
638        let read_res = rdr.read_byte_record(&mut self.0).await;
639        let utf8_res = match self.0.validate() {
640            Ok(()) => Ok(()),
641            Err(err) => {
642                // If this record isn't valid UTF-8, then completely wipe it.
643                self.0.clear();
644                Err(err)
645            }
646        };
647        match (read_res, utf8_res) {
648            (Err(err), _) => Err(err),
649            (Ok(_), Err(err)) => {
650                Err(Error::new(ErrorKind::Utf8 { pos: Some(pos), err }))
651            }
652            (Ok(eof), Ok(())) => Ok(eof),
653        }
654    }
655}
656
657impl ops::Index<usize> for StringRecord {
658    type Output = str;
659    #[inline]
660    fn index(&self, i: usize) -> &str {
661        self.get(i).unwrap()
662    }
663}
664
665impl<T: AsRef<str>> From<Vec<T>> for StringRecord {
666    #[inline]
667    fn from(xs: Vec<T>) -> StringRecord {
668        StringRecord::from_iter(xs)
669    }
670}
671
672impl<'a, T: AsRef<str>> From<&'a [T]> for StringRecord {
673    #[inline]
674    fn from(xs: &'a [T]) -> StringRecord {
675        StringRecord::from_iter(xs)
676    }
677}
678
679impl<T: AsRef<str>> FromIterator<T> for StringRecord {
680    #[inline]
681    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> StringRecord {
682        let mut record = StringRecord::new();
683        record.extend(iter);
684        record
685    }
686}
687
688impl<T: AsRef<str>> Extend<T> for StringRecord {
689    #[inline]
690    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
691        for x in iter {
692            self.push_field(x.as_ref());
693        }
694    }
695}
696
697impl<'a> IntoIterator for &'a StringRecord {
698    type IntoIter = StringRecordIter<'a>;
699    type Item = &'a str;
700
701    #[inline]
702    fn into_iter(self) -> StringRecordIter<'a> {
703        StringRecordIter(self.0.iter())
704    }
705}
706
707/// An iterator over the fields in a string record.
708///
709/// The `'r` lifetime variable refers to the lifetime of the `StringRecord`
710/// that is being iterated over.
711#[derive(Clone)]
712pub struct StringRecordIter<'r>(ByteRecordIter<'r>);
713
714impl<'r> Iterator for StringRecordIter<'r> {
715    type Item = &'r str;
716
717    #[inline]
718    fn next(&mut self) -> Option<&'r str> {
719        self.0.next().map(|bytes| {
720            debug_assert!(str::from_utf8(bytes).is_ok());
721            // See StringRecord::get for safety argument.
722            unsafe { str::from_utf8_unchecked(bytes) }
723        })
724    }
725
726    #[inline]
727    fn size_hint(&self) -> (usize, Option<usize>) {
728        self.0.size_hint()
729    }
730
731    #[inline]
732    fn count(self) -> usize {
733        self.0.len()
734    }
735}
736
737impl<'r> DoubleEndedIterator for StringRecordIter<'r> {
738    #[inline]
739    fn next_back(&mut self) -> Option<&'r str> {
740        self.0.next_back().map(|bytes| {
741            debug_assert!(str::from_utf8(bytes).is_ok());
742            // See StringRecord::get for safety argument.
743            unsafe { str::from_utf8_unchecked(bytes) }
744        })
745    }
746}
747
#[cfg(test)]
mod tests {
    use crate::string_record::StringRecord;

    /// Build a record from `fields` and return it after trimming.
    fn trimmed(fields: Vec<&str>) -> StringRecord {
        let mut rec = StringRecord::from(fields);
        rec.trim();
        rec
    }

    #[test]
    fn trim_front() {
        let one = trimmed(vec![" abc"]);
        assert_eq!(one.get(0), Some("abc"));

        let two = trimmed(vec![" abc", "  xyz"]);
        assert_eq!(two.get(0), Some("abc"));
        assert_eq!(two.get(1), Some("xyz"));
    }

    #[test]
    fn trim_back() {
        let one = trimmed(vec!["abc "]);
        assert_eq!(one.get(0), Some("abc"));

        let two = trimmed(vec!["abc ", "xyz  "]);
        assert_eq!(two.get(0), Some("abc"));
        assert_eq!(two.get(1), Some("xyz"));
    }

    #[test]
    fn trim_both() {
        let one = trimmed(vec![" abc "]);
        assert_eq!(one.get(0), Some("abc"));

        let two = trimmed(vec![" abc ", "  xyz  "]);
        assert_eq!(two.get(0), Some("abc"));
        assert_eq!(two.get(1), Some("xyz"));
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        let rec = trimmed(vec![""]);
        assert_eq!(rec.get(0), Some(""));
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        let rec = trimmed(vec!["", ""]);
        assert_eq!(rec.get(0), Some(""));
        assert_eq!(rec.get(1), Some(""));
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        // A record with no fields at all, built without going through
        // `From`.
        let mut rec = StringRecord::new();
        rec.trim();
        assert_eq!(rec.as_slice().len(), 0);
    }

    #[test]
    fn trim_whitespace_only() {
        // A single field made up entirely of Unicode whitespace.
        let rec = trimmed(vec![
            "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}",
        ]);
        assert_eq!(rec.get(0), Some(""));
    }

    // Check that record equality respects field boundaries.
    //
    // Regression test for #138.
    #[test]
    fn eq_field_boundaries() {
        let left = StringRecord::from(vec!["12", "34"]);
        let right = StringRecord::from(vec!["123", "4"]);

        assert_ne!(left, right);
    }

    // Check that record equality respects number of fields.
    //
    // Regression test for #138.
    #[test]
    fn eq_record_len() {
        let longer = StringRecord::from(vec!["12", "34", "56"]);
        let shorter = StringRecord::from(vec!["12", "34"]);
        assert_ne!(longer, shorter);
    }
}
csv_async/string_record.rs

csv_async/
string_record.rs