Skip to main content

columnar/
string.rs

1use super::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize, Borrow};
2
/// A stand-in for `Vec<String>`.
///
/// The bytes of all strings are stored back to back in `values`, while `bounds`
/// records, for each string, the offset in `values` at which that string ends.
/// String `i` therefore occupies the bytes from `bounds[i - 1]` (or zero when
/// `i == 0`) up to, but not including, `bounds[i]`.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
    /// Bounds container; provides indexed access to offsets.
    ///
    /// Entry `i` is the end offset of string `i` within `values`.
    pub bounds: BC,
    /// Values container; provides slice access to bytes.
    ///
    /// Holds the concatenated bytes of every stored string.
    pub values: VC,
}
12
13impl Columnar for String {
14    #[inline(always)]
15    fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
16        self.clear();
17        self.push_str(other);
18    }
19    #[inline(always)]
20    fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other.to_string() }
21    type Container = Strings;
22}
23
24impl Columnar for Box<str> {
25    #[inline(always)]
26    fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
27        let mut s = String::from(std::mem::take(self));
28        s.clear();
29        s.push_str(other);
30        *self = s.into_boxed_str();
31    }
32    #[inline(always)]
33    fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { Self::from(other) }
34    type Container = Strings;
35}
36
37impl<BC: crate::common::BorrowIndexAs<u64>> Borrow for Strings<BC, Vec<u8>> {
38    type Ref<'a> = &'a str;
39    type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
40    #[inline(always)]
41    fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
42        Strings {
43            bounds: self.bounds.borrow(),
44            values: self.values.borrow(),
45        }
46    }
47    #[inline(always)]
48    fn reborrow<'c, 'a: 'c>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'c> where BC: 'a {
49        Strings {
50            bounds: BC::reborrow(thing.bounds),
51            values: thing.values,
52        }
53    }
54    #[inline(always)]
55    fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
56}
57
58impl<BC: crate::common::PushIndexAs<u64>> Container for Strings<BC, Vec<u8>> {
59    #[inline(always)]
60    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
61        if !range.is_empty() {
62            // Imported bounds will be relative to this starting offset.
63            let values_len = self.values.len() as u64;
64
65            // Push all bytes that we can, all at once.
66            let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
67            let other_upper = other.bounds.index_as(range.end-1);
68            self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
69
70            // Each bound needs to be shifted by `values_len - other_lower`.
71            if values_len == other_lower {
72                self.bounds.extend_from_self(other.bounds, range);
73            }
74            else {
75                for index in range {
76                    let shifted = other.bounds.index_as(index) - other_lower + values_len;
77                    self.bounds.push(&shifted)
78                }
79            }
80        }
81    }
82
83    fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
84        self.bounds.reserve_for(selves.clone().map(|x| x.bounds));
85        self.values.reserve_for(selves.map(|x| x.values));
86    }
87
88}
89
90impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
91    #[inline(always)]
92    fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
93        crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
94    }
95}
96impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
97    #[inline(always)]
98    fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
99        Self {
100            bounds: crate::FromBytes::from_bytes(bytes),
101            values: crate::FromBytes::from_bytes(bytes),
102        }
103    }
104}
105
106impl<BC: Len, VC> Len for Strings<BC, VC> {
107    #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
108}
109
110impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
111    type Ref = &'a str;
112    #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
113        let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
114        let upper = self.bounds.index_as(index);
115        let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
116        let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
117        std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
118    }
119}
120impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
121    type Ref = &'a str;
122    #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
123        let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
124        let upper = self.bounds.index_as(index);
125        let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
126        let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
127        std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
128    }
129}
130
131// This is a simpler implementation, but it leads to a performance regression
132// for Strings and str because it loses access to `Vec::extend_from_slice`.
133//
134// impl<BC: Push<u64>, D: std::fmt::Display> Push<D> for Strings<BC> {
135//     #[inline(always)]
136//     fn push(&mut self, item: D) {
137//         use std::io::Write;
138//         write!(self.values, "{}", item).unwrap();
139//         self.bounds.push(self.values.len() as u64);
140//     }
141// }
142
143impl<BC: for<'a> Push<&'a u64>> Push<&String> for Strings<BC> {
144    #[inline(always)] fn push(&mut self, item: &String) {
145        self.values.extend_from_slice(item.as_bytes());
146        self.bounds.push(&(self.values.len() as u64));
147    }
148}
149impl<BC: for<'a> Push<&'a u64>> Push<&str> for Strings<BC> {
150    #[inline]
151    fn push(&mut self, item: &str) {
152        self.values.extend_from_slice(item.as_bytes());
153        self.bounds.push(&(self.values.len() as u64));
154    }
155}
156impl<BC: for<'a> Push<&'a u64>> Push<&Box<str>> for Strings<BC> {
157    #[inline]
158    fn push(&mut self, item: &Box<str>) {
159        self.values.extend_from_slice(item.as_bytes());
160        self.bounds.push(&(self.values.len() as u64));
161    }
162}
163impl<'a, BC: for<'b> Push<&'b u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
164    #[inline]
165    fn push(&mut self, item: std::fmt::Arguments<'a>) {
166        use std::io::Write;
167        self.values.write_fmt(item).expect("write_fmt failed");
168        self.bounds.push(&(self.values.len() as u64));
169    }
170}
171impl<'a, 'b, BC: for<'c> Push<&'c u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
172    #[inline]
173    fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
174        use std::io::Write;
175        self.values.write_fmt(*item).expect("write_fmt failed");
176        self.bounds.push(&(self.values.len() as u64));
177    }
178}
179impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
180    #[inline(always)]
181    fn clear(&mut self) {
182        self.bounds.clear();
183        self.values.clear();
184    }
185}
186impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
187    #[inline(always)]
188    fn heap_size(&self) -> (usize, usize) {
189        let (l0, c0) = self.bounds.heap_size();
190        let (l1, c1) = self.values.heap_size();
191        (l0 + l1, c0 + c1)
192    }
193}