#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
pub trait OffsetContainer<T>: Default + Extend<T> {
fn push(&mut self, item: T);
fn index(&self, index: usize) -> T;
fn clear(&mut self);
fn len(&self) -> usize;
#[inline]
#[must_use]
fn is_empty(&self) -> bool {
self.len() == 0
}
fn reserve(&mut self, additional: usize);
fn heap_size<F: FnMut(usize, usize)>(&self, callback: F);
}
#[derive(Eq, PartialEq, Debug, Default, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum OffsetStride {
#[default]
Empty,
Zero,
Striding(usize, usize),
Saturated(usize, usize, usize),
}
impl OffsetStride {
#[must_use]
pub fn push(&mut self, item: usize) -> bool {
match self {
OffsetStride::Empty => {
if item == 0 {
*self = OffsetStride::Zero;
true
} else {
false
}
}
OffsetStride::Zero => {
*self = OffsetStride::Striding(item, 2);
true
}
OffsetStride::Striding(stride, count) => {
if item == *stride * *count {
*count += 1;
true
} else if item == *stride * (*count - 1) {
*self = OffsetStride::Saturated(*stride, *count, 1);
true
} else {
false
}
}
OffsetStride::Saturated(stride, count, reps) => {
if item == *stride * (*count - 1) {
*reps += 1;
true
} else {
false
}
}
}
}
#[must_use]
pub fn index(&self, index: usize) -> usize {
match self {
OffsetStride::Empty => {
panic!("Empty OffsetStride")
}
OffsetStride::Zero => 0,
OffsetStride::Striding(stride, _steps) => *stride * index,
OffsetStride::Saturated(stride, steps, _reps) => {
if index < *steps {
*stride * index
} else {
*stride * (*steps - 1)
}
}
}
}
#[must_use]
pub fn len(&self) -> usize {
match self {
OffsetStride::Empty => 0,
OffsetStride::Zero => 1,
OffsetStride::Striding(_stride, steps) => *steps,
OffsetStride::Saturated(_stride, steps, reps) => *steps + *reps,
}
}
#[must_use]
pub fn is_empty(&self) -> bool {
matches!(self, OffsetStride::Empty)
}
pub fn clear(&mut self) {
*self = Self::default();
}
}
#[derive(Eq, PartialEq, Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OffsetList {
pub smol: Vec<u32>,
pub chonk: Vec<u64>,
}
impl OffsetList {
#[must_use]
pub fn with_capacity(cap: usize) -> Self {
Self {
smol: Vec::with_capacity(cap),
chonk: Vec::new(),
}
}
pub fn push(&mut self, offset: usize) {
if self.chonk.is_empty() {
if let Ok(smol) = offset.try_into() {
self.smol.push(smol);
} else {
self.chonk.push(offset.try_into().unwrap());
}
} else {
self.chonk.push(offset.try_into().unwrap());
}
}
#[must_use]
pub fn index(&self, index: usize) -> usize {
if index < self.smol.len() {
self.smol[index].try_into().unwrap()
} else {
self.chonk[index - self.smol.len()].try_into().unwrap()
}
}
#[must_use]
pub fn len(&self) -> usize {
self.smol.len() + self.chonk.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn reserve(&mut self, additional: usize) {
self.smol.reserve(additional);
}
pub fn clear(&mut self) {
self.smol.clear();
self.chonk.clear();
}
fn heap_size<F: FnMut(usize, usize)>(&self, mut callback: F) {
self.smol.heap_size(&mut callback);
self.chonk.heap_size(callback);
}
}
#[derive(Eq, PartialEq, Default, Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OffsetOptimized {
strided: OffsetStride,
spilled: OffsetList,
}
impl OffsetContainer<usize> for OffsetOptimized {
fn push(&mut self, item: usize) {
if self.spilled.is_empty() {
let inserted = self.strided.push(item);
if !inserted {
self.spilled.push(item);
}
} else {
self.spilled.push(item);
}
}
fn index(&self, index: usize) -> usize {
if index < self.strided.len() {
self.strided.index(index)
} else {
self.spilled.index(index - self.strided.len())
}
}
fn clear(&mut self) {
self.spilled.clear();
self.strided = OffsetStride::default();
}
fn len(&self) -> usize {
self.strided.len() + self.spilled.len()
}
fn reserve(&mut self, additional: usize) {
if !self.spilled.is_empty() {
self.spilled.reserve(additional);
}
}
fn heap_size<F: FnMut(usize, usize)>(&self, callback: F) {
self.spilled.heap_size(callback);
}
}
impl Extend<usize> for OffsetOptimized {
fn extend<T: IntoIterator<Item = usize>>(&mut self, iter: T) {
for item in iter {
self.push(item);
}
}
}
impl<T: Copy> OffsetContainer<T> for Vec<T> {
#[inline]
fn push(&mut self, item: T) {
self.push(item);
}
#[inline]
#[must_use]
fn index(&self, index: usize) -> T {
self[index]
}
#[inline]
fn clear(&mut self) {
self.clear();
}
#[inline]
#[must_use]
fn len(&self) -> usize {
self.len()
}
#[inline]
fn reserve(&mut self, additional: usize) {
self.reserve(additional);
}
fn heap_size<F: FnMut(usize, usize)>(&self, mut callback: F) {
let size_of_t = std::mem::size_of::<T>();
callback(self.len() * size_of_t, self.capacity() * size_of_t);
}
}
#[cfg(test)]
mod tests {
use crate::impls::deduplicate::ConsecutiveOffsetPairs;
use crate::{Push, Region, SliceRegion, StringRegion};
use super::*;
#[test]
fn test_offset_optimized() {
fn copy<R: Region + Push<T>, T>(r: &mut R, item: T) -> R::Index {
r.push(item)
}
let mut r = SliceRegion::<
ConsecutiveOffsetPairs<StringRegion, OffsetOptimized>,
OffsetOptimized,
>::default();
let idx = copy(&mut r, ["abc"]);
assert_eq!("abc", r.index(idx).get(0))
}
#[test]
fn test_offset_optimized_clear() {
let mut oo = OffsetOptimized::default();
oo.push(0);
assert_eq!(oo.len(), 1);
oo.clear();
assert_eq!(oo.len(), 0);
assert!(oo.is_empty());
oo.push(9999999999);
assert_eq!(oo.len(), 1);
oo.clear();
assert_eq!(oo.len(), 0);
}
#[test]
fn test_offset_optimized_reserve() {
let mut oo = OffsetOptimized::default();
oo.push(9999999999);
assert_eq!(oo.len(), 1);
oo.reserve(1);
}
#[test]
fn test_offset_optimized_heap_size() {
let mut oo = OffsetOptimized::default();
oo.push(9999999999);
let mut cap = 0;
oo.heap_size(|_, ca| {
cap += ca;
});
assert!(cap > 0);
}
#[test]
fn test_offset_stride_push() {
let mut os = OffsetStride::default();
assert_eq!(os.len(), 0);
assert!(os.is_empty());
assert!(os.push(0));
assert_eq!(os.index(0), 0);
assert_eq!(os.len(), 1);
assert!(!os.is_empty());
assert!(os.push(2));
assert_eq!(os.len(), 2);
assert_eq!(os.index(1), 2);
assert!(os.push(4));
assert_eq!(os.len(), 3);
assert_eq!(os.index(2), 4);
assert!(os.push(4));
assert_eq!(os.len(), 4);
assert_eq!(os.index(3), 4);
assert!(os.push(4));
assert_eq!(os.len(), 5);
assert_eq!(os.index(1), 2);
assert_eq!(os.index(4), 4);
assert!(!os.push(5));
os.clear();
assert!(!os.push(1));
}
#[test]
fn test_chonk() {
let mut ol = OffsetList::default();
ol.push(usize::MAX);
assert_eq!(usize::MAX, ol.index(0));
}
#[test]
#[should_panic]
fn test_offset_stride_index() {
let os = OffsetStride::default();
let _ = os.index(0);
}
}