integer_encoding/
varint.rs

1use std::mem::size_of;
2
3/// Most-significant byte, == 0x80
4pub const MSB: u8 = 0b1000_0000;
5/// All bits except for the most significant. Can be used as bitmask to drop the most-signficant
6/// bit using `&` (binary-and).
7const DROP_MSB: u8 = 0b0111_1111;
8
9/// How many bytes an integer uses when being encoded as a VarInt.
10#[inline]
11fn required_encoded_space_unsigned(mut v: u64) -> usize {
12    if v == 0 {
13        return 1;
14    }
15
16    let mut logcounter = 0;
17    while v > 0 {
18        logcounter += 1;
19        v >>= 7;
20    }
21    logcounter
22}
23
24/// How many bytes an integer uses when being encoded as a VarInt.
25#[inline]
26fn required_encoded_space_signed(v: i64) -> usize {
27    required_encoded_space_unsigned(zigzag_encode(v))
28}
29
30/// Varint (variable length integer) encoding, as described in
31/// https://developers.google.com/protocol-buffers/docs/encoding.
32///
33/// Uses zigzag encoding (also described there) for signed integer representation.
34pub trait VarInt: Sized + Copy {
35    /// Returns the number of bytes this number needs in its encoded form. Note: This varies
36    /// depending on the actual number you want to encode.
37    fn required_space(self) -> usize;
38    /// Decode a value from the slice. Returns the value and the number of bytes read from the
39    /// slice (can be used to read several consecutive values from a big slice)
40    /// return None if all bytes has MSB set.
41    fn decode_var(src: &[u8]) -> Option<(Self, usize)>;
42    /// Encode a value into the slice. The slice must be at least `required_space()` bytes long.
43    /// The number of bytes taken by the encoded integer is returned.
44    fn encode_var(self, src: &mut [u8]) -> usize;
45
46    /// Helper: Encode a value and return the encoded form as Vec. The Vec must be at least
47    /// `required_space()` bytes long.
48    fn encode_var_vec(self) -> Vec<u8> {
49        let mut v = Vec::new();
50        v.resize(self.required_space(), 0);
51        self.encode_var(&mut v);
52        v
53    }
54}
55
56#[inline]
57fn zigzag_encode(from: i64) -> u64 {
58    ((from << 1) ^ (from >> 63)) as u64
59}
60
61// see: http://stackoverflow.com/a/2211086/56332
62// casting required because operations like unary negation
63// cannot be performed on unsigned integers
64#[inline]
65fn zigzag_decode(from: u64) -> i64 {
66    ((from >> 1) ^ (-((from & 1) as i64)) as u64) as i64
67}
68
69pub(crate) trait VarIntMaxSize {
70    fn varint_max_size() -> usize;
71}
72
73impl<VI: VarInt> VarIntMaxSize for VI {
74    fn varint_max_size() -> usize {
75        (size_of::<VI>() * 8 + 7) / 7
76    }
77}
78
79macro_rules! impl_varint {
80    ($t:ty, unsigned) => {
81        impl VarInt for $t {
82            fn required_space(self) -> usize {
83                required_encoded_space_unsigned(self as u64)
84            }
85
86            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
87                let (n, s) = u64::decode_var(src)?;
88                Some((n as Self, s))
89            }
90
91            fn encode_var(self, dst: &mut [u8]) -> usize {
92                (self as u64).encode_var(dst)
93            }
94        }
95    };
96    ($t:ty, signed) => {
97        impl VarInt for $t {
98            fn required_space(self) -> usize {
99                required_encoded_space_signed(self as i64)
100            }
101
102            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
103                let (n, s) = i64::decode_var(src)?;
104                Some((n as Self, s))
105            }
106
107            fn encode_var(self, dst: &mut [u8]) -> usize {
108                (self as i64).encode_var(dst)
109            }
110        }
111    };
112}
113
114impl_varint!(usize, unsigned);
115impl_varint!(u32, unsigned);
116impl_varint!(u16, unsigned);
117impl_varint!(u8, unsigned);
118
119impl_varint!(isize, signed);
120impl_varint!(i32, signed);
121impl_varint!(i16, signed);
122impl_varint!(i8, signed);
123
124// Below are the "base implementations" doing the actual encodings; all other integer types are
125// first cast to these biggest types before being encoded.
126
127impl VarInt for u64 {
128    fn required_space(self) -> usize {
129        required_encoded_space_unsigned(self)
130    }
131
132    #[inline]
133    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
134        let mut result: u64 = 0;
135        let mut shift = 0;
136
137        let mut success = false;
138        for b in src.iter() {
139            let msb_dropped = b & DROP_MSB;
140            result |= (msb_dropped as u64) << shift;
141            shift += 7;
142
143            if b & MSB == 0 || shift > (9 * 7) {
144                success = b & MSB == 0;
145                break;
146            }
147        }
148
149        if success {
150            Some((result, shift / 7 as usize))
151        } else {
152            None
153        }
154    }
155
156    #[inline]
157    fn encode_var(self, dst: &mut [u8]) -> usize {
158        assert!(dst.len() >= self.required_space());
159        let mut n = self;
160        let mut i = 0;
161
162        while n >= 0x80 {
163            dst[i] = MSB | (n as u8);
164            i += 1;
165            n >>= 7;
166        }
167
168        dst[i] = n as u8;
169        i + 1
170    }
171}
172
173impl VarInt for i64 {
174    fn required_space(self) -> usize {
175        required_encoded_space_signed(self)
176    }
177
178    #[inline]
179    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
180        if let Some((result, size)) = u64::decode_var(src) {
181            Some((zigzag_decode(result) as Self, size))
182        } else {
183            None
184        }
185    }
186
187    #[inline]
188    fn encode_var(self, dst: &mut [u8]) -> usize {
189        assert!(dst.len() >= self.required_space());
190        let mut n: u64 = zigzag_encode(self as i64);
191        let mut i = 0;
192
193        while n >= 0x80 {
194            dst[i] = MSB | (n as u8);
195            i += 1;
196            n >>= 7;
197        }
198
199        dst[i] = n as u8;
200        i + 1
201    }
202}