mz_expr/scalar/func/
encoding.rs
1use mz_ore::fmt::FormatBuffer;
14use mz_repr::strconv;
15use uncased::UncasedStr;
16
17use crate::EvalError;
18
19pub trait Format {
21 fn encode(&self, bytes: &[u8]) -> String;
24
25 fn decode(&self, s: &str) -> Result<Vec<u8>, EvalError>;
28}
29
30struct Base64Format;
37
38impl Base64Format {
39 const CHARSET: &'static [u8] =
40 b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
41
42 fn encode_sextet(v: u8) -> char {
43 char::from(Self::CHARSET[usize::from(v)])
44 }
45
46 fn decode_sextet(b: u8) -> Result<u8, EvalError> {
47 match b {
48 b'A'..=b'Z' => Ok(b - b'A'),
49 b'a'..=b'z' => Ok(b - b'a' + 26),
50 b'0'..=b'9' => Ok(b + 4),
51 b'+' => Ok(62),
52 b'/' => Ok(63),
53 _ => Err(EvalError::InvalidBase64Symbol(char::from(b))),
54 }
55 }
56}
57
58impl Format for Base64Format {
59 fn encode(&self, bytes: &[u8]) -> String {
64 let mut buf = String::new();
76 for chunk in bytes.chunks(3) {
77 match chunk {
78 [o1, o2, o3] => {
79 let s1 = (o1 & 0b11111100) >> 2;
80 let s2 = (o1 & 0b00000011) << 4 | (o2 & 0b11110000) >> 4;
81 let s3 = (o2 & 0b00001111) << 2 | (o3 & 0b11000000) >> 6;
82 let s4 = o3 & 0b00111111;
83 buf.push(Self::encode_sextet(s1));
84 buf.push(Self::encode_sextet(s2));
85 buf.push(Self::encode_sextet(s3));
86 buf.push(Self::encode_sextet(s4));
87 }
88 [o1, o2] => {
89 let s1 = (o1 & 0b11111100) >> 2;
90 let s2 = (o1 & 0b00000011) << 4 | (o2 & 0b11110000) >> 4;
91 let s3 = (o2 & 0b00001111) << 2;
92 buf.push(Self::encode_sextet(s1));
93 buf.push(Self::encode_sextet(s2));
94 buf.push(Self::encode_sextet(s3));
95 buf.push('=');
96 }
97 [o1] => {
98 let s1 = (o1 & 0b11111100) >> 2;
99 let s2 = (o1 & 0b00000011) << 4;
100 buf.push(Self::encode_sextet(s1));
101 buf.push(Self::encode_sextet(s2));
102 buf.push('=');
103 buf.push('=');
104 }
105 _ => unreachable!(),
106 }
107 if buf.len() % 77 == 76 {
108 buf.push('\n');
109 }
110 }
111 buf
112 }
113
114 fn decode(&self, s: &str) -> Result<Vec<u8>, EvalError> {
115 let mut buf = vec![];
131 let mut bytes = s
132 .as_bytes()
133 .iter()
134 .copied()
135 .filter(|ch| !matches!(ch, b' ' | b'\t' | b'\n' | b'\r'));
136 loop {
137 match (bytes.next(), bytes.next(), bytes.next(), bytes.next()) {
138 (Some(c1), Some(c2), Some(b'='), Some(b'=')) => {
139 let s1 = Self::decode_sextet(c1)?;
140 let s2 = Self::decode_sextet(c2)?;
141 buf.push(s1 << 2 | (s2 & 0b110000) >> 4);
142 }
143 (Some(c1), Some(c2), Some(c3), Some(b'=')) => {
144 let s1 = Self::decode_sextet(c1)?;
145 let s2 = Self::decode_sextet(c2)?;
146 let s3 = Self::decode_sextet(c3)?;
147 buf.push(s1 << 2 | (s2 & 0b110000) >> 4);
148 buf.push((s2 & 0b001111) << 4 | (s3 & 0b111100) >> 2);
149 }
150 (Some(b'='), _, _, _) | (_, Some(b'='), _, _) | (_, _, Some(b'='), _) => {
151 return Err(EvalError::InvalidBase64Equals);
152 }
153 (Some(c1), Some(c2), Some(c3), Some(c4)) => {
154 let s1 = Self::decode_sextet(c1)?;
155 let s2 = Self::decode_sextet(c2)?;
156 let s3 = Self::decode_sextet(c3)?;
157 let s4 = Self::decode_sextet(c4)?;
158 buf.push(s1 << 2 | (s2 & 0b110000) >> 4);
159 buf.push((s2 & 0b001111) << 4 | (s3 & 0b111100) >> 2);
160 buf.push((s3 & 0b000011) << 6 | s4);
161 }
162 (None, None, None, None) => return Ok(buf),
163 _ => return Err(EvalError::InvalidBase64EndSequence),
164 }
165 }
166 }
167}
168
169struct EscapeFormat;
170
171impl Format for EscapeFormat {
172 fn encode(&self, bytes: &[u8]) -> String {
173 let mut buf = String::new();
174 for b in bytes {
175 match b {
176 b'\0' | (b'\x80'..=b'\xff') => {
177 buf.push('\\');
178 write!(&mut buf, "{:03o}", b);
179 }
180 b'\\' => buf.push_str("\\\\"),
181 _ => buf.push(char::from(*b)),
182 }
183 }
184 buf
185 }
186
187 fn decode(&self, s: &str) -> Result<Vec<u8>, EvalError> {
188 Ok(strconv::parse_bytes_traditional(s)?)
189 }
190}
191
192struct HexFormat;
193
194impl Format for HexFormat {
195 fn encode(&self, bytes: &[u8]) -> String {
196 hex::encode(bytes)
197 }
198
199 fn decode(&self, s: &str) -> Result<Vec<u8>, EvalError> {
200 Ok(strconv::parse_bytes_hex(s)?)
203 }
204}
205
206pub fn lookup_format(s: &str) -> Result<&'static dyn Format, EvalError> {
207 let s = UncasedStr::new(s);
208 if s == "base64" {
209 Ok(&Base64Format)
210 } else if s == "escape" {
211 Ok(&EscapeFormat)
212 } else if s == "hex" {
213 Ok(&HexFormat)
214 } else {
215 Err(EvalError::InvalidEncodingName(s.as_str().into()))
216 }
217}