protobuf/text_format/
parse.rs

1use std::str;
2
3use protobuf_support::lexer::int;
4use protobuf_support::lexer::loc::Loc;
5use protobuf_support::lexer::parser_language::ParserLanguage;
6use protobuf_support::lexer::str_lit::StrLitDecodeError;
7use protobuf_support::lexer::tokenizer::Tokenizer;
8use protobuf_support::lexer::tokenizer::TokenizerError;
9
10use crate::message_dyn::MessageDyn;
11use crate::message_full::MessageFull;
12use crate::reflect::EnumDescriptor;
13use crate::reflect::EnumValueDescriptor;
14use crate::reflect::MessageDescriptor;
15use crate::reflect::ReflectValueBox;
16use crate::reflect::RuntimeFieldType;
17use crate::reflect::RuntimeType;
18
19#[derive(Debug, thiserror::Error)]
20pub enum ParseErrorWithoutLoc {
21    #[error(transparent)]
22    TokenizerError(#[from] TokenizerError),
23    #[error(transparent)]
24    StrLitDecodeError(#[from] StrLitDecodeError),
25    #[error("Unknown field: `{}`", .0)]
26    UnknownField(String),
27    #[error("Unknown enum value: `{}`", .0)]
28    UnknownEnumValue(String),
29    #[error("Map field specified more than once: `{}`", .0)]
30    MapFieldIsSpecifiedMoreThanOnce(String),
31    #[error("Integer overflow")]
32    IntegerOverflow,
33    #[error("Expecting bool")]
34    ExpectingBool,
35    #[error("Message not initialized")]
36    MessageNotInitialized,
37}
38
39impl From<int::Overflow> for ParseErrorWithoutLoc {
40    fn from(_: int::Overflow) -> Self {
41        ParseErrorWithoutLoc::IntegerOverflow
42    }
43}
44
45/// Text format parse error.
46#[derive(Debug, thiserror::Error)]
47#[error("{}: {}", loc, error)]
48pub struct ParseError {
49    error: ParseErrorWithoutLoc,
50    loc: Loc,
51}
52
53pub type ParseResult<A> = Result<A, ParseErrorWithoutLoc>;
54pub type ParseWithLocResult<A> = Result<A, ParseError>;
55
56#[derive(Clone)]
57struct Parser<'a> {
58    tokenizer: Tokenizer<'a>,
59}
60
61impl<'a> Parser<'a> {
62    // Text format
63
64    fn next_field_name(&mut self) -> ParseResult<String> {
65        Ok(self.tokenizer.next_ident()?)
66    }
67
68    fn read_colon(&mut self, desc: &'static str) -> ParseResult<()> {
69        Ok(self.tokenizer.next_symbol_expect_eq(':', desc)?)
70    }
71
72    fn read_enum<'e>(&mut self, e: &'e EnumDescriptor) -> ParseResult<EnumValueDescriptor> {
73        self.read_colon("enum")?;
74
75        // TODO: read integer?
76        let ident = self.tokenizer.next_ident()?;
77        let value = match e.value_by_name(&ident) {
78            Some(value) => value,
79            None => return Err(ParseErrorWithoutLoc::UnknownEnumValue(ident)),
80        };
81        Ok(value)
82    }
83
84    fn read_u64(&mut self) -> ParseResult<u64> {
85        self.read_colon("u64")?;
86
87        Ok(self.tokenizer.next_int_lit()?)
88    }
89
90    fn read_u32(&mut self) -> ParseResult<u32> {
91        self.read_colon("int value")?;
92
93        let int_lit = self.tokenizer.next_int_lit()?;
94        let value_u32 = int_lit as u32;
95        if value_u32 as u64 != int_lit {
96            return Err(ParseErrorWithoutLoc::IntegerOverflow);
97        }
98        Ok(value_u32)
99    }
100
101    fn read_i64(&mut self) -> ParseResult<i64> {
102        self.read_colon("int value")?;
103
104        if self.tokenizer.next_symbol_if_eq('-')? {
105            let int_lit = self.tokenizer.next_int_lit()?;
106            Ok(int::neg(int_lit)?)
107        } else {
108            let int_lit = self.tokenizer.next_int_lit()?;
109            if int_lit > i64::MAX as u64 {
110                return Err(ParseErrorWithoutLoc::IntegerOverflow);
111            }
112            Ok(int_lit as i64)
113        }
114    }
115
116    fn read_i32(&mut self) -> ParseResult<i32> {
117        let value = self.read_i64()?;
118        if value < i32::min_value() as i64 || value > i32::max_value() as i64 {
119            return Err(ParseErrorWithoutLoc::IntegerOverflow);
120        }
121        Ok(value as i32)
122    }
123
124    fn read_f64(&mut self) -> ParseResult<f64> {
125        self.read_colon("float value")?;
126
127        let minus = self.tokenizer.next_symbol_if_eq('-')?;
128
129        let value = if let Ok(value) = self.tokenizer.next_int_lit() {
130            value as f64
131        } else {
132            self.tokenizer.next_float_lit()?
133        };
134
135        Ok(if minus { -value } else { value })
136    }
137
138    fn read_f32(&mut self) -> ParseResult<f32> {
139        Ok(self.read_f64()? as f32)
140    }
141
142    fn read_bool(&mut self) -> ParseResult<bool> {
143        self.read_colon("bool value")?;
144
145        if self.tokenizer.next_ident_if_eq("true")? {
146            Ok(true)
147        } else if self.tokenizer.next_ident_if_eq("false")? {
148            Ok(false)
149        } else {
150            Err(ParseErrorWithoutLoc::ExpectingBool)
151        }
152    }
153
154    fn read_string(&mut self) -> ParseResult<String> {
155        self.read_colon("string value")?;
156
157        Ok(self
158            .tokenizer
159            .next_str_lit()
160            .and_then(|s| s.decode_utf8().map_err(From::from))?)
161    }
162
163    fn read_bytes(&mut self) -> ParseResult<Vec<u8>> {
164        self.read_colon("bytes value")?;
165
166        Ok(self
167            .tokenizer
168            .next_str_lit()
169            .and_then(|s| s.decode_bytes().map_err(From::from))?)
170    }
171
172    fn read_message(&mut self, descriptor: &MessageDescriptor) -> ParseResult<Box<dyn MessageDyn>> {
173        let mut message = descriptor.new_instance();
174
175        let symbol = self.tokenizer.next_symbol_expect_eq_oneof(&['{', '<'])?;
176        let terminator = if symbol == '{' { '}' } else { '>' };
177        while !self.tokenizer.lookahead_is_symbol(terminator)? {
178            self.merge_field(&mut *message, descriptor)?;
179        }
180        self.tokenizer
181            .next_symbol_expect_eq(terminator, "message")?;
182        Ok(message)
183    }
184
185    fn read_map_entry(
186        &mut self,
187        k: &RuntimeType,
188        v: &RuntimeType,
189    ) -> ParseResult<(ReflectValueBox, ReflectValueBox)> {
190        let key_field_name: &str = "key";
191        let value_field_name: &str = "value";
192
193        let mut key = None;
194        let mut value = None;
195        self.tokenizer.next_symbol_expect_eq('{', "map entry")?;
196        while !self.tokenizer.lookahead_is_symbol('}')? {
197            let ident = self.next_field_name()?;
198            let (field, field_type) = if ident == key_field_name {
199                (&mut key, k)
200            } else if ident == value_field_name {
201                (&mut value, v)
202            } else {
203                return Err(ParseErrorWithoutLoc::UnknownField(ident));
204            };
205
206            if let Some(..) = *field {
207                return Err(ParseErrorWithoutLoc::MapFieldIsSpecifiedMoreThanOnce(ident));
208            }
209
210            let field_value = self.read_value_of_type(field_type)?;
211
212            *field = Some(field_value);
213        }
214        self.tokenizer.next_symbol_expect_eq('}', "map entry")?;
215        let key = match key {
216            Some(key) => key,
217            None => k.default_value_ref().to_box(),
218        };
219        let value = match value {
220            Some(value) => value,
221            None => v.default_value_ref().to_box(),
222        };
223        Ok((key, value))
224    }
225
226    fn read_value_of_type(&mut self, t: &RuntimeType) -> ParseResult<ReflectValueBox> {
227        Ok(match t {
228            RuntimeType::Enum(d) => {
229                let value = self.read_enum(&d)?.value();
230                ReflectValueBox::Enum(d.clone(), value)
231            }
232            RuntimeType::U32 => ReflectValueBox::U32(self.read_u32()?),
233            RuntimeType::U64 => ReflectValueBox::U64(self.read_u64()?),
234            RuntimeType::I32 => ReflectValueBox::I32(self.read_i32()?),
235            RuntimeType::I64 => ReflectValueBox::I64(self.read_i64()?),
236            RuntimeType::F32 => ReflectValueBox::F32(self.read_f32()?),
237            RuntimeType::F64 => ReflectValueBox::F64(self.read_f64()?),
238            RuntimeType::Bool => ReflectValueBox::Bool(self.read_bool()?),
239            RuntimeType::String => ReflectValueBox::String(self.read_string()?),
240            RuntimeType::VecU8 => ReflectValueBox::Bytes(self.read_bytes()?),
241            RuntimeType::Message(m) => ReflectValueBox::Message(self.read_message(&m)?),
242        })
243    }
244
245    fn merge_field(
246        &mut self,
247        message: &mut dyn MessageDyn,
248        descriptor: &MessageDescriptor,
249    ) -> ParseResult<()> {
250        let field_name = self.next_field_name()?;
251
252        let field = match descriptor.field_by_name(&field_name) {
253            Some(field) => field,
254            None => {
255                // TODO: shouldn't unknown fields be quietly skipped?
256                return Err(ParseErrorWithoutLoc::UnknownField(field_name));
257            }
258        };
259
260        match field.runtime_field_type() {
261            RuntimeFieldType::Singular(t) => {
262                let value = self.read_value_of_type(&t)?;
263                field.set_singular_field(message, value);
264            }
265            RuntimeFieldType::Repeated(t) => {
266                let value = self.read_value_of_type(&t)?;
267                field.mut_repeated(message).push(value);
268            }
269            RuntimeFieldType::Map(k, v) => {
270                let (k, v) = self.read_map_entry(&k, &v)?;
271                field.mut_map(message).insert(k, v);
272            }
273        };
274
275        Ok(())
276    }
277
278    fn merge_inner(&mut self, message: &mut dyn MessageDyn) -> ParseResult<()> {
279        loop {
280            if self.tokenizer.syntax_eof()? {
281                break;
282            }
283            let descriptor = message.descriptor_dyn();
284            self.merge_field(message, &descriptor)?;
285        }
286        Ok(())
287    }
288
289    fn merge(&mut self, message: &mut dyn MessageDyn) -> ParseWithLocResult<()> {
290        match self.merge_inner(message) {
291            Ok(()) => Ok(()),
292            Err(error) => Err(ParseError {
293                error,
294                loc: self.tokenizer.loc(),
295            }),
296        }
297    }
298}
299
300/// Parse text format message.
301///
302/// This function does not check if message required fields are set.
303pub fn merge_from_str(message: &mut dyn MessageDyn, input: &str) -> ParseWithLocResult<()> {
304    let mut parser = Parser {
305        tokenizer: Tokenizer::new(input, ParserLanguage::TextFormat),
306    };
307    parser.merge(message)
308}
309
310/// Parse text format message.
311pub fn parse_from_str<M: MessageFull>(input: &str) -> ParseWithLocResult<M> {
312    let mut m = M::new();
313    merge_from_str(&mut m, input)?;
314    if let Err(_) = m.check_initialized() {
315        return Err(ParseError {
316            error: ParseErrorWithoutLoc::MessageNotInitialized,
317            loc: Loc::start(),
318        });
319    }
320    Ok(m)
321}