protobuf/text_format/
parse.rs
use std::convert::TryFrom;
use std::str;

use protobuf_support::lexer::int;
use protobuf_support::lexer::loc::Loc;
use protobuf_support::lexer::parser_language::ParserLanguage;
use protobuf_support::lexer::str_lit::StrLitDecodeError;
use protobuf_support::lexer::tokenizer::Tokenizer;
use protobuf_support::lexer::tokenizer::TokenizerError;

use crate::message_dyn::MessageDyn;
use crate::message_full::MessageFull;
use crate::reflect::EnumDescriptor;
use crate::reflect::EnumValueDescriptor;
use crate::reflect::MessageDescriptor;
use crate::reflect::ReflectValueBox;
use crate::reflect::RuntimeFieldType;
use crate::reflect::RuntimeType;
18
19#[derive(Debug, thiserror::Error)]
20pub enum ParseErrorWithoutLoc {
21 #[error(transparent)]
22 TokenizerError(#[from] TokenizerError),
23 #[error(transparent)]
24 StrLitDecodeError(#[from] StrLitDecodeError),
25 #[error("Unknown field: `{}`", .0)]
26 UnknownField(String),
27 #[error("Unknown enum value: `{}`", .0)]
28 UnknownEnumValue(String),
29 #[error("Map field specified more than once: `{}`", .0)]
30 MapFieldIsSpecifiedMoreThanOnce(String),
31 #[error("Integer overflow")]
32 IntegerOverflow,
33 #[error("Expecting bool")]
34 ExpectingBool,
35 #[error("Message not initialized")]
36 MessageNotInitialized,
37}
38
39impl From<int::Overflow> for ParseErrorWithoutLoc {
40 fn from(_: int::Overflow) -> Self {
41 ParseErrorWithoutLoc::IntegerOverflow
42 }
43}
44
45#[derive(Debug, thiserror::Error)]
47#[error("{}: {}", loc, error)]
48pub struct ParseError {
49 error: ParseErrorWithoutLoc,
50 loc: Loc,
51}
52
53pub type ParseResult<A> = Result<A, ParseErrorWithoutLoc>;
54pub type ParseWithLocResult<A> = Result<A, ParseError>;
55
56#[derive(Clone)]
57struct Parser<'a> {
58 tokenizer: Tokenizer<'a>,
59}
60
61impl<'a> Parser<'a> {
62 fn next_field_name(&mut self) -> ParseResult<String> {
65 Ok(self.tokenizer.next_ident()?)
66 }
67
68 fn read_colon(&mut self, desc: &'static str) -> ParseResult<()> {
69 Ok(self.tokenizer.next_symbol_expect_eq(':', desc)?)
70 }
71
72 fn read_enum<'e>(&mut self, e: &'e EnumDescriptor) -> ParseResult<EnumValueDescriptor> {
73 self.read_colon("enum")?;
74
75 let ident = self.tokenizer.next_ident()?;
77 let value = match e.value_by_name(&ident) {
78 Some(value) => value,
79 None => return Err(ParseErrorWithoutLoc::UnknownEnumValue(ident)),
80 };
81 Ok(value)
82 }
83
84 fn read_u64(&mut self) -> ParseResult<u64> {
85 self.read_colon("u64")?;
86
87 Ok(self.tokenizer.next_int_lit()?)
88 }
89
90 fn read_u32(&mut self) -> ParseResult<u32> {
91 self.read_colon("int value")?;
92
93 let int_lit = self.tokenizer.next_int_lit()?;
94 let value_u32 = int_lit as u32;
95 if value_u32 as u64 != int_lit {
96 return Err(ParseErrorWithoutLoc::IntegerOverflow);
97 }
98 Ok(value_u32)
99 }
100
101 fn read_i64(&mut self) -> ParseResult<i64> {
102 self.read_colon("int value")?;
103
104 if self.tokenizer.next_symbol_if_eq('-')? {
105 let int_lit = self.tokenizer.next_int_lit()?;
106 Ok(int::neg(int_lit)?)
107 } else {
108 let int_lit = self.tokenizer.next_int_lit()?;
109 if int_lit > i64::MAX as u64 {
110 return Err(ParseErrorWithoutLoc::IntegerOverflow);
111 }
112 Ok(int_lit as i64)
113 }
114 }
115
116 fn read_i32(&mut self) -> ParseResult<i32> {
117 let value = self.read_i64()?;
118 if value < i32::min_value() as i64 || value > i32::max_value() as i64 {
119 return Err(ParseErrorWithoutLoc::IntegerOverflow);
120 }
121 Ok(value as i32)
122 }
123
124 fn read_f64(&mut self) -> ParseResult<f64> {
125 self.read_colon("float value")?;
126
127 let minus = self.tokenizer.next_symbol_if_eq('-')?;
128
129 let value = if let Ok(value) = self.tokenizer.next_int_lit() {
130 value as f64
131 } else {
132 self.tokenizer.next_float_lit()?
133 };
134
135 Ok(if minus { -value } else { value })
136 }
137
138 fn read_f32(&mut self) -> ParseResult<f32> {
139 Ok(self.read_f64()? as f32)
140 }
141
142 fn read_bool(&mut self) -> ParseResult<bool> {
143 self.read_colon("bool value")?;
144
145 if self.tokenizer.next_ident_if_eq("true")? {
146 Ok(true)
147 } else if self.tokenizer.next_ident_if_eq("false")? {
148 Ok(false)
149 } else {
150 Err(ParseErrorWithoutLoc::ExpectingBool)
151 }
152 }
153
154 fn read_string(&mut self) -> ParseResult<String> {
155 self.read_colon("string value")?;
156
157 Ok(self
158 .tokenizer
159 .next_str_lit()
160 .and_then(|s| s.decode_utf8().map_err(From::from))?)
161 }
162
163 fn read_bytes(&mut self) -> ParseResult<Vec<u8>> {
164 self.read_colon("bytes value")?;
165
166 Ok(self
167 .tokenizer
168 .next_str_lit()
169 .and_then(|s| s.decode_bytes().map_err(From::from))?)
170 }
171
172 fn read_message(&mut self, descriptor: &MessageDescriptor) -> ParseResult<Box<dyn MessageDyn>> {
173 let mut message = descriptor.new_instance();
174
175 let symbol = self.tokenizer.next_symbol_expect_eq_oneof(&['{', '<'])?;
176 let terminator = if symbol == '{' { '}' } else { '>' };
177 while !self.tokenizer.lookahead_is_symbol(terminator)? {
178 self.merge_field(&mut *message, descriptor)?;
179 }
180 self.tokenizer
181 .next_symbol_expect_eq(terminator, "message")?;
182 Ok(message)
183 }
184
185 fn read_map_entry(
186 &mut self,
187 k: &RuntimeType,
188 v: &RuntimeType,
189 ) -> ParseResult<(ReflectValueBox, ReflectValueBox)> {
190 let key_field_name: &str = "key";
191 let value_field_name: &str = "value";
192
193 let mut key = None;
194 let mut value = None;
195 self.tokenizer.next_symbol_expect_eq('{', "map entry")?;
196 while !self.tokenizer.lookahead_is_symbol('}')? {
197 let ident = self.next_field_name()?;
198 let (field, field_type) = if ident == key_field_name {
199 (&mut key, k)
200 } else if ident == value_field_name {
201 (&mut value, v)
202 } else {
203 return Err(ParseErrorWithoutLoc::UnknownField(ident));
204 };
205
206 if let Some(..) = *field {
207 return Err(ParseErrorWithoutLoc::MapFieldIsSpecifiedMoreThanOnce(ident));
208 }
209
210 let field_value = self.read_value_of_type(field_type)?;
211
212 *field = Some(field_value);
213 }
214 self.tokenizer.next_symbol_expect_eq('}', "map entry")?;
215 let key = match key {
216 Some(key) => key,
217 None => k.default_value_ref().to_box(),
218 };
219 let value = match value {
220 Some(value) => value,
221 None => v.default_value_ref().to_box(),
222 };
223 Ok((key, value))
224 }
225
226 fn read_value_of_type(&mut self, t: &RuntimeType) -> ParseResult<ReflectValueBox> {
227 Ok(match t {
228 RuntimeType::Enum(d) => {
229 let value = self.read_enum(&d)?.value();
230 ReflectValueBox::Enum(d.clone(), value)
231 }
232 RuntimeType::U32 => ReflectValueBox::U32(self.read_u32()?),
233 RuntimeType::U64 => ReflectValueBox::U64(self.read_u64()?),
234 RuntimeType::I32 => ReflectValueBox::I32(self.read_i32()?),
235 RuntimeType::I64 => ReflectValueBox::I64(self.read_i64()?),
236 RuntimeType::F32 => ReflectValueBox::F32(self.read_f32()?),
237 RuntimeType::F64 => ReflectValueBox::F64(self.read_f64()?),
238 RuntimeType::Bool => ReflectValueBox::Bool(self.read_bool()?),
239 RuntimeType::String => ReflectValueBox::String(self.read_string()?),
240 RuntimeType::VecU8 => ReflectValueBox::Bytes(self.read_bytes()?),
241 RuntimeType::Message(m) => ReflectValueBox::Message(self.read_message(&m)?),
242 })
243 }
244
245 fn merge_field(
246 &mut self,
247 message: &mut dyn MessageDyn,
248 descriptor: &MessageDescriptor,
249 ) -> ParseResult<()> {
250 let field_name = self.next_field_name()?;
251
252 let field = match descriptor.field_by_name(&field_name) {
253 Some(field) => field,
254 None => {
255 return Err(ParseErrorWithoutLoc::UnknownField(field_name));
257 }
258 };
259
260 match field.runtime_field_type() {
261 RuntimeFieldType::Singular(t) => {
262 let value = self.read_value_of_type(&t)?;
263 field.set_singular_field(message, value);
264 }
265 RuntimeFieldType::Repeated(t) => {
266 let value = self.read_value_of_type(&t)?;
267 field.mut_repeated(message).push(value);
268 }
269 RuntimeFieldType::Map(k, v) => {
270 let (k, v) = self.read_map_entry(&k, &v)?;
271 field.mut_map(message).insert(k, v);
272 }
273 };
274
275 Ok(())
276 }
277
278 fn merge_inner(&mut self, message: &mut dyn MessageDyn) -> ParseResult<()> {
279 loop {
280 if self.tokenizer.syntax_eof()? {
281 break;
282 }
283 let descriptor = message.descriptor_dyn();
284 self.merge_field(message, &descriptor)?;
285 }
286 Ok(())
287 }
288
289 fn merge(&mut self, message: &mut dyn MessageDyn) -> ParseWithLocResult<()> {
290 match self.merge_inner(message) {
291 Ok(()) => Ok(()),
292 Err(error) => Err(ParseError {
293 error,
294 loc: self.tokenizer.loc(),
295 }),
296 }
297 }
298}
299
300pub fn merge_from_str(message: &mut dyn MessageDyn, input: &str) -> ParseWithLocResult<()> {
304 let mut parser = Parser {
305 tokenizer: Tokenizer::new(input, ParserLanguage::TextFormat),
306 };
307 parser.merge(message)
308}
309
310pub fn parse_from_str<M: MessageFull>(input: &str) -> ParseWithLocResult<M> {
312 let mut m = M::new();
313 merge_from_str(&mut m, input)?;
314 if let Err(_) = m.check_initialized() {
315 return Err(ParseError {
316 error: ParseErrorWithoutLoc::MessageNotInitialized,
317 loc: Loc::start(),
318 });
319 }
320 Ok(m)
321}