protobuf_parse/pure/
model.rs

1//! A nom-based protobuf file parser
2//!
3//! This crate can be seen as a rust transcription of the
4//! [descriptor.proto](https://github.com/google/protobuf/blob/master/src/google/protobuf/descriptor.proto) file
5
6use std::fmt;
7use std::fmt::Write;
8use std::ops::Deref;
9use std::ops::RangeInclusive;
10
11use indexmap::IndexMap;
12use protobuf::reflect::ReflectValueBox;
13use protobuf::reflect::RuntimeType;
14use protobuf_support::lexer::float::format_protobuf_float;
15use protobuf_support::lexer::loc::Loc;
16use protobuf_support::lexer::str_lit::StrLit;
17
18use crate::model;
19use crate::proto_path::ProtoPathBuf;
20use crate::protobuf_abs_path::ProtobufAbsPath;
21use crate::protobuf_ident::ProtobufIdent;
22use crate::protobuf_path::ProtobufPath;
23use crate::pure::parser::Parser;
24pub use crate::pure::parser::ParserErrorWithLocation;
25
26#[derive(thiserror::Error, Debug)]
27enum ModelError {
28    #[error("cannot convert value `{1}` to type `{0}`")]
29    InconvertibleValue(RuntimeType, model::ProtobufConstant),
30}
31
32#[derive(Debug, Clone, PartialEq)]
33pub(crate) struct WithLoc<T> {
34    pub loc: Loc,
35    pub t: T,
36}
37
38impl<T> Deref for WithLoc<T> {
39    type Target = T;
40
41    fn deref(&self) -> &Self::Target {
42        &self.t
43    }
44}
45
46impl<T> WithLoc<T> {
47    pub fn with_loc(loc: Loc) -> impl FnOnce(T) -> WithLoc<T> {
48        move |t| WithLoc {
49            t,
50            loc: loc.clone(),
51        }
52    }
53}
54
/// Syntax version of a `.proto` file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Syntax {
    /// [proto2](https://developers.google.com/protocol-buffers/docs/proto) syntax; this is the default.
    Proto2,
    /// [proto3](https://developers.google.com/protocol-buffers/docs/proto3) syntax.
    Proto3,
}

impl Default for Syntax {
    /// The default syntax is `Syntax::Proto2`.
    fn default() -> Self {
        Self::Proto2
    }
}
69
/// Cardinality rule attached to a field.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
    /// The field may appear zero or one time in a well-formed message, never more.
    Optional,
    /// The field may appear any number of times (including zero) in a well-formed
    /// message; the order of the repeated values is preserved.
    Repeated,
    /// The field must appear exactly once in a well-formed message.
    Required,
}

impl Rule {
    /// Every rule, in declaration order.
    pub(crate) const ALL: [Rule; 3] = [Self::Optional, Self::Repeated, Self::Required];

    /// Returns the `.proto` keyword spelling of this rule.
    pub(crate) const fn as_str(&self) -> &'static str {
        match *self {
            Self::Required => "required",
            Self::Repeated => "repeated",
            Self::Optional => "optional",
        }
    }
}
93
94/// Protobuf group
95#[derive(Debug, Clone, PartialEq)]
96pub(crate) struct Group {
97    /// Group name
98    pub name: String,
99    pub fields: Vec<WithLoc<Field>>,
100}
101
102/// Protobuf supported field types
103#[derive(Debug, Clone, PartialEq)]
104pub(crate) enum FieldType {
105    /// Protobuf int32
106    ///
107    /// # Remarks
108    ///
109    /// Uses variable-length encoding. Inefficient for encoding negative numbers – if
110    /// your field is likely to have negative values, use sint32 instead.
111    Int32,
112    /// Protobuf int64
113    ///
114    /// # Remarks
115    ///
116    /// Uses variable-length encoding. Inefficient for encoding negative numbers – if
117    /// your field is likely to have negative values, use sint64 instead.
118    Int64,
119    /// Protobuf uint32
120    ///
121    /// # Remarks
122    ///
123    /// Uses variable-length encoding.
124    Uint32,
125    /// Protobuf uint64
126    ///
127    /// # Remarks
128    ///
129    /// Uses variable-length encoding.
130    Uint64,
131    /// Protobuf sint32
132    ///
133    /// # Remarks
134    ///
135    /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently
136    /// encode negative numbers than regular int32s.
137    Sint32,
138    /// Protobuf sint64
139    ///
140    /// # Remarks
141    ///
142    /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently
143    /// encode negative numbers than regular int32s.
144    Sint64,
145    /// Protobuf bool
146    Bool,
147    /// Protobuf fixed64
148    ///
149    /// # Remarks
150    ///
151    /// Always eight bytes. More efficient than uint64 if values are often greater than 2^56.
152    Fixed64,
153    /// Protobuf sfixed64
154    ///
155    /// # Remarks
156    ///
157    /// Always eight bytes.
158    Sfixed64,
159    /// Protobuf double
160    Double,
161    /// Protobuf string
162    ///
163    /// # Remarks
164    ///
165    /// A string must always contain UTF-8 encoded or 7-bit ASCII text.
166    String,
167    /// Protobuf bytes
168    ///
169    /// # Remarks
170    ///
171    /// May contain any arbitrary sequence of bytes.
172    Bytes,
173    /// Protobut fixed32
174    ///
175    /// # Remarks
176    ///
177    /// Always four bytes. More efficient than uint32 if values are often greater than 2^28.
178    Fixed32,
179    /// Protobut sfixed32
180    ///
181    /// # Remarks
182    ///
183    /// Always four bytes.
184    Sfixed32,
185    /// Protobut float
186    Float,
187    /// Protobuf message or enum (holds the name)
188    MessageOrEnum(ProtobufPath),
189    /// Protobut map
190    Map(Box<(FieldType, FieldType)>),
191    /// Protobuf group (deprecated)
192    Group(Group),
193}
194
195/// A Protobuf Field
196#[derive(Debug, Clone, PartialEq)]
197pub(crate) struct Field {
198    /// Field name
199    pub name: String,
200    /// Field `Rule`
201    pub rule: Option<Rule>,
202    /// Field type
203    pub typ: FieldType,
204    /// Tag number
205    pub number: i32,
206    /// Non-builtin options
207    pub options: Vec<ProtobufOption>,
208}
209
210/// A Protobuf field of oneof group
211#[derive(Debug, Clone, PartialEq)]
212pub(crate) enum FieldOrOneOf {
213    Field(WithLoc<Field>),
214    OneOf(OneOf),
215}
216
217/// A protobuf message
218#[derive(Debug, Clone, Default)]
219pub(crate) struct Message {
220    /// Message name
221    pub name: String,
222    /// Message fields and oneofs
223    pub fields: Vec<WithLoc<FieldOrOneOf>>,
224    /// Message reserved numbers
225    pub reserved_nums: Vec<RangeInclusive<i32>>,
226    /// Message reserved names
227    pub reserved_names: Vec<String>,
228    /// Nested messages
229    pub messages: Vec<WithLoc<Message>>,
230    /// Nested enums
231    pub enums: Vec<WithLoc<Enumeration>>,
232    /// Non-builtin options
233    pub options: Vec<ProtobufOption>,
234    /// Extension field numbers
235    pub extension_ranges: Vec<RangeInclusive<i32>>,
236    /// Extensions
237    pub extensions: Vec<WithLoc<Extension>>,
238}
239
240impl Message {
241    pub fn regular_fields_including_in_oneofs(&self) -> Vec<&WithLoc<Field>> {
242        self.fields
243            .iter()
244            .flat_map(|fo| match &fo.t {
245                FieldOrOneOf::Field(f) => vec![f],
246                FieldOrOneOf::OneOf(o) => o.fields.iter().collect(),
247            })
248            .collect()
249    }
250
251    /** Find a field by name. */
252    pub fn field_by_name(&self, name: &str) -> Option<&Field> {
253        self.regular_fields_including_in_oneofs()
254            .iter()
255            .find(|f| f.t.name == name)
256            .map(|f| &f.t)
257    }
258
259    pub fn _nested_extensions(&self) -> Vec<&Group> {
260        self.regular_fields_including_in_oneofs()
261            .into_iter()
262            .flat_map(|f| match &f.t.typ {
263                FieldType::Group(g) => Some(g),
264                _ => None,
265            })
266            .collect()
267    }
268
269    #[cfg(test)]
270    pub fn regular_fields_for_test(&self) -> Vec<&Field> {
271        self.fields
272            .iter()
273            .flat_map(|fo| match &fo.t {
274                FieldOrOneOf::Field(f) => Some(&f.t),
275                FieldOrOneOf::OneOf(_) => None,
276            })
277            .collect()
278    }
279
280    pub(crate) fn oneofs(&self) -> Vec<&OneOf> {
281        self.fields
282            .iter()
283            .flat_map(|fo| match &fo.t {
284                FieldOrOneOf::Field(_) => None,
285                FieldOrOneOf::OneOf(o) => Some(o),
286            })
287            .collect()
288    }
289}
290
291/// A protobuf enumeration field
292#[derive(Debug, Clone)]
293pub(crate) struct EnumValue {
294    /// enum value name
295    pub name: String,
296    /// enum value number
297    pub number: i32,
298    /// enum value options
299    pub options: Vec<ProtobufOption>,
300}
301
302/// A protobuf enumerator
303#[derive(Debug, Clone)]
304pub(crate) struct Enumeration {
305    /// enum name
306    pub name: String,
307    /// enum values
308    pub values: Vec<EnumValue>,
309    /// enum options
310    pub options: Vec<ProtobufOption>,
311    /// enum reserved numbers
312    pub reserved_nums: Vec<RangeInclusive<i32>>,
313    /// enum reserved names
314    pub reserved_names: Vec<String>,
315}
316
317/// A OneOf
318#[derive(Debug, Clone, Default, PartialEq)]
319pub(crate) struct OneOf {
320    /// OneOf name
321    pub name: String,
322    /// OneOf fields
323    pub fields: Vec<WithLoc<Field>>,
324    /// oneof options
325    pub options: Vec<ProtobufOption>,
326}
327
328#[derive(Debug, Clone)]
329pub(crate) struct Extension {
330    /// Extend this type with field
331    pub extendee: ProtobufPath,
332    /// Extension field
333    pub field: WithLoc<Field>,
334}
335
336/// Service method
337#[derive(Debug, Clone)]
338pub(crate) struct Method {
339    /// Method name
340    pub name: String,
341    /// Input type
342    pub input_type: ProtobufPath,
343    /// Output type
344    pub output_type: ProtobufPath,
345    /// If this method is client streaming
346    #[allow(dead_code)] // TODO
347    pub client_streaming: bool,
348    /// If this method is server streaming
349    #[allow(dead_code)] // TODO
350    pub server_streaming: bool,
351    /// Method options
352    pub options: Vec<ProtobufOption>,
353}
354
355/// Service definition
356#[derive(Debug, Clone)]
357pub(crate) struct Service {
358    /// Service name
359    pub name: String,
360    pub methods: Vec<Method>,
361    pub options: Vec<ProtobufOption>,
362}
363
364#[derive(Debug, Clone, PartialEq, Eq, Hash)]
365pub(crate) struct AnyTypeUrl {
366    pub(crate) prefix: String,
367    pub(crate) full_type_name: ProtobufPath,
368}
369
370impl fmt::Display for AnyTypeUrl {
371    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
372        write!(f, "{}/{}", self.prefix, self.full_type_name)
373    }
374}
375
376#[derive(Debug, Clone, PartialEq, Eq, Hash)]
377pub(crate) enum ProtobufConstantMessageFieldName {
378    Regular(String),
379    Extension(ProtobufPath),
380    AnyTypeUrl(AnyTypeUrl),
381}
382
383impl fmt::Display for ProtobufConstantMessageFieldName {
384    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
385        match self {
386            ProtobufConstantMessageFieldName::Regular(s) => write!(f, "{}", s),
387            ProtobufConstantMessageFieldName::Extension(p) => write!(f, "[{}]", p),
388            ProtobufConstantMessageFieldName::AnyTypeUrl(a) => write!(f, "[{}]", a),
389        }
390    }
391}
392
393#[derive(Debug, Clone, PartialEq, Default)]
394pub(crate) struct ProtobufConstantMessage {
395    pub(crate) fields: IndexMap<ProtobufConstantMessageFieldName, ProtobufConstant>,
396}
397
398/// constant = fullIdent |
399///            ( [ "-" | "+" ] intLit ) |
400///            ( [ "-" | "+" ] floatLit ) |
401///            strLit |
402///            boolLit |
403///            messageValue
404///
405/// https://protobuf.dev/reference/protobuf/proto2-spec/#constant
406/// https://protobuf.dev/reference/protobuf/proto3-spec/#constant
407/// https://protobuf.dev/reference/protobuf/textformat-spec/#fields
408#[derive(Debug, Clone, PartialEq)]
409pub(crate) enum ProtobufConstant {
410    U64(u64),
411    I64(i64),
412    F64(f64), // TODO: eq
413    Bool(bool),
414    Ident(ProtobufPath),
415    String(StrLit),
416    Message(ProtobufConstantMessage),
417    Repeated(Vec<ProtobufConstant>),
418}
419
420impl fmt::Display for ProtobufConstant {
421    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422        match self {
423            ProtobufConstant::U64(v) => write!(f, "{}", v),
424            ProtobufConstant::I64(v) => write!(f, "{}", v),
425            ProtobufConstant::F64(v) => write!(f, "{}", format_protobuf_float(*v)),
426            ProtobufConstant::Bool(v) => write!(f, "{}", v),
427            ProtobufConstant::Ident(v) => write!(f, "{}", v),
428            ProtobufConstant::String(v) => write!(f, "{}", v),
429            // TODO: text format explicitly
430            ProtobufConstant::Message(v) => write!(f, "{:?}", v),
431            ProtobufConstant::Repeated(v) => write!(f, "{:?}", v),
432        }
433    }
434}
435
436impl ProtobufConstantMessage {
437    pub fn format(&self) -> String {
438        let mut s = String::new();
439        write!(s, "{{ ").unwrap();
440        for (n, v) in &self.fields {
441            match v {
442                ProtobufConstant::Message(m) => write!(s, "{} {}", n, m.format()).unwrap(),
443                v => write!(s, "{}: {} ", n, v.format()).unwrap(),
444            }
445        }
446        write!(s, "}}").unwrap();
447        s
448    }
449}
450
451impl ProtobufConstant {
452    pub fn format(&self) -> String {
453        match *self {
454            ProtobufConstant::U64(u) => u.to_string(),
455            ProtobufConstant::I64(i) => i.to_string(),
456            ProtobufConstant::F64(f) => format_protobuf_float(f),
457            ProtobufConstant::Bool(b) => b.to_string(),
458            ProtobufConstant::Ident(ref i) => format!("{}", i),
459            ProtobufConstant::String(ref s) => s.quoted(),
460            ProtobufConstant::Message(ref s) => s.format(),
461            ProtobufConstant::Repeated(ref l) => {
462                let mut s = String::from("[");
463                let mut it = l.iter().peekable();
464                while let Some(constant) = it.next() {
465                    s.push_str(&constant.format());
466                    if it.peek().is_some() {
467                        s.push(',');
468                    }
469                }
470                s.push(']');
471                s
472            }
473        }
474    }
475
476    /** Interpret .proto constant as an reflection value. */
477    pub fn as_type(&self, ty: RuntimeType) -> anyhow::Result<ReflectValueBox> {
478        match (self, &ty) {
479            (ProtobufConstant::Ident(ident), RuntimeType::Enum(e)) => {
480                if let Some(v) = e.value_by_name(&ident.to_string()) {
481                    return Ok(ReflectValueBox::Enum(e.clone(), v.value()));
482                }
483            }
484            (ProtobufConstant::Bool(b), RuntimeType::Bool) => return Ok(ReflectValueBox::Bool(*b)),
485            (ProtobufConstant::String(lit), RuntimeType::String) => {
486                return Ok(ReflectValueBox::String(lit.decode_utf8()?))
487            }
488            _ => {}
489        }
490        Err(ModelError::InconvertibleValue(ty.clone(), self.clone()).into())
491    }
492}
493
494/// Equivalent of `UninterpretedOption.NamePart`.
495#[derive(Debug, Clone, PartialEq)]
496pub(crate) enum ProtobufOptionNamePart {
497    Direct(ProtobufIdent),
498    Ext(ProtobufPath),
499}
500
501impl fmt::Display for ProtobufOptionNamePart {
502    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
503        match self {
504            ProtobufOptionNamePart::Direct(n) => write!(f, "{}", n),
505            ProtobufOptionNamePart::Ext(n) => write!(f, "({})", n),
506        }
507    }
508}
509
510#[derive(Debug, Clone, PartialEq)]
511pub(crate) struct ProtobufOptionNameExt(pub Vec<ProtobufOptionNamePart>);
512
513#[derive(Debug, Clone, PartialEq)]
514pub(crate) enum ProtobufOptionName {
515    Builtin(ProtobufIdent),
516    Ext(ProtobufOptionNameExt),
517}
518
519impl ProtobufOptionName {
520    pub fn simple(name: &str) -> ProtobufOptionName {
521        ProtobufOptionName::Builtin(ProtobufIdent::new(name))
522    }
523}
524
525impl fmt::Display for ProtobufOptionNameExt {
526    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527        for (index, comp) in self.0.iter().enumerate() {
528            if index != 0 {
529                write!(f, ".")?;
530            }
531            write!(f, "{}", comp)?;
532        }
533        Ok(())
534    }
535}
536
537impl fmt::Display for ProtobufOptionName {
538    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
539        match self {
540            ProtobufOptionName::Builtin(n) => write!(f, "{}", n),
541            ProtobufOptionName::Ext(n) => write!(f, "{}", n),
542        }
543    }
544}
545
546#[derive(Debug, Clone, PartialEq)]
547pub(crate) struct ProtobufOption {
548    pub name: ProtobufOptionName,
549    pub value: ProtobufConstant,
550}
551
/// Visibility attached to an import statement.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ImportVis {
    /// Default visibility; this is what `ImportVis::default()` returns.
    Default,
    /// Public import.
    Public,
    /// Weak import.
    Weak,
}

impl Default for ImportVis {
    /// The default visibility is `ImportVis::Default`.
    fn default() -> Self {
        Self::Default
    }
}
565
566/// Import statement
567#[derive(Debug, Default, Clone)]
568pub(crate) struct Import {
569    pub path: ProtoPathBuf,
570    pub vis: ImportVis,
571}
572
573/// A File descriptor representing a whole .proto file
574#[derive(Debug, Default, Clone)]
575pub(crate) struct FileDescriptor {
576    /// Imports
577    pub imports: Vec<Import>,
578    /// Package
579    pub package: ProtobufAbsPath,
580    /// Protobuf Syntax
581    pub syntax: Syntax,
582    /// Top level messages
583    pub messages: Vec<WithLoc<Message>>,
584    /// Enums
585    pub enums: Vec<WithLoc<Enumeration>>,
586    /// Extensions
587    pub extensions: Vec<WithLoc<Extension>>,
588    /// Services
589    pub services: Vec<WithLoc<Service>>,
590    /// Non-builtin options
591    pub options: Vec<ProtobufOption>,
592}
593
594impl FileDescriptor {
595    /// Parses a .proto file content into a `FileDescriptor`
596    pub fn parse<S: AsRef<str>>(file: S) -> Result<Self, ParserErrorWithLocation> {
597        let mut parser = Parser::new(file.as_ref());
598        match parser.next_proto() {
599            Ok(r) => Ok(r),
600            Err(error) => {
601                let Loc { line, col } = parser.tokenizer.loc();
602                Err(ParserErrorWithLocation { error, line, col })
603            }
604        }
605    }
606}