-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathprotobuf.format.txt
490 lines (405 loc) · 25.4 KB
/
protobuf.format.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
┏━━━━━━━━━━━━━━┓
┃ PROTOBUF ┃
┗━━━━━━━━━━━━━━┛
ALTERNATIVES ==> #See serialization formats summary
VERSION ==> #21.9
#Serialization format:
# - optimized for low size
# - separate text definition and binary serialization
# - fairly schemaful
#Generates code stub to parse/serialize data.
FILE EXTENSION ==> #.proto
MIME TYPE ==> #application/protobuf
#As extension: +proto
┌───────────┐
│ TYPES │
└───────────┘
TYPE #Data type
VAL #VARR|BOOL|INT|NUM|STR
VAR #Must use [a-zA-Z0-9_] and start with [a-zA-Z]
VARR #Dot-delimited VAR
CASE ==> #Convention:
# - CamelCase for aggregates|types (service|rpc|message|enum type)
# - snake_case for fields (normal|oneof|map|option), package, files, builtin types
# - SNAKE_CASE for enum values
BOOL #true|false
bool #TYPE
INT #Can be:
# - N... (decimal)
# - 0N... (octal)
# - 0xN... 0X... (hex)
#Can be prefixed with +|-
[s|u]int32|64 #TYPE. 32|64 bits, signed|unsigned
#sint* is like int*, except how it is encoded (see below for difference)
ID #INT32 used as identifier in the binary format
#Max 2**29-1. Cannot use 19000-19999
NUM #Can be:
# - [N...][.N...][e|E[+|-]N...]
# - inf
# - nan
#Can be prefixed with +|-
float|double #TYPE. Floating point NUM 32|64 bits
[s]fixed32|64 #TYPE. Fixed-width INT 32|64 bits, signed|unsigned
STR #"..." or '...' (same behavior, but " preferred)
#Can use:
# - any Unicode characters as is, except \0 \n \ and (depending on quote) ' "
# - \a \b \f \n \r \t \v \\ \' \"
# - \NNN (codepoint as octal)
# - \xNN or \XNN (codepoint as hex)
string #TYPE for human STR
bytes #TYPE for machine STR
TRANSTYPING ==> #The following TYPEs are automatically transtyped:
# - when server|client are using different *.proto
# - i.e. allows backward compatibility
#List of transtyping groups:
# - [u]int32|64|bool
# - sint32|64
# - string|bytes, providing valid UTF-8
# - [s]fixed32
# - [s]fixed64
┌────────────────┐
│ STATEMENTS │
└────────────────┘
STATEMENT; #Always ; at end, for each line except ones ending with {}
; #Empty statement. Can be placed mostly anywhere.
// COMMENT
/* COMMENT */ #
syntax = STR #Declare protobuf version
#Must be "proto2|proto3"
# - only proto3 documented here
# - proto2 can be imported from proto3, but not other way around
#Required. Must be first statement.
package "VARR" #When imported, this file's TYPEs must be namespaced by VARR
#Implementation is language-dependent.
#VARR can be overridden for specific language using top-level OPTs:
# - go_package STR (e.g. 'google.golang.org/protobuf/types/known/anypb')
# - java_package STR (e.g. 'com.google.protobuf')
# - csharp_namespace STR (e.g. 'Google.Protobuf.WellKnownTypes')
#By convention:
# - before imports
# - if VARR instead of VAR, each part should be same as directory name
import [weak|public] "PATH.proto" #Include another *.proto file
#Can use TYPEs of PATH.proto, but not ones imported by it
# - unless "public" (only for one depth level)
#If "weak", does not fail if "PATH.proto" does not exist
service VAR { ... } #A client|server.
rpc VAR MTYPE returns MTYPE2 #An endpoint. Child of service
[{ ... }] #MTYPE is ([stream] MESSAGE_TYPE)
message VAR { ... } #Like an OBJ. Can be nested.
#VAR is the TYPE
#If a TYPE is declared inside message and used outside of it:
# - must namespace it: VAR.TYPE
# - for TYPEs of: nested message, enum
#Unknown fields allowed, but might be skipped during serialization|parsing
option OPT_VARR = VAL #Top-level or child of service|rpc|message|enum|oneof
#Declare options for a specific file|service|rpc|message|enum|oneof
#It is possible to define custom options, but hard.
#Core OPTs documented here.
#OPT_VARR can be (OPT_VARR)
OPTS #[OPT_VARR=VAL],... (uses [] brackets)
#Same but for fields|map|enum values
enum VAR { ... } #Enum type. Top-level or child of message
#VAR is the TYPE
#By convention: prefer top-level, with a prefix.
VAR = ID [OPTS] #Enum field declaration. Child of enum.
#There must be at least one enum field with ID 0
# - used as default value
# - by convention named *_UNSPECIFIED
#Two fields cannot have same ID unless ENUM_OPT.allow_alias true
oneof VAR { ... } #Child of message
#Declares that at most one of several fields is set at once.
#Allows serializing only one of them, i.e. more memory efficient.
#Cannot use ARRs nor MAPs
#Backward compatible changes:
# - creating new oneof
# - removing oneof fields
# - but not adding oneof fields to existing oneof
[repeated] [optional] #Field declaration. Child of message|oneof.
TYPE VAR = ID [OPTS] #"repeated" is for ordered ARR
#Whether "optional" or not:
# - the default value is always assigned
# - the default value is the empty value (0, false, "", [], enum value with ID 0)
#If not "optional":
# - the default value is not serialized, whether assigned implicitly or not
# - this takes less space
#If "optional":
# - the default value is serialized when explicitly assigned (an unset field is not serialized)
# - this allows using *has|clear*() methods
#By convention, VAR is plural when "repeated".
FIELD_OPT.deprecated #BOOL (def: false). Document as deprecated and|or print warnings.
map<KEY_TYPE, VAL_TYPE> VAR = ID #Key/value unordered map. Child of message
[OPTS] #KEY_TYPE cannot be float|double|bytes|message|map|enum
#VAL_TYPE cannot be map nor ARR
reserved ID [to ID2|max],... #Child of message|enum
reserved VAR,... #List of previously defined ID|VAR that have been deleted.
#Forbids re-using those, to keep backward compatibility.
┌─────────────────────┐
│ GOOGLE.PROTOBUF │
└─────────────────────┘
package "google.protobuf" #MESSAGE_TYPEs and ENUM_TYPEs always available.
#Mostly for reflection, i.e. protobuf to represent protobuf itself.
#Also define:
# - null|timestamp|duration
# - service versions, mixins
import #NAME is:
"google/protobuf/NAME.proto" # - wrappers (*Value)
# - struct (NullValue|ListValue|Struct|Value)
# - empty
# - any
# - type (Field.*|Syntax|Type|Option|Enum|EnumValue)
# - source_context
# - api (Api|Method|Mixin)
# - field_mask
# - duration
# - timestamp
BoolValue|[U]Int32|64Value|
FloatValue|DoubleValue| #Protobuf builtin type
StringValue|BytesValue #Has a single field: value BOOL|[U]INT32|64|FLOAT|DOUBLE|STR|BYTES
NullValue #null (not present in protobuf). ENUM_TYPE with single value: NULL_VALUE
ListValue #Protobuf ARR
LIST_VALUE.values #VALUE_ARR
Empty #MESSAGE with no fields.
Struct #Protobuf OBJ (service|message|map)
STRUCT.fields #MAP<STR,VALUE>
Value #Protobuf VAL (i.e. any type).
#Is a union of one of the following.
VALUE.bool_value #BOOL
VALUE.number_value #DOUBLE, excluding inf|nan
VALUE.string_value #STR
VALUE.null_value #NULL_VALUE
VALUE.list_value #LIST_VALUE
VALUE.struct_value #STRUCT
Any #Protobuf VAL (i.e. any type)
ANY.value #BYTES
ANY.type_url #"URL" to a Type
Field.Kind #Protobuf type. ENUM_TYPE
#Can be: TYPE_BOOL|[S|U]INT32|64|DOUBLE|FLOAT|[S]FIXED32|64|STRING|BYTES|MESSAGE|ENUM|UNKNOWN
Field.Cardinality #Protobuf type optionality|repeatedness. ENUM_TYPE
#Can be: CARDINALITY_OPTIONAL|REPEATED|UNKNOWN
Syntax #Protobuf syntax. ENUM_TYPE
#Can be: SYNTAX_PROTO2|3
SourceContext #Information about the *.proto file
SOURCE_CONTEXT.file_name #"*.proto"
Api #Protobuf service
API.name #"VAR"
API.methods #METHOD_ARR
API.options #OPTION_ARR
API.source_context #SOURCE_CONTEXT
API.syntax #SYNTAX
API.version #Semver "MAJOR[.MINOR]"
API.mixins #MIXIN_ARR
Mixin #Protobuf service within another service
MIXIN.name #"VAR" of the other service
MIXIN.root #"PATH" where service is embedded
Method #Protobuf rpc
METHOD.request|response_streaming #BOOL
METHOD.request|response_type_url #"URL" to a Type
METHOD.options #OPTION_ARR
METHOD.syntax #SYNTAX
Type #Protobuf message
TYPE.name #"MESSAGE_TYPE"
TYPE.fields #FIELD_ARR
TYPE.options #OPTION_ARR
TYPE.oneofs #"VAR"_ARR of fields in oneofs
TYPE.source_context #SOURCE_CONTEXT
TYPE.syntax #SYNTAX
Field #Protobuf field
FIELD.name #"VAR"
FIELD.json_name #OPT.json_name
FIELD.number #ID
FIELD.kind #KIND representing the type
FIELD.type_url #"URL" to a Type
FIELD.cardinality #CARDINALITY
FIELD.oneof_index #INT32. 0 if not in oneof. Otherwise 1-based index in TYPE.oneofs.
FIELD.packed #BOOL.
FIELD.options #OPTION_ARR
FieldMask #Protobuf field "VARR" path.
#ARR fields can only be specified as last VAR.
#Usually used for selection.
FIELD_MASK.paths #"VARR"_ARR
Option #Protobuf options
OPTION.name #"OPT_VARR"
OPTION.value #ANY
Enum #Protobuf enum type
ENUM.name #"VAR"
ENUM.enumvalue #ENUMVALUE_ARR
ENUM.options #OPTION_ARR
ENUM.source_context #SOURCE_CONTEXT
ENUM.syntax #SYNTAX
EnumValue #Protobuf enum value
ENUMVALUE.name #"VAR"
ENUMVALUE.number #ID
ENUMVALUE.options #OPTION_ARR
Timestamp #
TIMESTAMP.seconds #INT64. Unix time.
TIMESTAMP.nanos #INT32. Min 0, max 1e9
Duration #
DURATION.seconds #INT64. Min|max +|-1e4 years
DURATION.nanos #INT32. Min|max +|-1e9
┌───────────────────┐
│ BINARY FORMAT │
└───────────────────┘
BINARY FORMAT ==> #Created by protoc or by generated code (*Binary())
#Memory efficient:
# - only keep IDs, not VARs nor TYPEs
# - i.e. need FILE.proto to parse
# - use varint (see its doc)
# - default values are not serialized (since undefined values use default values)
# - sint* is like int*, but takes less memory for positive|negative values close to 0 (and vice-versa)
#Messages are unordered:
# - serializing the same message might give different output
┌──────────┐
│ JSON │
└──────────┘
JSON SERIALIZATION ==> # - bool -> BOOL
# - [u]int|fixed32 -> NUM
# - [u]int|fixed64 -> "NUM"
# - float|double -> NUM|"NaN"|"[-]Infinity"
# - string -> STR
# - bytes -> standard base64 STR with padding
# - message -> OBJ
# - key is lowerCamelCase'd
# - key can be set using OPT.json_name
# - repeated -> ARR
# - enum -> "VAR"
# - map -> OBJ
# - TYPE default value -> undefined
# - *Value -> same as underlying type
# - NullValue -> null
# - EMPTY -> OBJ
# - STRUCT -> OBJ
# - ANY -> OBJ, with @type instead of type_url
# - FIELD_MASK -> "VARR,..."
# - TIMESTAMP -> "YYYY-MM-DDTHH:MM:SS[.SSS[SSS[SSS]]]Z"
# - DURATION -> "N...[.N...]s"
JSON PARSING ==> # - BOOL -> bool
# - NUM|"NUM" -> [u]int|fixed32|64
# - NUM|"NUM[e...]"|"NaN"|"[-]Infinity" -> float|double
# - STR -> string
# - standard|url-safe base64 STR with|without padding -> bytes
# - undefined|null -> TYPE default value
# - OBJ -> message
# - key is either lowerCamelCase, protobuf name or OPT.json_name
# - "VAR"|ID_NUM -> enum
# - ARR -> repeated
# - [] or null -> empty repeated
# - OBJ -> map
# - same as underlying type -> *Value
# - null -> NullValue
# - OBJ -> EMPTY
# - OBJ -> STRUCT
# - OBJ, with @type instead of type_url -> ANY
# - "VARR,..." -> FIELD_MASK
# - "YYYY-MM-DDTHH:MM:SS[.SSS[SSS[SSS]]][TZ]" -> TIMESTAMP
# - "N...[.N...]s" -> DURATION
┌──────────────┐
│ COMPILER │
└──────────────┘
protoc FILE.proto... #Convert FILE.proto to generated code.
#The generated code usually needs some runtime library.
#Does not convert SERVICE|RPC.
@CONFIG_PATH #Content is CLI flags list
--js|objc|php|python|ruby|cpp|
csharp|java_out
[LANG_OPT[=STR],...]OUT_DIR #Where to output generated code, and for which language
--decode MESSAGE_TYPE #Takes a message in binary format from stdin and outputs it in debug text format to stdout
#Not compatible with --*_out
--encode MESSAGE_TYPE #Inverse
--decode_raw #Same as --decode but automatically guesses MESSAGE_TYPE
--descriptor_set_out FILE #Encode FILE.proto to binary ("file descriptor set")
--descriptor_set_in FILE,... #Inverse
--include_imports #With --descriptor_set_out: include imported definitions
--include_source_info #With --descriptor_set_out: include SourceContext
--dependency_out FILE #Write list of dependencies in a "make" format
-I|--proto_path DIR #To resolve "import" statements.
#Def: "."
#Should be set to repository root.
#Can be specified several times.
--deterministic_output #Make the serialization order of MAP fields deterministic
--fatal_warnings #Non-0 exit code on warnings
--error_format STR #Error format among 'gcc' (def) or 'msvs' (Visual studio)
--print_free_field_numbers #Print ID_NUMs
--plugin [NAME=]PATH #Executable plugin. By def, search in PATH
┌──────────┐
│ NODE │
└──────────┘
google-protobuf #Runtime npm package.
#Required by the generated code.
#See protobufjs for alternative
LANG_OPT.import_style=STR #Can be:
# - 'closure' (def)
# - 'commonjs_strict':
# - filename is "FILE_pb.js"
# - --proto_path is OUT_DIR
# - 'browser': global object
# - 'commonjs': same as commonjs_strict + browser
# - 'es6': ES modules
LANG_OPT.error_on_name_conflict #BOOL (def: false). Fail if two TYPEs have the same name
LANG_OPT.binary #BOOL (def: true). Enables *Binary()
EXPORT ==> #Output file exports top-level message|enum, namespaced by package
CASE ==> # - kept as is: package, message, enum
# - converted to UpperCase: fields, oneof, map
# - converted to UPPERCASE: enum fields, oneof cases
# - converted to lowercase: toObject()
VARR|PACKAGE #Just plain OBJ
new Message() #"Message" is the Message name
MESSAGE.toObject()->OBJ #
MESSAGE.cloneMessage()->MESSAGE2 #
MESSAGE.getFIELD()->VAL #
MESSAGE.setFIELD(VAL)->MESSAGE #
MESSAGE.hasFIELD()->BOOL #Only if "optional"
MESSAGE.clearFIELD()->MESSAGE #Only if "optional"
ARR_FIELD #Uses the following methods instead.
MESSAGE.getFIELDList()->ARR
MESSAGE.setFIELDList(ARR)->MESSAGE
MESSAGE.clearFIELDList()
->MESSAGE
MESSAGE.addFIELD(VAL[,INDEX])
->MESSAGE #Def INDEX: last
NESTED_MESSAGE_FIELD #undefined if not set
#Also have the following additional methods.
MESSAGE.clearFIELD()->MESSAGE
MESSAGE.hasFIELD()->BOOL #Only if nested message field
BYTES_FIELD #Use BUFFER|BASE64_STR
#Converted to BASE64_STR in toObject()
#Also have the following additional methods.
MESSAGE.getFIELD_asB64()
->'BASE64_STR'
MESSAGE.getFIELD_asU8()->UINT8_ARR#
MAP_FIELD #Uses the following methods instead.
MESSAGE.getFIELDMap()->MAP #
MESSAGE.clearFIELDMap()->MESSAGE #
MESSAGE.ONEOFCase #OBJ:
# - value is ID_INT
# - key is either:
# - field name
# - {ONEOF}_NOT_SET (value 0)
MESSAGE.getONEOFCase()->ID_INT #
ENUM #Just plain OBJ
MESSAGE.serializeBinary
()->UINT8_ARR
Message.deserializeBinary
(UINT8_ARR)->MESSAGE #Protobuf binary format
new BinaryWriter()
MESSAGE.serializeBinaryToWriter
(MESSAGE, BINARY_WRITER)
BINARY_WRITER.getResultBuffer()
->UINT8_ARR
new BinaryReader()
Message.
deserializeBinaryFromReader
(MESSAGE, BINARY_READER)->MESSAGE#Same but more advanced
┌────────────────────────────┐
│ LANGUAGE-SPECIFIC OPTS │
└────────────────────────────┘
LANGUAGE-SPECIFIC OPTS ==> #Top-level OPTs for specific languages.
OPT.java_outer_classname #"Class". For outermost Java class name.
#Def: filename, CamelCased
OPT.java_multiple_files #BOOL (def: false). For Java. Split generated code into several files.
OPT.cc_enable_arenas #BOOL (def: false). For C++. Memory optimization.
OPT.optimize_for #For C++|Java. How to optimize generated code. Can be:
# - "SPEED" (def)
# - "LITE_RUNTIME": like SPEED but uses libprotobuf-lite instead of libprotobuf, which
# has fewer features (no reflection nor descriptors)
# - "CODE_SIZE"
OPT.objc_class_prefix #STR. Objective-C class prefix.