protobuf-go/proto/encode.go

287 lines
8.8 KiB
Go
Raw Normal View History

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proto
import (
"fmt"
"sort"
"google.golang.org/protobuf/internal/encoding/wire"
"google.golang.org/protobuf/internal/errors"
"google.golang.org/protobuf/internal/mapsort"
"google.golang.org/protobuf/internal/pragma"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/runtime/protoiface"
)
// MarshalOptions configures the marshaler.
//
// Example usage:
// b, err := MarshalOptions{Deterministic: true}.Marshal(m)
type MarshalOptions struct {
// AllowPartial allows messages that have missing required fields to marshal
// without returning an error. If AllowPartial is false (the default),
// Marshal will return an error if there are any missing required fields.
AllowPartial bool
// Deterministic controls whether the same message will always be
// serialized to the same bytes within the same binary.
//
// Setting this option guarantees that repeated serialization of
// the same message will return the same bytes, and that different
// processes of the same binary (which may be executing on different
// machines) will serialize equal messages to the same bytes.
//
// Note that the deterministic serialization is NOT canonical across
// languages. It is not guaranteed to remain stable over time. It is
// unstable across different builds with schema changes due to unknown
// fields. Users who need canonical serialization (e.g., persistent
// storage in a canonical form, fingerprinting, etc.) must define
// their own canonicalization specification and implement their own
// serializer rather than relying on this API.
//
// If deterministic serialization is requested, map entries will be
// sorted by keys in lexographical order. This is an implementation
// detail and subject to change.
Deterministic bool
proto: replace CachedSize fast-path method with UseCachedSize option Using an option instead of a separate method has several useful properties: It makes it explicit whether the fast-path AppendMarshal is expected to use cached sizes or not. It properly plumbs the decision to use cached sizes through the call stack. Consider the case where message A includes B includes C: If A and C support cached sizes but B does not, we would like to use the size cache in all messages which support it. Placing this decision in the options allows this to work properly with no additional effort. Placing this option in MarshalOptions permits users to request use of existing cached sizes. This is a two-edged sword: There are places where this ability can permit substantial efficiencies, but this is also an exceedingly sharp-edged API. I believe that on balance the benefits outweigh the risks, especially since the prerequisites for using cached sizes are intuitively obvious. (You must have called Size, and you must not have changed the message.) This CL adds a Size method to MarshalOptions, rather than adding a SizeOptions type. Future additions to MarshalOptions may affect the size of the encoded output (e.g., an option to skip encoding unknown fields) and using the same options for both Marshal and Size makes it easier to use a consistent configuration for each. Change-Id: I6adbb55b717dd03d39f067e1d0b7381945000976 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171119 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-08 01:18:31 +00:00
// UseCachedSize indicates that the result of a previous Size call
// may be reused.
//
// Setting this option asserts that:
//
// 1. Size has previously been called on this message with identical
// options (except for UseCachedSize itself).
//
// 2. The message and all its submessages have not changed in any
// way since the Size call.
//
// If either of these invariants is broken, the results are undefined
// but may include panics or invalid output.
//
// Implementations MAY take this option into account to provide
// better performance, but there is no guarantee that they will do so.
// There is absolutely no guarantee that Size followed by Marshal with
// UseCachedSize set will perform equivalently to Marshal alone.
UseCachedSize bool
pragma.NoUnkeyedLiterals
}
// Compile-time check that a MarshalOptions value can be converted to
// protoiface.MarshalOptions.
var _ = protoiface.MarshalOptions(MarshalOptions{})
// Marshal encodes m in the wire format using the zero-value MarshalOptions
// and returns the encoded bytes in a freshly started buffer.
func Marshal(m Message) ([]byte, error) {
	var defaults MarshalOptions
	return defaults.MarshalAppend(nil, m)
}
// Marshal encodes m in the wire format using the options in o.
// It is MarshalAppend applied to a nil destination buffer.
func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
	var dst []byte // nil: the encoding starts from an empty buffer
	return o.MarshalAppend(dst, m)
}
// MarshalAppend appends the wire-format encoding of m to b,
// returning the result.
func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
internal/impl: add fast-path marshal implementation This is a port of the v1 table marshaler, with some substantial cleanup and refactoring. Benchstat results from the protobuf reference benchmark data comparing the v1 package with v2, with AllowPartial:true set for the new package. This is not an apples-to-apples comparison, since v1 doesn't have a way to disable required field checks. Required field checks in v2 package currently go through reflection, which performs terribly; my initial experimentation indicates that fast-path required field checks will not add a large amount of cost; these results are incomplete but not wholly inaccurate. name old time/op new time/op delta /dataset.google_message3_1.pb/Marshal-12 219ms ± 1% 232ms ± 1% +5.85% (p=0.004 n=6+5) /dataset.google_message2.pb/Marshal-12 261µs ± 3% 248µs ± 1% -5.14% (p=0.002 n=6+6) /dataset.google_message1_proto2.pb/Marshal-12 681ns ± 2% 637ns ± 3% -6.53% (p=0.002 n=6+6) /dataset.google_message1_proto3.pb/Marshal-12 1.10µs ± 8% 0.99µs ± 3% -9.63% (p=0.002 n=6+6) /dataset.google_message3_3.pb/Marshal-12 44.2ms ± 3% 35.2ms ± 1% -20.28% (p=0.004 n=6+5) /dataset.google_message4.pb/Marshal-12 91.4ms ± 2% 94.9ms ± 2% +3.78% (p=0.002 n=6+6) /dataset.google_message3_2.pb/Marshal-12 78.7ms ± 6% 80.8ms ± 4% ~ (p=0.310 n=6+6) /dataset.google_message3_4.pb/Marshal-12 10.6ms ± 3% 10.6ms ± 8% ~ (p=0.662 n=5+6) /dataset.google_message3_5.pb/Marshal-12 675ms ± 4% 510ms ± 2% -24.40% (p=0.002 n=6+6) /dataset.google_message3_1.pb/Marshal 219ms ± 1% 236ms ± 7% +8.06% (p=0.004 n=5+6) /dataset.google_message2.pb/Marshal 257µs ± 1% 250µs ± 3% ~ (p=0.052 n=5+6) /dataset.google_message1_proto2.pb/Marshal 685ns ± 1% 628ns ± 1% -8.41% (p=0.008 n=5+5) /dataset.google_message1_proto3.pb/Marshal 1.08µs ± 1% 0.98µs ± 2% -9.31% (p=0.004 n=5+6) /dataset.google_message3_3.pb/Marshal 43.7ms ± 1% 35.1ms ± 1% -19.76% (p=0.002 n=6+6) /dataset.google_message4.pb/Marshal 93.4ms ± 4% 94.9ms ± 2% ~ (p=0.180 n=6+6) 
/dataset.google_message3_2.pb/Marshal 105ms ± 2% 98ms ± 7% -6.81% (p=0.009 n=5+6) /dataset.google_message3_4.pb/Marshal 16.3ms ± 6% 15.7ms ± 3% -3.44% (p=0.041 n=6+6) /dataset.google_message3_5.pb/Marshal 676ms ± 4% 504ms ± 2% -25.50% (p=0.004 n=6+5) Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-01 20:49:56 +00:00
// Set AllowPartial in recursive calls to marshal to avoid duplicating
// effort with the single initialization check below.
allowPartial := o.AllowPartial
o.AllowPartial = true
out, err := o.marshalMessageFast(b, m)
if err == errInternalNoFast {
out, err = o.marshalMessage(b, m.ProtoReflect())
}
var nerr errors.NonFatal
if !nerr.Merge(err) {
return out, err
}
internal/impl: add fast-path marshal implementation This is a port of the v1 table marshaler, with some substantial cleanup and refactoring. Benchstat results from the protobuf reference benchmark data comparing the v1 package with v2, with AllowPartial:true set for the new package. This is not an apples-to-apples comparison, since v1 doesn't have a way to disable required field checks. Required field checks in v2 package currently go through reflection, which performs terribly; my initial experimentation indicates that fast-path required field checks will not add a large amount of cost; these results are incomplete but not wholly inaccurate. name old time/op new time/op delta /dataset.google_message3_1.pb/Marshal-12 219ms ± 1% 232ms ± 1% +5.85% (p=0.004 n=6+5) /dataset.google_message2.pb/Marshal-12 261µs ± 3% 248µs ± 1% -5.14% (p=0.002 n=6+6) /dataset.google_message1_proto2.pb/Marshal-12 681ns ± 2% 637ns ± 3% -6.53% (p=0.002 n=6+6) /dataset.google_message1_proto3.pb/Marshal-12 1.10µs ± 8% 0.99µs ± 3% -9.63% (p=0.002 n=6+6) /dataset.google_message3_3.pb/Marshal-12 44.2ms ± 3% 35.2ms ± 1% -20.28% (p=0.004 n=6+5) /dataset.google_message4.pb/Marshal-12 91.4ms ± 2% 94.9ms ± 2% +3.78% (p=0.002 n=6+6) /dataset.google_message3_2.pb/Marshal-12 78.7ms ± 6% 80.8ms ± 4% ~ (p=0.310 n=6+6) /dataset.google_message3_4.pb/Marshal-12 10.6ms ± 3% 10.6ms ± 8% ~ (p=0.662 n=5+6) /dataset.google_message3_5.pb/Marshal-12 675ms ± 4% 510ms ± 2% -24.40% (p=0.002 n=6+6) /dataset.google_message3_1.pb/Marshal 219ms ± 1% 236ms ± 7% +8.06% (p=0.004 n=5+6) /dataset.google_message2.pb/Marshal 257µs ± 1% 250µs ± 3% ~ (p=0.052 n=5+6) /dataset.google_message1_proto2.pb/Marshal 685ns ± 1% 628ns ± 1% -8.41% (p=0.008 n=5+5) /dataset.google_message1_proto3.pb/Marshal 1.08µs ± 1% 0.98µs ± 2% -9.31% (p=0.004 n=5+6) /dataset.google_message3_3.pb/Marshal 43.7ms ± 1% 35.1ms ± 1% -19.76% (p=0.002 n=6+6) /dataset.google_message4.pb/Marshal 93.4ms ± 4% 94.9ms ± 2% ~ (p=0.180 n=6+6) 
/dataset.google_message3_2.pb/Marshal 105ms ± 2% 98ms ± 7% -6.81% (p=0.009 n=5+6) /dataset.google_message3_4.pb/Marshal 16.3ms ± 6% 15.7ms ± 3% -3.44% (p=0.041 n=6+6) /dataset.google_message3_5.pb/Marshal 676ms ± 4% 504ms ± 2% -25.50% (p=0.004 n=6+5) Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-01 20:49:56 +00:00
if !allowPartial {
nerr.Merge(IsInitialized(m))
}
return out, nerr.E
}
func (o MarshalOptions) marshalMessageFast(b []byte, m Message) ([]byte, error) {
methods := protoMethods(m)
if methods == nil ||
methods.MarshalAppend == nil ||
(o.Deterministic && methods.Flags&protoiface.MethodFlagDeterministicMarshal == 0) {
return nil, errInternalNoFast
}
if methods.Size != nil {
sz := methods.Size(m)
if cap(b) < len(b)+sz {
x := make([]byte, len(b), len(b)+sz)
copy(x, b)
b = x
}
proto: replace CachedSize fast-path method with UseCachedSize option Using an option instead of a separate method has several useful properties: It makes it explicit whether the fast-path AppendMarshal is expected to use cached sizes or not. It properly plumbs the decision to use cached sizes through the call stack. Consider the case where message A includes B includes C: If A and C support cached sizes but B does not, we would like to use the size cache in all messages which support it. Placing this decision in the options allows this to work properly with no additional effort. Placing this option in MarshalOptions permits users to request use of existing cached sizes. This is a two-edged sword: There are places where this ability can permit substantial efficiencies, but this is also an exceedingly sharp-edged API. I believe that on balance the benefits outweigh the risks, especially since the prerequisites for using cached sizes are intuitively obvious. (You must have called Size, and you must not have changed the message.) This CL adds a Size method to MarshalOptions, rather than adding a SizeOptions type. Future additions to MarshalOptions may affect the size of the encoded output (e.g., an option to skip encoding unknown fields) and using the same options for both Marshal and Size makes it easier to use a consistent configuration for each. Change-Id: I6adbb55b717dd03d39f067e1d0b7381945000976 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171119 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-08 01:18:31 +00:00
o.UseCachedSize = true
}
return methods.MarshalAppend(b, m, protoiface.MarshalOptions(o))
}
// marshalMessage appends the encoding of m to b using the protoreflect API.
//
// Known fields are visited through rangeKnown and encoded with marshalField;
// the raw bytes of every unknown field are appended afterwards. Errors that
// the errors.NonFatal accumulator accepts are collected and returned once
// encoding finishes; any other error stops the traversal and is returned
// immediately. It panics if a populated field number has neither a field
// descriptor nor an extension descriptor.
func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
	// There are many choices for what order we visit fields in. The default one here
	// is chosen for reasonable efficiency and simplicity given the protoreflect API.
	// It is not deterministic, since KnownFields.Range does not return fields in any
	// defined order.
	//
	// When using deterministic serialization, we sort the known fields by field number.
	fieldDescs := m.Descriptor().Fields()
	knownFields := m.KnownFields()
	var err error
	var nerr errors.NonFatal
	o.rangeKnown(knownFields, func(num protoreflect.FieldNumber, value protoreflect.Value) bool {
		field := fieldDescs.ByNumber(num)
		if field == nil {
			// Not a declared field, so look for an extension. The extension
			// type is nil-checked before Descriptor is called on it so that
			// an unknown number reaches the descriptive panic below instead
			// of a nil dereference.
			if xt := knownFields.ExtensionTypes().ByNumber(num); xt != nil {
				field = xt.Descriptor()
			}
			if field == nil {
				panic(fmt.Errorf("no descriptor for field %d in %q", num, m.Descriptor().FullName()))
			}
		}
		b, err = o.marshalField(b, field, value)
		if nerr.Merge(err) {
			// The error (if any) is now held by nerr; keep iterating.
			err = nil
			return true
		}
		return false
	})
	if err != nil {
		return b, err
	}
	m.UnknownFields().Range(func(_ protoreflect.FieldNumber, raw protoreflect.RawFields) bool {
		b = append(b, raw...)
		return true
	})
	return b, nerr.E
}
// rangeKnown calls f for each populated known field until f returns false.
//
// With o.Deterministic set, the field numbers are first collected and sorted
// in ascending order and f is then called in that order. Otherwise the
// iteration is left entirely to knownFields.Range.
func (o MarshalOptions) rangeKnown(knownFields protoreflect.KnownFields, f func(protoreflect.FieldNumber, protoreflect.Value) bool) {
	if o.Deterministic {
		ordered := make([]protoreflect.FieldNumber, 0, knownFields.Len())
		knownFields.Range(func(n protoreflect.FieldNumber, _ protoreflect.Value) bool {
			ordered = append(ordered, n)
			return true
		})
		sort.Slice(ordered, func(i, j int) bool {
			return ordered[i] < ordered[j]
		})
		for i := 0; i < len(ordered); i++ {
			if keepGoing := f(ordered[i], knownFields.Get(ordered[i])); !keepGoing {
				return
			}
		}
		return
	}
	knownFields.Range(f)
}
// marshalField appends the encoding of a single field to b.
//
// List and map fields are delegated to marshalList and marshalMap, which
// write their own tags. For every other field the tag is written here,
// using the wire type looked up in wireTypes for the field's kind, and the
// value is then encoded by marshalSingular.
func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
	fieldNum, fieldKind := fd.Number(), fd.Kind()
	if fd.IsList() {
		return o.marshalList(b, fieldNum, fd, value.List())
	}
	if fd.IsMap() {
		return o.marshalMap(b, fieldNum, fd, value.Map())
	}
	tagged := wire.AppendTag(b, fieldNum, wireTypes[fieldKind])
	return o.marshalSingular(tagged, fieldNum, fd, value)
}
// marshalList appends the encoding of the repeated field fd, whose elements
// are held in list, to b.
//
// A packed field with at least one element is written as one
// length-delimited record holding every element back to back. In all other
// cases each element is written as its own tagged record, so an empty list
// (packed or not) contributes no bytes.
//
// Errors accepted by the errors.NonFatal accumulator are collected and
// returned at the end; any other error aborts encoding immediately.
func (o MarshalOptions) marshalList(b []byte, num wire.Number, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
	var nerr errors.NonFatal
	n := list.Len()

	// A packed field with zero elements must not appear in the output, so
	// the packed form is used only when there is something to pack.
	if fd.IsPacked() && n > 0 {
		b = wire.AppendTag(b, num, wire.BytesType)
		var pos int
		b, pos = appendSpeculativeLength(b)
		for i := 0; i < n; i++ {
			var err error
			b, err = o.marshalSingular(b, num, fd, list.Get(i))
			if !nerr.Merge(err) {
				return b, err
			}
		}
		b = finishSpeculativeLength(b, pos)
		return b, nerr.E
	}

	kind := fd.Kind()
	for i := 0; i < n; i++ {
		var err error
		b = wire.AppendTag(b, num, wireTypes[kind])
		b, err = o.marshalSingular(b, num, fd, list.Get(i))
		if !nerr.Merge(err) {
			return b, err
		}
	}
	return b, nerr.E
}
// marshalMap appends the encoding of the map field fd, whose entries are
// held in mapv, to b.
//
// Every entry becomes one length-delimited record tagged with num. The
// record body is the key followed by the value, each encoded by marshalField
// against the descriptors returned by fd.MapKey and fd.MapValue. Entries are
// visited through rangeMap.
//
// Errors accepted by the errors.NonFatal accumulator are collected and
// returned at the end; any other error stops the iteration and is returned.
func (o MarshalOptions) marshalMap(b []byte, num wire.Number, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
	keyDesc := fd.MapKey()
	valDesc := fd.MapValue()
	var (
		collected errors.NonFatal
		lastErr   error
	)
	encodeEntry := func(k protoreflect.MapKey, v protoreflect.Value) bool {
		b = wire.AppendTag(b, num, wire.BytesType)
		var start int
		b, start = appendSpeculativeLength(b)
		if b, lastErr = o.marshalField(b, keyDesc, k.Value()); !collected.Merge(lastErr) {
			return false
		}
		if b, lastErr = o.marshalField(b, valDesc, v); !collected.Merge(lastErr) {
			return false
		}
		// Whatever lastErr held has been merged into collected by now.
		lastErr = nil
		b = finishSpeculativeLength(b, start)
		return true
	}
	o.rangeMap(mapv, keyDesc.Kind(), encodeEntry)
	if lastErr != nil {
		return b, lastErr
	}
	return b, collected.E
}
// rangeMap calls f for the entries of mapv.
//
// With o.Deterministic set, iteration is delegated to mapsort.Range, which
// is also given the key kind. Otherwise the map's own Range method is used.
func (o MarshalOptions) rangeMap(mapv protoreflect.Map, kind protoreflect.Kind, f func(protoreflect.MapKey, protoreflect.Value) bool) {
	if o.Deterministic {
		mapsort.Range(mapv, kind, f)
		return
	}
	mapv.Range(f)
}
// Length-prefixed fields are encoded in three steps: reserve a guessed
// number of bytes for the length prefix, encode the payload, then write the
// real length, shifting the payload if the guess turned out to be wrong.
//
// speculativeLength is the number of bytes reserved for the prefix.
const speculativeLength = 1

// appendSpeculativeLength reserves speculativeLength zero bytes at the end
// of b as a placeholder for a length prefix. It returns the extended slice
// and the offset at which the placeholder begins.
func appendSpeculativeLength(b []byte) ([]byte, int) {
	start := len(b)
	return append(b, "\x00\x00\x00\x00"[:speculativeLength]...), start
}
// finishSpeculativeLength fills in the length prefix that was reserved at
// offset pos by appendSpeculativeLength and returns the finished slice.
//
// The payload is everything in b after the reserved bytes. If the varint
// encoding of the payload length does not take exactly speculativeLength
// bytes, the payload is moved so that the prefix fits and b is resized to
// match.
func finishSpeculativeLength(b []byte, pos int) []byte {
	payloadLen := len(b) - pos - speculativeLength
	prefixLen := wire.SizeVarint(uint64(payloadLen))
	if prefixLen != speculativeLength {
		// Grow b by the number of missing prefix bytes (none if the prefix
		// is shorter than the reservation).
		for extra := prefixLen - speculativeLength; extra > 0; extra-- {
			b = append(b, 0)
		}
		// Move the payload to its final position; copy handles the overlap.
		copy(b[pos+prefixLen:], b[pos+speculativeLength:])
		b = b[:pos+prefixLen+payloadLen]
	}
	// The result is discarded on purpose: the prefix is expected to be
	// written into b's existing backing array starting at pos.
	wire.AppendVarint(b[:pos], uint64(payloadLen))
	return b
}