protobuf-go/proto/encode.go

310 lines
9.5 KiB
Go
Raw Normal View History

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proto
import (
"sort"
"google.golang.org/protobuf/internal/encoding/messageset"
"google.golang.org/protobuf/internal/encoding/wire"
"google.golang.org/protobuf/internal/fieldsort"
"google.golang.org/protobuf/internal/mapsort"
"google.golang.org/protobuf/internal/pragma"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/runtime/protoiface"
)
// MarshalOptions configures the marshaler.
//
// Example usage:
// b, err := MarshalOptions{Deterministic: true}.Marshal(m)
type MarshalOptions struct {
pragma.NoUnkeyedLiterals
// AllowPartial allows messages that have missing required fields to marshal
// without returning an error. If AllowPartial is false (the default),
// Marshal will return an error if there are any missing required fields.
AllowPartial bool
// Deterministic controls whether the same message will always be
// serialized to the same bytes within the same binary.
//
// Setting this option guarantees that repeated serialization of
// the same message will return the same bytes, and that different
// processes of the same binary (which may be executing on different
// machines) will serialize equal messages to the same bytes.
// It has no effect on the resulting size of the encoded message compared
// to a non-deterministic marshal.
//
// Note that the deterministic serialization is NOT canonical across
// languages. It is not guaranteed to remain stable over time. It is
// unstable across different builds with schema changes due to unknown
// fields. Users who need canonical serialization (e.g., persistent
// storage in a canonical form, fingerprinting, etc.) must define
// their own canonicalization specification and implement their own
// serializer rather than relying on this API.
//
// If deterministic serialization is requested, map entries will be
// sorted by keys in lexographical order. This is an implementation
// detail and subject to change.
Deterministic bool
proto: replace CachedSize fast-path method with UseCachedSize option Using an option instead of a separate method has several useful properties: It makes it explicit whether the fast-path AppendMarshal is expected to use cached sizes or not. It properly plumbs the decision to use cached sizes through the call stack. Consider the case where message A includes B includes C: If A and C support cached sizes but B does not, we would like to use the size cache in all messages which support it. Placing this decision in the options allows this to work properly with no additional effort. Placing this option in MarshalOptions permits users to request use of existing cached sizes. This is a two-edged sword: There are places where this ability can permit substantial efficiencies, but this is also an exceedingly sharp-edged API. I believe that on balance the benefits outweigh the risks, especially since the prerequisites for using cached sizes are intuitively obvious. (You must have called Size, and you must not have changed the message.) This CL adds a Size method to MarshalOptions, rather than adding a SizeOptions type. Future additions to MarshalOptions may affect the size of the encoded output (e.g., an option to skip encoding unknown fields) and using the same options for both Marshal and Size makes it easier to use a consistent configuration for each. Change-Id: I6adbb55b717dd03d39f067e1d0b7381945000976 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171119 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-08 01:18:31 +00:00
// UseCachedSize indicates that the result of a previous Size call
// may be reused.
//
// Setting this option asserts that:
//
// 1. Size has previously been called on this message with identical
// options (except for UseCachedSize itself).
//
// 2. The message and all its submessages have not changed in any
// way since the Size call.
//
// If either of these invariants is violated,
// the results are undefined and may include panics or corrupted output.
proto: replace CachedSize fast-path method with UseCachedSize option Using an option instead of a separate method has several useful properties: It makes it explicit whether the fast-path AppendMarshal is expected to use cached sizes or not. It properly plumbs the decision to use cached sizes through the call stack. Consider the case where message A includes B includes C: If A and C support cached sizes but B does not, we would like to use the size cache in all messages which support it. Placing this decision in the options allows this to work properly with no additional effort. Placing this option in MarshalOptions permits users to request use of existing cached sizes. This is a two-edged sword: There are places where this ability can permit substantial efficiencies, but this is also an exceedingly sharp-edged API. I believe that on balance the benefits outweigh the risks, especially since the prerequisites for using cached sizes are intuitively obvious. (You must have called Size, and you must not have changed the message.) This CL adds a Size method to MarshalOptions, rather than adding a SizeOptions type. Future additions to MarshalOptions may affect the size of the encoded output (e.g., an option to skip encoding unknown fields) and using the same options for both Marshal and Size makes it easier to use a consistent configuration for each. Change-Id: I6adbb55b717dd03d39f067e1d0b7381945000976 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171119 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-04-08 01:18:31 +00:00
//
// Implementations MAY take this option into account to provide
// better performance, but there is no guarantee that they will do so.
// There is absolutely no guarantee that Size followed by Marshal with
// UseCachedSize set will perform equivalently to Marshal alone.
UseCachedSize bool
}
// Marshal returns the wire-format encoding of m.
func Marshal(m Message) ([]byte, error) {
out, err := MarshalOptions{}.marshal(nil, m)
return out.Buf, err
}
// Marshal returns the wire-format encoding of m.
func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
out, err := o.marshal(nil, m)
return out.Buf, err
}
// MarshalAppend appends the wire-format encoding of m to b,
// returning the result.
func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
out, err := o.marshal(b, m)
return out.Buf, err
}
// MarshalState returns the wire-format encoding of m.
//
// This method permits fine-grained control over the marshaler.
// Most users should use Marshal instead.
func (o MarshalOptions) MarshalState(m Message, in protoiface.MarshalInput) (protoiface.MarshalOutput, error) {
return o.marshal(in.Buf, m)
}
func (o MarshalOptions) marshal(b []byte, message Message) (out protoiface.MarshalOutput, err error) {
allowPartial := o.AllowPartial
o.AllowPartial = true
m := message.ProtoReflect()
if methods := protoMethods(m); methods != nil && methods.Marshal != nil &&
!(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) {
in := protoiface.MarshalInput{
Message: m,
Buf: b,
}
runtime/protoiface: use more efficient options representation Change the representation of option flags in protoiface from bools to a bitfield. This brings the representation of options in protoiface in sync with that in internal/impl. This change has several benefits: 1. We will probably find that we need to add more option flags over time. Converting to the more efficient representation of these flags as high in the call stack as possible minimizes the performance implication of the struct growing. 2. On a similar note, this avoids the need to convert from the compact representation to the larger one when passing from internal/impl to proto, since the {Marshal,Unmarshal}State methods take the compact form. 3. This removes unused options from protoiface. Instead of documenting that AllowPartial is always set, we can just not include an AllowPartial flag in the protoiface options. 4. Conversely, this provides a way to add option flags to protoiface that we don't want to expose in the proto package. name old time/op new time/op delta EmptyMessage/Wire/Marshal-12 11.1ns ± 7% 10.1ns ± 1% -9.35% (p=0.000 n=8+8) EmptyMessage/Wire/Unmarshal-12 7.07ns ± 0% 6.74ns ± 1% -4.58% (p=0.000 n=8+8) EmptyMessage/Wire/Validate-12 4.30ns ± 1% 3.80ns ± 8% -11.45% (p=0.000 n=7+8) RepeatedInt32/Wire/Marshal-12 1.17µs ± 1% 1.21µs ± 7% +4.09% (p=0.000 n=8+8) RepeatedInt32/Wire/Unmarshal-12 938ns ± 0% 942ns ± 3% ~ (p=0.178 n=7+8) RepeatedInt32/Wire/Validate-12 521ns ± 4% 543ns ± 7% ~ (p=0.157 n=7+8) Required/Wire/Marshal-12 97.2ns ± 1% 95.3ns ± 1% -1.98% (p=0.001 n=7+7) Required/Wire/Unmarshal-12 41.0ns ± 9% 38.6ns ± 3% -5.73% (p=0.048 n=8+8) Required/Wire/Validate-12 25.4ns ±11% 21.4ns ± 3% -15.62% (p=0.000 n=8+7) Change-Id: I3ac1b00ab36cfdf61316ec087a5dd20d9248e4f6 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/216760 Reviewed-by: Joe Tsai <joetsai@google.com>
2020-01-28 21:32:01 +00:00
if o.Deterministic {
in.Flags |= protoiface.MarshalDeterministic
runtime/protoiface: use more efficient options representation Change the representation of option flags in protoiface from bools to a bitfield. This brings the representation of options in protoiface in sync with that in internal/impl. This change has several benefits: 1. We will probably find that we need to add more option flags over time. Converting to the more efficient representation of these flags as high in the call stack as possible minimizes the performance implication of the struct growing. 2. On a similar note, this avoids the need to convert from the compact representation to the larger one when passing from internal/impl to proto, since the {Marshal,Unmarshal}State methods take the compact form. 3. This removes unused options from protoiface. Instead of documenting that AllowPartial is always set, we can just not include an AllowPartial flag in the protoiface options. 4. Conversely, this provides a way to add option flags to protoiface that we don't want to expose in the proto package. name old time/op new time/op delta EmptyMessage/Wire/Marshal-12 11.1ns ± 7% 10.1ns ± 1% -9.35% (p=0.000 n=8+8) EmptyMessage/Wire/Unmarshal-12 7.07ns ± 0% 6.74ns ± 1% -4.58% (p=0.000 n=8+8) EmptyMessage/Wire/Validate-12 4.30ns ± 1% 3.80ns ± 8% -11.45% (p=0.000 n=7+8) RepeatedInt32/Wire/Marshal-12 1.17µs ± 1% 1.21µs ± 7% +4.09% (p=0.000 n=8+8) RepeatedInt32/Wire/Unmarshal-12 938ns ± 0% 942ns ± 3% ~ (p=0.178 n=7+8) RepeatedInt32/Wire/Validate-12 521ns ± 4% 543ns ± 7% ~ (p=0.157 n=7+8) Required/Wire/Marshal-12 97.2ns ± 1% 95.3ns ± 1% -1.98% (p=0.001 n=7+7) Required/Wire/Unmarshal-12 41.0ns ± 9% 38.6ns ± 3% -5.73% (p=0.048 n=8+8) Required/Wire/Validate-12 25.4ns ±11% 21.4ns ± 3% -15.62% (p=0.000 n=8+7) Change-Id: I3ac1b00ab36cfdf61316ec087a5dd20d9248e4f6 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/216760 Reviewed-by: Joe Tsai <joetsai@google.com>
2020-01-28 21:32:01 +00:00
}
if o.UseCachedSize {
in.Flags |= protoiface.MarshalUseCachedSize
runtime/protoiface: use more efficient options representation Change the representation of option flags in protoiface from bools to a bitfield. This brings the representation of options in protoiface in sync with that in internal/impl. This change has several benefits: 1. We will probably find that we need to add more option flags over time. Converting to the more efficient representation of these flags as high in the call stack as possible minimizes the performance implication of the struct growing. 2. On a similar note, this avoids the need to convert from the compact representation to the larger one when passing from internal/impl to proto, since the {Marshal,Unmarshal}State methods take the compact form. 3. This removes unused options from protoiface. Instead of documenting that AllowPartial is always set, we can just not include an AllowPartial flag in the protoiface options. 4. Conversely, this provides a way to add option flags to protoiface that we don't want to expose in the proto package. name old time/op new time/op delta EmptyMessage/Wire/Marshal-12 11.1ns ± 7% 10.1ns ± 1% -9.35% (p=0.000 n=8+8) EmptyMessage/Wire/Unmarshal-12 7.07ns ± 0% 6.74ns ± 1% -4.58% (p=0.000 n=8+8) EmptyMessage/Wire/Validate-12 4.30ns ± 1% 3.80ns ± 8% -11.45% (p=0.000 n=7+8) RepeatedInt32/Wire/Marshal-12 1.17µs ± 1% 1.21µs ± 7% +4.09% (p=0.000 n=8+8) RepeatedInt32/Wire/Unmarshal-12 938ns ± 0% 942ns ± 3% ~ (p=0.178 n=7+8) RepeatedInt32/Wire/Validate-12 521ns ± 4% 543ns ± 7% ~ (p=0.157 n=7+8) Required/Wire/Marshal-12 97.2ns ± 1% 95.3ns ± 1% -1.98% (p=0.001 n=7+7) Required/Wire/Unmarshal-12 41.0ns ± 9% 38.6ns ± 3% -5.73% (p=0.048 n=8+8) Required/Wire/Validate-12 25.4ns ±11% 21.4ns ± 3% -15.62% (p=0.000 n=8+7) Change-Id: I3ac1b00ab36cfdf61316ec087a5dd20d9248e4f6 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/216760 Reviewed-by: Joe Tsai <joetsai@google.com>
2020-01-28 21:32:01 +00:00
}
if methods.Size != nil {
sout := methods.Size(protoiface.SizeInput{
Message: m,
Flags: in.Flags,
})
if cap(b) < len(b)+sout.Size {
in.Buf = make([]byte, len(b), growcap(cap(b), len(b)+sout.Size))
copy(in.Buf, b)
}
in.Flags |= protoiface.MarshalUseCachedSize
}
out, err = methods.Marshal(in)
} else {
out.Buf, err = o.marshalMessageSlow(b, m)
}
if err != nil {
return out, err
}
if allowPartial {
return out, nil
}
return out, checkInitialized(m)
}
func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
out, err := o.marshal(b, m.Interface())
return out.Buf, err
}
// growcap scales up the capacity of a slice.
//
// Given a slice with a current capacity of oldcap and a desired
// capacity of wantcap, growcap returns a new capacity >= wantcap.
//
// The algorithm is mostly identical to the one used by append as of Go 1.14.
func growcap(oldcap, wantcap int) (newcap int) {
if wantcap > oldcap*2 {
newcap = wantcap
} else if oldcap < 1024 {
// The Go 1.14 runtime takes this case when len(s) < 1024,
// not when cap(s) < 1024. The difference doesn't seem
// significant here.
newcap = oldcap * 2
} else {
newcap = oldcap
for 0 < newcap && newcap < wantcap {
newcap += newcap / 4
}
if newcap <= 0 {
newcap = wantcap
}
}
return newcap
}
runtime/protoiface: move and rename XXX_Methods This CL moves and renames the protoreflect.ProtoMessage.XXX_Methods to protoreflect.Message.ProtoMethods. Since one needs to obtain a protoreflect.Message now to get at the fast-path methods, we modify the method signatures to take in a protoreflect.Message instead of protoreflect.ProtoMessage. Doing so also avoids the wrapper hack that was formerly done on impl.messageReflectWrapper. After this change the new protoc-gen-go no longer generates any XXX fields or methods. All internal fields and methods are truly hidden from the end-user. name old time/op new time/op delta Wire/Unmarshal/google_message1_proto2-4 1.50µs ±10% 1.50µs ± 2% ~ (p=0.483 n=10+9) Wire/Unmarshal/google_message1_proto3-4 1.06µs ± 6% 1.06µs ± 4% ~ (p=0.814 n=9+9) Wire/Unmarshal/google_message2-4 734µs ±22% 689µs ±13% ~ (p=0.133 n=10+9) Wire/Marshal/google_message1_proto2-4 790ns ±46% 652ns ± 8% ~ (p=0.590 n=10+9) Wire/Marshal/google_message1_proto3-4 872ns ± 4% 857ns ± 3% ~ (p=0.168 n=9+9) Wire/Marshal/google_message2-4 232µs ±16% 221µs ± 3% -4.75% (p=0.014 n=9+9) Wire/Size/google_message1_proto2-4 164ns ± 2% 167ns ± 4% +1.87% (p=0.046 n=9+10) Wire/Size/google_message1_proto3-4 240ns ± 9% 229ns ± 1% -4.81% (p=0.000 n=9+8) Wire/Size/google_message2-4 58.9µs ± 9% 59.6µs ± 2% +1.23% (p=0.040 n=9+9) name old alloc/op new alloc/op delta Wire/Unmarshal/google_message1_proto2-4 912B ± 0% 912B ± 0% ~ (all equal) Wire/Unmarshal/google_message1_proto3-4 688B ± 0% 688B ± 0% ~ (all equal) Wire/Unmarshal/google_message2-4 470kB ± 0% 470kB ± 0% ~ (p=0.215 n=10+10) Wire/Marshal/google_message1_proto2-4 240B ± 0% 240B ± 0% ~ (all equal) Wire/Marshal/google_message1_proto3-4 224B ± 0% 224B ± 0% ~ (all equal) Wire/Marshal/google_message2-4 90.1kB ± 0% 90.1kB ± 0% ~ (all equal) Wire/Size/google_message1_proto2-4 0.00B 0.00B ~ (all equal) Wire/Size/google_message1_proto3-4 0.00B 0.00B ~ (all equal) Wire/Size/google_message2-4 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta Wire/Unmarshal/google_message1_proto2-4 24.0 ± 0% 24.0 ± 0% ~ (all equal) Wire/Unmarshal/google_message1_proto3-4 6.00 ± 0% 6.00 ± 0% ~ (all equal) Wire/Unmarshal/google_message2-4 8.49k ± 0% 8.49k ± 0% ~ (all equal) Wire/Marshal/google_message1_proto2-4 1.00 ± 0% 1.00 ± 0% ~ (all equal) Wire/Marshal/google_message1_proto3-4 1.00 ± 0% 1.00 ± 0% ~ (all equal) Wire/Marshal/google_message2-4 1.00 ± 0% 1.00 ± 0% ~ (all equal) Wire/Size/google_message1_proto2-4 0.00 0.00 ~ (all equal) Wire/Size/google_message1_proto3-4 0.00 0.00 ~ (all equal) Wire/Size/google_message2-4 0.00 0.00 ~ (all equal) Change-Id: Ibf3263ad0f293326695c22020a92a6b938ef4f65 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/185697 Reviewed-by: Damien Neil <dneil@google.com>
2019-07-11 06:14:31 +00:00
func (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) {
if messageset.IsMessageSet(m.Descriptor()) {
return marshalMessageSet(b, m, o)
}
// There are many choices for what order we visit fields in. The default one here
// is chosen for reasonable efficiency and simplicity given the protoreflect API.
// It is not deterministic, since Message.Range does not return fields in any
// defined order.
//
// When using deterministic serialization, we sort the known fields.
var err error
o.rangeFields(m, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool {
b, err = o.marshalField(b, fd, v)
return err == nil
})
if err != nil {
return b, err
}
b = append(b, m.GetUnknown()...)
return b, nil
}
// rangeFields visits fields in a defined order when deterministic serialization is enabled.
func (o MarshalOptions) rangeFields(m protoreflect.Message, f func(protoreflect.FieldDescriptor, protoreflect.Value) bool) {
if !o.Deterministic {
m.Range(f)
return
}
var fds []protoreflect.FieldDescriptor
m.Range(func(fd protoreflect.FieldDescriptor, _ protoreflect.Value) bool {
fds = append(fds, fd)
return true
})
sort.Slice(fds, func(a, b int) bool {
return fieldsort.Less(fds[a], fds[b])
})
for _, fd := range fds {
if !f(fd, m.Get(fd)) {
break
}
}
}
func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
switch {
case fd.IsList():
return o.marshalList(b, fd, value.List())
case fd.IsMap():
return o.marshalMap(b, fd, value.Map())
default:
b = wire.AppendTag(b, fd.Number(), wireTypes[fd.Kind()])
return o.marshalSingular(b, fd, value)
}
}
func (o MarshalOptions) marshalList(b []byte, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
if fd.IsPacked() && list.Len() > 0 {
b = wire.AppendTag(b, fd.Number(), wire.BytesType)
b, pos := appendSpeculativeLength(b)
for i, llen := 0, list.Len(); i < llen; i++ {
var err error
b, err = o.marshalSingular(b, fd, list.Get(i))
if err != nil {
return b, err
}
}
b = finishSpeculativeLength(b, pos)
return b, nil
}
kind := fd.Kind()
for i, llen := 0, list.Len(); i < llen; i++ {
var err error
b = wire.AppendTag(b, fd.Number(), wireTypes[kind])
b, err = o.marshalSingular(b, fd, list.Get(i))
if err != nil {
return b, err
}
}
return b, nil
}
func (o MarshalOptions) marshalMap(b []byte, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
keyf := fd.MapKey()
valf := fd.MapValue()
var err error
o.rangeMap(mapv, keyf.Kind(), func(key protoreflect.MapKey, value protoreflect.Value) bool {
b = wire.AppendTag(b, fd.Number(), wire.BytesType)
var pos int
b, pos = appendSpeculativeLength(b)
b, err = o.marshalField(b, keyf, key.Value())
if err != nil {
return false
}
b, err = o.marshalField(b, valf, value)
if err != nil {
return false
}
b = finishSpeculativeLength(b, pos)
return true
})
return b, err
}
func (o MarshalOptions) rangeMap(mapv protoreflect.Map, kind protoreflect.Kind, f func(protoreflect.MapKey, protoreflect.Value) bool) {
if !o.Deterministic {
mapv.Range(f)
return
}
mapsort.Range(mapv, kind, f)
}
// When encoding length-prefixed fields, we speculatively set aside some number of bytes
// for the length, encode the data, and then encode the length (shifting the data if necessary
// to make room).
const speculativeLength = 1
func appendSpeculativeLength(b []byte) ([]byte, int) {
pos := len(b)
b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...)
return b, pos
}
func finishSpeculativeLength(b []byte, pos int) []byte {
mlen := len(b) - pos - speculativeLength
msiz := wire.SizeVarint(uint64(mlen))
if msiz != speculativeLength {
for i := 0; i < msiz-speculativeLength; i++ {
b = append(b, 0)
}
copy(b[pos+msiz:], b[pos+speculativeLength:])
b = b[:pos+msiz+mlen]
}
wire.AppendVarint(b[:pos], uint64(mlen))
return b
}