protobuf-go/proto/encode.go
Damien Neil 524c60670a runtime/protoiface: use more efficient options representation
Change the representation of option flags in protoiface from bools to a
bitfield. This brings the representation of options in protoiface in
sync with that in internal/impl.

This change has several benefits:

1. We will probably find that we need to add more option flags over time.
Converting to the more efficient representation of these flags as high
in the call stack as possible minimizes the performance implication of
the struct growing.

2. On a similar note, this avoids the need to convert from the compact
representation to the larger one when passing from internal/impl to
proto, since the {Marshal,Unmarshal}State methods take the compact form.

3. This removes unused options from protoiface. Instead of documenting
that AllowPartial is always set, we can just not include an AllowPartial
flag in the protoiface options.

4. Conversely, this provides a way to add option flags to protoiface
that we don't want to expose in the proto package.

name                             old time/op    new time/op    delta
EmptyMessage/Wire/Marshal-12       11.1ns ± 7%    10.1ns ± 1%   -9.35%  (p=0.000 n=8+8)
EmptyMessage/Wire/Unmarshal-12     7.07ns ± 0%    6.74ns ± 1%   -4.58%  (p=0.000 n=8+8)
EmptyMessage/Wire/Validate-12      4.30ns ± 1%    3.80ns ± 8%  -11.45%  (p=0.000 n=7+8)
RepeatedInt32/Wire/Marshal-12      1.17µs ± 1%    1.21µs ± 7%   +4.09%  (p=0.000 n=8+8)
RepeatedInt32/Wire/Unmarshal-12     938ns ± 0%     942ns ± 3%     ~     (p=0.178 n=7+8)
RepeatedInt32/Wire/Validate-12      521ns ± 4%     543ns ± 7%     ~     (p=0.157 n=7+8)
Required/Wire/Marshal-12           97.2ns ± 1%    95.3ns ± 1%   -1.98%  (p=0.001 n=7+7)
Required/Wire/Unmarshal-12         41.0ns ± 9%    38.6ns ± 3%   -5.73%  (p=0.048 n=8+8)
Required/Wire/Validate-12          25.4ns ±11%    21.4ns ± 3%  -15.62%  (p=0.000 n=8+7)

Change-Id: I3ac1b00ab36cfdf61316ec087a5dd20d9248e4f6
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/216760
Reviewed-by: Joe Tsai <joetsai@google.com>
2020-01-28 23:33:31 +00:00

307 lines
9.4 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proto
import (
"sort"
"google.golang.org/protobuf/internal/encoding/messageset"
"google.golang.org/protobuf/internal/encoding/wire"
"google.golang.org/protobuf/internal/fieldsort"
"google.golang.org/protobuf/internal/mapsort"
"google.golang.org/protobuf/internal/pragma"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/runtime/protoiface"
)
// MarshalOptions configures the marshaler.
//
// Example usage:
// b, err := MarshalOptions{Deterministic: true}.Marshal(m)
type MarshalOptions struct {
pragma.NoUnkeyedLiterals
// AllowPartial allows messages that have missing required fields to marshal
// without returning an error. If AllowPartial is false (the default),
// Marshal will return an error if there are any missing required fields.
AllowPartial bool
// Deterministic controls whether the same message will always be
// serialized to the same bytes within the same binary.
//
// Setting this option guarantees that repeated serialization of
// the same message will return the same bytes, and that different
// processes of the same binary (which may be executing on different
// machines) will serialize equal messages to the same bytes.
// It has no effect on the resulting size of the encoded message compared
// to a non-deterministic marshal.
//
// Note that the deterministic serialization is NOT canonical across
// languages. It is not guaranteed to remain stable over time. It is
// unstable across different builds with schema changes due to unknown
// fields. Users who need canonical serialization (e.g., persistent
// storage in a canonical form, fingerprinting, etc.) must define
// their own canonicalization specification and implement their own
// serializer rather than relying on this API.
//
// If deterministic serialization is requested, map entries will be
// sorted by keys in lexographical order. This is an implementation
// detail and subject to change.
Deterministic bool
// UseCachedSize indicates that the result of a previous Size call
// may be reused.
//
// Setting this option asserts that:
//
// 1. Size has previously been called on this message with identical
// options (except for UseCachedSize itself).
//
// 2. The message and all its submessages have not changed in any
// way since the Size call.
//
// If either of these invariants is broken, the results are undefined
// but may include panics or invalid output.
//
// Implementations MAY take this option into account to provide
// better performance, but there is no guarantee that they will do so.
// There is absolutely no guarantee that Size followed by Marshal with
// UseCachedSize set will perform equivalently to Marshal alone.
UseCachedSize bool
}
// Marshal returns the wire-format encoding of m.
func Marshal(m Message) ([]byte, error) {
out, err := MarshalOptions{}.marshal(nil, m)
return out.Buf, err
}
// Marshal returns the wire-format encoding of m.
func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
out, err := o.marshal(nil, m)
return out.Buf, err
}
// MarshalAppend appends the wire-format encoding of m to b,
// returning the result.
func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
out, err := o.marshal(b, m)
return out.Buf, err
}
// MarshalState returns the wire-format encoding of m.
//
// This method permits fine-grained control over the marshaler.
// Most users should use Marshal instead.
func (o MarshalOptions) MarshalState(m Message, in protoiface.MarshalInput) (protoiface.MarshalOutput, error) {
return o.marshal(in.Buf, m)
}
func (o MarshalOptions) marshal(b []byte, message Message) (out protoiface.MarshalOutput, err error) {
allowPartial := o.AllowPartial
o.AllowPartial = true
m := message.ProtoReflect()
if methods := protoMethods(m); methods != nil && methods.Marshal != nil &&
!(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) {
opts := protoiface.MarshalOptions{}
if o.Deterministic {
opts.Flags |= protoiface.MarshalDeterministic
}
if o.UseCachedSize {
opts.Flags |= protoiface.MarshalUseCachedSize
}
if methods.Size != nil {
sz := methods.Size(m, opts)
if cap(b) < len(b)+sz {
x := make([]byte, len(b), growcap(cap(b), len(b)+sz))
copy(x, b)
b = x
}
opts.Flags |= protoiface.MarshalUseCachedSize
}
out, err = methods.Marshal(m, protoiface.MarshalInput{
Buf: b,
}, opts)
} else {
out.Buf, err = o.marshalMessageSlow(b, m)
}
if err != nil {
return out, err
}
if allowPartial {
return out, nil
}
return out, isInitialized(m)
}
func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
out, err := o.marshal(b, m.Interface())
return out.Buf, err
}
// growcap scales up the capacity of a slice.
//
// Given a slice with a current capacity of oldcap and a desired
// capacity of wantcap, growcap returns a new capacity >= wantcap.
//
// The algorithm is mostly identical to the one used by append as of Go 1.14.
func growcap(oldcap, wantcap int) (newcap int) {
if wantcap > oldcap*2 {
newcap = wantcap
} else if oldcap < 1024 {
// The Go 1.14 runtime takes this case when len(s) < 1024,
// not when cap(s) < 1024. The difference doesn't seem
// significant here.
newcap = oldcap * 2
} else {
newcap = oldcap
for 0 < newcap && newcap < wantcap {
newcap += newcap / 4
}
if newcap <= 0 {
newcap = wantcap
}
}
return newcap
}
func (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) {
if messageset.IsMessageSet(m.Descriptor()) {
return marshalMessageSet(b, m, o)
}
// There are many choices for what order we visit fields in. The default one here
// is chosen for reasonable efficiency and simplicity given the protoreflect API.
// It is not deterministic, since Message.Range does not return fields in any
// defined order.
//
// When using deterministic serialization, we sort the known fields.
var err error
o.rangeFields(m, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool {
b, err = o.marshalField(b, fd, v)
return err == nil
})
if err != nil {
return b, err
}
b = append(b, m.GetUnknown()...)
return b, nil
}
// rangeFields visits fields in a defined order when deterministic serialization is enabled.
func (o MarshalOptions) rangeFields(m protoreflect.Message, f func(protoreflect.FieldDescriptor, protoreflect.Value) bool) {
if !o.Deterministic {
m.Range(f)
return
}
var fds []protoreflect.FieldDescriptor
m.Range(func(fd protoreflect.FieldDescriptor, _ protoreflect.Value) bool {
fds = append(fds, fd)
return true
})
sort.Slice(fds, func(a, b int) bool {
return fieldsort.Less(fds[a], fds[b])
})
for _, fd := range fds {
if !f(fd, m.Get(fd)) {
break
}
}
}
func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
switch {
case fd.IsList():
return o.marshalList(b, fd, value.List())
case fd.IsMap():
return o.marshalMap(b, fd, value.Map())
default:
b = wire.AppendTag(b, fd.Number(), wireTypes[fd.Kind()])
return o.marshalSingular(b, fd, value)
}
}
func (o MarshalOptions) marshalList(b []byte, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
if fd.IsPacked() && list.Len() > 0 {
b = wire.AppendTag(b, fd.Number(), wire.BytesType)
b, pos := appendSpeculativeLength(b)
for i, llen := 0, list.Len(); i < llen; i++ {
var err error
b, err = o.marshalSingular(b, fd, list.Get(i))
if err != nil {
return b, err
}
}
b = finishSpeculativeLength(b, pos)
return b, nil
}
kind := fd.Kind()
for i, llen := 0, list.Len(); i < llen; i++ {
var err error
b = wire.AppendTag(b, fd.Number(), wireTypes[kind])
b, err = o.marshalSingular(b, fd, list.Get(i))
if err != nil {
return b, err
}
}
return b, nil
}
func (o MarshalOptions) marshalMap(b []byte, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
keyf := fd.MapKey()
valf := fd.MapValue()
var err error
o.rangeMap(mapv, keyf.Kind(), func(key protoreflect.MapKey, value protoreflect.Value) bool {
b = wire.AppendTag(b, fd.Number(), wire.BytesType)
var pos int
b, pos = appendSpeculativeLength(b)
b, err = o.marshalField(b, keyf, key.Value())
if err != nil {
return false
}
b, err = o.marshalField(b, valf, value)
if err != nil {
return false
}
b = finishSpeculativeLength(b, pos)
return true
})
return b, err
}
func (o MarshalOptions) rangeMap(mapv protoreflect.Map, kind protoreflect.Kind, f func(protoreflect.MapKey, protoreflect.Value) bool) {
if !o.Deterministic {
mapv.Range(f)
return
}
mapsort.Range(mapv, kind, f)
}
// When encoding length-prefixed fields, we speculatively set aside some number of bytes
// for the length, encode the data, and then encode the length (shifting the data if necessary
// to make room).
const speculativeLength = 1
func appendSpeculativeLength(b []byte) ([]byte, int) {
pos := len(b)
b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...)
return b, pos
}
func finishSpeculativeLength(b []byte, pos int) []byte {
mlen := len(b) - pos - speculativeLength
msiz := wire.SizeVarint(uint64(mlen))
if msiz != speculativeLength {
for i := 0; i < msiz-speculativeLength; i++ {
b = append(b, 0)
}
copy(b[pos+msiz:], b[pos+speculativeLength:])
b = b[:pos+msiz+mlen]
}
wire.AppendVarint(b[:pos], uint64(mlen))
return b
}