mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-01-28 14:54:21 +00:00
c37adefdac
This is a port of the v1 table marshaler, with some substantial cleanup and refactoring.

Benchstat results from the protobuf reference benchmark data comparing the v1 package with v2, with AllowPartial:true set for the new package. This is not an apples-to-apples comparison, since v1 doesn't have a way to disable required field checks. Required field checks in the v2 package currently go through reflection, which performs terribly; my initial experimentation indicates that fast-path required field checks will not add a large amount of cost; these results are incomplete but not wholly inaccurate.

name                                           old time/op   new time/op   delta
/dataset.google_message3_1.pb/Marshal-12       219ms ± 1%    232ms ± 1%    +5.85%   (p=0.004 n=6+5)
/dataset.google_message2.pb/Marshal-12         261µs ± 3%    248µs ± 1%    -5.14%   (p=0.002 n=6+6)
/dataset.google_message1_proto2.pb/Marshal-12  681ns ± 2%    637ns ± 3%    -6.53%   (p=0.002 n=6+6)
/dataset.google_message1_proto3.pb/Marshal-12  1.10µs ± 8%   0.99µs ± 3%   -9.63%   (p=0.002 n=6+6)
/dataset.google_message3_3.pb/Marshal-12       44.2ms ± 3%   35.2ms ± 1%   -20.28%  (p=0.004 n=6+5)
/dataset.google_message4.pb/Marshal-12         91.4ms ± 2%   94.9ms ± 2%   +3.78%   (p=0.002 n=6+6)
/dataset.google_message3_2.pb/Marshal-12       78.7ms ± 6%   80.8ms ± 4%   ~        (p=0.310 n=6+6)
/dataset.google_message3_4.pb/Marshal-12       10.6ms ± 3%   10.6ms ± 8%   ~        (p=0.662 n=5+6)
/dataset.google_message3_5.pb/Marshal-12       675ms ± 4%    510ms ± 2%    -24.40%  (p=0.002 n=6+6)
/dataset.google_message3_1.pb/Marshal          219ms ± 1%    236ms ± 7%    +8.06%   (p=0.004 n=5+6)
/dataset.google_message2.pb/Marshal            257µs ± 1%    250µs ± 3%    ~        (p=0.052 n=5+6)
/dataset.google_message1_proto2.pb/Marshal     685ns ± 1%    628ns ± 1%    -8.41%   (p=0.008 n=5+5)
/dataset.google_message1_proto3.pb/Marshal     1.08µs ± 1%   0.98µs ± 2%   -9.31%   (p=0.004 n=5+6)
/dataset.google_message3_3.pb/Marshal          43.7ms ± 1%   35.1ms ± 1%   -19.76%  (p=0.002 n=6+6)
/dataset.google_message4.pb/Marshal            93.4ms ± 4%   94.9ms ± 2%   ~        (p=0.180 n=6+6)
/dataset.google_message3_2.pb/Marshal          105ms ± 2%    98ms ± 7%     -6.81%   (p=0.009 n=5+6)
/dataset.google_message3_4.pb/Marshal          16.3ms ± 6%   15.7ms ± 3%   -3.44%   (p=0.041 n=6+6)
/dataset.google_message3_5.pb/Marshal          676ms ± 4%    504ms ± 2%    -25.50%  (p=0.004 n=6+5)

Change-Id: I72cc4597117f4cf5d236ef505777d49dd4a5f75d
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/171020
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
287 lines
8.8 KiB
Go
287 lines
8.8 KiB
Go
// Copyright 2019 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package proto
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"google.golang.org/protobuf/internal/encoding/wire"
|
|
"google.golang.org/protobuf/internal/errors"
|
|
"google.golang.org/protobuf/internal/mapsort"
|
|
"google.golang.org/protobuf/internal/pragma"
|
|
"google.golang.org/protobuf/reflect/protoreflect"
|
|
"google.golang.org/protobuf/runtime/protoiface"
|
|
)
|
|
|
|
// MarshalOptions configures the marshaler.
|
|
//
|
|
// Example usage:
|
|
// b, err := MarshalOptions{Deterministic: true}.Marshal(m)
|
|
type MarshalOptions struct {
|
|
// AllowPartial allows messages that have missing required fields to marshal
|
|
// without returning an error. If AllowPartial is false (the default),
|
|
// Marshal will return an error if there are any missing required fields.
|
|
AllowPartial bool
|
|
|
|
// Deterministic controls whether the same message will always be
|
|
// serialized to the same bytes within the same binary.
|
|
//
|
|
// Setting this option guarantees that repeated serialization of
|
|
// the same message will return the same bytes, and that different
|
|
// processes of the same binary (which may be executing on different
|
|
// machines) will serialize equal messages to the same bytes.
|
|
//
|
|
// Note that the deterministic serialization is NOT canonical across
|
|
// languages. It is not guaranteed to remain stable over time. It is
|
|
// unstable across different builds with schema changes due to unknown
|
|
// fields. Users who need canonical serialization (e.g., persistent
|
|
// storage in a canonical form, fingerprinting, etc.) must define
|
|
// their own canonicalization specification and implement their own
|
|
// serializer rather than relying on this API.
|
|
//
|
|
// If deterministic serialization is requested, map entries will be
|
|
// sorted by keys in lexographical order. This is an implementation
|
|
// detail and subject to change.
|
|
Deterministic bool
|
|
|
|
// UseCachedSize indicates that the result of a previous Size call
|
|
// may be reused.
|
|
//
|
|
// Setting this option asserts that:
|
|
//
|
|
// 1. Size has previously been called on this message with identical
|
|
// options (except for UseCachedSize itself).
|
|
//
|
|
// 2. The message and all its submessages have not changed in any
|
|
// way since the Size call.
|
|
//
|
|
// If either of these invariants is broken, the results are undefined
|
|
// but may include panics or invalid output.
|
|
//
|
|
// Implementations MAY take this option into account to provide
|
|
// better performance, but there is no guarantee that they will do so.
|
|
// There is absolutely no guarantee that Size followed by Marshal with
|
|
// UseCachedSize set will perform equivalently to Marshal alone.
|
|
UseCachedSize bool
|
|
|
|
pragma.NoUnkeyedLiterals
|
|
}
|
|
|
|
// Compile-time check that MarshalOptions can be converted to
// protoiface.MarshalOptions; marshalMessageFast relies on this conversion
// when handing the options to a message's own MarshalAppend method.
var _ = protoiface.MarshalOptions(MarshalOptions{})
|
|
|
|
// Marshal returns the wire-format encoding of m.
|
|
func Marshal(m Message) ([]byte, error) {
|
|
return MarshalOptions{}.MarshalAppend(nil, m)
|
|
}
|
|
|
|
// Marshal returns the wire-format encoding of m.
|
|
func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
|
|
return o.MarshalAppend(nil, m)
|
|
}
|
|
|
|
// MarshalAppend appends the wire-format encoding of m to b,
|
|
// returning the result.
|
|
func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
|
|
// Set AllowPartial in recursive calls to marshal to avoid duplicating
|
|
// effort with the single initialization check below.
|
|
allowPartial := o.AllowPartial
|
|
o.AllowPartial = true
|
|
out, err := o.marshalMessageFast(b, m)
|
|
if err == errInternalNoFast {
|
|
out, err = o.marshalMessage(b, m.ProtoReflect())
|
|
}
|
|
var nerr errors.NonFatal
|
|
if !nerr.Merge(err) {
|
|
return out, err
|
|
}
|
|
if !allowPartial {
|
|
nerr.Merge(IsInitialized(m))
|
|
}
|
|
return out, nerr.E
|
|
}
|
|
|
|
func (o MarshalOptions) marshalMessageFast(b []byte, m Message) ([]byte, error) {
|
|
methods := protoMethods(m)
|
|
if methods == nil ||
|
|
methods.MarshalAppend == nil ||
|
|
(o.Deterministic && methods.Flags&protoiface.MethodFlagDeterministicMarshal == 0) {
|
|
return nil, errInternalNoFast
|
|
}
|
|
if methods.Size != nil {
|
|
sz := methods.Size(m)
|
|
if cap(b) < len(b)+sz {
|
|
x := make([]byte, len(b), len(b)+sz)
|
|
copy(x, b)
|
|
b = x
|
|
}
|
|
o.UseCachedSize = true
|
|
}
|
|
return methods.MarshalAppend(b, m, protoiface.MarshalOptions(o))
|
|
}
|
|
|
|
func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
|
|
// There are many choices for what order we visit fields in. The default one here
|
|
// is chosen for reasonable efficiency and simplicity given the protoreflect API.
|
|
// It is not deterministic, since KnownFields.Range does not return fields in any
|
|
// defined order.
|
|
//
|
|
// When using deterministic serialization, we sort the known fields by field number.
|
|
fieldDescs := m.Descriptor().Fields()
|
|
knownFields := m.KnownFields()
|
|
var err error
|
|
var nerr errors.NonFatal
|
|
o.rangeKnown(knownFields, func(num protoreflect.FieldNumber, value protoreflect.Value) bool {
|
|
field := fieldDescs.ByNumber(num)
|
|
if field == nil {
|
|
field = knownFields.ExtensionTypes().ByNumber(num).Descriptor()
|
|
if field == nil {
|
|
panic(fmt.Errorf("no descriptor for field %d in %q", num, m.Descriptor().FullName()))
|
|
}
|
|
}
|
|
b, err = o.marshalField(b, field, value)
|
|
if nerr.Merge(err) {
|
|
err = nil
|
|
return true
|
|
}
|
|
return false
|
|
})
|
|
if err != nil {
|
|
return b, err
|
|
}
|
|
m.UnknownFields().Range(func(_ protoreflect.FieldNumber, raw protoreflect.RawFields) bool {
|
|
b = append(b, raw...)
|
|
return true
|
|
})
|
|
return b, nerr.E
|
|
}
|
|
|
|
// rangeKnown visits known fields in field number order when deterministic
|
|
// serialization is enabled.
|
|
func (o MarshalOptions) rangeKnown(knownFields protoreflect.KnownFields, f func(protoreflect.FieldNumber, protoreflect.Value) bool) {
|
|
if !o.Deterministic {
|
|
knownFields.Range(f)
|
|
return
|
|
}
|
|
nums := make([]protoreflect.FieldNumber, 0, knownFields.Len())
|
|
knownFields.Range(func(num protoreflect.FieldNumber, _ protoreflect.Value) bool {
|
|
nums = append(nums, num)
|
|
return true
|
|
})
|
|
sort.Slice(nums, func(a, b int) bool {
|
|
return nums[a] < nums[b]
|
|
})
|
|
for _, num := range nums {
|
|
if !f(num, knownFields.Get(num)) {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
|
|
num := fd.Number()
|
|
kind := fd.Kind()
|
|
switch {
|
|
case fd.IsList():
|
|
return o.marshalList(b, num, fd, value.List())
|
|
case fd.IsMap():
|
|
return o.marshalMap(b, num, fd, value.Map())
|
|
default:
|
|
b = wire.AppendTag(b, num, wireTypes[kind])
|
|
return o.marshalSingular(b, num, fd, value)
|
|
}
|
|
}
|
|
|
|
func (o MarshalOptions) marshalList(b []byte, num wire.Number, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
|
|
if fd.IsPacked() {
|
|
b = wire.AppendTag(b, num, wire.BytesType)
|
|
b, pos := appendSpeculativeLength(b)
|
|
var nerr errors.NonFatal
|
|
for i, llen := 0, list.Len(); i < llen; i++ {
|
|
var err error
|
|
b, err = o.marshalSingular(b, num, fd, list.Get(i))
|
|
if !nerr.Merge(err) {
|
|
return b, err
|
|
}
|
|
}
|
|
b = finishSpeculativeLength(b, pos)
|
|
return b, nerr.E
|
|
}
|
|
|
|
kind := fd.Kind()
|
|
var nerr errors.NonFatal
|
|
for i, llen := 0, list.Len(); i < llen; i++ {
|
|
var err error
|
|
b = wire.AppendTag(b, num, wireTypes[kind])
|
|
b, err = o.marshalSingular(b, num, fd, list.Get(i))
|
|
if !nerr.Merge(err) {
|
|
return b, err
|
|
}
|
|
}
|
|
return b, nerr.E
|
|
}
|
|
|
|
func (o MarshalOptions) marshalMap(b []byte, num wire.Number, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
|
|
keyf := fd.MapKey()
|
|
valf := fd.MapValue()
|
|
var nerr errors.NonFatal
|
|
var err error
|
|
o.rangeMap(mapv, keyf.Kind(), func(key protoreflect.MapKey, value protoreflect.Value) bool {
|
|
b = wire.AppendTag(b, num, wire.BytesType)
|
|
var pos int
|
|
b, pos = appendSpeculativeLength(b)
|
|
|
|
b, err = o.marshalField(b, keyf, key.Value())
|
|
if !nerr.Merge(err) {
|
|
return false
|
|
}
|
|
b, err = o.marshalField(b, valf, value)
|
|
if !nerr.Merge(err) {
|
|
return false
|
|
}
|
|
err = nil
|
|
|
|
b = finishSpeculativeLength(b, pos)
|
|
return true
|
|
})
|
|
if err != nil {
|
|
return b, err
|
|
}
|
|
return b, nerr.E
|
|
}
|
|
|
|
func (o MarshalOptions) rangeMap(mapv protoreflect.Map, kind protoreflect.Kind, f func(protoreflect.MapKey, protoreflect.Value) bool) {
|
|
if !o.Deterministic {
|
|
mapv.Range(f)
|
|
return
|
|
}
|
|
mapsort.Range(mapv, kind, f)
|
|
}
|
|
|
|
// speculativeLength is the number of bytes reserved up front for the length
// prefix of a length-prefixed field. The payload is encoded right after the
// reserved bytes; once its size is known the real length is written, and the
// payload is shifted if the length needs a different number of bytes.
const speculativeLength = 1
|
|
|
|
func appendSpeculativeLength(b []byte) ([]byte, int) {
|
|
pos := len(b)
|
|
b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...)
|
|
return b, pos
|
|
}
|
|
|
|
func finishSpeculativeLength(b []byte, pos int) []byte {
|
|
mlen := len(b) - pos - speculativeLength
|
|
msiz := wire.SizeVarint(uint64(mlen))
|
|
if msiz != speculativeLength {
|
|
for i := 0; i < msiz-speculativeLength; i++ {
|
|
b = append(b, 0)
|
|
}
|
|
copy(b[pos+msiz:], b[pos+speculativeLength:])
|
|
b = b[:pos+msiz+mlen]
|
|
}
|
|
wire.AppendVarint(b[:pos], uint64(mlen))
|
|
return b
|
|
}
|