mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-02-06 09:40:07 +00:00
a3369c5dc2
Improve performance by replacing use of regular expressions with direct parsing code.

Compared to latest version:

name                                     old time/op    new time/op    delta
Text/Unmarshal/google_message1_proto2-4  21.8µs ± 5%    14.0µs ± 9%    -35.69%  (p=0.000 n=10+9)
Text/Unmarshal/google_message1_proto3-4  19.6µs ± 4%    13.8µs ±10%    -29.47%  (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4         13.4ms ± 4%    4.9ms ± 4%     -63.44%  (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4    13.8µs ± 2%    14.1µs ± 4%    +2.42%   (p=0.011 n=9+10)
Text/Marshal/google_message1_proto3-4    11.6µs ± 2%    11.8µs ± 8%    ~        (p=0.573 n=8+10)
Text/Marshal/google_message2-4           8.01ms ±48%    5.97ms ± 5%    -25.44%  (p=0.000 n=10+10)

name                                     old alloc/op   new alloc/op   delta
Text/Unmarshal/google_message1_proto2-4  13.0kB ± 0%    12.6kB ± 0%    -3.40%   (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4  13.0kB ± 0%    12.5kB ± 0%    -3.50%   (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4         5.67MB ± 0%    5.50MB ± 0%    -3.13%   (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4    12.0kB ± 0%    12.1kB ± 0%    +0.02%   (p=0.000 n=10+10)
Text/Marshal/google_message1_proto3-4    11.7kB ± 0%    11.7kB ± 0%    +0.01%   (p=0.000 n=10+10)
Text/Marshal/google_message2-4           5.68MB ± 0%    5.68MB ± 0%    +0.01%   (p=0.000 n=10+10)

name                                     old allocs/op  new allocs/op  delta
Text/Unmarshal/google_message1_proto2-4  142 ± 0%       142 ± 0%       ~        (all equal)
Text/Unmarshal/google_message1_proto3-4  156 ± 0%       156 ± 0%       ~        (all equal)
Text/Unmarshal/google_message2-4         70.1k ± 0%     65.4k ± 0%     -6.76%   (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4    91.0 ± 0%      91.0 ± 0%      ~        (all equal)
Text/Marshal/google_message1_proto3-4    80.0 ± 0%      80.0 ± 0%      ~        (all equal)
Text/Marshal/google_message2-4           36.4k ± 0%     36.4k ± 0%     ~        (all equal)

Change-Id: Ia5d3c16e9e33961aae03bac0d53fcfc5b1943d2a
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/173360
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
193 lines
5.0 KiB
Go
193 lines
5.0 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package text
|
|
|
|
import (
|
|
"bytes"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"google.golang.org/protobuf/internal/detrand"
|
|
"google.golang.org/protobuf/internal/errors"
|
|
)
|
|
|
|
// Marshal serializes v as the proto text format, where v must be a Message.
|
|
// In the proto text format, the top-level value is always a message where the
|
|
// delimiters are elided.
|
|
//
|
|
// If indent is a non-empty string, it causes every entry in a List or Message
|
|
// to be preceded by the indent and trailed by a newline.
|
|
//
|
|
// If delims is not the zero value, it controls the delimiter characters used
|
|
// for messages (e.g., "{}" vs "<>").
|
|
//
|
|
// If outputASCII is true, strings will be serialized in such a way that
|
|
// multi-byte UTF-8 sequences are escaped. This property ensures that the
|
|
// overall output is ASCII (as opposed to UTF-8).
|
|
func Marshal(v Value, indent string, delims [2]byte, outputASCII bool) ([]byte, error) {
|
|
p := encoder{}
|
|
if len(indent) > 0 {
|
|
if strings.Trim(indent, " \t") != "" {
|
|
return nil, errors.New("indent may only be composed of space and tab characters")
|
|
}
|
|
p.indent = indent
|
|
p.newline = "\n"
|
|
}
|
|
switch delims {
|
|
case [2]byte{0, 0}:
|
|
p.delims = [2]byte{'{', '}'}
|
|
case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
|
|
p.delims = delims
|
|
default:
|
|
return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
|
|
}
|
|
p.outputASCII = outputASCII
|
|
|
|
err := p.marshalMessage(v, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(indent) > 0 {
|
|
return append(bytes.TrimRight(p.out, "\n"), '\n'), nil
|
|
}
|
|
return p.out, nil
|
|
}
|
|
|
|
// encoder holds the output buffer and the formatting options used while
// writing the proto text format.
type encoder struct {
	// out is the buffer that all encoded output is appended to.
	out []byte

	// indent is the per-level indentation string; it is empty for
	// single-line output.
	indent string
	// indents is the indentation for the current nesting depth. One copy of
	// indent is appended on entering a list or delimited message and
	// removed again on leaving it.
	indents []byte
	// newline is "\n" if len(indent) > 0, and empty otherwise.
	newline string
	// delims holds the opening and closing bytes written around nested
	// messages.
	delims [2]byte
	// outputASCII records the caller's request for ASCII-only output.
	outputASCII bool
}
|
|
|
|
func (p *encoder) marshalList(v Value) error {
|
|
if v.Type() != List {
|
|
return errors.New("invalid type %v, expected list", v.Type())
|
|
}
|
|
elems := v.List()
|
|
p.out = append(p.out, '[')
|
|
p.indents = append(p.indents, p.indent...)
|
|
if len(elems) > 0 {
|
|
p.out = append(p.out, p.newline...)
|
|
}
|
|
for i, elem := range elems {
|
|
p.out = append(p.out, p.indents...)
|
|
if err := p.marshalValue(elem); err != nil {
|
|
return err
|
|
}
|
|
if i < len(elems)-1 {
|
|
p.out = append(p.out, ',')
|
|
}
|
|
p.out = append(p.out, p.newline...)
|
|
}
|
|
p.indents = p.indents[:len(p.indents)-len(p.indent)]
|
|
if len(elems) > 0 {
|
|
p.out = append(p.out, p.indents...)
|
|
}
|
|
p.out = append(p.out, ']')
|
|
return nil
|
|
}
|
|
|
|
func (p *encoder) marshalMessage(v Value, emitDelims bool) error {
|
|
if v.Type() != Message {
|
|
return errors.New("invalid type %v, expected message", v.Type())
|
|
}
|
|
items := v.Message()
|
|
if emitDelims {
|
|
p.out = append(p.out, p.delims[0])
|
|
p.indents = append(p.indents, p.indent...)
|
|
if len(items) > 0 {
|
|
p.out = append(p.out, p.newline...)
|
|
}
|
|
}
|
|
for i, item := range items {
|
|
p.out = append(p.out, p.indents...)
|
|
if err := p.marshalKey(item[0]); err != nil {
|
|
return err
|
|
}
|
|
p.out = append(p.out, ':')
|
|
if len(p.indent) > 0 {
|
|
p.out = append(p.out, ' ')
|
|
}
|
|
// For multi-line output, add a random extra space after key: per message to
|
|
// make output unstable.
|
|
if len(p.indent) > 0 && detrand.Bool() {
|
|
p.out = append(p.out, ' ')
|
|
}
|
|
|
|
if err := p.marshalValue(item[1]); err != nil {
|
|
return err
|
|
}
|
|
if i < len(items)-1 && len(p.indent) == 0 {
|
|
p.out = append(p.out, ' ')
|
|
}
|
|
// For single-line output, add a random extra space after a field per message to
|
|
// make output unstable.
|
|
if len(p.indent) == 0 && detrand.Bool() && i != len(items)-1 {
|
|
p.out = append(p.out, ' ')
|
|
}
|
|
p.out = append(p.out, p.newline...)
|
|
}
|
|
if emitDelims {
|
|
p.indents = p.indents[:len(p.indents)-len(p.indent)]
|
|
if len(items) > 0 {
|
|
p.out = append(p.out, p.indents...)
|
|
}
|
|
p.out = append(p.out, p.delims[1])
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// urlRegexp matches, anchored at the start of the input, one or more runs of
// ASCII letters, digits, '-' or '_' joined by single '.' or '/' separators.
// It is not anchored at the end, so callers that need a full match must
// compare the length of the match against the length of the input.
//
// The pattern accepts more than ConsumeAnyTypeUrl in C++ does; the C++ parser,
// however, rejects many legal URL strings. The Go pattern is intentionally
// liberal to remain backwards compatible with the historical Go
// implementation, which was overly liberal (and buggy).
var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
|
|
|
|
func (p *encoder) marshalKey(v Value) error {
|
|
switch v.Type() {
|
|
case String:
|
|
var err error
|
|
p.out = append(p.out, '[')
|
|
if len(urlRegexp.FindString(v.str)) == len(v.str) {
|
|
p.out = append(p.out, v.str...)
|
|
} else {
|
|
err = p.marshalString(v)
|
|
}
|
|
p.out = append(p.out, ']')
|
|
return err
|
|
case Uint:
|
|
return p.marshalNumber(v)
|
|
case Name:
|
|
s, _ := v.Name()
|
|
p.out = append(p.out, s...)
|
|
return nil
|
|
default:
|
|
return errors.New("invalid type %v to encode key", v.Type())
|
|
}
|
|
}
|
|
|
|
func (p *encoder) marshalValue(v Value) error {
|
|
switch v.Type() {
|
|
case Bool, Int, Uint, Float32, Float64:
|
|
return p.marshalNumber(v)
|
|
case String:
|
|
return p.marshalString(v)
|
|
case List:
|
|
return p.marshalList(v)
|
|
case Message:
|
|
return p.marshalMessage(v, true)
|
|
case Name:
|
|
s, _ := v.Name()
|
|
p.out = append(p.out, s...)
|
|
return nil
|
|
default:
|
|
return errors.New("invalid type %v to encode value", v.Type())
|
|
}
|
|
}
|