mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-02-11 06:40:37 +00:00
Improve performance by replacing use of regular expressions with direct parsing code. Compared to latest version: name old time/op new time/op delta Text/Unmarshal/google_message1_proto2-4 21.8µs ± 5% 14.0µs ± 9% -35.69% (p=0.000 n=10+9) Text/Unmarshal/google_message1_proto3-4 19.6µs ± 4% 13.8µs ±10% -29.47% (p=0.000 n=10+10) Text/Unmarshal/google_message2-4 13.4ms ± 4% 4.9ms ± 4% -63.44% (p=0.000 n=10+10) Text/Marshal/google_message1_proto2-4 13.8µs ± 2% 14.1µs ± 4% +2.42% (p=0.011 n=9+10) Text/Marshal/google_message1_proto3-4 11.6µs ± 2% 11.8µs ± 8% ~ (p=0.573 n=8+10) Text/Marshal/google_message2-4 8.01ms ±48% 5.97ms ± 5% -25.44% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Text/Unmarshal/google_message1_proto2-4 13.0kB ± 0% 12.6kB ± 0% -3.40% (p=0.000 n=10+10) Text/Unmarshal/google_message1_proto3-4 13.0kB ± 0% 12.5kB ± 0% -3.50% (p=0.000 n=10+10) Text/Unmarshal/google_message2-4 5.67MB ± 0% 5.50MB ± 0% -3.13% (p=0.000 n=10+10) Text/Marshal/google_message1_proto2-4 12.0kB ± 0% 12.1kB ± 0% +0.02% (p=0.000 n=10+10) Text/Marshal/google_message1_proto3-4 11.7kB ± 0% 11.7kB ± 0% +0.01% (p=0.000 n=10+10) Text/Marshal/google_message2-4 5.68MB ± 0% 5.68MB ± 0% +0.01% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Text/Unmarshal/google_message1_proto2-4 142 ± 0% 142 ± 0% ~ (all equal) Text/Unmarshal/google_message1_proto3-4 156 ± 0% 156 ± 0% ~ (all equal) Text/Unmarshal/google_message2-4 70.1k ± 0% 65.4k ± 0% -6.76% (p=0.000 n=10+10) Text/Marshal/google_message1_proto2-4 91.0 ± 0% 91.0 ± 0% ~ (all equal) Text/Marshal/google_message1_proto3-4 80.0 ± 0% 80.0 ± 0% ~ (all equal) Text/Marshal/google_message2-4 36.4k ± 0% 36.4k ± 0% ~ (all equal) Change-Id: Ia5d3c16e9e33961aae03bac0d53fcfc5b1943d2a Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/173360 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
338 lines
7.1 KiB
Go
338 lines
7.1 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package text
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"math"
|
|
"strconv"
|
|
|
|
"google.golang.org/protobuf/internal/errors"
|
|
)
|
|
|
|
// marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
|
|
func (p *encoder) marshalNumber(v Value) error {
|
|
var err error
|
|
p.out, err = appendNumber(p.out, v)
|
|
return err
|
|
}
|
|
func appendNumber(out []byte, v Value) ([]byte, error) {
|
|
if len(v.raw) > 0 {
|
|
switch v.Type() {
|
|
case Bool, Int, Uint, Float32, Float64:
|
|
return append(out, v.raw...), nil
|
|
}
|
|
}
|
|
switch v.Type() {
|
|
case Bool:
|
|
if b, _ := v.Bool(); b {
|
|
return append(out, "true"...), nil
|
|
} else {
|
|
return append(out, "false"...), nil
|
|
}
|
|
case Int:
|
|
return strconv.AppendInt(out, int64(v.num), 10), nil
|
|
case Uint:
|
|
return strconv.AppendUint(out, uint64(v.num), 10), nil
|
|
case Float32:
|
|
return appendFloat(out, v, 32)
|
|
case Float64:
|
|
return appendFloat(out, v, 64)
|
|
default:
|
|
return nil, errors.New("invalid type %v, expected bool or number", v.Type())
|
|
}
|
|
}
|
|
|
|
func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
|
|
switch n := math.Float64frombits(v.num); {
|
|
case math.IsNaN(n):
|
|
return append(out, "nan"...), nil
|
|
case math.IsInf(n, +1):
|
|
return append(out, "inf"...), nil
|
|
case math.IsInf(n, -1):
|
|
return append(out, "-inf"...), nil
|
|
default:
|
|
return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil
|
|
}
|
|
}
|
|
|
|
// These regular expressions were derived by reverse engineering the C++ code
|
|
// in tokenizer.cc and text_format.cc.
|
|
var (
|
|
literals = map[string]interface{}{
|
|
// These exact literals are the ones supported in C++.
|
|
// In C++, a 1-bit unsigned integers is also allowed to represent
|
|
// a boolean. This is handled in Value.Bool.
|
|
"t": true,
|
|
"true": true,
|
|
"True": true,
|
|
"f": false,
|
|
"false": false,
|
|
"False": false,
|
|
|
|
// C++ permits "-nan" and the case-insensitive variants of these.
|
|
// However, Go continues to be case-sensitive.
|
|
"nan": math.NaN(),
|
|
"inf": math.Inf(+1),
|
|
"-inf": math.Inf(-1),
|
|
}
|
|
)
|
|
|
|
// unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
|
|
func (p *decoder) unmarshalNumber() (Value, error) {
|
|
v, n, err := consumeNumber(p.in)
|
|
p.consume(n)
|
|
return v, err
|
|
}
|
|
|
|
func consumeNumber(in []byte) (Value, int, error) {
|
|
if len(in) == 0 {
|
|
return Value{}, 0, io.ErrUnexpectedEOF
|
|
}
|
|
if v, n := matchLiteral(in); n > 0 {
|
|
return rawValueOf(v, in[:n]), n, nil
|
|
}
|
|
|
|
num, ok := parseNumber(in)
|
|
if !ok {
|
|
return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
|
|
}
|
|
|
|
if num.typ == numFloat {
|
|
f, err := strconv.ParseFloat(string(num.value), 64)
|
|
if err != nil {
|
|
return Value{}, 0, err
|
|
}
|
|
return rawValueOf(f, in[:num.size]), num.size, nil
|
|
}
|
|
|
|
if num.neg {
|
|
v, err := strconv.ParseInt(string(num.value), 0, 64)
|
|
if err != nil {
|
|
return Value{}, 0, err
|
|
}
|
|
return rawValueOf(v, num.value), num.size, nil
|
|
}
|
|
v, err := strconv.ParseUint(string(num.value), 0, 64)
|
|
if err != nil {
|
|
return Value{}, 0, err
|
|
}
|
|
return rawValueOf(v, num.value), num.size, nil
|
|
}
|
|
|
|
func matchLiteral(in []byte) (interface{}, int) {
|
|
switch in[0] {
|
|
case 't', 'T':
|
|
rest := in[1:]
|
|
if len(rest) == 0 || isDelim(rest[0]) {
|
|
return true, 1
|
|
}
|
|
if n := matchStringWithDelim("rue", rest); n > 0 {
|
|
return true, 4
|
|
}
|
|
case 'f', 'F':
|
|
rest := in[1:]
|
|
if len(rest) == 0 || isDelim(rest[0]) {
|
|
return false, 1
|
|
}
|
|
if n := matchStringWithDelim("alse", rest); n > 0 {
|
|
return false, 5
|
|
}
|
|
case 'n':
|
|
if n := matchStringWithDelim("nan", in); n > 0 {
|
|
return math.NaN(), 3
|
|
}
|
|
case 'i':
|
|
if n := matchStringWithDelim("inf", in); n > 0 {
|
|
return math.Inf(1), 3
|
|
}
|
|
case '-':
|
|
if n := matchStringWithDelim("-inf", in); n > 0 {
|
|
return math.Inf(-1), 4
|
|
}
|
|
}
|
|
return nil, 0
|
|
}
|
|
|
|
func matchStringWithDelim(s string, b []byte) int {
|
|
if !bytes.HasPrefix(b, []byte(s)) {
|
|
return 0
|
|
}
|
|
|
|
n := len(s)
|
|
if n < len(b) && !isDelim(b[n]) {
|
|
return 0
|
|
}
|
|
return n
|
|
}
|
|
|
|
type numType uint8
|
|
|
|
const (
|
|
numDec numType = (1 << iota) / 2
|
|
numHex
|
|
numOct
|
|
numFloat
|
|
)
|
|
|
|
// number is the result of parsing out a valid number from parseNumber. It
|
|
// contains data for doing float or integer conversion via the strconv package.
|
|
type number struct {
|
|
typ numType
|
|
neg bool
|
|
// Size of input taken up by the number. This may not be the same as
|
|
// len(number.value).
|
|
size int
|
|
// Bytes for doing strconv.Parse{Float,Int,Uint} conversion.
|
|
value []byte
|
|
}
|
|
|
|
// parseNumber constructs a number object from given input. It allows for the
|
|
// following patterns:
|
|
// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
|
|
// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
|
|
func parseNumber(input []byte) (number, bool) {
|
|
var size int
|
|
var neg bool
|
|
typ := numDec
|
|
|
|
s := input
|
|
if len(s) == 0 {
|
|
return number{}, false
|
|
}
|
|
|
|
// Optional -
|
|
if s[0] == '-' {
|
|
neg = true
|
|
s = s[1:]
|
|
size++
|
|
if len(s) == 0 {
|
|
return number{}, false
|
|
}
|
|
}
|
|
|
|
// C++ allows for whitespace and comments in between the negative sign and
|
|
// the rest of the number. This logic currently does not but is consistent
|
|
// with v1.
|
|
|
|
switch {
|
|
case s[0] == '0':
|
|
if len(s) > 1 {
|
|
switch {
|
|
case s[1] == 'x' || s[1] == 'X':
|
|
// Parse as hex number.
|
|
typ = numHex
|
|
n := 2
|
|
s = s[2:]
|
|
for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
|
|
('a' <= s[0] && s[0] <= 'f') ||
|
|
('A' <= s[0] && s[0] <= 'F')) {
|
|
s = s[1:]
|
|
n++
|
|
}
|
|
if n == 2 {
|
|
return number{}, false
|
|
}
|
|
size += n
|
|
|
|
case '0' <= s[1] && s[1] <= '7':
|
|
// Parse as octal number.
|
|
typ = numOct
|
|
n := 2
|
|
s = s[2:]
|
|
for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
|
|
s = s[1:]
|
|
n++
|
|
}
|
|
size += n
|
|
}
|
|
|
|
if typ&(numHex|numOct) > 0 {
|
|
if len(s) > 0 && !isDelim(s[0]) {
|
|
return number{}, false
|
|
}
|
|
return number{
|
|
typ: typ,
|
|
size: size,
|
|
neg: neg,
|
|
value: input[:size],
|
|
}, true
|
|
}
|
|
}
|
|
s = s[1:]
|
|
size++
|
|
|
|
case '1' <= s[0] && s[0] <= '9':
|
|
n := 1
|
|
s = s[1:]
|
|
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
|
|
s = s[1:]
|
|
n++
|
|
}
|
|
size += n
|
|
|
|
case s[0] == '.':
|
|
// Handled below.
|
|
|
|
default:
|
|
return number{}, false
|
|
}
|
|
|
|
// . followed by 0 or more digits.
|
|
if len(s) > 0 && s[0] == '.' {
|
|
typ = numFloat
|
|
n := 1
|
|
s = s[1:]
|
|
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
|
|
s = s[1:]
|
|
n++
|
|
}
|
|
size += n
|
|
}
|
|
|
|
// e or E followed by an optional - or + and 1 or more digits.
|
|
if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
|
|
typ = numFloat
|
|
s = s[1:]
|
|
n := 1
|
|
if s[0] == '+' || s[0] == '-' {
|
|
s = s[1:]
|
|
n++
|
|
if len(s) == 0 {
|
|
return number{}, false
|
|
}
|
|
}
|
|
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
|
|
s = s[1:]
|
|
n++
|
|
}
|
|
size += n
|
|
}
|
|
|
|
// At this point, input[:size] contains a valid number that can be converted
|
|
// via strconv.Parse{Float,Int,Uint}.
|
|
value := input[:size]
|
|
|
|
// Optional suffix f or F for floats.
|
|
if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
|
|
typ = numFloat
|
|
s = s[1:]
|
|
size++
|
|
}
|
|
|
|
// Check that next byte is a delimiter or it is at the end.
|
|
if len(s) > 0 && !isDelim(s[0]) {
|
|
return number{}, false
|
|
}
|
|
|
|
return number{
|
|
typ: typ,
|
|
size: size,
|
|
neg: neg,
|
|
value: value,
|
|
}, true
|
|
}
|