internal/encoding/{json,text}: improve string parsing

Previous calls to indexNeedEscape with a type conversion from []byte
to string incur an allocation.

Make 2 different calls instead, one for string and one for bytes.

Type converting string to []byte does not incur an extra allocation.
However, the benchmark results still show it to be slower by ~3% for
textpb and 6+% for jsonpb, hence the decision to go with 2 separate
calls instead.

Results over current head:
name          old time/op    new time/op    delta
TextEncode-4    18.1ms ± 2%    18.3ms ± 2%     ~     (p=0.065 n=10+9)
TextDecode-4     233ms ± 3%     102ms ± 1%  -56.34%  (p=0.000 n=9+10)
JSONEncode-4    10.4ms ± 2%    10.5ms ± 0%   +0.56%  (p=0.019 n=9+9)
JSONDecode-4     870ms ± 2%     354ms ± 4%  -59.33%  (p=0.000 n=9+10)

name          old alloc/op   new alloc/op   delta
TextEncode-4    28.9MB ± 0%    28.9MB ± 0%   +0.00%  (p=0.000 n=10+9)
TextDecode-4    1.16GB ± 0%    0.03GB ± 0%  -97.44%  (p=0.000 n=9+10)
JSONEncode-4    3.94MB ± 0%    3.94MB ± 0%   +0.00%  (p=0.000 n=10+10)
JSONDecode-4    3.35GB ± 0%    0.01GB ± 0%  -99.83%  (p=0.000 n=10+10)

name          old allocs/op  new allocs/op  delta
TextEncode-4     73.5k ± 0%     73.5k ± 0%     ~     (all equal)
TextDecode-4      278k ± 0%      255k ± 0%   -8.26%  (p=0.000 n=9+10)
JSONEncode-4     63.8k ± 0%     63.8k ± 0%     ~     (all equal)
JSONDecode-4      247k ± 0%      210k ± 0%  -14.92%  (p=0.000 n=10+10)

Change-Id: Ibc64e9a7827ec1fffa213eb79f60497950203700
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/172239
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
This commit is contained in:
Herbie Ong 2019-04-16 00:14:03 -07:00
parent 00e50dc9c1
commit 1e09691415
2 changed files with 44 additions and 14 deletions

View File

@ -18,7 +18,7 @@ import (
func appendString(out []byte, in string) ([]byte, error) {
var nerr errors.NonFatal
out = append(out, '"')
i := indexNeedEscape(in)
i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
@ -47,7 +47,7 @@ func appendString(out []byte, in string) ([]byte, error) {
}
in = in[n:]
default:
i := indexNeedEscape(in[n:])
i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@ -65,7 +65,7 @@ func (d *Decoder) parseString(in []byte) (string, int, error) {
return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
i := indexNeedEscape(string(in))
i := indexNeedEscapeInBytes(in)
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
@ -123,16 +123,16 @@ func (d *Decoder) parseString(in []byte) (string, int, error) {
return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
i := indexNeedEscape(string(in[n:]))
i := indexNeedEscapeInBytes(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
return "", 0, io.ErrUnexpectedEOF
}
// indexNeedEscape returns the index of the next character that needs escaping.
// If no characters need escaping, this returns the input length.
func indexNeedEscape(s string) int {
// indexNeedEscapeInString returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
func indexNeedEscapeInString(s string) int {
for i, r := range s {
if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
return i
@ -140,3 +140,18 @@ func indexNeedEscape(s string) int {
}
return len(s)
}
// indexNeedEscapeInBytes reports the byte offset of the first rune in b that
// cannot be copied verbatim: a control character (below ' '), a backslash, a
// double quote, or anything that decodes to utf8.RuneError. When every rune
// can be copied as-is, the result is len(b).
// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
// resolved.
func indexNeedEscapeInBytes(b []byte) int {
	pos := 0
	for pos < len(b) {
		// Decode one rune at a time so that pos always lands on a rune
		// boundary.
		r, width := utf8.DecodeRune(b[pos:])
		switch {
		case r < ' ', r == '\\', r == '"', r == utf8.RuneError:
			return pos
		}
		pos += width
	}
	return len(b)
}

View File

@ -33,7 +33,7 @@ func appendString(out []byte, v Value, outputASCII bool) ([]byte, error) {
in := v.String()
out = append(out, '"')
i := indexNeedEscape(in)
i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
@ -72,7 +72,7 @@ func appendString(out []byte, v Value, outputASCII bool) ([]byte, error) {
}
in = in[n:]
default:
i := indexNeedEscape(in[n:])
i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@ -96,7 +96,7 @@ func consumeString(in []byte) (Value, int, error) {
return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
i := indexNeedEscape(string(in))
i := indexNeedEscapeInBytes(in)
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
@ -185,7 +185,7 @@ func consumeString(in []byte) (Value, int, error) {
return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
i := indexNeedEscape(string(in[n:]))
i := indexNeedEscapeInBytes(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@ -217,9 +217,9 @@ func (p *decoder) unmarshalStrings() (Value, error) {
return rawValueOf(strings.Join(ss, ""), b[:len(b):len(b)]), nil
}
// indexNeedEscape returns the index of the next character that needs escaping.
// If no characters need escaping, this returns the input length.
func indexNeedEscape(s string) int {
// indexNeedEscapeInString returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
func indexNeedEscapeInString(s string) int {
for i := 0; i < len(s); i++ {
if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
return i
@ -227,3 +227,18 @@ func indexNeedEscape(s string) int {
}
return len(s)
}
// indexNeedEscapeInBytes returns the index of the first byte in b that needs
// escaping. If no bytes need escaping, this returns the input length.
//
// A byte needs escaping when it is a control character (below ' '), a double
// quote, a single quote, a backslash, or any non-ASCII byte
// (>= utf8.RuneSelf). Every condition is decidable from a single byte, so the
// input is scanned bytewise, mirroring indexNeedEscapeInString, rather than
// being decoded rune by rune.
// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
// resolved.
func indexNeedEscapeInBytes(b []byte) int {
	for i, c := range b {
		if c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
			return i
		}
	}
	return len(b)
}