protobuf-go/internal/encoding/json/string.go
Joe Tsai 879b18d902 internal/encoding/json: initial commit of JSON parser/serializer
Package json provides a parser and serializer for the JSON format.
This focuses on the grammar of the format and is agnostic towards specific
semantics of protobuf types.

High-level API:
	func Marshal(v Value, indent string) ([]byte, error)
	func Unmarshal(b []byte) (Value, error)
	type Type uint8
	    const Null Type ...
	type Value struct{ ... }
	    func ValueOf(v interface{}) Value
		func (v Value) Type() Type
		func (v Value) Bool() bool
		func (v Value) Number() float64
		func (v Value) String() string
		func (v Value) Array() []Value
		func (v Value) Object() [][2]Value
		func (v Value) Raw() []byte

Change-Id: I26422f6b3881ef1a11b8aa95160645b1384b27b8
Reviewed-on: https://go-review.googlesource.com/127824
Reviewed-by: Herbie Ong <herbie@google.com>
2018-08-07 22:40:28 +00:00

161 lines
4.1 KiB
Go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"io"
"math/bits"
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
"google.golang.org/proto/internal/errors"
)
// marshalString encodes v as a JSON string onto the encoder's output buffer.
//
// The encoder's buffer is replaced with whatever appendString returns, even
// when appendString also reports an error; that error is passed back
// unchanged.
func (p *encoder) marshalString(v Value) error {
	buf, err := appendString(p.out, v)
	p.out = buf
	return err
}
// appendString appends the JSON encoding of the string value v to out and
// returns the extended buffer.
//
// If v is not of type String, a nil buffer and an error are returned. If v
// has a non-empty raw form, those bytes are appended verbatim. Otherwise the
// string is wrapped in double quotes with '"', '\\' and characters below
// U+0020 escaped. Bytes that are not valid UTF-8 are copied through
// unchanged and recorded in the returned non-fatal error.
func appendString(out []byte, v Value) ([]byte, error) {
	if typ := v.Type(); typ != String {
		return nil, errors.New("invalid type %v, expected string", typ)
	}
	if raw := v.raw; len(raw) > 0 {
		return append(out, raw...), nil
	}

	var nerr errors.NonFatal
	s := v.String()
	out = append(out, '"')

	// Copy the leading run of characters that can be emitted as-is.
	clean := indexNeedEscape(s)
	out = append(out, s[:clean]...)
	s = s[clean:]

	for len(s) > 0 {
		r, size := utf8.DecodeRuneInString(s)

		// A one-byte RuneError means the byte is not valid UTF-8 (a properly
		// encoded U+FFFD decodes with a larger size).
		if r == utf8.RuneError && size == 1 {
			nerr.AppendInvalidUTF8("")
			out = append(out, s[0]) // preserve invalid byte
			s = s[1:]
			continue
		}

		// Nothing to escape: copy this rune together with the clean run
		// that follows it.
		if r >= ' ' && r != '"' && r != '\\' {
			end := size + indexNeedEscape(s[size:])
			out = append(out, s[:end]...)
			s = s[end:]
			continue
		}

		out = append(out, '\\')
		switch r {
		case '"', '\\':
			out = append(out, byte(r))
		case '\b':
			out = append(out, 'b')
		case '\f':
			out = append(out, 'f')
		case '\n':
			out = append(out, 'n')
		case '\r':
			out = append(out, 'r')
		case '\t':
			out = append(out, 't')
		default:
			// Emit \uXXXX: left-pad with zeros so that the hex digits
			// produced by AppendUint bring the total to four.
			out = append(out, 'u')
			out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
			out = strconv.AppendUint(out, uint64(r), 16)
		}
		s = s[size:]
	}

	out = append(out, '"')
	return out, nerr.E
}
// unmarshalString parses a JSON string from the front of the decoder's input.
//
// The byte count reported by consumeString is handed to p.consume regardless
// of whether an error occurred; the parsed value and the error are returned
// as-is.
func (p *decoder) unmarshalString() (Value, error) {
	val, size, err := consumeString(p.in)
	p.consume(size)
	return val, err
}
// consumeString parses a JSON string literal located at the start of in.
//
// On success it returns the value built by rawValueOf from the decoded string
// and the consumed input bytes, together with the number of bytes consumed
// (both quotes included).
//
// Errors:
//   - io.ErrUnexpectedEOF if in is empty, ends before the closing quote, or
//     ends in the middle of an escape sequence.
//   - a syntax error if the first byte is not '"', if an unescaped character
//     below U+0020 appears, or if an escape sequence is malformed (including
//     an unpaired or badly paired UTF-16 surrogate).
//   - a non-fatal error, returned alongside a usable value, if the literal
//     contains bytes that are not valid UTF-8; such bytes are preserved in the
//     decoded output.
//
// For every fatal error the returned value is zero and the count is 0.
func consumeString(in []byte) (Value, int, error) {
	var nerr errors.NonFatal
	in0 := in
	if len(in) == 0 {
		return Value{}, 0, io.ErrUnexpectedEOF
	}
	if in[0] != '"' {
		return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
	}
	in = in[1:]

	// Scan the bytes directly. Converting the remaining input to a string
	// here would copy everything after this point on every scan.
	i := indexNeedEscapeInBytes(in)
	in, out := in[i:], in[:i:i] // set cap to prevent mutations
	for len(in) > 0 {
		switch r, n := utf8.DecodeRune(in); {
		case r == utf8.RuneError && n == 1:
			nerr.AppendInvalidUTF8("")
			in, out = in[1:], append(out, in[0]) // preserve invalid byte
		case r < ' ':
			return Value{}, 0, newSyntaxError("invalid character %q in string", r)
		case r == '"':
			// Closing quote: everything from in0 up to here is the raw form.
			in = in[1:]
			n := len(in0) - len(in)
			v := rawValueOf(string(out), in0[:n:n])
			return v, n, nerr.E
		case r == '\\':
			if len(in) < 2 {
				return Value{}, 0, io.ErrUnexpectedEOF
			}
			switch r := in[1]; r {
			case '"', '\\', '/':
				in, out = in[2:], append(out, r)
			case 'b':
				in, out = in[2:], append(out, '\b')
			case 'f':
				in, out = in[2:], append(out, '\f')
			case 'n':
				in, out = in[2:], append(out, '\n')
			case 'r':
				in, out = in[2:], append(out, '\r')
			case 't':
				in, out = in[2:], append(out, '\t')
			case 'u':
				// \uXXXX: exactly four hex digits follow the 'u'.
				if len(in) < 6 {
					return Value{}, 0, io.ErrUnexpectedEOF
				}
				v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
				if err != nil {
					return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
				}
				in = in[6:]

				r := rune(v)
				if utf16.IsSurrogate(r) {
					// A surrogate must be immediately followed by a second
					// \uXXXX escape that completes a valid pair.
					if len(in) < 6 {
						return Value{}, 0, io.ErrUnexpectedEOF
					}
					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
					r = utf16.DecodeRune(r, rune(v))
					if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
						return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
					}
					in = in[6:]
				}
				out = append(out, string(r)...)
			default:
				return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
			}
		default:
			// Copy this rune plus the run of plain characters after it.
			i := indexNeedEscapeInBytes(in[n:])
			in, out = in[n+i:], append(out, in[:n+i]...)
		}
	}
	return Value{}, 0, io.ErrUnexpectedEOF
}

// indexNeedEscapeInBytes is the []byte counterpart of indexNeedEscape. It
// returns the index of the first character in b that is below ' ', is '\\' or
// '"', or decodes as utf8.RuneError; if there is none it returns len(b).
// Working on the bytes in place avoids copying b into a temporary string.
func indexNeedEscapeInBytes(b []byte) int {
	for i := 0; i < len(b); {
		c := b[i]
		if c < utf8.RuneSelf {
			if c < ' ' || c == '\\' || c == '"' {
				return i
			}
			i++
			continue
		}
		r, n := utf8.DecodeRune(b[i:])
		if r == utf8.RuneError {
			return i
		}
		i += n
	}
	return len(b)
}
// indexNeedEscape reports the byte offset of the first character in s that
// cannot be copied verbatim into a JSON string: a character below ' ', a
// backslash, a double quote, or anything that decodes as utf8.RuneError.
// When s contains no such character, len(s) is returned.
func indexNeedEscape(s string) int {
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		switch {
		case r == utf8.RuneError, r == '"', r == '\\', r < ' ':
			return pos
		}
		pos += size
	}
	return len(s)
}