internal/encoding/jsonx: copy internal/encoding/json

We're rewriting internal/encoding/json, so make a copy of it first in
order not to break the encoding/jsonpb package.

Change-Id: I8b63c468d3f432102d2af4db22a7549998ce3876
Reviewed-on: https://go-review.googlesource.com/c/164642
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
This commit is contained in:
Herbie Ong 2019-03-01 16:10:08 -08:00
parent 707894e869
commit 1e1e78b71b
7 changed files with 1184 additions and 1 deletions

View File

@ -9,7 +9,7 @@ import (
"math" "math"
"sort" "sort"
"github.com/golang/protobuf/v2/internal/encoding/json" json "github.com/golang/protobuf/v2/internal/encoding/jsonx"
"github.com/golang/protobuf/v2/internal/errors" "github.com/golang/protobuf/v2/internal/errors"
"github.com/golang/protobuf/v2/internal/pragma" "github.com/golang/protobuf/v2/internal/pragma"
"github.com/golang/protobuf/v2/proto" "github.com/golang/protobuf/v2/proto"

View File

@ -0,0 +1,194 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"io"
"regexp"
"unicode/utf8"
"github.com/golang/protobuf/v2/internal/errors"
)
// syntaxError tags an error as a JSON syntax error so that Unmarshal can
// recognize it with a type assertion and attach position information.
type syntaxError struct{ error }

// newSyntaxError builds an error from the format string f and arguments x
// and wraps it in a syntaxError.
func newSyntaxError(f string, x ...interface{}) error {
	wrapped := errors.New(f, x...)
	return syntaxError{error: wrapped}
}
// Unmarshal decodes b, which must hold exactly one JSON value optionally
// surrounded by whitespace, and returns it as a Value tree (an AST).
//
// Syntax errors are reported with the line and column at which decoding
// stopped. Input left over after the value is an error. Non-fatal errors
// collected while decoding are returned together with the decoded Value.
func Unmarshal(b []byte) (Value, error) {
	d := decoder{in: b}
	d.consume(0) // skip leading whitespace
	val, err := d.unmarshalValue()
	if d.nerr.Merge(err) {
		// Decoding succeeded, possibly with non-fatal errors.
		if remaining := len(d.in); remaining > 0 {
			return Value{}, errors.New("%d bytes of unconsumed input", remaining)
		}
		return val, d.nerr.E
	}

	serr, isSyntax := err.(syntaxError)
	if !isSyntax {
		return Value{}, err
	}

	// Derive the position from the portion of the input consumed so far.
	consumed := b[:len(b)-len(d.in)]
	line := 1 + bytes.Count(consumed, []byte("\n"))
	// LastIndexByte yields -1 when there is no newline, so the slice below
	// then starts at offset 0 and covers all consumed input.
	lastLine := consumed[1+bytes.LastIndexByte(consumed, '\n'):]
	column := 1 + utf8.RuneCount(lastLine) // counts runes; multi-rune characters are not merged
	return Value{}, errors.New("syntax error (line %d:%d): %v", line, column, serr.error)
}
// decoder carries the state of a single Unmarshal call.
type decoder struct {
// nerr accumulates the non-fatal errors merged while decoding; Unmarshal
// returns nerr.E alongside the decoded value.
nerr errors.NonFatal
// in is the remaining input that has not been consumed yet.
in []byte
}
// literalRegexp matches one of the JSON literals at the start of the input.
var literalRegexp = regexp.MustCompile("^(null|true|false)")

// unmarshalValue decodes the next JSON value from the input, dispatching on
// its first byte. It returns io.ErrUnexpectedEOF when no input is left and a
// syntax error when the first byte cannot start any JSON value.
func (p *decoder) unmarshalValue() (Value, error) {
	if len(p.in) == 0 {
		return Value{}, io.ErrUnexpectedEOF
	}
	first := p.in[0]
	switch {
	case first == '"':
		return p.unmarshalString()
	case first == '[':
		return p.unmarshalArray()
	case first == '{':
		return p.unmarshalObject()
	case first == '-' || ('0' <= first && first <= '9'):
		return p.unmarshalNumber()
	case first == 'n' || first == 't' || first == 'f':
		n := matchWithDelim(literalRegexp, p.in)
		if n <= 0 {
			return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
		}
		// The regexp matched, so the first byte identifies the literal.
		var lit interface{} // stays nil for "null"
		switch first {
		case 't':
			lit = true
		case 'f':
			lit = false
		}
		v := rawValueOf(lit, p.in[:n:n])
		p.consume(n)
		return v, nil
	default:
		return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
	}
}
// unmarshalArray decodes a JSON array starting at the current input position.
// The resulting Value keeps the bytes consumed for the array as its raw form.
func (p *decoder) unmarshalArray() (Value, error) {
	start := p.in // remembered to slice out the raw bytes afterwards
	if err := p.consumeChar('[', "at start of array"); err != nil {
		return Value{}, err
	}

	var elems []Value
	// Keep decoding elements for as long as each one is followed by a comma.
	for more := len(p.in) > 0 && p.in[0] != ']'; more && len(p.in) > 0; more = p.tryConsumeChar(',') {
		elem, err := p.unmarshalValue()
		if !p.nerr.Merge(err) {
			return Value{}, err
		}
		elems = append(elems, elem)
	}

	if err := p.consumeChar(']', "at end of array"); err != nil {
		return Value{}, err
	}
	n := len(start) - len(p.in)
	return rawValueOf(elems, start[:n:n]), nil
}
// unmarshalObject decodes a JSON object starting at the current input
// position. Items are kept in input order and duplicate keys are retained.
// The resulting Value keeps the bytes consumed for the object as its raw form.
func (p *decoder) unmarshalObject() (Value, error) {
	start := p.in // remembered to slice out the raw bytes afterwards
	if err := p.consumeChar('{', "at start of object"); err != nil {
		return Value{}, err
	}

	var items [][2]Value
	// Keep decoding key/value pairs for as long as each is followed by a comma.
	for more := len(p.in) > 0 && p.in[0] != '}'; more && len(p.in) > 0; more = p.tryConsumeChar(',') {
		key, err := p.unmarshalString()
		if !p.nerr.Merge(err) {
			return Value{}, err
		}
		if err := p.consumeChar(':', "in object"); err != nil {
			return Value{}, err
		}
		val, err := p.unmarshalValue()
		if !p.nerr.Merge(err) {
			return Value{}, err
		}
		items = append(items, [2]Value{key, val})
	}

	if err := p.consumeChar('}', "at end of object"); err != nil {
		return Value{}, err
	}
	n := len(start) - len(p.in)
	return rawValueOf(items, start[:n:n]), nil
}
// consumeChar requires the next input byte to be c and consumes it together
// with any whitespace that follows. It returns io.ErrUnexpectedEOF when the
// input is exhausted, or a syntax error mentioning msg when a different byte
// is found.
func (p *decoder) consumeChar(c byte, msg string) error {
	switch {
	case p.tryConsumeChar(c):
		return nil
	case len(p.in) == 0:
		return io.ErrUnexpectedEOF
	default:
		return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
	}
}
// tryConsumeChar consumes c and any whitespace after it if c is the next
// input byte, reporting whether it did so. The input is left untouched
// otherwise.
func (p *decoder) tryConsumeChar(c byte) bool {
	if len(p.in) == 0 || p.in[0] != c {
		return false
	}
	p.consume(1)
	return true
}
// consume drops the first n bytes of the input and then skips over any run
// of JSON whitespace (space, newline, carriage return, tab) that follows.
func (p *decoder) consume(n int) {
	rest := p.in[n:]
	skip := 0
	for skip < len(rest) {
		c := rest[skip]
		if c != ' ' && c != '\n' && c != '\r' && c != '\t' {
			break
		}
		skip++
	}
	p.in = rest[skip:]
}
// errRegexp matches, for error reporting, either a run of up to 32
// non-delimiter characters or else a single character.
var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")

// matchWithDelim returns the length of the match of r against b, provided
// the match is followed by a delimiter, i.e. a byte outside the set
// [-+_.a-zA-Z0-9]. Reaching the end of b also counts as a delimiter.
// It returns 0 if the byte following the match is not a delimiter.
func matchWithDelim(r *regexp.Regexp, b []byte) int {
	n := len(r.Find(b))
	if n >= len(b) {
		return n // the match runs to EOF
	}
	// Inspect the byte right after the match.
	switch c := b[n]; {
	case c == '-', c == '+', c == '.', c == '_',
		'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return 0
	}
	return n
}

View File

@ -0,0 +1,128 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"strings"
"github.com/golang/protobuf/v2/internal/errors"
)
// Marshal encodes v in the JSON format.
//
// A non-empty indent switches on multi-line output: every entry of an Array
// or Object is preceded by the indent (repeated per nesting level) and
// followed by a newline. The indent may consist only of spaces and tabs;
// anything else is rejected with an error.
func Marshal(v Value, indent string) ([]byte, error) {
	var enc encoder
	if indent != "" {
		if rest := strings.Trim(indent, " \t"); len(rest) != 0 {
			return nil, errors.New("indent may only be composed of space and tab characters")
		}
		enc.indent, enc.newline = indent, "\n"
	}
	if err := enc.marshalValue(v); !enc.nerr.Merge(err) {
		return nil, err
	}
	return enc.out, enc.nerr.E
}
// encoder carries the state of a single Marshal call.
type encoder struct {
// nerr accumulates the non-fatal errors merged while encoding.
nerr errors.NonFatal
// out is the output buffer that encoded bytes are appended to.
out []byte

// indent is the indentation for one nesting level; empty for compact output.
indent string
// indents is the indentation for the current nesting level; it grows by
// indent when an array or object is entered and shrinks when it is left.
indents []byte
newline string // set to "\n" if len(indent) > 0
}
// marshalValue appends the JSON encoding of v to the output, delegating to
// the type-specific marshalers. A Value with an unknown type is an error.
func (p *encoder) marshalValue(v Value) error {
	switch typ := v.Type(); typ {
	case Object:
		return p.marshalObject(v)
	case Array:
		return p.marshalArray(v)
	case String:
		return p.marshalString(v)
	case Number:
		return p.marshalNumber(v)
	case Bool:
		lit := "false"
		if v.Bool() {
			lit = "true"
		}
		p.out = append(p.out, lit...)
	case Null:
		p.out = append(p.out, "null"...)
	default:
		return errors.New("invalid type %v to encode value", typ)
	}
	return nil
}
// marshalArray appends the JSON encoding of the Array v to the output.
// With indentation enabled each element goes on its own line, one level
// deeper than the brackets; an empty array is always written as "[]".
func (p *encoder) marshalArray(v Value) error {
	if typ := v.Type(); typ != Array {
		return errors.New("invalid type %v, expected array", typ)
	}
	elems := v.Array()
	nonEmpty := len(elems) > 0
	last := len(elems) - 1

	p.out = append(p.out, '[')
	p.indents = append(p.indents, p.indent...) // enter one nesting level
	if nonEmpty {
		p.out = append(p.out, p.newline...)
	}
	for i := range elems {
		p.out = append(p.out, p.indents...)
		if err := p.marshalValue(elems[i]); !p.nerr.Merge(err) {
			return err
		}
		if i != last {
			p.out = append(p.out, ',')
		}
		p.out = append(p.out, p.newline...)
	}
	p.indents = p.indents[:len(p.indents)-len(p.indent)] // leave the nesting level
	if nonEmpty {
		p.out = append(p.out, p.indents...)
	}
	p.out = append(p.out, ']')
	return nil
}
// marshalObject appends the JSON encoding of the Object v to the output.
// With indentation enabled each item goes on its own line, one level deeper
// than the braces, and a space follows the colon; an empty object is always
// written as "{}".
func (p *encoder) marshalObject(v Value) error {
	if typ := v.Type(); typ != Object {
		return errors.New("invalid type %v, expected object", typ)
	}
	items := v.Object()
	nonEmpty := len(items) > 0
	last := len(items) - 1
	pretty := len(p.indent) > 0

	p.out = append(p.out, '{')
	p.indents = append(p.indents, p.indent...) // enter one nesting level
	if nonEmpty {
		p.out = append(p.out, p.newline...)
	}
	for i := range items {
		key, val := items[i][0], items[i][1]
		p.out = append(p.out, p.indents...)
		if err := p.marshalString(key); !p.nerr.Merge(err) {
			return err
		}
		p.out = append(p.out, ':')
		if pretty {
			p.out = append(p.out, ' ')
		}
		if err := p.marshalValue(val); !p.nerr.Merge(err) {
			return err
		}
		if i != last {
			p.out = append(p.out, ',')
		}
		p.out = append(p.out, p.newline...)
	}
	p.indents = p.indents[:len(p.indents)-len(p.indent)] // leave the nesting level
	if nonEmpty {
		p.out = append(p.out, p.indents...)
	}
	p.out = append(p.out, '}')
	return nil
}

View File

@ -0,0 +1,416 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"math"
"strings"
"testing"
"unicode/utf8"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
// Test is a table-driven test covering both Unmarshal and Marshal.
//
// For each case, Unmarshal is run on in (unless in, wantVal and wantErr are
// all unset) and its result is compared against wantVal, while its error is
// checked to contain wantErr. If wantOut is set, wantVal is marshaled without
// indentation and compared against it; if wantOutIndent is set, wantVal is
// marshaled with a tab indent and compared against that.
func Test(t *testing.T) {
const space = " \n\r\t"
var V = ValueOf
type Arr = []Value
type Obj = [][2]Value
tests := []struct {
// in is the input passed to Unmarshal.
in string
// wantVal is the expected Unmarshal result and the input to Marshal.
wantVal Value
// wantOut is the expected compact Marshal output; empty skips the check.
wantOut string
// wantOutIndent is the expected tab-indented Marshal output; empty skips the check.
wantOutIndent string
// wantErr is a substring expected in the Unmarshal error; empty means no error.
wantErr string
}{{
in: ``,
wantErr: `unexpected EOF`,
}, {
in: space,
wantErr: `unexpected EOF`,
}, {
in: space + `null` + space,
wantVal: V(nil),
wantOut: `null`,
wantOutIndent: `null`,
}, {
in: space + `true` + space,
wantVal: V(true),
wantOut: `true`,
wantOutIndent: `true`,
}, {
in: space + `false` + space,
wantVal: V(false),
wantOut: `false`,
wantOutIndent: `false`,
}, {
in: space + `0` + space,
wantVal: V(0.0),
wantOut: `0`,
wantOutIndent: `0`,
}, {
in: space + `"hello"` + space,
wantVal: V("hello"),
wantOut: `"hello"`,
wantOutIndent: `"hello"`,
}, {
in: space + `[]` + space,
wantVal: V(Arr{}),
wantOut: `[]`,
wantOutIndent: `[]`,
}, {
in: space + `{}` + space,
wantVal: V(Obj{}),
wantOut: `{}`,
wantOutIndent: `{}`,
}, {
in: `null#invalid`,
wantErr: `8 bytes of unconsumed input`,
}, {
in: `0#invalid`,
wantErr: `8 bytes of unconsumed input`,
}, {
in: `"hello"#invalid`,
wantErr: `8 bytes of unconsumed input`,
}, {
in: `[]#invalid`,
wantErr: `8 bytes of unconsumed input`,
}, {
in: `{}#invalid`,
wantErr: `8 bytes of unconsumed input`,
}, {
in: `[truee,true]`,
wantErr: `invalid "truee" as literal`,
}, {
in: `[falsee,false]`,
wantErr: `invalid "falsee" as literal`,
}, {
in: `[`,
wantErr: `unexpected EOF`,
}, {
in: `[{}]`,
wantVal: V(Arr{V(Obj{})}),
wantOut: "[{}]",
wantOutIndent: "[\n\t{}\n]",
}, {
in: `[{]}`,
wantErr: `invalid character ']' at start of string`,
}, {
in: `[,]`,
wantErr: `invalid "," as value`,
}, {
in: `{,}`,
wantErr: `invalid character ',' at start of string`,
}, {
in: `{"key""val"}`,
wantErr: `invalid character '"', expected ':' in object`,
}, {
in: `["elem0""elem1"]`,
wantErr: `invalid character '"', expected ']' at end of array`,
}, {
in: `{"hello"`,
wantErr: `unexpected EOF`,
}, {
in: `{"hello"}`,
wantErr: `invalid character '}', expected ':' in object`,
}, {
in: `{"hello":`,
wantErr: `unexpected EOF`,
}, {
in: `{"hello":}`,
wantErr: `invalid "}" as value`,
}, {
in: `{"hello":"goodbye"`,
wantErr: `unexpected EOF`,
}, {
in: `{"hello":"goodbye"]`,
wantErr: `invalid character ']', expected '}' at end of object`,
}, {
in: `{"hello":"goodbye"}`,
wantVal: V(Obj{{V("hello"), V("goodbye")}}),
wantOut: `{"hello":"goodbye"}`,
wantOutIndent: "{\n\t\"hello\": \"goodbye\"\n}",
}, {
in: `{"hello":"goodbye",}`,
wantErr: `invalid character '}' at start of string`,
}, {
in: `{"k":"v1","k":"v2"}`,
wantVal: V(Obj{
{V("k"), V("v1")}, {V("k"), V("v2")},
}),
wantOut: `{"k":"v1","k":"v2"}`,
wantOutIndent: "{\n\t\"k\": \"v1\",\n\t\"k\": \"v2\"\n}",
}, {
in: `{"k":{"k":{"k":"v"}}}`,
wantVal: V(Obj{
{V("k"), V(Obj{
{V("k"), V(Obj{
{V("k"), V("v")},
})},
})},
}),
wantOut: `{"k":{"k":{"k":"v"}}}`,
wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v\"\n\t\t}\n\t}\n}",
}, {
in: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
wantVal: V(Obj{
{V("k"), V(Obj{
{V("k"), V(Obj{
{V("k"), V("v1")},
{V("k"), V("v2")},
})},
})},
}),
wantOut: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v1\",\n\t\t\t\"k\": \"v2\"\n\t\t}\n\t}\n}",
}, {
in: " x",
wantErr: `syntax error (line 1:3)`,
}, {
in: `["💩"x`,
wantErr: `syntax error (line 1:5)`,
}, {
in: "\n\n[\"🔥🔥🔥\"x",
wantErr: `syntax error (line 3:7)`,
}, {
in: `["👍🏻👍🏿"x`,
wantErr: `syntax error (line 1:8)`, // multi-rune emojis; could be column:6
}, {
in: "\"\x00\"",
wantErr: `invalid character '\x00' in string`,
}, {
in: "\"\xff\"",
wantErr: `invalid UTF-8 detected`,
wantVal: V(string("\xff")),
}, {
in: `"` + string(utf8.RuneError) + `"`,
wantVal: V(string(utf8.RuneError)),
wantOut: `"` + string(utf8.RuneError) + `"`,
}, {
in: `"\uFFFD"`,
wantVal: V(string(utf8.RuneError)),
wantOut: `"` + string(utf8.RuneError) + `"`,
}, {
in: `"\x"`,
wantErr: `invalid escape code "\\x" in string`,
}, {
in: `"\uXXXX"`,
wantErr: `invalid escape code "\\uXXXX" in string`,
}, {
in: `"\uDEAD"`, // unmatched surrogate pair
wantErr: `unexpected EOF`,
}, {
in: `"\uDEAD\uBEEF"`, // invalid surrogate half
wantErr: `invalid escape code "\\uBEEF" in string`,
}, {
in: `"\uD800\udead"`, // valid surrogate pair
wantVal: V("𐊭"),
wantOut: `"𐊭"`,
}, {
in: `"\u0000\"\\\/\b\f\n\r\t"`,
wantVal: V("\u0000\"\\/\b\f\n\r\t"),
wantOut: `"\u0000\"\\/\b\f\n\r\t"`,
}, {
in: `-`,
wantErr: `invalid "-" as number`,
}, {
in: `-0`,
wantVal: V(math.Copysign(0, -1)),
wantOut: `-0`,
}, {
in: `+0`,
wantErr: `invalid "+0" as value`,
}, {
in: `-+`,
wantErr: `invalid "-+" as number`,
}, {
in: `0.`,
wantErr: `invalid "0." as number`,
}, {
in: `.1`,
wantErr: `invalid ".1" as value`,
}, {
in: `0.e1`,
wantErr: `invalid "0.e1" as number`,
}, {
in: `0.0`,
wantVal: V(0.0),
wantOut: "0",
}, {
in: `01`,
wantErr: `invalid "01" as number`,
}, {
in: `0e`,
wantErr: `invalid "0e" as number`,
}, {
in: `0e0`,
wantVal: V(0.0),
wantOut: "0",
}, {
in: `0E0`,
wantVal: V(0.0),
wantOut: "0",
}, {
in: `0Ee`,
wantErr: `invalid "0Ee" as number`,
}, {
in: `-1.0E+1`,
wantVal: V(-10.0),
wantOut: "-10",
}, {
in: `
{
"firstName" : "John",
"lastName" : "Smith" ,
"isAlive" : true,
"age" : 27,
"address" : {
"streetAddress" : "21 2nd Street" ,
"city" : "New York" ,
"state" : "NY" ,
"postalCode" : "10021-3100"
},
"phoneNumbers" : [
{
"type" : "home" ,
"number" : "212 555-1234"
} ,
{
"type" : "office" ,
"number" : "646 555-4567"
} ,
{
"type" : "mobile" ,
"number" : "123 456-7890"
}
],
"children" : [] ,
"spouse" : null
}
`,
wantVal: V(Obj{
{V("firstName"), V("John")},
{V("lastName"), V("Smith")},
{V("isAlive"), V(true)},
{V("age"), V(27.0)},
{V("address"), V(Obj{
{V("streetAddress"), V("21 2nd Street")},
{V("city"), V("New York")},
{V("state"), V("NY")},
{V("postalCode"), V("10021-3100")},
})},
{V("phoneNumbers"), V(Arr{
V(Obj{
{V("type"), V("home")},
{V("number"), V("212 555-1234")},
}),
V(Obj{
{V("type"), V("office")},
{V("number"), V("646 555-4567")},
}),
V(Obj{
{V("type"), V("mobile")},
{V("number"), V("123 456-7890")},
}),
})},
{V("children"), V(Arr{})},
{V("spouse"), V(nil)},
}),
wantOut: `{"firstName":"John","lastName":"Smith","isAlive":true,"age":27,"address":{"streetAddress":"21 2nd Street","city":"New York","state":"NY","postalCode":"10021-3100"},"phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"},{"type":"mobile","number":"123 456-7890"}],"children":[],"spouse":null}`,
wantOutIndent: `{
"firstName": "John",
"lastName": "Smith",
"isAlive": true,
"age": 27,
"address": {
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021-3100"
},
"phoneNumbers": [
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "office",
"number": "646 555-4567"
},
{
"type": "mobile",
"number": "123 456-7890"
}
],
"children": [],
"spouse": null
}`,
}}
// Compare Values by their decoded content only: the transformer maps each
// Value to a plain Go value, so the raw bytes never take part in the diff.
opts := cmp.Options{
cmpopts.EquateEmpty(),
cmp.Transformer("", func(v Value) interface{} {
switch v.typ {
case 0:
return nil // special case so Value{} == Value{}
case Null:
return nil
case Bool:
return v.Bool()
case Number:
return v.Number()
case String:
return v.String()
case Array:
return v.Array()
case Object:
return v.Object()
default:
panic("invalid type")
}
}),
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
// Decode check; skipped only for a completely empty test case.
if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
gotVal, err := Unmarshal([]byte(tt.in))
if err == nil {
if tt.wantErr != "" {
t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
}
} else {
if tt.wantErr == "" {
t.Errorf("Unmarshal(): got %v, want nil error", err)
} else if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
}
}
if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
}
}
// Compact encode check.
if tt.wantOut != "" {
gotOut, err := Marshal(tt.wantVal, "")
if err != nil {
t.Errorf("Marshal(): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOut {
t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
}
}
// Indented encode check, using a single tab per nesting level.
if tt.wantOutIndent != "" {
gotOut, err := Marshal(tt.wantVal, "\t")
if err != nil {
t.Errorf("Marshal(Indent): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOutIndent {
t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
}
}
})
}
}

View File

@ -0,0 +1,79 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"io"
"math"
"regexp"
"strconv"
"github.com/golang/protobuf/v2/internal/errors"
)
// marshalNumber appends the JSON encoding of the Number v to the output.
// The output buffer is replaced by whatever appendNumber returns, also when
// it reports an error.
func (p *encoder) marshalNumber(v Value) error {
	buf, err := appendNumber(p.out, v)
	p.out = buf
	return err
}
// appendNumber appends the JSON encoding of the Number v to out and returns
// the extended slice. If v carries raw bytes they are emitted verbatim.
// Otherwise the number is formatted the way encoding/json formats floats
// (see floatEncoder.encode there). Infinities and NaN cannot be represented
// in JSON and yield an error, as does a v that is not a Number.
func appendNumber(out []byte, v Value) ([]byte, error) {
	if typ := v.Type(); typ != Number {
		return nil, errors.New("invalid type %v, expected number", typ)
	}
	if raw := v.raw; len(raw) > 0 {
		return append(out, raw...), nil
	}
	num := v.Number()
	if math.IsNaN(num) || math.IsInf(num, 0) {
		return nil, errors.New("invalid number value: %v", num)
	}

	// Format with 32-bit precision when the value survives a round trip
	// through float32.
	bitSize := 64
	if float64(float32(num)) == num {
		bitSize = 32
	}

	// Switch to exponent notation for very small or very large magnitudes.
	format := byte('f')
	if mag := math.Abs(num); mag != 0 {
		var useExp bool
		if bitSize == 64 {
			useExp = mag < 1e-6 || mag >= 1e21
		} else {
			mag32 := float32(mag)
			useExp = mag32 < 1e-6 || mag32 >= 1e21
		}
		if useExp {
			format = 'e'
		}
	}

	out = strconv.AppendFloat(out, num, format, -1, bitSize)
	if format == 'e' {
		// Rewrite a trailing "e-0X" exponent as "e-X".
		if end := len(out); end >= 4 && out[end-4] == 'e' && out[end-3] == '-' && out[end-2] == '0' {
			out[end-2] = out[end-1]
			out = out[:end-1]
		}
	}
	return out, nil
}
// Exact expression to match a JSON floating-point number.
// JSON's grammar for floats is more restrictive than Go's grammar.
//
// The pattern is anchored at the start of the input and accepts an optional
// minus sign, an integer part without leading zeros, an optional fraction
// with at least one digit, and an optional exponent with at least one digit.
var floatRegexp = regexp.MustCompile("^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")
// unmarshalNumber decodes a Number at the current input position and
// advances past it and any whitespace that follows. On failure
// consumeNumber reports a size of zero, so no input byte is dropped.
func (p *decoder) unmarshalNumber() (Value, error) {
	val, size, err := consumeNumber(p.in)
	p.consume(size)
	return val, err
}
// consumeNumber parses a JSON number at the start of in. It returns the
// parsed Value, which keeps the matched bytes as its raw form, together with
// the number of bytes the number occupies. On failure the byte count is 0.
func consumeNumber(in []byte) (Value, int, error) {
	if len(in) == 0 {
		return Value{}, 0, io.ErrUnexpectedEOF
	}
	size := matchWithDelim(floatRegexp, in)
	if size <= 0 {
		return Value{}, 0, newSyntaxError("invalid %q as number", errRegexp.Find(in))
	}
	raw := in[:size:size] // capacity is capped so appends cannot touch the input
	num, err := strconv.ParseFloat(string(raw), 64)
	if err != nil {
		return Value{}, 0, err
	}
	return rawValueOf(num, raw), size, nil
}

View File

@ -0,0 +1,160 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"io"
"math/bits"
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
"github.com/golang/protobuf/v2/internal/errors"
)
// marshalString appends the JSON encoding of the String v to the output.
// The output buffer is replaced by whatever appendString returns, also when
// it reports an error.
func (p *encoder) marshalString(v Value) error {
	buf, err := appendString(p.out, v)
	p.out = buf
	return err
}
// appendString appends the JSON encoding of the String v to out and returns
// the extended slice. If v carries raw bytes they are emitted verbatim.
// Otherwise the string is quoted, escaping control characters, the double
// quote and the backslash. Bytes that are not valid UTF-8 are copied through
// unchanged and recorded as a non-fatal error, which is returned together
// with the output.
func appendString(out []byte, v Value) ([]byte, error) {
	if typ := v.Type(); typ != String {
		return nil, errors.New("invalid type %v, expected string", typ)
	}
	if raw := v.raw; len(raw) > 0 {
		return append(out, raw...), nil
	}

	var nerr errors.NonFatal
	s := v.String()
	out = append(out, '"')

	// Copy the leading run that needs no special handling in one go.
	safe := indexNeedEscape(s)
	out = append(out, s[:safe]...)
	s = s[safe:]

	for len(s) > 0 {
		r, size := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && size == 1 {
			nerr.AppendInvalidUTF8("")
			out = append(out, s[0]) // preserve invalid byte
			s = s[1:]
			continue
		}
		if r >= ' ' && r != '"' && r != '\\' {
			// Copy this rune plus the run after it that needs no handling.
			size += indexNeedEscape(s[size:])
			out = append(out, s[:size]...)
			s = s[size:]
			continue
		}

		// The rune must be escaped.
		s = s[size:]
		out = append(out, '\\')
		switch r {
		case '\t':
			out = append(out, 't')
		case '\r':
			out = append(out, 'r')
		case '\n':
			out = append(out, 'n')
		case '\f':
			out = append(out, 'f')
		case '\b':
			out = append(out, 'b')
		case '\\', '"':
			out = append(out, byte(r))
		default:
			// \uXXXX form: left-pad the hex digits with zeros to width 4.
			pad := "0000"[1+(bits.Len32(uint32(r))-1)/4:]
			out = append(out, 'u')
			out = append(out, pad...)
			out = strconv.AppendUint(out, uint64(r), 16)
		}
	}
	return append(out, '"'), nerr.E
}
// unmarshalString decodes a String at the current input position and
// advances past it and any whitespace that follows. The input is advanced by
// exactly the byte count that consumeString reports.
func (p *decoder) unmarshalString() (Value, error) {
	val, size, err := consumeString(p.in)
	p.consume(size)
	return val, err
}
// consumeString parses a JSON string literal at the start of in.
//
// On success it returns the decoded string as a Value (whose raw form is the
// literal including its quotes) and the number of input bytes the literal
// occupies. Invalid UTF-8 bytes are preserved in the result and reported as a
// non-fatal error alongside it. On any other failure the byte count is 0:
// io.ErrUnexpectedEOF for truncated input, a syntax error otherwise.
func consumeString(in []byte) (Value, int, error) {
var nerr errors.NonFatal
// in0 keeps the original input so the consumed length and raw bytes can be
// derived once the closing quote is found.
in0 := in
if len(in) == 0 {
return Value{}, 0, io.ErrUnexpectedEOF
}
if in[0] != '"' {
return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
// Initially out aliases the leading run of input that needs no processing;
// the first append then has to reallocate instead of overwriting the input.
i := indexNeedEscape(string(in))
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
case r == utf8.RuneError && n == 1:
nerr.AppendInvalidUTF8("")
in, out = in[1:], append(out, in[0]) // preserve invalid byte
case r < ' ':
// Unescaped control characters are not allowed inside a string.
return Value{}, 0, newSyntaxError("invalid character %q in string", r)
case r == '"':
// Closing quote: everything consumed from in0 is the raw literal.
in = in[1:]
n := len(in0) - len(in)
v := rawValueOf(string(out), in0[:n:n])
return v, n, nerr.E
case r == '\\':
if len(in) < 2 {
return Value{}, 0, io.ErrUnexpectedEOF
}
switch r := in[1]; r {
case '"', '\\', '/':
in, out = in[2:], append(out, r)
case 'b':
in, out = in[2:], append(out, '\b')
case 'f':
in, out = in[2:], append(out, '\f')
case 'n':
in, out = in[2:], append(out, '\n')
case 'r':
in, out = in[2:], append(out, '\r')
case 't':
in, out = in[2:], append(out, '\t')
case 'u':
// \uXXXX: exactly four hex digits must follow.
if len(in) < 6 {
return Value{}, 0, io.ErrUnexpectedEOF
}
v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
if err != nil {
return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
}
in = in[6:]
r := rune(v)
if utf16.IsSurrogate(r) {
// A surrogate must be followed by a second \uXXXX escape; fewer than
// six remaining bytes is reported as a truncated input.
if len(in) < 6 {
return Value{}, 0, io.ErrUnexpectedEOF
}
// The hex digits are parsed before the `\u` prefix is verified; both
// results are checked together below.
v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
r = utf16.DecodeRune(r, rune(v))
if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
}
in = in[6:]
}
out = append(out, string(r)...)
default:
return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
// Copy this rune together with the run after it that needs no processing.
i := indexNeedEscape(string(in[n:]))
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
// The input ended before a closing quote was seen.
return Value{}, 0, io.ErrUnexpectedEOF
}
// indexNeedEscape reports the byte offset of the first rune in s that cannot
// be copied verbatim: a control character, a backslash, a double quote, or
// utf8.RuneError (whether from invalid UTF-8 or an encoded U+FFFD).
// It returns len(s) if s contains no such rune.
func indexNeedEscape(s string) int {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		switch {
		case r < ' ', r == '"', r == '\\', r == utf8.RuneError:
			return i
		}
		i += size
	}
	return len(s)
}

View File

@ -0,0 +1,206 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package json implements the JSON format.
// This package has no semantic understanding for protocol buffers and is only
// a parser and composer for the format.
//
// This follows RFC 7159, with some notable implementation specifics:
// * numbers that are out of range result in a decoding error
// * duplicate keys in objects are not rejected
//
// Reasons why the standard encoding/json package is not suitable:
// * information about duplicate keys is lost
// * invalid UTF-8 is silently coerced into utf8.RuneError
package json
import (
"fmt"
"strings"
)
// Type identifies one of the kinds of value that JSON can express.
// The zero Type is not a valid kind.
type Type uint8

const (
	// Null is the null literal (i.e., "null").
	Null Type = iota + 1
	// Bool is a boolean (i.e., "true" or "false").
	Bool
	// Number is a floating-point number (e.g., "1.234" or "1e100").
	Number
	// String is an escaped string (e.g., `"the quick brown fox"`).
	String
	// Array is an ordered list of values (e.g., `[0, "one", true]`).
	Array
	// Object is an ordered map of values (e.g., `{"key": null}`).
	Object
)

// String returns the lower-case name of t, or "<invalid>" if t is not one of
// the declared Type constants.
func (t Type) String() string {
	names := [...]string{
		Null:   "null",
		Bool:   "bool",
		Number: "number",
		String: "string",
		Array:  "array",
		Object: "object",
	}
	if t < Null || t > Object {
		return "<invalid>"
	}
	return names[t]
}
// Value contains a value of a given Type.
//
// Exactly which of the payload fields is meaningful depends on typ. The zero
// Value has typ 0, which is not a valid Type.
type Value struct {
typ Type
raw []byte // raw bytes of the serialized data
str string // only for String
num float64 // only for Bool or Number
arr []Value // only for Array
obj [][2]Value // only for Object
}
// ValueOf converts a Go value into a Value according to this mapping:
//	nil              => Null
//	bool             => Bool
//	int32, int64     => Number
//	uint32, uint64   => Number
//	float32, float64 => Number
//	string, []byte   => String
//	[]Value          => Array
//	[][2]Value       => Object
//
// Any other Go type causes ValueOf to panic.
func ValueOf(v interface{}) Value {
	number := func(f float64) Value { return Value{typ: Number, num: f} }
	text := func(s string) Value { return Value{typ: String, str: s} }

	switch v := v.(type) {
	case nil:
		return Value{typ: Null}
	case bool:
		// Booleans are stored numerically: 1 for true, 0 for false.
		var flag float64
		if v {
			flag = 1
		}
		return Value{typ: Bool, num: flag}
	case float64:
		return number(v)
	case float32:
		return number(float64(v))
	case int32:
		return number(float64(v))
	case uint32:
		return number(float64(v))
	case int64:
		return number(float64(v)) // possible loss of precision
	case uint64:
		return number(float64(v)) // possible loss of precision
	case string:
		return text(v)
	case []byte:
		return text(string(v))
	case []Value:
		return Value{typ: Array, arr: v}
	case [][2]Value:
		return Value{typ: Object, obj: v}
	}
	panic(fmt.Sprintf("invalid type %T", v))
}
// rawValueOf is like ValueOf but additionally records raw as the serialized
// form of the returned Value. Like ValueOf, it panics on unsupported types.
func rawValueOf(v interface{}, raw []byte) Value {
	val := ValueOf(v)
	val.raw = raw
	return val
}
// Type returns the JSON type of v.
// For the zero Value this is 0, which is not one of the named Type constants.
func (v Value) Type() Type {
return v.typ
}
// Bool returns the boolean held by v.
// It panics if the Type of v is not Bool.
func (v Value) Bool() bool {
	if v.typ == Bool {
		return v.num != 0 // booleans are stored as 0 or 1
	}
	panic("value is not a boolean")
}
// Number returns the float64 held by v.
// It panics if the Type of v is not Number.
func (v Value) Number() float64 {
	if v.typ == Number {
		return v.num
	}
	panic("value is not a number")
}
// String returns the string held by v when its Type is String.
// For every other Type it returns a formatted rendering of v that is
// intended for debugging only.
//
// Since JSON strings must be UTF-8, the marshaler and unmarshaler will verify
// for UTF-8 correctness.
func (v Value) String() string {
	if v.typ == String {
		return v.str
	}
	return v.stringValue()
}
// stringValue renders a non-String v for debugging. Null, Bool and Number
// use their raw representation; Array and Object are rendered recursively
// via String, comma-separated and enclosed in brackets or braces. Any other
// Type yields "<invalid>".
func (v Value) stringValue() string {
	switch v.typ {
	case Object:
		var parts []string
		for _, item := range v.Object() {
			key, val := item[0], item[1]
			parts = append(parts, key.String()+":"+val.String())
		}
		return "{" + strings.Join(parts, ",") + "}"
	case Array:
		var parts []string
		for _, elem := range v.Array() {
			parts = append(parts, elem.String())
		}
		return "[" + strings.Join(parts, ",") + "]"
	case Null, Bool, Number:
		return string(v.Raw())
	}
	return "<invalid>"
}
// Array returns the elements held by v.
// It panics if the Type of v is not Array.
//
// Mutations on the return value may not be observable from the Raw method.
func (v Value) Array() []Value {
	if v.typ == Array {
		return v.arr
	}
	panic("value is not an array")
}
// Object returns the items held by v.
// It panics if the Type of v is not Object.
// Each [2]Value is a pair of a key (of type String) and its value.
//
// Mutations on the return value may not be observable from the Raw method.
func (v Value) Object() [][2]Value {
	if v.typ == Object {
		return v.obj
	}
	panic("value is not an object")
}
// Raw returns the serialized form of v. When v already carries raw bytes
// they are returned as is, and may alias the input given to Unmarshal.
// Otherwise v is encoded on the fly without indentation; if that fails with
// a fatal error, the bytes "<invalid>" are returned.
func (v Value) Raw() []byte {
	if len(v.raw) > 0 {
		return v.raw
	}
	var enc encoder
	err := enc.marshalValue(v)
	if enc.nerr.Merge(err) {
		return enc.out
	}
	return []byte("<invalid>")
}