From 1e1e78b71bfc1fab32b4ef963a619cc5521a8610 Mon Sep 17 00:00:00 2001 From: Herbie Ong Date: Fri, 1 Mar 2019 16:10:08 -0800 Subject: [PATCH] internal/encoding/jsonx: copy internal/encoding/json We're rewriting internal/encoding/json. So, make a copy of it first in order not to break encoding/jsonpb package. Change-Id: I8b63c468d3f432102d2af4db22a7549998ce3876 Reviewed-on: https://go-review.googlesource.com/c/164642 Reviewed-by: Joe Tsai --- encoding/jsonpb/encode.go | 2 +- internal/encoding/jsonx/decode.go | 194 +++++++++++++ internal/encoding/jsonx/encode.go | 128 +++++++++ internal/encoding/jsonx/json_test.go | 416 +++++++++++++++++++++++++++ internal/encoding/jsonx/number.go | 79 +++++ internal/encoding/jsonx/string.go | 160 +++++++++++ internal/encoding/jsonx/value.go | 206 +++++++++++++ 7 files changed, 1184 insertions(+), 1 deletion(-) create mode 100644 internal/encoding/jsonx/decode.go create mode 100644 internal/encoding/jsonx/encode.go create mode 100644 internal/encoding/jsonx/json_test.go create mode 100644 internal/encoding/jsonx/number.go create mode 100644 internal/encoding/jsonx/string.go create mode 100644 internal/encoding/jsonx/value.go diff --git a/encoding/jsonpb/encode.go b/encoding/jsonpb/encode.go index 928b24fa..e5819f22 100644 --- a/encoding/jsonpb/encode.go +++ b/encoding/jsonpb/encode.go @@ -9,7 +9,7 @@ import ( "math" "sort" - "github.com/golang/protobuf/v2/internal/encoding/json" + json "github.com/golang/protobuf/v2/internal/encoding/jsonx" "github.com/golang/protobuf/v2/internal/errors" "github.com/golang/protobuf/v2/internal/pragma" "github.com/golang/protobuf/v2/proto" diff --git a/internal/encoding/jsonx/decode.go b/internal/encoding/jsonx/decode.go new file mode 100644 index 00000000..769619b7 --- /dev/null +++ b/internal/encoding/jsonx/decode.go @@ -0,0 +1,194 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package json + +import ( + "bytes" + "io" + "regexp" + "unicode/utf8" + + "github.com/golang/protobuf/v2/internal/errors" +) + +type syntaxError struct{ error } + +func newSyntaxError(f string, x ...interface{}) error { + return syntaxError{errors.New(f, x...)} +} + +// Unmarshal parses b as the JSON format. +// It returns a Value, which represents the input as an AST. +func Unmarshal(b []byte) (Value, error) { + p := decoder{in: b} + p.consume(0) // trim leading spaces + v, err := p.unmarshalValue() + if !p.nerr.Merge(err) { + if e, ok := err.(syntaxError); ok { + b = b[:len(b)-len(p.in)] // consumed input + line := bytes.Count(b, []byte("\n")) + 1 + if i := bytes.LastIndexByte(b, '\n'); i >= 0 { + b = b[i+1:] + } + column := utf8.RuneCount(b) + 1 // ignore multi-rune characters + err = errors.New("syntax error (line %d:%d): %v", line, column, e.error) + } + return Value{}, err + } + if len(p.in) > 0 { + return Value{}, errors.New("%d bytes of unconsumed input", len(p.in)) + } + return v, p.nerr.E +} + +type decoder struct { + nerr errors.NonFatal + in []byte +} + +var literalRegexp = regexp.MustCompile("^(null|true|false)") + +func (p *decoder) unmarshalValue() (Value, error) { + if len(p.in) == 0 { + return Value{}, io.ErrUnexpectedEOF + } + switch p.in[0] { + case 'n', 't', 'f': + if n := matchWithDelim(literalRegexp, p.in); n > 0 { + var v Value + switch p.in[0] { + case 'n': + v = rawValueOf(nil, p.in[:n:n]) + case 't': + v = rawValueOf(true, p.in[:n:n]) + case 'f': + v = rawValueOf(false, p.in[:n:n]) + } + p.consume(n) + return v, nil + } + return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in)) + case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return p.unmarshalNumber() + case '"': + return p.unmarshalString() + case '[': + return p.unmarshalArray() + case '{': + return p.unmarshalObject() + default: + return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in)) + } +} + +func (p *decoder) 
unmarshalArray() (Value, error) { + b := p.in + var elems []Value + if err := p.consumeChar('[', "at start of array"); err != nil { + return Value{}, err + } + if len(p.in) > 0 && p.in[0] != ']' { + for len(p.in) > 0 { + v, err := p.unmarshalValue() + if !p.nerr.Merge(err) { + return Value{}, err + } + elems = append(elems, v) + if !p.tryConsumeChar(',') { + break + } + } + } + if err := p.consumeChar(']', "at end of array"); err != nil { + return Value{}, err + } + b = b[:len(b)-len(p.in)] + return rawValueOf(elems, b[:len(b):len(b)]), nil +} + +func (p *decoder) unmarshalObject() (Value, error) { + b := p.in + var items [][2]Value + if err := p.consumeChar('{', "at start of object"); err != nil { + return Value{}, err + } + if len(p.in) > 0 && p.in[0] != '}' { + for len(p.in) > 0 { + k, err := p.unmarshalString() + if !p.nerr.Merge(err) { + return Value{}, err + } + if err := p.consumeChar(':', "in object"); err != nil { + return Value{}, err + } + v, err := p.unmarshalValue() + if !p.nerr.Merge(err) { + return Value{}, err + } + items = append(items, [2]Value{k, v}) + if !p.tryConsumeChar(',') { + break + } + } + } + if err := p.consumeChar('}', "at end of object"); err != nil { + return Value{}, err + } + b = b[:len(b)-len(p.in)] + return rawValueOf(items, b[:len(b):len(b)]), nil +} + +func (p *decoder) consumeChar(c byte, msg string) error { + if p.tryConsumeChar(c) { + return nil + } + if len(p.in) == 0 { + return io.ErrUnexpectedEOF + } + return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg) +} + +func (p *decoder) tryConsumeChar(c byte) bool { + if len(p.in) > 0 && p.in[0] == c { + p.consume(1) + return true + } + return false +} + +// consume consumes n bytes of input and any subsequent whitespace. 
+func (p *decoder) consume(n int) { + p.in = p.in[n:] + for len(p.in) > 0 { + switch p.in[0] { + case ' ', '\n', '\r', '\t': + p.in = p.in[1:] + default: + return + } + } +} + +// Any sequence that looks like a non-delimiter (for error reporting). +var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)") + +// matchWithDelim matches r with the input b and verifies that the match +// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]"). +// As a special case, EOF is considered a delimiter. +func matchWithDelim(r *regexp.Regexp, b []byte) int { + n := len(r.Find(b)) + if n < len(b) { + // Check that the next character is a delimiter. + c := b[n] + notDelim := (c == '-' || c == '+' || c == '.' || c == '_' || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9')) + if notDelim { + return 0 + } + } + return n +} diff --git a/internal/encoding/jsonx/encode.go b/internal/encoding/jsonx/encode.go new file mode 100644 index 00000000..93b09289 --- /dev/null +++ b/internal/encoding/jsonx/encode.go @@ -0,0 +1,128 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "strings" + + "github.com/golang/protobuf/v2/internal/errors" +) + +// Marshal serializes v as the JSON format. +// +// If indent is a non-empty string, it causes every entry for an Array or Object +// to be preceded by the indent and trailed by a newline. 
+func Marshal(v Value, indent string) ([]byte, error) { + p := encoder{} + if len(indent) > 0 { + if strings.Trim(indent, " \t") != "" { + return nil, errors.New("indent may only be composed of space and tab characters") + } + p.indent = indent + p.newline = "\n" + } + err := p.marshalValue(v) + if !p.nerr.Merge(err) { + return nil, err + } + return p.out, p.nerr.E +} + +type encoder struct { + nerr errors.NonFatal + out []byte + + indent string + indents []byte + newline string // set to "\n" if len(indent) > 0 +} + +func (p *encoder) marshalValue(v Value) error { + switch v.Type() { + case Null: + p.out = append(p.out, "null"...) + return nil + case Bool: + if v.Bool() { + p.out = append(p.out, "true"...) + } else { + p.out = append(p.out, "false"...) + } + return nil + case Number: + return p.marshalNumber(v) + case String: + return p.marshalString(v) + case Array: + return p.marshalArray(v) + case Object: + return p.marshalObject(v) + default: + return errors.New("invalid type %v to encode value", v.Type()) + } +} + +func (p *encoder) marshalArray(v Value) error { + if v.Type() != Array { + return errors.New("invalid type %v, expected array", v.Type()) + } + elems := v.Array() + p.out = append(p.out, '[') + p.indents = append(p.indents, p.indent...) + if len(elems) > 0 { + p.out = append(p.out, p.newline...) + } + for i, elem := range elems { + p.out = append(p.out, p.indents...) + if err := p.marshalValue(elem); !p.nerr.Merge(err) { + return err + } + if i < len(elems)-1 { + p.out = append(p.out, ',') + } + p.out = append(p.out, p.newline...) + } + p.indents = p.indents[:len(p.indents)-len(p.indent)] + if len(elems) > 0 { + p.out = append(p.out, p.indents...) + } + p.out = append(p.out, ']') + return nil +} + +func (p *encoder) marshalObject(v Value) error { + if v.Type() != Object { + return errors.New("invalid type %v, expected object", v.Type()) + } + items := v.Object() + p.out = append(p.out, '{') + p.indents = append(p.indents, p.indent...) 
+ if len(items) > 0 { + p.out = append(p.out, p.newline...) + } + for i, item := range items { + p.out = append(p.out, p.indents...) + if err := p.marshalString(item[0]); !p.nerr.Merge(err) { + return err + } + p.out = append(p.out, ':') + if len(p.indent) > 0 { + p.out = append(p.out, ' ') + } + if err := p.marshalValue(item[1]); !p.nerr.Merge(err) { + return err + } + if i < len(items)-1 { + p.out = append(p.out, ',') + } + p.out = append(p.out, p.newline...) + } + p.indents = p.indents[:len(p.indents)-len(p.indent)] + if len(items) > 0 { + p.out = append(p.out, p.indents...) + } + p.out = append(p.out, '}') + return nil +} diff --git a/internal/encoding/jsonx/json_test.go b/internal/encoding/jsonx/json_test.go new file mode 100644 index 00000000..3e96fa67 --- /dev/null +++ b/internal/encoding/jsonx/json_test.go @@ -0,0 +1,416 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package json + +import ( + "math" + "strings" + "testing" + "unicode/utf8" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" +) + +func Test(t *testing.T) { + const space = " \n\r\t" + var V = ValueOf + type Arr = []Value + type Obj = [][2]Value + + tests := []struct { + in string + wantVal Value + wantOut string + wantOutIndent string + wantErr string + }{{ + in: ``, + wantErr: `unexpected EOF`, + }, { + in: space, + wantErr: `unexpected EOF`, + }, { + in: space + `null` + space, + wantVal: V(nil), + wantOut: `null`, + wantOutIndent: `null`, + }, { + in: space + `true` + space, + wantVal: V(true), + wantOut: `true`, + wantOutIndent: `true`, + }, { + in: space + `false` + space, + wantVal: V(false), + wantOut: `false`, + wantOutIndent: `false`, + }, { + in: space + `0` + space, + wantVal: V(0.0), + wantOut: `0`, + wantOutIndent: `0`, + }, { + in: space + `"hello"` + space, + wantVal: V("hello"), + wantOut: `"hello"`, + wantOutIndent: `"hello"`, + }, { + in: space + `[]` + space, + wantVal: V(Arr{}), + wantOut: `[]`, + wantOutIndent: `[]`, + }, { + in: space + `{}` + space, + wantVal: V(Obj{}), + wantOut: `{}`, + wantOutIndent: `{}`, + }, { + in: `null#invalid`, + wantErr: `8 bytes of unconsumed input`, + }, { + in: `0#invalid`, + wantErr: `8 bytes of unconsumed input`, + }, { + in: `"hello"#invalid`, + wantErr: `8 bytes of unconsumed input`, + }, { + in: `[]#invalid`, + wantErr: `8 bytes of unconsumed input`, + }, { + in: `{}#invalid`, + wantErr: `8 bytes of unconsumed input`, + }, { + in: `[truee,true]`, + wantErr: `invalid "truee" as literal`, + }, { + in: `[falsee,false]`, + wantErr: `invalid "falsee" as literal`, + }, { + in: `[`, + wantErr: `unexpected EOF`, + }, { + in: `[{}]`, + wantVal: V(Arr{V(Obj{})}), + wantOut: "[{}]", + wantOutIndent: "[\n\t{}\n]", + }, { + in: `[{]}`, + wantErr: `invalid character ']' at start of string`, + }, { + in: `[,]`, + wantErr: `invalid "," as value`, + }, { + in: `{,}`, + wantErr: `invalid 
character ',' at start of string`, + }, { + in: `{"key""val"}`, + wantErr: `invalid character '"', expected ':' in object`, + }, { + in: `["elem0""elem1"]`, + wantErr: `invalid character '"', expected ']' at end of array`, + }, { + in: `{"hello"`, + wantErr: `unexpected EOF`, + }, { + in: `{"hello"}`, + wantErr: `invalid character '}', expected ':' in object`, + }, { + in: `{"hello":`, + wantErr: `unexpected EOF`, + }, { + in: `{"hello":}`, + wantErr: `invalid "}" as value`, + }, { + in: `{"hello":"goodbye"`, + wantErr: `unexpected EOF`, + }, { + in: `{"hello":"goodbye"]`, + wantErr: `invalid character ']', expected '}' at end of object`, + }, { + in: `{"hello":"goodbye"}`, + wantVal: V(Obj{{V("hello"), V("goodbye")}}), + wantOut: `{"hello":"goodbye"}`, + wantOutIndent: "{\n\t\"hello\": \"goodbye\"\n}", + }, { + in: `{"hello":"goodbye",}`, + wantErr: `invalid character '}' at start of string`, + }, { + in: `{"k":"v1","k":"v2"}`, + wantVal: V(Obj{ + {V("k"), V("v1")}, {V("k"), V("v2")}, + }), + wantOut: `{"k":"v1","k":"v2"}`, + wantOutIndent: "{\n\t\"k\": \"v1\",\n\t\"k\": \"v2\"\n}", + }, { + in: `{"k":{"k":{"k":"v"}}}`, + wantVal: V(Obj{ + {V("k"), V(Obj{ + {V("k"), V(Obj{ + {V("k"), V("v")}, + })}, + })}, + }), + wantOut: `{"k":{"k":{"k":"v"}}}`, + wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v\"\n\t\t}\n\t}\n}", + }, { + in: `{"k":{"k":{"k":"v1","k":"v2"}}}`, + wantVal: V(Obj{ + {V("k"), V(Obj{ + {V("k"), V(Obj{ + {V("k"), V("v1")}, + {V("k"), V("v2")}, + })}, + })}, + }), + wantOut: `{"k":{"k":{"k":"v1","k":"v2"}}}`, + wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v1\",\n\t\t\t\"k\": \"v2\"\n\t\t}\n\t}\n}", + }, { + in: " x", + wantErr: `syntax error (line 1:3)`, + }, { + in: `["💩"x`, + wantErr: `syntax error (line 1:5)`, + }, { + in: "\n\n[\"🔥🔥🔥\"x", + wantErr: `syntax error (line 3:7)`, + }, { + in: `["👍🏻👍🏿"x`, + wantErr: `syntax error (line 1:8)`, // multi-rune emojis; could be column:6 + }, { + in: "\"\x00\"", + wantErr: 
`invalid character '\x00' in string`, + }, { + in: "\"\xff\"", + wantErr: `invalid UTF-8 detected`, + wantVal: V(string("\xff")), + }, { + in: `"` + string(utf8.RuneError) + `"`, + wantVal: V(string(utf8.RuneError)), + wantOut: `"` + string(utf8.RuneError) + `"`, + }, { + in: `"\uFFFD"`, + wantVal: V(string(utf8.RuneError)), + wantOut: `"` + string(utf8.RuneError) + `"`, + }, { + in: `"\x"`, + wantErr: `invalid escape code "\\x" in string`, + }, { + in: `"\uXXXX"`, + wantErr: `invalid escape code "\\uXXXX" in string`, + }, { + in: `"\uDEAD"`, // unmatched surrogate pair + wantErr: `unexpected EOF`, + }, { + in: `"\uDEAD\uBEEF"`, // invalid surrogate half + wantErr: `invalid escape code "\\uBEEF" in string`, + }, { + in: `"\uD800\udead"`, // valid surrogate pair + wantVal: V("𐊭"), + wantOut: `"𐊭"`, + }, { + in: `"\u0000\"\\\/\b\f\n\r\t"`, + wantVal: V("\u0000\"\\/\b\f\n\r\t"), + wantOut: `"\u0000\"\\/\b\f\n\r\t"`, + }, { + in: `-`, + wantErr: `invalid "-" as number`, + }, { + in: `-0`, + wantVal: V(math.Copysign(0, -1)), + wantOut: `-0`, + }, { + in: `+0`, + wantErr: `invalid "+0" as value`, + }, { + in: `-+`, + wantErr: `invalid "-+" as number`, + }, { + in: `0.`, + wantErr: `invalid "0." 
as number`, + }, { + in: `.1`, + wantErr: `invalid ".1" as value`, + }, { + in: `0.e1`, + wantErr: `invalid "0.e1" as number`, + }, { + in: `0.0`, + wantVal: V(0.0), + wantOut: "0", + }, { + in: `01`, + wantErr: `invalid "01" as number`, + }, { + in: `0e`, + wantErr: `invalid "0e" as number`, + }, { + in: `0e0`, + wantVal: V(0.0), + wantOut: "0", + }, { + in: `0E0`, + wantVal: V(0.0), + wantOut: "0", + }, { + in: `0Ee`, + wantErr: `invalid "0Ee" as number`, + }, { + in: `-1.0E+1`, + wantVal: V(-10.0), + wantOut: "-10", + }, { + in: ` + { + "firstName" : "John", + "lastName" : "Smith" , + "isAlive" : true, + "age" : 27, + "address" : { + "streetAddress" : "21 2nd Street" , + "city" : "New York" , + "state" : "NY" , + "postalCode" : "10021-3100" + }, + "phoneNumbers" : [ + { + "type" : "home" , + "number" : "212 555-1234" + } , + { + "type" : "office" , + "number" : "646 555-4567" + } , + { + "type" : "mobile" , + "number" : "123 456-7890" + } + ], + "children" : [] , + "spouse" : null + } + `, + wantVal: V(Obj{ + {V("firstName"), V("John")}, + {V("lastName"), V("Smith")}, + {V("isAlive"), V(true)}, + {V("age"), V(27.0)}, + {V("address"), V(Obj{ + {V("streetAddress"), V("21 2nd Street")}, + {V("city"), V("New York")}, + {V("state"), V("NY")}, + {V("postalCode"), V("10021-3100")}, + })}, + {V("phoneNumbers"), V(Arr{ + V(Obj{ + {V("type"), V("home")}, + {V("number"), V("212 555-1234")}, + }), + V(Obj{ + {V("type"), V("office")}, + {V("number"), V("646 555-4567")}, + }), + V(Obj{ + {V("type"), V("mobile")}, + {V("number"), V("123 456-7890")}, + }), + })}, + {V("children"), V(Arr{})}, + {V("spouse"), V(nil)}, + }), + wantOut: `{"firstName":"John","lastName":"Smith","isAlive":true,"age":27,"address":{"streetAddress":"21 2nd Street","city":"New York","state":"NY","postalCode":"10021-3100"},"phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"},{"type":"mobile","number":"123 456-7890"}],"children":[],"spouse":null}`, + 
wantOutIndent: `{ + "firstName": "John", + "lastName": "Smith", + "isAlive": true, + "age": 27, + "address": { + "streetAddress": "21 2nd Street", + "city": "New York", + "state": "NY", + "postalCode": "10021-3100" + }, + "phoneNumbers": [ + { + "type": "home", + "number": "212 555-1234" + }, + { + "type": "office", + "number": "646 555-4567" + }, + { + "type": "mobile", + "number": "123 456-7890" + } + ], + "children": [], + "spouse": null +}`, + }} + + opts := cmp.Options{ + cmpopts.EquateEmpty(), + cmp.Transformer("", func(v Value) interface{} { + switch v.typ { + case 0: + return nil // special case so Value{} == Value{} + case Null: + return nil + case Bool: + return v.Bool() + case Number: + return v.Number() + case String: + return v.String() + case Array: + return v.Array() + case Object: + return v.Object() + default: + panic("invalid type") + } + }), + } + for _, tt := range tests { + t.Run("", func(t *testing.T) { + if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" { + gotVal, err := Unmarshal([]byte(tt.in)) + if err == nil { + if tt.wantErr != "" { + t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr) + } + } else { + if tt.wantErr == "" { + t.Errorf("Unmarshal(): got %v, want nil error", err) + } else if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr) + } + } + if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" { + t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff) + } + } + if tt.wantOut != "" { + gotOut, err := Marshal(tt.wantVal, "") + if err != nil { + t.Errorf("Marshal(): got %v, want nil error", err) + } + if string(gotOut) != tt.wantOut { + t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut) + } + } + if tt.wantOutIndent != "" { + gotOut, err := Marshal(tt.wantVal, "\t") + if err != nil { + t.Errorf("Marshal(Indent): got %v, want nil error", err) + } + if string(gotOut) != tt.wantOutIndent { + t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", 
gotOut, tt.wantOutIndent) + } + } + }) + } +} diff --git a/internal/encoding/jsonx/number.go b/internal/encoding/jsonx/number.go new file mode 100644 index 00000000..ec99cbda --- /dev/null +++ b/internal/encoding/jsonx/number.go @@ -0,0 +1,79 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "io" + "math" + "regexp" + "strconv" + + "github.com/golang/protobuf/v2/internal/errors" +) + +// marshalNumber encodes v as a Number. +func (p *encoder) marshalNumber(v Value) error { + var err error + p.out, err = appendNumber(p.out, v) + return err +} +func appendNumber(out []byte, v Value) ([]byte, error) { + if v.Type() != Number { + return nil, errors.New("invalid type %v, expected number", v.Type()) + } + if len(v.raw) > 0 { + return append(out, v.raw...), nil + } + n := v.Number() + if math.IsInf(n, 0) || math.IsNaN(n) { + return nil, errors.New("invalid number value: %v", n) + } + + // JSON number formatting logic based on encoding/json. + // See floatEncoder.encode for reference. + bits := 64 + if float64(float32(n)) == n { + bits = 32 + } + fmt := byte('f') + if abs := math.Abs(n); abs != 0 { + if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { + fmt = 'e' + } + } + out = strconv.AppendFloat(out, n, fmt, -1, bits) + if fmt == 'e' { + n := len(out) + if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' { + out[n-2] = out[n-1] + out = out[:n-1] + } + } + return out, nil +} + +// Exact expression to match a JSON floating-point number. +// JSON's grammar for floats is more restrictive than Go's grammar. +var floatRegexp = regexp.MustCompile("^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?") + +// unmarshalNumber decodes a Number from the input. 
+func (p *decoder) unmarshalNumber() (Value, error) { + v, n, err := consumeNumber(p.in) + p.consume(n) + return v, err +} +func consumeNumber(in []byte) (Value, int, error) { + if len(in) == 0 { + return Value{}, 0, io.ErrUnexpectedEOF + } + if n := matchWithDelim(floatRegexp, in); n > 0 { + v, err := strconv.ParseFloat(string(in[:n]), 64) + if err != nil { + return Value{}, 0, err + } + return rawValueOf(v, in[:n:n]), n, nil + } + return Value{}, 0, newSyntaxError("invalid %q as number", errRegexp.Find(in)) +} diff --git a/internal/encoding/jsonx/string.go b/internal/encoding/jsonx/string.go new file mode 100644 index 00000000..813d8fa8 --- /dev/null +++ b/internal/encoding/jsonx/string.go @@ -0,0 +1,160 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "io" + "math/bits" + "strconv" + "unicode" + "unicode/utf16" + "unicode/utf8" + + "github.com/golang/protobuf/v2/internal/errors" +) + +func (p *encoder) marshalString(v Value) error { + var err error + p.out, err = appendString(p.out, v) + return err +} +func appendString(out []byte, v Value) ([]byte, error) { + if v.Type() != String { + return nil, errors.New("invalid type %v, expected string", v.Type()) + } + if len(v.raw) > 0 { + return append(out, v.raw...), nil + } + in := v.String() + + var nerr errors.NonFatal + out = append(out, '"') + i := indexNeedEscape(in) + in, out = in[i:], append(out, in[:i]...) 
+ for len(in) > 0 { + switch r, n := utf8.DecodeRuneInString(in); { + case r == utf8.RuneError && n == 1: + nerr.AppendInvalidUTF8("") + in, out = in[1:], append(out, in[0]) // preserve invalid byte + case r < ' ' || r == '"' || r == '\\': + out = append(out, '\\') + switch r { + case '"', '\\': + out = append(out, byte(r)) + case '\b': + out = append(out, 'b') + case '\f': + out = append(out, 'f') + case '\n': + out = append(out, 'n') + case '\r': + out = append(out, 'r') + case '\t': + out = append(out, 't') + default: + out = append(out, 'u') + out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...) + out = strconv.AppendUint(out, uint64(r), 16) + } + in = in[n:] + default: + i := indexNeedEscape(in[n:]) + in, out = in[n+i:], append(out, in[:n+i]...) + } + } + out = append(out, '"') + return out, nerr.E +} + +func (p *decoder) unmarshalString() (Value, error) { + v, n, err := consumeString(p.in) + p.consume(n) + return v, err +} +func consumeString(in []byte) (Value, int, error) { + var nerr errors.NonFatal + in0 := in + if len(in) == 0 { + return Value{}, 0, io.ErrUnexpectedEOF + } + if in[0] != '"' { + return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0]) + } + in = in[1:] + i := indexNeedEscape(string(in)) + in, out := in[i:], in[:i:i] // set cap to prevent mutations + for len(in) > 0 { + switch r, n := utf8.DecodeRune(in); { + case r == utf8.RuneError && n == 1: + nerr.AppendInvalidUTF8("") + in, out = in[1:], append(out, in[0]) // preserve invalid byte + case r < ' ': + return Value{}, 0, newSyntaxError("invalid character %q in string", r) + case r == '"': + in = in[1:] + n := len(in0) - len(in) + v := rawValueOf(string(out), in0[:n:n]) + return v, n, nerr.E + case r == '\\': + if len(in) < 2 { + return Value{}, 0, io.ErrUnexpectedEOF + } + switch r := in[1]; r { + case '"', '\\', '/': + in, out = in[2:], append(out, r) + case 'b': + in, out = in[2:], append(out, '\b') + case 'f': + in, out = in[2:], append(out, '\f') + 
case 'n': + in, out = in[2:], append(out, '\n') + case 'r': + in, out = in[2:], append(out, '\r') + case 't': + in, out = in[2:], append(out, '\t') + case 'u': + if len(in) < 6 { + return Value{}, 0, io.ErrUnexpectedEOF + } + v, err := strconv.ParseUint(string(in[2:6]), 16, 16) + if err != nil { + return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6]) + } + in = in[6:] + + r := rune(v) + if utf16.IsSurrogate(r) { + if len(in) < 6 { + return Value{}, 0, io.ErrUnexpectedEOF + } + v, err := strconv.ParseUint(string(in[2:6]), 16, 16) + r = utf16.DecodeRune(r, rune(v)) + if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil { + return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6]) + } + in = in[6:] + } + out = append(out, string(r)...) + default: + return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2]) + } + default: + i := indexNeedEscape(string(in[n:])) + in, out = in[n+i:], append(out, in[:n+i]...) + } + } + return Value{}, 0, io.ErrUnexpectedEOF +} + +// indexNeedEscape returns the index of the next character that needs escaping. +// If no characters need escaping, this returns the input length. +func indexNeedEscape(s string) int { + for i, r := range s { + if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError { + return i + } + } + return len(s) +} diff --git a/internal/encoding/jsonx/value.go b/internal/encoding/jsonx/value.go new file mode 100644 index 00000000..aeff2222 --- /dev/null +++ b/internal/encoding/jsonx/value.go @@ -0,0 +1,206 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package json implements the JSON format. +// This package has no semantic understanding for protocol buffers and is only +// a parser and composer for the format. 
+// +// This follows RFC 7159, with some notable implementation specifics: +// * numbers that are out of range result in a decoding error +// * duplicate keys in objects are not rejected +// +// Reasons why the standard encoding/json package is not suitable: +// * information about duplicate keys is lost +// * invalid UTF-8 is silently coerced into utf8.RuneError +package json + +import ( + "fmt" + "strings" +) + +// Type represents a type expressible in the JSON format. +type Type uint8 + +const ( + _ Type = iota + // Null is the null literal (i.e., "null"). + Null + // Bool is a boolean (i.e., "true" or "false"). + Bool + // Number is a floating-point number (e.g., "1.234" or "1e100"). + Number + // String is an escaped string (e.g., `"the quick brown fox"`). + String + // Array is an ordered list of values (e.g., `[0, "one", true]`). + Array + // Object is an ordered map of values (e.g., `{"key": null}`). + Object +) + +func (t Type) String() string { + switch t { + case Null: + return "null" + case Bool: + return "bool" + case Number: + return "number" + case String: + return "string" + case Array: + return "array" + case Object: + return "object" + default: + return "" + } +} + +// Value contains a value of a given Type. +type Value struct { + typ Type + raw []byte // raw bytes of the serialized data + str string // only for String + num float64 // only for Bool or Number + arr []Value // only for Array + obj [][2]Value // only for Object +} + +// ValueOf returns a Value for a given Go value: +// nil => Null +// bool => Bool +// int32, int64 => Number +// uint32, uint64 => Number +// float32, float64 => Number +// string, []byte => String +// []Value => Array +// [][2]Value => Object +// +// ValueOf panics if the Go type is not one of the above. 
+func ValueOf(v interface{}) Value { + switch v := v.(type) { + case nil: + return Value{typ: Null} + case bool: + if v { + return Value{typ: Bool, num: 1} + } else { + return Value{typ: Bool, num: 0} + } + case int32: + return Value{typ: Number, num: float64(v)} + case int64: + return Value{typ: Number, num: float64(v)} // possible loss of precision + case uint32: + return Value{typ: Number, num: float64(v)} + case uint64: + return Value{typ: Number, num: float64(v)} // possible loss of precision + case float32: + return Value{typ: Number, num: float64(v)} + case float64: + return Value{typ: Number, num: float64(v)} + case string: + return Value{typ: String, str: string(v)} + case []byte: + return Value{typ: String, str: string(v)} + case []Value: + return Value{typ: Array, arr: v} + case [][2]Value: + return Value{typ: Object, obj: v} + default: + panic(fmt.Sprintf("invalid type %T", v)) + } +} +func rawValueOf(v interface{}, raw []byte) Value { + v2 := ValueOf(v) + v2.raw = raw + return v2 +} + +// Type is the type of the value. +func (v Value) Type() Type { + return v.typ +} + +// Bool returns v as a bool and panics if it is not a Bool. +func (v Value) Bool() bool { + if v.typ != Bool { + panic("value is not a boolean") + } + return v.num != 0 +} + +// Number returns v as a float64 and panics if it is not a Number. +func (v Value) Number() float64 { + if v.typ != Number { + panic("value is not a number") + } + return v.num +} + +// String returns v as a string if the Type is String. +// Otherwise, this returns a formatted string of v for debugging purposes. +// +// Since JSON strings must be UTF-8, the marshaler and unmarshaler will verify +// for UTF-8 correctness. 
+func (v Value) String() string { + if v.typ != String { + return v.stringValue() + } + return v.str +} +func (v Value) stringValue() string { + switch v.typ { + case Null, Bool, Number: + return string(v.Raw()) + case Array: + var ss []string + for _, v := range v.Array() { + ss = append(ss, v.String()) + } + return "[" + strings.Join(ss, ",") + "]" + case Object: + var ss []string + for _, v := range v.Object() { + ss = append(ss, v[0].String()+":"+v[1].String()) + } + return "{" + strings.Join(ss, ",") + "}" + default: + return "" + } +} + +// Array returns the elements of v and panics if the Type is not Array. +// Mutations on the return value may not be observable from the Raw method. +func (v Value) Array() []Value { + if v.typ != Array { + panic("value is not an array") + } + return v.arr +} + +// Object returns the items of v and panics if the Type is not Object. +// The [2]Value represents a key (of type String) and value pair. +// +// Mutations on the return value may not be observable from the Raw method. +func (v Value) Object() [][2]Value { + if v.typ != Object { + panic("value is not an object") + } + return v.obj +} + +// Raw returns the raw representation of the value. +// The returned value may alias the input given to Unmarshal. +func (v Value) Raw() []byte { + if len(v.raw) > 0 { + return v.raw + } + p := encoder{} + if err := p.marshalValue(v); !p.nerr.Merge(err) { + return []byte("") + } + return p.out +}