Damien Neil 5ba0c29655 internal/encoding/json: fix crash in parsing
Fuzzer-detected crash when parsing: {""

Change-Id: I019c667f48e6a1237858b5abf7d34f43593fb3b6
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/212357
Reviewed-by: Herbie Ong <herbie@google.com>
2019-12-22 04:14:01 +00:00

471 lines
12 KiB
Go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/errors"
)
// call identifies which Decoder method was invoked most recently.
type call uint8
const (
// readCall is the zero value, so a freshly constructed Decoder behaves as if
// Read had been called last.
readCall call = iota
// peekCall records that Peek was the last call and that its result is
// buffered in the Decoder.
peekCall
)
// Decoder is a token-based JSON decoder. It works over a single []byte input
// and hands out one Value per call to Read.
type Decoder struct {
// lastCall is the last method called, either readCall or peekCall.
// Initial value is readCall.
lastCall call
// value contains the last read value. Peek also buffers the result of its
// look-ahead Read here.
value Value
// err contains the error returned by the Read that Peek performed, so that
// the following Read can return it.
err error
// startStack is a stack containing StartObject and StartArray types. The
// top of stack represents the object or the array the current value is
// directly located in.
startStack []Type
// orig is the complete original input; it is used in reporting line and
// column.
orig []byte
// in contains the unconsumed input.
in []byte
}
// NewDecoder creates a Decoder positioned at the start of b. The slice is
// retained both as the unconsumed input and as the reference used when
// reporting positions.
func NewDecoder(b []byte) *Decoder {
	d := new(Decoder)
	d.orig = b
	d.in = b
	return d
}
// Peek reports the type of the next JSON value without consuming it from the
// caller's point of view. The look-ahead result (value and error) is buffered
// so that the following Read returns it instead of parsing again.
func (d *Decoder) Peek() Type {
	// Always mark Peek as the most recent call, whichever path returns.
	defer func() { d.lastCall = peekCall }()
	if d.lastCall != readCall {
		// A previous Peek already buffered the upcoming value.
		return d.value.typ
	}
	d.value, d.err = d.Read()
	return d.value.typ
}
// Read returns the next JSON value. It returns an error if the input cannot
// be parsed or if the parsed value is not allowed at the current position
// (for example a value where an object name is expected). On error the
// returned Value is the zero Value.
//
// If the previous call was Peek, Read returns the value and error that Peek
// buffered instead of parsing again. Commas between values are validated and
// skipped; they are never returned to the caller.
func (d *Decoder) Read() (Value, error) {
	defer func() { d.lastCall = readCall }()
	if d.lastCall == peekCall {
		// Peek already did the work; hand back its buffered result.
		return d.value, d.err
	}

	value, err := d.parseNext()
	if err != nil {
		return Value{}, err
	}
	// n is the number of bytes to consume once value has been validated.
	n := value.size

	switch value.typ {
	case EOF:
		// EOF is only an error while an object or array is still open.
		//
		// This check used to carry a second operand,
		//   d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0
		// which, because & binds tighter than |, parsed as
		//   ((d.value.typ&Null)|Bool|...|EndArray) == 0
		// and so could never be true (assuming the Type constants are
		// non-zero bit flags, as their use as masks below implies). It is
		// removed rather than parenthesized so that observable behaviour,
		// including EOF on empty input, stays exactly as before.
		if len(d.startStack) != 0 {
			return Value{}, io.ErrUnexpectedEOF
		}

	case Null:
		if !d.isValueNext() {
			return Value{}, d.newSyntaxError("unexpected value null")
		}

	case Bool, Number:
		if !d.isValueNext() {
			return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
		}

	case String:
		if d.isValueNext() {
			break
		}
		// Check if this is for an object name.
		if d.value.typ&(StartObject|comma) == 0 {
			return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
		}
		// Step over the name and any whitespace before validating the ':' so
		// that a syntax error is reported at the offending position.
		d.in = d.in[n:]
		d.consume(0)
		if len(d.in) == 0 {
			return Value{}, d.newSyntaxError(`unexpected EOF, missing ":" after object name`)
		}
		if c := d.in[0]; c != ':' {
			return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
		}
		// The name itself is already consumed; only the ':' remains.
		n = 1
		value.typ = Name

	case StartObject, StartArray:
		if !d.isValueNext() {
			return Value{}, d.newSyntaxError("unexpected character %v", value.Raw())
		}
		d.startStack = append(d.startStack, value.typ)

	case EndObject:
		// A close is invalid at top level, right after a comma, or when the
		// innermost open container is of the other kind.
		if len(d.startStack) == 0 ||
			d.value.typ == comma ||
			d.startStack[len(d.startStack)-1] != StartObject {
			return Value{}, d.newSyntaxError("unexpected character }")
		}
		d.startStack = d.startStack[:len(d.startStack)-1]

	case EndArray:
		if len(d.startStack) == 0 ||
			d.value.typ == comma ||
			d.startStack[len(d.startStack)-1] != StartArray {
			return Value{}, d.newSyntaxError("unexpected character ]")
		}
		d.startStack = d.startStack[:len(d.startStack)-1]

	case comma:
		// A comma must be inside a container and must follow a complete value.
		if len(d.startStack) == 0 ||
			d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
			return Value{}, d.newSyntaxError("unexpected character ,")
		}
	}

	// Update d.value only after validating value to be in the right sequence.
	d.value = value
	d.in = d.in[n:]

	if d.value.typ == comma {
		// Commas are not exposed to callers; return the value that follows.
		return d.Read()
	}
	return value, nil
}
// errRegexp extracts a short snippet from the start of the remaining input for
// use in error messages: a run of up to 32 characters that look like a
// non-delimiter, or failing that a single character.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
// parseNext skips leading whitespace and tokenizes the next JSON value from
// the unconsumed input without consuming it. It produces a Value for every
// type except Name, and it does not check whether the token is valid at the
// current position; that is left to the caller.
func (d *Decoder) parseNext() (value Value, err error) {
	// Skip whitespace ahead of the token.
	d.consume(0)
	buf := d.in
	if len(buf) == 0 {
		return d.newValue(EOF, nil, 0), nil
	}

	switch buf[0] {
	// Single-character structural tokens.
	case '{':
		return d.newValue(StartObject, buf, 1), nil
	case '}':
		return d.newValue(EndObject, buf, 1), nil
	case '[':
		return d.newValue(StartArray, buf, 1), nil
	case ']':
		return d.newValue(EndArray, buf, 1), nil
	case ',':
		return d.newValue(comma, buf, 1), nil

	case '"':
		str, size, perr := d.parseString(buf)
		if perr != nil {
			return Value{}, perr
		}
		return d.newStringValue(buf, size, str), nil

	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		if size, ok := consumeNumber(buf); ok {
			return d.newValue(Number, buf, size), nil
		}
		return Value{}, d.newSyntaxError("invalid number %s", errRegexp.Find(buf))

	// Keyword literals: a failed match leaves the switch and is reported by
	// the shared "invalid value" error below.
	case 'n':
		if size := matchWithDelim("null", buf); size != 0 {
			return d.newValue(Null, buf, size), nil
		}
	case 't':
		if size := matchWithDelim("true", buf); size != 0 {
			return d.newBoolValue(buf, size, true), nil
		}
	case 'f':
		if size := matchWithDelim("false", buf); size != 0 {
			return d.newBoolValue(buf, size, false), nil
		}
	}
	return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(buf))
}
// position converts the byte offset idx within orig into a 1-based line and
// column. The column is counted in runes from the start of the line.
func position(orig []byte, idx int) (int, int) {
	prefix := orig[:idx]
	line := 1 + bytes.Count(prefix, []byte{'\n'})
	// LastIndexByte yields -1 when there is no newline, which makes the
	// current line start at offset 0.
	lineStart := bytes.LastIndexByte(prefix, '\n') + 1
	column := 1 + utf8.RuneCount(prefix[lineStart:])
	return line, column
}
// newSyntaxError formats f and x into an error and prefixes it with the line
// and column of the decoder's current read offset (the start of the
// unconsumed input).
func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
	offset := len(d.orig) - len(d.in)
	line, column := position(d.orig, offset)
	cause := errors.New(f, x...)
	return errors.New("syntax error (line %d:%d): %v", line, column, cause)
}
// matchWithDelim matches s with the input b and verifies that the match
// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
// As a special case, EOF is considered a delimiter. It returns the length of s
// if there is a match, else 0.
func matchWithDelim(s string, b []byte) int {
if !bytes.HasPrefix(b, []byte(s)) {
return 0
}
n := len(s)
if n < len(b) && isNotDelim(b[n]) {
return 0
}
return n
}
// isNotDelim reports whether c is not a delimiter character, that is,
// whether c is an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isNotDelim(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return true
	}
	switch c {
	case '-', '+', '.', '_':
		return true
	}
	return false
}
// consume drops the first n bytes of the unconsumed input and then any run of
// JSON whitespace (space, newline, carriage return, tab) that follows.
func (d *Decoder) consume(n int) {
	rest := d.in[n:]
	skip := 0
scan:
	for skip < len(rest) {
		switch rest[skip] {
		case ' ', '\n', '\r', '\t':
			skip++
		default:
			break scan
		}
	}
	d.in = rest[skip:]
}
// isValueNext reports whether a JSON value may appear at the current
// position, judged from the last accepted token and the innermost open
// container:
//   - at top level, only when nothing has been read yet;
//   - inside an object, only directly after a Name;
//   - inside an array, only directly after the StartArray or a comma.
func (d *Decoder) isValueNext() bool {
	depth := len(d.startStack)
	if depth == 0 {
		return d.value.typ == 0
	}

	last := d.value.typ
	switch top := d.startStack[depth-1]; top {
	case StartObject:
		return last&Name != 0
	case StartArray:
		return last&(StartArray|comma) != 0
	default:
		// Read only ever pushes StartObject or StartArray.
		panic(fmt.Sprintf(
			"unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
			last, top))
	}
}
// newValue builds a Value of the given type. input is the unconsumed input
// starting at the token; its length is used to compute the token's offset
// within the original input. size is the token's length in bytes.
func (d *Decoder) newValue(typ Type, input []byte, size int) Value {
	var v Value
	v.typ = typ
	v.input = d.orig
	v.start = len(d.orig) - len(input)
	v.size = size
	return v
}
// newBoolValue builds a Bool Value for the token at the start of input,
// recording the already-converted boolean b.
func (d *Decoder) newBoolValue(input []byte, size int, b bool) Value {
	v := d.newValue(Bool, input, size)
	v.boo = b
	return v
}
// newStringValue builds a String Value for the token at the start of input,
// recording the already-parsed string s.
func (d *Decoder) newStringValue(input []byte, size int, s string) Value {
	v := d.newValue(String, input, size)
	v.str = s
	return v
}
// Clone returns a copy of the Decoder that can be used to read ahead without
// disturbing the receiver. The container stack is copied so that pushes and
// pops on the clone do not affect the original; the input bytes are shared.
func (d *Decoder) Clone() *Decoder {
	clone := new(Decoder)
	*clone = *d
	clone.startStack = append([]Type(nil), d.startStack...)
	return clone
}
// Value provides a parsed JSON type and value.
//
// The original input slice is stored in this struct in order to compute for
// position as needed. The raw JSON value is derived from the original input
// slice given start and size.
//
// For JSON boolean and string, it holds the converted value in boo and str
// fields respectively. For JSON number, the raw JSON value holds a valid number
// which is converted only in Int or Float. Other JSON types do not require any
// additional data.
type Value struct {
// typ is the JSON type of this value.
typ Type
// input is the decoder's complete original input, not just this token.
input []byte
// start is the byte offset of the token within input.
start int
// size is the length in bytes of the raw token.
size int
// boo is the converted value of a Bool token.
boo bool
// str is the parsed value of a String token.
str string
}
// newError formats f and x into an error and prefixes it with the line and
// column at which this value starts.
func (v Value) newError(f string, x ...interface{}) error {
	line, col := v.Position()
	cause := errors.New(f, x...)
	return errors.New("error (line %d:%d): %v", line, col, cause)
}
// Type returns the JSON type of the value.
func (v Value) Type() Type {
return v.typ
}
// Position returns the 1-based line and column at which the value starts in
// the original input.
func (v Value) Position() (int, int) {
	line, column := position(v.input, v.start)
	return line, column
}
// Bool returns the boolean held by a Bool token. For any other token type it
// returns false and an error naming the raw token.
func (v Value) Bool() (bool, error) {
	if v.typ == Bool {
		return v.boo, nil
	}
	return false, v.newError("%s is not a bool", v.Raw())
}
// String returns the parsed string for a String token. For every other token
// type it returns the raw text of the token.
func (v Value) String() string {
	if v.typ == String {
		return v.str
	}
	return v.Raw()
}
// Name returns the object name held by a Name token. For any other token
// type it returns an empty string and an error naming the raw token.
func (v Value) Name() (string, error) {
	if v.typ == Name {
		return v.str, nil
	}
	return "", v.newError("%s is not an object name", v.Raw())
}
// Raw returns the token's text exactly as it appears in the input, i.e. the
// size bytes beginning at start.
func (v Value) Raw() string {
	end := v.start + v.size
	return string(v.input[v.start:end])
}
// Float returns the floating-point value of a Number token, or an error if
// the token is not a Number.
//
// bitSize selects the precision: 32 for float32 or 64 for float64. With
// bitSize=32 the result is still typed float64 but is convertible to float32
// without changing its value. Any error from strconv.ParseFloat, such as a
// value outside the range of the given bitSize, is returned with the value's
// position attached.
func (v Value) Float(bitSize int) (float64, error) {
	if v.typ != Number {
		return 0, v.newError("%s is not a number", v.Raw())
	}
	f, err := strconv.ParseFloat(v.Raw(), bitSize)
	if err == nil {
		return f, nil
	}
	return 0, v.newError("%v", err)
}
// Int returns the signed integer value of a Number token, or an error if the
// token is not a Number.
//
// bitSize is the width of the integer type the result must fit into. An error
// is returned when the number cannot be represented as an integer or when it
// is outside the range of the given bitSize.
func (v Value) Int(bitSize int) (int64, error) {
	digits, err := v.getIntStr()
	if err != nil {
		return 0, err
	}
	parsed, perr := strconv.ParseInt(digits, 10, bitSize)
	if perr != nil {
		return 0, v.newError("%v", perr)
	}
	return parsed, nil
}
// Uint returns the unsigned integer value of a Number token, or an error if
// the token is not a Number.
//
// bitSize is the width of the unsigned integer type the result must fit into.
// An error is returned when the number cannot be represented as an unsigned
// integer or when it is outside the range of the given bitSize.
func (v Value) Uint(bitSize int) (uint64, error) {
	digits, err := v.getIntStr()
	if err != nil {
		return 0, err
	}
	parsed, perr := strconv.ParseUint(digits, 10, bitSize)
	if perr != nil {
		return 0, v.newError("%v", perr)
	}
	return parsed, nil
}
// getIntStr returns the token's number as a plain integer string suitable for
// strconv.ParseInt or strconv.ParseUint. It returns an error if the token is
// not a Number, if the raw token does not parse as a number, or if the number
// cannot be normalized to an integer string.
//
// Error messages quote only the raw token (v.Raw()). v.input holds the entire
// original input, so formatting it would dump the whole document into the
// message.
func (v Value) getIntStr() (string, error) {
	if v.typ != Number {
		return "", v.newError("%s is not a number", v.Raw())
	}
	parts, ok := parseNumber(v.input[v.start : v.start+v.size])
	if !ok {
		return "", v.newError("%s is not a number", v.Raw())
	}
	num, ok := normalizeToIntString(parts)
	if !ok {
		return "", v.newError("cannot convert %s to integer", v.Raw())
	}
	return num, nil
}