protobuf-go/proto/decode.go
Damien Neil 8c86fc5e7d all: remove non-fatal UTF-8 validation errors (and non-fatal in general)
Immediately abort (un)marshal operations when encountering invalid UTF-8
data in proto3 strings. No other proto implementation supports non-UTF-8
data in proto3 strings (and many reject it in proto2 strings as well).
Producing invalid output is an interoperability threat (other
implementations won't be able to read it).

The case where existing string data is found to contain non-UTF-8 data is
better handled by changing the field to the `bytes` type, which (aside
from UTF-8 validation) is wire-compatible with `string`.

Remove the errors.NonFatal type, since there are no remaining cases
where it is needed. "Non-fatal" errors which produce results and a
non-nil error are problematic because they compose poorly; the better
approach is to take an option like AllowPartial indicating which
conditions to check for.

Change-Id: I9d189ec6ffda7b5d96d094aa1b290af2e3f23736
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/183098
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-06-20 20:55:13 +00:00

229 lines
6.2 KiB
Go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package proto
import (
"google.golang.org/protobuf/internal/encoding/wire"
"google.golang.org/protobuf/internal/errors"
"google.golang.org/protobuf/internal/pragma"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/reflect/protoregistry"
"google.golang.org/protobuf/runtime/protoiface"
)
// UnmarshalOptions configures the unmarshaler.
//
// Example usage:
//
//	err := UnmarshalOptions{DiscardUnknown: true}.Unmarshal(b, m)
type UnmarshalOptions struct {
// AllowPartial accepts input for messages that will result in missing
// required fields. If AllowPartial is false (the default), Unmarshal will
// return an error if there are any missing required fields.
AllowPartial bool
// If DiscardUnknown is set, unknown fields are ignored.
DiscardUnknown bool
// Resolver is used for looking up types when unmarshaling extension fields.
// If nil, this defaults to using protoregistry.GlobalTypes.
Resolver interface {
protoregistry.ExtensionTypeResolver
}
// NoUnkeyedLiterals presumably forces callers to use keyed struct literals
// for this type (inferred from its name; confirm in internal/pragma).
pragma.NoUnkeyedLiterals
}
// Compile-time check that UnmarshalOptions can be converted to
// protoiface.UnmarshalOptions; unmarshalMessageFast relies on this conversion
// when it hands the options to a message's own Unmarshal method.
var _ = protoiface.UnmarshalOptions(UnmarshalOptions{})
// Unmarshal parses the wire-format message in b and places the result in m.
//
// It is shorthand for calling Unmarshal on a zero-value UnmarshalOptions, so
// every option takes its default.
func Unmarshal(b []byte, m Message) error {
	var defaults UnmarshalOptions
	return defaults.Unmarshal(b, m)
}
// Unmarshal parses the wire-format message in b and places the result in m.
//
// A nil Resolver is replaced with protoregistry.GlobalTypes. Decoding is first
// attempted through unmarshalMessageFast; if that reports errInternalNoFast,
// the reflection-based unmarshalMessage is used instead. After a successful
// decode the result of IsInitialized(m) is returned, unless AllowPartial is
// set, in which case that check is skipped.
func (o UnmarshalOptions) Unmarshal(b []byte, m Message) error {
	if o.Resolver == nil {
		o.Resolver = protoregistry.GlobalTypes
	}

	// TODO: Reset m?
	err := o.unmarshalMessageFast(b, m)
	if err == errInternalNoFast {
		// No fast path for this message; decode through reflection.
		err = o.unmarshalMessage(b, m.ProtoReflect())
	}

	switch {
	case err != nil:
		return err
	case o.AllowPartial:
		return nil
	default:
		return IsInitialized(m)
	}
}
// unmarshalMessageFast decodes b into m through the Unmarshal function found
// in the method set that protoMethods reports for m, passing the options
// converted to protoiface.UnmarshalOptions.
//
// It returns errInternalNoFast when protoMethods yields nil or the method set
// has no Unmarshal function, which tells the caller to use another decoder.
func (o UnmarshalOptions) unmarshalMessageFast(b []byte, m Message) error {
	if fast := protoMethods(m); fast != nil && fast.Unmarshal != nil {
		return fast.Unmarshal(b, m, protoiface.UnmarshalOptions(o))
	}
	return errInternalNoFast
}
// unmarshalMessage decodes the wire-format data in b into m using reflection.
//
// Each loop iteration consumes one tag and its value. The field number is
// looked up among the message's declared fields and, failing that, resolved as
// an extension through o.Resolver when the number lies inside one of the
// message's extension ranges. The value is then decoded by unmarshalList,
// unmarshalMap or unmarshalSingular according to the field's cardinality.
//
// A field that cannot be matched to a descriptor, or whose decoder reports
// errUnknown, is skipped over. Its raw bytes (tag included) are appended to
// the message's unknown-field data unless o.DiscardUnknown is set, in which
// case they are dropped.
//
// It returns a wire.ParseError for malformed input and passes through any
// other error produced by the resolver or the field decoders.
func (o UnmarshalOptions) unmarshalMessage(b []byte, m protoreflect.Message) error {
	messageDesc := m.Descriptor()
	fieldDescs := messageDesc.Fields()
	for len(b) > 0 {
		// Parse the tag (field number and wire type).
		num, wtyp, tagLen := wire.ConsumeTag(b)
		if tagLen < 0 {
			return wire.ParseError(tagLen)
		}

		// Find the descriptor for the field, falling back to the extension
		// resolver. A NotFound result is not an error: the field is simply
		// handled as unknown below.
		fd := fieldDescs.ByNumber(num)
		if fd == nil && messageDesc.ExtensionRanges().Has(num) {
			extType, err := o.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
			if err != nil && err != protoregistry.NotFound {
				return err
			}
			fd = extType
		}

		// Parse the field value.
		var err error
		var valLen int
		switch {
		case fd == nil:
			err = errUnknown
		case fd.IsList():
			valLen, err = o.unmarshalList(b[tagLen:], wtyp, m.Mutable(fd).List(), fd)
		case fd.IsMap():
			valLen, err = o.unmarshalMap(b[tagLen:], wtyp, m.Mutable(fd).Map(), fd)
		default:
			valLen, err = o.unmarshalSingular(b[tagLen:], wtyp, m, fd)
		}

		if err == errUnknown {
			// The value still has to be measured so the loop can step past it.
			valLen = wire.ConsumeFieldValue(num, wtyp, b[tagLen:])
			if valLen < 0 {
				return wire.ParseError(valLen)
			}
			// Honor DiscardUnknown: only retain the raw bytes when the
			// caller has not asked for unknown fields to be ignored.
			if !o.DiscardUnknown {
				m.SetUnknown(append(m.GetUnknown(), b[:tagLen+valLen]...))
			}
		} else if err != nil {
			return err
		}
		b = b[tagLen+valLen:]
	}
	return nil
}
// unmarshalSingular decodes one field value from b and stores it in field fd
// of m. unmarshalMessage uses it for fields that are neither lists nor maps.
//
// The value is first read with unmarshalScalar. For group and message kinds
// the bytes it yields are then decoded recursively into a child message: the
// existing child is reused (and therefore merged into) when the field is
// already populated and does not belong to a oneof; otherwise a new child is
// created and set on m before decoding. Values of every other kind replace
// the field's previous value.
//
// It returns the length reported by unmarshalScalar, or 0 when unmarshalScalar
// itself fails. Errors from decoding a child message are returned as-is.
func (o UnmarshalOptions) unmarshalSingular(b []byte, wtyp wire.Type, m protoreflect.Message, fd protoreflect.FieldDescriptor) (n int, err error) {
	val, n, err := o.unmarshalScalar(b, wtyp, fd)
	if err != nil {
		return 0, err
	}

	if kind := fd.Kind(); kind != protoreflect.GroupKind && kind != protoreflect.MessageKind {
		// Non-message scalars replace the previous value.
		m.Set(fd, val)
		return n, nil
	}

	// Messages are merged with any existing message value,
	// unless the message is part of a oneof.
	//
	// TODO: C++ merges into oneofs, while v1 does not.
	// Evaluate which behavior to pick.
	var child protoreflect.Message
	if !m.Has(fd) || fd.ContainingOneof() != nil {
		child = m.NewMessage(fd)
		m.Set(fd, protoreflect.ValueOf(child))
	} else {
		child = m.Mutable(fd).Message()
	}
	return n, o.unmarshalMessage(val.Bytes(), child)
}
// unmarshalMap decodes a single map entry from b and stores it in mapv.
//
// The entry must use the bytes wire type; any other wire type yields
// errUnknown so the caller can treat the field as unknown. The entry's payload
// is itself a sequence of fields in which number 1 carries the key and number
// 2 carries the value. Fields with other numbers, and key/value fields for
// which unmarshalScalar reports errUnknown, are skipped.
//
// A key or value that never decodes successfully falls back to the
// corresponding field's default. For group/message-valued maps the value is a
// message created up front with mapv.NewMessage; every occurrence of field 2
// is decoded into that same message, and it is stored as-is when field 2 is
// absent.
//
// The key is decoded into a temporary and committed only on success, so a
// later occurrence of field 1 that fails to decode cannot clobber a key that
// was already read (the value path behaves the same way).
//
// It returns the length reported by wire.ConsumeBytes for the whole entry.
func (o UnmarshalOptions) unmarshalMap(b []byte, wtyp wire.Type, mapv protoreflect.Map, fd protoreflect.FieldDescriptor) (n int, err error) {
	if wtyp != wire.BytesType {
		return 0, errUnknown
	}
	b, n = wire.ConsumeBytes(b)
	if n < 0 {
		return 0, wire.ParseError(n)
	}
	var (
		keyField = fd.MapKey()
		valField = fd.MapValue()
		key      protoreflect.Value
		val      protoreflect.Value
		haveKey  bool
		haveVal  bool
	)
	valKind := valField.Kind()
	valIsMessage := valKind == protoreflect.GroupKind || valKind == protoreflect.MessageKind
	if valIsMessage {
		val = protoreflect.ValueOf(mapv.NewMessage())
	}

	// Map entries are represented as a two-element message with fields
	// containing the key and value.
	for len(b) > 0 {
		num, fieldType, tagLen := wire.ConsumeTag(b)
		if tagLen < 0 {
			return 0, wire.ParseError(tagLen)
		}
		b = b[tagLen:]

		// Unless a case below decodes the field, it is treated as unknown.
		var fieldLen int
		err = errUnknown
		switch num {
		case 1:
			var k protoreflect.Value
			k, fieldLen, err = o.unmarshalScalar(b, fieldType, keyField)
			if err != nil {
				break
			}
			key = k
			haveKey = true
		case 2:
			var v protoreflect.Value
			v, fieldLen, err = o.unmarshalScalar(b, fieldType, valField)
			if err != nil {
				break
			}
			if valIsMessage {
				if err := o.unmarshalMessage(v.Bytes(), val.Message()); err != nil {
					return 0, err
				}
			} else {
				val = v
			}
			haveVal = true
		}

		if err == errUnknown {
			// Skip the field; unknown data inside a map entry is not kept.
			fieldLen = wire.ConsumeFieldValue(num, fieldType, b)
			if fieldLen < 0 {
				return 0, wire.ParseError(fieldLen)
			}
		} else if err != nil {
			return 0, err
		}
		b = b[fieldLen:]
	}

	// Every map entry should have entries for key and value, but this is not strictly required.
	if !haveKey {
		key = keyField.Default()
	}
	if !haveVal && !valIsMessage {
		val = valField.Default()
	}
	mapv.Set(key.MapKey(), val)
	return n, nil
}
// errUnknown is used internally to indicate fields which should be added
// to the unknown field set of a message. It is never returned from an exported
// function.
//
// It is a sentinel detected by direct comparison (err == errUnknown):
// unmarshalMessage then treats the field as unknown, while unmarshalMap skips
// the field inside a map entry.
var errUnknown = errors.New("BUG: internal error (unknown)")