encoding/prototext: add UnmarshalOptions.DiscardUnknown

This CL adds support for discarding unknown fields from the input.
We add support for parsing and resolving field numbers, so that
the DiscardUnknown option can ignore all unresolvable fields.
We continue to reject known fields identified by field number
since there are a number of edge cases that are difficult to resolve.

Change-Id: I5c88b7bae8656ce20e85e4b5c92d8564a5ff8bb6
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/195779
Reviewed-by: Herbie Ong <herbie@google.com>
This commit is contained in:
Joe Tsai 2019-09-15 00:17:39 -07:00
parent cd4a31e202
commit 8689fa59f4
2 changed files with 68 additions and 23 deletions

View File

@ -35,6 +35,12 @@ type UnmarshalOptions struct {
// return error if there are any missing required fields.
AllowPartial bool
// DiscardUnknown specifies whether to ignore unknown fields when parsing.
// An unknown field is any field whose field name or field number does not
// resolve to any known or extension field in the message.
// By default, unmarshal rejects unknown fields as an error.
DiscardUnknown bool
// Resolver is used for looking up types when unmarshaling
// google.protobuf.Any messages or extension fields.
// If nil, this defaults to using protoregistry.GlobalTypes.
@ -92,57 +98,74 @@ func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message)
tkey := tfield[0]
tval := tfield[1]
var fd pref.FieldDescriptor
// Resolve the field descriptor.
var name pref.Name
var fd pref.FieldDescriptor
var xt pref.ExtensionType
var xtErr error
switch tkey.Type() {
case text.Name:
name, _ = tkey.Name()
fd = fieldDescs.ByName(name)
switch {
case fd == nil:
if fd == nil {
// The proto name of a group field is in all lowercase,
// while the textproto field name is the group message name.
// Check to make sure that group name is correct.
gd := fieldDescs.ByName(pref.Name(strings.ToLower(string(name))))
if gd != nil && gd.Kind() == pref.GroupKind && gd.Message().Name() == name {
fd = gd
}
case fd.Kind() == pref.GroupKind && fd.Message().Name() != name:
} else if fd.Kind() == pref.GroupKind && fd.Message().Name() != name {
fd = nil // reset since field name is actually the message name
case fd.IsWeak() && fd.Message().IsPlaceholder():
fd = nil // reset since the weak reference is not linked in
}
case text.String:
// Handle extensions only. This code path is not for Any.
if messageDesc.FullName() == "google.protobuf.Any" {
break
}
// Extensions have to be registered first in the message's
// ExtensionTypes before setting a value to it.
extName := pref.FullName(tkey.String())
// Check first if it is already registered. This is the case for
// repeated fields.
xt, err := o.findExtension(extName)
if err != nil && err != protoregistry.NotFound {
return errors.New("unable to resolve [%v]: %v", extName, err)
xt, xtErr = o.findExtension(pref.FullName(tkey.String()))
case text.Uint:
v, _ := tkey.Uint(false)
num := pref.FieldNumber(v)
if !num.IsValid() {
return errors.New("invalid field number: %d", num)
}
if xt != nil {
fd = xt.TypeDescriptor()
if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
}
fd = fieldDescs.ByNumber(num)
if fd == nil {
xt, xtErr = o.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
}
}
if xt != nil {
fd = xt.TypeDescriptor()
if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
}
} else if xtErr != nil && xtErr != protoregistry.NotFound {
return errors.New("unable to resolve: %v", xtErr)
}
if fd != nil && fd.IsWeak() && fd.Message().IsPlaceholder() {
fd = nil // reset since the weak reference is not linked in
}
// Handle unknown fields.
if fd == nil {
// Ignore reserved names.
if messageDesc.ReservedNames().Has(name) {
if o.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
continue
}
// TODO: Can provide option to ignore unknown message fields.
return errors.New("%v contains unknown field: %v", messageDesc.FullName(), tkey)
}
// Handle fields identified by field number.
if tkey.Type() == text.Uint {
// TODO: Add an option to permit parsing field numbers.
//
// This requires careful thought as the MarshalOptions.EmitUnknown
// option allows formatting unknown fields as the field number
// and the best-effort textual representation of the field value.
// In that case, it may not be possible to unmarshal the value from
// a parser that does have information about the unknown field.
return errors.New("cannot specify field by number: %v", tkey)
}
switch {
case fd.IsList():
// If input is not a list, turn it into a list.

View File

@ -174,6 +174,28 @@ s_string: "谷歌"
inputMessage: &pb3.Scalars{},
inputText: "unknown_field: 456",
wantErr: true,
}, {
desc: "proto2 message contains discarded unknown field",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb2.Scalars{},
inputText: `unknown_field:123 1000:"hello"`,
}, {
desc: "proto3 message contains discarded unknown field",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb3.Scalars{},
inputText: `unknown_field:456 1000:"goodbye"`,
}, {
desc: "proto2 message cannot parse field number",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb2.Scalars{},
inputText: `13:"hello"`,
wantErr: true,
}, {
desc: "proto3 message cannot parse field number",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb3.Scalars{},
inputText: `13:"goodbye"`,
wantErr: true,
}, {
desc: "proto2 numeric key field",
inputMessage: &pb2.Scalars{},