encoding/prototext: rewrite of internal/encoding/text

* Fixes golang/protobuf#842. Unmarshal can now parse singular or
  repeated message fields without the field separator.
* Fixes golang/protobuf#1011. Handles negative 0 properly.
* For unknown fields with fixed 32-bit and 64-bit wire types, output is
  now in hex format with a 0x prefix, similar to the C++ lib output. The
  previous Go implementation simply output these as decimal numbers (%d).
* All parsing errors, except for unexpected EOF, should now contain line
  and column number info.
* Fixed the following conformance-related features:
  * Parse nan,inf,-inf,infinity,-infinity as case-insensitive.
  * Interpret float32 overflows as inf or -inf.
  * Parse large int-like number as proto float.
* Discard unknown map field if DiscardUnknown=true.
* Allow whitespaces/comments in Any type URL and extension field names per spec.
* Improves performance and memory usage. It is now as fast and efficient as
  protojson, if not better on most benchmarks.

name                                     old time/op    new time/op    delta
Text/Unmarshal/google_message1_proto2-4    14.1µs ±43%     8.7µs ±12%  -38.27%  (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4    11.6µs ±18%     7.7µs ± 9%  -33.69%  (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4           6.20ms ±27%    4.10ms ± 5%  -33.95%  (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4      12.8µs ± 6%    10.3µs ±23%  -19.54%  (p=0.000 n=9+10)
Text/Marshal/google_message1_proto3-4      11.9µs ±16%     8.6µs ±10%  -27.45%  (p=0.000 n=10+10)
Text/Marshal/google_message2-4             5.59ms ± 5%    5.30ms ±22%     ~     (p=0.356 n=9+10)
JSON/Unmarshal/google_message1_proto2-4    12.3µs ±61%    13.9µs ±26%     ~     (p=0.190 n=10+10)
JSON/Unmarshal/google_message1_proto3-4    7.51µs ± 6%    7.86µs ± 1%   +4.66%  (p=0.010 n=10+9)
JSON/Unmarshal/google_message2-4           3.74ms ± 2%    3.94ms ± 2%   +5.32%  (p=0.000 n=10+10)
JSON/Marshal/google_message1_proto2-4      9.90µs ±12%    9.95µs ± 4%     ~     (p=0.315 n=9+10)
JSON/Marshal/google_message1_proto3-4      7.55µs ± 4%    7.93µs ± 3%   +4.98%  (p=0.000 n=10+10)
JSON/Marshal/google_message2-4             4.29ms ± 5%    4.49ms ± 2%   +4.53%  (p=0.001 n=10+10)

name                                     old alloc/op   new alloc/op   delta
Text/Unmarshal/google_message1_proto2-4    12.5kB ± 0%     2.0kB ± 0%  -83.87%  (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4    12.2kB ± 0%     1.8kB ± 0%  -85.33%  (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4           5.35MB ± 0%    0.89MB ± 0%  -83.28%  (p=0.000 n=10+9)
Text/Marshal/google_message1_proto2-4      12.0kB ± 0%     1.4kB ± 0%  -88.15%  (p=0.000 n=10+10)
Text/Marshal/google_message1_proto3-4      12.4kB ± 0%     1.9kB ± 0%  -84.91%  (p=0.000 n=10+10)
Text/Marshal/google_message2-4             5.64MB ± 0%    1.02MB ± 0%  -81.85%  (p=0.000 n=10+9)
JSON/Unmarshal/google_message1_proto2-4    2.29kB ± 0%    2.29kB ± 0%     ~     (all equal)
JSON/Unmarshal/google_message1_proto3-4    2.08kB ± 0%    2.08kB ± 0%     ~     (all equal)
JSON/Unmarshal/google_message2-4            899kB ± 0%     899kB ± 0%     ~     (p=1.000 n=10+10)
JSON/Marshal/google_message1_proto2-4      1.46kB ± 0%    1.46kB ± 0%     ~     (all equal)
JSON/Marshal/google_message1_proto3-4      1.36kB ± 0%    1.36kB ± 0%     ~     (all equal)
JSON/Marshal/google_message2-4             1.19MB ± 0%    1.19MB ± 0%     ~     (p=0.197 n=10+10)

name                                     old allocs/op  new allocs/op  delta
Text/Unmarshal/google_message1_proto2-4       133 ± 0%        89 ± 0%  -33.08%  (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4       108 ± 0%        67 ± 0%  -37.96%  (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4            60.0k ± 0%     38.7k ± 0%  -35.52%  (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4        65.0 ± 0%      25.0 ± 0%  -61.54%  (p=0.000 n=10+10)
Text/Marshal/google_message1_proto3-4        59.0 ± 0%      22.0 ± 0%  -62.71%  (p=0.000 n=10+10)
Text/Marshal/google_message2-4              27.4k ± 0%      7.3k ± 0%  -73.39%  (p=0.000 n=10+10)
JSON/Unmarshal/google_message1_proto2-4      95.0 ± 0%      95.0 ± 0%     ~     (all equal)
JSON/Unmarshal/google_message1_proto3-4      74.0 ± 0%      74.0 ± 0%     ~     (all equal)
JSON/Unmarshal/google_message2-4            36.3k ± 0%     36.3k ± 0%     ~     (all equal)
JSON/Marshal/google_message1_proto2-4        27.0 ± 0%      27.0 ± 0%     ~     (all equal)
JSON/Marshal/google_message1_proto3-4        30.0 ± 0%      30.0 ± 0%     ~     (all equal)
JSON/Marshal/google_message2-4              11.3k ± 0%     11.3k ± 0%     ~     (p=1.000 n=10+10)

Change-Id: I377925facde5535f06333b6f25e9c9b358dc062f
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/204602
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
This commit is contained in:
Herbie Ong 2019-10-31 17:10:15 -07:00
parent 3c5fb5f879
commit 9b3d97c473
18 changed files with 4933 additions and 2637 deletions

View File

@ -6,6 +6,7 @@ package prototext
import (
"fmt"
"io"
"strings"
"unicode/utf8"
@ -55,53 +56,101 @@ type UnmarshalOptions struct {
func (o UnmarshalOptions) Unmarshal(b []byte, m proto.Message) error {
proto.Reset(m)
// Parse into text.Value of message type.
val, err := text.Unmarshal(b)
if err != nil {
return err
}
if o.Resolver == nil {
o.Resolver = protoregistry.GlobalTypes
}
err = o.unmarshalMessage(val.Message(), m.ProtoReflect())
if err != nil {
dec := decoder{text.NewDecoder(b), o}
if err := dec.unmarshalMessage(m.ProtoReflect(), false); err != nil {
return err
}
if o.AllowPartial {
return nil
}
return proto.IsInitialized(m)
}
// unmarshalMessage unmarshals a [][2]text.Value message into the given protoreflect.Message.
func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message) error {
// decoder pairs the low-level text token decoder with the UnmarshalOptions
// that govern how tokens are applied to a message. It is constructed once per
// Unmarshal call and its methods walk the token stream via the embedded
// Decoder's Read/Peek/Position methods.
type decoder struct {
// Embedded so that Read, Peek and Position are callable directly on decoder.
*text.Decoder
// opts carries the caller's options (e.g. Resolver, DiscardUnknown).
opts UnmarshalOptions
}
// newError builds an error whose message is prefixed with the line and column
// that the byte offset pos maps to, followed by the formatted f/x message.
func (d decoder) newError(pos int, f string, x ...interface{}) error {
	ln, col := d.Position(pos)
	return errors.New(fmt.Sprintf("(line %d:%d): ", ln, col)+f, x...)
}
// unexpectedTokenError reports tok as a syntax error, quoting the token's raw
// text and pointing at the token's position.
func (d decoder) unexpectedTokenError(tok text.Token) error {
	pos := tok.Pos()
	raw := tok.RawString()
	return d.syntaxError(pos, "unexpected token: %s", raw)
}
// syntaxError builds an error labelled "syntax error" together with the line
// and column that the byte offset pos maps to, followed by the formatted f/x
// message.
func (d decoder) syntaxError(pos int, f string, x ...interface{}) error {
	ln, col := d.Position(pos)
	return errors.New(fmt.Sprintf("syntax error (line %d:%d): ", ln, col)+f, x...)
}
// unmarshalMessage unmarshals into the given protoreflect.Message.
func (d decoder) unmarshalMessage(m pref.Message, checkDelims bool) error {
messageDesc := m.Descriptor()
if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
return errors.New("no support for proto1 MessageSets")
}
// Handle expanded Any message.
if messageDesc.FullName() == "google.protobuf.Any" && isExpandedAny(tmsg) {
return o.unmarshalAny(tmsg[0], m)
if messageDesc.FullName() == "google.protobuf.Any" {
return d.unmarshalAny(m, checkDelims)
}
if checkDelims {
tok, err := d.Read()
if err != nil {
return err
}
if tok.Kind() != text.MessageOpen {
return d.unexpectedTokenError(tok)
}
}
var seenNums set.Ints
var seenOneofs set.Ints
fieldDescs := messageDesc.Fields()
for _, tfield := range tmsg {
tkey := tfield[0]
tval := tfield[1]
for {
// Read field name.
tok, err := d.Read()
if err != nil {
return err
}
switch typ := tok.Kind(); typ {
case text.Name:
// Continue below.
case text.EOF:
if checkDelims {
return io.ErrUnexpectedEOF
}
return nil
default:
if checkDelims && typ == text.MessageClose {
return nil
}
return d.unexpectedTokenError(tok)
}
// Resolve the field descriptor.
var name pref.Name
var fd pref.FieldDescriptor
var xt pref.ExtensionType
var xtErr error
switch tkey.Type() {
case text.Name:
name, _ = tkey.Name()
var isFieldNumberName bool
switch tok.NameKind() {
case text.IdentName:
name = pref.Name(tok.IdentName())
fd = fieldDescs.ByName(name)
if fd == nil {
// The proto name of a group field is in all lowercase,
@ -113,30 +162,30 @@ func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message)
} else if fd.Kind() == pref.GroupKind && fd.Message().Name() != name {
fd = nil // reset since field name is actually the message name
}
case text.String:
case text.TypeName:
// Handle extensions only. This code path is not for Any.
if messageDesc.FullName() == "google.protobuf.Any" {
break
}
xt, xtErr = o.findExtension(pref.FullName(tkey.String()))
case text.Uint:
v, _ := tkey.Uint(false)
num := pref.FieldNumber(v)
xt, xtErr = d.findExtension(pref.FullName(tok.TypeName()))
case text.FieldNumber:
isFieldNumberName = true
num := pref.FieldNumber(tok.FieldNumber())
if !num.IsValid() {
return errors.New("invalid field number: %d", num)
return d.newError(tok.Pos(), "invalid field number: %d", num)
}
fd = fieldDescs.ByNumber(num)
if fd == nil {
xt, xtErr = o.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
xt, xtErr = d.opts.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
}
}
if xt != nil {
fd = xt.TypeDescriptor()
if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
return d.newError(tok.Pos(), "message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
}
} else if xtErr != nil && xtErr != protoregistry.NotFound {
return errors.New("unable to resolve: %v", xtErr)
return d.newError(tok.Pos(), "unable to resolve [%s]: %v", tok.RawString(), xtErr)
}
if flags.ProtoLegacy {
if fd != nil && fd.IsWeak() && fd.Message().IsPlaceholder() {
@ -146,67 +195,64 @@ func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message)
// Handle unknown fields.
if fd == nil {
if o.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
if d.opts.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
d.skipValue()
continue
}
return errors.New("%v contains unknown field: %v", messageDesc.FullName(), tkey)
return d.newError(tok.Pos(), "unknown field: %v", tok.RawString())
}
// Handle fields identified by field number.
if tkey.Type() == text.Uint {
if isFieldNumberName {
// TODO: Add an option to permit parsing field numbers.
//
// This requires careful thought as the MarshalOptions.EmitUnknown
// option allows formatting unknown fields as the field number
// and the best-effort textual representation of the field value.
// In that case, it may not be possible to unmarshal the value from
// a parser that does have information about the unknown field.
return errors.New("cannot specify field by number: %v", tkey)
// option allows formatting unknown fields as the field number and the
// best-effort textual representation of the field value. In that case,
// it may not be possible to unmarshal the value from a parser that does
// have information about the unknown field.
return d.newError(tok.Pos(), "cannot specify field by number: %v", tok.RawString())
}
switch {
case fd.IsList():
// If input is not a list, turn it into a list.
var items []text.Value
if tval.Type() != text.List {
items = []text.Value{tval}
} else {
items = tval.List()
kind := fd.Kind()
if kind != pref.MessageKind && kind != pref.GroupKind && !tok.HasSeparator() {
return d.syntaxError(tok.Pos(), "missing field separator :")
}
list := m.Mutable(fd).List()
if err := o.unmarshalList(items, fd, list); err != nil {
if err := d.unmarshalList(fd, list); err != nil {
return err
}
case fd.IsMap():
// If input is not a list, turn it into a list.
var items []text.Value
if tval.Type() != text.List {
items = []text.Value{tval}
} else {
items = tval.List()
}
case fd.IsMap():
mmap := m.Mutable(fd).Map()
if err := o.unmarshalMap(items, fd, mmap); err != nil {
if err := d.unmarshalMap(fd, mmap); err != nil {
return err
}
default:
kind := fd.Kind()
if kind != pref.MessageKind && kind != pref.GroupKind && !tok.HasSeparator() {
return d.syntaxError(tok.Pos(), "missing field separator :")
}
// If field is a oneof, check if it has already been set.
if od := fd.ContainingOneof(); od != nil {
idx := uint64(od.Index())
if seenOneofs.Has(idx) {
return errors.New("oneof %v is already set", od.FullName())
return d.newError(tok.Pos(), "error parsing %q, oneof %v is already set", tok.RawString(), od.FullName())
}
seenOneofs.Set(idx)
}
// Required or optional fields.
num := uint64(fd.Number())
if seenNums.Has(num) {
return errors.New("non-repeated field %v is repeated", fd.FullName())
return d.newError(tok.Pos(), "non-repeated field %q is repeated", tok.RawString())
}
if err := o.unmarshalSingular(tval, fd, m); err != nil {
if err := d.unmarshalSingular(fd, m); err != nil {
return err
}
seenNums.Set(num)
@ -217,285 +263,527 @@ func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message)
}
// findExtension returns protoreflect.ExtensionType from the Resolver if found.
func (o UnmarshalOptions) findExtension(xtName pref.FullName) (pref.ExtensionType, error) {
xt, err := o.Resolver.FindExtensionByName(xtName)
func (d decoder) findExtension(xtName pref.FullName) (pref.ExtensionType, error) {
xt, err := d.opts.Resolver.FindExtensionByName(xtName)
if err == nil {
return xt, nil
}
return messageset.FindMessageSetExtension(o.Resolver, xtName)
return messageset.FindMessageSetExtension(d.opts.Resolver, xtName)
}
// unmarshalSingular unmarshals given text.Value into the non-repeated field.
func (o UnmarshalOptions) unmarshalSingular(input text.Value, fd pref.FieldDescriptor, m pref.Message) error {
// unmarshalSingular unmarshals a non-repeated field value specified by the
// given FieldDescriptor.
func (d decoder) unmarshalSingular(fd pref.FieldDescriptor, m pref.Message) error {
var val pref.Value
var err error
switch fd.Kind() {
case pref.MessageKind, pref.GroupKind:
if input.Type() != text.Message {
return errors.New("%v contains invalid message/group value: %v", fd.FullName(), input)
}
val = m.NewField(fd)
if err := o.unmarshalMessage(input.Message(), val.Message()); err != nil {
return err
}
err = d.unmarshalMessage(val.Message(), true)
default:
var err error
val, err = unmarshalScalar(input, fd)
if err != nil {
return err
}
val, err = d.unmarshalScalar(fd)
}
m.Set(fd, val)
return nil
if err == nil {
m.Set(fd, val)
}
return err
}
// unmarshalScalar converts the given text.Value to a scalar/enum protoreflect.Value specified in
// the given FieldDescriptor. Caller should not pass in a FieldDescriptor for a message/group kind.
func unmarshalScalar(input text.Value, fd pref.FieldDescriptor) (pref.Value, error) {
const b32 = false
const b64 = true
// unmarshalScalar unmarshals a scalar/enum protoreflect.Value specified by the
// given FieldDescriptor.
func (d decoder) unmarshalScalar(fd pref.FieldDescriptor) (pref.Value, error) {
tok, err := d.Read()
if err != nil {
return pref.Value{}, err
}
switch kind := fd.Kind(); kind {
if tok.Kind() != text.Scalar {
return pref.Value{}, d.unexpectedTokenError(tok)
}
kind := fd.Kind()
switch kind {
case pref.BoolKind:
if b, ok := input.Bool(); ok {
return pref.ValueOfBool(bool(b)), nil
if b, ok := tok.Bool(); ok {
return pref.ValueOfBool(b), nil
}
case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind:
if n, ok := input.Int(b32); ok {
return pref.ValueOfInt32(int32(n)), nil
if n, ok := tok.Int32(); ok {
return pref.ValueOfInt32(n), nil
}
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
if n, ok := input.Int(b64); ok {
return pref.ValueOfInt64(int64(n)), nil
if n, ok := tok.Int64(); ok {
return pref.ValueOfInt64(n), nil
}
case pref.Uint32Kind, pref.Fixed32Kind:
if n, ok := input.Uint(b32); ok {
return pref.ValueOfUint32(uint32(n)), nil
if n, ok := tok.Uint32(); ok {
return pref.ValueOfUint32(n), nil
}
case pref.Uint64Kind, pref.Fixed64Kind:
if n, ok := input.Uint(b64); ok {
return pref.ValueOfUint64(uint64(n)), nil
if n, ok := tok.Uint64(); ok {
return pref.ValueOfUint64(n), nil
}
case pref.FloatKind:
if n, ok := input.Float(b32); ok {
return pref.ValueOfFloat32(float32(n)), nil
if n, ok := tok.Float32(); ok {
return pref.ValueOfFloat32(n), nil
}
case pref.DoubleKind:
if n, ok := input.Float(b64); ok {
return pref.ValueOfFloat64(float64(n)), nil
if n, ok := tok.Float64(); ok {
return pref.ValueOfFloat64(n), nil
}
case pref.StringKind:
if input.Type() == text.String {
s := input.String()
if s, ok := tok.String(); ok {
if utf8.ValidString(s) {
return pref.ValueOfString(s), nil
}
return pref.Value{}, errors.InvalidUTF8(string(fd.FullName()))
return pref.Value{}, d.newError(tok.Pos(), "contains invalid UTF-8")
}
case pref.BytesKind:
if input.Type() == text.String {
return pref.ValueOfBytes([]byte(input.String())), nil
if b, ok := tok.String(); ok {
return pref.ValueOfBytes([]byte(b)), nil
}
case pref.EnumKind:
// If input is int32, use directly.
if n, ok := input.Int(b32); ok {
return pref.ValueOfEnum(pref.EnumNumber(n)), nil
}
if name, ok := input.Name(); ok {
if lit, ok := tok.Enum(); ok {
// Lookup EnumNumber based on name.
if enumVal := fd.Enum().Values().ByName(name); enumVal != nil {
if enumVal := fd.Enum().Values().ByName(pref.Name(lit)); enumVal != nil {
return pref.ValueOfEnum(enumVal.Number()), nil
}
}
if num, ok := tok.Int32(); ok {
return pref.ValueOfEnum(pref.EnumNumber(num)), nil
}
default:
panic(fmt.Sprintf("invalid scalar kind %v", kind))
}
return pref.Value{}, errors.New("%v contains invalid scalar value: %v", fd.FullName(), input)
return pref.Value{}, d.newError(tok.Pos(), "invalid value for %v type: %v", kind, tok.RawString())
}
// unmarshalList unmarshals given []text.Value into given protoreflect.List.
func (o UnmarshalOptions) unmarshalList(inputList []text.Value, fd pref.FieldDescriptor, list pref.List) error {
// unmarshalList unmarshals into given protoreflect.List. A list value can
// either be in [] syntax or simply just a single scalar/message value.
func (d decoder) unmarshalList(fd pref.FieldDescriptor, list pref.List) error {
tok, err := d.Peek()
if err != nil {
return err
}
switch fd.Kind() {
case pref.MessageKind, pref.GroupKind:
for _, input := range inputList {
if input.Type() != text.Message {
return errors.New("%v contains invalid message/group value: %v", fd.FullName(), input)
switch tok.Kind() {
case text.ListOpen:
d.Read()
for {
tok, err := d.Peek()
if err != nil {
return err
}
switch tok.Kind() {
case text.ListClose:
d.Read()
return nil
case text.MessageOpen:
pval := list.NewElement()
if err := d.unmarshalMessage(pval.Message(), true); err != nil {
return err
}
list.Append(pval)
default:
return d.unexpectedTokenError(tok)
}
}
val := list.NewElement()
if err := o.unmarshalMessage(input.Message(), val.Message()); err != nil {
case text.MessageOpen:
pval := list.NewElement()
if err := d.unmarshalMessage(pval.Message(), true); err != nil {
return err
}
list.Append(val)
list.Append(pval)
return nil
}
default:
for _, input := range inputList {
val, err := unmarshalScalar(input, fd)
switch tok.Kind() {
case text.ListOpen:
d.Read()
for {
tok, err := d.Peek()
if err != nil {
return err
}
switch tok.Kind() {
case text.ListClose:
d.Read()
return nil
case text.Scalar:
pval, err := d.unmarshalScalar(fd)
if err != nil {
return err
}
list.Append(pval)
default:
return d.unexpectedTokenError(tok)
}
}
case text.Scalar:
pval, err := d.unmarshalScalar(fd)
if err != nil {
return err
}
list.Append(val)
list.Append(pval)
return nil
}
}
return nil
return d.unexpectedTokenError(tok)
}
// unmarshalMap unmarshals given []text.Value into given protoreflect.Map.
func (o UnmarshalOptions) unmarshalMap(input []text.Value, fd pref.FieldDescriptor, mmap pref.Map) error {
// Determine ahead whether map entry is a scalar type or a message type in order to call the
// appropriate unmarshalMapValue func inside the for loop below.
unmarshalMapValue := unmarshalMapScalarValue
// unmarshalMap unmarshals into given protoreflect.Map. A map value is a
// textproto message containing {key: <kvalue>, value: <mvalue>}.
func (d decoder) unmarshalMap(fd pref.FieldDescriptor, mmap pref.Map) error {
// Determine ahead whether map entry is a scalar type or a message type in
// order to call the appropriate unmarshalMapValue func inside
// unmarshalMapEntry.
var unmarshalMapValue func() (pref.Value, error)
switch fd.MapValue().Kind() {
case pref.MessageKind, pref.GroupKind:
unmarshalMapValue = o.unmarshalMapMessageValue
}
for _, entry := range input {
if entry.Type() != text.Message {
return errors.New("%v contains invalid map entry: %v", fd.FullName(), entry)
unmarshalMapValue = func() (pref.Value, error) {
pval := mmap.NewValue()
if err := d.unmarshalMessage(pval.Message(), true); err != nil {
return pref.Value{}, err
}
return pval, nil
}
tkey, tval, err := parseMapEntry(entry.Message(), fd.FullName())
if err != nil {
return err
}
pkey, err := unmarshalMapKey(tkey, fd.MapKey())
if err != nil {
return err
}
err = unmarshalMapValue(tval, pkey, fd.MapValue(), mmap)
if err != nil {
return err
default:
unmarshalMapValue = func() (pref.Value, error) {
return d.unmarshalScalar(fd.MapValue())
}
}
return nil
}
tok, err := d.Read()
if err != nil {
return err
}
switch tok.Kind() {
case text.MessageOpen:
return d.unmarshalMapEntry(fd, mmap, unmarshalMapValue)
// parseMapEntry parses [][2]text.Value for field names key and value, and return corresponding
// field values. If there are duplicate field names, the value for the last field is returned. If
// the field name does not exist, it will return the zero value of text.Value. It will return an
// error if there are unknown field names.
func parseMapEntry(mapEntry [][2]text.Value, name pref.FullName) (key text.Value, value text.Value, err error) {
for _, field := range mapEntry {
keyStr, ok := field[0].Name()
if ok {
switch keyStr {
case "key":
if key.Type() != 0 {
return key, value, errors.New("%v contains duplicate key field", name)
case text.ListOpen:
for {
tok, err := d.Read()
if err != nil {
return err
}
switch tok.Kind() {
case text.ListClose:
return nil
case text.MessageOpen:
if err := d.unmarshalMapEntry(fd, mmap, unmarshalMapValue); err != nil {
return err
}
key = field[1]
case "value":
if value.Type() != 0 {
return key, value, errors.New("%v contains duplicate value field", name)
}
value = field[1]
default:
ok = false
return d.unexpectedTokenError(tok)
}
}
if !ok {
// TODO: Do not return error if ignore unknown option is added and enabled.
return key, value, errors.New("%v contains unknown map entry name: %v", name, field[0])
}
}
return key, value, nil
}
// unmarshalMapKey converts given text.Value into a protoreflect.MapKey. A map key type is any
// integral or string type.
func unmarshalMapKey(input text.Value, fd pref.FieldDescriptor) (pref.MapKey, error) {
// If input is not set, use the zero value.
if input.Type() == 0 {
return fd.Default().MapKey(), nil
}
val, err := unmarshalScalar(input, fd)
if err != nil {
return pref.MapKey{}, errors.New("%v contains invalid key: %v", fd.FullName(), input)
}
return val.MapKey(), nil
}
// unmarshalMapMessageValue unmarshals given message-type text.Value into a protoreflect.Map for
// the given MapKey.
func (o UnmarshalOptions) unmarshalMapMessageValue(input text.Value, pkey pref.MapKey, fd pref.FieldDescriptor, mmap pref.Map) error {
var value [][2]text.Value
switch input.Type() {
case 0:
case text.Message:
value = input.Message()
default:
return errors.New("%v contains invalid value: %v", fd.FullName(), input)
return d.unexpectedTokenError(tok)
}
val := mmap.NewValue()
if err := o.unmarshalMessage(value, val.Message()); err != nil {
return err
}
mmap.Set(pkey, val)
return nil
}
// unmarshalMapScalarValue unmarshals given scalar-type text.Value into a protoreflect.Map
// for the given MapKey.
func unmarshalMapScalarValue(input text.Value, pkey pref.MapKey, fd pref.FieldDescriptor, mmap pref.Map) error {
var val pref.Value
if input.Type() == 0 {
val = fd.Default()
} else {
var err error
val, err = unmarshalScalar(input, fd)
// unmarshalMapEntry unmarshals a single map entry into given protoreflect.Map.
// A map entry is a textproto message containing {key: <kvalue>, value: <mvalue>}.
func (d decoder) unmarshalMapEntry(fd pref.FieldDescriptor, mmap pref.Map, unmarshalMapValue func() (pref.Value, error)) error {
var key pref.MapKey
var pval pref.Value
Loop:
for {
// Read field name.
tok, err := d.Read()
if err != nil {
return err
}
switch tok.Kind() {
case text.Name:
if tok.NameKind() != text.IdentName {
if !d.opts.DiscardUnknown {
return d.newError(tok.Pos(), "unknown map entry field %q", tok.RawString())
}
d.skipValue()
continue Loop
}
// Continue below.
case text.MessageClose:
break Loop
default:
return d.unexpectedTokenError(tok)
}
name := tok.IdentName()
switch name {
case "key":
if !tok.HasSeparator() {
return d.syntaxError(tok.Pos(), "missing field separator :")
}
if key.IsValid() {
return d.newError(tok.Pos(), `map entry "key" cannot be repeated`)
}
val, err := d.unmarshalScalar(fd.MapKey())
if err != nil {
return err
}
key = val.MapKey()
case "value":
if kind := fd.MapValue().Kind(); (kind != pref.MessageKind) && (kind != pref.GroupKind) {
if !tok.HasSeparator() {
return d.syntaxError(tok.Pos(), "missing field separator :")
}
}
if pval.IsValid() {
return d.newError(tok.Pos(), `map entry "value" cannot be repeated`)
}
pval, err = unmarshalMapValue()
if err != nil {
return err
}
default:
if !d.opts.DiscardUnknown {
return d.newError(tok.Pos(), "unknown map entry field %q", name)
}
d.skipValue()
}
}
mmap.Set(pkey, val)
if !key.IsValid() {
key = fd.MapKey().Default().MapKey()
}
if !pval.IsValid() {
switch fd.MapValue().Kind() {
case pref.MessageKind, pref.GroupKind:
// If value field is not set for message/group types, construct an
// empty one as default.
pval = mmap.NewValue()
default:
pval = fd.MapValue().Default()
}
}
mmap.Set(key, pval)
return nil
}
// isExpandedAny returns true if given [][2]text.Value may be an expanded Any that contains only one
// field with key type of text.String type and value type of text.Message.
func isExpandedAny(tmsg [][2]text.Value) bool {
if len(tmsg) != 1 {
return false
// unmarshalAny unmarshals an Any textproto. It can either be in expanded form
// or non-expanded form.
func (d decoder) unmarshalAny(m pref.Message, checkDelims bool) error {
var typeURL string
var bValue []byte
// hasFields tracks which valid fields have been seen in the loop below in
// order to flag an error if there are duplicates or conflicts. It may
// contain the strings "type_url", "value" and "expanded". The literal
// "expanded" is used to indicate that the expanded form has been
// encountered already.
hasFields := map[string]bool{}
if checkDelims {
tok, err := d.Read()
if err != nil {
return err
}
if tok.Kind() != text.MessageOpen {
return d.unexpectedTokenError(tok)
}
}
field := tmsg[0]
return field[0].Type() == text.String && field[1].Type() == text.Message
}
Loop:
for {
// Read field name. Can only have 3 possible field names, i.e. type_url,
// value and type URL name inside [].
tok, err := d.Read()
if err != nil {
return err
}
if typ := tok.Kind(); typ != text.Name {
if checkDelims {
if typ == text.MessageClose {
break Loop
}
} else if typ == text.EOF {
break Loop
}
return d.unexpectedTokenError(tok)
}
// unmarshalAny unmarshals an expanded Any textproto. This method assumes that the given
// tfield has key type of text.String and value type of text.Message.
func (o UnmarshalOptions) unmarshalAny(tfield [2]text.Value, m pref.Message) error {
typeURL := tfield[0].String()
value := tfield[1].Message()
switch tok.NameKind() {
case text.IdentName:
// Both type_url and value fields require field separator :.
if !tok.HasSeparator() {
return d.syntaxError(tok.Pos(), "missing field separator :")
}
mt, err := o.Resolver.FindMessageByURL(typeURL)
if err != nil {
return errors.New("unable to resolve message [%v]: %v", typeURL, err)
}
// Create new message for the embedded message type and unmarshal the
// value into it.
m2 := mt.New()
if err := o.unmarshalMessage(value, m2); err != nil {
return err
}
// Serialize the embedded message and assign the resulting bytes to the value field.
b, err := proto.MarshalOptions{
AllowPartial: true, // never check required fields inside an Any
Deterministic: true,
}.Marshal(m2.Interface())
if err != nil {
return err
switch tok.IdentName() {
case "type_url":
if hasFields["type_url"] {
return d.newError(tok.Pos(), "duplicate Any type_url field")
}
if hasFields["expanded"] {
return d.newError(tok.Pos(), "conflict with [%s] field", typeURL)
}
tok, err := d.Read()
if err != nil {
return err
}
var ok bool
typeURL, ok = tok.String()
if !ok {
return d.newError(tok.Pos(), "invalid Any type_url: %v", tok.RawString())
}
hasFields["type_url"] = true
case "value":
if hasFields["value"] {
return d.newError(tok.Pos(), "duplicate Any value field")
}
if hasFields["expanded"] {
return d.newError(tok.Pos(), "conflict with [%s] field", typeURL)
}
tok, err := d.Read()
if err != nil {
return err
}
s, ok := tok.String()
if !ok {
return d.newError(tok.Pos(), "invalid Any value: %v", tok.RawString())
}
bValue = []byte(s)
hasFields["value"] = true
default:
if !d.opts.DiscardUnknown {
return d.newError(tok.Pos(), "invalid field name %q in google.protobuf.Any message", tok.RawString())
}
}
case text.TypeName:
if hasFields["expanded"] {
return d.newError(tok.Pos(), "cannot have more than one type")
}
if hasFields["type_url"] {
return d.newError(tok.Pos(), "conflict with type_url field")
}
typeURL = tok.TypeName()
var err error
bValue, err = d.unmarshalExpandedAny(typeURL, tok.Pos())
if err != nil {
return err
}
hasFields["expanded"] = true
default:
if !d.opts.DiscardUnknown {
return d.newError(tok.Pos(), "invalid field name %q in google.protobuf.Any message", tok.RawString())
}
}
}
fds := m.Descriptor().Fields()
fdType := fds.ByNumber(fieldnum.Any_TypeUrl)
fdValue := fds.ByNumber(fieldnum.Any_Value)
m.Set(fdType, pref.ValueOfString(typeURL))
m.Set(fdValue, pref.ValueOfBytes(b))
if len(typeURL) > 0 {
m.Set(fds.ByNumber(fieldnum.Any_TypeUrl), pref.ValueOfString(typeURL))
}
if len(bValue) > 0 {
m.Set(fds.ByNumber(fieldnum.Any_Value), pref.ValueOfBytes(bValue))
}
return nil
}
// unmarshalExpandedAny parses the message body that follows an expanded Any
// type name and returns it in serialized wire form.
//
// typeURL is resolved through the configured Resolver to find the embedded
// message type; pos is the position used for error reporting. The embedded
// message is read from the token stream (including its delimiters) and then
// marshaled deterministically without checking required fields.
func (d decoder) unmarshalExpandedAny(typeURL string, pos int) ([]byte, error) {
	msgType, err := d.opts.Resolver.FindMessageByURL(typeURL)
	if err != nil {
		return nil, d.newError(pos, "unable to resolve message [%v]: %v", typeURL, err)
	}

	// Decode the embedded message from the input into a fresh instance.
	embedded := msgType.New()
	if err := d.unmarshalMessage(embedded, true); err != nil {
		return nil, err
	}

	// Re-encode the embedded message; the bytes become the Any value field.
	marshalOpts := proto.MarshalOptions{
		AllowPartial:  true, // Never check required fields inside an Any.
		Deterministic: true,
	}
	encoded, err := marshalOpts.Marshal(embedded.Interface())
	if err != nil {
		return nil, d.newError(pos, "error in marshaling message into Any.value: %v", err)
	}
	return encoded, nil
}
// skipValue makes the decoder parse a field value in order to advance the read
// to the next field. It relies on Read returning an error if the types are not
// in valid sequence.
//
// A scalar value is fully consumed by the first Read. A message value is
// skipped through its matching close token. A list value is skipped item by
// item, up to and including the list close token. Any error returned by Read
// is propagated to the caller.
func (d decoder) skipValue() error {
	tok, err := d.Read()
	if err != nil {
		return err
	}
	// Only need to continue reading for messages and lists.
	switch tok.Kind() {
	case text.MessageOpen:
		return d.skipMessageValue()

	case text.ListOpen:
		for {
			tok, err := d.Read()
			if err != nil {
				return err
			}
			switch tok.Kind() {
			case text.ListClose:
				return nil
			case text.MessageOpen:
				// Skip this message item and keep looping: returning here
				// would leave the remaining items and the list close token
				// unread.
				if err := d.skipMessageValue(); err != nil {
					return err
				}
			default:
				// Skip items. The Read above has already consumed the item, so
				// nothing further needs to be read for it. This will not
				// validate whether skipped values are of the same type or not,
				// same behavior as C++
				// TextFormat::Parser::AllowUnknownField(true) version 3.8.0.
			}
		}
	}
	return nil
}
// skipMessageValue consumes tokens until the message close token that matches
// an already-read MessageOpen. For every field name encountered, the field's
// value is skipped via skipValue; any other token is ignored. Errors from Read
// or skipValue are returned as-is.
func (d decoder) skipMessageValue() error {
	for {
		tok, err := d.Read()
		if err != nil {
			return err
		}
		kind := tok.Kind()
		if kind == text.MessageClose {
			return nil
		}
		if kind != text.Name {
			continue
		}
		if err := d.skipValue(); err != nil {
			return err
		}
	}
}

View File

@ -6,6 +6,7 @@ package prototext_test
import (
"math"
"strings"
"testing"
"google.golang.org/protobuf/encoding/prototext"
@ -27,7 +28,7 @@ func TestUnmarshal(t *testing.T) {
inputMessage proto.Message
inputText string
wantMessage proto.Message
wantErr bool // TODO: Verify error message content.
wantErr string // Expected error substring.
skip bool
}{{
desc: "proto2 empty message",
@ -125,7 +126,7 @@ opt_string: "谷歌"
desc: "case sensitive",
inputMessage: &pb3.Scalars{},
inputText: `S_BOOL: true`,
wantErr: true,
wantErr: "unknown field: S_BOOL",
}, {
desc: "proto3 scalars",
inputMessage: &pb3.Scalars{},
@ -162,17 +163,17 @@ s_string: "谷歌"
desc: "string with invalid UTF-8",
inputMessage: &pb3.Scalars{},
inputText: `s_string: "abc\xff"`,
wantErr: true,
wantErr: "(line 1:11): contains invalid UTF-8",
}, {
desc: "proto2 message contains unknown field",
inputMessage: &pb2.Scalars{},
inputText: "unknown_field: 123",
wantErr: true,
wantErr: "unknown field",
}, {
desc: "proto3 message contains unknown field",
inputMessage: &pb3.Scalars{},
inputText: "unknown_field: 456",
wantErr: true,
wantErr: "unknown field",
}, {
desc: "proto2 message contains discarded unknown field",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
@ -188,111 +189,104 @@ s_string: "谷歌"
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb2.Scalars{},
inputText: `13:"hello"`,
wantErr: true,
wantErr: "cannot specify field by number",
}, {
desc: "proto3 message cannot parse field number",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb3.Scalars{},
inputText: `13:"goodbye"`,
wantErr: true,
wantErr: "cannot specify field by number",
}, {
desc: "proto2 numeric key field",
inputMessage: &pb2.Scalars{},
inputText: "1: true",
wantErr: true,
wantErr: "cannot specify field by number",
}, {
desc: "proto3 numeric key field",
inputMessage: &pb3.Scalars{},
inputText: "1: true",
wantErr: true,
wantErr: "cannot specify field by number",
}, {
desc: "invalid bool value",
inputMessage: &pb3.Scalars{},
inputText: "s_bool: 123",
wantErr: true,
wantErr: "invalid value for bool",
}, {
desc: "invalid int32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_int32: not_a_num",
wantErr: true,
wantErr: "invalid value for int32",
}, {
desc: "invalid int64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_int64: 'not a num either'",
wantErr: true,
wantErr: "invalid value for int64",
}, {
desc: "invalid uint32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed32: -42",
wantErr: true,
wantErr: "invalid value for fixed32",
}, {
desc: "invalid uint64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_uint64: -47",
wantErr: true,
wantErr: "invalid value for uint64",
}, {
desc: "invalid sint32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sint32: '42'",
wantErr: true,
wantErr: "invalid value for sint32",
}, {
desc: "invalid sint64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sint64: '-47'",
wantErr: true,
wantErr: "invalid value for sint64",
}, {
desc: "invalid fixed32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed32: -42",
wantErr: true,
wantErr: "invalid value for fixed32",
}, {
desc: "invalid fixed64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed64: -42",
wantErr: true,
wantErr: "invalid value for fixed64",
}, {
desc: "invalid sfixed32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sfixed32: 'not valid'",
wantErr: true,
wantErr: "invalid value for sfixed32",
}, {
desc: "invalid sfixed64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sfixed64: bad",
wantErr: true,
wantErr: "invalid value for sfixed64",
}, {
desc: "float positive infinity",
inputMessage: &pb3.Scalars{},
inputText: "s_float: inf",
wantMessage: &pb3.Scalars{
SFloat: float32(math.Inf(1)),
desc: "conformance: FloatFieldMaxValue",
inputMessage: &pb2.Scalars{},
inputText: `opt_float: 3.4028235e+38`,
wantMessage: &pb2.Scalars{
OptFloat: proto.Float32(3.40282347e+38),
},
}, {
desc: "float negative infinity",
inputMessage: &pb3.Scalars{},
inputText: "s_float: -inf",
wantMessage: &pb3.Scalars{
SFloat: float32(math.Inf(-1)),
desc: "conformance: FloatFieldLargerThanUint64",
inputMessage: &pb2.Scalars{},
inputText: `opt_float: 18446744073709551616`,
wantMessage: &pb2.Scalars{
OptFloat: proto.Float32(1.84467441e+19),
},
}, {
desc: "double positive infinity",
inputMessage: &pb3.Scalars{},
inputText: "s_double: inf",
wantMessage: &pb3.Scalars{
SDouble: math.Inf(1),
},
}, {
desc: "double negative infinity",
inputMessage: &pb3.Scalars{},
inputText: "s_double: -inf",
wantMessage: &pb3.Scalars{
SDouble: math.Inf(-1),
desc: "conformance: FloatFieldTooLarge",
inputMessage: &pb2.Scalars{},
inputText: `opt_float: 3.4028235e+39`,
wantMessage: &pb2.Scalars{
OptFloat: proto.Float32(float32(math.Inf(1))),
},
}, {
desc: "invalid string value",
inputMessage: &pb3.Scalars{},
inputText: "s_string: invalid_string",
wantErr: true,
wantErr: "invalid value for string type",
}, {
desc: "proto2 bytes set to empty string",
inputMessage: &pb2.Scalars{},
@ -312,7 +306,7 @@ s_string: "谷歌"
opt_bool: true
opt_bool: false
`,
wantErr: true,
wantErr: `(line 3:1): non-repeated field "opt_bool" is repeated`,
}, {
desc: "proto2 more duplicate singular field",
inputMessage: &pb2.Scalars{},
@ -321,14 +315,14 @@ opt_bool: true
opt_string: "hello"
opt_bool: false
`,
wantErr: true,
wantErr: `(line 4:1): non-repeated field "opt_bool" is repeated`,
}, {
desc: "proto2 invalid singular field",
inputMessage: &pb2.Scalars{},
inputText: `
opt_bool: [true, false]
`,
wantErr: true,
wantErr: "(line 2:11): unexpected token: [",
}, {
desc: "proto3 duplicate singular field",
inputMessage: &pb3.Scalars{},
@ -336,7 +330,7 @@ opt_bool: [true, false]
s_bool: false
s_bool: true
`,
wantErr: true,
wantErr: `non-repeated field "s_bool" is repeated`,
}, {
desc: "proto3 more duplicate singular field",
inputMessage: &pb3.Scalars{},
@ -345,7 +339,7 @@ s_bool: false
s_string: ""
s_bool: true
`,
wantErr: true,
wantErr: `non-repeated field "s_bool" is repeated`,
}, {
desc: "proto2 enum",
inputMessage: &pb2.Enums{},
@ -386,7 +380,7 @@ opt_nested_enum: -101
opt_enum: UNNAMED
opt_nested_enum: UNNAMED_TOO
`,
wantErr: true,
wantErr: "invalid value for enum type: UNNAMED",
}, {
desc: "proto3 enum name value",
inputMessage: &pb3.Enums{},
@ -426,6 +420,17 @@ s_nested_enum: -0x80000000
inputText: `
opt_nested: {}
OptGroup: {}
`,
wantMessage: &pb2.Nests{
OptNested: &pb2.Nested{},
Optgroup: &pb2.Nests_OptGroup{},
},
}, {
desc: "message fields with no field separator",
inputMessage: &pb2.Nests{},
inputText: `
opt_nested {}
OptGroup {}
`,
wantMessage: &pb2.Nests{
OptNested: &pb2.Nested{},
@ -435,7 +440,7 @@ OptGroup: {}
desc: "group field name",
inputMessage: &pb2.Nests{},
inputText: `optgroup: {}`,
wantErr: true,
wantErr: "unknown field: optgroup",
}, {
desc: "proto2 nested messages",
inputMessage: &pb2.Nests{},
@ -488,7 +493,7 @@ s_nested: {
s_string: "abc\xff"
}
`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "oneof set to empty string",
inputMessage: &pb3.Oneofs{},
@ -545,7 +550,7 @@ oneof_nested: {
oneof_enum: ZERO
oneof_string: "hello"
`,
wantErr: true,
wantErr: `error parsing "oneof_string", oneof pb3.Oneofs.union is already set`,
}, {
desc: "repeated scalar using same field name",
inputMessage: &pb2.Repeats{},
@ -580,7 +585,7 @@ rpt_string: "b"
desc: "repeated contains invalid UTF-8",
inputMessage: &pb2.Repeats{},
inputText: `rpt_string: "abc\xff"`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "repeated enums",
inputMessage: &pb2.Enums{},
@ -642,6 +647,75 @@ RptGroup: {}
{},
},
},
}, {
desc: "repeated message fields without field separator",
inputMessage: &pb2.Nests{},
inputText: `
rpt_nested {
opt_string: "repeat nested one"
}
rpt_nested: [
{
opt_string: "repeat nested two"
},
{}
]
`,
wantMessage: &pb2.Nests{
RptNested: []*pb2.Nested{
{
OptString: proto.String("repeat nested one"),
},
{
OptString: proto.String("repeat nested two"),
},
{},
},
},
}, {
desc: "bools",
inputMessage: &pb2.Repeats{},
inputText: `
rpt_bool: [ True, true, t, 1, False, false, f, 0 ]
`,
wantMessage: &pb2.Repeats{
RptBool: []bool{true, true, true, true, false, false, false, false},
},
}, {
desc: "special floats and doubles",
inputMessage: &pb2.Repeats{},
inputText: `
rpt_float: [ inf, Inf, infinity, InFiniTy, -inf, -inF, -infinitY, -InfinitY, nan, NaN, Nan ],
rpt_double: [ inf, Inf, infinity, InFiniTy, -inf, -inF, -infinitY, -InfinitY, nan, NaN, Nan ],
`,
wantMessage: &pb2.Repeats{
RptFloat: []float32{
float32(math.Inf(1)),
float32(math.Inf(1)),
float32(math.Inf(1)),
float32(math.Inf(1)),
float32(math.Inf(-1)),
float32(math.Inf(-1)),
float32(math.Inf(-1)),
float32(math.Inf(-1)),
float32(math.NaN()),
float32(math.NaN()),
float32(math.NaN()),
},
RptDouble: []float64{
math.Inf(1),
math.Inf(1),
math.Inf(1),
math.Inf(1),
math.Inf(-1),
math.Inf(-1),
math.Inf(-1),
math.Inf(-1),
math.NaN(),
math.NaN(),
math.NaN(),
},
},
}, {
desc: "map fields 1",
inputMessage: &pb3.Maps{},
@ -650,7 +724,7 @@ int32_to_str: {
key: -101
value: "-101"
}
int32_to_str: {
int32_to_str {
key: 0
value: "zero"
}
@ -662,7 +736,7 @@ int32_to_str: {
key: 255
value: "0xff"
}
bool_to_uint32: {
bool_to_uint32 {
key: true
value: 42
}
@ -708,7 +782,7 @@ uint64_to_enum: {
inputText: `
str_to_nested: {
key: "nested_one"
value: {
value {
s_string: "nested in a map"
}
}
@ -783,7 +857,7 @@ int32_to_str: {
value: "cero"
}
`,
wantErr: true,
wantErr: `map entry "key" cannot be repeated`,
}, {
desc: "map contains duplicate value fields",
inputMessage: &pb3.Maps{},
@ -794,7 +868,7 @@ int32_to_str: {
value: "uno"
}
`,
wantErr: true,
wantErr: `map entry "value" cannot be repeated`,
}, {
desc: "map contains missing key",
inputMessage: &pb3.Maps{},
@ -899,7 +973,7 @@ int32_to_str: {}
value: "abc\xff"
}
`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "map field key contains invalid UTF-8",
inputMessage: &pb3.Maps{},
@ -908,7 +982,7 @@ int32_to_str: {}
value: {}
}
`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "map contains unknown field",
inputMessage: &pb3.Maps{},
@ -919,7 +993,7 @@ int32_to_str: {
unknown: "bad"
}
`,
wantErr: true,
wantErr: `(line 5:3): unknown map entry field "unknown"`,
}, {
desc: "map contains extension-like key field",
inputMessage: &pb3.Maps{},
@ -929,7 +1003,7 @@ int32_to_str: {
value: "ten"
}
`,
wantErr: true,
wantErr: `unknown map entry field "[key]"`,
}, {
desc: "map contains invalid key",
inputMessage: &pb3.Maps{},
@ -939,7 +1013,7 @@ int32_to_str: {
value: "cero"
}
`,
wantErr: true,
wantErr: "(line 3:8): invalid value for int32 type",
}, {
desc: "map contains invalid value",
inputMessage: &pb3.Maps{},
@ -949,7 +1023,7 @@ int32_to_str: {
value: 101
}
`,
wantErr: true,
wantErr: "(line 4:10): invalid value for string type",
}, {
desc: "map contains invalid message value",
inputMessage: &pb3.Maps{},
@ -959,7 +1033,7 @@ str_to_nested: {
value: 1
}
`,
wantErr: true,
wantErr: "syntax error (line 4:10): unexpected token: 1",
}, {
desc: "map using mix of [] and repeated",
inputMessage: &pb3.Maps{},
@ -996,7 +1070,7 @@ int32_to_str: {
}, {
desc: "required fields not set",
inputMessage: &pb2.Requireds{},
wantErr: true,
wantErr: "required field",
}, {
desc: "required field set",
inputMessage: &pb2.PartialRequired{},
@ -1019,7 +1093,7 @@ req_enum: ONE
ReqString: proto.String("hello"),
ReqEnum: pb2.Enum_ONE.Enum(),
},
wantErr: true,
wantErr: "required field",
}, {
desc: "required fields partially set with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@ -1062,7 +1136,7 @@ req_nested: {}
wantMessage: &pb2.IndirectRequired{
OptNested: &pb2.NestedWithRequired{},
},
wantErr: true,
wantErr: "required field",
}, {
desc: "indirect required field with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@ -1088,7 +1162,7 @@ rpt_nested: {}
{},
},
},
wantErr: true,
wantErr: "required field",
}, {
desc: "indirect required field in repeated with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@ -1129,7 +1203,7 @@ str_to_nested: {
},
},
},
wantErr: true,
wantErr: "required field",
}, {
desc: "indirect required field in map with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@ -1163,7 +1237,7 @@ str_to_nested: {
OneofNested: &pb2.NestedWithRequired{},
},
},
wantErr: true,
wantErr: "required field",
}, {
desc: "indirect required field in oneof with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@ -1217,7 +1291,7 @@ opt_int32: 42
desc: "extension field contains invalid UTF-8",
inputMessage: &pb2.Extensions{},
inputText: `[pb2.opt_ext_string]: "abc\xff"`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "extensions of repeated fields",
inputMessage: &pb2.Extensions{},
@ -1313,7 +1387,7 @@ opt_int32: 42
desc: "invalid extension field name",
inputMessage: &pb2.Extensions{},
inputText: "[pb2.invalid_message_field]: true",
wantErr: true,
wantErr: "unknown field",
}, {
desc: "MessageSet",
inputMessage: &pb2.MessageSet{},
@ -1366,7 +1440,7 @@ opt_int32: 42
opt_string: "not a messageset extension"
}
`,
wantErr: true,
wantErr: "unknown field: [pb2.FakeMessageSetExtension]",
skip: !flags.ProtoLegacy,
}, {
desc: "not real MessageSet 3",
@ -1474,18 +1548,18 @@ value: "some bytes"
s_string: "abc\xff"
}
`,
wantErr: true,
wantErr: "contains invalid UTF-8",
}, {
desc: "Any expanded with unregistered type",
umo: prototext.UnmarshalOptions{Resolver: new(preg.Types)},
inputMessage: &anypb.Any{},
inputText: `[SomeMessage]: {}`,
wantErr: true,
wantErr: "unable to resolve message [SomeMessage]",
}, {
desc: "Any expanded with invalid value",
inputMessage: &anypb.Any{},
inputText: `[pb2.Nested]: 123`,
wantErr: true,
wantErr: "unexpected token: 123",
}, {
desc: "Any expanded with unknown fields",
inputMessage: &anypb.Any{},
@ -1493,7 +1567,7 @@ value: "some bytes"
[pb2.Nested]: {}
unknown: ""
`,
wantErr: true,
wantErr: `invalid field name "unknown" in google.protobuf.Any message`,
}, {
desc: "Any contains expanded and unexpanded fields",
inputMessage: &anypb.Any{},
@ -1501,7 +1575,7 @@ unknown: ""
[pb2.Nested]: {}
type_url: "pb2.Nested"
`,
wantErr: true,
wantErr: "(line 3:1): conflict with [pb2.Nested] field",
}, {
desc: "weak fields",
inputMessage: &testpb.TestWeak{},
@ -1516,7 +1590,7 @@ type_url: "pb2.Nested"
desc: "weak fields; unknown field",
inputMessage: &testpb.TestWeak{},
inputText: `weak_message1:{a:1} weak_message2:{a:1}`,
wantErr: true, // weak_message2 is unknown since the package containing it is not imported
wantErr: "unknown field: weak_message2", // weak_message2 is unknown since the package containing it is not imported
skip: !flags.ProtoLegacy,
}}
@ -1527,11 +1601,17 @@ type_url: "pb2.Nested"
}
t.Run(tt.desc, func(t *testing.T) {
err := tt.umo.Unmarshal([]byte(tt.inputText), tt.inputMessage)
if err != nil && !tt.wantErr {
t.Errorf("Unmarshal() returned error: %v\n\n", err)
if err != nil {
if tt.wantErr == "" {
t.Errorf("Unmarshal() got unexpected error: %v", err)
} else if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("Unmarshal() error got %q, want %q", err, tt.wantErr)
}
return
}
if err == nil && tt.wantErr {
t.Error("Unmarshal() got nil error, want error\n\n")
if tt.wantErr != "" {
t.Errorf("Unmarshal() got nil error, want error %q", tt.wantErr)
return
}
if tt.wantMessage != nil && !proto.Equal(tt.inputMessage, tt.wantMessage) {
t.Errorf("Unmarshal()\n<got>\n%v\n<want>\n%v\n", tt.inputMessage, tt.wantMessage)

View File

@ -7,6 +7,7 @@ package prototext
import (
"fmt"
"sort"
"strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/encoding/messageset"
@ -90,6 +91,9 @@ func (o MarshalOptions) Format(m proto.Message) string {
// MarshalOptions object. Do not depend on the output being stable. It may
// change over time across different versions of the program.
func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) {
const outputASCII = false
var delims = [2]byte{'{', '}'}
if o.Multiline && o.Indent == "" {
o.Indent = defaultIndent
}
@ -97,41 +101,52 @@ func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) {
o.Resolver = protoregistry.GlobalTypes
}
v, err := o.marshalMessage(m.ProtoReflect())
internalEnc, err := text.NewEncoder(o.Indent, delims, outputASCII)
if err != nil {
return nil, err
}
delims := [2]byte{'{', '}'}
const outputASCII = false
b, err := text.Marshal(v, o.Indent, delims, outputASCII)
enc := encoder{internalEnc, o}
err = enc.marshalMessage(m.ProtoReflect(), false)
if err != nil {
return nil, err
}
out := enc.Bytes()
if len(o.Indent) > 0 && len(out) > 0 {
out = append(out, '\n')
}
if o.AllowPartial {
return b, nil
return out, nil
}
return b, proto.IsInitialized(m)
return out, proto.IsInitialized(m)
}
// marshalMessage converts a protoreflect.Message to a text.Value.
func (o MarshalOptions) marshalMessage(m pref.Message) (text.Value, error) {
type encoder struct {
*text.Encoder
opts MarshalOptions
}
// marshalMessage marshals the given protoreflect.Message.
func (e encoder) marshalMessage(m pref.Message, inclDelims bool) error {
messageDesc := m.Descriptor()
if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
return text.Value{}, errors.New("no support for proto1 MessageSets")
return errors.New("no support for proto1 MessageSets")
}
if inclDelims {
e.StartMessage()
defer e.EndMessage()
}
// Handle Any expansion.
if messageDesc.FullName() == "google.protobuf.Any" {
if msg, err := o.marshalAny(m); err == nil {
// Return as is if no error.
return msg, nil
if e.marshalAny(m) {
return nil
}
// Otherwise continue on to marshal Any as a regular message.
// If unable to expand, continue on to marshal Any as a regular message.
}
// Handle known fields.
var msgFields [][2]text.Value
// Marshal known fields.
fieldDescs := messageDesc.Fields()
size := fieldDescs.Len()
for i := 0; i < size; {
@ -142,262 +157,254 @@ func (o MarshalOptions) marshalMessage(m pref.Message) (text.Value, error) {
} else {
i++
}
if fd == nil || !m.Has(fd) {
continue
}
name := text.ValueOf(fd.Name())
name := fd.Name()
// Use type name for group field name.
if fd.Kind() == pref.GroupKind {
name = text.ValueOf(fd.Message().Name())
name = fd.Message().Name()
}
pval := m.Get(fd)
var err error
msgFields, err = o.appendField(msgFields, name, pval, fd)
if err != nil {
return text.Value{}, err
val := m.Get(fd)
if err := e.marshalField(string(name), val, fd); err != nil {
return err
}
}
// Handle extensions.
var err error
msgFields, err = o.appendExtensions(msgFields, m)
if err != nil {
return text.Value{}, err
// Marshal extensions.
if err := e.marshalExtensions(m); err != nil {
return err
}
// Handle unknown fields.
if o.EmitUnknown {
msgFields = appendUnknown(msgFields, m.GetUnknown())
// Marshal unknown fields.
if e.opts.EmitUnknown {
e.marshalUnknown(m.GetUnknown())
}
return text.ValueOf(msgFields), nil
return nil
}
// appendField marshals a protoreflect.Value and appends it to the given [][2]text.Value.
func (o MarshalOptions) appendField(msgFields [][2]text.Value, name text.Value, pval pref.Value, fd pref.FieldDescriptor) ([][2]text.Value, error) {
// marshalField marshals the given field with protoreflect.Value.
func (e encoder) marshalField(name string, val pref.Value, fd pref.FieldDescriptor) error {
switch {
case fd.IsList():
items, err := o.marshalList(pval.List(), fd)
if err != nil {
return msgFields, err
}
for _, item := range items {
msgFields = append(msgFields, [2]text.Value{name, item})
}
return e.marshalList(name, val.List(), fd)
case fd.IsMap():
items, err := o.marshalMap(pval.Map(), fd)
if err != nil {
return msgFields, err
}
for _, item := range items {
msgFields = append(msgFields, [2]text.Value{name, item})
}
return e.marshalMap(name, val.Map(), fd)
default:
tval, err := o.marshalSingular(pval, fd)
if err != nil {
return msgFields, err
}
msgFields = append(msgFields, [2]text.Value{name, tval})
e.WriteName(name)
return e.marshalSingular(val, fd)
}
return msgFields, nil
}
// marshalSingular converts a non-repeated field value to text.Value.
// This includes all scalar types, enums, messages, and groups.
func (o MarshalOptions) marshalSingular(val pref.Value, fd pref.FieldDescriptor) (text.Value, error) {
// marshalSingular marshals the given non-repeated field value. This includes
// all scalar types, enums, messages, and groups.
func (e encoder) marshalSingular(val pref.Value, fd pref.FieldDescriptor) error {
kind := fd.Kind()
switch kind {
case pref.BoolKind,
pref.Int32Kind, pref.Sint32Kind, pref.Uint32Kind,
pref.Int64Kind, pref.Sint64Kind, pref.Uint64Kind,
pref.Sfixed32Kind, pref.Fixed32Kind,
pref.Sfixed64Kind, pref.Fixed64Kind,
pref.FloatKind, pref.DoubleKind,
pref.BytesKind:
return text.ValueOf(val.Interface()), nil
case pref.BoolKind:
e.WriteBool(val.Bool())
case pref.StringKind:
s := val.String()
if !utf8.ValidString(s) {
return text.Value{}, errors.InvalidUTF8(string(fd.FullName()))
return errors.InvalidUTF8(string(fd.FullName()))
}
return text.ValueOf(s), nil
e.WriteString(s)
case pref.Int32Kind, pref.Int64Kind,
pref.Sint32Kind, pref.Sint64Kind,
pref.Sfixed32Kind, pref.Sfixed64Kind:
e.WriteInt(val.Int())
case pref.Uint32Kind, pref.Uint64Kind,
pref.Fixed32Kind, pref.Fixed64Kind:
e.WriteUint(val.Uint())
case pref.FloatKind:
// Encoder.WriteFloat handles the special numbers NaN and infinities.
e.WriteFloat(val.Float(), 32)
case pref.DoubleKind:
// Encoder.WriteFloat handles the special numbers NaN and infinities.
e.WriteFloat(val.Float(), 64)
case pref.BytesKind:
e.WriteString(string(val.Bytes()))
case pref.EnumKind:
num := val.Enum()
if desc := fd.Enum().Values().ByNumber(num); desc != nil {
return text.ValueOf(desc.Name()), nil
e.WriteLiteral(string(desc.Name()))
} else {
// Use numeric value if there is no enum description.
e.WriteInt(int64(num))
}
// Use numeric value if there is no enum description.
return text.ValueOf(int32(num)), nil
case pref.MessageKind, pref.GroupKind:
return o.marshalMessage(val.Message())
}
return e.marshalMessage(val.Message(), true)
panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
default:
panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
}
return nil
}
// marshalList converts a protoreflect.List to []text.Value.
func (o MarshalOptions) marshalList(list pref.List, fd pref.FieldDescriptor) ([]text.Value, error) {
// marshalList marshals the given protoreflect.List as multiple name-value fields.
func (e encoder) marshalList(name string, list pref.List, fd pref.FieldDescriptor) error {
size := list.Len()
values := make([]text.Value, 0, size)
for i := 0; i < size; i++ {
item := list.Get(i)
val, err := o.marshalSingular(item, fd)
if err != nil {
// Return already marshaled values.
return values, err
e.WriteName(name)
if err := e.marshalSingular(list.Get(i), fd); err != nil {
return err
}
values = append(values, val)
}
return values, nil
return nil
}
var (
mapKeyName = text.ValueOf(pref.Name("key"))
mapValueName = text.ValueOf(pref.Name("value"))
)
// marshalMap converts a protoreflect.Map to []text.Value.
func (o MarshalOptions) marshalMap(mmap pref.Map, fd pref.FieldDescriptor) ([]text.Value, error) {
// values is a list of messages.
values := make([]text.Value, 0, mmap.Len())
// marshalMap marshals the given protoreflect.Map as multiple name-value fields.
func (e encoder) marshalMap(name string, mmap pref.Map, fd pref.FieldDescriptor) error {
var err error
mapsort.Range(mmap, fd.MapKey().Kind(), func(key pref.MapKey, val pref.Value) bool {
var keyTxtVal text.Value
keyTxtVal, err = o.marshalSingular(key.Value(), fd.MapKey())
e.WriteName(name)
e.StartMessage()
defer e.EndMessage()
e.WriteName("key")
err = e.marshalSingular(key.Value(), fd.MapKey())
if err != nil {
return false
}
var valTxtVal text.Value
valTxtVal, err = o.marshalSingular(val, fd.MapValue())
e.WriteName("value")
err = e.marshalSingular(val, fd.MapValue())
if err != nil {
return false
}
// Map entry (message) contains 2 fields, first field for key and second field for value.
msg := text.ValueOf([][2]text.Value{
{mapKeyName, keyTxtVal},
{mapValueName, valTxtVal},
})
values = append(values, msg)
err = nil
return true
})
if err != nil {
return nil, err
}
return values, nil
return err
}
// appendExtensions marshals extension fields and appends them to the given [][2]text.Value.
func (o MarshalOptions) appendExtensions(msgFields [][2]text.Value, m pref.Message) ([][2]text.Value, error) {
var err error
var entries [][2]text.Value
// marshalExtensions marshals extension fields.
func (e encoder) marshalExtensions(m pref.Message) error {
type entry struct {
key string
value pref.Value
desc pref.FieldDescriptor
}
// Get a sorted list based on field key first.
var entries []entry
m.Range(func(fd pref.FieldDescriptor, v pref.Value) bool {
if !fd.IsExtension() {
return true
}
// For MessageSet extensions, the name used is the parent message.
name := fd.FullName()
if messageset.IsMessageSetExtension(fd) {
name = name.Parent()
}
// Use string type to produce [name] format.
tname := text.ValueOf(string(name))
entries, err = o.appendField(entries, tname, v, fd)
if err != nil {
return false
}
err = nil
entries = append(entries, entry{
key: string(name),
value: v,
desc: fd,
})
return true
})
if err != nil {
return msgFields, err
}
// Sort extensions lexicographically and append to output.
sort.SliceStable(entries, func(i, j int) bool {
return entries[i][0].String() < entries[j][0].String()
// Sort extensions lexicographically.
sort.Slice(entries, func(i, j int) bool {
return entries[i].key < entries[j].key
})
return append(msgFields, entries...), nil
// Write out sorted list.
for _, entry := range entries {
// Extension field name is the proto field name enclosed in [].
name := "[" + entry.key + "]"
if err := e.marshalField(name, entry.value, entry.desc); err != nil {
return err
}
}
return nil
}
// appendUnknown parses the given []byte and appends field(s) into the given fields slice.
// marshalUnknown parses the given []byte and marshals fields out.
// This function assumes proper encoding in the given []byte.
func appendUnknown(fields [][2]text.Value, b []byte) [][2]text.Value {
func (e encoder) marshalUnknown(b []byte) {
const dec = 10
const hex = 16
for len(b) > 0 {
var value interface{}
num, wtype, n := wire.ConsumeTag(b)
b = b[n:]
e.WriteName(strconv.FormatInt(int64(num), dec))
switch wtype {
case wire.VarintType:
value, n = wire.ConsumeVarint(b)
var v uint64
v, n = wire.ConsumeVarint(b)
e.WriteUint(v)
case wire.Fixed32Type:
value, n = wire.ConsumeFixed32(b)
var v uint32
v, n = wire.ConsumeFixed32(b)
e.WriteLiteral("0x" + strconv.FormatUint(uint64(v), hex))
case wire.Fixed64Type:
value, n = wire.ConsumeFixed64(b)
var v uint64
v, n = wire.ConsumeFixed64(b)
e.WriteLiteral("0x" + strconv.FormatUint(v, hex))
case wire.BytesType:
value, n = wire.ConsumeBytes(b)
var v []byte
v, n = wire.ConsumeBytes(b)
e.WriteString(string(v))
case wire.StartGroupType:
e.StartMessage()
var v []byte
v, n = wire.ConsumeGroup(num, b)
var msg [][2]text.Value
value = appendUnknown(msg, v)
e.marshalUnknown(v)
e.EndMessage()
default:
panic(fmt.Sprintf("error parsing unknown field wire type: %v", wtype))
panic(fmt.Sprintf("prototext: error parsing unknown field wire type: %v", wtype))
}
fields = append(fields, [2]text.Value{text.ValueOf(uint32(num)), text.ValueOf(value)})
b = b[n:]
}
return fields
}
// marshalAny converts a google.protobuf.Any protoreflect.Message to a text.Value.
func (o MarshalOptions) marshalAny(m pref.Message) (text.Value, error) {
fds := m.Descriptor().Fields()
// marshalAny marshals the given google.protobuf.Any message in expanded form.
// It returns true if it was able to marshal, else false.
func (e encoder) marshalAny(any pref.Message) bool {
// Construct the embedded message.
fds := any.Descriptor().Fields()
fdType := fds.ByNumber(fieldnum.Any_TypeUrl)
fdValue := fds.ByNumber(fieldnum.Any_Value)
typeURL := m.Get(fdType).String()
value := m.Get(fdValue)
emt, err := o.Resolver.FindMessageByURL(typeURL)
typeURL := any.Get(fdType).String()
mt, err := e.opts.Resolver.FindMessageByURL(typeURL)
if err != nil {
return text.Value{}, err
return false
}
em := emt.New().Interface()
m := mt.New().Interface()
// Unmarshal bytes into embedded message.
fdValue := fds.ByNumber(fieldnum.Any_Value)
value := any.Get(fdValue)
err = proto.UnmarshalOptions{
AllowPartial: true,
Resolver: o.Resolver,
}.Unmarshal(value.Bytes(), em)
Resolver: e.opts.Resolver,
}.Unmarshal(value.Bytes(), m)
if err != nil {
return text.Value{}, err
return false
}
msg, err := o.marshalMessage(em.ProtoReflect())
// Get current encoder position. If marshaling fails, reset encoder output
// back to this position.
pos := e.Snapshot()
// Field name is the proto field name enclosed in [].
e.WriteName("[" + typeURL + "]")
err = e.marshalMessage(m.ProtoReflect(), true)
if err != nil {
return text.Value{}, err
e.Reset(pos)
return false
}
// Expanded Any field value contains only a single field with the type_url field value as the
// field name in [] and a text marshaled field value of the embedded message.
msgFields := [][2]text.Value{
{
text.ValueOf(typeURL),
msg,
},
}
return text.ValueOf(msgFields), nil
return true
}

View File

@ -845,7 +845,7 @@ req_nested: {}
m.ProtoReflect().SetUnknown(pack.Message{
pack.Tag{101, pack.VarintType}, pack.Bool(true),
pack.Tag{102, pack.VarintType}, pack.Varint(0xff),
pack.Tag{103, pack.Fixed32Type}, pack.Uint32(47),
pack.Tag{103, pack.Fixed32Type}, pack.Uint32(0x47),
pack.Tag{104, pack.Fixed64Type}, pack.Int64(0xdeadbeef),
}.Marshal())
return m
@ -853,8 +853,8 @@ req_nested: {}
want: `opt_string: "this message contains unknown fields"
101: 1
102: 255
103: 47
104: 3735928559
103: 0x47
104: 0xdeadbeef
`,
}, {
desc: "unknown length-delimited",
@ -1205,6 +1205,93 @@ value: "\n\x13embedded inside Any\x12\x0b\n\tinception"
},
want: `type_url: "foo/pb2.Nested"
value: "\x80"
`,
}, {
desc: "Any expanded in another message",
input: func() *pb2.KnownTypes {
m1 := &pb2.Nested{
OptString: proto.String("message inside Any of another Any field"),
}
b1, err := proto.MarshalOptions{Deterministic: true}.Marshal(m1)
if err != nil {
t.Fatalf("error in binary marshaling message for Any.value: %v", err)
}
m2 := &anypb.Any{
TypeUrl: "pb2.Nested",
Value: b1,
}
b2, err := proto.MarshalOptions{Deterministic: true}.Marshal(m2)
if err != nil {
t.Fatalf("error in binary marshaling message for Any.value: %v", err)
}
return &pb2.KnownTypes{
OptAny: &anypb.Any{
TypeUrl: "google.protobuf.Any",
Value: b2,
},
}
}(),
want: `opt_any: {
[google.protobuf.Any]: {
[pb2.Nested]: {
opt_string: "message inside Any of another Any field"
}
}
}
`,
}, {
desc: "Any not expanded due to invalid UTF-8",
input: func() *pb2.KnownTypes {
m := &pb2.Nested{
OptString: proto.String("invalid UTF-8 abc\xff"),
}
b, err := proto.MarshalOptions{Deterministic: true}.Marshal(m)
if err != nil {
t.Fatalf("error in binary marshaling message for Any.value: %v", err)
}
return &pb2.KnownTypes{
OptAny: &anypb.Any{
TypeUrl: "pb2.Nested",
Value: b,
},
}
}(),
want: `opt_any: {
type_url: "pb2.Nested"
value: "\n\x12invalid UTF-8 abc\xff"
}
`,
}, {
desc: "Any inside Any not expanded",
input: func() *pb2.KnownTypes {
m1 := &pb2.Nested{
OptString: proto.String("invalid UTF-8 abc\xff"),
}
b1, err := proto.MarshalOptions{Deterministic: true}.Marshal(m1)
if err != nil {
t.Fatalf("error in binary marshaling message for Any.value: %v", err)
}
m2 := &anypb.Any{
TypeUrl: "pb2.Nested",
Value: b1,
}
b2, err := proto.MarshalOptions{Deterministic: true}.Marshal(m2)
if err != nil {
t.Fatalf("error in binary marshaling message for Any.value: %v", err)
}
return &pb2.KnownTypes{
OptAny: &anypb.Any{
TypeUrl: "google.protobuf.Any",
Value: b2,
},
}
}(),
want: `opt_any: {
[google.protobuf.Any]: {
type_url: "pb2.Nested"
value: "\n\x12invalid UTF-8 abc\xff"
}
}
`,
}}

View File

@ -1,9 +1 @@
Recommended.Proto3.ProtobufInput.MessageUnknownFields_Print.TextFormatOutput
Required.Proto3.TextFormatInput.FloatFieldLargerThanUint64.ProtobufOutput
Required.Proto3.TextFormatInput.FloatFieldLargerThanUint64.TextFormatOutput
Required.Proto3.TextFormatInput.FloatFieldMaxValue.ProtobufOutput
Required.Proto3.TextFormatInput.FloatFieldMaxValue.TextFormatOutput
Required.Proto3.TextFormatInput.FloatFieldNaNValue.ProtobufOutput
Required.Proto3.TextFormatInput.FloatFieldNaNValue.TextFormatOutput
Required.Proto3.TextFormatInput.FloatFieldTooLarge.ProtobufOutput
Required.Proto3.TextFormatInput.FloatFieldTooLarge.TextFormatOutput

View File

@ -175,13 +175,11 @@ func Marshal(v pref.Value, ev pref.EnumValueDescriptor, k pref.Kind, f Format) (
func unmarshalBytes(s string) ([]byte, bool) {
// Bytes values use the same escaping as the text format,
// however they lack the surrounding double quotes.
// TODO: Export unmarshalString in the text package to avoid this hack.
v, err := ptext.Unmarshal([]byte(`["` + s + `"]:0`))
if err == nil && len(v.Message()) == 1 {
s := v.Message()[0][0].String()
return []byte(s), true
v, err := ptext.UnmarshalString(`"` + s + `"`)
if err != nil {
return nil, false
}
return nil, false
return []byte(v), true
}
// marshalBytes serializes bytes by using C escaping.

View File

@ -6,238 +6,524 @@ package text
import (
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/errors"
"google.golang.org/protobuf/reflect/protoreflect"
)
type syntaxError struct{ error }
// Decoder is a token-based textproto decoder.
type Decoder struct {
// lastCall is last method called, either readCall or peekCall.
// Initial value is readCall.
lastCall call
func newSyntaxError(f string, x ...interface{}) error {
return syntaxError{errors.New(f, x...)}
}
// lastToken contains the last read token.
lastToken Token
// Unmarshal parses b as the proto text format.
// It returns a Value, which is always of the Message type.
func Unmarshal(b []byte) (Value, error) {
p := decoder{in: b}
p.consume(0) // trim leading spaces or comments
v, err := p.unmarshalMessage(false)
if err != nil {
if e, ok := err.(syntaxError); ok {
b = b[:len(b)-len(p.in)] // consumed input
line := bytes.Count(b, []byte("\n")) + 1
if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
b = b[i+1:]
}
column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
}
return Value{}, err
}
if len(p.in) > 0 {
return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
}
return v, nil
}
// lastErr contains the last read error.
lastErr error
type decoder struct {
// openStack is a stack containing the byte characters for MessageOpen and
// ListOpen kinds. The top of stack represents the message or the list that
// the current token is nested in. An empty stack means the current token is
// at the top level message. The characters '{' and '<' both represent the
// MessageOpen kind.
openStack []byte
// orig is used in reporting line and column.
orig []byte
// in contains the unconsumed input.
in []byte
}
func (p *decoder) unmarshalList() (Value, error) {
b := p.in
var elems []Value
if err := p.consumeChar('[', "at start of list"); err != nil {
return Value{}, err
// NewDecoder constructs a Decoder over b. Both the original-input reference
// (used for position reporting) and the unconsumed-input cursor start at b.
func NewDecoder(b []byte) *Decoder {
	d := new(Decoder)
	d.orig, d.in = b, b
	return d
}
// call specifies which Decoder method was invoked.
type call uint8

const (
	// readCall is the zero value, so a freshly constructed Decoder behaves as
	// if Read was the last method called.
	readCall call = iota
	// peekCall records that Peek was the last method called.
	peekCall
)
// Peek looks ahead and returns the next token and error without advancing a read.
func (d *Decoder) Peek() (Token, error) {
defer func() { d.lastCall = peekCall }()
if d.lastCall == readCall {
d.lastToken, d.lastErr = d.Read()
}
if len(p.in) > 0 && p.in[0] != ']' {
for len(p.in) > 0 {
v, err := p.unmarshalValue()
if err != nil {
return Value{}, err
return d.lastToken, d.lastErr
}
// Read returns the next token, or an error if no valid token can be parsed.
//
// If the previous call was Peek, the token and error cached by that call are
// returned without parsing again. Separator tokens (comma and semicolon) are
// never returned; the token following the separator is returned instead.
func (d *Decoder) Read() (Token, error) {
	// Always record that Read was the last call, even if parsing panics.
	defer func() { d.lastCall = readCall }()

	if d.lastCall == peekCall {
		return d.lastToken, d.lastErr
	}

	next, err := d.parseNext(d.lastToken.Kind())
	if err == nil && (next.kind == comma || next.kind == semicolon) {
		// Skip over the separator and parse what follows it.
		next, err = d.parseNext(next.kind)
	}
	if err != nil {
		return Token{}, err
	}

	d.lastToken = next
	return next, nil
}
// Format strings for syntax errors raised by parseNext.
const (
	// mismatchedFmt is used when a message is closed with the other message
	// close character, e.g. '{' closed by '>'.
	mismatchedFmt = "mismatched close character %q"
	// unexpectedFmt is used when a character that is neither ']' nor ',' follows
	// an element inside a list.
	unexpectedFmt = "unexpected character %q"
)
// parseNext parses the next Token based on given last kind.
func (d *Decoder) parseNext(lastKind Kind) (Token, error) {
// Trim leading spaces.
d.consume(0)
isEOF := false
if len(d.in) == 0 {
isEOF = true
}
switch lastKind {
case EOF:
return d.consumeToken(EOF, 0, 0), nil
case bof:
// Start of top level message. Next token can be EOF or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
return d.parseFieldName()
case Name:
// Next token can be MessageOpen, ListOpen or Scalar.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
case '[':
d.pushOpenStack(ch)
return d.consumeToken(ListOpen, 1, 0), nil
default:
return d.parseScalar()
}
case Scalar:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
elems = append(elems, v)
if !p.tryConsumeChar(',') {
switch d.in[0] {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case ListOpen:
	// Next token can be ListClose or comma.
	// Guard against end of input before peeking at d.in[0]; an unterminated
	// list such as `name: [1` must report an error rather than panic.
	if isEOF {
		return Token{}, io.ErrUnexpectedEOF
	}
	switch ch := d.in[0]; ch {
	case ']':
		d.popOpenStack()
		return d.consumeToken(ListClose, 1, 0), nil
	case ',':
		return d.consumeToken(comma, 1, 0), nil
	default:
		return Token{}, d.newSyntaxError(unexpectedFmt, ch)
	}
}
case MessageOpen:
// Next token can be MessageClose or Name.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
_, closeCh := d.currentOpenKind()
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
default:
return d.parseFieldName()
}
case MessageClose:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
switch ch := d.in[0]; ch {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case ListOpen:
// Next token can be ListClose or comma
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(ListClose, 1, 0), nil
case ',':
return d.consumeToken(comma, 1, 0), nil
default:
return Token{}, d.newSyntaxError(unexpectedFmt, ch)
}
}
case ListOpen:
// Next token can be ListClose, MessageOpen or Scalar.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case ']':
d.popOpenStack()
return d.consumeToken(ListClose, 1, 0), nil
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
default:
return d.parseScalar()
}
case ListClose:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
switch ch := d.in[0]; ch {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
default:
// It is not possible to have this case. Let it panic below.
}
case comma, semicolon:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message. Next token can be EOF or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
return d.parseFieldName()
case MessageOpen:
// Next token can be MessageClose or Name.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
default:
return d.parseFieldName()
}
case ListOpen:
if lastKind == semicolon {
// It is not possible to have this case as logic here
// should not have produced a semicolon Token when inside a
// list. Let it panic below.
break
}
}
}
if err := p.consumeChar(']', "at end of list"); err != nil {
return Value{}, err
}
b = b[:len(b)-len(p.in)]
return rawValueOf(elems, b[:len(b):len(b)]), nil
}
func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) {
b := p.in
var items [][2]Value
delims := [2]byte{'{', '}'}
if len(p.in) > 0 && p.in[0] == '<' {
delims = [2]byte{'<', '>'}
}
if checkDelims {
if err := p.consumeChar(delims[0], "at start of message"); err != nil {
return Value{}, err
}
}
for len(p.in) > 0 {
if p.in[0] == '}' || p.in[0] == '>' {
break
}
k, err := p.unmarshalKey()
if err != nil {
return Value{}, err
}
if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' {
return Value{}, newSyntaxError("expected ':' after message key")
}
v, err := p.unmarshalValue()
if err != nil {
return Value{}, err
}
if p.tryConsumeChar(';') || p.tryConsumeChar(',') {
// always optional
}
items = append(items, [2]Value{k, v})
}
if checkDelims {
if err := p.consumeChar(delims[1], "at end of message"); err != nil {
return Value{}, err
}
}
b = b[:len(b)-len(p.in)]
return rawValueOf(items, b[:len(b):len(b)]), nil
}
// unmarshalKey parses the key, which may be a Name, String, or Uint.
func (p *decoder) unmarshalKey() (v Value, err error) {
if p.tryConsumeChar('[') {
if len(p.in) == 0 {
return Value{}, io.ErrUnexpectedEOF
}
if p.in[0] == '\'' || p.in[0] == '"' {
// Historically, Go's parser allowed a string for the Any type URL.
// This is specific to Go and contrary to the C++ implementation,
// which does not support strings for the Any type URL.
v, err = p.unmarshalString()
if err != nil {
return Value{}, err
// Next token can be MessageOpen or Scalar.
if isEOF {
return Token{}, io.ErrUnexpectedEOF
}
} else {
v, err = p.unmarshalURL()
if err != nil {
return Value{}, err
switch ch := d.in[0]; ch {
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
default:
return d.parseScalar()
}
}
if err := p.consumeChar(']', "at end of extension name"); err != nil {
return Value{}, err
}
return v, nil
}
v, err = p.unmarshalName()
if err == nil {
return v, nil
}
v, err = p.unmarshalNumberKey()
if err == nil {
return v, nil
}
return Value{}, err
line, column := d.Position(len(d.orig) - len(d.in))
panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", line, column, lastKind))
}
// unmarshalURL parses an Any type URL string. The C++ parser does not handle
// many legal URL strings. This implementation is more liberal and allows for
// the pattern ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`).
func (p *decoder) unmarshalURL() (Value, error) {
s := p.in
var size int
for len(s) > 0 && (s[0] == '-' || s[0] == '_' ||
('0' <= s[0] && s[0] <= '9') ||
('a' <= s[0] && s[0] <= 'z') ||
('A' <= s[0] && s[0] <= 'Z')) {
s = s[1:]
size++
if len(s) > 0 && (s[0] == '/' || s[0] == '.') {
s = s[1:]
size++
}
}
// Last character cannot be '.' or '/'.
// Next byte should either be a delimiter or it is at the end.
if size == 0 || p.in[size-1] == '.' || p.in[size-1] == '/' ||
(len(s) > 0 && !isDelim(s[0])) {
return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
}
v := rawValueOf(string(p.in[:size]), p.in[:size:size])
p.consume(size)
return v, nil
// otherCloseChar maps each message close character to the alternative message
// close character. parseNext uses it to detect a message that was opened with
// one delimiter style and closed with the other.
var otherCloseChar = map[byte]byte{
	'}': '>',
	'>': '}',
}
// unmarshalNumberKey parses field number as key. Field numbers are non-negative
// integers.
func (p *decoder) unmarshalNumberKey() (Value, error) {
num, ok := parseNumber(p.in)
if !ok || num.neg || num.typ == numFloat {
return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
// currentOpenKind indicates whether current position is inside a message, list
// or top-level message by returning MessageOpen, ListOpen or bof respectively.
// If the returned kind is either a MessageOpen or ListOpen, it also returns the
// corresponding closing character.
func (d *Decoder) currentOpenKind() (Kind, byte) {
if len(d.openStack) == 0 {
return bof, 0
}
v, err := strconv.ParseUint(string(num.value), 0, 64)
if err != nil {
return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
}
p.consume(num.size)
return rawValueOf(v, num.value), nil
}
func (p *decoder) unmarshalValue() (Value, error) {
if len(p.in) == 0 {
return Value{}, io.ErrUnexpectedEOF
}
switch p.in[0] {
case '"', '\'':
return p.unmarshalStrings()
openCh := d.openStack[len(d.openStack)-1]
switch openCh {
case '{':
return MessageOpen, '}'
case '<':
return MessageOpen, '>'
case '[':
return p.unmarshalList()
case '{', '<':
return p.unmarshalMessage(true)
default:
n, ok := consumeName(p.in)
if ok && literals[string(p.in[:n])] == nil {
v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
p.consume(n)
return v, nil
return ListOpen, ']'
}
panic(fmt.Sprintf("Decoder: openStack contains invalid byte %s", string(openCh)))
}
// pushOpenStack records ch as the innermost open delimiter by appending it to
// d.openStack.
func (d *Decoder) pushOpenStack(ch byte) {
	d.openStack = append(d.openStack, ch)
}
// popOpenStack removes the innermost open delimiter from d.openStack.
// It panics if the stack is empty, so callers must only invoke it when a
// message or list is currently open.
func (d *Decoder) popOpenStack() {
	d.openStack = d.openStack[:len(d.openStack)-1]
}
// parseFieldName parses field name and separator.
func (d *Decoder) parseFieldName() (tok Token, err error) {
defer func() {
if err == nil && d.tryConsumeChar(':') {
tok.attrs |= hasSeparator
}
return p.unmarshalNumber()
}()
// Extension or Any type URL.
if d.in[0] == '[' {
return d.parseTypeName()
}
// Identifier.
if size := parseIdent(d.in, false); size > 0 {
return d.consumeToken(Name, size, uint8(IdentName)), nil
}
// Field number. Identify if input is a valid number that is not negative
// and is decimal integer within 32-bit range.
if num := parseNumber(d.in); num.size > 0 {
if !num.neg && num.kind == numDec {
if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
}
}
return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
}
return Token{}, d.newSyntaxError("invalid field name: %s", errRegexp.Find(d.in))
}
// parseTypeName parses an Any type URL or an extension field name. The name is
// enclosed in [ and ] characters. The C++ parser does not handle many legal URL
// strings. This implementation is more liberal and allows for the pattern
// ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*. Whitespaces and comments are allowed
// in between [ ], '.', '/' and the sub names.
//
// The caller must have verified that d.in starts with '['. On success the
// decoder is advanced past the closing ']' and any following whitespace or
// comments, and the returned Name token carries the name with all whitespace
// and comments stripped in str, and the original bytes (brackets included) in
// raw. It returns io.ErrUnexpectedEOF if the input ends before ']' and a
// syntax error if the name is malformed.
func (d *Decoder) parseTypeName() (Token, error) {
	startPos := len(d.orig) - len(d.in)
	// Use alias s to advance first so that d.in stays at the start of the name
	// for error position reporting. s is always a suffix of d.orig, hence
	// len(d.orig)-len(s) is the absolute offset of s[0].
	s := consume(d.in[1:], 0)
	if len(s) == 0 {
		return Token{}, io.ErrUnexpectedEOF
	}

	var name []byte
	for len(s) > 0 && isTypeNameChar(s[0]) {
		name = append(name, s[0])
		s = s[1:]
	}
	s = consume(s, 0)

	var closed bool
	for len(s) > 0 && !closed {
		switch {
		case s[0] == ']':
			s = s[1:]
			closed = true

		case s[0] == '/', s[0] == '.':
			// Two consecutive separators are not allowed.
			if len(name) > 0 && (name[len(name)-1] == '/' || name[len(name)-1] == '.') {
				// Offsets are relative to d.orig, so the offending text must
				// be sliced out of d.orig rather than d.in.
				return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
					d.orig[startPos:len(d.orig)-len(s)+1])
			}
			name = append(name, s[0])
			s = s[1:]
			s = consume(s, 0)
			for len(s) > 0 && isTypeNameChar(s[0]) {
				name = append(name, s[0])
				s = s[1:]
			}
			s = consume(s, 0)

		default:
			return Token{}, d.newSyntaxError(
				"invalid type URL/extension field name: %s", d.orig[startPos:len(d.orig)-len(s)+1])
		}
	}

	if !closed {
		return Token{}, io.ErrUnexpectedEOF
	}

	// First character cannot be '.'. Last character cannot be '.' or '/'.
	size := len(name)
	if size == 0 || name[0] == '.' || name[size-1] == '.' || name[size-1] == '/' {
		return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
			d.orig[startPos:len(d.orig)-len(s)])
	}

	d.in = s
	endPos := len(d.orig) - len(d.in)
	d.consume(0)

	return Token{
		kind:  Name,
		attrs: uint8(TypeName),
		pos:   startPos,
		raw:   d.orig[startPos:endPos],
		str:   string(name),
	}, nil
}
// isTypeNameChar reports whether b may appear inside a sub name of a type URL
// or extension field name: an ASCII letter, an ASCII digit, '-' or '_'.
func isTypeNameChar(b byte) bool {
	switch {
	case b == '-', b == '_':
		return true
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	}
	return false
}
// isWhiteSpace reports whether b is a space, newline, carriage return or tab.
func isWhiteSpace(b byte) bool {
	return b == ' ' || b == '\n' || b == '\r' || b == '\t'
}
// unmarshalName unmarshals an unquoted proto identifier.
// Regular expression that matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
//
// E.g., `field_name` => ValueOf(protoreflect.Name("field_name"))
func (p *decoder) unmarshalName() (Value, error) {
n, ok := consumeName(p.in)
if !ok {
return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
}
v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
p.consume(n)
return v, nil
}
func consumeName(input []byte) (int, bool) {
var n int
// parseIdent parses an unquoted proto identifier and returns size.
// If allowNeg is true, it allows '-' to be the first character in the
// identifier. This is used when parsing literal values like -infinity, etc.
// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
func parseIdent(input []byte, allowNeg bool) int {
var size int
s := input
if len(s) == 0 {
return 0, false
return 0
}
if allowNeg && s[0] == '-' {
s = s[1:]
size++
if len(s) == 0 {
return 0
}
}
switch {
@ -245,9 +531,9 @@ func consumeName(input []byte) (int, bool) {
'a' <= s[0] && s[0] <= 'z',
'A' <= s[0] && s[0] <= 'Z':
s = s[1:]
n++
size++
default:
return 0, false
return 0
}
for len(s) > 0 && (s[0] == '_' ||
@ -255,51 +541,110 @@ func consumeName(input []byte) (int, bool) {
'A' <= s[0] && s[0] <= 'Z' ||
'0' <= s[0] && s[0] <= '9') {
s = s[1:]
n++
size++
}
if len(s) > 0 && !isDelim(s[0]) {
return 0, false
return 0
}
return n, true
return size
}
func (p *decoder) consumeChar(c byte, msg string) error {
if p.tryConsumeChar(c) {
return nil
// parseScalar parses for a string, literal or number value.
func (d *Decoder) parseScalar() (Token, error) {
if d.in[0] == '"' || d.in[0] == '\'' {
return d.parseStringValue()
}
if len(p.in) == 0 {
return io.ErrUnexpectedEOF
if tok, ok := d.parseLiteralValue(); ok {
return tok, nil
}
return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
if tok, ok := d.parseNumberValue(); ok {
return tok, nil
}
return Token{}, d.newSyntaxError("invalid scalar value: %s", errRegexp.Find(d.in))
}
func (p *decoder) tryConsumeChar(c byte) bool {
if len(p.in) > 0 && p.in[0] == c {
p.consume(1)
// parseLiteralValue attempts to read a literal value, which is how bools,
// special floats and enums are written. It only recognizes that the value has
// the shape of an identifier (optionally prefixed with '-'); interpreting it
// is left to the caller. The boolean result is false, and nothing is consumed,
// when the input does not start with such an identifier.
func (d *Decoder) parseLiteralValue() (Token, bool) {
	if n := parseIdent(d.in, true); n != 0 {
		return d.consumeToken(Scalar, n, literalValue), true
	}
	return Token{}, false
}
// consumeToken builds a Token of the given kind and attrs from the first size
// bytes of d.in, then consumes those bytes along with any whitespace or
// comments that follow them.
func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token {
	// Capture the position and raw bytes first; consume moves d.in forward.
	pos := len(d.orig) - len(d.in)
	raw := d.in[:size]
	d.consume(size)
	return Token{kind: kind, attrs: attrs, pos: pos, raw: raw}
}
// newSyntaxError formats f with x and wraps the result in a syntax error that
// is prefixed with the line and column of the decoder's current position,
// i.e. the start of the unconsumed input.
func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
	cause := errors.New(f, x...)
	offset := len(d.orig) - len(d.in)
	line, column := d.Position(offset)
	return errors.New("syntax error (line %d:%d): %v", line, column, cause)
}
// Position translates a byte index into the original input to a 1-based line
// and column number. The column counts runes, not bytes, from the start of
// the line. It panics if idx is out of range of the original input.
func (d *Decoder) Position(idx int) (line int, column int) {
	prefix := d.orig[:idx]
	// LastIndexByte yields -1 when there is no newline, which makes the
	// current line start at offset 0.
	lineStart := bytes.LastIndexByte(prefix, '\n') + 1
	line = bytes.Count(prefix, []byte("\n")) + 1
	column = utf8.RuneCount(prefix[lineStart:]) + 1 // ignore multi-rune characters
	return line, column
}
// tryConsumeChar consumes c, plus any whitespace or comments after it, when c
// is the next byte of input and reports whether it did so.
func (d *Decoder) tryConsumeChar(c byte) bool {
	if len(d.in) == 0 || d.in[0] != c {
		return false
	}
	d.consume(1)
	return true
}
// consume consumes n bytes of input and any subsequent whitespace or comments.
func (p *decoder) consume(n int) {
p.in = p.in[n:]
for len(p.in) > 0 {
switch p.in[0] {
// consume advances the decoder past n bytes of input and any whitespace or
// comments that follow them.
func (d *Decoder) consume(n int) {
	d.in = consume(d.in, n)
}
// consume consumes n bytes of input and any subsequent whitespace or comments.
func consume(b []byte, n int) []byte {
b = b[n:]
for len(b) > 0 {
switch b[0] {
case ' ', '\n', '\r', '\t':
p.in = p.in[1:]
b = b[1:]
case '#':
if i := bytes.IndexByte(p.in, '\n'); i >= 0 {
p.in = p.in[i+len("\n"):]
if i := bytes.IndexByte(b, '\n'); i >= 0 {
b = b[i+len("\n"):]
} else {
p.in = nil
b = nil
}
default:
return
return b
}
}
return b
}
// Any sequence that looks like a non-delimiter (for error reporting).

View File

@ -0,0 +1,190 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
// parseNumberValue reads a number from the start of the input and wraps it in
// a Scalar token. The token's str holds the number text without a trailing
// 'f'/'F' float suffix, while raw keeps the bytes exactly as written. The
// boolean result is false, and nothing is consumed, when the input does not
// start with a number.
func (d *Decoder) parseNumberValue() (Token, bool) {
	num := parseNumber(d.in)
	if num.size == 0 {
		return Token{}, false
	}

	attrs := num.kind
	if num.neg {
		attrs |= isNegative
	}

	raw := d.in[:num.size]
	text := raw
	if num.kind == numFloat {
		// Drop the optional float suffix from the parsed text.
		if c := raw[len(raw)-1]; c == 'f' || c == 'F' {
			text = raw[:len(raw)-1]
		}
	}

	tok := Token{
		kind:     Scalar,
		attrs:    numberValue,
		pos:      len(d.orig) - len(d.in),
		raw:      raw,
		str:      string(text),
		numAttrs: attrs,
	}
	d.consume(num.size)
	return tok, true
}
// Number kinds reported by parseNumber. The expression (1 << iota) / 2 yields
// 0, 1, 2 and 4 respectively, so numDec is the zero value and cannot be tested
// with a bit mask, whereas numHex, numOct and numFloat occupy distinct bits.
const (
	numDec uint8 = (1 << iota) / 2
	numHex
	numOct
	numFloat
)
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package
// in conjunction with the input bytes.
type number struct {
	// kind is one of numDec, numHex, numOct or numFloat.
	kind uint8
	// neg is true when the number has a leading '-'.
	neg bool
	// size is the number of input bytes that make up the number, including the
	// sign and any float suffix. It is 0 when the input is not a number.
	size int
}
// parseNumber constructs a number object from given input. It allows for the
// following patterns:
//   integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
//   float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
// The number must be followed by a delimiter or the end of input.
//
// The returned number records the kind, the sign and the number of parsed
// bytes. The zero number (size 0) is returned if the input is not a number.
// In particular, a decimal point with no digits on either side and an exponent
// without digits are rejected.
func parseNumber(input []byte) number {
	kind := numDec
	var size int
	var neg bool

	s := input
	if len(s) == 0 {
		return number{}
	}

	// Optional -
	if s[0] == '-' {
		neg = true
		s = s[1:]
		size++
		if len(s) == 0 {
			return number{}
		}
	}

	// C++ allows for whitespace and comments in between the negative sign and
	// the rest of the number. This logic currently does not but is consistent
	// with v1.

	switch {
	case s[0] == '0':
		if len(s) > 1 {
			switch {
			case s[1] == 'x' || s[1] == 'X':
				// Parse as hex number.
				kind = numHex
				n := 2
				s = s[2:]
				for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
					('a' <= s[0] && s[0] <= 'f') ||
					('A' <= s[0] && s[0] <= 'F')) {
					s = s[1:]
					n++
				}
				// "0x" must be followed by at least one hex digit.
				if n == 2 {
					return number{}
				}
				size += n

			case '0' <= s[1] && s[1] <= '7':
				// Parse as octal number.
				kind = numOct
				n := 2
				s = s[2:]
				for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
					s = s[1:]
					n++
				}
				size += n
			}

			// Hex and octal numbers have no fraction, exponent or suffix.
			if kind&(numHex|numOct) > 0 {
				if len(s) > 0 && !isDelim(s[0]) {
					return number{}
				}
				return number{kind: kind, neg: neg, size: size}
			}
		}
		s = s[1:]
		size++

	case '1' <= s[0] && s[0] <= '9':
		n := 1
		s = s[1:]
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
		}
		size += n

	case s[0] == '.':
		// Set kind to numFloat to signify the intent to parse as float. And
		// that it needs to have other digits after '.'.
		kind = numFloat

	default:
		return number{}
	}

	// . followed by 0 or more digits.
	if len(s) > 0 && s[0] == '.' {
		n := 1
		s = s[1:]
		// If decimal point was before any digits, it must be followed by at
		// least one digit; a lone "." is not a number.
		if kind == numFloat && (len(s) == 0 || s[0] < '0' || '9' < s[0]) {
			return number{}
		}
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
		}
		size += n
		kind = numFloat
	}

	// e or E followed by an optional - or + and 1 or more digits.
	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
		kind = numFloat
		s = s[1:]
		n := 1
		if s[0] == '+' || s[0] == '-' {
			s = s[1:]
			n++
			if len(s) == 0 {
				return number{}
			}
		}
		digits := 0
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
			digits++
		}
		// An exponent marker without digits, e.g. "1e" or "1e+", is invalid.
		if digits == 0 {
			return number{}
		}
		size += n
	}

	// Optional suffix f or F for floats.
	if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
		kind = numFloat
		s = s[1:]
		size++
	}

	// Check that next byte is a delimiter or it is at the end.
	if len(s) > 0 && !isDelim(s[0]) {
		return number{}
	}

	return number{kind: kind, neg: neg, size: size}
}

View File

@ -0,0 +1,162 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
"google.golang.org/protobuf/internal/strs"
)
// parseStringValue parses a string field value into a Scalar token.
// Unlike parseString, it accepts a run of back-to-back string literals, which
// the text format treats as one string made of all the pieces concatenated.
//
// E.g., `"foo" "bar" "baz"` => "foobarbaz"
//
// A closing quote unambiguously ends a literal, so no whitespace is needed
// between consecutive literals either:
//	`"foo"'bar'"baz"` => "foobarbaz"
func (d *Decoder) parseStringValue() (Token, error) {
	start := d.in
	var joined strings.Builder
	for len(d.in) > 0 {
		if c := d.in[0]; c != '"' && c != '\'' {
			break
		}
		piece, err := d.parseString()
		if err != nil {
			return Token{}, err
		}
		joined.WriteString(piece)
	}
	// d.in now sits just past the value (and any trailing whitespace/comments).
	consumed := len(start) - len(d.in)
	return Token{
		kind:  Scalar,
		attrs: stringValue,
		pos:   len(d.orig) - len(start),
		raw:   start[:consumed],
		str:   joined.String(),
	}, nil
}
// parseString parses a string value enclosed in " or '.
//
// The first byte of d.in is taken as the quote character. On success the
// unescaped content is returned and the decoder is advanced past the closing
// quote and any whitespace or comments that follow it. On failure d.in is left
// untouched, so syntax errors are reported at the position of the opening
// quote.
//
// It returns io.ErrUnexpectedEOF if the input is empty, if the closing quote
// is missing, or if an escape sequence is cut short by the end of input. It
// returns a syntax error for invalid UTF-8, for a NUL or newline character
// inside the string, and for malformed escape sequences.
func (d *Decoder) parseString() (string, error) {
	in := d.in
	if len(in) == 0 {
		return "", io.ErrUnexpectedEOF
	}
	quote := in[0]
	in = in[1:]
	// Fast path: everything up to the first byte needing attention is copied
	// by reference.
	i := indexNeedEscapeInBytes(in)
	in, out := in[i:], in[:i:i] // set cap to prevent mutations
	for len(in) > 0 {
		switch r, n := utf8.DecodeRune(in); {
		case r == utf8.RuneError && n == 1:
			return "", d.newSyntaxError("invalid UTF-8 detected")
		case r == 0 || r == '\n':
			return "", d.newSyntaxError("invalid character %q in string", r)
		case r == rune(quote):
			// Closing quote: consume everything read so far from d.in.
			in = in[1:]
			d.consume(len(d.in) - len(in))
			return string(out), nil
		case r == '\\':
			if len(in) < 2 {
				return "", io.ErrUnexpectedEOF
			}
			switch r := in[1]; r {
			case '"', '\'', '\\', '?':
				in, out = in[2:], append(out, r)
			case 'a':
				in, out = in[2:], append(out, '\a')
			case 'b':
				in, out = in[2:], append(out, '\b')
			case 'n':
				in, out = in[2:], append(out, '\n')
			case 'r':
				in, out = in[2:], append(out, '\r')
			case 't':
				in, out = in[2:], append(out, '\t')
			case 'v':
				in, out = in[2:], append(out, '\v')
			case 'f':
				in, out = in[2:], append(out, '\f')
			case '0', '1', '2', '3', '4', '5', '6', '7':
				// One, two, or three octal characters.
				// n counts the octal digits that follow the backslash.
				n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
				if n > 3 {
					n = 3
				}
				// A bit size of 8 rejects values that do not fit in a byte.
				v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
				if err != nil {
					return "", d.newSyntaxError("invalid octal escape code %q in string", in[:1+n])
				}
				in, out = in[1+n:], append(out, byte(v))
			case 'x':
				// One or two hexadecimal characters.
				// n counts the hex digits that follow `\x`; zero digits makes
				// ParseUint fail on the empty string.
				n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
				if n > 2 {
					n = 2
				}
				v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
				if err != nil {
					return "", d.newSyntaxError("invalid hex escape code %q in string", in[:2+n])
				}
				in, out = in[2+n:], append(out, byte(v))
			case 'u', 'U':
				// Four or eight hexadecimal characters
				// n is the full length of the escape, including `\u` or `\U`.
				n := 6
				if r == 'U' {
					n = 10
				}
				if len(in) < n {
					return "", io.ErrUnexpectedEOF
				}
				v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
				if utf8.MaxRune < v || err != nil {
					return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:n])
				}
				in = in[n:]

				r := rune(v)
				if utf16.IsSurrogate(r) {
					// A surrogate must be followed by a second `\uXXXX` escape
					// that completes the pair.
					if len(in) < 6 {
						return "", io.ErrUnexpectedEOF
					}
					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
					r = utf16.DecodeRune(r, rune(v))
					if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
						return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:6])
					}
					in = in[6:]
				}
				out = append(out, string(r)...)
			default:
				return "", d.newSyntaxError("invalid escape code %q in string", in[:2])
			}
		default:
			// Ordinary rune: copy it together with the following run of bytes
			// that need no special handling.
			i := indexNeedEscapeInBytes(in[n:])
			in, out = in[n+i:], append(out, in[:n+i]...)
		}
	}
	// Ran out of input before seeing the closing quote.
	return "", io.ErrUnexpectedEOF
}
// indexNeedEscapeInBytes returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
// It is the []byte counterpart of indexNeedEscapeInString, to which it
// delegates after viewing b as a string via strs.UnsafeString.
func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
// UnmarshalString returns an unescaped string given a textproto string value.
// String value needs to contain single or double quotes. This is only used by
// internal/encoding/defval package for unmarshaling bytes.
//
// The input must consist of exactly one quoted string, optionally followed by
// whitespace or comments. An error is returned if the string is malformed or
// if any other input follows the closing quote, so that a stray unescaped
// quote inside the value is reported rather than silently truncating it.
func UnmarshalString(s string) (string, error) {
	d := NewDecoder([]byte(s))
	v, err := d.parseString()
	if err != nil {
		return "", err
	}
	// parseString has already skipped trailing whitespace and comments, so
	// anything left over is unexpected data.
	if len(d.in) > 0 {
		return "", d.newSyntaxError("unexpected input after string value: %s", d.in)
	}
	return v, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,370 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"fmt"
"math"
"strconv"
"strings"
"google.golang.org/protobuf/internal/flags"
)
// Kind represents a token kind expressible in the textproto format.
type Kind uint8

const (
	Invalid Kind = iota
	EOF
	Name   // Name indicates the field name.
	Scalar // Scalar are scalar values, e.g. "string", 47, ENUM_LITERAL, true.
	MessageOpen
	MessageClose
	ListOpen
	ListClose

	// comma and semi-colon are only for parsing in between values and should not be exposed.
	comma
	semicolon

	// bof indicates beginning of file, which is the default token
	// kind at the beginning of parsing.
	bof = Invalid
)

// String returns a short human-readable form of the kind. Kinds outside the
// declared set are rendered as "<invalid:N>" where N is the numeric value.
func (t Kind) String() string {
	// Table indexed by kind; every declared kind has an entry.
	names := [...]string{
		Invalid:      "<invalid>",
		EOF:          "eof",
		Name:         "name",
		Scalar:       "scalar",
		MessageOpen:  "{",
		MessageClose: "}",
		ListOpen:     "[",
		ListClose:    "]",
		comma:        ",",
		semicolon:    ";",
	}
	if int(t) < len(names) {
		return names[t]
	}
	return fmt.Sprintf("<invalid:%v>", uint8(t))
}
// NameKind represents different types of field names.
type NameKind uint8
const (
IdentName NameKind = iota + 1
TypeName
FieldNumber
)
// String returns the constant's identifier for a declared NameKind and
// "<invalid:N>" (with the numeric value) for anything else.
func (t NameKind) String() string {
	var s string
	switch t {
	case IdentName:
		s = "IdentName"
	case TypeName:
		s = "TypeName"
	case FieldNumber:
		s = "FieldNumber"
	default:
		s = fmt.Sprintf("<invalid:%v>", uint8(t))
	}
	return s
}
// Bit mask in Token.attrs to indicate if a Name token is followed by the
// separator char ':'. The field name separator char is optional for message
// field or repeated message field, but required for all other types. Decoder
// simply indicates whether a Name token is followed by separator or not. It is
// up to the prototext package to validate.
const hasSeparator = 1 << 7
// Scalar value types.
const (
numberValue = iota + 1
stringValue
literalValue
)
// Bit mask in Token.numAttrs to indicate that the number is negative.
const isNegative = 1 << 7
// Token provides a parsed token kind and value. Values are provided by the
// different accessor methods.
type Token struct {
// Kind of the Token object.
kind Kind
// attrs contains metadata for the following Kinds:
// Name: hasSeparator bit (the highest bit) and one of NameKind in the
// remaining bits.
// Scalar: one of numberValue, stringValue, literalValue.
attrs uint8
// numAttrs contains metadata for numberValue:
// - highest bit (isNegative) is whether negative or positive.
// - lower bits indicate one of numDec, numHex, numOct, numFloat.
numAttrs uint8
// pos provides the position of the token in the original input.
pos int
// raw bytes of the serialized token.
// This is a subslice into the original input.
raw []byte
// str contains parsed string for the following:
// - stringValue of Scalar kind
// - numberValue of Scalar kind
// - TypeName of Name kind
str string
}
// Kind returns the token kind stored in the Token.
func (t Token) Kind() Kind {
return t.kind
}
// RawString returns the raw bytes of the token, as read from the input,
// converted to a string.
func (t Token) RawString() string {
return string(t.raw)
}
// Pos returns the position of the token in the original input.
func (t Token) Pos() int {
return t.pos
}
// NameKind reports which kind of field name the token holds: IdentName,
// TypeName or FieldNumber. The separator bit is masked off before the
// conversion. It panics if the token kind is not Name.
func (t Token) NameKind() NameKind {
	if t.kind != Name {
		panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
	}
	return NameKind(t.attrs &^ hasSeparator)
}
// HasSeparator reports whether the field name is followed by the separator
// char ':'. It panics if the token kind is not Name.
func (t Token) HasSeparator() bool {
	if t.kind != Name {
		panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
	}
	return t.attrs&hasSeparator != 0
}
// IdentName returns the value for IdentName type, which is the raw token
// bytes converted to a string.
// It panics if the token is not a Name token of kind IdentName.
func (t Token) IdentName() string {
	// NameKind values are sequential (1, 2, 3) rather than bit flags, so the
	// kind must be compared for equality once the separator bit is masked
	// off. A bitwise-AND test would also accept FieldNumber (3), which
	// shares its low bit with IdentName (1).
	if t.kind == Name && NameKind(t.attrs&^hasSeparator) == IdentName {
		return string(t.raw)
	}
	panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
}
// TypeName returns the value for TypeName type, which is the parsed string
// stored in the token.
// It panics if the token is not a Name token of kind TypeName.
func (t Token) TypeName() string {
	// NameKind values are sequential (1, 2, 3) rather than bit flags, so the
	// kind must be compared for equality once the separator bit is masked
	// off. A bitwise-AND test would also accept FieldNumber (3), which
	// shares a bit with TypeName (2).
	if t.kind == Name && NameKind(t.attrs&^hasSeparator) == TypeName {
		return t.str
	}
	panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
}
// FieldNumber returns the value for FieldNumber type. It returns a
// non-negative int32 value. Caller will still need to validate for the correct
// field number range.
// It panics if the token is not a Name token of kind FieldNumber.
func (t Token) FieldNumber() int32 {
	// NameKind values are sequential (1, 2, 3) rather than bit flags, so the
	// kind must be compared for equality once the separator bit is masked
	// off. A bitwise-AND test against FieldNumber (3) would let IdentName (1)
	// and TypeName (2) tokens through and silently yield 0.
	if t.kind != Name || NameKind(t.attrs&^hasSeparator) != FieldNumber {
		panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
	}
	// Following should not return an error as it had already been called right
	// before this Token was constructed.
	num, _ := strconv.ParseInt(string(t.raw), 10, 32)
	return int32(num)
}
// String returns the parsed string of a Scalar token holding a string value.
// The second result is false, and the string empty, for any other token.
func (t Token) String() (string, bool) {
	if t.kind == Scalar && t.attrs == stringValue {
		return t.str, true
	}
	return "", false
}
// Enum returns the literal value of a Scalar token for use as an enum
// literal. The second result is false if the token is not a literal Scalar
// or if the literal begins with '-'.
func (t Token) Enum() (string, bool) {
	if t.kind != Scalar || t.attrs != literalValue {
		return "", false
	}
	// Literals with a leading minus sign are not accepted as enum names.
	if len(t.raw) > 0 && t.raw[0] == '-' {
		return "", false
	}
	return string(t.raw), true
}
// Bool returns the bool value of a Scalar token. A literal is looked up in
// boolLits; a number is accepted only if it parses as the unsigned integer
// 0 or 1. The second result is false when no bool can be derived.
func (t Token) Bool() (bool, bool) {
	if t.kind != Scalar {
		return false, false
	}
	switch t.attrs {
	case literalValue:
		// A missing key yields (false, false), which is the failure result.
		val, found := boolLits[string(t.raw)]
		return val, found
	case numberValue:
		// Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
		// 0x1, etc.
		if n, err := strconv.ParseUint(t.str, 0, 64); err == nil && n <= 1 {
			return n == 1, true
		}
	}
	return false, false
}
// These exact boolean literals are the ones supported in C++.
var boolLits = map[string]bool{
"t": true,
"true": true,
"True": true,
"f": false,
"false": false,
"False": false,
}
// Uint64 returns the uint64 value of a Scalar number token. The second result
// is false if the token is not a number, is negative, is a float, or does not
// parse as an unsigned 64-bit integer.
func (t Token) Uint64() (uint64, bool) {
	isNumber := t.kind == Scalar && t.attrs == numberValue
	if !isNumber || t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
		return 0, false
	}
	if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
		return n, true
	}
	return 0, false
}
// Uint32 returns the uint32 value of a Scalar number token. The second result
// is false if the token is not a number, is negative, is a float, or does not
// parse as an unsigned 32-bit integer.
func (t Token) Uint32() (uint32, bool) {
	isNumber := t.kind == Scalar && t.attrs == numberValue
	if !isNumber || t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
		return 0, false
	}
	if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
		return uint32(n), true
	}
	return 0, false
}
// Int64 returns the int64 value of a Scalar number token. The second result
// is false if the token is not a non-float number or cannot be parsed as a
// signed 64-bit integer.
func (t Token) Int64() (int64, bool) {
	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
		return 0, false
	}
	n, err := strconv.ParseInt(t.str, 0, 64)
	if err == nil {
		return n, true
	}
	// C++ accepts large positive hex numbers as negative values.
	// This feature is here for proto1 backwards compatibility purposes.
	if !flags.ProtoLegacy || t.numAttrs != numHex {
		return 0, false
	}
	u, err := strconv.ParseUint(t.str, 0, 64)
	if err != nil {
		return 0, false
	}
	return int64(u), true
}
// Int32 returns the int32 value of a Scalar number token. The second result
// is false if the token is not a non-float number or cannot be parsed as a
// signed 32-bit integer.
func (t Token) Int32() (int32, bool) {
	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
		return 0, false
	}
	n, err := strconv.ParseInt(t.str, 0, 32)
	if err == nil {
		return int32(n), true
	}
	// C++ accepts large positive hex numbers as negative values.
	// This feature is here for proto1 backwards compatibility purposes.
	if !flags.ProtoLegacy || t.numAttrs != numHex {
		return 0, false
	}
	u, err := strconv.ParseUint(t.str, 0, 32)
	if err != nil {
		return 0, false
	}
	return int32(u), true
}
// Float64 returns the float64 value of a Scalar token. Literals are looked up
// case-insensitively in floatLits. Numbers are parsed with
// strconv.ParseFloat, and a range error is tolerated by returning the value
// ParseFloat produced. The second result is false otherwise.
func (t Token) Float64() (float64, bool) {
	if t.kind != Scalar {
		return 0, false
	}
	switch t.attrs {
	case literalValue:
		key := strings.ToLower(string(t.raw))
		if lit, found := floatLits[key]; found {
			return lit, true
		}
	case numberValue:
		f, err := strconv.ParseFloat(t.str, 64)
		// A non-nil error from ParseFloat is asserted to be *strconv.NumError.
		if err == nil || err.(*strconv.NumError).Err == strconv.ErrRange {
			return f, true
		}
	}
	return 0, false
}
// Float32 returns the float32 value of a Scalar token. Literals are looked up
// case-insensitively in floatLits. Numbers are parsed as 64-bit floats and
// then converted to float32, and a range error from the parse is tolerated.
// The second result is false otherwise.
func (t Token) Float32() (float32, bool) {
	if t.kind != Scalar {
		return 0, false
	}
	switch t.attrs {
	case literalValue:
		key := strings.ToLower(string(t.raw))
		if lit, found := floatLits[key]; found {
			return float32(lit), true
		}
	case numberValue:
		f, err := strconv.ParseFloat(t.str, 64)
		// Overflows are treated as (-)infinity.
		// A non-nil error from ParseFloat is asserted to be *strconv.NumError.
		if err == nil || err.(*strconv.NumError).Err == strconv.ErrRange {
			return float32(f), true
		}
	}
	return 0, false
}
// These are the supported float literals. C++ permits case-insensitive
// variants of these, so lookups use the lowercased token text.
var floatLits = map[string]float64{
"nan": math.NaN(),
"inf": math.Inf(1),
"infinity": math.Inf(1),
"-inf": math.Inf(-1),
"-infinity": math.Inf(-1),
}
// TokenEquals reports whether x and y are equal, comparing every field of
// the two Tokens: kind, attrs, numAttrs, pos, the raw bytes and the parsed
// string.
func TokenEquals(x, y Token) bool {
	if x.kind != y.kind || x.attrs != y.attrs || x.numAttrs != y.numAttrs {
		return false
	}
	if x.pos != y.pos {
		return false
	}
	return bytes.Equal(x.raw, y.raw) && x.str == y.str
}

View File

@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package text implements the text format for protocol buffers.
// This package has no semantic understanding for protocol buffers and is only
// a parser and composer for the format.
//
// There is no formal specification for the protobuf text format, as such the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
//
// This package is neither a superset nor a subset of the C++ implementation.
// This implementation permits a more liberal grammar in some cases to be
// backwards compatible with the historical Go implementation.
// Future parsings unique to Go should not be added.
// Some grammars allowed by the C++ implementation are deliberately
// not implemented here because they are considered a bug by the protobuf team
// and should not be replicated.
//
// The Go implementation should implement a sufficient amount of the C++
// grammar such that the default text serialization by C++ can be parsed by Go.
// However, just because the C++ parser accepts some input does not mean that
// the Go implementation should as well.
//
// The text format is almost a superset of JSON except:
// * message keys are not quoted strings, but identifiers
// * the top-level value must be a message without the delimiters
package text

View File

@ -5,16 +5,45 @@
package text
import (
"regexp"
"math"
"math/bits"
"strconv"
"strings"
"unicode/utf8"
"google.golang.org/protobuf/internal/detrand"
"google.golang.org/protobuf/internal/errors"
)
// Marshal serializes v as the proto text format, where v must be a Message.
// In the proto text format, the top-level value is always a message where the
// delimiters are elided.
// encType represents an encoding type.
type encType uint8
// Apart from the blank zero value, each encType is a distinct single bit
// (1, 2, 4, 8) so that several types can be tested at once with a mask such
// as scalar|messageClose.
const (
_ encType = (1 << iota) / 2 // 0
name // 1
scalar // 2
messageOpen // 4
messageClose // 8
)
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
// encoderState holds the mutable state captured by Snapshot and restored
// by Reset.
encoderState
// indent is the string used for one level of indentation; empty means
// single-line output.
indent string
newline string // set to "\n" if len(indent) > 0
// delims holds the opening and closing message delimiters.
delims [2]byte
// outputASCII is passed to appendString when writing string values.
outputASCII bool
}
// encoderState is the part of an Encoder that changes while writing. It is
// the value handed out by Snapshot and accepted by Reset.
type encoderState struct {
// lastType is the type of the most recently written construct; prepareNext
// reads it to decide on spacing and updates it.
lastType encType
// indents is the accumulated indentation for the current nesting level.
indents []byte
// out is the output written so far, as returned by Bytes.
out []byte
}
// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline.
@ -25,164 +54,214 @@ import (
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
func Marshal(v Value, indent string, delims [2]byte, outputASCII bool) ([]byte, error) {
p := encoder{}
func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
e := &Encoder{}
if len(indent) > 0 {
if strings.Trim(indent, " \t") != "" {
return nil, errors.New("indent may only be composed of space and tab characters")
}
p.indent = indent
p.newline = "\n"
e.indent = indent
e.newline = "\n"
}
switch delims {
case [2]byte{0, 0}:
p.delims = [2]byte{'{', '}'}
e.delims = [2]byte{'{', '}'}
case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
p.delims = delims
e.delims = delims
default:
return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
}
p.outputASCII = outputASCII
e.outputASCII = outputASCII
err := p.marshalMessage(v, false)
if err != nil {
return nil, err
}
return p.out, nil
return e, nil
}
type encoder struct {
out []byte
indent string
indents []byte
newline string // set to "\n" if len(indent) > 0
delims [2]byte
outputASCII bool
// Bytes returns the content of the written bytes.
func (e *Encoder) Bytes() []byte {
return e.out
}
func (p *encoder) marshalList(v Value) error {
if v.Type() != List {
return errors.New("invalid type %v, expected list", v.Type())
}
elems := v.List()
p.out = append(p.out, '[')
p.indents = append(p.indents, p.indent...)
if len(elems) > 0 {
p.out = append(p.out, p.newline...)
}
for i, elem := range elems {
p.out = append(p.out, p.indents...)
if err := p.marshalValue(elem); err != nil {
return err
}
if i < len(elems)-1 {
p.out = append(p.out, ',')
}
p.out = append(p.out, p.newline...)
}
p.indents = p.indents[:len(p.indents)-len(p.indent)]
if len(elems) > 0 {
p.out = append(p.out, p.indents...)
}
p.out = append(p.out, ']')
return nil
// StartMessage writes out the '{' or '<' symbol.
func (e *Encoder) StartMessage() {
e.prepareNext(messageOpen)
e.out = append(e.out, e.delims[0])
}
func (p *encoder) marshalMessage(v Value, emitDelims bool) error {
if v.Type() != Message {
return errors.New("invalid type %v, expected message", v.Type())
// EndMessage writes out the '}' or '>' symbol.
func (e *Encoder) EndMessage() {
e.prepareNext(messageClose)
e.out = append(e.out, e.delims[1])
}
// WriteName writes out the field name and the separator ':'.
// The name s is appended as given, without any validation or quoting.
func (e *Encoder) WriteName(s string) {
e.prepareNext(name)
e.out = append(e.out, s...)
e.out = append(e.out, ':')
}
// WriteBool writes out the given boolean value.
func (e *Encoder) WriteBool(b bool) {
if b {
e.WriteLiteral("true")
} else {
e.WriteLiteral("false")
}
items := v.Message()
if emitDelims {
p.out = append(p.out, p.delims[0])
p.indents = append(p.indents, p.indent...)
if len(items) > 0 {
p.out = append(p.out, p.newline...)
}
// WriteString writes out the given string value.
func (e *Encoder) WriteString(s string) {
e.prepareNext(scalar)
e.out = appendString(e.out, s, e.outputASCII)
}
func appendString(out []byte, in string, outputASCII bool) []byte {
out = append(out, '"')
i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
case r == utf8.RuneError && n == 1:
// We do not report invalid UTF-8 because strings in the text format
// are used to represent both the proto string and bytes type.
r = rune(in[0])
fallthrough
case r < ' ' || r == '"' || r == '\\':
out = append(out, '\\')
switch r {
case '"', '\\':
out = append(out, byte(r))
case '\n':
out = append(out, 'n')
case '\r':
out = append(out, 'r')
case '\t':
out = append(out, 't')
default:
out = append(out, 'x')
out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
}
in = in[n:]
case outputASCII && r >= utf8.RuneSelf:
out = append(out, '\\')
if r <= math.MaxUint16 {
out = append(out, 'u')
out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
} else {
out = append(out, 'U')
out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
}
in = in[n:]
default:
i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
for i, item := range items {
p.out = append(p.out, p.indents...)
if err := p.marshalKey(item[0]); err != nil {
return err
out = append(out, '"')
return out
}
// indexNeedEscapeInString returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
func indexNeedEscapeInString(s string) int {
for i := 0; i < len(s); i++ {
if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
return i
}
p.out = append(p.out, ':')
if len(p.indent) > 0 {
p.out = append(p.out, ' ')
// For multi-line output, add a random extra space after key:
// to make output unstable.
}
return len(s)
}
// WriteFloat writes out the given float value for given bitSize.
func (e *Encoder) WriteFloat(n float64, bitSize int) {
e.prepareNext(scalar)
e.out = appendFloat(e.out, n, bitSize)
}
// appendFloat appends the text form of n to out and returns the extended
// slice. NaN and the infinities are written as the literals "nan", "inf" and
// "-inf". Every other value uses the shortest 'g' representation that
// round-trips at the given bitSize (32 or 64).
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n > 0 {
			return append(out, "inf"...)
		}
		return append(out, "-inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
// WriteInt writes out the given signed integer value in base 10.
func (e *Encoder) WriteInt(n int64) {
	e.prepareNext(scalar)
	// Append the digits straight into the output buffer instead of
	// formatting into a temporary string first.
	e.out = strconv.AppendInt(e.out, n, 10)
}
// WriteUint writes out the given unsigned integer value in base 10.
func (e *Encoder) WriteUint(n uint64) {
	e.prepareNext(scalar)
	// Append the digits straight into the output buffer instead of
	// formatting into a temporary string first.
	e.out = strconv.AppendUint(e.out, n, 10)
}
// WriteLiteral writes out the given string as a literal value without quotes.
// This is used for writing enum literal strings.
func (e *Encoder) WriteLiteral(s string) {
e.prepareNext(scalar)
e.out = append(e.out, s...)
}
// prepareNext adds possible space and indentation for the next value based
// on last encType and indent option. It also updates e.lastType to next.
func (e *Encoder) prepareNext(next encType) {
defer func() {
e.lastType = next
}()
// Single line.
if len(e.indent) == 0 {
// Add space after each field before the next one.
if e.lastType&(scalar|messageClose) != 0 && next == name {
e.out = append(e.out, ' ')
// Add a random extra space to make output unstable.
if detrand.Bool() {
p.out = append(p.out, ' ')
e.out = append(e.out, ' ')
}
}
if err := p.marshalValue(item[1]); err != nil {
return err
}
if i < len(items)-1 && len(p.indent) == 0 {
p.out = append(p.out, ' ')
// For single-line output, add a random extra space after a field
// to make output unstable.
if detrand.Bool() {
p.out = append(p.out, ' ')
}
}
p.out = append(p.out, p.newline...)
return
}
if emitDelims {
p.indents = p.indents[:len(p.indents)-len(p.indent)]
if len(items) > 0 {
p.out = append(p.out, p.indents...)
}
p.out = append(p.out, p.delims[1])
}
return nil
}
// This expression is more liberal than ConsumeAnyTypeUrl in C++.
// However, the C++ parser does not handle many legal URL strings.
// The Go implementation is more liberal to be backwards compatible with
// the historical Go implementation which was overly liberal (and buggy).
var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
func (p *encoder) marshalKey(v Value) error {
switch v.Type() {
case String:
var err error
p.out = append(p.out, '[')
if len(urlRegexp.FindString(v.str)) == len(v.str) {
p.out = append(p.out, v.str...)
} else {
err = p.marshalString(v)
// Multi-line.
switch {
case e.lastType == name:
e.out = append(e.out, ' ')
// Add a random extra space after name: to make output unstable.
if detrand.Bool() {
e.out = append(e.out, ' ')
}
p.out = append(p.out, ']')
return err
case Uint:
return p.marshalNumber(v)
case Name:
s, _ := v.Name()
p.out = append(p.out, s...)
return nil
default:
return errors.New("invalid type %v to encode key", v.Type())
case e.lastType == messageOpen && next != messageClose:
e.indents = append(e.indents, e.indent...)
e.out = append(e.out, '\n')
e.out = append(e.out, e.indents...)
case e.lastType&(scalar|messageClose) != 0:
if next == messageClose {
e.indents = e.indents[:len(e.indents)-len(e.indent)]
}
e.out = append(e.out, '\n')
e.out = append(e.out, e.indents...)
}
}
func (p *encoder) marshalValue(v Value) error {
switch v.Type() {
case Bool, Int, Uint, Float32, Float64:
return p.marshalNumber(v)
case String:
return p.marshalString(v)
case List:
return p.marshalList(v)
case Message:
return p.marshalMessage(v, true)
case Name:
s, _ := v.Name()
p.out = append(p.out, s...)
return nil
default:
return errors.New("invalid type %v to encode value", v.Type())
}
// Snapshot returns the current snapshot for use in Reset.
func (e *Encoder) Snapshot() encoderState {
return e.encoderState
}
// Reset resets the Encoder to the given encoderState from a Snapshot.
func (e *Encoder) Reset(es encoderState) {
e.encoderState = es
}

View File

@ -0,0 +1,549 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text_test
import (
"math"
"strings"
"testing"
"unicode/utf8"
"github.com/google/go-cmp/cmp"
"google.golang.org/protobuf/internal/detrand"
"google.golang.org/protobuf/internal/encoding/text"
)
// Disable detrand to enable direct comparisons on outputs.
func init() { detrand.Disable() }
func TestEncoder(t *testing.T) {
tests := []encoderTestCase{
{
desc: "no-opt",
write: func(e *text.Encoder) {},
wantOut: ``,
wantOutIndent: ``,
},
{
desc: "true",
write: func(e *text.Encoder) {
e.WriteName("bool")
e.WriteBool(true)
},
wantOut: `bool:true`,
wantOutIndent: `bool: true`,
},
{
desc: "false",
write: func(e *text.Encoder) {
e.WriteName("bool")
e.WriteBool(false)
},
wantOut: `bool:false`,
wantOutIndent: `bool: false`,
},
{
desc: "bracket name",
write: func(e *text.Encoder) {
e.WriteName("[extension]")
e.WriteString("hello")
},
wantOut: `[extension]:"hello"`,
wantOutIndent: `[extension]: "hello"`,
},
{
desc: "numeric name",
write: func(e *text.Encoder) {
e.WriteName("01234")
e.WriteString("hello")
},
wantOut: `01234:"hello"`,
wantOutIndent: `01234: "hello"`,
},
{
desc: "string",
write: func(e *text.Encoder) {
e.WriteName("str")
e.WriteString("hello world")
},
wantOut: `str:"hello world"`,
wantOutIndent: `str: "hello world"`,
},
{
desc: "enum",
write: func(e *text.Encoder) {
e.WriteName("enum")
e.WriteLiteral("ENUM_VALUE")
},
wantOut: `enum:ENUM_VALUE`,
wantOutIndent: `enum: ENUM_VALUE`,
},
{
desc: "float64",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(1.0199999809265137, 64)
},
wantOut: `float64:1.0199999809265137`,
wantOutIndent: `float64: 1.0199999809265137`,
},
{
desc: "float64 max value",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(math.MaxFloat64, 64)
},
wantOut: `float64:1.7976931348623157e+308`,
wantOutIndent: `float64: 1.7976931348623157e+308`,
},
{
desc: "float64 min value",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(-math.MaxFloat64, 64)
},
wantOut: `float64:-1.7976931348623157e+308`,
wantOutIndent: `float64: -1.7976931348623157e+308`,
},
{
desc: "float64 nan",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(math.NaN(), 64)
},
wantOut: `float64:nan`,
wantOutIndent: `float64: nan`,
},
{
desc: "float64 inf",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(math.Inf(+1), 64)
},
wantOut: `float64:inf`,
wantOutIndent: `float64: inf`,
},
{
desc: "float64 -inf",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(math.Inf(-1), 64)
},
wantOut: `float64:-inf`,
wantOutIndent: `float64: -inf`,
},
{
desc: "float64 negative zero",
write: func(e *text.Encoder) {
e.WriteName("float64")
e.WriteFloat(math.Copysign(0, -1), 64)
},
wantOut: `float64:-0`,
wantOutIndent: `float64: -0`,
},
{
desc: "float32",
write: func(e *text.Encoder) {
e.WriteName("float")
e.WriteFloat(1.02, 32)
},
wantOut: `float:1.02`,
wantOutIndent: `float: 1.02`,
},
{
desc: "float32 max value",
write: func(e *text.Encoder) {
e.WriteName("float32")
e.WriteFloat(math.MaxFloat32, 32)
},
wantOut: `float32:3.4028235e+38`,
wantOutIndent: `float32: 3.4028235e+38`,
},
{
desc: "float32 nan",
write: func(e *text.Encoder) {
e.WriteName("float32")
e.WriteFloat(math.NaN(), 32)
},
wantOut: `float32:nan`,
wantOutIndent: `float32: nan`,
},
{
desc: "float32 inf",
write: func(e *text.Encoder) {
e.WriteName("float32")
e.WriteFloat(math.Inf(+1), 32)
},
wantOut: `float32:inf`,
wantOutIndent: `float32: inf`,
},
{
desc: "float32 -inf",
write: func(e *text.Encoder) {
e.WriteName("float32")
e.WriteFloat(math.Inf(-1), 32)
},
wantOut: `float32:-inf`,
wantOutIndent: `float32: -inf`,
},
{
desc: "float32 negative zero",
write: func(e *text.Encoder) {
e.WriteName("float32")
e.WriteFloat(math.Copysign(0, -1), 32)
},
wantOut: `float32:-0`,
wantOutIndent: `float32: -0`,
},
{
desc: "int64 max value",
write: func(e *text.Encoder) {
e.WriteName("int")
e.WriteInt(math.MaxInt64)
},
wantOut: `int:9223372036854775807`,
wantOutIndent: `int: 9223372036854775807`,
},
{
desc: "int64 min value",
write: func(e *text.Encoder) {
e.WriteName("int")
e.WriteInt(math.MinInt64)
},
wantOut: `int:-9223372036854775808`,
wantOutIndent: `int: -9223372036854775808`,
},
{
desc: "uint",
write: func(e *text.Encoder) {
e.WriteName("uint")
e.WriteUint(math.MaxUint64)
},
wantOut: `uint:18446744073709551615`,
wantOutIndent: `uint: 18446744073709551615`,
},
{
desc: "empty message field",
write: func(e *text.Encoder) {
e.WriteName("m")
e.StartMessage()
e.EndMessage()
},
wantOut: `m:{}`,
wantOutIndent: `m: {}`,
},
{
desc: "multiple fields",
write: func(e *text.Encoder) {
e.WriteName("bool")
e.WriteBool(true)
e.WriteName("str")
e.WriteString("hello")
e.WriteName("str")
e.WriteString("world")
e.WriteName("m")
e.StartMessage()
e.EndMessage()
e.WriteName("[int]")
e.WriteInt(49)
e.WriteName("float64")
e.WriteFloat(1.00023e4, 64)
e.WriteName("101")
e.WriteString("unknown")
},
wantOut: `bool:true str:"hello" str:"world" m:{} [int]:49 float64:10002.3 101:"unknown"`,
wantOutIndent: `bool: true
str: "hello"
str: "world"
m: {}
[int]: 49
float64: 10002.3
101: "unknown"`,
},
{
desc: "populated message fields",
write: func(e *text.Encoder) {
e.WriteName("m1")
e.StartMessage()
{
e.WriteName("str")
e.WriteString("hello")
}
e.EndMessage()
e.WriteName("bool")
e.WriteBool(true)
e.WriteName("m2")
e.StartMessage()
{
e.WriteName("str")
e.WriteString("world")
e.WriteName("m2-1")
e.StartMessage()
e.EndMessage()
e.WriteName("m2-2")
e.StartMessage()
{
e.WriteName("[int]")
e.WriteInt(49)
}
e.EndMessage()
e.WriteName("float64")
e.WriteFloat(1.00023e4, 64)
}
e.EndMessage()
e.WriteName("101")
e.WriteString("unknown")
},
wantOut: `m1:{str:"hello"} bool:true m2:{str:"world" m2-1:{} m2-2:{[int]:49} float64:10002.3} 101:"unknown"`,
wantOutIndent: `m1: {
str: "hello"
}
bool: true
m2: {
str: "world"
m2-1: {}
m2-2: {
[int]: 49
}
float64: 10002.3
}
101: "unknown"`,
},
}
for _, tc := range tests {
t.Run(tc.desc, func(t *testing.T) {
runEncoderTest(t, tc, [2]byte{})
// Test using the angle brackets.
// Testcases should not contain characters '{' and '}'.
tc.wantOut = replaceDelims(tc.wantOut)
tc.wantOutIndent = replaceDelims(tc.wantOutIndent)
runEncoderTest(t, tc, [2]byte{'<', '>'})
})
}
}
// encoderTestCase describes one TestEncoder scenario.
type encoderTestCase struct {
// desc is used as the subtest name.
desc string
// write performs the sequence of Encoder calls under test.
write func(*text.Encoder)
// wantOut is the expected single-line output; the compact check is
// skipped when it is empty.
wantOut string
// wantOutIndent is the expected output with a tab indent; the multi-line
// check is skipped when it is empty.
wantOutIndent string
}
// runEncoderTest runs tc.write against fresh Encoders using the given message
// delimiters, once in compact mode and once with a tab indent, and compares
// the output with tc.wantOut and tc.wantOutIndent. A mode whose expected
// output is empty is skipped.
func runEncoderTest(t *testing.T, tc encoderTestCase, delims [2]byte) {
	t.Helper()

	// encode returns what tc.write produces on a new Encoder with indent.
	encode := func(indent string) string {
		t.Helper()
		enc, err := text.NewEncoder(indent, delims, false)
		if err != nil {
			t.Fatalf("NewEncoder returned error: %v", err)
		}
		tc.write(enc)
		return string(enc.Bytes())
	}

	if want := tc.wantOut; want != "" {
		if got := encode(""); got != want {
			t.Errorf("(compact)\n<got>\n%v\n<want>\n%v\n", got, want)
		}
	}
	if want := tc.wantOutIndent; want != "" {
		if got := encode("\t"); got != want {
			t.Errorf("(multi-line)\n<got>\n%v\n<want>\n%v\n<diff -want +got>\n%v\n",
				got, want, cmp.Diff(want, got))
		}
	}
}
// replaceDelims returns s with every '{' replaced by '<' and every '}'
// replaced by '>', turning expected brace-delimited output into its
// angle-bracket equivalent.
func replaceDelims(s string) string {
	return strings.NewReplacer("{", "<", "}", ">").Replace(s)
}
// Test for UTF-8 and ASCII outputs.
func TestEncodeStrings(t *testing.T) {
tests := []struct {
in string
wantOut string
wantOutASCII string
}{
{
in: `"`,
wantOut: `"\""`,
},
{
in: `'`,
wantOut: `"'"`,
},
{
in: "hello\u1234world",
wantOut: "\"hello\u1234world\"",
wantOutASCII: `"hello\u1234world"`,
},
{
// String that has as few escaped characters as possible.
in: func() string {
var b []byte
for i := 0; i < utf8.RuneSelf; i++ {
switch i {
case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
default:
b = append(b, byte(i))
}
}
return string(b)
}(),
wantOut: `"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
wantOutASCII: `"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
},
{
// Valid UTF-8 wire encoding of the RuneError rune.
in: string(utf8.RuneError),
wantOut: `"` + string(utf8.RuneError) + `"`,
wantOutASCII: `"\ufffd"`,
},
{
in: "\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff",
wantOut: `"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
wantOutASCII: `"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
},
{
in: "\001x",
wantOut: `"\x01x"`,
wantOutASCII: `"\x01x"`,
},
{
in: "\012x",
wantOut: `"\nx"`,
wantOutASCII: `"\nx"`,
},
{
in: "\123x",
wantOut: `"Sx"`,
wantOutASCII: `"Sx"`,
},
{
in: "\1234x",
wantOut: `"S4x"`,
wantOutASCII: `"S4x"`,
},
{
in: "\001",
wantOut: `"\x01"`,
wantOutASCII: `"\x01"`,
},
{
in: "\012",
wantOut: `"\n"`,
wantOutASCII: `"\n"`,
},
{
in: "\123",
wantOut: `"S"`,
wantOutASCII: `"S"`,
},
{
in: "\1234",
wantOut: `"S4"`,
wantOutASCII: `"S4"`,
},
{
in: "\377",
wantOut: `"\xff"`,
wantOutASCII: `"\xff"`,
},
{
in: "\x0fx",
wantOut: `"\x0fx"`,
wantOutASCII: `"\x0fx"`,
},
{
in: "\xffx",
wantOut: `"\xffx"`,
wantOutASCII: `"\xffx"`,
},
{
in: "\xfffx",
wantOut: `"\xfffx"`,
wantOutASCII: `"\xfffx"`,
},
{
in: "\x0f",
wantOut: `"\x0f"`,
wantOutASCII: `"\x0f"`,
},
{
in: "\xff",
wantOut: `"\xff"`,
wantOutASCII: `"\xff"`,
},
{
in: "\xfff",
wantOut: `"\xfff"`,
wantOutASCII: `"\xfff"`,
},
}
for _, tc := range tests {
t.Run("", func(t *testing.T) {
if tc.wantOut != "" {
runEncodeStringsTest(t, tc.in, tc.wantOut, false)
}
if tc.wantOutASCII != "" {
runEncodeStringsTest(t, tc.in, tc.wantOutASCII, true)
}
})
}
}
// runEncodeStringsTest writes in as a string value on a fresh compact Encoder
// configured with outputASCII and reports an error if the output differs from
// want. Messages are prefixed with the character type under test.
func runEncodeStringsTest(t *testing.T, in string, want string, outputASCII bool) {
	t.Helper()

	var charType string
	if outputASCII {
		charType = "ASCII"
	} else {
		charType = "UTF-8"
	}

	enc, err := text.NewEncoder("", [2]byte{}, outputASCII)
	if err != nil {
		t.Fatalf("[%s] NewEncoder returned error: %v", charType, err)
	}
	enc.WriteString(in)
	if got := string(enc.Bytes()); got != want {
		t.Errorf("[%s] WriteString(%q)\n<got>\n%v\n<want>\n%v\n", charType, in, got, want)
	}
}
// TestReset checks that Reset restores the Encoder to the state captured by
// Snapshot, so that output written after the snapshot is discarded.
func TestReset(t *testing.T) {
enc, err := text.NewEncoder("\t", [2]byte{}, false)
if err != nil {
t.Fatalf("NewEncoder returned error: %v", err)
}
enc.WriteName("foo")
// Capture the state right after the field name has been written.
pos := enc.Snapshot()
// Attempt to write a message value.
enc.StartMessage()
enc.WriteName("bar")
enc.WriteUint(10)
// Reset to the snapshot and write a string value instead.
enc.Reset(pos)
enc.WriteString("0123456789")
got := string(enc.Bytes())
want := `foo: "0123456789"`
if got != want {
t.Errorf("Reset did not restore given position:\n<got>\n%v\n<want>\n%v\n", got, want)
}
}

View File

@ -1,337 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"io"
"math"
"strconv"
"google.golang.org/protobuf/internal/errors"
)
// marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
func (p *encoder) marshalNumber(v Value) error {
var err error
p.out, err = appendNumber(p.out, v)
return err
}
// appendNumber appends the text form of v, which must be a Bool, Int, Uint,
// Float32 or Float64, to out. When v carries raw bytes they are appended
// verbatim; otherwise the value is formatted from its numeric content.
// An error is returned for any other type.
func appendNumber(out []byte, v Value) ([]byte, error) {
	// Prefer the original raw representation when one is available.
	if len(v.raw) > 0 {
		switch v.Type() {
		case Bool, Int, Uint, Float32, Float64:
			return append(out, v.raw...), nil
		}
	}
	switch v.Type() {
	case Bool:
		lit := "false"
		if b, _ := v.Bool(); b {
			lit = "true"
		}
		return append(out, lit...), nil
	case Int:
		return strconv.AppendInt(out, int64(v.num), 10), nil
	case Uint:
		return strconv.AppendUint(out, uint64(v.num), 10), nil
	case Float32:
		return appendFloat(out, v, 32)
	case Float64:
		return appendFloat(out, v, 64)
	default:
		return nil, errors.New("invalid type %v, expected bool or number", v.Type())
	}
}
// appendFloat appends the text form of the float stored in v.num (interpreted
// as float64 bits) to out. NaN and the infinities are written as "nan", "inf"
// and "-inf"; other values use the shortest 'g' format for the given bitSize.
// The returned error is always nil.
func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
	f := math.Float64frombits(v.num)
	if math.IsNaN(f) {
		return append(out, "nan"...), nil
	}
	if math.IsInf(f, +1) {
		return append(out, "inf"...), nil
	}
	if math.IsInf(f, -1) {
		return append(out, "-inf"...), nil
	}
	return strconv.AppendFloat(out, f, 'g', -1, bitSize), nil
}
// The literal table below was derived by reverse engineering the C++ code
// in tokenizer.cc and text_format.cc.
// NOTE(review): this comment originally said "regular expressions", but the
// block declares only the literals map — confirm whether the expressions it
// referred to are declared elsewhere in the package.
var (
	// literals maps each bool and special float token to the Go value it
	// denotes (bool for the boolean spellings, float64 for nan/inf/-inf).
	literals = map[string]interface{}{
		// These exact literals are the ones supported in C++.
		// In C++, a 1-bit unsigned integer is also allowed to represent
		// a boolean. This is handled in Value.Bool.
		"t":     true,
		"true":  true,
		"True":  true,
		"f":     false,
		"false": false,
		"False": false,
		// C++ permits "-nan" and the case-insensitive variants of these.
		// However, Go continues to be case-sensitive.
		"nan":  math.NaN(),
		"inf":  math.Inf(+1),
		"-inf": math.Inf(-1),
	}
)
// unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the front of the
// decoder's input and advances the decoder by the number of bytes
// consumeNumber reports (zero when parsing fails).
func (p *decoder) unmarshalNumber() (Value, error) {
	val, size, err := consumeNumber(p.in)
	p.consume(size)
	return val, err
}
// consumeNumber parses a bool literal, special float literal, or number at
// the start of in. It returns the parsed Value (carrying the raw input bytes
// it was parsed from) and the number of input bytes it occupies.
//
// Empty input yields io.ErrUnexpectedEOF. Input that parseNumber rejects
// yields a syntax error. Text that parseNumber accepts but strconv cannot
// convert yields the strconv error unchanged.
func consumeNumber(in []byte) (Value, int, error) {
	if len(in) == 0 {
		return Value{}, 0, io.ErrUnexpectedEOF
	}

	// Bool and nan/inf literals are recognized before numeric parsing.
	if lit, n := matchLiteral(in); n > 0 {
		return rawValueOf(lit, in[:n]), n, nil
	}

	num, ok := parseNumber(in)
	if !ok {
		return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
	}

	text := string(num.value)
	switch {
	case num.typ == numFloat:
		f, err := strconv.ParseFloat(text, 64)
		if err != nil {
			return Value{}, 0, err
		}
		// The raw bytes span the full token, including any f/F suffix.
		return rawValueOf(f, in[:num.size]), num.size, nil
	case num.neg:
		// Base 0 lets strconv interpret the 0x and leading-0 prefixes.
		i, err := strconv.ParseInt(text, 0, 64)
		if err != nil {
			return Value{}, 0, err
		}
		return rawValueOf(i, num.value), num.size, nil
	default:
		u, err := strconv.ParseUint(text, 0, 64)
		if err != nil {
			return Value{}, 0, err
		}
		return rawValueOf(u, num.value), num.size, nil
	}
}
// matchLiteral checks whether in starts with a bool literal (t, T, true,
// True, f, F, false, False) or a special float literal (nan, inf, -inf) that
// is followed by a delimiter or the end of input. It returns the literal's Go
// value and its length in bytes, or (nil, 0) when nothing matches.
//
// in must not be empty.
func matchLiteral(in []byte) (interface{}, int) {
	// boolLen returns the length of the bool literal at the start of in:
	// 1 for the single-letter spelling, 1+len(tail) for the full word, or 0
	// if neither spelling is present.
	boolLen := func(tail string) int {
		after := in[1:]
		switch {
		case len(after) == 0 || isDelim(after[0]):
			return 1
		case matchStringWithDelim(tail, after) > 0:
			return 1 + len(tail)
		}
		return 0
	}

	switch in[0] {
	case 't', 'T':
		if n := boolLen("rue"); n > 0 {
			return true, n
		}
	case 'f', 'F':
		if n := boolLen("alse"); n > 0 {
			return false, n
		}
	case 'n':
		if matchStringWithDelim("nan", in) > 0 {
			return math.NaN(), 3
		}
	case 'i':
		if matchStringWithDelim("inf", in) > 0 {
			return math.Inf(1), 3
		}
	case '-':
		if matchStringWithDelim("-inf", in) > 0 {
			return math.Inf(-1), 4
		}
	}
	return nil, 0
}
// matchStringWithDelim returns len(s) if b starts with s and the match is
// followed either by the end of b or by a delimiter byte. Otherwise it
// returns 0.
func matchStringWithDelim(s string, b []byte) int {
	if bytes.HasPrefix(b, []byte(s)) {
		if end := len(s); end >= len(b) || isDelim(b[end]) {
			return end
		}
	}
	return 0
}
// numType classifies the kind of number recognized by parseNumber.
type numType uint8

// numDec is the zero value; the remaining kinds are distinct single bits so
// that several of them can be tested at once with a mask.
const (
	numDec   numType = 0
	numHex   numType = 1 << 0
	numOct   numType = 1 << 1
	numFloat numType = 1 << 2
)
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package.
type number struct {
	// typ is the kind of number that was recognized (decimal, hex, octal or
	// float).
	typ numType
	// neg records whether the input started with a '-' sign.
	neg bool
	// Size of input taken up by the number. This may not be the same as
	// len(number.value): a trailing f/F float suffix is counted in size but
	// is not part of value.
	size int
	// Bytes for doing strconv.Parse{Float,Int,Uint} conversion. This is a
	// sub-slice of the input passed to parseNumber, not a copy.
	value []byte
}
// parseNumber constructs a number object from given input. It allows for the
// following patterns:
//   integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
//   float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
//
// The second result is false when input is empty, does not start with one of
// the patterns above, or the matched text is not followed by a delimiter or
// the end of input.
//
// The scan is deliberately lenient in two places: a lone "." (optionally
// signed) and an exponent marker with no digits after it are both returned as
// valid floats here. Rejecting them is left to the strconv conversion that
// the caller performs on number.value.
func parseNumber(input []byte) (number, bool) {
	// size counts the bytes of input consumed so far; s is the unconsumed
	// remainder of input.
	var size int
	var neg bool
	typ := numDec
	s := input
	if len(s) == 0 {
		return number{}, false
	}
	// Optional -
	if s[0] == '-' {
		neg = true
		s = s[1:]
		size++
		if len(s) == 0 {
			return number{}, false
		}
	}
	// C++ allows for whitespace and comments in between the negative sign and
	// the rest of the number. This logic currently does not but is consistent
	// with v1.
	switch {
	case s[0] == '0':
		if len(s) > 1 {
			switch {
			case s[1] == 'x' || s[1] == 'X':
				// Parse as hex number.
				typ = numHex
				// n starts at 2 to account for the "0x" prefix.
				n := 2
				s = s[2:]
				for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
					('a' <= s[0] && s[0] <= 'f') ||
					('A' <= s[0] && s[0] <= 'F')) {
					s = s[1:]
					n++
				}
				// A bare "0x" with no hex digits is not a number.
				if n == 2 {
					return number{}, false
				}
				size += n
			case '0' <= s[1] && s[1] <= '7':
				// Parse as octal number.
				typ = numOct
				// n starts at 2 for the leading 0 and the first octal digit.
				n := 2
				s = s[2:]
				for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
					s = s[1:]
					n++
				}
				size += n
			}
			// Hex and octal numbers are complete at this point; they never
			// take a fraction, exponent or float suffix.
			if typ&(numHex|numOct) > 0 {
				if len(s) > 0 && !isDelim(s[0]) {
					return number{}, false
				}
				return number{
					typ:   typ,
					size:  size,
					neg:   neg,
					value: input[:size],
				}, true
			}
		}
		// A single 0 digit; a fraction, exponent or suffix may still follow.
		s = s[1:]
		size++
	case '1' <= s[0] && s[0] <= '9':
		n := 1
		s = s[1:]
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
		}
		size += n
	case s[0] == '.':
		// Handled below.
	default:
		return number{}, false
	}
	// . followed by 0 or more digits.
	if len(s) > 0 && s[0] == '.' {
		typ = numFloat
		n := 1
		s = s[1:]
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
		}
		size += n
	}
	// e or E followed by an optional - or + and 1 or more digits.
	// The len(s) >= 2 guard makes the s[0] access after dropping the marker
	// safe. The digit loop itself does not enforce the "1 or more" part.
	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
		typ = numFloat
		s = s[1:]
		n := 1
		if s[0] == '+' || s[0] == '-' {
			s = s[1:]
			n++
			if len(s) == 0 {
				return number{}, false
			}
		}
		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			n++
		}
		size += n
	}
	// At this point, input[:size] contains a valid number that can be converted
	// via strconv.Parse{Float,Int,Uint}.
	value := input[:size]
	// Optional suffix f or F for floats.
	// The suffix is counted in size but excluded from value, captured above.
	if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
		typ = numFloat
		s = s[1:]
		size++
	}
	// Check that next byte is a delimiter or it is at the end.
	if len(s) > 0 && !isDelim(s[0]) {
		return number{}, false
	}
	return number{
		typ:   typ,
		size:  size,
		neg:   neg,
		value: value,
	}, true
}

View File

@ -1,229 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"io"
"math"
"math/bits"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
"google.golang.org/protobuf/internal/errors"
"google.golang.org/protobuf/internal/strs"
)
// marshalString appends the quoted text form of the String value v to the
// encoder's output, escaping non-ASCII runes when p.outputASCII is set.
//
// p.out is replaced with whatever appendString returns, including on failure.
func (p *encoder) marshalString(v Value) error {
	buf, err := appendString(p.out, v, p.outputASCII)
	p.out = buf
	return err
}
// appendString appends v as a double-quoted text-format string to out and
// returns the extended slice. v must be of type String, otherwise a nil slice
// and an error are returned. When v carries raw bytes they are emitted
// verbatim.
//
// Control characters, '"' and '\\' are always escaped. Bytes that are not
// valid UTF-8 are written as \xNN escapes. When outputASCII is true, every
// rune at or above utf8.RuneSelf is written as a \uXXXX or \UXXXXXXXX escape.
func appendString(out []byte, v Value, outputASCII bool) ([]byte, error) {
	if typ := v.Type(); typ != String {
		return nil, errors.New("invalid type %v, expected string", typ)
	}
	if len(v.raw) > 0 {
		return append(out, v.raw...), nil
	}

	// appendHex writes r as lower-case hex, left-padded with zeros taken from
	// the tail of zeros so that the result is len(zeros) digits wide. The
	// number of significant hex digits is derived from the bit length of r.
	appendHex := func(dst []byte, zeros string, r rune) []byte {
		dst = append(dst, zeros[1+(bits.Len32(uint32(r))-1)/4:]...)
		return strconv.AppendUint(dst, uint64(r), 16)
	}

	s := v.String()
	out = append(out, '"')

	// Copy the leading run that needs no inspection in one step.
	skip := indexNeedEscapeInString(s)
	out = append(out, s[:skip]...)
	s = s[skip:]

	for len(s) > 0 {
		r, size := utf8.DecodeRuneInString(s)

		// We do not report invalid UTF-8 because strings in the text format
		// are used to represent both the proto string and bytes type.
		// The offending byte is escaped on its own instead.
		invalid := r == utf8.RuneError && size == 1
		if invalid {
			r = rune(s[0])
		}

		switch {
		case invalid || r < ' ' || r == '"' || r == '\\':
			out = append(out, '\\')
			switch r {
			case '"', '\\':
				out = append(out, byte(r))
			case '\n':
				out = append(out, 'n')
			case '\r':
				out = append(out, 'r')
			case '\t':
				out = append(out, 't')
			default:
				out = appendHex(append(out, 'x'), "00", r)
			}
			s = s[size:]
		case outputASCII && r >= utf8.RuneSelf:
			out = append(out, '\\')
			if r <= math.MaxUint16 {
				out = appendHex(append(out, 'u'), "0000", r)
			} else {
				out = appendHex(append(out, 'U'), "00000000", r)
			}
			s = s[size:]
		default:
			// Emit this rune together with the following run of bytes that
			// need no inspection.
			run := size + indexNeedEscapeInString(s[size:])
			out = append(out, s[:run]...)
			s = s[run:]
		}
	}
	return append(out, '"'), nil
}
// unmarshalString decodes a single quoted string literal from the front of
// the decoder's input and advances the decoder by the number of bytes
// consumeString reports (zero when parsing fails).
func (p *decoder) unmarshalString() (Value, error) {
	val, size, err := consumeString(p.in)
	p.consume(size)
	return val, err
}
// consumeString parses one string literal, delimited by a matching pair of
// single or double quotes, from the start of in. On success it returns the
// decoded string as a Value (carrying the raw literal, quotes included) and
// the number of input bytes the literal occupies.
//
// io.ErrUnexpectedEOF is returned when in is empty, when the closing quote is
// missing, or when an escape sequence is cut short. A syntax error is
// returned for a bad opening character, invalid UTF-8, a literal NUL or
// newline, or a malformed escape sequence.
func consumeString(in []byte) (Value, int, error) {
	// in0 keeps the original input so the consumed length and the raw
	// literal can be computed once the closing quote is found.
	in0 := in
	if len(in) == 0 {
		return Value{}, 0, io.ErrUnexpectedEOF
	}
	quote := in[0]
	if in[0] != '"' && in[0] != '\'' {
		return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
	}
	in = in[1:]
	// Start out aliasing the leading run of plain bytes in the input.
	i := indexNeedEscapeInBytes(in)
	in, out := in[i:], in[:i:i] // set cap to prevent mutations
	for len(in) > 0 {
		switch r, n := utf8.DecodeRune(in); {
		case r == utf8.RuneError && n == 1:
			return Value{}, 0, newSyntaxError("invalid UTF-8 detected")
		case r == 0 || r == '\n':
			return Value{}, 0, newSyntaxError("invalid character %q in string", r)
		case r == rune(quote):
			// Closing quote: everything from in0 up to and including it is
			// the raw literal.
			in = in[1:]
			n := len(in0) - len(in)
			v := rawValueOf(string(out), in0[:n:n])
			return v, n, nil
		case r == '\\':
			if len(in) < 2 {
				return Value{}, 0, io.ErrUnexpectedEOF
			}
			switch r := in[1]; r {
			case '"', '\'', '\\', '?':
				in, out = in[2:], append(out, r)
			case 'a':
				in, out = in[2:], append(out, '\a')
			case 'b':
				in, out = in[2:], append(out, '\b')
			case 'n':
				in, out = in[2:], append(out, '\n')
			case 'r':
				in, out = in[2:], append(out, '\r')
			case 't':
				in, out = in[2:], append(out, '\t')
			case 'v':
				in, out = in[2:], append(out, '\v')
			case 'f':
				in, out = in[2:], append(out, '\f')
			case '0', '1', '2', '3', '4', '5', '6', '7':
				// One, two, or three octal characters.
				// The value must fit in one byte; larger values make
				// ParseUint fail and are reported as a syntax error.
				n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
				if n > 3 {
					n = 3
				}
				v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
				if err != nil {
					return Value{}, 0, newSyntaxError("invalid octal escape code %q in string", in[:1+n])
				}
				in, out = in[1+n:], append(out, byte(v))
			case 'x':
				// One or two hexadecimal characters.
				// With zero hex digits ParseUint is given an empty string
				// and fails, which is reported as a syntax error.
				n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
				if n > 2 {
					n = 2
				}
				v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
				if err != nil {
					return Value{}, 0, newSyntaxError("invalid hex escape code %q in string", in[:2+n])
				}
				in, out = in[2+n:], append(out, byte(v))
			case 'u', 'U':
				// Four or eight hexadecimal characters
				// n is the full escape length, including the backslash and
				// the u/U marker.
				n := 6
				if r == 'U' {
					n = 10
				}
				if len(in) < n {
					return Value{}, 0, io.ErrUnexpectedEOF
				}
				v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
				if utf8.MaxRune < v || err != nil {
					return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:n])
				}
				in = in[n:]
				r := rune(v)
				// A surrogate code point must be followed immediately by a
				// \uXXXX escape holding the other half of the pair.
				if utf16.IsSurrogate(r) {
					if len(in) < 6 {
						return Value{}, 0, io.ErrUnexpectedEOF
					}
					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
					r = utf16.DecodeRune(r, rune(v))
					if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
						return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:6])
					}
					in = in[6:]
				}
				out = append(out, string(r)...)
			default:
				return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
			}
		default:
			// Copy this rune together with the following run of plain bytes.
			i := indexNeedEscapeInBytes(in[n:])
			in, out = in[n+i:], append(out, in[:n+i]...)
		}
	}
	// Ran out of input before seeing the closing quote.
	return Value{}, 0, io.ErrUnexpectedEOF
}
// unmarshalStrings unmarshals one or more adjacent string literals.
// Unlike unmarshalString, it follows the text-format rule that back-to-back
// literals denote a single string formed by concatenating their contents.
//
// E.g., `"foo" "bar" "baz"` => ValueOf("foobarbaz")
//
// The raw bytes of the returned Value cover the decoder input consumed by
// all of the literals.
func (p *decoder) unmarshalStrings() (Value, error) {
	// A closing quote unambiguously ends a literal, so the grammar needs no
	// whitespace or other separator between consecutive strings:
	//	`"foo"'bar'"baz"` => ValueOf("foobarbaz")
	start := p.in
	var joined strings.Builder
	for len(p.in) > 0 {
		if c := p.in[0]; c != '"' && c != '\'' {
			break
		}
		v, err := p.unmarshalString()
		if err != nil {
			return Value{}, err
		}
		joined.WriteString(v.String())
	}
	// Everything between the starting position and the current one is raw.
	n := len(start) - len(p.in)
	return rawValueOf(joined.String(), start[:n:n]), nil
}
// indexNeedEscapeInString returns the index of the first byte in s that may
// need escaping: a control character below space, a single or double quote,
// a backslash, or any byte at or above utf8.RuneSelf. If there is no such
// byte, it returns len(s).
func indexNeedEscapeInString(s string) int {
	for i, n := 0, len(s); i < n; i++ {
		switch c := s[i]; {
		case c < ' ', c >= utf8.RuneSelf:
			return i
		case c == '"', c == '\'', c == '\\':
			return i
		}
	}
	return len(s)
}
// indexNeedEscapeInBytes is the []byte counterpart of indexNeedEscapeInString:
// it returns the index of the first byte in b that needs escaping, or len(b)
// if there is none. It passes b through strs.UnsafeString so the string
// implementation can be reused.
// NOTE(review): presumably UnsafeString avoids copying b — confirm in strs.
func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }

View File

@ -1,863 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"fmt"
"math"
"strings"
"testing"
"unicode/utf8"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"google.golang.org/protobuf/internal/detrand"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
)
// Disable detrand to enable direct comparisons on outputs.
func init() { detrand.Disable() }

// S is a short alias for fmt.Sprintf, used to build test inputs.
var S = fmt.Sprintf

// V is a short alias for ValueOf.
var V = ValueOf

// ID wraps a protoreflect.Name in a Value. Giving the parameter the Name type
// lets untyped string constants in the test table become names.
var ID = func(n protoreflect.Name) Value { return V(n) }

// Lst is shorthand for a list of Values.
type Lst = []Value

// Msg is shorthand for a sequence of Value pairs; the test table writes each
// pair as {key, value}.
type Msg = [][2]Value
func Test(t *testing.T) {
const space = " \n\r\t"
tests := []struct {
in string
wantVal Value
wantOut string
wantOutBracket string
wantOutASCII string
wantOutIndent string
wantErr string
}{{
in: "",
wantVal: V(Msg{}),
wantOutIndent: "",
}, {
in: S("%s# hello%s", space, space),
wantVal: V(Msg{}),
}, {
in: S("%s# hello\rfoo:bar", space),
wantVal: V(Msg{}),
}, {
// Comments only extend until the newline.
in: S("%s# hello\nfoo:bar", space),
wantVal: V(Msg{{ID("foo"), ID("bar")}}),
wantOut: "foo:bar",
wantOutIndent: "foo: bar\n",
}, {
// NUL is an invalid whitespace since C++ uses C-strings.
in: "\x00",
wantErr: `invalid "\x00" as identifier`,
}, {
in: "foo:0",
wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
wantOut: "foo:0",
}, {
in: S("%sfoo%s:0", space, space),
wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
}, {
in: "foo bar:0",
wantErr: `expected ':' after message key`,
}, {
in: "[foo]:0",
wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
wantOut: "[foo]:0",
wantOutIndent: "[foo]: 0\n",
}, {
in: S("%s[%sfoo%s]%s:0", space, space, space, space),
wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
}, {
in: "[proto.package.name]:0",
wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
wantOut: "[proto.package.name]:0",
wantOutIndent: "[proto.package.name]: 0\n",
}, {
in: S("%s[%sproto.package.name%s]%s:0", space, space, space, space),
wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
}, {
in: "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0",
wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
wantOut: "[sub.domain.com/path/to/proto.package.name]:0",
wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n",
}, {
in: "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0",
wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
}, {
in: S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space),
wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
}, {
in: S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space),
wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
}, {
in: `['http://example.com/path/to/proto.package.name']:0`,
wantVal: V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}),
wantOut: `["http://example.com/path/to/proto.package.name"]:0`,
wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n",
}, {
in: "[proto.package.name:0",
wantErr: `invalid character ':', expected ']' at end of extension name`,
}, {
in: "[proto.package name]:0",
wantErr: `invalid character 'n', expected ']' at end of extension name`,
}, {
in: `["proto.package" "name"]:0`,
wantErr: `invalid character '"', expected ']' at end of extension name`,
}, {
in: `["\z"]`,
wantErr: `invalid escape code "\\z" in string`,
}, {
in: "[$]",
wantErr: `invalid "$" as identifier`,
}, {
in: `[proto.package.]:0`,
wantErr: `invalid "proto.package." as identifier`,
}, {
in: `[/proto.package]:0`,
wantErr: `invalid "/proto.package" as identifier`,
}, {
in: `[proto.package/]:0`,
wantErr: `invalid "proto.package/" as identifier`,
}, {
// This parses fine, but should result in an error later since no
// type name in proto will ever be just a number.
in: "[20]:0",
wantVal: V(Msg{{V("20"), V(uint32(0))}}),
wantOut: "[20]:0",
}, {
in: "20:0",
wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}),
wantOut: "20:0",
}, {
in: "0x20:0",
wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}),
wantOut: "32:0",
}, {
in: "020:0",
wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}),
wantOut: "16:0",
}, {
in: "-20:0",
wantErr: `invalid "-20" as identifier`,
}, {
in: `foo:true bar:"s" baz:{} qux:[] wib:id`,
wantVal: V(Msg{
{ID("foo"), V(true)},
{ID("bar"), V("s")},
{ID("baz"), V(Msg{})},
{ID("qux"), V(Lst{})},
{ID("wib"), ID("id")},
}),
wantOut: `foo:true bar:"s" baz:{} qux:[] wib:id`,
wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n",
}, {
in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`,
space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space),
wantVal: V(Msg{
{ID("foo"), V(true)},
{ID("bar"), V("s")},
{ID("baz"), V(Msg{})},
{ID("qux"), V(Lst{})},
{ID("wib"), ID("id")},
}),
}, {
in: `foo:true;`,
wantVal: V(Msg{{ID("foo"), V(true)}}),
wantOut: "foo:true",
wantOutIndent: "foo: true\n",
}, {
in: `foo:true,`,
wantVal: V(Msg{{ID("foo"), V(true)}}),
}, {
in: `foo:bar;,`,
wantErr: `invalid "," as identifier`,
}, {
in: `foo:bar,;`,
wantErr: `invalid ";" as identifier`,
}, {
in: `footrue`,
wantErr: `unexpected EOF`,
}, {
in: `foo true`,
wantErr: `expected ':' after message key`,
}, {
in: `foo"s"`,
wantErr: `expected ':' after message key`,
}, {
in: `foo "s"`,
wantErr: `expected ':' after message key`,
}, {
in: `foo{}`,
wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
wantOut: "foo:{}",
wantOutBracket: "foo:<>",
wantOutIndent: "foo: {}\n",
}, {
in: `foo {}`,
wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
}, {
in: `foo<>`,
wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
}, {
in: `foo <>`,
wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
}, {
in: `foo[]`,
wantErr: `expected ':' after message key`,
}, {
in: `foo []`,
wantErr: `expected ':' after message key`,
}, {
in: `foo:truebar:true`,
wantErr: `invalid ":" as identifier`,
}, {
in: `foo:"s"bar:true`,
wantVal: V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}),
wantOut: `foo:"s" bar:true`,
wantOutIndent: "foo: \"s\"\nbar: true\n",
}, {
in: `foo:0bar:true`,
wantErr: `invalid "0bar" as number or bool`,
}, {
in: `foo:{}bar:true`,
wantVal: V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}),
wantOut: "foo:{} bar:true",
wantOutBracket: "foo:<> bar:true",
wantOutIndent: "foo: {}\nbar: true\n",
}, {
in: `foo:[]bar:true`,
wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}),
}, {
in: `foo{bar:true}`,
wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
wantOut: "foo:{bar:true}",
wantOutBracket: "foo:<bar:true>",
wantOutIndent: "foo: {\n\tbar: true\n}\n",
}, {
in: `foo<bar:true>`,
wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
}, {
in: `foo{bar:true,}`,
wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
}, {
in: `foo{bar:true;}`,
wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
}, {
in: `foo{`,
wantErr: `unexpected EOF`,
}, {
in: `foo{ `,
wantErr: `unexpected EOF`,
}, {
in: `foo{[`,
wantErr: `unexpected EOF`,
}, {
in: `foo{[ `,
wantErr: `unexpected EOF`,
}, {
in: `foo{bar:true,;}`,
wantErr: `invalid ";" as identifier`,
}, {
in: `foo{bar:true;,}`,
wantErr: `invalid "," as identifier`,
}, {
in: `foo<bar:{}>`,
wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}),
wantOut: "foo:{bar:{}}",
wantOutBracket: "foo:<bar:<>>",
wantOutIndent: "foo: {\n\tbar: {}\n}\n",
}, {
in: `foo<bar:{>`,
wantErr: `invalid character '>', expected '}' at end of message`,
}, {
in: `foo<bar:{}`,
wantErr: `unexpected EOF`,
}, {
in: `arr:[]`,
wantVal: V(Msg{{ID("arr"), V(Lst{})}}),
wantOut: "arr:[]",
wantOutBracket: "arr:[]",
wantOutIndent: "arr: []\n",
}, {
in: `arr:[,]`,
wantErr: `invalid "," as number or bool`,
}, {
in: `arr:[0 0]`,
wantErr: `invalid character '0', expected ']' at end of list`,
}, {
in: `arr:["foo" "bar"]`,
wantVal: V(Msg{{ID("arr"), V(Lst{V("foobar")})}}),
wantOut: `arr:["foobar"]`,
wantOutBracket: `arr:["foobar"]`,
wantOutIndent: "arr: [\n\t\"foobar\"\n]\n",
}, {
in: `arr:[0,]`,
wantErr: `invalid "]" as number or bool`,
}, {
in: `arr:[true,0,"",id,[],{}]`,
wantVal: V(Msg{{ID("arr"), V(Lst{
V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
})}}),
wantOut: `arr:[true,0,"",id,[],{}]`,
wantOutBracket: `arr:[true,0,"",id,[],<>]`,
wantOutIndent: "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n",
}, {
in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`,
space, space, space, space, space, space, space, space, space, space, space, space),
wantVal: V(Msg{{ID("arr"), V(Lst{
V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
})}}),
}, {
in: `arr:[`,
wantErr: `unexpected EOF`,
}, {
in: `{`,
wantErr: `invalid "{" as identifier`,
}, {
in: `<`,
wantErr: `invalid "<" as identifier`,
}, {
in: `[`,
wantErr: "unexpected EOF",
}, {
in: `}`,
wantErr: "1 bytes of unconsumed input",
}, {
in: `>`,
wantErr: "1 bytes of unconsumed input",
}, {
in: `]`,
wantErr: `invalid "]" as identifier`,
}, {
in: `str: "'"`,
wantVal: V(Msg{{ID("str"), V(`'`)}}),
wantOut: `str:"'"`,
}, {
in: `str: '"'`,
wantVal: V(Msg{{ID("str"), V(`"`)}}),
wantOut: `str:"\""`,
}, {
// String that has as few escaped characters as possible.
in: `str: ` + func() string {
var b []byte
for i := 0; i < utf8.RuneSelf; i++ {
switch i {
case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
default:
b = append(b, byte(i))
}
}
return "'" + string(b) + "'"
}(),
wantVal: V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}),
wantOut: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
}, {
in: "str: '\xde\xad\xbe\xef'",
wantErr: "invalid UTF-8 detected",
}, {
// Valid UTF-8 wire encoding, but sub-optimal encoding.
in: "str: '\xc0\x80'",
wantErr: "invalid UTF-8 detected",
}, {
// Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
in: "str: '\xed\xa0\x80'",
wantErr: "invalid UTF-8 detected",
}, {
// Valid UTF-8 wire encoding, but invalid rune (above max rune).
in: "str: '\xf7\xbf\xbf\xbf'",
wantErr: "invalid UTF-8 detected",
}, {
// Valid UTF-8 wire encoding of the RuneError rune.
in: "str: '\xef\xbf\xbd'",
wantVal: V(Msg{{ID("str"), V(string(utf8.RuneError))}}),
wantOut: `str:"` + string(utf8.RuneError) + `"`,
wantOutASCII: `str:"\ufffd"`,
}, {
in: "str: 'hello\u1234world'",
wantVal: V(Msg{{ID("str"), V("hello\u1234world")}}),
wantOut: "str:\"hello\u1234world\"",
wantOutASCII: `str:"hello\u1234world"`,
}, {
in: `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
wantVal: V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}),
wantOut: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
}, {
in: `str: '`,
wantErr: `unexpected EOF`,
}, {
in: `str: '\`,
wantErr: `unexpected EOF`,
}, {
in: `str: '\'`,
wantErr: `unexpected EOF`,
}, {
in: `str: '\8'`,
wantErr: `invalid escape code "\\8" in string`,
}, {
in: `str: '\1x'`,
wantVal: V(Msg{{ID("str"), V("\001x")}}),
wantOut: `str:"\x01x"`,
wantOutASCII: `str:"\x01x"`,
}, {
in: `str: '\12x'`,
wantVal: V(Msg{{ID("str"), V("\012x")}}),
wantOut: `str:"\nx"`,
wantOutASCII: `str:"\nx"`,
}, {
in: `str: '\123x'`,
wantVal: V(Msg{{ID("str"), V("\123x")}}),
wantOut: `str:"Sx"`,
wantOutASCII: `str:"Sx"`,
}, {
in: `str: '\1234x'`,
wantVal: V(Msg{{ID("str"), V("\1234x")}}),
wantOut: `str:"S4x"`,
wantOutASCII: `str:"S4x"`,
}, {
in: `str: '\1'`,
wantVal: V(Msg{{ID("str"), V("\001")}}),
wantOut: `str:"\x01"`,
wantOutASCII: `str:"\x01"`,
}, {
in: `str: '\12'`,
wantVal: V(Msg{{ID("str"), V("\012")}}),
wantOut: `str:"\n"`,
wantOutASCII: `str:"\n"`,
}, {
in: `str: '\123'`,
wantVal: V(Msg{{ID("str"), V("\123")}}),
wantOut: `str:"S"`,
wantOutASCII: `str:"S"`,
}, {
in: `str: '\1234'`,
wantVal: V(Msg{{ID("str"), V("\1234")}}),
wantOut: `str:"S4"`,
wantOutASCII: `str:"S4"`,
}, {
in: `str: '\377'`,
wantVal: V(Msg{{ID("str"), V("\377")}}),
wantOut: `str:"\xff"`,
wantOutASCII: `str:"\xff"`,
}, {
// Overflow octal escape.
in: `str: '\400'`,
wantErr: `invalid octal escape code "\\400" in string`,
}, {
in: `str: '\xfx'`,
wantVal: V(Msg{{ID("str"), V("\x0fx")}}),
wantOut: `str:"\x0fx"`,
wantOutASCII: `str:"\x0fx"`,
}, {
in: `str: '\xffx'`,
wantVal: V(Msg{{ID("str"), V("\xffx")}}),
wantOut: `str:"\xffx"`,
wantOutASCII: `str:"\xffx"`,
}, {
in: `str: '\xfffx'`,
wantVal: V(Msg{{ID("str"), V("\xfffx")}}),
wantOut: `str:"\xfffx"`,
wantOutASCII: `str:"\xfffx"`,
}, {
in: `str: '\xf'`,
wantVal: V(Msg{{ID("str"), V("\x0f")}}),
wantOut: `str:"\x0f"`,
wantOutASCII: `str:"\x0f"`,
}, {
in: `str: '\xff'`,
wantVal: V(Msg{{ID("str"), V("\xff")}}),
wantOut: `str:"\xff"`,
wantOutASCII: `str:"\xff"`,
}, {
in: `str: '\xfff'`,
wantVal: V(Msg{{ID("str"), V("\xfff")}}),
wantOut: `str:"\xfff"`,
wantOutASCII: `str:"\xfff"`,
}, {
in: `str: '\xz'`,
wantErr: `invalid hex escape code "\\x" in string`,
}, {
in: `str: '\uPo'`,
wantErr: `unexpected EOF`,
}, {
in: `str: '\uPoo'`,
wantErr: `invalid Unicode escape code "\\uPoo'" in string`,
}, {
in: `str: '\uPoop'`,
wantErr: `invalid Unicode escape code "\\uPoop" in string`,
}, {
// Unmatched surrogate pair.
in: `str: '\uDEAD'`,
wantErr: `unexpected EOF`, // trying to reader other half
}, {
// Surrogate pair with invalid other half.
in: `str: '\uDEAD\u0000'`,
wantErr: `invalid Unicode escape code "\\u0000" in string`,
}, {
// Properly matched surrogate pair.
in: `str: '\uD800\uDEAD'`,
wantVal: V(Msg{{ID("str"), V("𐊭")}}),
wantOut: `str:"𐊭"`,
wantOutASCII: `str:"\U000102ad"`,
}, {
// Overflow on Unicode rune.
in: `str: '\U00110000'`,
wantErr: `invalid Unicode escape code "\\U00110000" in string`,
}, {
in: `str: '\z'`,
wantErr: `invalid escape code "\\z" in string`,
}, {
// Strings cannot have NUL literal since C-style strings forbid them.
in: "str: '\x00'",
wantErr: `invalid character '\x00' in string`,
}, {
// Strings cannot have newline literal. The C++ permits them if an
// option is specified to allow them. In Go, we always forbid them.
in: "str: '\n'",
wantErr: `invalid character '\n' in string`,
}, {
in: "name: \"My name is \"\n\"elsewhere\"",
wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
wantOut: `name:"My name is elsewhere"`,
wantOutASCII: `name:"My name is elsewhere"`,
}, {
in: "name: 'My name is '\n'elsewhere'",
wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
}, {
in: "name: 'My name is '\n\"elsewhere\"",
wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
}, {
in: "name: \"My name is \"\n'elsewhere'",
wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
}, {
in: "name: \"My \"'name '\"is \"\n'elsewhere'",
wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
}, {
in: `crazy:"x'"'\""\''"'z"`,
wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
}, {
in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
wantVal: V(Msg{{ID("nums"), V(Lst{
V(true),
ID("T"),
V(true),
V(true),
ID("TRUE"),
V(false),
ID("F"),
V(false),
V(false),
ID("FALSE"),
})}}),
wantOut: "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]",
wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n",
}, {
in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`,
wantVal: V(Msg{{ID("nums"), V(Lst{
V(math.NaN()),
V(math.Inf(+1)),
V(math.Inf(-1)),
ID("NaN"),
ID("NAN"),
ID("Inf"),
ID("INF"),
})}}),
wantOut: "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]",
wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n",
}, {
// C++ permits this, but we currently reject this.
in: `num: -nan`,
wantErr: `invalid "-nan" as number or bool`,
}, {
in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
wantVal: V(Msg{{ID("nums"), V(Lst{
V(uint32(0)),
V(int32(-0)),
V(int64(-9876543210)),
V(uint64(9876543210)),
V(uint32(0x0)),
V(uint64(0x0123456789abcdef)),
V(int64(-0x0123456789abcdef)),
V(uint64(01234567)),
V(int64(-01234567)),
})}}),
wantOut: "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]",
wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n",
}, {
in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
wantVal: V(Msg{{ID("nums"), V(Lst{
V(0.0),
V(0.0),
V(1.0),
V(10.0),
V(-0.0),
V(-1.0),
V(-10.0),
V(1.0),
V(0.1e-3),
V(1.5e+5),
V(1.0e+10),
V(0.0),
})}}),
wantOut: "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]",
wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n",
}, {
in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`,
wantVal: V(Msg{{ID("nums"), func() Value {
if flags.ProtoLegacy {
return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))})
} else {
return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))})
}
}()}}),
}, {
in: `num: +0`,
wantErr: `invalid "+0" as number or bool`,
}, {
in: `num: 01.1234`,
wantErr: `invalid "01.1234" as number or bool`,
}, {
in: `num: 0x`,
wantErr: `invalid "0x" as number or bool`,
}, {
in: `num: 0xX`,
wantErr: `invalid "0xX" as number or bool`,
}, {
in: `num: 0800`,
wantErr: `invalid "0800" as number or bool`,
}, {
in: `num: true.`,
wantErr: `invalid "true." as number or bool`,
}, {
in: `num: .`,
wantErr: `parsing ".": invalid syntax`,
}, {
in: `num: -.`,
wantErr: `parsing "-.": invalid syntax`,
}, {
in: `num: 1e10000`,
wantErr: `parsing "1e10000": value out of range`,
}, {
in: `num: 99999999999999999999`,
wantErr: `parsing "99999999999999999999": value out of range`,
}, {
in: `num: -99999999999999999999`,
wantErr: `parsing "-99999999999999999999": value out of range`,
}, {
in: "x: -",
wantErr: `syntax error (line 1:5)`,
}, {
in: "x:[\"💩\"x",
wantErr: `syntax error (line 1:7)`,
}, {
in: "x:\n\n[\"🔥🔥🔥\"x",
wantErr: `syntax error (line 3:7)`,
}, {
in: "x:[\"👍🏻👍🏿\"x",
wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8
}, {
in: `
firstName : "John",
lastName : "Smith" ,
isAlive : true,
age : 27,
address { # missing colon is okay for messages
streetAddress : "21 2nd Street" ,
city : "New York" ,
state : "NY" ,
postalCode : "10021-3100" ; # trailing semicolon is okay
},
phoneNumbers : [ {
type : "home" ,
number : "212 555-1234"
} , {
type : "office" ,
number : "646 555-4567"
} , {
type : "mobile" ,
number : "123 456-7890" , # trailing comma is okay
} ],
children : [] ,
spouse : null`,
wantVal: V(Msg{
{ID("firstName"), V("John")},
{ID("lastName"), V("Smith")},
{ID("isAlive"), V(true)},
{ID("age"), V(27.0)},
{ID("address"), V(Msg{
{ID("streetAddress"), V("21 2nd Street")},
{ID("city"), V("New York")},
{ID("state"), V("NY")},
{ID("postalCode"), V("10021-3100")},
})},
{ID("phoneNumbers"), V([]Value{
V(Msg{
{ID("type"), V("home")},
{ID("number"), V("212 555-1234")},
}),
V(Msg{
{ID("type"), V("office")},
{ID("number"), V("646 555-4567")},
}),
V(Msg{
{ID("type"), V("mobile")},
{ID("number"), V("123 456-7890")},
}),
})},
{ID("children"), V([]Value{})},
{ID("spouse"), V(protoreflect.Name("null"))},
}),
wantOut: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`,
wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`,
wantOutIndent: `firstName: "John"
lastName: "Smith"
isAlive: true
age: 27
address: {
streetAddress: "21 2nd Street"
city: "New York"
state: "NY"
postalCode: "10021-3100"
}
phoneNumbers: [
{
type: "home"
number: "212 555-1234"
},
{
type: "office"
number: "646 555-4567"
},
{
type: "mobile"
number: "123 456-7890"
}
]
children: []
spouse: null
`,
}}
opts := cmp.Options{
cmpopts.EquateEmpty(),
// Transform composites (List and Message).
cmp.FilterValues(func(x, y Value) bool {
return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message)
}, cmp.Transformer("", func(v Value) interface{} {
if v.Type() == List {
return v.List()
} else {
return v.Message()
}
})),
// Compare scalars (Bool, Int, Uint, Float, String, Name).
cmp.FilterValues(func(x, y Value) bool {
return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message)
}, cmp.Comparer(func(x, y Value) bool {
if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message {
return false
}
// Ensure golden value is always in x variable.
if len(x.raw) > 0 {
x, y = y, x
}
switch x.Type() {
case Bool:
want, _ := x.Bool()
got, ok := y.Bool()
return got == want && ok
case Int:
want, _ := x.Int(true)
got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want)
return got == want && ok
case Uint:
want, _ := x.Uint(true)
got, ok := y.Uint(math.MaxUint32 < want)
return got == want && ok
case Float32, Float64:
want, _ := x.Float(true)
got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
if math.IsNaN(got) || math.IsNaN(want) {
return math.IsNaN(got) == math.IsNaN(want)
}
return got == want && ok
case Name:
want, _ := x.Name()
got, ok := y.Name()
return got == want && ok
default:
return x.String() == y.String()
}
})),
}
for _, tt := range tests {
t.Run("", func(t *testing.T) {
if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
gotVal, err := Unmarshal([]byte(tt.in))
if err == nil {
if tt.wantErr != "" {
t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
}
} else {
if tt.wantErr == "" {
t.Errorf("Unmarshal(): got %v, want nil error", err)
} else if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
}
}
if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
}
}
if tt.wantOut != "" {
gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false)
if err != nil {
t.Errorf("Marshal(): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOut {
t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
}
}
if tt.wantOutBracket != "" {
gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false)
if err != nil {
t.Errorf("Marshal(Bracket): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOutBracket {
t.Errorf("Marshal(Bracket):\ngot: %s\nwant: %s", gotOut, tt.wantOutBracket)
}
}
if tt.wantOutASCII != "" {
gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true)
if err != nil {
t.Errorf("Marshal(ASCII): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOutASCII {
t.Errorf("Marshal(ASCII):\ngot: %s\nwant: %s", gotOut, tt.wantOutASCII)
}
}
if tt.wantOutIndent != "" {
gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false)
if err != nil {
t.Errorf("Marshal(Indent): got %v, want nil error", err)
}
if string(gotOut) != tt.wantOutIndent {
t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
}
}
})
}
}

View File

@ -1,334 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package text implements the text format for protocol buffers.
// This package has no semantic understanding for protocol buffers and is only
// a parser and composer for the format.
//
// There is no formal specification for the protobuf text format, as such the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
//
// This package is neither a superset nor a subset of the C++ implementation.
// This implementation permits a more liberal grammar in some cases to be
// backwards compatible with the historical Go implementation.
// No further Go-specific grammar extensions should be added.
// Some grammars allowed by the C++ implementation are deliberately
// not implemented here because they are considered a bug by the protobuf team
// and should not be replicated.
//
// The Go implementation should implement a sufficient amount of the C++
// grammar such that the default text serialization by C++ can be parsed by Go.
// However, just because the C++ parser accepts some input does not mean that
// the Go implementation should as well.
//
// The text format is almost a superset of JSON except:
// * message keys are not quoted strings, but identifiers
// * the top-level value must be a message without the delimiters
package text
import (
"fmt"
"math"
"strings"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
)
// Type identifies a kind of value that can be expressed in the text format.
type Type uint8

// The zero Type is skipped, so it does not correspond to any named kind.
const (
	_ Type = iota

	// Bool is a boolean (e.g., "true" or "false").
	Bool
	// Int is a signed integer (e.g., "-1423").
	Int
	// Uint is an unsigned integer (e.g., "0xdeadbeef").
	Uint
	// Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
	// Keeping it distinct from Float64 lets encoding pick the bitsize used
	// for formatting.
	Float32
	// Float64 is a 64-bit floating-point number.
	Float64
	// String is a quoted string (e.g., `"the quick brown fox"`).
	String
	// Name is a protocol buffer identifier (e.g., `field_name`).
	Name
	// List is an ordered list of values (e.g., `[0, "one", true]`).
	List
	// Message is an ordered map of values (e.g., `{"key": null}`).
	Message
)

// typeNames holds the name reported by Type.String for every named Type,
// indexed by the Type itself.
var typeNames = [...]string{
	Bool:    "bool",
	Int:     "int",
	Uint:    "uint",
	Float32: "float32",
	Float64: "float64",
	String:  "string",
	Name:    "name",
	List:    "list",
	Message: "message",
}

// String returns the lower-case name of t, or "<invalid>" if t is not one of
// the named Type constants.
func (t Type) String() string {
	if t == 0 || int(t) >= len(typeNames) {
		return "<invalid>"
	}
	return typeNames[t]
}
// Value contains a value of a given Type.
//
// Only the fields that apply to the Type are meaningful; the comment on each
// field names the Types it is used for.
type Value struct {
typ Type // kind of value held; reported by the Type method
raw []byte // raw bytes of the serialized data; left empty by ValueOf and set by rawValueOf
str string // only for String or Name
num uint64 // only for Bool, Int, Uint, Float32, or Float64; Bool is stored as 0 or 1 and floats as float64 bits
arr []Value // only for List
obj [][2]Value // only for Message; each element is a key and value pair
}
// ValueOf converts a Go value into the corresponding Value.
// The supported Go types and the Type each one maps to are:
//	bool               => Bool
//	int32, int64       => Int
//	uint32, uint64     => Uint
//	float32            => Float32
//	float64            => Float64
//	string, []byte     => String
//	protoreflect.Name  => Name
//	[]Value            => List
//	[][2]Value         => Message
//
// ValueOf panics when given any other Go type.
func ValueOf(v interface{}) Value {
	switch x := v.(type) {
	case bool:
		var bit uint64
		if x {
			bit = 1
		}
		return Value{typ: Bool, num: bit}
	case int32:
		// Going through int64 sign-extends negative numbers.
		return Value{typ: Int, num: uint64(int64(x))}
	case int64:
		return Value{typ: Int, num: uint64(x)}
	case uint32:
		return Value{typ: Uint, num: uint64(x)}
	case uint64:
		return Value{typ: Uint, num: x}
	case float32:
		// Float32 values are widened and kept as float64 bits.
		return Value{typ: Float32, num: math.Float64bits(float64(x))}
	case float64:
		return Value{typ: Float64, num: math.Float64bits(x)}
	case string:
		return Value{typ: String, str: x}
	case []byte:
		return Value{typ: String, str: string(x)}
	case protoreflect.Name:
		return Value{typ: Name, str: string(x)}
	case []Value:
		return Value{typ: List, arr: x}
	case [][2]Value:
		return Value{typ: Message, obj: x}
	}
	panic(fmt.Sprintf("invalid type %T", v))
}
// rawValueOf builds a Value for v exactly as ValueOf does and additionally
// records raw as the value's raw bytes. The slice is stored without copying.
func rawValueOf(v interface{}, raw []byte) Value {
	out := ValueOf(v)
	out.raw = raw
	return out
}
// Type reports the Type of v.
//
// For parsed input the Type is only a best-effort guess, because the text
// format is ambiguous in places (e.g., "false" is either a bool or a name).
// For that reason some types are convertible with each other: the Bool, Int,
// Uint, Float, and Name methods each return a boolean that reports whether
// the requested conversion was successful.
func (v Value) Type() Type {
	return v.typ
}
// Bool converts v to a bool and reports whether the conversion succeeded.
//
// A Bool value always converts. An Int or Uint value converts only if it has
// raw text that does not start with '-' and its number is 0 or 1; this
// mirrors C++, which allows a 1-bit unsigned integer (e.g., "0", "1", or
// "0x1"). Every other value fails to convert.
func (v Value) Bool() (x bool, ok bool) {
	if v.typ == Bool {
		return v.num != 0, true
	}
	if v.typ != Int && v.typ != Uint {
		return false, false
	}
	if len(v.raw) == 0 || v.raw[0] == '-' || v.num > 1 {
		return false, false
	}
	return v.num == 1, true
}
// Int returns v as an int64 of the specified precision and reports whether
// the conversion succeeded. If b64 is false the value must fit in an int32,
// otherwise it must fit in an int64.
//
// An Int value converts if it is within that range. A Uint value converts if
// it does not exceed the maximum of the signed type. In addition, when
// flags.ProtoLegacy is set, a Uint whose raw text starts with "0x" is
// reinterpreted as a two's complement bit pattern of the requested width, so
// a large hex literal yields a negative number.
//
// Whenever ok is false, x is 0.
func (v Value) Int(b64 bool) (x int64, ok bool) {
	switch v.typ {
	case Int:
		n := int64(v.num)
		if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) {
			return n, true
		}
	case Uint:
		n := v.num
		if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) {
			return int64(n), true
		}
		// C++ accepts large positive hex numbers as negative values.
		// This feature is here for proto1 backwards compatibility purposes.
		if flags.ProtoLegacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' {
			if b64 {
				return int64(n), true
			}
			// A 32-bit pattern must fit in 32 bits; anything wider is a
			// failed conversion rather than a silently truncated number.
			if n <= math.MaxUint32 {
				return int64(int32(n)), true
			}
		}
	}
	return 0, false
}
// Uint converts v to a uint64 of the requested precision and reports whether
// the conversion succeeded. With b64 unset the number must not exceed
// math.MaxUint32.
//
// An Int value converts only if it has raw text that does not start with
// '-'. A Uint value needs only to be within range. Every other value fails
// to convert.
func (v Value) Uint(b64 bool) (x uint64, ok bool) {
	var fits32 bool
	switch v.typ {
	case Int:
		if len(v.raw) == 0 || v.raw[0] == '-' {
			return 0, false
		}
		// The range check for Int is made on the signed interpretation.
		fits32 = int64(v.num) <= math.MaxUint32
	case Uint:
		fits32 = v.num <= math.MaxUint32
	default:
		return 0, false
	}
	if !b64 && !fits32 {
		return 0, false
	}
	return v.num, true
}
// Float converts v to a float64 of the requested precision and reports
// whether the conversion succeeded.
//
// Int and Uint values always convert, even though the result may lose
// precision. A Float32 or Float64 value converts if b64 is set, if it is NaN
// or infinite, or if its magnitude does not exceed math.MaxFloat32. Every
// other value fails to convert.
func (v Value) Float(b64 bool) (x float64, ok bool) {
	switch v.typ {
	case Int:
		return float64(int64(v.num)), true // possibly lossy, but allowed
	case Uint:
		return float64(v.num), true // possibly lossy, but allowed
	case Float32, Float64:
		f := math.Float64frombits(v.num)
		switch {
		case b64, math.IsNaN(f), math.IsInf(f, 0), math.Abs(f) <= math.MaxFloat32:
			return f, true
		}
	}
	return 0, false
}
// String returns the string held by v if the Type is String.
// For every other Type it returns a formatted rendering of v that is meant
// for debugging purposes.
//
// Since String is used to represent both text and binary, the result is not
// validated to contain valid UTF-8. When using this value with the string
// type in proto, it is the user's responsibility to perform additional UTF-8
// validation.
func (v Value) String() string {
	if v.typ == String {
		return v.str
	}
	return v.stringValue()
}
// stringValue renders v for debugging.
// Scalars and names render as their raw text, a List as "[a,b]", and a
// Message as "{k:v,k:v}" where String-typed keys are wrapped in square
// brackets. Any other Type renders as "<invalid>".
func (v Value) stringValue() string {
	switch v.typ {
	case List:
		elems := v.List()
		parts := make([]string, len(elems))
		for i := range elems {
			parts[i] = elems[i].String()
		}
		return "[" + strings.Join(parts, ",") + "]"
	case Message:
		fields := v.Message()
		parts := make([]string, len(fields))
		for i, kv := range fields {
			key := kv[0].String()
			if kv[0].Type() == String {
				key = "[" + key + "]"
			}
			parts[i] = key + ":" + kv[1].String()
		}
		return "{" + strings.Join(parts, ",") + "}"
	case Bool, Int, Uint, Float32, Float64, Name:
		return string(v.Raw())
	}
	return "<invalid>"
}
// Name returns v as a field name or enum value name and reports whether the
// value can be treated as an identifier.
//
// A Name value always converts. A Bool, Float32, or Float64 value converts
// if its raw text is a valid protoreflect.Name. Every other value fails to
// convert.
func (v Value) Name() (protoreflect.Name, bool) {
	if v.typ == Name {
		return protoreflect.Name(v.str), true
	}
	if v.typ == Bool || v.typ == Float32 || v.typ == Float64 {
		// Ambiguity arises in unmarshalValue since "nan" may be interpreted
		// as either a Name type (for enum values) or a Float32/Float64 type.
		// Similarly, "true" may be interpreted as either a Name or Bool type.
		if id := protoreflect.Name(v.raw); id.IsValid() {
			return id, true
		}
	}
	return "", false
}
// List returns the elements held by v. It panics if the Type is not List.
// Mutations on the return value may not be observable from the Raw method.
func (v Value) List() []Value {
	if v.typ == List {
		return v.arr
	}
	panic("value is not a list")
}
// Message returns the items held by v. It panics if the Type is not Message.
// Each [2]Value is a key and value pair, where the key is either a Name
// (representing a field name), a String (representing an extension field
// name or the Any type URL), or a Uint for unknown fields.
//
// Mutations on the return value may not be observable from the Raw method.
func (v Value) Message() [][2]Value {
	if v.typ == Message {
		return v.obj
	}
	panic("value is not a message")
}
// Raw returns the raw representation of v.
// If v carries raw bytes they are returned as is; the returned value may
// alias the input given to Unmarshal. Otherwise v is marshaled with a fresh
// encoder, and "<invalid>" is returned if that fails.
func (v Value) Raw() []byte {
	if len(v.raw) != 0 {
		return v.raw
	}
	var enc encoder
	if enc.marshalValue(v) != nil {
		return []byte("<invalid>")
	}
	return enc.out
}