internal/encoding/tag: centralize logic for protobuf struct tag serialization

The bespoke text serialization of field descriptors in protoc-gen-go is also
used by the legacy implementation of protobuf reflection, both to derive a
protoreflect.FieldDescriptor from legacy messages and to convert between
protoreflect.ExtensionDescriptor and protoV1.ExtensionDesc.

Centralize this logic in a single place:
* to avoid reimplementing the same logic in internal/impl
* to keep the marshal and unmarshal logic co-located

Change-Id: I634c5afbb9dc6eda91d6cb6b0e68dbd724cb1ccb
Reviewed-on: https://go-review.googlesource.com/c/146758
Reviewed-by: Herbie Ong <herbie@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
Joe Tsai 2018-11-01 13:52:16 -07:00 committed by Joe Tsai
parent 95b0290ea8
commit 05828dba44
4 changed files with 366 additions and 273 deletions

View File

@ -18,6 +18,7 @@ import (
"github.com/golang/protobuf/proto"
descpb "github.com/golang/protobuf/protoc-gen-go/descriptor"
"github.com/golang/protobuf/v2/internal/encoding/tag"
"github.com/golang/protobuf/v2/protogen"
"github.com/golang/protobuf/v2/reflect/protoreflect"
)
@ -636,112 +637,11 @@ func fieldGoType(g *protogen.GeneratedFile, field *protogen.Field) (goType strin
}
func fieldProtobufTag(field *protogen.Field) string {
var tag []string
// wire type
tag = append(tag, wireTypes[field.Desc.Kind()])
// field number
tag = append(tag, strconv.Itoa(int(field.Desc.Number())))
// cardinality
switch field.Desc.Cardinality() {
case protoreflect.Optional:
tag = append(tag, "opt")
case protoreflect.Required:
tag = append(tag, "req")
case protoreflect.Repeated:
tag = append(tag, "rep")
}
if field.Desc.IsPacked() {
tag = append(tag, "packed")
}
// TODO: packed
// name
name := string(field.Desc.Name())
if field.Desc.Kind() == protoreflect.GroupKind {
// The name of the FieldDescriptor for a group field is
// lowercased. To find the original capitalization, we
// look in the field's MessageType.
name = string(field.MessageType.Desc.Name())
}
tag = append(tag, "name="+name)
// JSON name
if jsonName := field.Desc.JSONName(); jsonName != "" && jsonName != name {
tag = append(tag, "json="+jsonName)
}
// proto3
// The previous implementation does not tag extension fields as proto3,
// even when the field is defined in a proto3 file. Match that behavior
// for consistency.
if field.Desc.Syntax() == protoreflect.Proto3 && field.Desc.ExtendedType() == nil {
tag = append(tag, "proto3")
}
// enum
var enumName string
if field.Desc.Kind() == protoreflect.EnumKind {
tag = append(tag, "enum="+enumRegistryName(field.EnumType))
enumName = enumRegistryName(field.EnumType)
}
// oneof
if field.Desc.OneofType() != nil {
tag = append(tag, "oneof")
}
// default value
// This must appear last in the tag, since commas in strings aren't escaped.
if field.Desc.HasDefault() {
var def string
switch field.Desc.Kind() {
case protoreflect.BoolKind:
if field.Desc.Default().Bool() {
def = "1"
} else {
def = "0"
}
case protoreflect.BytesKind:
// Preserve protoc-gen-go's historical output of escaped bytes.
// This behavior is buggy, but fixing it makes it impossible to
// distinguish between the escaped and unescaped forms.
//
// To match the exact output of protoc, this is identical to the
// CEscape function in strutil.cc of the protoc source code.
var b []byte
for _, c := range field.Desc.Default().Bytes() {
switch c {
case '\n':
b = append(b, `\n`...)
case '\r':
b = append(b, `\r`...)
case '\t':
b = append(b, `\t`...)
case '"':
b = append(b, `\"`...)
case '\'':
b = append(b, `\'`...)
case '\\':
b = append(b, `\\`...)
default:
if c >= 0x20 && c <= 0x7e {
b = append(b, c)
} else {
b = append(b, fmt.Sprintf(`\%03o`, c)...)
}
}
}
def = string(b)
case protoreflect.FloatKind, protoreflect.DoubleKind:
f := field.Desc.Default().Float()
switch {
case math.IsInf(f, -1):
def = "-inf"
case math.IsInf(f, 1):
def = "inf"
case math.IsNaN(f):
def = "nan"
default:
def = fmt.Sprint(field.Desc.Default().Interface())
}
default:
def = fmt.Sprint(field.Desc.Default().Interface())
}
tag = append(tag, "def="+def)
}
return strings.Join(tag, ",")
return tag.Marshal(field.Desc, enumName)
}
func fieldDefaultValue(g *protogen.GeneratedFile, message *protogen.Message, field *protogen.Field) string {
@ -789,27 +689,6 @@ func fieldHasDefault(field *protogen.Field) bool {
return true
}
var wireTypes = map[protoreflect.Kind]string{
protoreflect.BoolKind: "varint",
protoreflect.EnumKind: "varint",
protoreflect.Int32Kind: "varint",
protoreflect.Sint32Kind: "zigzag32",
protoreflect.Uint32Kind: "varint",
protoreflect.Int64Kind: "varint",
protoreflect.Sint64Kind: "zigzag64",
protoreflect.Uint64Kind: "varint",
protoreflect.Sfixed32Kind: "fixed32",
protoreflect.Fixed32Kind: "fixed32",
protoreflect.FloatKind: "fixed32",
protoreflect.Sfixed64Kind: "fixed64",
protoreflect.Fixed64Kind: "fixed64",
protoreflect.DoubleKind: "fixed64",
protoreflect.StringKind: "bytes",
protoreflect.BytesKind: "bytes",
protoreflect.MessageKind: "bytes",
protoreflect.GroupKind: "group",
}
// fieldJSONTag returns the field's descriptor name with ",omitempty" appended.
// NOTE(review): presumably this is the value of the generated `json` struct
// tag; confirm against the caller.
func fieldJSONTag(field *protogen.Field) string {
return string(field.Desc.Name()) + ",omitempty"
}

View File

@ -0,0 +1,303 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tag marshals and unmarshals the legacy struct tags as generated
// by historical versions of protoc-gen-go.
package tag
import (
"fmt"
"math"
"reflect"
"strconv"
"strings"
protoV1 "github.com/golang/protobuf/proto"
descriptorV1 "github.com/golang/protobuf/protoc-gen-go/descriptor"
ptext "github.com/golang/protobuf/v2/internal/encoding/text"
pref "github.com/golang/protobuf/v2/reflect/protoreflect"
ptype "github.com/golang/protobuf/v2/reflect/prototype"
)
// byteType is the reflect.Type of byte; it is used to recognize []byte fields.
var byteType = reflect.TypeOf(byte(0))

// Unmarshal decodes the tag into a prototype.Field.
//
// The goType is needed to determine the original protoreflect.Kind since the
// tag does not record sufficient information to determine that.
// The type is the underlying field type (e.g., a repeated field may be
// represented by []T, but the Go type passed in is just T).
// This does not populate the EnumType or MessageType (except for weak message).
//
// The tag is a comma-separated list of segments. A "def=" segment consumes the
// remainder of the tag (commas included), so it is expected to come last.
// Options.Packed is always set: false unless a "packed" segment is present.
//
// This function is a best effort attempt; parsing errors are ignored.
// A segment that consists only of digits but is not a valid 32-bit field
// number (including an empty segment) leaves Number untouched rather than
// overwriting it with a bogus value.
func Unmarshal(tag string, goType reflect.Type) ptype.Field {
	var f ptype.Field
	f.Options = &descriptorV1.FieldOptions{
		Packed: protoV1.Bool(false),
	}
	for len(tag) > 0 {
		// Carve off the next comma-delimited segment as tag[:i].
		i := strings.IndexByte(tag, ',')
		if i < 0 {
			i = len(tag)
		}
		switch s := tag[:i]; {
		case strings.HasPrefix(s, "name="):
			f.Name = pref.Name(s[len("name="):])
		case strings.Trim(s, "0123456789") == "":
			// The segment is made of digits only (or is empty). Only accept
			// it when it parses cleanly: an empty segment must not reset a
			// previously parsed number to zero, and an overflowing value must
			// not be stored in clamped or wrapped form.
			if n, err := strconv.ParseInt(s, 10, 32); err == nil {
				f.Number = pref.FieldNumber(n)
			}
		case s == "opt":
			f.Cardinality = pref.Optional
		case s == "req":
			f.Cardinality = pref.Required
		case s == "rep":
			f.Cardinality = pref.Repeated
		case s == "varint":
			// Several kinds share the varint wire type; the Go type
			// disambiguates. Enums are fixed up later by the "enum=" segment.
			switch goType.Kind() {
			case reflect.Bool:
				f.Kind = pref.BoolKind
			case reflect.Int32:
				f.Kind = pref.Int32Kind
			case reflect.Int64:
				f.Kind = pref.Int64Kind
			case reflect.Uint32:
				f.Kind = pref.Uint32Kind
			case reflect.Uint64:
				f.Kind = pref.Uint64Kind
			}
		case s == "zigzag32":
			if goType.Kind() == reflect.Int32 {
				f.Kind = pref.Sint32Kind
			}
		case s == "zigzag64":
			if goType.Kind() == reflect.Int64 {
				f.Kind = pref.Sint64Kind
			}
		case s == "fixed32":
			switch goType.Kind() {
			case reflect.Int32:
				f.Kind = pref.Sfixed32Kind
			case reflect.Uint32:
				f.Kind = pref.Fixed32Kind
			case reflect.Float32:
				f.Kind = pref.FloatKind
			}
		case s == "fixed64":
			switch goType.Kind() {
			case reflect.Int64:
				f.Kind = pref.Sfixed64Kind
			case reflect.Uint64:
				f.Kind = pref.Fixed64Kind
			case reflect.Float64:
				f.Kind = pref.DoubleKind
			}
		case s == "bytes":
			// Strings, byte slices and messages all use the bytes wire type;
			// anything that is neither a string nor a []byte is a message.
			switch {
			case goType.Kind() == reflect.String:
				f.Kind = pref.StringKind
			case goType.Kind() == reflect.Slice && goType.Elem() == byteType:
				f.Kind = pref.BytesKind
			default:
				f.Kind = pref.MessageKind
			}
		case s == "group":
			f.Kind = pref.GroupKind
		case strings.HasPrefix(s, "enum="):
			// Overrides the integer kind chosen by the "varint" segment.
			f.Kind = pref.EnumKind
		case strings.HasPrefix(s, "json="):
			f.JSONName = s[len("json="):]
		case s == "packed":
			*f.Options.Packed = true
		case strings.HasPrefix(s, "weak="):
			f.Options.Weak = protoV1.Bool(true)
			f.MessageType = ptype.PlaceholderMessage(pref.FullName(s[len("weak="):]))
		case strings.HasPrefix(s, "def="):
			// The default tag is special in that everything afterwards is the
			// default regardless of the presence of commas.
			s, i = tag[len("def="):], len(tag)

			// Defaults are parsed last, so Kind is populated.
			// Numeric parse errors are deliberately ignored (best effort).
			switch f.Kind {
			case pref.BoolKind:
				switch s {
				case "1":
					f.Default = pref.ValueOf(true)
				case "0":
					f.Default = pref.ValueOf(false)
				}
			case pref.EnumKind:
				n, _ := strconv.ParseInt(s, 10, 32)
				f.Default = pref.ValueOf(pref.EnumNumber(n))
			case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind:
				n, _ := strconv.ParseInt(s, 10, 32)
				f.Default = pref.ValueOf(int32(n))
			case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
				n, _ := strconv.ParseInt(s, 10, 64)
				f.Default = pref.ValueOf(int64(n))
			case pref.Uint32Kind, pref.Fixed32Kind:
				n, _ := strconv.ParseUint(s, 10, 32)
				f.Default = pref.ValueOf(uint32(n))
			case pref.Uint64Kind, pref.Fixed64Kind:
				n, _ := strconv.ParseUint(s, 10, 64)
				f.Default = pref.ValueOf(uint64(n))
			case pref.FloatKind, pref.DoubleKind:
				n, _ := strconv.ParseFloat(s, 64)
				// Spell out the special values exactly as Marshal writes them.
				switch s {
				case "nan":
					n = math.NaN()
				case "inf":
					n = math.Inf(+1)
				case "-inf":
					n = math.Inf(-1)
				}
				if f.Kind == pref.FloatKind {
					f.Default = pref.ValueOf(float32(n))
				} else {
					f.Default = pref.ValueOf(float64(n))
				}
			case pref.StringKind:
				f.Default = pref.ValueOf(string(s))
			case pref.BytesKind:
				// The default value is in escaped form (C-style).
				// The escaped text is embedded as the quoted key of a
				// single-entry text message so that the text parser performs
				// the unescaping; the key is then read back out.
				// TODO: Export unmarshalString in the text package to avoid this hack.
				v, err := ptext.Unmarshal([]byte(`["` + s + `"]:0`))
				if err == nil && len(v.Message()) == 1 {
					s := v.Message()[0][0].String()
					f.Default = pref.ValueOf([]byte(s))
				}
			}
		}
		// Advance past the consumed segment and its trailing comma, if any.
		tag = strings.TrimPrefix(tag[i:], ",")
	}

	// The generator uses the group message name instead of the field name.
	// We obtain the real field name by lowercasing the group name.
	if f.Kind == pref.GroupKind {
		f.Name = pref.Name(strings.ToLower(string(f.Name)))
	}
	return f
}
// Marshal encodes the protoreflect.FieldDescriptor as a tag.
//
// The tag is a comma-separated list written in a fixed order: wire type,
// field number, cardinality, "packed", "name=", "json=", "proto3", "enum=",
// "oneof" and finally "def=". Segments that do not apply are omitted.
//
// The enumName must be provided if the kind is an enum; when it is empty no
// "enum=" segment is written.
// Historically, the formulation of the enum "name" was the proto package
// dot-concatenated with the generated Go identifier for the enum type.
// Depending on the context in which Marshal is called, there are different
// ways through which that information is determined. As such it is the
// caller's responsibility to work out that name and pass it in.
func Marshal(fd pref.FieldDescriptor, enumName string) string {
	kind := fd.Kind()
	var parts []string

	// Wire type. A kind that is not listed contributes no segment.
	var wire string
	switch kind {
	case pref.BoolKind, pref.EnumKind, pref.Int32Kind, pref.Uint32Kind, pref.Int64Kind, pref.Uint64Kind:
		wire = "varint"
	case pref.Sint32Kind:
		wire = "zigzag32"
	case pref.Sint64Kind:
		wire = "zigzag64"
	case pref.Sfixed32Kind, pref.Fixed32Kind, pref.FloatKind:
		wire = "fixed32"
	case pref.Sfixed64Kind, pref.Fixed64Kind, pref.DoubleKind:
		wire = "fixed64"
	case pref.StringKind, pref.BytesKind, pref.MessageKind:
		wire = "bytes"
	case pref.GroupKind:
		wire = "group"
	}
	if wire != "" {
		parts = append(parts, wire)
	}

	// Field number.
	parts = append(parts, strconv.Itoa(int(fd.Number())))

	// Cardinality, followed by the packed marker.
	var card string
	switch fd.Cardinality() {
	case pref.Optional:
		card = "opt"
	case pref.Required:
		card = "req"
	case pref.Repeated:
		card = "rep"
	}
	if card != "" {
		parts = append(parts, card)
	}
	if fd.IsPacked() {
		parts = append(parts, "packed")
	}
	// TODO: Weak fields?

	// Name. The FieldDescriptor of a group field carries a lowercased name;
	// the original capitalization is taken from the field's MessageType.
	fieldName := string(fd.Name())
	if kind == pref.GroupKind {
		fieldName = string(fd.MessageType().Name())
	}
	parts = append(parts, "name="+fieldName)

	// JSON name, only when it adds information beyond the field name.
	if js := fd.JSONName(); js != "" && js != fieldName {
		parts = append(parts, "json="+js)
	}

	// The previous implementation does not tag extension fields as proto3,
	// even when the field is defined in a proto3 file. Match that behavior
	// for consistency.
	if fd.ExtendedType() == nil && fd.Syntax() == pref.Proto3 {
		parts = append(parts, "proto3")
	}

	if enumName != "" && kind == pref.EnumKind {
		parts = append(parts, "enum="+enumName)
	}
	if fd.OneofType() != nil {
		parts = append(parts, "oneof")
	}

	// This must appear last in the tag, since commas in strings aren't escaped.
	if fd.HasDefault() {
		parts = append(parts, "def="+defaultTagValue(fd))
	}
	return strings.Join(parts, ",")
}

// defaultTagValue renders the default value of fd in the textual form used
// after "def=" in the legacy struct tag.
func defaultTagValue(fd pref.FieldDescriptor) string {
	def := fd.Default()
	switch fd.Kind() {
	case pref.BoolKind:
		if def.Bool() {
			return "1"
		}
		return "0"
	case pref.BytesKind:
		return cEscape(def.Bytes())
	case pref.FloatKind, pref.DoubleKind:
		// The special values get fixed spellings; everything else falls
		// through to the generic formatting below.
		switch f := def.Float(); {
		case math.IsInf(f, -1):
			return "-inf"
		case math.IsInf(f, 1):
			return "inf"
		case math.IsNaN(f):
			return "nan"
		}
	}
	return fmt.Sprint(def.Interface())
}

// cEscape escapes raw using C-style escapes.
//
// This preserves protoc-gen-go's historical output of escaped bytes.
// The behavior is buggy, but fixing it makes it impossible to distinguish
// between the escaped and unescaped forms. To match the exact output of
// protoc, this is identical to the CEscape function in strutil.cc of the
// protoc source code.
func cEscape(raw []byte) string {
	var sb strings.Builder
	for _, c := range raw {
		switch c {
		case '\n':
			sb.WriteString(`\n`)
		case '\r':
			sb.WriteString(`\r`)
		case '\t':
			sb.WriteString(`\t`)
		case '"':
			sb.WriteString(`\"`)
		case '\'':
			sb.WriteString(`\'`)
		case '\\':
			sb.WriteString(`\\`)
		default:
			if c < 0x20 || c > 0x7e {
				// Not printable ASCII: emit a three-digit octal escape.
				fmt.Fprintf(&sb, `\%03o`, c)
			} else {
				sb.WriteByte(c)
			}
		}
	}
	return sb.String()
}

View File

@ -0,0 +1,55 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tag
import (
"reflect"
"testing"
pref "github.com/golang/protobuf/v2/reflect/protoreflect"
ptype "github.com/golang/protobuf/v2/reflect/prototype"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
// Test checks Marshal and Unmarshal against each other using a single
// repeated bytes field whose default value contains a comma, non-ASCII bytes
// and a newline.
func Test(t *testing.T) {
	msg := &ptype.StandaloneMessage{
		Syntax:   pref.Proto3,
		FullName: "golang.org.example.FooMessage",
		Fields: []ptype.Field{{
			Name:        "foo_field",
			Number:      1337,
			Cardinality: pref.Repeated,
			Kind:        pref.BytesKind,
			JSONName:    "fooField",
			Default:     pref.ValueOf([]byte("hello, \xde\xad\xbe\xef\n")),
		}},
	}
	desc, err := ptype.NewMessage(msg)
	if err != nil {
		t.Fatalf("unexpected NewMessage error: %v", err)
	}

	// The tag expected from Marshal, which also serves as input to Unmarshal.
	const wantTag = `bytes,1337,rep,name=foo_field,json=fooField,proto3,def=hello, \336\255\276\357\n`

	// Marshal test.
	if gotTag := Marshal(desc.Fields().Get(0), ""); gotTag != wantTag {
		t.Errorf("Marshal() = `%v`, want `%v`", gotTag, wantTag)
	}

	// Unmarshal test.
	gotField := Unmarshal(wantTag, reflect.TypeOf([]byte{}))
	// Compare protoreflect values by their underlying Go value, and skip the
	// Options field and any unexported state.
	unwrapValue := cmp.Transformer("UnwrapValue", func(v pref.Value) interface{} {
		return v.Interface()
	})
	opts := cmp.Options{
		unwrapValue,
		cmpopts.IgnoreUnexported(ptype.Field{}),
		cmpopts.IgnoreFields(ptype.Field{}, "Options"),
	}
	if diff := cmp.Diff(msg.Fields[0], gotField, opts); diff != "" {
		t.Errorf("Unmarshal() mismatch (-want +got):\n%v", diff)
	}
}

View File

@ -6,16 +6,14 @@ package impl
import (
"fmt"
"math"
"reflect"
"strconv"
"strings"
"sync"
"unicode"
protoV1 "github.com/golang/protobuf/proto"
descriptorV1 "github.com/golang/protobuf/protoc-gen-go/descriptor"
"github.com/golang/protobuf/v2/internal/encoding/text"
ptag "github.com/golang/protobuf/v2/internal/encoding/tag"
pref "github.com/golang/protobuf/v2/reflect/protoreflect"
ptype "github.com/golang/protobuf/v2/reflect/prototype"
)
@ -174,156 +172,14 @@ func (ms *messageDescSet) processMessage(t reflect.Type) pref.MessageDescriptor
return ptype.PlaceholderMessage(m.FullName)
}
func (ms *messageDescSet) parseField(tag, tagKey, tagVal string, t reflect.Type, parent *ptype.StandaloneMessage) (f ptype.Field) {
func (ms *messageDescSet) parseField(tag, tagKey, tagVal string, goType reflect.Type, parent *ptype.StandaloneMessage) ptype.Field {
t := goType
isOptional := t.Kind() == reflect.Ptr && t.Elem().Kind() != reflect.Struct
isRepeated := t.Kind() == reflect.Slice && t.Elem().Kind() != reflect.Uint8
if isOptional || isRepeated {
t = t.Elem()
}
f.Options = &descriptorV1.FieldOptions{
Packed: protoV1.Bool(false),
}
for len(tag) > 0 {
i := strings.IndexByte(tag, ',')
if i < 0 {
i = len(tag)
}
switch s := tag[:i]; {
case strings.HasPrefix(s, "name="):
f.Name = pref.Name(s[len("name="):])
case strings.Trim(s, "0123456789") == "":
n, _ := strconv.ParseUint(s, 10, 32)
f.Number = pref.FieldNumber(n)
case s == "opt":
f.Cardinality = pref.Optional
case s == "req":
f.Cardinality = pref.Required
case s == "rep":
f.Cardinality = pref.Repeated
case s == "varint":
switch t.Kind() {
case reflect.Bool:
f.Kind = pref.BoolKind
case reflect.Int32:
f.Kind = pref.Int32Kind
case reflect.Int64:
f.Kind = pref.Int64Kind
case reflect.Uint32:
f.Kind = pref.Uint32Kind
case reflect.Uint64:
f.Kind = pref.Uint64Kind
}
case s == "zigzag32":
if t.Kind() == reflect.Int32 {
f.Kind = pref.Sint32Kind
}
case s == "zigzag64":
if t.Kind() == reflect.Int64 {
f.Kind = pref.Sint64Kind
}
case s == "fixed32":
switch t.Kind() {
case reflect.Int32:
f.Kind = pref.Sfixed32Kind
case reflect.Uint32:
f.Kind = pref.Fixed32Kind
case reflect.Float32:
f.Kind = pref.FloatKind
}
case s == "fixed64":
switch t.Kind() {
case reflect.Int64:
f.Kind = pref.Sfixed64Kind
case reflect.Uint64:
f.Kind = pref.Fixed64Kind
case reflect.Float64:
f.Kind = pref.DoubleKind
}
case s == "bytes":
switch {
case t.Kind() == reflect.String:
f.Kind = pref.StringKind
case t.Kind() == reflect.Slice && t.Elem() == byteType:
f.Kind = pref.BytesKind
default:
f.Kind = pref.MessageKind
}
case s == "group":
f.Kind = pref.GroupKind
case strings.HasPrefix(s, "enum="):
f.Kind = pref.EnumKind
case strings.HasPrefix(s, "json="):
f.JSONName = s[len("json="):]
case s == "packed":
*f.Options.Packed = true
case strings.HasPrefix(s, "weak="):
f.Options.Weak = protoV1.Bool(true)
f.MessageType = ptype.PlaceholderMessage(pref.FullName(s[len("weak="):]))
case strings.HasPrefix(s, "def="):
// The default tag is special in that everything afterwards is the
// default regardless of the presence of commas.
s, i = tag[len("def="):], len(tag)
// Defaults are parsed last, so Kind is populated.
switch f.Kind {
case pref.BoolKind:
switch s {
case "1":
f.Default = pref.ValueOf(true)
case "0":
f.Default = pref.ValueOf(false)
}
case pref.EnumKind:
n, _ := strconv.ParseInt(s, 10, 32)
f.Default = pref.ValueOf(pref.EnumNumber(n))
case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind:
n, _ := strconv.ParseInt(s, 10, 32)
f.Default = pref.ValueOf(int32(n))
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
n, _ := strconv.ParseInt(s, 10, 64)
f.Default = pref.ValueOf(int64(n))
case pref.Uint32Kind, pref.Fixed32Kind:
n, _ := strconv.ParseUint(s, 10, 32)
f.Default = pref.ValueOf(uint32(n))
case pref.Uint64Kind, pref.Fixed64Kind:
n, _ := strconv.ParseUint(s, 10, 64)
f.Default = pref.ValueOf(uint64(n))
case pref.FloatKind, pref.DoubleKind:
n, _ := strconv.ParseFloat(s, 64)
switch s {
case "nan":
n = math.NaN()
case "inf":
n = math.Inf(+1)
case "-inf":
n = math.Inf(-1)
}
if f.Kind == pref.FloatKind {
f.Default = pref.ValueOf(float32(n))
} else {
f.Default = pref.ValueOf(float64(n))
}
case pref.StringKind:
f.Default = pref.ValueOf(string(s))
case pref.BytesKind:
// The default value is in escaped form (C-style).
// TODO: Export unmarshalString in the text package to avoid this hack.
v, err := text.Unmarshal([]byte(`["` + s + `"]:0`))
if err == nil && len(v.Message()) == 1 {
s := v.Message()[0][0].String()
f.Default = pref.ValueOf([]byte(s))
}
}
}
tag = strings.TrimPrefix(tag[i:], ",")
}
// The generator uses the group message name instead of the field name.
// We obtain the real field name by lowercasing the group name.
if f.Kind == pref.GroupKind {
f.Name = pref.Name(strings.ToLower(string(f.Name)))
}
f := ptag.Unmarshal(tag, t)
// Populate EnumType and MessageType.
if f.EnumType == nil && f.Kind == pref.EnumKind {