compiler/protogen, internal/strs, internal/impl: expose enum Go name derivation

In order to migrate v1 to wrap v2, we need a way to reproduce
the awful enum "names" that v1 used, which were the concatenation of
the proto package with the Go identifier used for the enum.

To support this:
* Move the camel case logic from compiler/protogen to internal/strs
* Add a small stub in internal/impl to expose this functionality

Change-Id: I8ff31daa9ae541e5788dc04d2e89eae1574877e4
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/191637
Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
Joe Tsai 2019-08-23 12:18:57 -07:00
parent c5060d2fe6
commit 2e7817f117
9 changed files with 214 additions and 243 deletions

View File

@ -780,14 +780,7 @@ func fieldGoType(g *protogen.GeneratedFile, f *fileInfo, field *protogen.Field)
func fieldProtobufTagValue(field *protogen.Field) string {
var enumName string
if field.Desc.Kind() == protoreflect.EnumKind {
// For historical reasons, the name used in the tag is neither
// the protobuf full name nor the fully qualified Go identifier,
// but an odd mix of both.
enumName = field.Enum.GoIdent.GoName
protoPkg := string(field.Enum.Desc.ParentFile().Package())
if protoPkg != "" {
enumName = protoPkg + "." + enumName
}
enumName = protoimpl.X.LegacyEnumName(field.Enum.Desc)
}
return tag.Marshal(field.Desc, enumName)
}

View File

@ -1,141 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package protogen
import (
"fmt"
"go/token"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"google.golang.org/protobuf/reflect/protoreflect"
)
// A GoIdent is a Go identifier, consisting of a name and import path.
// The name is a single identifier and may not be a dot-qualified selector.
type GoIdent struct {
	GoName       string       // the bare identifier
	GoImportPath GoImportPath // import path associated with the identifier
}

// String returns the identifier as a quoted import path, a dot, and the
// name, for example: "example.com/pkg".Name
//
// GoImportPath implements fmt.Stringer and its String method already quotes
// the path. Passing it directly to %q would make fmt quote that quoted
// result a second time, so it is converted to a plain string first.
func (id GoIdent) String() string {
	return fmt.Sprintf("%q.%v", string(id.GoImportPath), id.GoName)
}
// newGoIdent builds the GoIdent for descriptor d declared in file f.
// The Go name is d's full name with the file's proto package prefix
// trimmed off and the remainder camel-cased; the import path is f's.
func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
	fullName := string(d.FullName())
	pkgPrefix := string(f.Desc.Package()) + "."
	relName := strings.TrimPrefix(fullName, pkgPrefix)

	var id GoIdent
	id.GoName = camelCase(relName)
	id.GoImportPath = f.GoImportPath
	return id
}
// GoImportPath is the import path of a Go package,
// such as "google.golang.org/genproto/protobuf".
type GoImportPath string

// String returns the import path as a double-quoted Go string literal.
func (p GoImportPath) String() string {
	quoted := strconv.Quote(string(p))
	return quoted
}

// Ident pairs the name s with the import path p to form a GoIdent.
func (p GoImportPath) Ident(s string) GoIdent {
	var id GoIdent
	id.GoImportPath = p
	id.GoName = s
	return id
}
// GoPackageName is the name of a Go package, such as "protobuf".
type GoPackageName string

// cleanPackageName sanitizes name with cleanGoName and returns the result
// as a GoPackageName.
func cleanPackageName(name string) GoPackageName {
	sanitized := cleanGoName(name)
	return GoPackageName(sanitized)
}
// cleanGoName rewrites s into a string usable as a Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit
// is replaced with '_'. The result is then prefixed with '_' when it is a
// Go keyword or when its first rune is not a letter (which includes the
// empty string).
func cleanGoName(s string) string {
	var sb strings.Builder
	sb.Grow(len(s))
	for _, r := range s {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			r = '_'
		}
		sb.WriteRune(r)
	}
	ident := sb.String()

	// An identifier must start with a letter and must not shadow a keyword.
	first, _ := utf8.DecodeRuneInString(ident)
	needsPrefix := token.Lookup(ident).IsKeyword() || !unicode.IsLetter(first)
	if needsPrefix {
		return "_" + ident
	}
	return ident
}
// baseName strips everything up to and including the final '/' from name,
// then strips the final '.' and everything after it from what remains.
func baseName(name string) string {
	// Keep only the last path element.
	if slash := strings.LastIndexByte(name, '/'); slash >= 0 {
		name = name[slash+1:]
	}
	// Remove the trailing dotted suffix, if there is one.
	dot := strings.LastIndexByte(name, '.')
	if dot < 0 {
		return name
	}
	return name[:dot]
}
// camelCase converts a name to CamelCase.
//
// An interior underscore that is followed by a lower case ASCII letter is
// dropped and that letter is upper-cased. A '.' is handled the same way
// when followed by a lower case letter, and is otherwise turned into '_'.
// A '_' at the very start, or directly after a '.', becomes 'X'.
//
// This rewrite could in principle make two distinct names collide, but
// the possibility is treated as too remote to matter: since the C++
// generator lowercases names, two fields differing only in capitalization
// are extremely unlikely.
func camelCase(s string) string {
	// The input is consumed one word at a time. A word begins at an
	// underscore or an upper case letter, and its first letter is always
	// emitted in upper case. Digits are emitted unchanged.
	out := make([]byte, 0, len(s))
	nextIsLower := func(i int) bool {
		return i+1 < len(s) && isASCIILower(s[i+1])
	}
	for i := 0; i < len(s); i++ {
		ch := s[i]
		atStart := i == 0 || s[i-1] == '.'
		switch {
		case ch == '.':
			// ".{{lowercase}}" drops the dot; any other dot becomes '_'.
			if !nextIsLower(i) {
				out = append(out, '_')
			}
		case ch == '_' && atStart:
			// A leading '_' (or one right after '.') becomes 'X' so the
			// result starts with a capital letter, matching historic behavior.
			out = append(out, 'X')
		case ch == '_' && nextIsLower(i):
			// "_{{lowercase}}" drops the underscore.
		case isASCIIDigit(ch):
			out = append(out, ch)
		default:
			// Presumably a letter; anything else is a bogus identifier
			// and is passed through unchanged.
			if isASCIILower(ch) {
				ch -= 'a' - 'A' // upper-case the first letter of the word
			}
			out = append(out, ch)
			// Copy the run of lower case letters that follows.
			for nextIsLower(i) {
				i++
				out = append(out, s[i])
			}
		}
	}
	return string(out)
}

// isASCIILower reports whether c is an ASCII lower-case letter.
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}

// isASCIIDigit reports whether c is an ASCII digit.
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

View File

@ -1,62 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package protogen
import "testing"
// TestCamelCase runs camelCase over a table of inputs covering underscores,
// dots, digits, already-capitalized words and non-ASCII text.
func TestCamelCase(t *testing.T) {
	tests := []struct {
		in, want string
	}{
		{"", ""},
		{"one", "One"},
		{"one_two", "OneTwo"},
		{"_my_field_name_2", "XMyFieldName_2"},
		{"Something_Capped", "Something_Capped"},
		{"my_Name", "My_Name"},
		{"OneTwo", "OneTwo"},
		{"_", "X"},
		{"_a_", "XA_"},
		{"one.two", "OneTwo"},
		{"one.Two", "One_Two"},
		{"one_two.three_four", "OneTwoThreeFour"},
		{"one_two.Three_four", "OneTwo_ThreeFour"},
		{"_one._two", "XOne_XTwo"},
		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
		{"double__underscore", "Double_Underscore"},
		{"camelCase", "CamelCase"},
		{"go2proto", "Go2Proto"},
		{"世界", "世界"},
		{"x世界", "X世界"},
		{"foo_bar世界", "FooBar世界"},
	}
	for _, tc := range tests {
		if got := camelCase(tc.in); got != tc.want {
			// Name the function exactly as declared so a failure is greppable.
			t.Errorf("camelCase(%q) = %q, want %q", tc.in, got, tc.want)
		}
	}
}
// TestCleanGoName runs cleanGoName over a table covering the empty string,
// a Go keyword, punctuation, invalid UTF-8 and non-ASCII letters.
func TestCleanGoName(t *testing.T) {
	type testCase struct{ input, expected string }
	cases := []testCase{
		{"", "_"},
		{"boo", "boo"},
		{"Boo", "Boo"},
		{"ßoo", "ßoo"},
		{"default", "_default"},
		{"hello", "hello"},
		{"hello-world!!", "hello_world__"},
		{"hello-\xde\xad\xbe\xef\x00", "hello_____"},
		{"hello 世界", "hello_世界"},
		{"世界", "世界"},
	}
	for i := range cases {
		c := cases[i]
		actual := cleanGoName(c.input)
		if actual == c.expected {
			continue
		}
		t.Errorf("cleanGoName(%q) = %q, want %q", c.input, actual, c.expected)
	}
}

View File

@ -30,6 +30,7 @@ import (
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/internal/fieldnum"
"google.golang.org/protobuf/internal/strs"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
@ -431,7 +432,7 @@ func newFile(gen *Plugin, p *descriptorpb.FileDescriptorProto, packageName GoPac
}
}
f.GoDescriptorIdent = GoIdent{
GoName: "File_" + cleanGoName(p.GetName()),
GoName: "File_" + strs.GoSanitized(p.GetName()),
GoImportPath: f.GoImportPath,
}
f.GeneratedFilenamePrefix = prefix
@ -499,6 +500,8 @@ func goPackageOption(d *descriptorpb.FileDescriptorProto) (pkg GoPackageName, im
}
// A semicolon-delimited suffix delimits the import path and package name.
if i := strings.Index(opt, ";"); i >= 0 {
// TODO: The package name is explicitly provided by the .proto file.
// Rather than sanitizing it, we should pass it verbatim.
return cleanPackageName(opt[i+1:]), GoImportPath(opt[:i])
}
// The presence of a slash implies there's an import path.
@ -756,7 +759,7 @@ func newField(gen *Plugin, f *File, message *Message, desc protoreflect.FieldDes
default:
loc = message.Location.appendPath(fieldnum.DescriptorProto_Field, int32(desc.Index()))
}
camelCased := camelCase(string(desc.Name()))
camelCased := strs.GoCamelCase(string(desc.Name()))
var parentPrefix string
if message != nil {
parentPrefix = message.GoIdent.GoName + "_"
@ -826,7 +829,7 @@ type Oneof struct {
func newOneof(gen *Plugin, f *File, message *Message, desc protoreflect.OneofDescriptor) *Oneof {
loc := message.Location.appendPath(fieldnum.DescriptorProto_OneofDecl, int32(desc.Index()))
camelCased := camelCase(string(desc.Name()))
camelCased := strs.GoCamelCase(string(desc.Name()))
parentPrefix := message.GoIdent.GoName + "_"
return &Oneof{
Desc: desc,
@ -860,7 +863,7 @@ func newService(gen *Plugin, f *File, desc protoreflect.ServiceDescriptor) *Serv
loc := f.location(fieldnum.FileDescriptorProto_Service, int32(desc.Index()))
service := &Service{
Desc: desc,
GoName: camelCase(string(desc.Name())),
GoName: strs.GoCamelCase(string(desc.Name())),
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
}
@ -889,7 +892,7 @@ func newMethod(gen *Plugin, f *File, service *Service, desc protoreflect.MethodD
loc := service.Location.appendPath(fieldnum.ServiceDescriptorProto_Method, int32(desc.Index()))
method := &Method{
Desc: desc,
GoName: camelCase(string(desc.Name())),
GoName: strs.GoCamelCase(string(desc.Name())),
Parent: service,
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
@ -1183,6 +1186,56 @@ func (g *GeneratedFile) metaFile(content []byte) (string, error) {
return string(b), nil
}
// A GoIdent is a Go identifier, consisting of a name and import path.
// The name is a single identifier and may not be a dot-qualified selector.
type GoIdent struct {
	GoName       string       // the bare identifier
	GoImportPath GoImportPath // import path associated with the identifier
}

// String returns the identifier as a quoted import path, a dot, and the
// name, for example: "example.com/pkg".Name
//
// GoImportPath implements fmt.Stringer and its String method already quotes
// the path. Passing it directly to %q would make fmt quote that quoted
// result a second time, so it is converted to a plain string first.
func (id GoIdent) String() string {
	return fmt.Sprintf("%q.%v", string(id.GoImportPath), id.GoName)
}
// newGoIdent builds the GoIdent for descriptor d declared in file f.
// The Go name is d's full name with the file's proto package prefix
// trimmed off and the remainder passed through strs.GoCamelCase;
// the import path is taken from f.
func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
	fullName := string(d.FullName())
	pkgPrefix := string(f.Desc.Package()) + "."
	relName := strings.TrimPrefix(fullName, pkgPrefix)

	var id GoIdent
	id.GoName = strs.GoCamelCase(relName)
	id.GoImportPath = f.GoImportPath
	return id
}
// GoImportPath is the import path of a Go package.
// Example value: "google.golang.org/protobuf/compiler/protogen"
type GoImportPath string

// String returns the import path as a double-quoted Go string literal.
func (p GoImportPath) String() string {
	quoted := strconv.Quote(string(p))
	return quoted
}

// Ident pairs the name s with the import path p to form a GoIdent.
func (p GoImportPath) Ident(s string) GoIdent {
	var id GoIdent
	id.GoImportPath = p
	id.GoName = s
	return id
}
// GoPackageName is the name of a Go package, such as "protobuf".
type GoPackageName string

// cleanPackageName sanitizes name with strs.GoSanitized and returns the
// result as a GoPackageName.
func cleanPackageName(name string) GoPackageName {
	sanitized := strs.GoSanitized(name)
	return GoPackageName(sanitized)
}
// baseName strips everything up to and including the final '/' from name,
// then strips the final '.' and everything after it from what remains.
func baseName(name string) string {
	// Keep only the last path element.
	if slash := strings.LastIndexByte(name, '/'); slash >= 0 {
		name = name[slash+1:]
	}
	// Remove the trailing dotted suffix, if there is one.
	dot := strings.LastIndexByte(name, '.')
	if dot < 0 {
		return name
	}
	return name[:dot]
}
type pathType int
const (

View File

@ -11,10 +11,27 @@ import (
"sync"
"google.golang.org/protobuf/internal/filedesc"
"google.golang.org/protobuf/internal/strs"
"google.golang.org/protobuf/reflect/protoreflect"
pref "google.golang.org/protobuf/reflect/protoreflect"
)
// legacyEnumName returns the name of enums used in legacy code.
// The name is a hybrid: the proto package name (when there is one),
// a '.', and the camel-cased remainder of the enum's full name. It is
// therefore neither the protobuf full name nor the qualified Go name.
func legacyEnumName(ed pref.EnumDescriptor) string {
	relName := string(ed.FullName())
	pkg := ""
	if parent := ed.ParentFile(); parent != nil {
		pkg = string(parent.Package())
		// Drop the package qualifier so only the enum's own path is camel-cased.
		relName = strings.TrimPrefix(relName, pkg+".")
	}
	goName := strs.GoCamelCase(relName)
	if pkg != "" {
		goName = pkg + "." + goName
	}
	return goName
}
// legacyWrapEnum wraps v as a protoreflect.Enum,
// where v must be a int32 kind and not implement the v2 API already.
func legacyWrapEnum(v reflect.Value) pref.Enum {

View File

@ -21,6 +21,11 @@ import (
// These functions exist to support exported APIs in generated protobufs.
// While these are deprecated, they cannot be removed for compatibility reasons.
// LegacyEnumName returns the name of enums used in legacy code.
// It is a thin exported wrapper that delegates to legacyEnumName for the
// enum described by ed.
func (Export) LegacyEnumName(ed pref.EnumDescriptor) string {
return legacyEnumName(ed)
}
// UnmarshalJSONEnum unmarshals an enum from a JSON-encoded input.
// The input can either be a string representing the enum value by name,
// or a number representing the enum number itself.

View File

@ -77,31 +77,10 @@ func (xi *ExtensionInfo) initToLegacy() {
}
}
// Reconstruct the legacy enum full name, which is an odd mixture of the
// proto package name with the Go type name.
// Reconstruct the legacy enum full name.
var enumName string
if xd.Kind() == pref.EnumKind {
// Derive Go type name.
t := extType
if t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice {
t = t.Elem()
}
enumName = t.Name()
// Derive the proto package name.
// For legacy enums, obtain the proto package from the raw descriptor.
var protoPkg string
if fd := xd.Enum().ParentFile(); fd != nil {
protoPkg = string(fd.Package())
}
if ed, ok := reflect.Zero(t).Interface().(enumV1); ok && protoPkg == "" {
b, _ := ed.EnumDescriptor()
protoPkg = string(legacyLoadFileDesc(b).Package())
}
if protoPkg != "" {
enumName = protoPkg + "." + enumName
}
enumName = legacyEnumName(xd.Enum())
}
// Derive the proto file that the extension was declared within.

View File

@ -6,8 +6,10 @@
package strs
import (
"go/token"
"strings"
"unicode"
"unicode/utf8"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
@ -23,6 +25,68 @@ func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
return fd.Syntax() == protoreflect.Proto3
}
// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
//
// An interior underscore that is followed by a lower case ASCII letter is
// dropped and that letter is upper-cased. A '.' is handled the same way
// when followed by a lower case letter, and is otherwise turned into '_'.
// A '_' at the very start, or directly after a '.', becomes 'X'.
func GoCamelCase(s string) string {
	// The input is consumed one word at a time. A word begins at an
	// underscore or an upper case letter, and its first letter is always
	// emitted in upper case. Digits are emitted unchanged.
	out := make([]byte, 0, len(s))
	lowerFollows := func(pos int) bool {
		return pos+1 < len(s) && isASCIILower(s[pos+1])
	}
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		leading := pos == 0 || s[pos-1] == '.'
		switch {
		case ch == '.':
			// ".{{lowercase}}" drops the dot; any other dot becomes '_'.
			if !lowerFollows(pos) {
				out = append(out, '_')
			}
		case ch == '_' && leading:
			// A leading '_' (or one right after '.') becomes 'X' so the
			// result starts with a capital letter, matching historic behavior.
			out = append(out, 'X')
		case ch == '_' && lowerFollows(pos):
			// "_{{lowercase}}" drops the underscore.
		case isASCIIDigit(ch):
			out = append(out, ch)
		default:
			// Presumably a letter; anything else is a bogus identifier
			// and is passed through unchanged.
			if isASCIILower(ch) {
				ch -= 'a' - 'A' // upper-case the first letter of the word
			}
			out = append(out, ch)
			// Copy the run of lower case letters that follows.
			for lowerFollows(pos) {
				pos++
				out = append(out, s[pos])
			}
		}
	}
	return string(out)
}
// GoSanitized rewrites s into a string usable as a Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit
// is replaced with '_'. The result is then prefixed with '_' when it is a
// Go keyword or when its first rune is not a letter (which includes the
// empty string).
func GoSanitized(s string) string {
	var sb strings.Builder
	sb.Grow(len(s))
	for _, r := range s {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			r = '_'
		}
		sb.WriteRune(r)
	}
	ident := sb.String()

	// An identifier must start with a letter and must not shadow a keyword.
	first, _ := utf8.DecodeRuneInString(ident)
	needsPrefix := token.Lookup(ident).IsKeyword() || !unicode.IsLetter(first)
	if needsPrefix {
		return "_" + ident
	}
	return ident
}
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
// according to the protobuf JSON specification.
func JSONCamelCase(s string) string {
@ -31,8 +95,7 @@ func JSONCamelCase(s string) string {
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
c := s[i]
if c != '_' {
isLower := 'a' <= c && c <= 'z'
if wasUnderscore && isLower {
if wasUnderscore && isASCIILower(c) {
c -= 'a' - 'A' // convert to uppercase
}
b = append(b, c)
@ -48,8 +111,7 @@ func JSONSnakeCase(s string) string {
var b []byte
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
c := s[i]
isUpper := 'A' <= c && c <= 'Z'
if isUpper {
if isASCIIUpper(c) {
b = append(b, '_')
c += 'a' - 'A' // convert to lowercase
}
@ -122,3 +184,13 @@ func TrimEnumPrefix(s, prefix string) string {
}
return s
}
// isASCIILower reports whether c is an ASCII lower-case letter.
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}

// isASCIIUpper reports whether c is an ASCII upper-case letter.
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}

// isASCIIDigit reports whether c is an ASCII digit.
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

View File

@ -9,6 +9,61 @@ import (
"testing"
)
// TestGoCamelCase runs GoCamelCase over a table of inputs covering
// underscores, dots, digits, already-capitalized words and non-ASCII text.
func TestGoCamelCase(t *testing.T) {
	type testCase struct{ input, expected string }
	cases := []testCase{
		{"", ""},
		{"one", "One"},
		{"one_two", "OneTwo"},
		{"_my_field_name_2", "XMyFieldName_2"},
		{"Something_Capped", "Something_Capped"},
		{"my_Name", "My_Name"},
		{"OneTwo", "OneTwo"},
		{"_", "X"},
		{"_a_", "XA_"},
		{"one.two", "OneTwo"},
		{"one.Two", "One_Two"},
		{"one_two.three_four", "OneTwoThreeFour"},
		{"one_two.Three_four", "OneTwo_ThreeFour"},
		{"_one._two", "XOne_XTwo"},
		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
		{"double__underscore", "Double_Underscore"},
		{"camelCase", "CamelCase"},
		{"go2proto", "Go2Proto"},
		{"世界", "世界"},
		{"x世界", "X世界"},
		{"foo_bar世界", "FooBar世界"},
	}
	for i := range cases {
		c := cases[i]
		actual := GoCamelCase(c.input)
		if actual == c.expected {
			continue
		}
		t.Errorf("GoCamelCase(%q) = %q, want %q", c.input, actual, c.expected)
	}
}
// TestGoSanitized runs GoSanitized over a table covering the empty string,
// a Go keyword, punctuation, invalid UTF-8 and non-ASCII letters.
func TestGoSanitized(t *testing.T) {
	type testCase struct{ input, expected string }
	cases := []testCase{
		{"", "_"},
		{"boo", "boo"},
		{"Boo", "Boo"},
		{"ßoo", "ßoo"},
		{"default", "_default"},
		{"hello", "hello"},
		{"hello-world!!", "hello_world__"},
		{"hello-\xde\xad\xbe\xef\x00", "hello_____"},
		{"hello 世界", "hello_世界"},
		{"世界", "世界"},
	}
	for i := range cases {
		c := cases[i]
		actual := GoSanitized(c.input)
		if actual == c.expected {
			continue
		}
		t.Errorf("GoSanitized(%q) = %q, want %q", c.input, actual, c.expected)
	}
}
func TestName(t *testing.T) {
tests := []struct {
in string