diff --git a/cmd/protoc-gen-go/internal_gengo/main.go b/cmd/protoc-gen-go/internal_gengo/main.go index 0bb1f4f9..dfc31021 100644 --- a/cmd/protoc-gen-go/internal_gengo/main.go +++ b/cmd/protoc-gen-go/internal_gengo/main.go @@ -780,14 +780,7 @@ func fieldGoType(g *protogen.GeneratedFile, f *fileInfo, field *protogen.Field) func fieldProtobufTagValue(field *protogen.Field) string { var enumName string if field.Desc.Kind() == protoreflect.EnumKind { - // For historical reasons, the name used in the tag is neither - // the protobuf full name nor the fully qualified Go identifier, - // but an odd mix of both. - enumName = field.Enum.GoIdent.GoName - protoPkg := string(field.Enum.Desc.ParentFile().Package()) - if protoPkg != "" { - enumName = protoPkg + "." + enumName - } + enumName = protoimpl.X.LegacyEnumName(field.Enum.Desc) } return tag.Marshal(field.Desc, enumName) } diff --git a/compiler/protogen/names.go b/compiler/protogen/names.go deleted file mode 100644 index ae41a5ad..00000000 --- a/compiler/protogen/names.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package protogen - -import ( - "fmt" - "go/token" - "strconv" - "strings" - "unicode" - "unicode/utf8" - - "google.golang.org/protobuf/reflect/protoreflect" -) - -// A GoIdent is a Go identifier, consisting of a name and import path. -// The name is a single identifier and may not be a dot-qualified selector. -type GoIdent struct { - GoName string - GoImportPath GoImportPath -} - -func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) } - -// newGoIdent returns the Go identifier for a descriptor. -func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent { - name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".") - return GoIdent{ - GoName: camelCase(name), - GoImportPath: f.GoImportPath, - } -} - -// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf". -type GoImportPath string - -func (p GoImportPath) String() string { return strconv.Quote(string(p)) } - -// Ident returns a GoIdent with s as the GoName and p as the GoImportPath. -func (p GoImportPath) Ident(s string) GoIdent { - return GoIdent{GoName: s, GoImportPath: p} -} - -// A GoPackageName is the name of a Go package. e.g., "protobuf". -type GoPackageName string - -// cleanPackageName converts a string to a valid Go package name. -func cleanPackageName(name string) GoPackageName { - return GoPackageName(cleanGoName(name)) -} - -// cleanGoName converts a string to a valid Go identifier. -func cleanGoName(s string) string { - // Sanitize the input to the set of valid characters, - // which must be '_' or be in the Unicode L or N categories. - s = strings.Map(func(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return r - } - return '_' - }, s) - - // Prepend '_' in the event of a Go keyword conflict or if - // the identifier is invalid (does not start in the Unicode L category). - r, _ := utf8.DecodeRuneInString(s) - if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) { - return "_" + s - } - return s -} - -// baseName returns the last path element of the name, with the last dotted suffix removed. -func baseName(name string) string { - // First, find the last element - if i := strings.LastIndex(name, "/"); i >= 0 { - name = name[i+1:] - } - // Now drop the suffix - if i := strings.LastIndex(name, "."); i >= 0 { - name = name[:i] - } - return name -} - -// camelCase converts a name to CamelCase. -// -// If there is an interior underscore followed by a lower case letter, -// drop the underscore and convert the letter to upper case. -// There is a remote possibility of this rewrite causing a name collision, -// but it's so remote we're prepared to pretend it's nonexistent - since the -// C++ generator lowercases names, it's extremely unlikely to have two fields -// with different capitalizations. -func camelCase(s string) string { - // Invariant: if the next letter is lower case, it must be converted - // to upper case. - // That is, we process a word at a time, where words are marked by _ or - // upper case letter. Digits are treated as words. - var b []byte - for i := 0; i < len(s); i++ { - c := s[i] - switch { - case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): - // Skip over '.' in ".{{lowercase}}". - case c == '.': - b = append(b, '_') // convert '.' to '_' - case c == '_' && (i == 0 || s[i-1] == '.'): - // Convert initial '_' to ensure we start with a capital letter. - // Do the same for '_' after '.' to match historic behavior. - b = append(b, 'X') // convert '_' to 'X' - case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): - // Skip over '_' in "_{{lowercase}}". - case isASCIIDigit(c): - b = append(b, c) - default: - // Assume we have a letter now - if not, it's a bogus identifier. - // The next word is a sequence of characters that must start upper case. - if isASCIILower(c) { - c -= 'a' - 'A' // convert lowercase to uppercase - } - b = append(b, c) - - // Accept lower case sequence that follows. - for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { - b = append(b, s[i+1]) - } - } - } - return string(b) -} - -// Is c an ASCII lower-case letter? -func isASCIILower(c byte) bool { - return 'a' <= c && c <= 'z' -} - -// Is c an ASCII digit? -func isASCIIDigit(c byte) bool { - return '0' <= c && c <= '9' -} diff --git a/compiler/protogen/names_test.go b/compiler/protogen/names_test.go deleted file mode 100644 index 6f03cc91..00000000 --- a/compiler/protogen/names_test.go +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package protogen - -import "testing" - -func TestCamelCase(t *testing.T) { - tests := []struct { - in, want string - }{ - {"", ""}, - {"one", "One"}, - {"one_two", "OneTwo"}, - {"_my_field_name_2", "XMyFieldName_2"}, - {"Something_Capped", "Something_Capped"}, - {"my_Name", "My_Name"}, - {"OneTwo", "OneTwo"}, - {"_", "X"}, - {"_a_", "XA_"}, - {"one.two", "OneTwo"}, - {"one.Two", "One_Two"}, - {"one_two.three_four", "OneTwoThreeFour"}, - {"one_two.Three_four", "OneTwo_ThreeFour"}, - {"_one._two", "XOne_XTwo"}, - {"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"}, - {"double__underscore", "Double_Underscore"}, - {"camelCase", "CamelCase"}, - {"go2proto", "Go2Proto"}, - {"世界", "世界"}, - {"x世界", "X世界"}, - {"foo_bar世界", "FooBar世界"}, - } - for _, tc := range tests { - if got := camelCase(tc.in); got != tc.want { - t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want) - } - } -} - -func TestCleanGoName(t *testing.T) { - tests := []struct { - in, want string - }{ - {"", "_"}, - {"boo", "boo"}, - {"Boo", "Boo"}, - {"ßoo", "ßoo"}, - {"default", "_default"}, - {"hello", "hello"}, - {"hello-world!!", "hello_world__"}, - {"hello-\xde\xad\xbe\xef\x00", "hello_____"}, - {"hello 世界", "hello_世界"}, - {"世界", "世界"}, - } - for _, tc := range tests { - if got := cleanGoName(tc.in); got != tc.want { - t.Errorf("cleanGoName(%q) = %q, want %q", tc.in, got, tc.want) - } - } -} diff --git a/compiler/protogen/protogen.go b/compiler/protogen/protogen.go index ec037049..3be898d7 100644 --- a/compiler/protogen/protogen.go +++ b/compiler/protogen/protogen.go @@ -30,6 +30,7 @@ import ( "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/internal/fieldnum" + "google.golang.org/protobuf/internal/strs" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protodesc" "google.golang.org/protobuf/reflect/protoreflect" @@ -431,7 +432,7 @@ func newFile(gen *Plugin, p *descriptorpb.FileDescriptorProto, packageName GoPac } } f.GoDescriptorIdent = GoIdent{ - GoName: "File_" + cleanGoName(p.GetName()), + GoName: "File_" + strs.GoSanitized(p.GetName()), GoImportPath: f.GoImportPath, } f.GeneratedFilenamePrefix = prefix @@ -499,6 +500,8 @@ func goPackageOption(d *descriptorpb.FileDescriptorProto) (pkg GoPackageName, im } // A semicolon-delimited suffix delimits the import path and package name. if i := strings.Index(opt, ";"); i >= 0 { + // TODO: The package name is explicitly provided by the .proto file. + // Rather than sanitizing it, we should pass it verbatim. return cleanPackageName(opt[i+1:]), GoImportPath(opt[:i]) } // The presence of a slash implies there's an import path. @@ -756,7 +759,7 @@ func newField(gen *Plugin, f *File, message *Message, desc protoreflect.FieldDes default: loc = message.Location.appendPath(fieldnum.DescriptorProto_Field, int32(desc.Index())) } - camelCased := camelCase(string(desc.Name())) + camelCased := strs.GoCamelCase(string(desc.Name())) var parentPrefix string if message != nil { parentPrefix = message.GoIdent.GoName + "_" @@ -826,7 +829,7 @@ type Oneof struct { func newOneof(gen *Plugin, f *File, message *Message, desc protoreflect.OneofDescriptor) *Oneof { loc := message.Location.appendPath(fieldnum.DescriptorProto_OneofDecl, int32(desc.Index())) - camelCased := camelCase(string(desc.Name())) + camelCased := strs.GoCamelCase(string(desc.Name())) parentPrefix := message.GoIdent.GoName + "_" return &Oneof{ Desc: desc, @@ -860,7 +863,7 @@ func newService(gen *Plugin, f *File, desc protoreflect.ServiceDescriptor) *Serv loc := f.location(fieldnum.FileDescriptorProto_Service, int32(desc.Index())) service := &Service{ Desc: desc, - GoName: camelCase(string(desc.Name())), + GoName: strs.GoCamelCase(string(desc.Name())), Location: loc, Comments: f.comments[newPathKey(loc.Path)], } @@ -889,7 +892,7 @@ func newMethod(gen *Plugin, f *File, service *Service, desc protoreflect.MethodD loc := service.Location.appendPath(fieldnum.ServiceDescriptorProto_Method, int32(desc.Index())) method := &Method{ Desc: desc, - GoName: camelCase(string(desc.Name())), + GoName: strs.GoCamelCase(string(desc.Name())), Parent: service, Location: loc, Comments: f.comments[newPathKey(loc.Path)], @@ -1183,6 +1186,56 @@ func (g *GeneratedFile) metaFile(content []byte) (string, error) { return string(b), nil } +// A GoIdent is a Go identifier, consisting of a name and import path. +// The name is a single identifier and may not be a dot-qualified selector. +type GoIdent struct { + GoName string + GoImportPath GoImportPath +} + +func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) } + +// newGoIdent returns the Go identifier for a descriptor. +func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent { + name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".") + return GoIdent{ + GoName: strs.GoCamelCase(name), + GoImportPath: f.GoImportPath, + } +} + +// A GoImportPath is the import path of a Go package. +// For example: "google.golang.org/protobuf/compiler/protogen" +type GoImportPath string + +func (p GoImportPath) String() string { return strconv.Quote(string(p)) } + +// Ident returns a GoIdent with s as the GoName and p as the GoImportPath. +func (p GoImportPath) Ident(s string) GoIdent { + return GoIdent{GoName: s, GoImportPath: p} +} + +// A GoPackageName is the name of a Go package. e.g., "protobuf". +type GoPackageName string + +// cleanPackageName converts a string to a valid Go package name. +func cleanPackageName(name string) GoPackageName { + return GoPackageName(strs.GoSanitized(name)) +} + +// baseName returns the last path element of the name, with the last dotted suffix removed. +func baseName(name string) string { + // First, find the last element + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] + } + // Now drop the suffix + if i := strings.LastIndex(name, "."); i >= 0 { + name = name[:i] + } + return name +} + type pathType int const ( diff --git a/internal/impl/legacy_enum.go b/internal/impl/legacy_enum.go index 279baa9f..4ec31df0 100644 --- a/internal/impl/legacy_enum.go +++ b/internal/impl/legacy_enum.go @@ -11,10 +11,27 @@ import ( "sync" "google.golang.org/protobuf/internal/filedesc" + "google.golang.org/protobuf/internal/strs" "google.golang.org/protobuf/reflect/protoreflect" pref "google.golang.org/protobuf/reflect/protoreflect" ) +// legacyEnumName returns the name of enums used in legacy code. +// It is neither the protobuf full name nor the qualified Go name, +// but rather an odd hybrid of both. +func legacyEnumName(ed pref.EnumDescriptor) string { + var protoPkg string + enumName := string(ed.FullName()) + if fd := ed.ParentFile(); fd != nil { + protoPkg = string(fd.Package()) + enumName = strings.TrimPrefix(enumName, protoPkg+".") + } + if protoPkg == "" { + return strs.GoCamelCase(enumName) + } + return protoPkg + "." + strs.GoCamelCase(enumName) +} + // legacyWrapEnum wraps v as a protoreflect.Enum, // where v must be a int32 kind and not implement the v2 API already. func legacyWrapEnum(v reflect.Value) pref.Enum { diff --git a/internal/impl/legacy_export.go b/internal/impl/legacy_export.go index 07c16b5d..29c1b01a 100644 --- a/internal/impl/legacy_export.go +++ b/internal/impl/legacy_export.go @@ -21,6 +21,11 @@ import ( // These functions exist to support exported APIs in generated protobufs. // While these are deprecated, they cannot be removed for compatibility reasons. +// LegacyEnumName returns the name of enums used in legacy code. +func (Export) LegacyEnumName(ed pref.EnumDescriptor) string { + return legacyEnumName(ed) +} + // UnmarshalJSONEnum unmarshals an enum from a JSON-encoded input. // The input can either be a string representing the enum value by name, // or a number representing the enum number itself. diff --git a/internal/impl/legacy_extension.go b/internal/impl/legacy_extension.go index b4840673..ec5420d2 100644 --- a/internal/impl/legacy_extension.go +++ b/internal/impl/legacy_extension.go @@ -77,31 +77,10 @@ func (xi *ExtensionInfo) initToLegacy() { } } - // Reconstruct the legacy enum full name, which is an odd mixture of the - // proto package name with the Go type name. + // Reconstruct the legacy enum full name. var enumName string if xd.Kind() == pref.EnumKind { - // Derive Go type name. - t := extType - if t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice { - t = t.Elem() - } - enumName = t.Name() - - // Derive the proto package name. - // For legacy enums, obtain the proto package from the raw descriptor. - var protoPkg string - if fd := xd.Enum().ParentFile(); fd != nil { - protoPkg = string(fd.Package()) - } - if ed, ok := reflect.Zero(t).Interface().(enumV1); ok && protoPkg == "" { - b, _ := ed.EnumDescriptor() - protoPkg = string(legacyLoadFileDesc(b).Package()) - } - - if protoPkg != "" { - enumName = protoPkg + "." + enumName - } + enumName = legacyEnumName(xd.Enum()) } // Derive the proto file that the extension was declared within. diff --git a/internal/strs/strings.go b/internal/strs/strings.go index 2208ff21..0b74e765 100644 --- a/internal/strs/strings.go +++ b/internal/strs/strings.go @@ -6,8 +6,10 @@ package strs import ( + "go/token" "strings" "unicode" + "unicode/utf8" "google.golang.org/protobuf/internal/flags" "google.golang.org/protobuf/reflect/protoreflect" @@ -23,6 +25,68 @@ func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { return fd.Syntax() == protoreflect.Proto3 } +// GoCamelCase camel-cases a protobuf name for use as a Go identifier. +// +// If there is an interior underscore followed by a lower case letter, +// drop the underscore and convert the letter to upper case. +func GoCamelCase(s string) string { + // Invariant: if the next letter is lower case, it must be converted + // to upper case. + // That is, we process a word at a time, where words are marked by _ or + // upper case letter. Digits are treated as words. + var b []byte + for i := 0; i < len(s); i++ { + c := s[i] + switch { + case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): + // Skip over '.' in ".{{lowercase}}". + case c == '.': + b = append(b, '_') // convert '.' to '_' + case c == '_' && (i == 0 || s[i-1] == '.'): + // Convert initial '_' to ensure we start with a capital letter. + // Do the same for '_' after '.' to match historic behavior. + b = append(b, 'X') // convert '_' to 'X' + case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): + // Skip over '_' in "_{{lowercase}}". + case isASCIIDigit(c): + b = append(b, c) + default: + // Assume we have a letter now - if not, it's a bogus identifier. + // The next word is a sequence of characters that must start upper case. + if isASCIILower(c) { + c -= 'a' - 'A' // convert lowercase to uppercase + } + b = append(b, c) + + // Accept lower case sequence that follows. + for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { + b = append(b, s[i+1]) + } + } + } + return string(b) +} + +// GoSanitized converts a string to a valid Go identifier. +func GoSanitized(s string) string { + // Sanitize the input to the set of valid characters, + // which must be '_' or be in the Unicode L or N categories. + s = strings.Map(func(r rune) rune { + if unicode.IsLetter(r) || unicode.IsDigit(r) { + return r + } + return '_' + }, s) + + // Prepend '_' in the event of a Go keyword conflict or if + // the identifier is invalid (does not start in the Unicode L category). + r, _ := utf8.DecodeRuneInString(s) + if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) { + return "_" + s + } + return s +} + // JSONCamelCase converts a snake_case identifier to a camelCase identifier, // according to the protobuf JSON specification. func JSONCamelCase(s string) string { @@ -31,8 +95,7 @@ func JSONCamelCase(s string) string { for i := 0; i < len(s); i++ { // proto identifiers are always ASCII c := s[i] if c != '_' { - isLower := 'a' <= c && c <= 'z' - if wasUnderscore && isLower { + if wasUnderscore && isASCIILower(c) { c -= 'a' - 'A' // convert to uppercase } b = append(b, c) @@ -48,8 +111,7 @@ func JSONSnakeCase(s string) string { var b []byte for i := 0; i < len(s); i++ { // proto identifiers are always ASCII c := s[i] - isUpper := 'A' <= c && c <= 'Z' - if isUpper { + if isASCIIUpper(c) { b = append(b, '_') c += 'a' - 'A' // convert to lowercase } @@ -122,3 +184,13 @@ func TrimEnumPrefix(s, prefix string) string { } return s } + +func isASCIILower(c byte) bool { + return 'a' <= c && c <= 'z' +} +func isASCIIUpper(c byte) bool { + return 'A' <= c && c <= 'Z' +} +func isASCIIDigit(c byte) bool { + return '0' <= c && c <= '9' +} diff --git a/internal/strs/strings_test.go b/internal/strs/strings_test.go index 2c4c2adf..0bb894a1 100644 --- a/internal/strs/strings_test.go +++ b/internal/strs/strings_test.go @@ -9,6 +9,61 @@ import ( "testing" ) +func TestGoCamelCase(t *testing.T) { + tests := []struct { + in, want string + }{ + {"", ""}, + {"one", "One"}, + {"one_two", "OneTwo"}, + {"_my_field_name_2", "XMyFieldName_2"}, + {"Something_Capped", "Something_Capped"}, + {"my_Name", "My_Name"}, + {"OneTwo", "OneTwo"}, + {"_", "X"}, + {"_a_", "XA_"}, + {"one.two", "OneTwo"}, + {"one.Two", "One_Two"}, + {"one_two.three_four", "OneTwoThreeFour"}, + {"one_two.Three_four", "OneTwo_ThreeFour"}, + {"_one._two", "XOne_XTwo"}, + {"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"}, + {"double__underscore", "Double_Underscore"}, + {"camelCase", "CamelCase"}, + {"go2proto", "Go2Proto"}, + {"世界", "世界"}, + {"x世界", "X世界"}, + {"foo_bar世界", "FooBar世界"}, + } + for _, tc := range tests { + if got := GoCamelCase(tc.in); got != tc.want { + t.Errorf("GoCamelCase(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} + +func TestGoSanitized(t *testing.T) { + tests := []struct { + in, want string + }{ + {"", "_"}, + {"boo", "boo"}, + {"Boo", "Boo"}, + {"ßoo", "ßoo"}, + {"default", "_default"}, + {"hello", "hello"}, + {"hello-world!!", "hello_world__"}, + {"hello-\xde\xad\xbe\xef\x00", "hello_____"}, + {"hello 世界", "hello_世界"}, + {"世界", "世界"}, + } + for _, tc := range tests { + if got := GoSanitized(tc.in); got != tc.want { + t.Errorf("GoSanitized(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} + func TestName(t *testing.T) { tests := []struct { in string