mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-01-04 02:38:50 +00:00
2c6f09887d
We want to codify the naming logic to be understandable in a specification. Clean up the camelCase function to be a little more readable. Cleanup and fix the cleanGoName function when mustExport is true. It is okay to change this behavior since this is new logic in v2 that has not yet been exposed to the universe. In the mustExport code-path, we do not need to check for conflicts with keywords since Go keywords are never uppercase, so the uppercasing of the first letter is sufficient to break a conflict. Also, we fix the logic for uppercasing the first character since not every lowercase character has an uppercase form. Change-Id: If14422d773bb89ed7038d874135e3dcd12683101 Reviewed-on: https://go-review.googlesource.com/c/154180 Reviewed-by: Damien Neil <dneil@google.com>
196 lines
5.6 KiB
Go
196 lines
5.6 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package protogen
|
|
|
|
import (
|
|
"fmt"
|
|
"go/token"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
|
|
"github.com/golang/protobuf/v2/reflect/protoreflect"
|
|
)
|
|
|
|
// A GoIdent is a Go identifier, consisting of a name and import path.
|
|
// The name is a single identifier and may not be a dot-qualified selector.
|
|
type GoIdent struct {
|
|
GoName string
|
|
GoImportPath GoImportPath
|
|
}
|
|
|
|
// String returns the identifier as the quoted import path, a dot, and the
// name, e.g. `"google.golang.org/genproto/protobuf".Message`.
//
// The import path is converted to a plain string before formatting.
// GoImportPath implements fmt.Stringer and its String method already returns
// a quoted path; handing that value to %q directly makes fmt quote the
// quoted text a second time (producing `"\"path\"".Name`).
func (id GoIdent) String() string {
	return fmt.Sprintf("%q.%v", string(id.GoImportPath), id.GoName)
}
|
|
|
|
// newGoIdent returns the Go identifier for a descriptor.
|
|
func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
|
|
name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
|
|
return GoIdent{
|
|
GoName: camelCase(name),
|
|
GoImportPath: f.GoImportPath,
|
|
}
|
|
}
|
|
|
|
// GoImportPath is the import path of a Go package,
// for example "google.golang.org/genproto/protobuf".
type GoImportPath string
|
|
|
|
// String returns the import path as a double-quoted Go string literal,
// as produced by strconv.Quote.
func (p GoImportPath) String() string {
	quoted := strconv.Quote(string(p))
	return quoted
}
|
|
|
|
// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
|
|
func (p GoImportPath) Ident(s string) GoIdent {
|
|
return GoIdent{GoName: s, GoImportPath: p}
|
|
}
|
|
|
|
// GoPackageName is the name of a Go package, for example "protobuf".
type GoPackageName string
|
|
|
|
// cleanPackageName converts a string to a valid Go package name.
|
|
func cleanPackageName(name string) GoPackageName {
|
|
return GoPackageName(cleanGoName(name, false))
|
|
}
|
|
|
|
// cleanGoName rewrites s into a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit
// is replaced by '_'. What happens next depends on mustExport:
//
//   - mustExport == true: the result is made exported. If the first rune
//     has an uppercase form it is uppercased; otherwise 'X' is prepended.
//   - mustExport == false: '_' is prepended when the sanitized name is a Go
//     keyword or does not begin with a Unicode letter.
func cleanGoName(s string, mustExport bool) string {
	// Map anything that cannot appear in an identifier to '_'.
	sanitize := func(r rune) rune {
		switch {
		case unicode.IsLetter(r), unicode.IsDigit(r):
			return r
		default:
			return '_'
		}
	}
	s = strings.Map(sanitize, s)
	first, width := utf8.DecodeRuneInString(s)

	if !mustExport {
		// A keyword or a non-letter start makes the name unusable as-is.
		if !unicode.IsLetter(first) || token.Lookup(s).IsKeyword() {
			return "_" + s
		}
		return s
	}

	// Not every letter has an uppercase counterpart, so check the result of
	// ToUpper rather than assuming it produced an uppercase rune.
	if upper := unicode.ToUpper(first); unicode.IsUpper(upper) {
		return string(upper) + s[width:]
	}
	return "X" + s
}
|
|
|
|
// isGoPredeclaredIdentifier is a set of Go predeclared identifiers,
// grouped below by kind. A name is a member when its entry is true.
var isGoPredeclaredIdentifier = map[string]bool{
	// Types.
	"bool":       true,
	"byte":       true,
	"complex128": true,
	"complex64":  true,
	"error":      true,
	"float32":    true,
	"float64":    true,
	"int":        true,
	"int16":      true,
	"int32":      true,
	"int64":      true,
	"int8":       true,
	"rune":       true,
	"string":     true,
	"uint":       true,
	"uint16":     true,
	"uint32":     true,
	"uint64":     true,
	"uint8":      true,
	"uintptr":    true,

	// Constants.
	"false": true,
	"iota":  true,
	"true":  true,

	// Zero value.
	"nil": true,

	// Built-in functions.
	"append":  true,
	"cap":     true,
	"close":   true,
	"complex": true,
	"copy":    true,
	"delete":  true,
	"imag":    true,
	"len":     true,
	"make":    true,
	"new":     true,
	"panic":   true,
	"print":   true,
	"println": true,
	"real":    true,
	"recover": true,
}
|
|
|
|
// baseName returns the final '/'-separated element of name with its last
// '.'-delimited suffix stripped, e.g. "a/b/c.proto" yields "c".
func baseName(name string) string {
	// Keep only what follows the last slash, if there is one.
	if slash := strings.LastIndexByte(name, '/'); slash != -1 {
		name = name[slash+1:]
	}
	// Cut at the last dot of the remaining element, if there is one.
	if dot := strings.LastIndexByte(name, '.'); dot != -1 {
		name = name[:dot]
	}
	return name
}
|
|
|
|
// camelCase converts a name to CamelCase.
|
|
//
|
|
// If there is an interior underscore followed by a lower case letter,
|
|
// drop the underscore and convert the letter to upper case.
|
|
// There is a remote possibility of this rewrite causing a name collision,
|
|
// but it's so remote we're prepared to pretend it's nonexistent - since the
|
|
// C++ generator lowercases names, it's extremely unlikely to have two fields
|
|
// with different capitalizations.
|
|
func camelCase(s string) string {
|
|
// Invariant: if the next letter is lower case, it must be converted
|
|
// to upper case.
|
|
// That is, we process a word at a time, where words are marked by _ or
|
|
// upper case letter. Digits are treated as words.
|
|
var b []byte
|
|
for i := 0; i < len(s); i++ {
|
|
c := s[i]
|
|
switch {
|
|
case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
|
|
// Skip over '.' in ".{{lowercase}}".
|
|
case c == '.':
|
|
b = append(b, '_') // convert '.' to '_'
|
|
case c == '_' && (i == 0 || s[i-1] == '.'):
|
|
// Convert initial '_' to ensure we start with a capital letter.
|
|
// Do the same for '_' after '.' to match historic behavior.
|
|
b = append(b, 'X') // convert '_' to 'X'
|
|
case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
|
|
// Skip over '_' in "_{{lowercase}}".
|
|
case isASCIIDigit(c):
|
|
b = append(b, c)
|
|
default:
|
|
// Assume we have a letter now - if not, it's a bogus identifier.
|
|
// The next word is a sequence of characters that must start upper case.
|
|
if isASCIILower(c) {
|
|
c -= 'a' - 'A' // convert lowercase to uppercase
|
|
}
|
|
b = append(b, c)
|
|
|
|
// Accept lower case sequence that follows.
|
|
for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
|
|
b = append(b, s[i+1])
|
|
}
|
|
}
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
// isASCIILower reports whether c is one of the ASCII bytes 'a' through 'z'.
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
|
|
|
|
// isASCIIDigit reports whether c is one of the ASCII bytes '0' through '9'.
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}
|