Joe Tsai 2e7817f117 compiler/protogen, internal/strs, internal/impl: expose enum Go name derivation
In order to migrate v1 to wrap v2, we need a way to reproduce
the awful enum "names" that v1 used, which was the concatenation of
the proto package with the Go identifier used for the enum.

To support this:
* Move the camel case logic from compiler/protogen to internal/strs
* Add a small stub in internal/impl to expose this functionality

Change-Id: I8ff31daa9ae541e5788dc04d2e89eae1574877e4
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/191637
Reviewed-by: Damien Neil <dneil@google.com>
2019-08-26 17:49:17 +00:00

197 lines
5.2 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package strs provides string manipulation functionality specific to protobuf.
package strs
import (
"go/token"
"strings"
"unicode"
"unicode/utf8"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
)
// EnforceUTF8 reports whether strict UTF-8 validation applies to the field fd.
//
// When the flags.ProtoLegacy flag is set and fd itself provides an
// EnforceUTF8 method, that method's answer is returned. In every other case
// the result is true exactly when fd.Syntax() is protoreflect.Proto3.
func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
	// utf8Enforcer is the optional hook a descriptor may implement to
	// override the syntax-based default.
	type utf8Enforcer interface {
		EnforceUTF8() bool
	}
	if flags.ProtoLegacy {
		if legacy, ok := fd.(utf8Enforcer); ok {
			return legacy.EnforceUTF8()
		}
	}
	return fd.Syntax() == protoreflect.Proto3
}
// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
//
// The input is processed byte by byte as a sequence of words, where a word
// starts at an underscore, a dot, an upper case letter, or a digit:
//
//   - A '.' or interior '_' directly followed by an ASCII lower case letter is
//     dropped, and that letter is upper-cased ("foo_bar" becomes "FooBar").
//   - Any other '.' is emitted as '_' ("foo.Bar" becomes "Foo_Bar").
//   - A '_' at the very start, or directly after a '.', is emitted as 'X' so
//     that the result begins with a capital letter ("_foo" becomes "XFoo").
//   - Digits are copied unchanged and count as a word of their own.
func GoCamelCase(s string) string {
	n := len(s)
	out := make([]byte, 0, n) // every input byte yields at most one output byte
	for i := 0; i < n; i++ {
		ch := s[i]
		followedByLower := i+1 < n && isASCIILower(s[i+1])

		switch ch {
		case '.':
			// ".{{lowercase}}" drops the dot; any other dot becomes '_'.
			if !followedByLower {
				out = append(out, '_')
			}
			continue
		case '_':
			// A leading '_' (or one right after '.') becomes 'X'; this check
			// takes priority over the "_{{lowercase}}" rule below to match
			// historic behavior.
			if i == 0 || s[i-1] == '.' {
				out = append(out, 'X')
				continue
			}
			// "_{{lowercase}}" drops the underscore.
			if followedByLower {
				continue
			}
			// Any other '_' is copied through by the generic path below.
		}

		if isASCIIDigit(ch) {
			out = append(out, ch)
			continue
		}

		// Assume a letter starts a new word here; anything else means the
		// identifier was bogus, and the byte is copied as-is.
		if isASCIILower(ch) {
			ch -= 'a' - 'A' // upper-case the first letter of the word
		}
		out = append(out, ch)

		// Consume the rest of the word: the run of lower case letters that follows.
		for i+1 < n && isASCIILower(s[i+1]) {
			i++
			out = append(out, s[i])
		}
	}
	return string(out)
}
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced by '_'. If the result is a Go keyword, or does not begin with a
// letter (which includes the empty string), a '_' is prepended.
func GoSanitized(s string) string {
	// keep maps a rune to itself when it may appear in an identifier,
	// and to '_' otherwise.
	keep := func(r rune) rune {
		switch {
		case unicode.IsLetter(r), unicode.IsDigit(r):
			return r
		default:
			return '_'
		}
	}
	ident := strings.Map(keep, s)

	// The identifier can be used verbatim only when it starts with a letter
	// and does not collide with a keyword.
	first, _ := utf8.DecodeRuneInString(ident)
	if unicode.IsLetter(first) && !token.Lookup(ident).IsKeyword() {
		return ident
	}
	return "_" + ident
}
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
// according to the protobuf JSON specification.
//
// Underscores are removed, and an ASCII lower case letter that directly
// follows an underscore is upper-cased. All other bytes are copied unchanged.
func JSONCamelCase(s string) string {
	out := make([]byte, 0, len(s))
	upperNext := false                // true when the previous byte was '_'
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if ch == '_' {
			upperNext = true
			continue
		}
		if upperNext && isASCIILower(ch) {
			ch -= 'a' - 'A' // convert to uppercase
		}
		out = append(out, ch)
		upperNext = false
	}
	return string(out)
}
// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
// according to the protobuf JSON specification.
//
// Each ASCII upper case letter is replaced by an underscore followed by the
// corresponding lower case letter. All other bytes are copied unchanged.
func JSONSnakeCase(s string) string {
	out := make([]byte, 0, len(s))
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if !isASCIIUpper(ch) {
			out = append(out, ch)
			continue
		}
		// Upper case letter: emit '_' and the lower-cased letter.
		out = append(out, '_', ch+('a'-'A'))
	}
	return string(out)
}
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased if it is an ASCII lower case letter, and "Entry"
// is appended. For example, "foo_bar" yields "FooBarEntry".
//
// The conversion is byte-wise and ASCII-only: bytes outside 'a'..'z' are
// copied through untouched, so non-ASCII input is never truncated or
// otherwise corrupted.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	// Iterate over bytes rather than runes: narrowing a rune to a byte would
	// mangle anything outside the ASCII range.
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased, and all remaining bytes are lower-cased.
// For example, "FOO_BAR" yields "FooBar".
//
// The conversion is byte-wise and ASCII-only: only 'a'..'z' and 'A'..'Z' are
// ever changed, so non-ASCII input is copied through without being truncated
// or otherwise corrupted.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	// Iterate over bytes rather than runes: narrowing a rune to a byte would
	// mangle anything outside the ASCII range.
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A' // convert to lowercase
			}
			b = append(b, c)
		}
	}
	return string(b)
}
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Matching ignores underscores in s and compares the remaining bytes of s,
// ASCII-lower-cased, against prefix. When the whole prefix matches, the rest
// of s is returned with any leading underscores removed. The original s is
// returned unchanged when the prefix does not match or when trimming would
// leave an empty string.
//
// Case folding is limited to 'A'..'Z'; all other bytes must match exactly.
func TrimEnumPrefix(s, prefix string) string {
	s0 := s // original input
	for len(s) > 0 && len(prefix) > 0 {
		if s[0] == '_' {
			s = s[1:]
			continue
		}
		// Fold ASCII only. Interpreting the raw byte as a code point would
		// fold bytes 0xC0..0xDE as Latin-1 letters and break exact matches
		// on multi-byte UTF-8 sequences.
		c := s[0]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		if c != prefix[0] {
			return s0 // no prefix match
		}
		s, prefix = s[1:], prefix[1:]
	}
	if len(prefix) > 0 {
		return s0 // no prefix match
	}
	s = strings.TrimLeft(s, "_")
	if len(s) == 0 {
		return s0 // avoid returning empty string
	}
	return s
}
// isASCIILower reports whether c is an ASCII lower case letter ('a' through 'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
// isASCIIUpper reports whether c is an ASCII upper case letter ('A' through 'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}