mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-01-27 03:35:32 +00:00
c51e2e0293
In 2014, when proto3 was being developed, there were a number of early adopters of the new syntax. Before the finalization of proto3 when it was released in open-source in July 2016, a decision was made to strictly validate strings in proto3. However, some of the early adopters were already using invalid UTF-8 with string fields. The google.protobuf.FieldOptions.enforce_utf8 option only exists to support those grandfathered users where they can opt-out of the validation logic. Practical use of that option in open source is impossible even if a user specifies the proto1_legacy build tag since it requires a hacked variant of descriptor.proto that is not externally available. This CL supports enforce_utf8 by modifying internal/filedesc to expose the flag if it detects it in the raw descriptor. We add an strs.EnforceUTF8 function as a centralized place to determine whether to perform validation. Validation opt-out is supported only in builds with legacy support. We implement support for validating UTF-8 in all proto3 string fields, even if they are backed by a Go []byte. Change-Id: I9c0628b84909bc7181125f09db730c80d490e485 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/186002 Reviewed-by: Damien Neil <dneil@google.com>
125 lines
3.1 KiB
Go
125 lines
3.1 KiB
Go
// Copyright 2019 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package strs provides string manipulation functionality specific to protobuf.
|
|
package strs
|
|
|
|
import (
|
|
"strings"
|
|
"unicode"
|
|
|
|
"google.golang.org/protobuf/internal/flags"
|
|
"google.golang.org/protobuf/reflect/protoreflect"
|
|
)
|
|
|
|
// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
|
|
func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
|
|
if flags.Proto1Legacy {
|
|
if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
|
|
return fd.EnforceUTF8()
|
|
}
|
|
}
|
|
return fd.Syntax() == protoreflect.Proto3
|
|
}
|
|
|
|
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
// according to the protobuf JSON specification.
//
// Every underscore is dropped, and an ASCII lowercase letter that directly
// follows an underscore is emitted in uppercase. All other bytes are copied
// through unchanged.
func JSONCamelCase(s string) string {
	out := make([]byte, 0, len(s))
	capitalize := false               // true while the previous byte was '_'
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if ch == '_' {
			capitalize = true
			continue
		}
		if capitalize && ch >= 'a' && ch <= 'z' {
			ch = ch - 'a' + 'A' // convert to uppercase
		}
		out = append(out, ch)
		capitalize = false
	}
	return string(out)
}
|
|
|
|
// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
// according to the protobuf JSON specification.
//
// Each ASCII uppercase letter is replaced by an underscore followed by the
// corresponding lowercase letter. All other bytes are copied through
// unchanged.
func JSONSnakeCase(s string) string {
	out := make([]byte, 0, len(s))
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if ch < 'A' || ch > 'Z' {
			out = append(out, ch)
			continue
		}
		// Uppercase letter: emit a separator and the lowercase form.
		out = append(out, '_', ch+('a'-'A'))
	}
	return string(out)
}
|
|
|
|
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped; the first byte and every byte that directly
// follows an underscore is converted to uppercase when it is an ASCII
// lowercase letter; finally "Entry" is appended.
//
// The input is processed byte-by-byte, like JSONCamelCase and JSONSnakeCase,
// so bytes outside the ASCII range are copied through unchanged instead of
// being decoded as runes and truncated to a single byte.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}
|
|
|
|
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped; the first byte and every byte that directly
// follows an underscore is converted to ASCII uppercase, and every other
// byte is converted to ASCII lowercase.
//
// The input is processed byte-by-byte, like JSONCamelCase and JSONSnakeCase,
// so bytes outside the ASCII range are copied through unchanged instead of
// being decoded as runes and truncated to a single byte.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A' // convert to lowercase
			}
			b = append(b, c)
			upperNext = false
		}
	}
	return string(b)
}
|
|
|
|
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and the comparison lowercases
// each byte of s before comparing it with the next prefix byte. The original
// s is returned unchanged when the prefix does not fully match or when
// nothing but underscores would remain after trimming.
func TrimEnumPrefix(s, prefix string) string {
	rest := s    // unconsumed tail of the value name
	matched := 0 // number of prefix bytes matched so far
	for rest != "" && matched < len(prefix) {
		head := rest[0]
		rest = rest[1:]
		if head == '_' {
			continue // underscores never count towards the prefix
		}
		if unicode.ToLower(rune(head)) != rune(prefix[matched]) {
			return s // no prefix match
		}
		matched++
	}
	if matched < len(prefix) {
		return s // value name ran out before the prefix did
	}
	if trimmed := strings.TrimLeft(rest, "_"); trimmed != "" {
		return trimmed
	}
	return s // avoid returning empty string
}
|