Joe Tsai c51e2e0293 all: support enforce_utf8 override
In 2014, when proto3 was being developed, there were a number of early
adopters of the new syntax. Before the finalization of proto3 when
it was released in open-source in July 2016, a decision was made to
strictly validate strings in proto3. However, some of the early adopters
were already using invalid UTF-8 with string fields.
The google.protobuf.FieldOptions.enforce_utf8 option only exists to support
those grandfathered users where they can opt-out of the validation logic.
Practical use of that option in open source is impossible even if a user
specifies the proto1_legacy build tag since it requires a hacked
variant of descriptor.proto that is not externally available.

This CL supports enforce_utf8 by modifying internal/filedesc to
expose the flag if it detects it in the raw descriptor.
We add an strs.EnforceUTF8 function as a centralized place to determine
whether to perform validation. Validation opt-out is supported
only in builds with legacy support.

We implement support for validating UTF-8 in all proto3 string fields,
even if they are backed by a Go []byte.

Change-Id: I9c0628b84909bc7181125f09db730c80d490e485
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/186002
Reviewed-by: Damien Neil <dneil@google.com>
2019-07-15 19:53:05 +00:00

125 lines
3.1 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package strs provides string manipulation functionality specific to protobuf.
package strs
import (
"strings"
"unicode"
"google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/reflect/protoreflect"
)
// EnforceUTF8 reports whether strict UTF-8 validation should be applied to
// the field described by fd.
//
// When flags.Proto1Legacy is set and fd additionally provides an
// EnforceUTF8() bool method, the result of that method is returned.
// In every other case validation is enforced exactly when the field
// uses proto3 syntax.
func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
	// Optional capability a descriptor may expose to override the default.
	type utf8Enforcer interface{ EnforceUTF8() bool }

	if flags.Proto1Legacy {
		override, ok := fd.(utf8Enforcer)
		if ok {
			return override.EnforceUTF8()
		}
	}
	return protoreflect.Proto3 == fd.Syntax()
}
// JSONCamelCase returns the camelCase form of a snake_case identifier,
// according to the protobuf JSON specification.
//
// Every underscore is dropped, and an ASCII lowercase letter that directly
// follows an underscore is converted to uppercase. All other bytes are
// copied through unchanged.
func JSONCamelCase(s string) string {
	var out []byte
	afterUnderscore := false
	// Proto identifiers are always ASCII, so the input is handled bytewise.
	for _, ch := range []byte(s) {
		if ch == '_' {
			afterUnderscore = true
			continue
		}
		if afterUnderscore && ch >= 'a' && ch <= 'z' {
			ch = ch - 'a' + 'A' // convert to uppercase
		}
		out = append(out, ch)
		afterUnderscore = false
	}
	return string(out)
}
// JSONSnakeCase returns the snake_case form of a camelCase identifier,
// according to the protobuf JSON specification.
//
// Each ASCII uppercase letter is replaced by an underscore followed by the
// corresponding lowercase letter. All other bytes are copied through
// unchanged.
func JSONSnakeCase(s string) string {
	var out []byte
	// Proto identifiers are always ASCII, so the input is handled bytewise.
	for _, ch := range []byte(s) {
		if ch < 'A' || ch > 'Z' {
			out = append(out, ch)
			continue
		}
		// Uppercase letter: emit the separator and the lowercase form.
		out = append(out, '_', ch+('a'-'A'))
	}
	return string(out)
}
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore are converted to ASCII uppercase, and "Entry" is appended.
//
// The input is processed bytewise with ASCII-only case folding. Iterating
// over runes and narrowing each one to a byte would truncate any code point
// above 0x7F and corrupt the result, so non-ASCII bytes are instead copied
// through untouched.
func MapEntryName(s string) string {
	const suffix = "Entry"
	b := make([]byte, 0, len(s)+len(suffix))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, suffix...)
	return string(b)
}
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte and every byte that follows an
// underscore are converted to ASCII uppercase; every other byte is converted
// to ASCII lowercase.
//
// The input is processed bytewise with ASCII-only case folding. Iterating
// over runes and narrowing each one to a byte would truncate any code point
// above 0x7F and corrupt the result, so non-ASCII bytes are instead copied
// through untouched.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
			continue // underscores never reach the output
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
		default:
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A' // convert to lowercase
			}
		}
		b = append(b, c)
		upperNext = false
	}
	return string(b)
}
// TrimEnumPrefix strips the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and each remaining byte of s
// is lowercased before being compared with the next byte of prefix. Once the
// whole prefix has matched, any underscores that follow it are removed too.
// The original s is returned unchanged when the prefix does not fully match
// or when trimming would leave nothing.
func TrimEnumPrefix(s, prefix string) string {
	rest, want := s, prefix
	for rest != "" && want != "" {
		head := rest[0]
		rest = rest[1:]
		if head == '_' {
			continue // underscores in the value name are ignored
		}
		if rune(want[0]) != unicode.ToLower(rune(head)) {
			return s // no prefix match
		}
		want = want[1:]
	}
	if want != "" {
		return s // value name ran out before the prefix did
	}
	if rest = strings.TrimLeft(rest, "_"); rest == "" {
		return s // avoid returning empty string
	}
	return rest
}