mirror of
https://github.com/protocolbuffers/protobuf-go.git
synced 2025-01-26 09:35:33 +00:00
8c86fc5e7d
Immediately abort (un)marshal operations when encountering invalid UTF-8 data in proto3 strings. No other proto implementation supports non-UTF-8 data in proto3 strings (and many reject it in proto2 strings as well). Producing invalid output is an interoperability threat (other implementations won't be able to read it). The case where existing string data is found to contain non-UTF8 data is better handled by changing the field to the `bytes` type, which (aside from UTF-8 validation) is wire-compatible with `string`. Remove the errors.NonFatal type, since there are no remaining cases where it is needed. "Non-fatal" errors which produce results and a non-nil error are problematic because they compose poorly; the better approach is to take an option like AllowPartial indicating which conditions to check for. Change-Id: I9d189ec6ffda7b5d96d094aa1b290af2e3f23736 Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/183098 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
335 lines
9.4 KiB
Go
335 lines
9.4 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package text implements the text format for protocol buffers.
|
|
// This package has no semantic understanding for protocol buffers and is only
|
|
// a parser and composer for the format.
|
|
//
|
|
// There is no formal specification for the protobuf text format, as such the
|
|
// C++ implementation (see google::protobuf::TextFormat) is the reference
|
|
// implementation of the text format.
|
|
//
|
|
// This package is neither a superset nor a subset of the C++ implementation.
|
|
// This implementation permits a more liberal grammar in some cases to be
|
|
// backwards compatible with the historical Go implementation.
|
|
// Future parsings unique to Go should not be added.
|
|
// Some grammars allowed by the C++ implementation are deliberately
|
|
// not implemented here because they are considered a bug by the protobuf team
|
|
// and should not be replicated.
|
|
//
|
|
// The Go implementation should implement a sufficient amount of the C++
|
|
// grammar such that the default text serialization by C++ can be parsed by Go.
|
|
// However, just because the C++ parser accepts some input does not mean that
|
|
// the Go implementation should as well.
|
|
//
|
|
// The text format is almost a superset of JSON except:
|
|
// * message keys are not quoted strings, but identifiers
|
|
// * the top-level value must be a message without the delimiters
|
|
package text
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"strings"
|
|
|
|
"google.golang.org/protobuf/internal/flags"
|
|
"google.golang.org/protobuf/reflect/protoreflect"
|
|
)
|
|
|
|
// Type represents a type expressible in the text format.
|
|
type Type uint8
|
|
|
|
const (
|
|
_ Type = iota
|
|
|
|
// Bool is a boolean (e.g., "true" or "false").
|
|
Bool
|
|
// Int is a signed integer (e.g., "-1423").
|
|
Int
|
|
// Uint is an unsigned integer (e.g., "0xdeadbeef").
|
|
Uint
|
|
// Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
|
|
// This allows encoding to differentiate the bitsize used for formatting.
|
|
Float32
|
|
// Float64 is a 64-bit floating-point number.
|
|
Float64
|
|
// String is a quoted string (e.g., `"the quick brown fox"`).
|
|
String
|
|
// Name is a protocol buffer identifier (e.g., `field_name`).
|
|
Name
|
|
// List is an ordered list of values (e.g., `[0, "one", true]`).
|
|
List
|
|
// Message is an ordered map of values (e.g., `{"key": null}`).
|
|
Message
|
|
)
|
|
|
|
func (t Type) String() string {
|
|
switch t {
|
|
case Bool:
|
|
return "bool"
|
|
case Int:
|
|
return "int"
|
|
case Uint:
|
|
return "uint"
|
|
case Float32:
|
|
return "float32"
|
|
case Float64:
|
|
return "float64"
|
|
case String:
|
|
return "string"
|
|
case Name:
|
|
return "name"
|
|
case List:
|
|
return "list"
|
|
case Message:
|
|
return "message"
|
|
default:
|
|
return "<invalid>"
|
|
}
|
|
}
|
|
|
|
// Value contains a value of a given Type.
|
|
type Value struct {
|
|
typ Type
|
|
raw []byte // raw bytes of the serialized data
|
|
str string // only for String or Name
|
|
num uint64 // only for Bool, Int, Uint, Float32, or Float64
|
|
arr []Value // only for List
|
|
obj [][2]Value // only for Message
|
|
}
|
|
|
|
// ValueOf returns a Value for a given Go value:
|
|
// bool => Bool
|
|
// int32, int64 => Int
|
|
// uint32, uint64 => Uint
|
|
// float32 => Float32
|
|
// float64 => Float64
|
|
// string, []byte => String
|
|
// protoreflect.Name => Name
|
|
// []Value => List
|
|
// [][2]Value => Message
|
|
//
|
|
// ValueOf panics if the Go type is not one of the above.
|
|
func ValueOf(v interface{}) Value {
|
|
switch v := v.(type) {
|
|
case bool:
|
|
if v {
|
|
return Value{typ: Bool, num: 1}
|
|
} else {
|
|
return Value{typ: Bool, num: 0}
|
|
}
|
|
case int32:
|
|
return Value{typ: Int, num: uint64(v)}
|
|
case int64:
|
|
return Value{typ: Int, num: uint64(v)}
|
|
case uint32:
|
|
return Value{typ: Uint, num: uint64(v)}
|
|
case uint64:
|
|
return Value{typ: Uint, num: uint64(v)}
|
|
case float32:
|
|
// Store as float64 bits.
|
|
return Value{typ: Float32, num: math.Float64bits(float64(v))}
|
|
case float64:
|
|
return Value{typ: Float64, num: math.Float64bits(float64(v))}
|
|
case string:
|
|
return Value{typ: String, str: string(v)}
|
|
case []byte:
|
|
return Value{typ: String, str: string(v)}
|
|
case protoreflect.Name:
|
|
return Value{typ: Name, str: string(v)}
|
|
case []Value:
|
|
return Value{typ: List, arr: v}
|
|
case [][2]Value:
|
|
return Value{typ: Message, obj: v}
|
|
default:
|
|
panic(fmt.Sprintf("invalid type %T", v))
|
|
}
|
|
}
|
|
func rawValueOf(v interface{}, raw []byte) Value {
|
|
v2 := ValueOf(v)
|
|
v2.raw = raw
|
|
return v2
|
|
}
|
|
|
|
// Type is the type of the value. When parsing, this is a best-effort guess
|
|
// at the resulting type. However, there are ambiguities as to the exact type
|
|
// of the value (e.g., "false" is either a bool or a name).
|
|
// Thus, some of the types are convertible with each other.
|
|
// The Bool, Int, Uint, Float32, Float64, and Name methods return a boolean to
|
|
// report whether the conversion was successful.
|
|
func (v Value) Type() Type {
|
|
return v.typ
|
|
}
|
|
|
|
// Bool returns v as a bool and reports whether the conversion succeeded.
|
|
func (v Value) Bool() (x bool, ok bool) {
|
|
switch v.typ {
|
|
case Bool:
|
|
return v.num > 0, true
|
|
case Uint, Int:
|
|
// C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1").
|
|
if len(v.raw) > 0 && v.raw[0] != '-' && v.num < 2 {
|
|
return v.num > 0, true
|
|
}
|
|
}
|
|
return false, false
|
|
}
|
|
|
|
// Int returns v as an int64 of the specified precision and reports whether
|
|
// the conversion succeeded.
|
|
func (v Value) Int(b64 bool) (x int64, ok bool) {
|
|
switch v.typ {
|
|
case Int:
|
|
n := int64(v.num)
|
|
if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) {
|
|
return int64(n), true
|
|
}
|
|
case Uint:
|
|
n := uint64(v.num)
|
|
if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) {
|
|
return int64(n), true
|
|
}
|
|
// C++ accepts large positive hex numbers as negative values.
|
|
// This feature is here for proto1 backwards compatibility purposes.
|
|
if flags.Proto1Legacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' {
|
|
if !b64 {
|
|
return int64(int32(n)), n <= math.MaxUint32
|
|
}
|
|
// if !b64 && n <= math.MaxUint32 {
|
|
// return int64(int32(n)), true
|
|
// }
|
|
return int64(n), true
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
// Uint returns v as an uint64 of the specified precision and reports whether
|
|
// the conversion succeeded.
|
|
func (v Value) Uint(b64 bool) (x uint64, ok bool) {
|
|
switch v.typ {
|
|
case Int:
|
|
n := int64(v.num)
|
|
if len(v.raw) > 0 && v.raw[0] != '-' && (b64 || n <= math.MaxUint32) {
|
|
return uint64(n), true
|
|
}
|
|
case Uint:
|
|
n := uint64(v.num)
|
|
if b64 || n <= math.MaxUint32 {
|
|
return uint64(n), true
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
// Float returns v as a float64 of the specified precision and reports whether
|
|
// the conversion succeeded.
|
|
func (v Value) Float(b64 bool) (x float64, ok bool) {
|
|
switch v.typ {
|
|
case Int:
|
|
return float64(int64(v.num)), true // possibly lossy, but allowed
|
|
case Uint:
|
|
return float64(uint64(v.num)), true // possibly lossy, but allowed
|
|
case Float32, Float64:
|
|
n := math.Float64frombits(v.num)
|
|
if math.IsNaN(n) || math.IsInf(n, 0) {
|
|
return float64(n), true
|
|
}
|
|
if b64 || math.Abs(n) <= math.MaxFloat32 {
|
|
return float64(n), true
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
// String returns v as a string if the Type is String.
|
|
// Otherwise, this returns a formatted string of v for debugging purposes.
|
|
//
|
|
// Since String is used to represent both text and binary, it is not validated
|
|
// to contain valid UTF-8. When using this value with the string type in proto,
|
|
// it is the user's responsibility perform additional UTF-8 validation.
|
|
func (v Value) String() string {
|
|
if v.typ != String {
|
|
return v.stringValue()
|
|
}
|
|
return v.str
|
|
}
|
|
func (v Value) stringValue() string {
|
|
switch v.typ {
|
|
case Bool, Int, Uint, Float32, Float64, Name:
|
|
return string(v.Raw())
|
|
case List:
|
|
var ss []string
|
|
for _, v := range v.List() {
|
|
ss = append(ss, v.String())
|
|
}
|
|
return "[" + strings.Join(ss, ",") + "]"
|
|
case Message:
|
|
var ss []string
|
|
for _, v := range v.Message() {
|
|
k := v[0].String()
|
|
if v[0].Type() == String {
|
|
k = "[" + k + "]"
|
|
}
|
|
ss = append(ss, k+":"+v[1].String())
|
|
}
|
|
return "{" + strings.Join(ss, ",") + "}"
|
|
default:
|
|
return "<invalid>"
|
|
}
|
|
}
|
|
|
|
// Name returns the field name or enum value name and reports whether the value
|
|
// can be treated as an identifier.
|
|
func (v Value) Name() (protoreflect.Name, bool) {
|
|
switch v.typ {
|
|
case Bool, Float32, Float64:
|
|
// Ambiguity arises in unmarshalValue since "nan" may interpreted as
|
|
// either a Name type (for enum values) or a Float32/Float64 type.
|
|
// Similarly, "true" may be interpreted as either a Name or Bool type.
|
|
n := protoreflect.Name(v.raw)
|
|
if n.IsValid() {
|
|
return n, true
|
|
}
|
|
case Name:
|
|
return protoreflect.Name(v.str), true
|
|
}
|
|
return "", false
|
|
}
|
|
|
|
// List returns the elements of v and panics if the Type is not List.
|
|
// Mutations on the return value may not be observable from the Raw method.
|
|
func (v Value) List() []Value {
|
|
if v.typ != List {
|
|
panic("value is not a list")
|
|
}
|
|
return v.arr
|
|
}
|
|
|
|
// Message returns the items of v and panics if the Type is not Message.
|
|
// The [2]Value represents a key and value pair, where the key is either
|
|
// a Name (representing a field name), a String (representing extension field
|
|
// names or the Any type URL), or an Uint for unknown fields.
|
|
//
|
|
// Mutations on the return value may not be observable from the Raw method.
|
|
func (v Value) Message() [][2]Value {
|
|
if v.typ != Message {
|
|
panic("value is not a message")
|
|
}
|
|
return v.obj
|
|
}
|
|
|
|
// Raw returns the raw representation of the value.
|
|
// The returned value may alias the input given to Unmarshal.
|
|
func (v Value) Raw() []byte {
|
|
if len(v.raw) > 0 {
|
|
return v.raw
|
|
}
|
|
p := encoder{}
|
|
if err := p.marshalValue(v); err != nil {
|
|
return []byte("<invalid>")
|
|
}
|
|
return p.out
|
|
}
|