// protobuf-go/internal/impl/message_field.go

// 293 lines
// 8.2 KiB
// Go
// Raw Normal View History

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package impl
import (
"fmt"
"reflect"
"github.com/golang/protobuf/v2/internal/flags"
// internal/impl: support legacy extension fields Implement support for extension fields for messages that use the v1 data structures for extensions. The legacyExtensionFields type wraps a v1 map to implement the v2 protoreflect.KnownFields interface. Working on this change revealed a bug in the dynamic construction of message types for protobuf messages that had cyclic dependencies (e.g., message Foo has a sub-field of message Bar, and Bar has a sub-field of Foo). In such a situation, a deadlock occurs because initialization code depends on the very initialization code that is currently running. To break these cycles, we make some systematic changes listed in the following paragraphs. Generally speaking, we separate the logic for construction and wrapping, where construction does not recursively rely on dependencies, while wrapping may recursively inspect dependencies. Promote the MessageType.MessageOf method as a standalone MessageOf function that dynamically finds the proper *MessageType to use. We make it such that MessageType only supports two forms of message types: * Those that fully implement the v2 API. * Those that do not implement the v2 API at all. This removes support for the hybrid form that was exploited by message_test.go In impl/message_test.go, switch each message to look more like how future generated messages will look like. This is done in reaction to the fact that MessageType.MessageOf no longer exists. In value/{map,vector}.go, fix Unwrap to return a pointer since the underlying reflect.Value is addressable reference value, not a pointer value. In value/convert.go, split the logic apart so that obtaining a v2 type and wrapping a type as v2 are distinct operations. Wrapping requires further initialization than simply creating the initial message type, and calling it during initial construction would lead to a deadlock.
// In protoreflect/go_type.go, we switch back to a lazy initialization of GoType to avoid a deadlock since the user-provided fn may rely on the fact that prototype.GoMessage returned. Change-Id: I5dea00e36fe1a9899bd2ac0aed2c8e51d5d87420 Reviewed-on: https://go-review.googlesource.com/c/148826 Reviewed-by: Herbie Ong <herbie@google.com>
// 2018-11-06 13:05:20 -08:00
pvalue "github.com/golang/protobuf/v2/internal/value"
pref "github.com/golang/protobuf/v2/reflect/protoreflect"
)
// fieldInfo bundles the reflection-based accessors for a single message
// field. Each accessor takes a pointer to the enclosing message struct; the
// constructors in this file capture the field's offset and value converter
// in the closures they return.
type fieldInfo struct {
// TODO: specialize marshal and unmarshal functions?
// has reports whether the field is populated. Every implementation in this
// file returns false when the message pointer is nil.
has func(pointer) bool
// get returns the field's current value. For a nil message or an
// unpopulated field the implementations in this file return the default
// value (scalars, oneofs), a list/map wrapper (repeated and map fields), or
// an invalid pref.Value (message fields).
get func(pointer) pref.Value
// set stores the given value into the field.
set func(pointer, pref.Value)
// clear resets the field to the zero value of its Go type.
clear func(pointer)
// newMessage creates a new message for the field. Among the constructors
// visible in this file it is only assigned for oneof and message fields;
// it is left nil for map, list, and scalar fields.
newMessage func() pref.Message
}
// fieldInfoForWeak is the placeholder constructor for weak fields. It never
// returns: weak fields are rejected outright unless legacy proto1 support is
// compiled in, and even then they are not implemented yet.
func fieldInfoForWeak(fd pref.FieldDescriptor, fs reflect.StructField) fieldInfo {
	if flags.Proto1Legacy {
		// TODO: support weak fields.
		panic(fmt.Sprintf("invalid field: %v", fd))
	}
	panic("weak fields not supported")
}
// fieldInfoForOneof returns the accessors for one member of a oneof.
//
// fs is the interface-typed struct field that holds the oneof and ot is the
// wrapper struct type for this particular member; a pointer to ot must
// implement the interface. The member's value lives in field 0 of the
// wrapper struct. It panics if any of these type requirements is violated.
func fieldInfoForOneof(fd pref.FieldDescriptor, fs reflect.StructField, ot reflect.Type) fieldInfo {
	ifaceType := fs.Type
	switch {
	case ifaceType.Kind() != reflect.Interface:
		panic(fmt.Sprintf("invalid type: got %v, want interface kind", ifaceType))
	case ot.Kind() != reflect.Struct:
		panic(fmt.Sprintf("invalid type: got %v, want struct kind", ot))
	case !reflect.PtrTo(ot).Implements(ifaceType):
		panic(fmt.Sprintf("invalid type: %v does not implement %v", ot, ifaceType))
	}
	conv := pvalue.NewLegacyConverter(ot.Field(0).Type, fd.Kind(), legacyWrapper)
	fieldOffset := offsetOf(fs)

	// slot returns the addressable interface value of the oneof field inside
	// the message that p points to.
	slot := func(p pointer) reflect.Value {
		return p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
	}
	// holdsWrapper reports whether the oneof interface currently stores a
	// pointer to this member's wrapper struct.
	//
	// NOTE: The logic intentionally assumes that oneof fields are
	// well-formatted. That is, the oneof interface never contains a
	// typed nil pointer to one of the wrapper structs.
	holdsWrapper := func(rv reflect.Value) bool {
		return !rv.IsNil() && rv.Elem().Type().Elem() == ot
	}

	// TODO: Implement unsafe fast path?
	return fieldInfo{
		has: func(p pointer) bool {
			return !p.IsNil() && holdsWrapper(slot(p))
		},
		get: func(p pointer) pref.Value {
			if p.IsNil() {
				return defaultValueOf(fd)
			}
			rv := slot(p)
			if !holdsWrapper(rv) {
				return defaultValueOf(fd)
			}
			// interface -> *wrapper -> wrapper -> first field
			return conv.PBValueOf(rv.Elem().Elem().Field(0))
		},
		set: func(p pointer, v pref.Value) {
			rv := slot(p)
			if !holdsWrapper(rv) {
				// Another member (or nothing) is stored; switch the oneof
				// over to a fresh wrapper for this member.
				rv.Set(reflect.New(ot))
			}
			rv.Elem().Elem().Field(0).Set(conv.GoValueOf(v))
		},
		clear: func(p pointer) {
			rv := slot(p)
			if holdsWrapper(rv) {
				// Only reset the oneof when this member is the active one.
				rv.Set(reflect.Zero(rv.Type()))
			}
		},
		newMessage: func() pref.Message {
			// This is only valid for messages and panics for other kinds.
			return conv.MessageType.New()
		},
	}
}
func fieldInfoForMap(fd pref.FieldDescriptor, fs reflect.StructField) fieldInfo {
ft := fs.Type
if ft.Kind() != reflect.Map {
panic(fmt.Sprintf("invalid type: got %v, want map kind", ft))
}
keyConv := pvalue.NewLegacyConverter(ft.Key(), fd.MessageType().Fields().ByNumber(1).Kind(), legacyWrapper)
valConv := pvalue.NewLegacyConverter(ft.Elem(), fd.MessageType().Fields().ByNumber(2).Kind(), legacyWrapper)
fieldOffset := offsetOf(fs)
// TODO: Implement unsafe fast path?
return fieldInfo{
has: func(p pointer) bool {
if p.IsNil() {
return false
}
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
return rv.Len() > 0
},
get: func(p pointer) pref.Value {
if p.IsNil() {
v := reflect.Zero(reflect.PtrTo(fs.Type)).Interface()
return pref.ValueOf(pvalue.MapOf(v, keyConv, valConv))
}
v := p.Apply(fieldOffset).AsIfaceOf(fs.Type)
internal/impl: support legacy extension fields Implement support for extension fields for messages that use the v1 data structures for extensions. The legacyExtensionFields type wraps a v1 map to implement the v2 protoreflect.KnownFields interface. Working on this change revealed a bug in the dynamic construction of message types for protobuf messages that had cyclic dependencies (e.g., message Foo has a sub-field of message Bar, and Bar has a sub-field of Foo). In such a situation, a deadlock occurs because initialization code depends on the very initialization code that is currently running. To break these cycles, we make some systematic changes listed in the following paragraphs. Generally speaking, we separate the logic for construction and wrapping, where constuction does not recursively rely on dependencies, while wrapping may recursively inspect dependencies. Promote the MessageType.MessageOf method as a standalone MessageOf function that dynamically finds the proper *MessageType to use. We make it such that MessageType only supports two forms of messages types: * Those that fully implement the v2 API. * Those that do not implement the v2 API at all. This removes support for the hybrid form that was exploited by message_test.go In impl/message_test.go, switch each message to look more like how future generated messages will look like. This is done in reaction to the fact that MessageType.MessageOf no longer exists. In value/{map,vector}.go, fix Unwrap to return a pointer since the underlying reflect.Value is addressable reference value, not a pointer value. In value/convert.go, split the logic apart so that obtaining a v2 type and wrapping a type as v2 are distinct operations. Wrapping requires further initialization than simply creating the initial message type, and calling it during initial construction would lead to a deadlock. 
In protoreflect/go_type.go, we switch back to a lazy initialization of GoType to avoid a deadlock since the user-provided fn may rely on the fact that prototype.GoMessage returned. Change-Id: I5dea00e36fe1a9899bd2ac0aed2c8e51d5d87420 Reviewed-on: https://go-review.googlesource.com/c/148826 Reviewed-by: Herbie Ong <herbie@google.com>
2018-11-06 13:05:20 -08:00
return pref.ValueOf(pvalue.MapOf(v, keyConv, valConv))
},
set: func(p pointer, v pref.Value) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(reflect.ValueOf(v.Map().(pvalue.Unwrapper).ProtoUnwrap()).Elem())
},
clear: func(p pointer) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(reflect.Zero(rv.Type()))
},
}
}
// all: rename Vector as List The terminology Vector does not occur in protobuf documentation at all, so we should rename the Go use of the term to something more recognizable. As such, all instances that match the regexp "[Vv]ect(or)?" were replaced. The C++ documentation uses the term "Repeated", which is a reasonable name. However, the term became overloaded in 2014, when maps were added as a feature and implemented under the hood as repeated fields. This is confusing as it means "repeated" could either refer to repeated fields proper (i.e., explicitly marked with the "repeated" label in the proto file) or map fields. In the case of the C++ reflective API, this is not a problem since repeated fields proper and map fields are interacted with through the same RepeatedField type. In Go, we do not use a single type to handle both types of repeated fields: 1) We are coming up with the Go protobuf reflection API for the first time and so do not need to piggy-back on the repeated fields API to remain backwards compatible since no former usages of Go protobuf reflection exists. 2) Map fields are commonly represented in Go as the Go map type, which do not preserve ordering information. As such it is fundamentally impossible to present an unordered map as a consistently ordered list. Thus, Go needs two different interfaces for lists and maps. Given the above situation, "Repeated" is not a great term to use since it refers to two different things (when we only want one of the meanings). To distinguish between the two, we'll use the terms "List" and "Map" instead. There is some precedent for the term "List" in the protobuf codebase (e.g., "getRepeatedInt32List"). Change-Id: Iddcdb6b78e1e60c14fa4ca213c15f45e214b967b Reviewed-on: https://go-review.googlesource.com/c/149657 Reviewed-by: Damien Neil <dneil@google.com>
// 2018-11-14 14:05:19 -08:00

func fieldInfoForList(fd pref.FieldDescriptor, fs reflect.StructField) fieldInfo {
ft := fs.Type
if ft.Kind() != reflect.Slice {
panic(fmt.Sprintf("invalid type: got %v, want slice kind", ft))
}
conv := pvalue.NewLegacyConverter(ft.Elem(), fd.Kind(), legacyWrapper)
fieldOffset := offsetOf(fs)
// TODO: Implement unsafe fast path?
return fieldInfo{
has: func(p pointer) bool {
if p.IsNil() {
return false
}
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
return rv.Len() > 0
},
get: func(p pointer) pref.Value {
if p.IsNil() {
v := reflect.Zero(reflect.PtrTo(fs.Type)).Interface()
return pref.ValueOf(pvalue.ListOf(v, conv))
}
v := p.Apply(fieldOffset).AsIfaceOf(fs.Type)
all: rename Vector as List The terminology Vector does not occur in protobuf documentation at all, so we should rename the Go use of the term to something more recognizable. As such, all instances that match the regexp "[Vv]ect(or)?" were replaced. The C++ documentation uses the term "Repeated", which is a reasonable name. However, the term became overloaded in 2014, when maps were added as a feature and implementated under the hood as repeated fields. This is confusing as it means "repeated" could either refer to repeated fields proper (i.e., explicitly marked with the "repeated" label in the proto file) or map fields. In the case of the C++ reflective API, this is not a problem since repeated fields proper and map fields are interacted with through the same RepeatedField type. In Go, we do not use a single type to handle both types of repeated fields: 1) We are coming up with the Go protobuf reflection API for the first time and so do not need to piggy-back on the repeated fields API to remain backwards compatible since no former usages of Go protobuf reflection exists. 2) Map fields are commonly represented in Go as the Go map type, which do not preserve ordering information. As such it is fundamentally impossible to present an unordered map as a consistently ordered list. Thus, Go needs two different interfaces for lists and maps. Given the above situation, "Repeated" is not a great term to use since it refers to two different things (when we only want one of the meanings). To distinguish between the two, we'll use the terms "List" and "Map" instead. There is some precedence for the term "List" in the protobuf codebase (e.g., "getRepeatedInt32List"). Change-Id: Iddcdb6b78e1e60c14fa4ca213c15f45e214b967b Reviewed-on: https://go-review.googlesource.com/c/149657 Reviewed-by: Damien Neil <dneil@google.com>
2018-11-14 14:05:19 -08:00
return pref.ValueOf(pvalue.ListOf(v, conv))
},
set: func(p pointer, v pref.Value) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(reflect.ValueOf(v.List().(pvalue.Unwrapper).ProtoUnwrap()).Elem())
},
clear: func(p pointer) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(reflect.Zero(rv.Type()))
},
}
}
// emptyBytes is a non-nil, zero-length byte slice. It is stored into
// nullable bytes fields so that "set to empty" stays distinguishable from
// "unset" (nil).
var emptyBytes = reflect.ValueOf([]byte{})

// fieldInfoForScalar returns the accessors for a singular scalar field stored
// in the struct field fs.
//
// When fd uses Proto2 syntax the field is treated as nullable: the Go field
// must be a pointer (to the scalar) or a slice, and presence is tracked by
// nil-ness. Otherwise presence means "holds a non-zero value". It panics if a
// nullable field is neither a pointer nor a slice.
func fieldInfoForScalar(fd pref.FieldDescriptor, fs reflect.StructField) fieldInfo {
	elemType := fs.Type
	nullable := fd.Syntax() == pref.Proto2
	if nullable {
		switch elemType.Kind() {
		case reflect.Ptr:
			// Convert values using the pointed-to scalar type.
			elemType = elemType.Elem()
		case reflect.Slice:
			// Slices are nullable as-is; nil marks the field as unset.
		default:
			panic(fmt.Sprintf("invalid type: got %v, want pointer", elemType))
		}
	}
	conv := pvalue.NewLegacyConverter(elemType, fd.Kind(), legacyWrapper)
	fieldOffset := offsetOf(fs)

	// fieldOf returns the addressable struct field inside the message that p
	// points to.
	fieldOf := func(p pointer) reflect.Value {
		return p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
	}

	// TODO: Implement unsafe fast path?
	return fieldInfo{
		has: func(p pointer) bool {
			if p.IsNil() {
				return false
			}
			field := fieldOf(p)
			if nullable {
				return !field.IsNil()
			}
			// Non-nullable fields count as populated only when non-zero.
			switch field.Kind() {
			case reflect.Bool:
				return field.Bool()
			case reflect.Int32, reflect.Int64:
				return field.Int() != 0
			case reflect.Uint32, reflect.Uint64:
				return field.Uint() != 0
			case reflect.Float32, reflect.Float64:
				return field.Float() != 0
			case reflect.String, reflect.Slice:
				return field.Len() > 0
			default:
				panic(fmt.Sprintf("invalid type: %v", field.Type())) // should never happen
			}
		},
		get: func(p pointer) pref.Value {
			if p.IsNil() {
				return defaultValueOf(fd)
			}
			field := fieldOf(p)
			if !nullable {
				return conv.PBValueOf(field)
			}
			if field.IsNil() {
				return defaultValueOf(fd)
			}
			if field.Kind() == reflect.Ptr {
				field = field.Elem()
			}
			return conv.PBValueOf(field)
		},
		set: func(p pointer, v pref.Value) {
			dst := fieldOf(p)
			if nullable && dst.Kind() == reflect.Ptr {
				// Allocate the scalar on first write, then store through
				// the pointer.
				if dst.IsNil() {
					dst.Set(reflect.New(elemType))
				}
				dst = dst.Elem()
			}
			dst.Set(conv.GoValueOf(v))
			// A nil slice would read back as "unset"; keep an explicitly
			// set empty value non-nil.
			if nullable && dst.Kind() == reflect.Slice && dst.IsNil() {
				dst.Set(emptyBytes)
			}
		},
		clear: func(p pointer) {
			field := fieldOf(p)
			field.Set(reflect.Zero(field.Type()))
		},
	}
}
func fieldInfoForMessage(fd pref.FieldDescriptor, fs reflect.StructField) fieldInfo {
ft := fs.Type
conv := pvalue.NewLegacyConverter(ft, fd.Kind(), legacyWrapper)
fieldOffset := offsetOf(fs)
// TODO: Implement unsafe fast path?
return fieldInfo{
has: func(p pointer) bool {
if p.IsNil() {
return false
}
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
return !rv.IsNil()
},
get: func(p pointer) pref.Value {
if p.IsNil() {
return pref.Value{}
}
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
reflect/protoreflect: clarify Get semantics on unpopulated fields Clearly specify that Get on an unpopulated field: * returns the default value for scalars * returns a mutable (but empty) List for repeated fields * returns a mutable (but empty) Map for map fields * returns an invalid value for message fields The difference in semantics between List+Maps and Messages is because protobuf semantics provide no distinction between an unpopulated and empty list or map. On the other hand, there is a semantic difference between an unpopulated message and an empty message. Default values for scalars is trivial to implement with FieldDescriptor.Default. A mutable, but empty List and Map is easy to implement for known fields since known fields are generated as a slice or map field in a struct. Since struct fields are addressable, the implementation can just return a reference to the slice or map. Repeated, extension fields are a little more tricky since extension fields are implemented under the hood as a map[FieldNumber]Extension. Rather than allocating an empty list in KnownFields.Get upon first retrieval (which presents a race), delegate the work to ExtensionFieldTypes.Register, which must occur before any Get operation. Register is not a concurrent-safe operation, so that is an excellent time to initilize empty lists. The implementation of extensions will need to be careful that Clear on a repeated field simply truncates it zero instead of deleting the object. For unpopulated messages, we return an invalid value, instead of the prior behavior of returning a typed nil-pointer to the Go type for the message. The approach is problematic because it assumes that 1) all messages are always implemented on a pointer reciever 2) a typed nil-pointer is an appropriate "read-only, but empty" message These assumptions are not true of all message types (e.g., dynamic messages). 
Change-Id: Ie96e6744c890308d9de738b6cf01d3b19e7e7c6a Reviewed-on: https://go-review.googlesource.com/c/150319 Reviewed-by: Damien Neil <dneil@google.com>
2018-11-19 14:26:06 -08:00
if rv.IsNil() {
return pref.Value{}
}
return conv.PBValueOf(rv)
},
set: func(p pointer, v pref.Value) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(conv.GoValueOf(v))
reflect/protoreflect: clarify Get semantics on unpopulated fields Clearly specify that Get on an unpopulated field: * returns the default value for scalars * returns a mutable (but empty) List for repeated fields * returns a mutable (but empty) Map for map fields * returns an invalid value for message fields The difference in semantics between List+Maps and Messages is because protobuf semantics provide no distinction between an unpopulated and empty list or map. On the other hand, there is a semantic difference between an unpopulated message and an empty message. Default values for scalars is trivial to implement with FieldDescriptor.Default. A mutable, but empty List and Map is easy to implement for known fields since known fields are generated as a slice or map field in a struct. Since struct fields are addressable, the implementation can just return a reference to the slice or map. Repeated, extension fields are a little more tricky since extension fields are implemented under the hood as a map[FieldNumber]Extension. Rather than allocating an empty list in KnownFields.Get upon first retrieval (which presents a race), delegate the work to ExtensionFieldTypes.Register, which must occur before any Get operation. Register is not a concurrent-safe operation, so that is an excellent time to initilize empty lists. The implementation of extensions will need to be careful that Clear on a repeated field simply truncates it zero instead of deleting the object. For unpopulated messages, we return an invalid value, instead of the prior behavior of returning a typed nil-pointer to the Go type for the message. The approach is problematic because it assumes that 1) all messages are always implemented on a pointer reciever 2) a typed nil-pointer is an appropriate "read-only, but empty" message These assumptions are not true of all message types (e.g., dynamic messages). 
Change-Id: Ie96e6744c890308d9de738b6cf01d3b19e7e7c6a Reviewed-on: https://go-review.googlesource.com/c/150319 Reviewed-by: Damien Neil <dneil@google.com>
2018-11-19 14:26:06 -08:00
if rv.IsNil() {
panic("invalid nil pointer")
}
},
clear: func(p pointer) {
rv := p.Apply(fieldOffset).AsValueOf(fs.Type).Elem()
rv.Set(reflect.Zero(rv.Type()))
},
newMessage: func() pref.Message {
return conv.MessageType.New()
},
}
}
// defaultValueOf returns the default value for the field described by fd,
// or an invalid Value when fd is nil. A non-empty bytes default is returned
// as a fresh copy.
func defaultValueOf(fd pref.FieldDescriptor) pref.Value {
	if fd == nil {
		return pref.Value{}
	}
	def := fd.Default() // invalid Value for messages and repeated fields
	if fd.Kind() != pref.BytesKind || !def.IsValid() {
		return def
	}
	raw := def.Bytes()
	if len(raw) == 0 {
		return def
	}
	// copy default bytes for safety
	return pref.ValueOf(append([]byte(nil), raw...))
}