internal/encoding/text: format using 32 bitsize when encoding float32

When encoding/textpb marshals float32 values, it previously formatted
them using a 64-bit size, because both float types are stored as
float64 and internal/encoding/text has only one Float type.  As a
consequence, the output may display a different value than expected,
e.g. 1.02 becomes 1.0199999809265137.

This CL splits Float type into Float32 and Float64 to keep track of
which bitsize to use when formatting.  Values of both types are still
stored as float64 to keep the logic simple.

Decoding will always use Float64, but users can ask for a float32 value
from it.

Change-Id: Iea5b14b283fec2236a0c3946fac34d4d79b95274
Reviewed-on: https://go-review.googlesource.com/c/158497
Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
Herbie Ong 2019-01-17 19:31:47 -08:00
parent de7313b557
commit 84f0960b04
7 changed files with 95 additions and 48 deletions

View File

@ -266,11 +266,11 @@ func unmarshalScalar(input text.Value, fd pref.FieldDescriptor) (pref.Value, err
return pref.ValueOf(uint64(n)), nil
}
case pref.FloatKind:
if n, ok := input.Float(b32); ok {
if n, ok := input.Float32(); ok {
return pref.ValueOf(float32(n)), nil
}
case pref.DoubleKind:
if n, ok := input.Float(b64); ok {
if n, ok := input.Float64(); ok {
return pref.ValueOf(float64(n)), nil
}
case pref.StringKind:

View File

@ -162,9 +162,8 @@ opt_string: ""
OptSint64: scalar.Int64(-0xffff),
OptFixed64: scalar.Uint64(64),
OptSfixed32: scalar.Int32(-32),
// TODO: Update encoder to output same decimals.
OptFloat: scalar.Float32(1.02),
OptDouble: scalar.Float64(1.23e100),
OptFloat: scalar.Float32(1.02),
OptDouble: scalar.Float64(1.0199999809265137),
// TODO: Update encoder to not output UTF8 for bytes.
OptBytes: []byte("\xe8\xb0\xb7\xe6\xad\x8c"),
OptString: scalar.String("谷歌"),
@ -178,8 +177,8 @@ opt_sint32: -1001
opt_sint64: -65535
opt_fixed64: 64
opt_sfixed32: -32
opt_float: 1.0199999809265137
opt_double: 1.23e+100
opt_float: 1.02
opt_double: 1.0199999809265137
opt_bytes: "谷歌"
opt_string: "谷歌"
`,
@ -449,7 +448,7 @@ OptGroup: {}
RptInt64: []int64{-64, 47},
RptUint32: []uint32{0xff, 0xffff},
RptUint64: []uint64{0xdeadbeef},
// TODO: add float32 examples.
RptFloat: []float32{float32(math.NaN()), float32(math.Inf(1)), float32(math.Inf(-1)), 1.034},
RptDouble: []float64{math.NaN(), math.Inf(1), math.Inf(-1), 1.23e-308},
RptString: []string{"hello", "世界"},
RptBytes: [][]byte{
@ -470,6 +469,10 @@ rpt_int64: 47
rpt_uint32: 255
rpt_uint32: 65535
rpt_uint64: 3735928559
rpt_float: nan
rpt_float: inf
rpt_float: -inf
rpt_float: 1.034
rpt_double: nan
rpt_double: inf
rpt_double: -inf

1
go.sum
View File

@ -1,3 +1,4 @@
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.1-0.20181127190454-8d0c54c12466/go.mod h1:Qd/q+1AKNOZr9uGQzbzCmRO6sUih6GTPZv6a1/R87v0=
github.com/golang/protobuf v1.2.1-0.20181205191652-7e65e513332f h1:jEoef3K+ZQwZ7UB1iGu6KhX8hS9cYw1aXR7djS3Vn10=
github.com/golang/protobuf v1.2.1-0.20181205191652-7e65e513332f/go.mod h1:asK8yRb/+zxJTE0SbTESCku/4OjiDfbPwk4rEyIatUA=

View File

@ -168,7 +168,7 @@ func (p *encoder) marshalKey(v Value) error {
func (p *encoder) marshalValue(v Value) error {
switch v.Type() {
case Bool, Int, Uint, Float:
case Bool, Int, Uint, Float32, Float64:
return p.marshalNumber(v)
case String:
return p.marshalString(v)

View File

@ -15,7 +15,7 @@ import (
"github.com/golang/protobuf/v2/internal/errors"
)
// marshalNumber encodes v as either a Bool, Int, Uint, or Float.
// marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
func (p *encoder) marshalNumber(v Value) error {
var err error
p.out, err = appendNumber(p.out, v)
@ -24,7 +24,7 @@ func (p *encoder) marshalNumber(v Value) error {
func appendNumber(out []byte, v Value) ([]byte, error) {
if len(v.raw) > 0 {
switch v.Type() {
case Bool, Int, Uint, Float:
case Bool, Int, Uint, Float32, Float64:
return append(out, v.raw...), nil
}
}
@ -39,22 +39,28 @@ func appendNumber(out []byte, v Value) ([]byte, error) {
return strconv.AppendInt(out, int64(v.num), 10), nil
case Uint:
return strconv.AppendUint(out, uint64(v.num), 10), nil
case Float:
switch n := math.Float64frombits(v.num); {
case math.IsNaN(n):
return append(out, "nan"...), nil
case math.IsInf(n, +1):
return append(out, "inf"...), nil
case math.IsInf(n, -1):
return append(out, "-inf"...), nil
default:
return strconv.AppendFloat(out, n, 'g', -1, 64), nil
}
case Float32:
return appendFloat(out, v, 32)
case Float64:
return appendFloat(out, v, 64)
default:
return nil, errors.New("invalid type %v, expected bool or number", v.Type())
}
}
// appendFloat appends the textual form of the floating-point number held in v
// to out. The number is read from v.num as float64 bits. NaN and the two
// infinities are written as "nan", "inf", and "-inf"; every other value is
// formatted by strconv.AppendFloat in 'g' format with the shortest precision
// that round-trips at the given bitSize (32 or 64).
func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
	f := math.Float64frombits(v.num)
	if math.IsNaN(f) {
		return append(out, "nan"...), nil
	}
	if math.IsInf(f, +1) {
		return append(out, "inf"...), nil
	}
	if math.IsInf(f, -1) {
		return append(out, "-inf"...), nil
	}
	// Finite value: bitSize controls how many digits are needed to round-trip.
	return strconv.AppendFloat(out, f, 'g', -1, bitSize), nil
}
// These regular expressions were derived by reverse engineering the C++ code
// in tokenizer.cc and text_format.cc.
var (
@ -80,7 +86,7 @@ var (
floatRegexp = regexp.MustCompile("^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)")
)
// unmarshalNumber decodes a Bool, Int, Uint, or Float from the input.
// unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
func (p *decoder) unmarshalNumber() (Value, error) {
v, n, err := consumeNumber(p.in)
p.consume(n)
@ -98,6 +104,7 @@ func consumeNumber(in []byte) (Value, int, error) {
if n := matchWithDelim(floatRegexp, in); n > 0 {
if bytes.ContainsAny(in[:n], ".eEfF") {
s := strings.TrimRight(string(in[:n]), "fF")
// Always decode float as 64-bit.
f, err := strconv.ParseFloat(s, 64)
if err != nil {
return Value{}, 0, err

View File

@ -549,6 +549,10 @@ func Test(t *testing.T) {
}, {
in: `crazy:"x'"'\""\''"'z"`,
wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
}, {
in: `num: 1.02`,
wantVal: V(Msg{{ID("num"), V(float32(1.02))}}), // Use float32 to test marshaling of Float32 type.
wantOut: `num:1.02`,
}, {
in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
wantVal: V(Msg{{ID("nums"), V(Lst{
@ -790,9 +794,16 @@ spouse: null
want, _ := x.Uint(true)
got, ok := y.Uint(math.MaxUint32 < want)
return got == want && ok
case Float:
want, _ := x.Float(true)
got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
case Float32:
want, _ := x.Float32()
got, ok := y.Float32()
if math.IsNaN(float64(got)) || math.IsNaN(float64(want)) {
return math.IsNaN(float64(got)) == math.IsNaN(float64(want))
}
return got == want && ok
case Float64:
want, _ := x.Float64()
got, ok := y.Float64()
if math.IsNaN(got) || math.IsNaN(want) {
return math.IsNaN(got) == math.IsNaN(want)
}

View File

@ -49,8 +49,11 @@ const (
Int
// Uint is an unsigned integer (e.g., "0xdeadbeef").
Uint
// Float is a floating-point number (e.g., "1.234" or "1e100").
Float
// Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
// This allows encoding to differentiate the bitsize used for formatting.
Float32
// Float64 is a 64-bit floating-point number.
Float64
// String is a quoted string (e.g., `"the quick brown fox"`).
String
// Name is a protocol buffer identifier (e.g., `field_name`).
@ -69,8 +72,10 @@ func (t Type) String() string {
return "int"
case Uint:
return "uint"
case Float:
return "float"
case Float32:
return "float32"
case Float64:
return "float64"
case String:
return "string"
case Name:
@ -89,7 +94,7 @@ type Value struct {
typ Type
raw []byte // raw bytes of the serialized data
str string // only for String or Name
num uint64 // only for Bool, Int, Uint, or Float
num uint64 // only for Bool, Int, Uint, Float32, or Float64
arr []Value // only for List
obj [][2]Value // only for Message
}
@ -98,7 +103,8 @@ type Value struct {
// bool => Bool
// int32, int64 => Int
// uint32, uint64 => Uint
// float32, float64 => Float
// float32 => Float32
// float64 => Float64
// string, []byte => String
// protoreflect.Name => Name
// []Value => List
@ -122,9 +128,10 @@ func ValueOf(v interface{}) Value {
case uint64:
return Value{typ: Uint, num: uint64(v)}
case float32:
return Value{typ: Float, num: math.Float64bits(float64(v))}
// Store as float64 bits.
return Value{typ: Float32, num: math.Float64bits(float64(v))}
case float64:
return Value{typ: Float, num: math.Float64bits(float64(v))}
return Value{typ: Float64, num: math.Float64bits(float64(v))}
case string:
return Value{typ: String, str: string(v)}
case []byte:
@ -149,8 +156,8 @@ func rawValueOf(v interface{}, raw []byte) Value {
// at the resulting type. However, there are ambiguities as to the exact type
// of the value (e.g., "false" is either a bool or a name).
// Thus, some of the types are convertible with each other.
// The Bool, Int, Uint, Float, and Name methods return a boolean to report
// whether the conversion was successful.
// The Bool, Int, Uint, Float32, Float64, and Name methods return a boolean to
// report whether the conversion was successful.
func (v Value) Type() Type {
return v.typ
}
@ -216,22 +223,40 @@ func (v Value) Uint(b64 bool) (x uint64, ok bool) {
return 0, false
}
// Float returns v as a float64 of the specified precision and reports whether
// Float32 returns v as a float32 and reports whether
// the conversion succeeded.
func (v Value) Float(b64 bool) (x float64, ok bool) {
func (v Value) Float32() (x float32, ok bool) {
	// Integer kinds always convert; the result may lose precision.
	if v.typ == Int {
		return float32(int64(v.num)), true // possibly lossy, but allowed
	}
	if v.typ == Uint {
		return float32(uint64(v.num)), true // possibly lossy, but allowed
	}
	// Anything that is not a float kind cannot be converted.
	if v.typ != Float32 && v.typ != Float64 {
		return 0, false
	}

	// Both float kinds keep their value as float64 bits in v.num.
	f := math.Float64frombits(v.num)
	// NaN and the infinities convert as-is; finite values must fit within the
	// float32 range, otherwise the conversion is reported as failed.
	fits := math.IsNaN(f) || math.IsInf(f, 0) || math.Abs(f) <= math.MaxFloat32
	if !fits {
		return 0, false
	}
	return float32(f), true
}
// Float64 returns v as a float64 and reports whether
// the conversion succeeded.
func (v Value) Float64() (x float64, ok bool) {
switch v.typ {
case Int:
return float64(int64(v.num)), true // possibly lossy, but allowed
case Uint:
return float64(uint64(v.num)), true // possibly lossy, but allowed
case Float:
case Float32:
f, ok := v.Float32()
return float64(f), ok
case Float64:
n := math.Float64frombits(v.num)
if math.IsNaN(n) || math.IsInf(n, 0) {
return float64(n), true
}
if b64 || math.Abs(n) <= math.MaxFloat32 {
return float64(n), true
}
return n, true
}
return 0, false
}
@ -250,7 +275,7 @@ func (v Value) String() string {
}
func (v Value) stringValue() string {
switch v.typ {
case Bool, Int, Uint, Float, Name:
case Bool, Int, Uint, Float32, Float64, Name:
return string(v.Raw())
case List:
var ss []string
@ -277,9 +302,9 @@ func (v Value) stringValue() string {
// can be treated as an identifier.
func (v Value) Name() (protoreflect.Name, bool) {
switch v.typ {
case Bool, Float:
case Bool, Float32, Float64:
// Ambiguity arises in unmarshalValue since "nan" may be interpreted as
// either a Name type (for enum values) or a Float type.
// either a Name type (for enum values) or a Float32/Float64 type.
// Similarly, "true" may be interpreted as either a Name or Bool type.
n := protoreflect.Name(v.raw)
if n.IsValid() {