internal/impl: preallocate memory when unmarshalling packed repeated fields

This improves both wall time and allocated memory. I haven't found any
case (zero values, a few values, many values) where this change is a
consistent regression. For fields with thousands of values, it reduces
memory usage by 50% and wall time by 20%.
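
As a rough illustration of the counting idea (a standalone sketch, not the
code in this change, which lives in the generated coders in the diff below):
every varint in a packed payload ends with a byte whose high bit is clear, so
counting bytes below 0x80 gives the element count without decoding, and the
destination slice can be grown once up front instead of repeatedly by append.
The helper name countPackedVarints is hypothetical; only the protowire calls
are real API.

	package main

	import (
		"fmt"

		"google.golang.org/protobuf/encoding/protowire"
	)

	// countPackedVarints counts the values in a packed varint payload by
	// counting terminator bytes (high bit clear).
	func countPackedVarints(b []byte) int {
		count := 0
		for _, v := range b {
			if v < 0x80 {
				count++
			}
		}
		return count
	}

	func main() {
		// Build a packed payload of three varints (300 takes two bytes).
		var b []byte
		for _, v := range []uint64{1, 300, 7} {
			b = protowire.AppendVarint(b, v)
		}
		s := make([]uint64, 0, countPackedVarints(b)) // preallocate once
		for len(b) > 0 {
			v, n := protowire.ConsumeVarint(b)
			if n < 0 {
				panic("malformed varint")
			}
			s = append(s, v)
			b = b[n:]
		}
		fmt.Println(s) // [1 300 7]
	}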

Benchmark results from:
go test bench_test.go testmessages_test.go decode_test.go -run=none -bench=BenchmarkDecode/packed -benchmem -count=6 -timeout=0

goos: linux
goarch: amd64
cpu: Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz
                                  │ base        │ fast
                                  │ sec/op      │ sec/op      vs base
repeated0packedAllTypes         432.9n ± 5%  420.1n ± 11%       ~ p=0.39
repeated0packed3AllTypes        431.2n ± 6%  433.8n ±  3%       ~ p=0.69
repeated0packedAllExt           2.748µ ± 6%  2.845µ ±  2%       ~ p=0.06
repeated0length_packedAllTypes  310.0n ± 0%  307.4n ±  1%  -0.84% p=0.00
repeated0length_packed3AllTypes 309.7n ± 1%  309.1n ±  4%       ~ p=0.41
repeated0length_packedAllExt    1.689µ ± 2%  1.732µ ±  5%       ~ p=0.39
packedPackedTypes               308.6n ± 1%  276.3n ±  1% -10.47% p=0.00
packedPackedExt                 2.727µ ± 2%  2.685µ ±  1%  -1.54% p=0.00
packed0lengthPackedTypes        163.4n ± 1%  160.8n ±  4%       ~ p=0.06
packed0lengthPackedExt          1.676µ ± 1%  1.748µ ±  4%  +4.30% p=0.01
geomean                             673.4n       668.3n        -0.75%

                                  │ base         │ fast
                                  │ B/op         │ B/op       vs base
repeated0packedAllTypes         1.328Ki ± 0%  1.281Ki ± 0% -3.53% p=0.00
repeated0packed3AllTypes        1.328Ki ± 0%  1.281Ki ± 0% -3.53% p=0.00
repeated0packedAllExt           5.364Ki ± 0%  5.364Ki ± 0%      ~ p=1.00
repeated0length_packedAllTypes  1.125Ki ± 0%  1.125Ki ± 0%      ~ p=1.00
repeated0length_packed3AllTypes 1.125Ki ± 0%  1.125Ki ± 0%      ~ p=1.00
repeated0length_packedAllExt    4.208Ki ± 0%  4.208Ki ± 0%      ~ p=1.00
packedPackedTypes                 592.0 ± 0%    544.0 ± 0% -8.11% p=0.00
packedPackedExt                 5.364Ki ± 0%  5.364Ki ± 0%      ~ p=1.00
packed0lengthPackedTypes          384.0 ± 0%    384.0 ± 0%      ~ p=1.00
packed0lengthPackedExt          4.208Ki ± 0%  4.208Ki ± 0%      ~ p=1.00
geomean                             1.735Ki       1.708Ki      -1.55%

                                  │ base       │ fast
                                  │ allocs/op  │ allocs/op   vs base
repeated0packedAllTypes         21.00 ± 0% 15.00 ± 0%  -28.57% p=0.002
repeated0packed3AllTypes        21.00 ± 0% 15.00 ± 0%  -28.57% p=0.002
repeated0packedAllExt           131.0 ± 0% 131.0 ± 0%        ~ p=1.000
repeated0length_packedAllTypes  1.000 ± 0% 1.000 ± 0%        ~ p=1.000
repeated0length_packed3AllTypes 1.000 ± 0% 1.000 ± 0%        ~ p=1.000
repeated0length_packedAllExt    33.00 ± 0% 33.00 ± 0%        ~ p=1.000
packedPackedTypes               21.00 ± 0% 15.00 ± 0%  -28.57% p=0.002
packedPackedExt                 131.0 ± 0% 131.0 ± 0%        ~ p=1.000
packed0lengthPackedTypes        1.000 ± 0% 1.000 ± 0%        ~ p=1.000
packed0lengthPackedExt          33.00 ± 0% 33.00 ± 0%        ~ p=1.000
geomean                             13.30      12.02        -9.60%

Change-Id: I622dd2055c3ca936f948f86ae8434387f42f8d8e
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/534196
Reviewed-by: Michael Stapelberg <stapelberg@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
Vanja Pejovic 2023-10-10 17:12:13 -04:00 committed by Michael Stapelberg
parent 322d454415
commit 86bdc4705a
4 changed files with 190 additions and 14 deletions

@@ -367,11 +367,24 @@ func consume{{.Name}}Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFie
 	sp := p.{{.GoType.PointerMethod}}Slice()
 	{{- if .WireType.Packable}}
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		{{if .WireType.ConstSize -}}
+		count := len(b) / {{template "Size" .}}
+		{{- else -}}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		{{- end}}
+		if count > 0 {
+			p.grow{{.GoType.PointerMethod}}Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			{{template "Consume" .}}
 			if n < 0 {

@@ -162,11 +162,20 @@ func appendBoolSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions
 func consumeBoolSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.BoolSlice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growBoolSlice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -732,11 +741,20 @@ func appendInt32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption
 func consumeInt32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growInt32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -1138,11 +1156,20 @@ func appendSint32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio
 func consumeSint32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growInt32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -1544,11 +1571,20 @@ func appendUint32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio
 func consumeUint32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Uint32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growUint32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -1950,11 +1986,20 @@ func appendInt64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption
 func consumeInt64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growInt64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -2356,11 +2401,20 @@ func appendSint64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio
 func consumeSint64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growInt64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -2762,11 +2816,20 @@ func appendUint64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio
 func consumeUint64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Uint64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := 0
+		for _, v := range b {
+			if v < 0x80 {
+				count++
+			}
+		}
+		if count > 0 {
+			p.growUint64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			var v uint64
 			var n int
@@ -3145,11 +3208,15 @@ func appendSfixed32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpt
 func consumeSfixed32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed32()
+		if count > 0 {
+			p.growInt32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed32(b)
 			if n < 0 {
@@ -3461,11 +3528,15 @@ func appendFixed32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpti
 func consumeFixed32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Uint32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed32()
+		if count > 0 {
+			p.growUint32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed32(b)
 			if n < 0 {
@@ -3777,11 +3848,15 @@ func appendFloatSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption
 func consumeFloatSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Float32Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed32()
+		if count > 0 {
+			p.growFloat32Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed32(b)
 			if n < 0 {
@@ -4093,11 +4168,15 @@ func appendSfixed64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpt
 func consumeSfixed64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Int64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed64()
+		if count > 0 {
+			p.growInt64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed64(b)
 			if n < 0 {
@@ -4409,11 +4488,15 @@ func appendFixed64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpti
 func consumeFixed64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Uint64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed64()
+		if count > 0 {
+			p.growUint64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed64(b)
 			if n < 0 {
@@ -4725,11 +4808,15 @@ func appendDoubleSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio
 func consumeDoubleSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) {
 	sp := p.Float64Slice()
 	if wtyp == protowire.BytesType {
-		s := *sp
 		b, n := protowire.ConsumeBytes(b)
 		if n < 0 {
 			return out, errDecode
 		}
+		count := len(b) / protowire.SizeFixed64()
+		if count > 0 {
+			p.growFloat64Slice(count)
+		}
+		s := *sp
 		for len(b) > 0 {
 			v, n := protowire.ConsumeFixed64(b)
 			if n < 0 {

@@ -159,6 +159,42 @@ func (p pointer) SetPointer(v pointer) {
 	p.v.Elem().Set(v.v)
 }
 
+func growSlice(p pointer, addCap int) {
+	// TODO: Once we only support Go 1.20 and newer, use reflect.Grow.
+	in := p.v.Elem()
+	out := reflect.MakeSlice(in.Type(), in.Len(), in.Len()+addCap)
+	reflect.Copy(out, in)
+	p.v.Elem().Set(out)
+}
+
+func (p pointer) growBoolSlice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growInt32Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growUint32Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growInt64Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growUint64Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growFloat64Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
+func (p pointer) growFloat32Slice(addCap int) {
+	growSlice(p, addCap)
+}
+
 func (Export) MessageStateOf(p Pointer) *messageState { panic("not supported") }
 func (ms *messageState) pointer() pointer { panic("not supported") }
 func (ms *messageState) messageInfo() *MessageInfo { panic("not supported") }
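
The TODO in growSlice refers to reflect.Value.Grow, added in Go 1.20, which
extends a slice's capacity in place and would replace the MakeSlice/Copy/Set
sequence above. A minimal sketch of that alternative on an ordinary slice
(an illustration only, not part of this change):

	package main

	import (
		"fmt"
		"reflect"
	)

	func main() {
		s := make([]int32, 3)
		v := reflect.ValueOf(&s).Elem()    // addressable slice value
		v.Grow(100)                        // grows capacity in place; length stays 3
		fmt.Println(len(s), cap(s) >= 103) // prints: 3 true
	}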

@@ -138,6 +138,46 @@ func (p pointer) SetPointer(v pointer) {
 	*(*unsafe.Pointer)(p.p) = (unsafe.Pointer)(v.p)
 }
 
+func (p pointer) growBoolSlice(addCap int) {
+	sp := p.BoolSlice()
+	s := make([]bool, 0, addCap+len(*sp))
+	s = s[:len(*sp)]
+	copy(s, *sp)
+	*sp = s
+}
+
+func (p pointer) growInt32Slice(addCap int) {
+	sp := p.Int32Slice()
+	s := make([]int32, 0, addCap+len(*sp))
+	s = s[:len(*sp)]
+	copy(s, *sp)
+	*sp = s
+}
+
+func (p pointer) growUint32Slice(addCap int) {
+	p.growInt32Slice(addCap)
+}
+
+func (p pointer) growFloat32Slice(addCap int) {
+	p.growInt32Slice(addCap)
+}
+
+func (p pointer) growInt64Slice(addCap int) {
+	sp := p.Int64Slice()
+	s := make([]int64, 0, addCap+len(*sp))
+	s = s[:len(*sp)]
+	copy(s, *sp)
+	*sp = s
+}
+
+func (p pointer) growUint64Slice(addCap int) {
+	p.growInt64Slice(addCap)
+}
+
+func (p pointer) growFloat64Slice(addCap int) {
+	p.growInt64Slice(addCap)
+}
+
 // Static check that MessageState does not exceed the size of a pointer.
 const _ = uint(unsafe.Sizeof(unsafe.Pointer(nil)) - unsafe.Sizeof(MessageState{}))
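
A note on the unsafe implementation above: growUint32Slice and growFloat32Slice
delegate to growInt32Slice (and the unsigned/float 64-bit variants to
growInt64Slice) because those slice types share element size and header layout,
so regrowing through the int32/int64 view of the pointer is safe. The
reflect-based implementation needs no per-type code at all: every method defers
to the generic growSlice, which handles whatever element type the slice has.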