protobuf-go/internal/benchmarks/micro/micro_test.go
Nicolas Hillegeer 8a744307e3 internal/cmd/generate-types: manual CSE of m.messageInfo()
messageInfo() looks like this:

    func (ms *messageState) messageInfo() *MessageInfo {
    	mi := ms.LoadMessageInfo()
    	if mi == nil {
    		panic("invalid nil message info; this suggests memory corruption due to a race or shallow copy on the message struct")
    	}
    	return mi
    }

    func (ms *messageState) LoadMessageInfo() *MessageInfo {
    	return (*MessageInfo)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&ms.atomicMessageInfo))))
    }

That is an atomic load followed by a predictable branch. On x86, this
64-bit load is just a MOV. On other platforms, like ARM64, actual atomic
instructions are involved (LDAR).

Meaning: it's cheap, but not free. And since the compiler won't eliminate
redundant atomic loads on its own, this change eliminates them by hand in
the generated code (common subexpression elimination).
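
For illustration, the rewrite looks roughly like this. `sizeField` is a
hypothetical stand-in for the generated helpers; `messageState` and
`messageInfo` are from the snippet above:

    // Before: each call performs its own atomic load.
    func sizeBefore(m *messageState) int {
    	n := m.messageInfo().sizeField(1) // atomic load
    	n += m.messageInfo().sizeField(2) // atomic load, same result
    	return n
    }

    // After: load once and reuse the result.
    func sizeAfter(m *messageState) int {
    	mi := m.messageInfo() // single atomic load
    	n := mi.sizeField(1)
    	n += mi.sizeField(2)
    	return n
    }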

The newly added benchmarks improve by roughly 2.5% (geomean):

    $ benchstat pre post | head -10
    goarch: amd64
    cpu: AMD Ryzen Threadripper PRO 3995WX 64-Cores
                          │     pre     │                post                │
                          │   sec/op    │   sec/op     vs base               │
    Extension/Has/None-12   106.4n ± 2%   104.0n ± 2%  -2.21% (p=0.020 n=10)
    Extension/Has/Set-12    116.4n ± 1%   114.4n ± 2%  -1.76% (p=0.017 n=10)
    Extension/Get/None-12   184.2n ± 1%   181.0n ± 1%  -1.68% (p=0.003 n=10)
    Extension/Get/Set-12    144.5n ± 3%   140.7n ± 2%  -2.63% (p=0.041 n=10)
    Extension/Set-12        227.2n ± 2%   218.6n ± 2%  -3.81% (p=0.000 n=10)
    geomean                 149.6n        145.9n       -2.42%
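
For reference, inputs like pre and post are typically collected along
these lines (exact flags assumed; -count=10 matches the n=10 above):

    $ go test -run='^$' -bench=Extension -count=10 ./internal/benchmarks/micro > pre
    ... apply the change ...
    $ go test -run='^$' -bench=Extension -count=10 ./internal/benchmarks/micro > post
    $ benchstat pre post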

I didn't test on ARM64, but the difference should be larger there due to
the reduced number of atomic loads.

Change-Id: I8eebeb6f753425b743368a7f5c7be4d48537e5c3
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/575036
Reviewed-by: Michael Stapelberg <stapelberg@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Commit-Queue: Nicolas Hillegeer <aktau@google.com>
Auto-Submit: Nicolas Hillegeer <aktau@google.com>
2024-04-02 14:35:14 +00:00

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This package contains microbenchmarks exercising specific areas of interest.
// The benchmarks here are not comprehensive and are not necessarily indicative
// of real-world performance.
package micro_test

import (
	"testing"

	"google.golang.org/protobuf/internal/impl"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/runtime/protoiface"
	"google.golang.org/protobuf/types/known/emptypb"

	micropb "google.golang.org/protobuf/internal/testprotos/benchmarks/micro"
	testpb "google.golang.org/protobuf/internal/testprotos/test"
)

// BenchmarkEmptyMessage tests a google.protobuf.Empty.
//
// It measures per-operation overhead.
func BenchmarkEmptyMessage(b *testing.B) {
	b.Run("Wire/Marshal", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			m := &emptypb.Empty{}
			for pb.Next() {
				if _, err := proto.Marshal(m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Unmarshal", func(b *testing.B) {
		// Merge avoids resetting m before each Unmarshal, so iterations
		// measure steady-state decoding rather than message reset.
		opts := proto.UnmarshalOptions{
			Merge: true,
		}
		b.RunParallel(func(pb *testing.PB) {
			m := &emptypb.Empty{}
			for pb.Next() {
				if err := opts.Unmarshal([]byte{}, m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Validate", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			mt := (&emptypb.Empty{}).ProtoReflect().Type()
			for pb.Next() {
				_, got := impl.Validate(mt, protoiface.UnmarshalInput{})
				want := impl.ValidationValid
				if got != want {
					b.Fatalf("Validate = %v, want %v", got, want)
				}
			}
		})
	})
	b.Run("Clone", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			m := &emptypb.Empty{}
			for pb.Next() {
				proto.Clone(m)
			}
		})
	})
	b.Run("New", func(b *testing.B) {
		mt := (&emptypb.Empty{}).ProtoReflect().Type()
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				mt.New()
			}
		})
	})
}

// BenchmarkRepeatedInt32 tests a message containing 500 non-packed repeated int32s.
//
// For unmarshal operations, it measures the cost of the field decode loop, since each
// item in the repeated field has an individual tag and value.
func BenchmarkRepeatedInt32(b *testing.B) {
	m := &testpb.TestAllTypes{}
	for i := int32(0); i < 500; i++ {
		m.RepeatedInt32 = append(m.RepeatedInt32, i)
	}
	w, err := proto.Marshal(m)
	if err != nil {
		b.Fatal(err)
	}
	b.Run("Wire/Marshal", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				if _, err := proto.Marshal(m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Unmarshal", func(b *testing.B) {
		opts := proto.UnmarshalOptions{
			Merge: true,
		}
		b.RunParallel(func(pb *testing.PB) {
			m := &testpb.TestAllTypes{}
			for pb.Next() {
				// Truncate in place so each iteration reuses the
				// slice's backing array.
				m.RepeatedInt32 = m.RepeatedInt32[:0]
				if err := opts.Unmarshal(w, m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Validate", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			mt := (&testpb.TestAllTypes{}).ProtoReflect().Type()
			for pb.Next() {
				_, got := impl.Validate(mt, protoiface.UnmarshalInput{
					Buf: w,
				})
				want := impl.ValidationValid
				if got != want {
					b.Fatalf("Validate = %v, want %v", got, want)
				}
			}
		})
	})
	b.Run("Clone", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				proto.Clone(m)
			}
		})
	})
}

// BenchmarkRequired tests a message containing sixteen required fields.
func BenchmarkRequired(b *testing.B) {
	m := &micropb.SixteenRequired{
		F1:  proto.Int32(1),
		F2:  proto.Int32(1),
		F3:  proto.Int32(1),
		F4:  proto.Int32(1),
		F5:  proto.Int32(1),
		F6:  proto.Int32(1),
		F7:  proto.Int32(1),
		F8:  proto.Int32(1),
		F9:  proto.Int32(1),
		F10: proto.Int32(1),
		F11: proto.Int32(1),
		F12: proto.Int32(1),
		F13: proto.Int32(1),
		F14: proto.Int32(1),
		F15: proto.Int32(1),
		F16: proto.Int32(1),
	}
	w, err := proto.Marshal(m)
	if err != nil {
		b.Fatal(err)
	}
	b.Run("Wire/Marshal", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				if _, err := proto.Marshal(m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Unmarshal", func(b *testing.B) {
		opts := proto.UnmarshalOptions{
			Merge: true,
		}
		b.RunParallel(func(pb *testing.PB) {
			m := &micropb.SixteenRequired{}
			for pb.Next() {
				if err := opts.Unmarshal(w, m); err != nil {
					b.Fatal(err)
				}
			}
		})
	})
	b.Run("Wire/Validate", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			mt := (&micropb.SixteenRequired{}).ProtoReflect().Type()
			for pb.Next() {
				_, got := impl.Validate(mt, protoiface.UnmarshalInput{
					Buf: w,
				})
				want := impl.ValidationValid
				if got != want {
					b.Fatalf("Validate = %v, want %v", got, want)
				}
			}
		})
	})
	b.Run("Clone", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				proto.Clone(m)
			}
		})
	})
}
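
// BenchmarkExtension measures proto.HasExtension, proto.GetExtension, and
// proto.SetExtension, with the extension field both absent (None) and
// present (Set).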
func BenchmarkExtension(b *testing.B) {
	b.Run("Has/None", func(b *testing.B) {
		m := &testpb.TestAllExtensions{}
		for i := 0; i < b.N; i++ {
			proto.HasExtension(m, testpb.E_OptionalNestedMessage)
		}
	})
	b.Run("Has/Set", func(b *testing.B) {
		m := &testpb.TestAllExtensions{}
		ext := &testpb.TestAllExtensions_NestedMessage{A: proto.Int32(-32)}
		proto.SetExtension(m, testpb.E_OptionalNestedMessage, ext)
		for i := 0; i < b.N; i++ {
			proto.HasExtension(m, testpb.E_OptionalNestedMessage)
		}
	})
	b.Run("Get/None", func(b *testing.B) {
		m := &testpb.TestAllExtensions{}
		for i := 0; i < b.N; i++ {
			proto.GetExtension(m, testpb.E_OptionalNestedMessage)
		}
	})
	b.Run("Get/Set", func(b *testing.B) {
		m := &testpb.TestAllExtensions{}
		ext := &testpb.TestAllExtensions_NestedMessage{A: proto.Int32(-32)}
		proto.SetExtension(m, testpb.E_OptionalNestedMessage, ext)
		for i := 0; i < b.N; i++ {
			proto.GetExtension(m, testpb.E_OptionalNestedMessage)
		}
	})
	b.Run("Set", func(b *testing.B) {
		m := &testpb.TestAllExtensions{}
		ext := &testpb.TestAllExtensions_NestedMessage{A: proto.Int32(-32)}
		for i := 0; i < b.N; i++ {
			proto.SetExtension(m, testpb.E_OptionalNestedMessage, ext)
		}
	})
}