protobuf-go/benchmarks/bench_test.go
Damien Neil 42cfff4a76 benchmarks: add general-purpose benchmarks directory
Move the benchmarks using the common protobuf datasets out of proto/ and
into their own directory. Add benchmarks for text and JSON.

Move initialization out of the Benchmark function to avoid including it
in CPU/memory profiles.

We could put benchmarks in each individual package (proto, prototext,
etc.), but the need for common infrastructure around managing the test
data makes it simpler to keep the benchmarks together. Also, it's nice
to have a one-stop overview of performance.

Change-Id: I17c37efb91b2413fc43ab1b4c35bff2e1330bc0a
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/183245
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2019-06-24 19:20:48 +00:00

268 lines
7.1 KiB
Go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bench_test
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
"github.com/golang/protobuf/jsonpb"
protoV1 "github.com/golang/protobuf/proto"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/proto"
pref "google.golang.org/protobuf/reflect/protoreflect"
preg "google.golang.org/protobuf/reflect/protoregistry"
benchpb "google.golang.org/protobuf/internal/testprotos/benchmarks"
_ "google.golang.org/protobuf/internal/testprotos/benchmarks/datasets/google_message1/proto2"
_ "google.golang.org/protobuf/internal/testprotos/benchmarks/datasets/google_message1/proto3"
_ "google.golang.org/protobuf/internal/testprotos/benchmarks/datasets/google_message2"
_ "google.golang.org/protobuf/internal/testprotos/benchmarks/datasets/google_message3"
_ "google.golang.org/protobuf/internal/testprotos/benchmarks/datasets/google_message4"
)
var (
	// benchV1 selects the legacy v1 implementation
	// (github.com/golang/protobuf) for all benchmarked operations,
	// allowing side-by-side performance comparison with v2.
	benchV1 = flag.Bool("v1", false, "benchmark the v1 implementation")
)
// BenchmarkWire measures binary wire-format decode, encode, and size
// computation over every loaded dataset.
func BenchmarkWire(b *testing.B) {
	bench(b, "Unmarshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, raw := range d.wire {
				msg := d.messageType.New().Interface()
				if err := Unmarshal(raw, msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
	bench(b, "Marshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, msg := range d.messages {
				if _, err := Marshal(msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
	bench(b, "Size", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, msg := range d.messages {
				Size(msg)
			}
		}
	})
}
// BenchmarkText measures prototext decode and encode over every loaded
// dataset.
func BenchmarkText(b *testing.B) {
	bench(b, "Unmarshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, raw := range d.text {
				msg := d.messageType.New().Interface()
				if err := UnmarshalText(raw, msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
	bench(b, "Marshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, msg := range d.messages {
				if _, err := MarshalText(msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
}
// BenchmarkJSON measures protojson decode and encode over every loaded
// dataset.
func BenchmarkJSON(b *testing.B) {
	bench(b, "Unmarshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, raw := range d.json {
				msg := d.messageType.New().Interface()
				if err := UnmarshalJSON(raw, msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
	bench(b, "Marshal", func(d dataset, pb *testing.PB) {
		for pb.Next() {
			for _, msg := range d.messages {
				if _, err := MarshalJSON(msg); err != nil {
					b.Fatal(err)
				}
			}
		}
	})
}
// bench runs f in parallel under a sub-benchmark named name, once for each
// entry in datasets (each dataset is a further nested sub-benchmark).
func bench(b *testing.B, name string, f func(dataset, *testing.PB)) {
	b.Helper()
	b.Run(name, func(b *testing.B) {
		for _, ds := range datasets {
			ds := ds // capture per-iteration value for the closures below
			run := func(b *testing.B) {
				b.RunParallel(func(pb *testing.PB) { f(ds, pb) })
			}
			b.Run(ds.name, run)
		}
	})
}
// dataset is one benchmark input: a message type together with the same
// payloads pre-encoded in wire, text, and JSON form, so that encoding work
// is excluded from the unmarshal benchmarks.
type dataset struct {
	name        string           // dataset name, used as the sub-benchmark name
	messageType pref.MessageType // type used to allocate fresh messages for unmarshal
	messages    []proto.Message  // decoded messages; inputs for marshal/size benchmarks
	wire        [][]byte         // binary wire-format payloads
	text        [][]byte         // prototext payloads derived from wire
	json        [][]byte         // protojson payloads derived from wire
}
var datasets []dataset
func TestMain(m *testing.M) {
// Load benchmark data early, to avoid including this step in -cpuprofile/-memprofile.
//
// For the larger benchmark datasets (not downloaded by default), preparing
// this data is quite expensive. In addition, keeping the unmarshaled messages
// in memory makes GC scans a substantial fraction of runtime CPU cost.
//
// It would be nice to avoid loading the data we aren't going to use. Unfortunately,
// there isn't any simple way to tell what benchmarks are going to run; we can examine
// the -test.bench flag, but parsing it is quite complicated.
flag.Parse()
if v := flag.Lookup("test.bench").Value.(flag.Getter).Get(); v == "" {
// Don't bother loading data if we aren't going to run any benchmarks.
// Avoids slowing down go test ./...
return
}
if v := flag.Lookup("test.timeout").Value.(flag.Getter).Get().(time.Duration); v != 0 && v <= 10*time.Minute {
// The default test timeout of 10m is too short if running all the benchmarks.
// It's quite frustrating to discover this 10m through a benchmark run, so
// catch the condition.
//
// The -timeout and -test.timeout flags are handled by the go command, which
// forwards them along to the test binary, so we can't just set the default
// to something reasonable; the go command will override it with its default.
// We also can't ignore the timeout, because the go command kills a test which
// runs more than a minute past its deadline.
fmt.Fprintf(os.Stderr, "Test timeout of %v is probably too short; set -test.timeout=0.\n", v)
os.Exit(1)
}
out, err := exec.Command("git", "rev-parse", "--show-toplevel").CombinedOutput()
if err != nil {
panic(err)
}
repoRoot := strings.TrimSpace(string(out))
dataDir := filepath.Join(repoRoot, ".cache", "benchdata")
filepath.Walk(dataDir, func(path string, _ os.FileInfo, _ error) error {
if filepath.Ext(path) != ".pb" {
return nil
}
raw, err := ioutil.ReadFile(path)
if err != nil {
panic(err)
}
dspb := &benchpb.BenchmarkDataset{}
if err := proto.Unmarshal(raw, dspb); err != nil {
panic(err)
}
mt, err := preg.GlobalTypes.FindMessageByName(pref.FullName(dspb.MessageName))
if err != nil {
panic(err)
}
ds := dataset{
name: dspb.Name,
messageType: mt,
wire: dspb.Payload,
}
for _, payload := range dspb.Payload {
m := mt.New().Interface()
if err := proto.Unmarshal(payload, m); err != nil {
panic(err)
}
ds.messages = append(ds.messages, m)
b, err := prototext.Marshal(m)
if err != nil {
panic(err)
}
ds.text = append(ds.text, b)
b, err = protojson.Marshal(m)
if err != nil {
panic(err)
}
ds.json = append(ds.json, b)
}
datasets = append(datasets, ds)
return nil
})
os.Exit(m.Run())
}
// Unmarshal parses the wire-format message in b and stores the result in m,
// dispatching to the v1 implementation when the -v1 flag is set.
func Unmarshal(b []byte, m proto.Message) error {
	if !*benchV1 {
		return proto.Unmarshal(b, m)
	}
	return protoV1.Unmarshal(b, m.(protoV1.Message))
}
// Marshal encodes m to wire format, dispatching to the v1 implementation
// when the -v1 flag is set.
func Marshal(m proto.Message) ([]byte, error) {
	if !*benchV1 {
		return proto.Marshal(m)
	}
	return protoV1.Marshal(m.(protoV1.Message))
}
// Size returns the wire-format encoded size of m, dispatching to the v1
// implementation when the -v1 flag is set.
func Size(m proto.Message) int {
	if !*benchV1 {
		return proto.Size(m)
	}
	return protoV1.Size(m.(protoV1.Message))
}
// UnmarshalText parses the prototext message in b into m, dispatching to
// the v1 implementation when the -v1 flag is set.
func UnmarshalText(b []byte, m proto.Message) error {
	if !*benchV1 {
		return prototext.Unmarshal(b, m)
	}
	// Extra string conversion makes this not quite right.
	return protoV1.UnmarshalText(string(b), m.(protoV1.Message))
}
// MarshalText encodes m in prototext form, dispatching to the v1
// implementation when the -v1 flag is set.
func MarshalText(m proto.Message) ([]byte, error) {
	if !*benchV1 {
		return prototext.Marshal(m)
	}
	var buf bytes.Buffer
	err := protoV1.MarshalText(&buf, m.(protoV1.Message))
	return buf.Bytes(), err
}
// UnmarshalJSON parses the JSON message in b into m, dispatching to the v1
// implementation (jsonpb) when the -v1 flag is set.
func UnmarshalJSON(b []byte, m proto.Message) error {
	if !*benchV1 {
		return protojson.Unmarshal(b, m)
	}
	return jsonpb.Unmarshal(bytes.NewBuffer(b), m.(protoV1.Message))
}
// MarshalJSON encodes m in JSON form, dispatching to the v1 implementation
// (jsonpb) when the -v1 flag is set.
func MarshalJSON(m proto.Message) ([]byte, error) {
	if !*benchV1 {
		return protojson.Marshal(m)
	}
	var buf bytes.Buffer
	err := (&jsonpb.Marshaler{}).Marshal(&buf, m.(protoV1.Message))
	return buf.Bytes(), err
}