From 14bc5ab47f2b6fcfb812a96a2c8fe531848bc2dd Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Thu, 18 Jun 2020 14:18:44 +0200 Subject: [PATCH] feat(metrics/logger): add new metric and logger possibility for peer errors Introducing a new metric GetFromPeersSlowestDuration which records the slowest call made to getFromPeer. Introducing SetLogger() to set a logger and allow getFromPeer errors to be logged. --- go.mod | 5 ++++- go.sum | 12 ++++++++++++ groupcache.go | 54 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 1f4c6c4..134dccd 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,8 @@ module github.com/mailgun/groupcache/v2 -require github.com/golang/protobuf v1.3.1 +require ( + github.com/golang/protobuf v1.3.1 + github.com/sirupsen/logrus v1.6.0 +) go 1.13 diff --git a/go.sum b/go.sum index 092b956..11ac6d3 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,14 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/groupcache.go b/groupcache.go index 480e504..6665c04 100644 --- a/groupcache.go +++ b/groupcache.go @@ -35,8 +35,15 @@ import ( pb "github.com/mailgun/groupcache/v2/groupcachepb" "github.com/mailgun/groupcache/v2/lru" "github.com/mailgun/groupcache/v2/singleflight" + "github.com/sirupsen/logrus" ) +var logger *logrus.Entry + +func SetLogger(log *logrus.Entry) { + logger = log +} + // A Getter loads data for a key. type Getter interface { // Get returns the value identified by key, populating dest. @@ -188,15 +195,16 @@ type flightGroup interface { // Stats are per-group statistics. type Stats struct { - Gets AtomicInt // any Get request, including from peers - CacheHits AtomicInt // either cache was good - PeerLoads AtomicInt // either remote load or remote cache hit (not an error) - PeerErrors AtomicInt - Loads AtomicInt // (gets - cacheHits) - LoadsDeduped AtomicInt // after singleflight - LocalLoads AtomicInt // total good local loads - LocalLoadErrs AtomicInt // total bad local loads - ServerRequests AtomicInt // gets that came over the network from peers + Gets AtomicInt // any Get request, including from peers + CacheHits AtomicInt // either cache was good + GetFromPeersSlowestDuration AtomicInt // slowest duration to request value from peers + PeerLoads AtomicInt // either remote load or remote cache hit (not an error) + PeerErrors AtomicInt + Loads AtomicInt // (gets - cacheHits) + LoadsDeduped AtomicInt // after singleflight + LocalLoads AtomicInt // total good local loads + LocalLoadErrs AtomicInt // total bad local loads + ServerRequests AtomicInt // gets that came over the network from peers } // Name returns the name of the group. 
@@ -320,11 +328,33 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView var value ByteView var err error if peer, ok := g.peers.PickPeer(key); ok { + + // metrics duration start + start := time.Now() + + // get value from peers value, err = g.getFromPeer(ctx, peer, key) + + // metrics duration compute + duration := time.Since(start).Milliseconds() + + // metrics only store the slowest duration + if g.Stats.GetFromPeersSlowestDuration.Get() < duration { + g.Stats.GetFromPeersSlowestDuration.Store(duration) + } + if err == nil { g.Stats.PeerLoads.Add(1) return value, nil } + + if logger != nil { + logger.WithFields(logrus.Fields{ + "err": err, + "key": key, + }).Error("groupcache: error retrieving key from peers") + } + g.Stats.PeerErrors.Add(1) if ctx != nil && ctx.Err() != nil { // Return here without attempting to get locally @@ -336,6 +366,7 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView // probably boring (normal task movement), so not // worth logging I imagine. } + value, err = g.getLocally(ctx, key, dest) if err != nil { g.Stats.LocalLoadErrs.Add(1) @@ -568,6 +599,11 @@ func (i *AtomicInt) Add(n int64) { atomic.AddInt64((*int64)(i), n) } +// Store atomically stores n to i. +func (i *AtomicInt) Store(n int64) { + atomic.StoreInt64((*int64)(i), n) +} + // Get atomically gets the value of i. func (i *AtomicInt) Get() int64 { return atomic.LoadInt64((*int64)(i))