From 14bc5ab47f2b6fcfb812a96a2c8fe531848bc2dd Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Thu, 18 Jun 2020 14:18:44 +0200 Subject: [PATCH 1/8] feat(metrics/logger): add new metric And logger possiblity for peers error Introducing a new metric GetFromPeersSlowestDuration which will be recording the slowest call made to getFromPeers. Introducing SetLogger() to set a logger and allow getFromPeers error to be logged of. --- go.mod | 5 ++++- go.sum | 12 ++++++++++++ groupcache.go | 54 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 1f4c6c4..134dccd 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,8 @@ module github.com/mailgun/groupcache/v2 -require github.com/golang/protobuf v1.3.1 +require ( + github.com/golang/protobuf v1.3.1 + github.com/sirupsen/logrus v1.6.0 +) go 1.13 diff --git a/go.sum b/go.sum index 092b956..11ac6d3 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,14 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/groupcache.go b/groupcache.go index 480e504..6665c04 100644 --- a/groupcache.go +++ b/groupcache.go @@ -35,8 +35,15 @@ import ( pb "github.com/mailgun/groupcache/v2/groupcachepb" "github.com/mailgun/groupcache/v2/lru" "github.com/mailgun/groupcache/v2/singleflight" + "github.com/sirupsen/logrus" ) +var logger *logrus.Entry + +func SetLogger(log *logrus.Entry) { + logger = log +} + // A Getter loads data for a key. type Getter interface { // Get returns the value identified by key, populating dest. @@ -188,15 +195,16 @@ type flightGroup interface { // Stats are per-group statistics. type Stats struct { - Gets AtomicInt // any Get request, including from peers - CacheHits AtomicInt // either cache was good - PeerLoads AtomicInt // either remote load or remote cache hit (not an error) - PeerErrors AtomicInt - Loads AtomicInt // (gets - cacheHits) - LoadsDeduped AtomicInt // after singleflight - LocalLoads AtomicInt // total good local loads - LocalLoadErrs AtomicInt // total bad local loads - ServerRequests AtomicInt // gets that came over the network from peers + Gets AtomicInt // any Get request, including from peers + CacheHits AtomicInt // either cache was good + GetFromPeersSlowestDuration AtomicInt // slowest duration to request value from peers + PeerLoads AtomicInt // either remote load or remote cache hit (not an error) + PeerErrors AtomicInt + Loads AtomicInt // (gets - cacheHits) + LoadsDeduped AtomicInt // after singleflight + LocalLoads AtomicInt // total good local loads + LocalLoadErrs AtomicInt // total bad local loads + ServerRequests AtomicInt // gets that came over the network from peers } // Name returns the name of the group. @@ -320,11 +328,33 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView var value ByteView var err error if peer, ok := g.peers.PickPeer(key); ok { + + // metrics duration start + start := time.Now() + + // get value from peers value, err = g.getFromPeer(ctx, peer, key) + + // metrics duration compute + duration := time.Since(start).Milliseconds() + + // metrics only store the slowest duration + if g.Stats.GetFromPeersSlowestDuration.Get() < duration { + g.Stats.GetFromPeersSlowestDuration.Store(duration) + } + if err == nil { g.Stats.PeerLoads.Add(1) return value, nil } + + if logger != nil { + logger.WithFields(logrus.Fields{ + "err": err, + "key": key, + }).Error("groupcache: error retrieving key from peers") + } + g.Stats.PeerErrors.Add(1) if ctx != nil && ctx.Err() != nil { // Return here without attempting to get locally @@ -336,6 +366,7 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView // probably boring (normal task movement), so not // worth logging I imagine. } + value, err = g.getLocally(ctx, key, dest) if err != nil { g.Stats.LocalLoadErrs.Add(1) @@ -568,6 +599,11 @@ func (i *AtomicInt) Add(n int64) { atomic.AddInt64((*int64)(i), n) } +// Store atomically stores n to i. +func (i *AtomicInt) Store(n int64) { + atomic.StoreInt64((*int64)(i), n) +} + // Get atomically gets the value of i. func (i *AtomicInt) Get() int64 { return atomic.LoadInt64((*int64)(i)) From 121623bc0261dcea802ac125e35461055524e38a Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Fri, 3 Jul 2020 12:42:10 +0200 Subject: [PATCH 2/8] feat(logger): add a new field category in the log --- groupcache.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/groupcache.go b/groupcache.go index 6665c04..6c0240c 100644 --- a/groupcache.go +++ b/groupcache.go @@ -350,9 +350,10 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView if logger != nil { logger.WithFields(logrus.Fields{ - "err": err, - "key": key, - }).Error("groupcache: error retrieving key from peers") + "err": err, + "key": key, + "category": "groupcache", + }).Error("error retrieving key from peers") } g.Stats.PeerErrors.Add(1) From b543958475062d1798159e5995210c7395b0c0d3 Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Mon, 6 Jul 2020 18:53:29 +0200 Subject: [PATCH 3/8] fix(datarace): pass peer var as param for func literal peer var was being accessed by scope heritage instead of being passed as param. making it unsafe to data race. --- groupcache.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/groupcache.go b/groupcache.go index 6c0240c..e841ef9 100644 --- a/groupcache.go +++ b/groupcache.go @@ -273,10 +273,10 @@ func (g *Group) Remove(ctx context.Context, key string) error { } wg.Add(1) - go func() { + go func(peer ProtoGetter) { errs <- g.removeFromPeer(ctx, peer, key) wg.Done() - }() + }(peer) } go func() { wg.Wait() From 3f7232facaebc69fccdbc773c2cccb4a668089cd Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Wed, 8 Jul 2020 14:21:49 +0200 Subject: [PATCH 4/8] feat(peer): add GetURL to be able to retrieve peer url --- groupcache.go | 2 +- groupcache_test.go | 4 ++++ http.go | 14 ++++++++++++++ peers.go | 6 ++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/groupcache.go b/groupcache.go index e841ef9..c583d80 100644 --- a/groupcache.go +++ b/groupcache.go @@ -353,7 +353,7 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView "err": err, "key": key, "category": "groupcache", - }).Error("error retrieving key from peers") + }).Errorf("error retrieving key from peer '%s'", g.peers.GetURL(key)) } g.Stats.PeerErrors.Add(1) diff --git a/groupcache_test.go b/groupcache_test.go index a3d8369..58f8c81 100644 --- a/groupcache_test.go +++ b/groupcache_test.go @@ -285,6 +285,10 @@ func (p fakePeers) GetAll() []ProtoGetter { return p } +func (p fakePeers) GetURL(key string) string { + return "fakePeer" +} + // tests that peers (virtual, in-process) are hit, and how much. func TestPeers(t *testing.T) { once.Do(testSetup) diff --git a/http.go b/http.go index a792357..badc84c 100644 --- a/http.go +++ b/http.go @@ -157,6 +157,20 @@ func (p *HTTPPool) PickPeer(key string) (ProtoGetter, bool) { return nil, false } +// GetURL +func (p *HTTPPool) GetURL(key string) string { + p.mu.Lock() + defer p.mu.Unlock() + + if p.peers.IsEmpty() { + return "" + } + if peer := p.peers.Get(key); peer != p.self { + return peer + } + return p.self +} + func (p *HTTPPool) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Parse request. if !strings.HasPrefix(r.URL.Path, p.opts.BasePath) { diff --git a/peers.go b/peers.go index dbb8ae1..9f36e6b 100644 --- a/peers.go +++ b/peers.go @@ -20,6 +20,7 @@ package groupcache import ( "context" + pb "github.com/mailgun/groupcache/v2/groupcachepb" ) @@ -36,7 +37,11 @@ type PeerPicker interface { // and true to indicate that a remote peer was nominated. // It returns nil, false if the key owner is the current peer. PickPeer(key string) (peer ProtoGetter, ok bool) + // GetAll returns all the peers in the group GetAll() []ProtoGetter + // GetURL returns the peer URL that own the specific key + // It returns empty string if there is no peers + GetURL(key string) string } // NoPeers is an implementation of PeerPicker that never finds a peer. @@ -44,6 +49,7 @@ type NoPeers struct{} func (NoPeers) PickPeer(key string) (peer ProtoGetter, ok bool) { return } func (NoPeers) GetAll() []ProtoGetter { return []ProtoGetter{} } +func (NoPeers) GetURL(key string) string { return "" } var ( portPicker func(groupName string) PeerPicker From ae2e156f19739f00dceb1cddb35ce2e2544981a3 Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Wed, 8 Jul 2020 16:49:30 +0200 Subject: [PATCH 5/8] fix(metrics): use backward compatible time computation for duration --- groupcache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/groupcache.go b/groupcache.go index c583d80..9b1578f 100644 --- a/groupcache.go +++ b/groupcache.go @@ -336,7 +336,7 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView value, err = g.getFromPeer(ctx, peer, key) // metrics duration compute - duration := time.Since(start).Milliseconds() + duration := int64(time.Since(start)) / int64(time.Millisecond) // metrics only store the slowest duration if g.Stats.GetFromPeersSlowestDuration.Get() < duration { From fb2028f2ffb0f7610d43609c697b70870454b9a3 Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Wed, 8 Jul 2020 18:59:59 +0200 Subject: [PATCH 6/8] fix(): get URL from httpGetter instead of httpPool --- groupcache.go | 2 +- groupcache_test.go | 4 ++++ http.go | 19 +++++-------------- peers.go | 6 ++---- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/groupcache.go b/groupcache.go index 9b1578f..3238876 100644 --- a/groupcache.go +++ b/groupcache.go @@ -353,7 +353,7 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView "err": err, "key": key, "category": "groupcache", - }).Errorf("error retrieving key from peer '%s'", g.peers.GetURL(key)) + }).Errorf("error retrieving key from peer '%s'", peer.GetURL()) } g.Stats.PeerErrors.Add(1) diff --git a/groupcache_test.go b/groupcache_test.go index 58f8c81..86a4df2 100644 --- a/groupcache_test.go +++ b/groupcache_test.go @@ -271,6 +271,10 @@ func (p *fakePeer) Remove(_ context.Context, in *pb.GetRequest) error { return nil } +func (p *fakePeer) GetURL() string { + return "fakePeer" +} + type fakePeers []ProtoGetter func (p fakePeers) PickPeer(key string) (peer ProtoGetter, ok bool) { diff --git a/http.go b/http.go index badc84c..3048afc 100644 --- a/http.go +++ b/http.go @@ -157,20 +157,6 @@ func (p *HTTPPool) PickPeer(key string) (ProtoGetter, bool) { return nil, false } -// GetURL -func (p *HTTPPool) GetURL(key string) string { - p.mu.Lock() - defer p.mu.Unlock() - - if p.peers.IsEmpty() { - return "" - } - if peer := p.peers.Get(key); peer != p.self { - return peer - } - return p.self -} - func (p *HTTPPool) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Parse request. if !strings.HasPrefix(r.URL.Path, p.opts.BasePath) { @@ -239,6 +225,11 @@ type httpGetter struct { baseURL string } +// GetURL +func (p *httpGetter) GetURL() string { + return p.baseURL +} + var bufferPool = sync.Pool{ New: func() interface{} { return new(bytes.Buffer) }, } diff --git a/peers.go b/peers.go index 9f36e6b..18d8659 100644 --- a/peers.go +++ b/peers.go @@ -28,6 +28,8 @@ import ( type ProtoGetter interface { Get(context context.Context, in *pb.GetRequest, out *pb.GetResponse) error Remove(context context.Context, in *pb.GetRequest) error + // GetURL returns the peer URL + GetURL() string } // PeerPicker is the interface that must be implemented to locate @@ -39,9 +41,6 @@ type PeerPicker interface { PickPeer(key string) (peer ProtoGetter, ok bool) // GetAll returns all the peers in the group GetAll() []ProtoGetter - // GetURL returns the peer URL that own the specific key - // It returns empty string if there is no peers - GetURL(key string) string } // NoPeers is an implementation of PeerPicker that never finds a peer. @@ -49,7 +48,6 @@ type NoPeers struct{} func (NoPeers) PickPeer(key string) (peer ProtoGetter, ok bool) { return } func (NoPeers) GetAll() []ProtoGetter { return []ProtoGetter{} } -func (NoPeers) GetURL(key string) string { return "" } var ( portPicker func(groupName string) PeerPicker From 6308e62bbb91a979481e6650bd82cae49a7058c2 Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Wed, 8 Jul 2020 19:01:58 +0200 Subject: [PATCH 7/8] style(metrics): rename get from peers stats --- groupcache.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/groupcache.go b/groupcache.go index 3238876..ddfe8da 100644 --- a/groupcache.go +++ b/groupcache.go @@ -195,16 +195,16 @@ type flightGroup interface { // Stats are per-group statistics. type Stats struct { - Gets AtomicInt // any Get request, including from peers - CacheHits AtomicInt // either cache was good - GetFromPeersSlowestDuration AtomicInt // slowest duration to request value from peers - PeerLoads AtomicInt // either remote load or remote cache hit (not an error) - PeerErrors AtomicInt - Loads AtomicInt // (gets - cacheHits) - LoadsDeduped AtomicInt // after singleflight - LocalLoads AtomicInt // total good local loads - LocalLoadErrs AtomicInt // total bad local loads - ServerRequests AtomicInt // gets that came over the network from peers + Gets AtomicInt // any Get request, including from peers + CacheHits AtomicInt // either cache was good + GetFromPeersLatencyLower AtomicInt // slowest duration to request value from peers + PeerLoads AtomicInt // either remote load or remote cache hit (not an error) + PeerErrors AtomicInt + Loads AtomicInt // (gets - cacheHits) + LoadsDeduped AtomicInt // after singleflight + LocalLoads AtomicInt // total good local loads + LocalLoadErrs AtomicInt // total bad local loads + ServerRequests AtomicInt // gets that came over the network from peers } // Name returns the name of the group. @@ -339,8 +339,8 @@ func (g *Group) load(ctx context.Context, key string, dest Sink) (value ByteView duration := int64(time.Since(start)) / int64(time.Millisecond) // metrics only store the slowest duration - if g.Stats.GetFromPeersSlowestDuration.Get() < duration { - g.Stats.GetFromPeersSlowestDuration.Store(duration) + if g.Stats.GetFromPeersLatencyLower.Get() < duration { + g.Stats.GetFromPeersLatencyLower.Store(duration) } if err == nil { From 5b70a612a7a6acbe1b2e6b77c60fba02ecf07a08 Mon Sep 17 00:00:00 2001 From: Tommy PAGEARD Date: Thu, 9 Jul 2020 11:31:16 +0200 Subject: [PATCH 8/8] test(): remove unused method GetURL(key string) --- groupcache_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/groupcache_test.go b/groupcache_test.go index 86a4df2..a99a60e 100644 --- a/groupcache_test.go +++ b/groupcache_test.go @@ -289,10 +289,6 @@ func (p fakePeers) GetAll() []ProtoGetter { return p } -func (p fakePeers) GetURL(key string) string { - return "fakePeer" -} - // tests that peers (virtual, in-process) are hit, and how much. func TestPeers(t *testing.T) { once.Do(testSetup)