diff --git a/groupcache.go b/groupcache.go index 637b063..40410a0 100644 --- a/groupcache.go +++ b/groupcache.go @@ -100,6 +100,7 @@ func newGroup(name string, cacheBytes int64, getter Getter, peers PeerPicker) *G getter: getter, peers: peers, cacheBytes: cacheBytes, + loadGroup: &singleflight.Group{}, } if fn := newGroupHook; fn != nil { fn(g) @@ -163,12 +164,20 @@ type Group struct { // loadGroup ensures that each key is only fetched once // (either locally or remotely), regardless of the number of // concurrent callers. - loadGroup singleflight.Group + loadGroup flightGroup // Stats are statistics on the group. Stats Stats } +// flightGroup is defined as an interface which flightgroup.Group +// satisfies. We define this so that we may test with an alternate +// implementation. +type flightGroup interface { + // Done is called when Do is done. + Do(key string, fn func() (interface{}, error)) (interface{}, error) +} + // Stats are per-group statistics. type Stats struct { Gets AtomicInt // any Get request, including from peers @@ -225,6 +234,31 @@ func (g *Group) Get(ctx Context, key string, dest Sink) error { func (g *Group) load(ctx Context, key string, dest Sink) (value ByteView, destPopulated bool, err error) { g.Stats.Loads.Add(1) viewi, err := g.loadGroup.Do(key, func() (interface{}, error) { + // Check the cache again because singleflight can only dedup calls + // that overlap concurrently. It's possible for 2 concurrent + // requests to miss the cache, resulting in 2 load() calls. An + // unfortunate goroutine scheduling would result in this callback + // being run twice, serially. If we don't check the cache again, + // cache.nbytes would be incremented below even though there will + // be only one entry for this key. + // + // Consider the following serialized event ordering for two + // goroutines in which this callback gets called twice for hte + // same key: + // 1: Get("key") + // 2: Get("key") + // 1: lookupCache("key") + // 2: lookupCache("key") + // 1: load("key") + // 2: load("key") + // 1: loadGroup.Do("key", fn) + // 1: fn() + // 2: loadGroup.Do("key", fn) + // 2: fn() + if value, cacheHit := g.lookupCache(key); cacheHit { + g.Stats.CacheHits.Add(1) + return value, nil + } g.Stats.LoadsDeduped.Add(1) var value ByteView var err error diff --git a/groupcache_test.go b/groupcache_test.go index 0ecd6bd..3a4ecc2 100644 --- a/groupcache_test.go +++ b/groupcache_test.go @@ -363,5 +363,85 @@ func TestAllocatingByteSliceTarget(t *testing.T) { } } +// orderedFlightGroup allows the caller to force the schedule of when +// orig.Do will be called. This is useful to serialize calls such +// that singleflight cannot dedup them. +type orderedFlightGroup struct { + mu sync.Mutex + stage1 chan bool + stage2 chan bool + orig flightGroup +} + +func (g *orderedFlightGroup) Do(key string, fn func() (interface{}, error)) (interface{}, error) { + <-g.stage1 + <-g.stage2 + g.mu.Lock() + defer g.mu.Unlock() + return g.orig.Do(key, fn) +} + +// TestNoDedup tests invariants on the cache size when singleflight is +// unable to dedup calls. +func TestNoDedup(t *testing.T) { + const testkey = "testkey" + const testval = "testval" + g := newGroup("testgroup", 1024, GetterFunc(func(_ Context, key string, dest Sink) error { + return dest.SetString(testval) + }), nil) + + orderedGroup := &orderedFlightGroup{ + stage1: make(chan bool), + stage2: make(chan bool), + orig: g.loadGroup, + } + // Replace loadGroup with our wrapper so we can control when + // loadGroup.Do is entered for each concurrent request. + g.loadGroup = orderedGroup + + // Issue two idential requests concurrently. Since the cache is + // empty, it will miss. Both will enter load(), but we will only + // allow one at a time to enter singleflight.Do, so the callback + // function will be called twice. + resc := make(chan string, 2) + for i := 0; i < 2; i++ { + go func() { + var s string + if err := g.Get(dummyCtx, testkey, StringSink(&s)); err != nil { + resc <- "ERROR:" + err.Error() + return + } + resc <- s + }() + } + + // Ensure both goroutines have entered the Do routine. This implies + // both concurrent requests have checked the cache, found it empty, + // and called load(). + orderedGroup.stage1 <- true + orderedGroup.stage1 <- true + orderedGroup.stage2 <- true + orderedGroup.stage2 <- true + + for i := 0; i < 2; i++ { + if s := <-resc; s != testval { + t.Errorf("result is %s want %s", s, testval) + } + } + + const wantItems = 1 + if g.mainCache.items() != wantItems { + t.Errorf("mainCache has %d items, want %d", g.mainCache.items(), wantItems) + } + + // If the singleflight callback doesn't double-check the cache again + // upon entry, we would increment nbytes twice but the entry would + // only be in the cache once. + const wantBytes = int64(len(testkey) + len(testval)) + if g.mainCache.nbytes != wantBytes { + t.Errorf("cache has %d bytes, want %d", g.mainCache.nbytes, wantBytes) + } +} + // TODO(bradfitz): port the Google-internal full integration test into here, // using HTTP requests instead of our RPC system.