From ff663881314176b3d3dc51bacd837c7b3e262882 Mon Sep 17 00:00:00 2001 From: "Derrick J. Wippler" Date: Fri, 18 Dec 2020 21:22:07 -0600 Subject: [PATCH] Consistenthash improvements * Now uses the much faster fnv1 * Now md5 hashes the keys to help distribute hosts more evenly in some cases. --- CHANGELOG | 6 +++ consistenthash/consistenthash.go | 11 ++-- consistenthash/consistenthash_test.go | 77 +++++++++++++++++++-------- go.mod | 1 + go.sum | 2 + 5 files changed, 71 insertions(+), 26 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5fc0d45..8d0b942 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.2.1] - 2021-01-13 +### Changes +* Now uses the much faster fnv1 +* Now md5 hashes the keys to help distribute hosts more evenly in some + cases. + ## [2.2.0] - 2019-07-09 ### Added * Added `SetLogger()` to pass in a logrus entry for logging peer errors diff --git a/consistenthash/consistenthash.go b/consistenthash/consistenthash.go index a9c56f0..9854c65 100644 --- a/consistenthash/consistenthash.go +++ b/consistenthash/consistenthash.go @@ -18,12 +18,15 @@ limitations under the License. 
package consistenthash import ( - "hash/crc32" + "crypto/md5" + "fmt" "sort" "strconv" + + "github.com/segmentio/fasthash/fnv1" ) -type Hash func(data []byte) uint32 +type Hash func(data []byte) uint64 type Map struct { hash Hash @@ -39,7 +42,7 @@ func New(replicas int, fn Hash) *Map { hashMap: make(map[int]string), } if m.hash == nil { - m.hash = crc32.ChecksumIEEE + m.hash = fnv1.HashBytes64 } return m } @@ -53,7 +56,7 @@ func (m *Map) IsEmpty() bool { func (m *Map) Add(keys ...string) { for _, key := range keys { for i := 0; i < m.replicas; i++ { - hash := int(m.hash([]byte(strconv.Itoa(i) + key))) + hash := int(m.hash([]byte(fmt.Sprintf("%x", md5.Sum([]byte(strconv.Itoa(i)+key)))))) m.keys = append(m.keys, hash) m.hashMap[hash] = key } diff --git a/consistenthash/consistenthash_test.go b/consistenthash/consistenthash_test.go index 1a37fd7..d277f35 100644 --- a/consistenthash/consistenthash_test.go +++ b/consistenthash/consistenthash_test.go @@ -18,51 +18,45 @@ package consistenthash import ( "fmt" - "strconv" + "math/rand" + "net" "testing" + "time" + + "github.com/segmentio/fasthash/fnv1" ) func TestHashing(t *testing.T) { // Override the hash function to return easier to reason about values. Assumes // the keys can be converted to an integer. 
- hash := New(3, func(key []byte) uint32 { - i, err := strconv.Atoi(string(key)) - if err != nil { - panic(err) - } - return uint32(i) - }) + hash := New(512, nil) - // Given the above hash function, this will give replicas with "hashes": - // 2, 4, 6, 12, 14, 16, 22, 24, 26 hash.Add("6", "4", "2") testCases := map[string]string{ - "2": "2", - "11": "2", - "23": "4", - "27": "2", + "12,000": "4", + "11": "6", + "500,000": "4", + "1,000,000": "2", } for k, v := range testCases { - if hash.Get(k) != v { - t.Errorf("Asking for %s, should have yielded %s", k, v) + if got := hash.Get(k); got != v { + t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got) } } - // Adds 8, 18, 28 hash.Add("8") - // 27 should now map to 8. - testCases["27"] = "8" + testCases["11"] = "8" + testCases["1,000,000"] = "8" for k, v := range testCases { - if hash.Get(k) != v { - t.Errorf("Asking for %s, should have yielded %s", k, v) + if got := hash.Get(k); got != v { + t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got) } } - } func TestConsistency(t *testing.T) { @@ -77,13 +71,52 @@ func TestConsistency(t *testing.T) { } hash2.Add("Becky", "Ben", "Bobby") + hash1.Add("Becky", "Ben", "Bobby") if hash1.Get("Ben") != hash2.Get("Ben") || hash1.Get("Bob") != hash2.Get("Bob") || hash1.Get("Bonny") != hash2.Get("Bonny") { t.Errorf("Direct matches should always return the same entry") } +} +func TestDistribution(t *testing.T) { + hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"} + rand.Seed(time.Now().Unix()) + const cases = 10000 + + strings := make([]string, cases) + + for i := 0; i < cases; i++ { + r := rand.Int31() + ip := net.IPv4(192, byte(r>>16), byte(r>>8), byte(r)) + strings[i] = ip.String() + } + + hashFuncs := map[string]Hash{ + "fasthash/fnv1": fnv1.HashBytes64, + } + + for name, hashFunc := range hashFuncs { + t.Run(name, func(t *testing.T) { + hash := New(512, hashFunc) + hostMap := map[string]int{} + + for _, host := range 
hosts { + hash.Add(host) + hostMap[host] = 0 + } + + for i := range strings { + host := hash.Get(strings[i]) + hostMap[host]++ + } + + for host, a := range hostMap { + t.Logf("host: %s, percent: %f", host, float64(a)/cases) + } + }) + } } func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) } diff --git a/go.mod b/go.mod index 134dccd..35a1857 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module github.com/mailgun/groupcache/v2 require ( github.com/golang/protobuf v1.3.1 + github.com/segmentio/fasthash v1.0.3 github.com/sirupsen/logrus v1.6.0 ) diff --git a/go.sum b/go.sum index 11ac6d3..50c5960 100644 --- a/go.sum +++ b/go.sum @@ -12,3 +12,5 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +github.com/segmentio/fasthash v1.0.3 h1:EI9+KE1EwvMLBWwjpRDc+fEM+prwxDYbslddQGtrmhM= +github.com/segmentio/fasthash v1.0.3/go.mod h1:waKX8l2N8yckOgmSsXJi7x1ZfdKZ4x7KRMzBtS3oedY=