Consistenthash improvements

* Now uses the much faster fnv1
* Now md5 hashes the keys to help distribute hosts more evenly in some
  cases.
This commit is contained in:
Derrick J. Wippler 2020-12-18 21:22:07 -06:00 committed by Derrick Wippler
parent 65c4ea5d77
commit ff66388131
5 changed files with 71 additions and 26 deletions

View File

@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.2.1] - 2021-01-13
### Changes
* Now uses the much faster fnv1
* Now md5 hashes the keys to help distribute hosts more evenly in some
cases.
## [2.2.0] - 2019-07-09
### Added
* Added `SetLogger()` to pass in a logrus entry for logging peer errors

View File

@ -18,12 +18,15 @@ limitations under the License.
package consistenthash
import (
"hash/crc32"
"crypto/md5"
"fmt"
"sort"
"strconv"
"github.com/segmentio/fasthash/fnv1"
)
// Hash is the signature of the hashing function stored in a Map and applied to
// byte-encoded keys.
type Hash func(data []byte) uint32
type Hash func(data []byte) uint64
type Map struct {
hash Hash
@ -39,7 +42,7 @@ func New(replicas int, fn Hash) *Map {
hashMap: make(map[int]string),
}
if m.hash == nil {
m.hash = crc32.ChecksumIEEE
m.hash = fnv1.HashBytes64
}
return m
}
@ -53,7 +56,7 @@ func (m *Map) IsEmpty() bool {
func (m *Map) Add(keys ...string) {
for _, key := range keys {
for i := 0; i < m.replicas; i++ {
hash := int(m.hash([]byte(strconv.Itoa(i) + key)))
hash := int(m.hash([]byte(fmt.Sprintf("%x", md5.Sum([]byte(strconv.Itoa(i)+key))))))
m.keys = append(m.keys, hash)
m.hashMap[hash] = key
}

View File

@ -18,51 +18,45 @@ package consistenthash
import (
"fmt"
"strconv"
"math/rand"
"net"
"testing"
"time"
"github.com/segmentio/fasthash/fnv1"
)
// TestHashing adds the hosts "6", "4" and "2" to a Map, checks that every key
// in testCases resolves to its expected host, then adds host "8" and repeats
// the check with updated expectations.
func TestHashing(t *testing.T) {
// Override the hash function to return easier to reason about values. Assumes
// the keys can be converted to an integer; a key that cannot be converted
// makes the function panic.
hash := New(3, func(key []byte) uint32 {
i, err := strconv.Atoi(string(key))
if err != nil {
panic(err)
}
return uint32(i)
})
// A nil hash function makes New fall back to its built-in default.
hash := New(512, nil)
// With the integer-parsing hash function and 3 replicas, this gives replicas
// with "hashes": 2, 4, 6, 12, 14, 16, 22, 24, 26.
// NOTE(review): those values do not describe the default hash selected by
// New(512, nil); the expectations in testCases are presumably observed outputs
// of that default — confirm.
hash.Add("6", "4", "2")
testCases := map[string]string{
"2": "2",
"11": "2",
"23": "4",
"27": "2",
"12,000": "4",
"11": "6",
"500,000": "4",
"1,000,000": "2",
}
// Every lookup key must resolve to the host recorded for it in testCases.
for k, v := range testCases {
if hash.Get(k) != v {
t.Errorf("Asking for %s, should have yielded %s", k, v)
if got := hash.Get(k); got != v {
t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got)
}
}
// Under the integer-parsing hash function this adds replicas 8, 18, 28.
hash.Add("8")
// Update the expectations for the keys that should now map to 8.
testCases["27"] = "8"
testCases["11"] = "8"
testCases["1,000,000"] = "8"
// Re-run the same check against the updated expectations.
for k, v := range testCases {
if hash.Get(k) != v {
t.Errorf("Asking for %s, should have yielded %s", k, v)
if got := hash.Get(k); got != v {
t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got)
}
}
}
func TestConsistency(t *testing.T) {
@ -77,13 +71,52 @@ func TestConsistency(t *testing.T) {
}
hash2.Add("Becky", "Ben", "Bobby")
hash1.Add("Becky", "Ben", "Bobby")
if hash1.Get("Ben") != hash2.Get("Ben") ||
hash1.Get("Bob") != hash2.Get("Bob") ||
hash1.Get("Bonny") != hash2.Get("Bonny") {
t.Errorf("Direct matches should always return the same entry")
}
}
// TestDistribution logs how 10000 randomly generated 192.x.x.x addresses are
// shared out among three hosts for every hash function under test. It only
// reports the per-host fraction through t.Logf and makes no assertions.
func TestDistribution(t *testing.T) {
	const cases = 10000
	hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"}

	// Seed from the wall clock so each run draws a different sample of keys.
	rand.Seed(time.Now().Unix())

	// Build the lookup keys: the first octet is fixed at 192, the remaining
	// three come from the low 24 bits of a random value.
	lookupKeys := make([]string, cases)
	for idx := range lookupKeys {
		n := rand.Int31()
		lookupKeys[idx] = net.IPv4(192, byte(n>>16), byte(n>>8), byte(n)).String()
	}

	candidates := map[string]Hash{
		"fasthash/fnv1": fnv1.HashBytes64,
	}

	for label, fn := range candidates {
		t.Run(label, func(t *testing.T) {
			ring := New(512, fn)

			// Start every host at zero so hosts that receive no keys are
			// still reported.
			counts := map[string]int{}
			for _, h := range hosts {
				ring.Add(h)
				counts[h] = 0
			}

			for _, key := range lookupKeys {
				counts[ring.Get(key)]++
			}

			for h, hits := range counts {
				t.Logf("host: %s, percent: %f", h, float64(hits)/cases)
			}
		})
	}
}
// BenchmarkGet8 runs the shared benchmarkGet helper with 8 as its second
// argument.
func BenchmarkGet8(b *testing.B) {
	benchmarkGet(b, 8)
}

1
go.mod
View File

@ -2,6 +2,7 @@ module github.com/mailgun/groupcache/v2
require (
github.com/golang/protobuf v1.3.1
github.com/segmentio/fasthash v1.0.3
github.com/sirupsen/logrus v1.6.0
)

2
go.sum
View File

@ -12,3 +12,5 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
github.com/segmentio/fasthash v1.0.3 h1:EI9+KE1EwvMLBWwjpRDc+fEM+prwxDYbslddQGtrmhM=
github.com/segmentio/fasthash v1.0.3/go.mod h1:waKX8l2N8yckOgmSsXJi7x1ZfdKZ4x7KRMzBtS3oedY=