From 72666c84d299423e672e13f061a2ef9e7131a169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Wed, 5 Feb 2025 22:32:40 +0100 Subject: [PATCH] Some performance optimizations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Avoid creating new `reflect.Value`s for common int, uint and bool types. * Use `io.WriteString` to write strings. This uses `io.StringWriter` if it exists, which is implemented by e.g. github.com/cespare/xxhash. Compared to master: ``` goos: darwin goarch: arm64 pkg: github.com/gohugoio/hashstructure cpu: Apple M1 Pro │ cmpmaster.bench │ perf-20250205.bench │ │ sec/op │ sec/op vs base │ Map-10 1.963µ ± 17% 1.291µ ± 13% -34.23% (p=0.002 n=6) String/default-10 80.90n ± 1% 84.41n ± 0% +4.34% (p=0.002 n=6) String/xxhash-10 54.83n ± 1% 40.12n ± 0% -26.82% (p=0.002 n=6) geomean 205.7n 163.5n -20.52% │ cmpmaster.bench │ perf-20250205.bench │ │ B/op │ B/op vs base │ Map-10 573.5 ± 22% 382.0 ± 13% -33.39% (p=0.002 n=6) String/default-10 56.00 ± 0% 56.00 ± 0% ~ (p=1.000 n=6) ¹ String/xxhash-10 48.00 ± 0% 16.00 ± 0% -66.67% (p=0.002 n=6) geomean 115.5 69.95 -39.45% ¹ all samples are equal │ cmpmaster.bench │ perf-20250205.bench │ │ allocs/op │ allocs/op vs base │ Map-10 57.50 ± 20% 37.00 ± 14% -35.65% (p=0.002 n=6) String/default-10 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=6) ¹ String/xxhash-10 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.002 n=6) geomean 7.014 4.806 -31.48% ``` Compared to `mitchellh/hashstructure`: ``` goos: darwin goarch: arm64 pkg: github.com/gohugoio/hashstructure cpu: Apple M1 Pro │ cmpfork.bench │ perf-20250205.bench │ │ sec/op │ sec/op vs base │ Map-10 2.789µ ± 6% 1.292µ ± 41% -53.69% (p=0.002 n=6) String/default-10 83.45n ± 0% 87.36n ± 1% +4.69% (p=0.002 n=6) String/xxhash-10 56.19n ± 0% 41.59n ± 1% -25.98% (p=0.002 n=6) geomean 235.6n 167.4n -28.94% │ cmpfork.bench │ perf-20250205.bench │ │ B/op │ B/op vs base │ Map-10 1461.0 ± 6% 393.0 ± 27% -73.10% (p=0.002 n=6) 
String/default-10 56.00 ± 0% 56.00 ± 0% ~ (p=1.000 n=6) ¹ String/xxhash-10 48.00 ± 0% 16.00 ± 0% -66.67% (p=0.002 n=6) geomean 157.8 70.62 -55.24% ¹ all samples are equal │ cmpfork.bench │ perf-20250205.bench │ │ allocs/op │ allocs/op vs base │ Map-10 87.50 ± 9% 36.50 ± 40% -58.29% (p=0.002 n=6) String/default-10 3.000 ± 0% 3.000 ± 0% ~ (p=1.000 n=6) ¹ String/xxhash-10 2.000 ± 0% 1.000 ± 0% -50.00% (p=0.002 n=6) geomean 8.067 4.784 -40.70% ``` --- go.mod | 2 ++ go.sum | 2 ++ hashstructure.go | 54 ++++++++++++++++++++++++++++--------------- hashstructure_test.go | 19 +++++++++++++++ 4 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 go.sum diff --git a/go.mod b/go.mod index 7d12202..093fcaa 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/gohugoio/hashstructure go 1.18 + +require github.com/cespare/xxhash/v2 v2.3.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1987830 --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= diff --git a/hashstructure.go b/hashstructure.go index 29b4bc4..fe5ffd2 100644 --- a/hashstructure.go +++ b/hashstructure.go @@ -5,6 +5,7 @@ import ( "fmt" "hash" "hash/fnv" + "io" "reflect" "time" ) @@ -122,6 +123,13 @@ type visitOpts struct { var timeType = reflect.TypeOf(time.Time{}) +// A direct hash calculation used for numeric and bool values. +func (w *walker) hashDirect(v any) (uint64, error) { + w.h.Reset() + err := binary.Write(w.h, binary.LittleEndian, v) + return w.h.Sum64(), err +} + func (w *walker) visit(v reflect.Value, opts *visitOpts) (uint64, error) { t := reflect.TypeOf(0) @@ -152,29 +160,34 @@ func (w *walker) visit(v reflect.Value, opts *visitOpts) (uint64, error) { v = reflect.Zero(t) } - // Binary writing can use raw ints, we have to convert to - // a sized-int, we'll choose the largest...
- switch v.Kind() { - case reflect.Int: - v = reflect.ValueOf(int64(v.Int())) - case reflect.Uint: - v = reflect.ValueOf(uint64(v.Uint())) - case reflect.Bool: - var tmp int8 - if v.Bool() { - tmp = 1 + if v.CanInt() { + if v.Kind() == reflect.Int { + // binary.Write requires a fixed-size value. + return w.hashDirect(v.Int()) } - v = reflect.ValueOf(tmp) + return w.hashDirect(v.Interface()) + } + + if v.CanUint() { + if v.Kind() == reflect.Uint { + // binary.Write requires a fixed-size value. + return w.hashDirect(v.Uint()) + } + return w.hashDirect(v.Interface()) + } + + if v.CanFloat() { + return w.hashDirect(v.Interface()) } k := v.Kind() - // We can shortcut numeric values by directly binary writing them - if k >= reflect.Int && k <= reflect.Complex64 { - // A direct hash calculation - w.h.Reset() - err := binary.Write(w.h, binary.LittleEndian, v.Interface()) - return w.h.Sum64(), err + if k == reflect.Bool { + var tmp int8 + if v.Bool() { + tmp = 1 + } + return w.hashDirect(tmp) } switch v.Type() { @@ -394,7 +407,10 @@ func (w *walker) visit(v reflect.Value, opts *visitOpts) (uint64, error) { case reflect.String: // Directly hash w.h.Reset() - _, err := w.h.Write([]byte(v.String())) + + // io.WriteString uses io.StringWriter if it exists, which is + // implemented by e.g. github.com/cespare/xxhash. 
+ _, err := io.WriteString(w.h, v.String()) return w.h.Sum64(), err default: diff --git a/hashstructure_test.go b/hashstructure_test.go index 0bc6909..6a5e186 100644 --- a/hashstructure_test.go +++ b/hashstructure_test.go @@ -5,6 +5,8 @@ import ( "strings" "testing" "time" + + "github.com/cespare/xxhash/v2" ) func TestHash_identity(t *testing.T) { @@ -727,6 +729,7 @@ func TestHash_golden(t *testing.T) { In: int64(42), Expect: 11375694726533372055, }, + { In: uint16(42), Expect: 590708257076254031, @@ -846,6 +849,22 @@ func BenchmarkMap(b *testing.B) { } } +func BenchmarkString(b *testing.B) { + s := "lorem ipsum dolor sit amet" + b.Run("default", func(b *testing.B) { + for i := 0; i < b.N; i++ { + Hash(s, nil) + } + }) + + b.Run("xxhash", func(b *testing.B) { + opts := &HashOptions{Hasher: xxhash.New()} + for i := 0; i < b.N; i++ { + Hash(s, opts) + } + }) +} + type testIncludable struct { Value string Ignore string