Skip to content

Commit 27a931a

Browse files
committed
wire: optimize parsing for CFCheckpt message, reduce allocs by 96%
In this commit, we optimize the decoding for the CFCheckpt message. The old decode routine would do a fresh alloc for each hash to be read out. Instead, we'll now allocate enough memory for the entire set of headers to be decoded, then read them into that contiguous slice, and point to members of this slice in the wire message itself. We've also added benchmarks to show the improvement: ``` ⛰ cat bench-cmp.txt goos: darwin goarch: arm64 pkg: github.com/btcsuite/btcd/wire cpu: Apple M4 Max │ bench-old.txt │ bench-new.txt │ │ sec/op │ sec/op vs base │ MsgCFCheckptDecode/headers_1000-16 14.354µ ± ∞ ¹ 6.919µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_10000-16 146.77µ ± ∞ ¹ 70.23µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_100000-16 1473.6µ ± ∞ ¹ 564.2µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptEncode/headers_1000-16 7.021µ ± ∞ ¹ 7.196µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptEncode/headers_10000-16 88.97µ ± ∞ ¹ 90.40µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptEncode/headers_100000-16 861.0µ ± ∞ ¹ 875.0µ ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecodeEmpty-16 68.28n ± ∞ ¹ 66.61n ± ∞ ¹ ~ (p=1.000 n=1) ² geomean 37.98µ 26.98µ -28.98% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 │ bench-old.txt │ bench-new.txt │ │ B/op │ B/op vs base │ MsgCFCheckptDecode/headers_1000-16 39.36Ki ± ∞ ¹ 40.11Ki ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_10000-16 392.6Ki ± ∞ ¹ 400.1Ki ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_100000-16 3.817Mi ± ∞ ¹ 3.820Mi ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptEncode/headers_1000-16 63.98Ki ± ∞ ¹ 63.98Ki ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptEncode/headers_10000-16 1.000Mi ± ∞ ¹ 1.000Mi ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptEncode/headers_100000-16 8.000Mi ± ∞ ¹ 8.000Mi ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptDecodeEmpty-16 112.0 ± ∞ ¹ 112.0 ± ∞ ¹ ~ (p=1.000 n=1) ³ geomean 166.5Ki 167.4Ki +0.55% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect 
a difference at alpha level 0.05 ³ all samples are equal │ bench-old.txt │ bench-new.txt │ │ allocs/op │ allocs/op vs base │ MsgCFCheckptDecode/headers_1000-16 1003.000 ± ∞ ¹ 4.000 ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_10000-16 10003.000 ± ∞ ¹ 4.000 ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptDecode/headers_100000-16 100003.000 ± ∞ ¹ 4.000 ± ∞ ¹ ~ (p=1.000 n=1) ² MsgCFCheckptEncode/headers_1000-16 11.00 ± ∞ ¹ 11.00 ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptEncode/headers_10000-16 15.00 ± ∞ ¹ 15.00 ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptEncode/headers_100000-16 18.00 ± ∞ ¹ 18.00 ± ∞ ¹ ~ (p=1.000 n=1) ³ MsgCFCheckptDecodeEmpty-16 2.000 ± ∞ ¹ 2.000 ± ∞ ¹ ~ (p=1.000 n=1) ³ geomean 179.3 6.268 -96.50% ¹ need >= 6 samples for confidence interval at level 0.95 ² need >= 4 samples to detect a difference at alpha level 0.05 ³ all samples are equal ``` Old bench: ``` goos: darwin goarch: arm64 pkg: github.com/btcsuite/btcd/wire cpu: Apple M4 Max BenchmarkMsgCFCheckptDecode/headers_1000-16 74678 14354 ns/op 40304 B/op 1003 allocs/op BenchmarkMsgCFCheckptDecode/headers_10000-16 8234 146770 ns/op 402033 B/op 10003 allocs/op BenchmarkMsgCFCheckptDecode/headers_100000-16 822 1473622 ns/op 4002931 B/op 100003 allocs/op BenchmarkMsgCFCheckptEncode/headers_1000-16 173762 7021 ns/op 65520 B/op 11 allocs/op BenchmarkMsgCFCheckptEncode/headers_10000-16 13459 88968 ns/op 1048564 B/op 15 allocs/op BenchmarkMsgCFCheckptEncode/headers_100000-16 1399 860985 ns/op 8388592 B/op 18 allocs/op BenchmarkMsgCFCheckptDecodeEmpty-16 17459148 68.28 ns/op 112 B/op 2 allocs/op PASS ok github.com/btcsuite/btcd/wire 10.135s ``` New bench: ``` goos: darwin goarch: arm64 pkg: github.com/btcsuite/btcd/wire cpu: Apple M4 Max BenchmarkMsgCFCheckptDecode/headers_1000-16 166368 6919 ns/op 41072 B/op 4 allocs/op BenchmarkMsgCFCheckptDecode/headers_10000-16 17079 70227 ns/op 409712 B/op 4 allocs/op BenchmarkMsgCFCheckptDecode/headers_100000-16 2062 564175 ns/op 4006003 B/op 4 allocs/op 
BenchmarkMsgCFCheckptEncode/headers_1000-16 173940 7196 ns/op 65520 B/op 11 allocs/op BenchmarkMsgCFCheckptEncode/headers_10000-16 13054 90401 ns/op 1048564 B/op 15 allocs/op BenchmarkMsgCFCheckptEncode/headers_100000-16 1408 875012 ns/op 8388592 B/op 18 allocs/op BenchmarkMsgCFCheckptDecodeEmpty-16 17256627 66.61 ns/op 112 B/op 2 allocs/op PASS ok github.com/btcsuite/btcd/wire 10.522s ``` As seen from the benchmarks, allocs have decreased by 96% (geomean), and the geomean time per op across all benchmarks has dropped by nearly 30%; for the decode benchmarks specifically, time per op is roughly halved.
1 parent 4530538 commit 27a931a

File tree

2 files changed

+148
-11
lines changed

2 files changed

+148
-11
lines changed

wire/msgcfcheckpt.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ const (
2020
// maxCFHeadersLen is the max number of filter headers we will attempt
2121
// to decode.
2222
maxCFHeadersLen = 100000
23+
24+
// maxCFCheckptPayload calculates the maximum reasonable payload size
25+
// for CF checkpoint messages.
26+
//
27+
// Calculation: 1 byte (filter type) + 32 bytes (stop hash) +
28+
// 5 bytes (varint size for a count up to maxCFHeadersLen) + (maxCFHeadersLen * 32 bytes per hash)
29+
maxCFCheckptPayload = 1 + 32 + 5 + (maxCFHeadersLen * 32)
2330
)
2431

2532
// ErrInsaneCFHeaderCount signals that we were asked to decode an
@@ -77,16 +84,24 @@ func (msg *MsgCFCheckpt) BtcDecode(r io.Reader, pver uint32, _ MessageEncoding)
7784
return ErrInsaneCFHeaderCount
7885
}
7986

80-
// Create a contiguous slice of hashes to deserialize into in order to
81-
// reduce the number of allocations.
87+
if count == 0 {
88+
msg.FilterHeaders = make([]*chainhash.Hash, 0)
89+
return nil
90+
}
91+
92+
// Optimize memory allocation by creating a single backing array for
93+
// all hashes. This reduces GC pressure and improves cache locality.
94+
hashes := make([]chainhash.Hash, count)
8295
msg.FilterHeaders = make([]*chainhash.Hash, count)
96+
97+
// Now we'll read all the hashes directly into the backing array we've
98+
// created above. We'll then point the underlying filter header hashes
99+
// into this backing array.
83100
for i := uint64(0); i < count; i++ {
84-
var cfh chainhash.Hash
85-
_, err := io.ReadFull(r, cfh[:])
86-
if err != nil {
101+
if _, err := io.ReadFull(r, hashes[i][:]); err != nil {
87102
return err
88103
}
89-
msg.FilterHeaders[i] = &cfh
104+
msg.FilterHeaders[i] = &hashes[i]
90105
}
91106

92107
return nil
@@ -151,15 +166,19 @@ func (msg *MsgCFCheckpt) Command() string {
151166
// MaxPayloadLength returns the maximum length the payload can be for the
152167
// receiver. This is part of the Message interface implementation.
153168
func (msg *MsgCFCheckpt) MaxPayloadLength(pver uint32) uint32 {
154-
// Message size depends on the blockchain height, so return general limit
155-
// for all messages.
156-
return MaxMessagePayload
169+
// Use a more precise calculation based on the maximum number of
170+
// filter headers we support. There's no reason to read more than we'll
171+
// process in BtcDecode.
172+
return maxCFCheckptPayload
157173
}
158174

159-
// NewMsgCFCheckpt returns a new bitcoin cfheaders message that conforms to
160-
// the Message interface. See MsgCFCheckpt for details.
175+
// NewMsgCFCheckpt returns a new bitcoin cfcheckpt message that conforms to the
176+
// Message interface. See MsgCFCheckpt for details.
161177
func NewMsgCFCheckpt(filterType FilterType, stopHash *chainhash.Hash,
162178
headersCount int) *MsgCFCheckpt {
179+
180+
// We pre-allocate with an exact capacity when count is known to avoid
181+
// slice growth during message construction.
163182
return &MsgCFCheckpt{
164183
FilterType: filterType,
165184
StopHash: *stopHash,

wire/msgcfcheckpt_bench_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright (c) 2018 The btcsuite developers
2+
// Use of this source code is governed by an ISC
3+
// license that can be found in the LICENSE file.
4+
5+
package wire
6+
7+
import (
8+
"bytes"
9+
"fmt"
10+
"math/rand"
11+
"testing"
12+
13+
"github.com/btcsuite/btcd/chaincfg/chainhash"
14+
)
15+
16+
// BenchmarkMsgCFCheckptDecode benchmarks decoding of MsgCFCheckpt messages
17+
// to measure the performance improvements from optimized memory allocation.
18+
func BenchmarkMsgCFCheckptDecode(b *testing.B) {
19+
pver := ProtocolVersion
20+
21+
// Test with varying number of headers: 1k, 10k, 100k.
22+
headerCounts := []int{1000, 10000, 100000}
23+
24+
for _, numHeaders := range headerCounts {
25+
b.Run(fmt.Sprintf("headers_%d", numHeaders), func(b *testing.B) {
26+
var buf bytes.Buffer
27+
msg := NewMsgCFCheckpt(
28+
GCSFilterRegular, &chainhash.Hash{}, numHeaders,
29+
)
30+
31+
rng := rand.New(rand.NewSource(12345))
32+
for i := 0; i < numHeaders; i++ {
33+
hash := chainhash.Hash{}
34+
rng.Read(hash[:])
35+
msg.AddCFHeader(&hash)
36+
}
37+
38+
err := msg.BtcEncode(&buf, pver, BaseEncoding)
39+
if err != nil {
40+
b.Fatal(err)
41+
}
42+
43+
encodedMsg := buf.Bytes()
44+
45+
b.ResetTimer()
46+
b.ReportAllocs()
47+
48+
for i := 0; i < b.N; i++ {
49+
r := bytes.NewReader(encodedMsg)
50+
51+
var msg MsgCFCheckpt
52+
err := msg.BtcDecode(r, pver, BaseEncoding)
53+
if err != nil {
54+
b.Fatal(err)
55+
}
56+
}
57+
})
58+
}
59+
}
60+
61+
// BenchmarkMsgCFCheckptEncode benchmarks encoding of MsgCFCheckpt messages.
62+
func BenchmarkMsgCFCheckptEncode(b *testing.B) {
63+
pver := ProtocolVersion
64+
65+
// Test with varying number of headers: 1k, 10k, 100k.
66+
headerCounts := []int{1000, 10000, 100000}
67+
68+
for _, numHeaders := range headerCounts {
69+
b.Run(fmt.Sprintf("headers_%d", numHeaders), func(b *testing.B) {
70+
msg := NewMsgCFCheckpt(
71+
GCSFilterRegular, &chainhash.Hash{}, numHeaders,
72+
)
73+
74+
rng := rand.New(rand.NewSource(12345))
75+
for i := 0; i < numHeaders; i++ {
76+
hash := chainhash.Hash{}
77+
rng.Read(hash[:])
78+
msg.AddCFHeader(&hash)
79+
}
80+
81+
b.ResetTimer()
82+
b.ReportAllocs()
83+
84+
for i := 0; i < b.N; i++ {
85+
var buf bytes.Buffer
86+
err := msg.BtcEncode(&buf, pver, BaseEncoding)
87+
if err != nil {
88+
b.Fatal(err)
89+
}
90+
}
91+
})
92+
}
93+
}
94+
95+
// BenchmarkMsgCFCheckptDecodeEmpty benchmarks decoding empty checkpoint
96+
// messages to ensure edge cases are handled efficiently.
97+
func BenchmarkMsgCFCheckptDecodeEmpty(b *testing.B) {
98+
pver := ProtocolVersion
99+
100+
var buf bytes.Buffer
101+
msg := NewMsgCFCheckpt(GCSFilterRegular, &chainhash.Hash{}, 0)
102+
if err := msg.BtcEncode(&buf, pver, BaseEncoding); err != nil {
103+
b.Fatal(err)
104+
}
105+
encodedMsg := buf.Bytes()
106+
107+
b.ResetTimer()
108+
b.ReportAllocs()
109+
110+
for i := 0; i < b.N; i++ {
111+
r := bytes.NewReader(encodedMsg)
112+
var msg MsgCFCheckpt
113+
if err := msg.BtcDecode(r, pver, BaseEncoding); err != nil {
114+
b.Fatal(err)
115+
}
116+
}
117+
}
118+

0 commit comments

Comments
 (0)