Skip to content

Commit b739d2d

Browse files
authored
feat: track performance of processFinalizedCheckpoint() (#8530)
**Motivation** - investigate and maintain the performance of `processFinalizedCheckpoint()` - this is part of #8526 **Description** - track duration of `processFinalizedCheckpoint()` by tasks, the result on a hoodi node, it shows that `FrequencyStateArchiveStrategy` takes the most time <img width="941" height="297" alt="Screenshot 2025-10-14 at 13 45 38" src="https://github.com/user-attachments/assets/ef440399-538b-4a4a-a63c-e775745b25e6" /> - track different steps of `FrequencyStateArchiveStrategy`, the result shows that the mainthread is blocked by different db queries cc @wemeetagain <img width="1291" height="657" alt="Screenshot 2025-10-14 at 13 46 36" src="https://github.com/user-attachments/assets/3b19f008-c7d8-49a4-9dc5-e68b1a5ba2a5" /> part of #8526 --------- Co-authored-by: Tuyen Nguyen <[email protected]>
1 parent c7f3e8d commit b739d2d

File tree

3 files changed

+71
-4
lines changed

3 files changed

+71
-4
lines changed

packages/beacon-node/src/chain/archiveStore/archiveStore.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ type ArchiveStoreModules = {
2525

2626
type ArchiveStoreInitOpts = ArchiveStoreOpts & {dbName: string; anchorState: {finalizedCheckpoint: Checkpoint}};
2727

28+
export enum ArchiveStoreTask {
29+
ArchiveBlocks = "archive_blocks",
30+
PruneHistory = "prune_history",
31+
OnFinalizedCheckpoint = "on_finalized_checkpoint",
32+
MaybeArchiveState = "maybe_archive_state",
33+
RegenPruneOnFinalized = "regen_prune_on_finalized",
34+
ForkchoicePrune = "forkchoice_prune",
35+
UpdateBackfillRange = "update_backfill_range",
36+
}
37+
2838
/**
2939
* Used for running tasks that depends on some events or are executed
3040
* periodically.
@@ -176,6 +186,8 @@ export class ArchiveStore {
176186
try {
177187
const finalizedEpoch = finalized.epoch;
178188
this.logger.verbose("Start processing finalized checkpoint", {epoch: finalizedEpoch, rootHex: finalized.rootHex});
189+
190+
let timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
179191
await archiveBlocks(
180192
this.chain.config,
181193
this.db,
@@ -188,7 +200,10 @@ export class ArchiveStore {
188200
this.chain.opts.persistOrphanedBlocks,
189201
this.chain.opts.persistOrphanedBlocksDir
190202
);
203+
timer?.({source: ArchiveStoreTask.ArchiveBlocks});
204+
191205
if (this.opts.pruneHistory) {
206+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
192207
await pruneHistory(
193208
this.chain.config,
194209
this.db,
@@ -197,18 +212,30 @@ export class ArchiveStore {
197212
finalizedEpoch,
198213
this.chain.clock.currentEpoch
199214
);
215+
timer?.({source: ArchiveStoreTask.PruneHistory});
200216
}
201217

218+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
202219
await this.statesArchiverStrategy.onFinalizedCheckpoint(finalized, this.metrics);
220+
timer?.({source: ArchiveStoreTask.OnFinalizedCheckpoint});
203221

204222
// should be after ArchiveBlocksTask to handle restart cleanly
223+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
205224
await this.statesArchiverStrategy.maybeArchiveState(finalized, this.metrics);
225+
timer?.({source: ArchiveStoreTask.MaybeArchiveState});
206226

227+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
207228
this.chain.regen.pruneOnFinalized(finalizedEpoch);
229+
timer?.({source: ArchiveStoreTask.RegenPruneOnFinalized});
208230

209231
// tasks rely on extended fork choice
232+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
210233
const prunedBlocks = this.chain.forkChoice.prune(finalized.rootHex);
234+
timer?.({source: ArchiveStoreTask.ForkchoicePrune});
235+
236+
timer = this.metrics?.processFinalizedCheckpoint.durationByTask.startTimer();
211237
await updateBackfillRange({chain: this.chain, db: this.db, logger: this.logger}, finalized);
238+
timer?.({source: ArchiveStoreTask.UpdateBackfillRange});
212239

213240
this.logger.verbose("Finish processing finalized checkpoint", {
214241
epoch: finalizedEpoch,

packages/beacon-node/src/chain/archiveStore/strategies/frequencyStateArchiveStrategy.ts

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ import {StateArchiveStrategy, StatesArchiveOpts} from "../interface.js";
1717
*/
1818
export const PERSIST_TEMP_STATE_EVERY_EPOCHS = 32;
1919

20+
export enum FrequencyStateArchiveStep {
21+
LoadLastStoredSlot = "load_last_stored_slot",
22+
GetFinalizedState = "get_finalized_state",
23+
// SerializeState is tracked via stateSerializeDuration metric
24+
PersistState = "persist_state",
25+
LoadStoredSlotsToDelete = "load_stored_slots_to_delete",
26+
DeleteOldStates = "delete_old_states",
27+
}
28+
2029
/**
2130
* Archives finalized states from active bucket to archive bucket.
2231
*
@@ -47,7 +56,10 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
4756
* ```
4857
*/
4958
async maybeArchiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
59+
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
5060
const lastStoredSlot = await this.db.stateArchive.lastKey();
61+
timer?.({step: FrequencyStateArchiveStep.LoadLastStoredSlot});
62+
5163
const lastStoredEpoch = computeEpochAtSlot(lastStoredSlot ?? 0);
5264
const {archiveStateEpochFrequency} = this.opts;
5365

@@ -60,15 +72,19 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
6072
(Math.floor(finalized.epoch / archiveStateEpochFrequency) - 1) * archiveStateEpochFrequency
6173
);
6274

75+
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
6376
const storedStateSlots = await this.db.stateArchive.keys({
6477
lt: computeStartSlotAtEpoch(finalized.epoch),
6578
gte: computeStartSlotAtEpoch(minEpoch),
6679
});
80+
timer?.({step: FrequencyStateArchiveStep.LoadStoredSlotsToDelete});
6781

6882
const statesSlotsToDelete = computeStateSlotsToDelete(storedStateSlots, archiveStateEpochFrequency);
83+
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
6984
if (statesSlotsToDelete.length > 0) {
7085
await this.db.stateArchive.batchDelete(statesSlotsToDelete);
7186
}
87+
timer?.({step: FrequencyStateArchiveStep.DeleteOldStates});
7288

7389
// More logs to investigate the rss spike issue https://github.com/ChainSafe/lodestar/issues/5591
7490
this.logger.verbose("Archived state completed", {
@@ -86,24 +102,31 @@ export class FrequencyStateArchiveStrategy implements StateArchiveStrategy {
86102
*/
87103
async archiveState(finalized: CheckpointWithHex, metrics?: Metrics | null): Promise<void> {
88104
// starting from Mar 2024, the finalized state could be from disk or in memory
105+
let timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
89106
const finalizedStateOrBytes = await this.regen.getCheckpointStateOrBytes(finalized);
107+
timer?.({step: FrequencyStateArchiveStep.GetFinalizedState});
108+
90109
const {rootHex} = finalized;
91110
if (!finalizedStateOrBytes) {
92111
throw Error(`No state in cache for finalized checkpoint state epoch #${finalized.epoch} root ${rootHex}`);
93112
}
94113
if (finalizedStateOrBytes instanceof Uint8Array) {
95114
const slot = getStateSlotFromBytes(finalizedStateOrBytes);
115+
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
96116
await this.db.stateArchive.putBinary(slot, finalizedStateOrBytes);
117+
timer?.({step: FrequencyStateArchiveStep.PersistState});
97118
this.logger.verbose("Archived finalized state bytes", {epoch: finalized.epoch, slot, root: rootHex});
98119
} else {
99120
// serialize state using BufferPool if provided
100-
const timer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
121+
const sszTimer = metrics?.stateSerializeDuration.startTimer({source: AllocSource.ARCHIVE_STATE});
101122
await serializeState(
102123
finalizedStateOrBytes,
103124
AllocSource.ARCHIVE_STATE,
104-
(stateBytes) => {
105-
timer?.();
106-
return this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
125+
async (stateBytes) => {
126+
sszTimer?.();
127+
timer = metrics?.processFinalizedCheckpoint.frequencyStateArchive.startTimer();
128+
await this.db.stateArchive.putBinary(finalizedStateOrBytes.slot, stateBytes);
129+
timer?.({step: FrequencyStateArchiveStep.PersistState});
107130
},
108131
this.bufferPool
109132
);

packages/beacon-node/src/metrics/metrics/lodestar.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
/** biome-ignore-all lint/suspicious/noTemplateCurlyInString: The metric templates requires to have `${}` in a normal string */
22
import {NotReorgedReason} from "@lodestar/fork-choice";
3+
import {ArchiveStoreTask} from "../../chain/archiveStore/archiveStore.js";
4+
import {FrequencyStateArchiveStep} from "../../chain/archiveStore/strategies/frequencyStateArchiveStrategy.js";
35
import {BlockInputSource} from "../../chain/blocks/blockInput/index.js";
46
import {JobQueueItemType} from "../../chain/bls/index.js";
57
import {AttestationErrorCode, BlockErrorCode} from "../../chain/errors/index.js";
@@ -1420,6 +1422,21 @@ export function createLodestarMetrics(
14201422
},
14211423
},
14221424

1425+
processFinalizedCheckpoint: {
1426+
durationByTask: register.histogram<{source: ArchiveStoreTask}>({
1427+
name: "lodestar_process_finalized_checkpoint_seconds",
1428+
help: "Histogram of time to process finalized checkpoint",
1429+
buckets: [0.1, 0.5, 1, 2, 4, 8],
1430+
labelNames: ["source"],
1431+
}),
1432+
frequencyStateArchive: register.histogram<{step: FrequencyStateArchiveStep}>({
1433+
name: "lodestar_process_finalized_checkpoint_frequency_state_archive_seconds",
1434+
help: "Histogram of FrequencyStateArchive duration by step",
1435+
buckets: [0.1, 0.5, 1, 2, 4, 8],
1436+
labelNames: ["step"],
1437+
}),
1438+
},
1439+
14231440
regenFnCallTotal: register.gauge<{entrypoint: RegenFnName; caller: RegenCaller}>({
14241441
name: "lodestar_regen_fn_call_total",
14251442
help: "Total number of calls for regen functions",

0 commit comments

Comments
 (0)