-
Notifications
You must be signed in to change notification settings - Fork 443
Open
Labels
🐞 bugSomething isn't workingSomething isn't working
Milestone
Description
Sentry Nodes lagging behind validator in consensus process
Description
I have configured a network with 2 Sentry nodes and 1 validator. Beyond the default values and the values necessary to set up a node as a Sentry, here is a copy of the config.toml for a Sentry node:
# Mechanism to connect to the ABCI application: socket | grpc
abci = "socket"
# Database backend: goleveldb | boltdb
# * goleveldb (github.com/syndtr/goleveldb - most popular implementation)
# - pure go
# - stable
# * boltdb (uses etcd's fork of bolt - go.etcd.io/bbolt)
# - EXPERIMENTAL
# - may be faster in some use-cases (random reads - indexer)
# - use boltdb build tag (go build -tags boltdb)
db_backend = "goleveldb"
# Database directory
db_dir = "db"
# If this node is many blocks behind the tip of the chain, FastSync
# allows them to catchup quickly by downloading blocks in parallel
# and verifying their commits
fast_sync = true
# If true, query the ABCI app on connecting to a new peer
# so the app can decide if we should keep the connection or not
filter_peers = false
home = ""
# A custom human readable name for this node
moniker = "sen1"
# Path to the JSON file containing the private key to use for node authentication in the p2p protocol
node_key_file = "secrets/node_key.json"
# Path to the JSON file containing the private key to use as a validator in the consensus protocol
priv_validator_key_file = "secrets/priv_validator_key.json"
# TCP or UNIX socket address for Tendermint to listen on for
# connections from an external PrivValidator process
priv_validator_laddr = ""
# Path to the JSON file containing the last sign state of a validator
priv_validator_state_file = "priv_validator_state.json"
# TCP or UNIX socket address for the profiling server to listen on
prof_laddr = ""
# TCP or UNIX socket address of the ABCI application,
# or the name of an ABCI application compiled in with the Tendermint binary
proxy_app = "tcp://127.0.0.1:26658"
##### consensus configuration options #####
[consensus]
# EmptyBlocks mode and possible interval between empty blocks
create_empty_blocks = true
create_empty_blocks_interval = "0s"
home = ""
# Reactor sleep duration parameters
peer_gossip_sleep_duration = "100ms"
peer_query_maj23_sleep_duration = "2s"
# Make progress as soon as we have all the precommits (as if TimeoutCommit = 0)
skip_timeout_commit = false
timeout_commit = "1s"
timeout_precommit = "1s"
timeout_precommit_delta = "500ms"
timeout_prevote = "1s"
timeout_prevote_delta = "500ms"
timeout_propose = "3s"
timeout_propose_delta = "500ms"
wal_file = "wal/cs.wal/wal"
##### mempool configuration options #####
[mempool]
broadcast = true
# Size of the cache (used to filter transactions we saw earlier) in transactions
cache_size = 10000
home = ""
# Limit the total size of all txs in the mempool.
# This only accounts for raw transactions (e.g. given 1MB transactions and
# max_pending_txs_bytes=5MB, mempool will only accept 5 transactions).
max_pending_txs_bytes = 1073741824
recheck = true
# Maximum number of transactions in the mempool
size = 5000
wal_dir = ""
##### peer to peer configuration options #####
[p2p]
# Toggle to disable guard against peers connecting from the same ip.
allow_duplicate_ip = false
dial_timeout = "3s"
# Address to advertise to peers for them to dial
# If empty, will use the same port as the laddr,
# and will introspect on the listener or use UPnP
# to figure out the address.
external_address = ""
# Time to wait before flushing messages out on the connection
flush_throttle_timeout = "10ms"
# Peer connection configuration.
handshake_timeout = "20s"
home = ""
# Address to listen for incoming connections
laddr = "tcp://gnodevx-gnoland-sen1-0:26656"
# Maximum number of inbound peers
max_num_inbound_peers = 40
# Maximum number of outbound peers to connect to, excluding persistent peers
max_num_outbound_peers = 10
# Maximum size of a message packet payload, in bytes
max_packet_msg_payload_size = 10240
# Comma separated list of nodes to keep persistent connections to
persistent_peers = "g10kvhns4t8a49vvc8uk2rrgafk653ynx8qs4h98@gnodevx-gnoland-val1-headless.gnoland:26656,g1phpp92d4a60376yr4vpfff2q4a9gh4m8yf09hr@gnodevx-gnoland-sen2-headless.gnoland:26656"
# Set true to enable the peer-exchange reactor
pex = true
# Comma separated list of peer IDs to keep private (will not be gossiped to other peers)
private_peer_ids = "g10kvhns4t8a49vvc8uk2rrgafk653ynx8qs4h98"
# Rate at which packets can be received, in bytes/second
recv_rate = 20000000
# Seed mode, in which node constantly crawls the network and looks for
# peers. If another node asks it for addresses, it responds and disconnects.
#
# Does not work if the peer-exchange reactor is disabled.
seed_mode = false
# Issue: https://github.com/gnolang/gno/issues/2308
# Comma separated list of seed nodes to connect to
seeds = "g1phpp92d4a60376yr4vpfff2q4a9gh4m8yf09hr@gnodevx-gnoland-sen2-headless.gnoland:26656"
# Rate at which packets can be sent, in bytes/second
send_rate = 20000000
test_dial_fail = false
test_fuzz = false
# UPNP port forwarding
upnp = false
[p2p.test_fuzz_config]
MaxDelay = "3s"
Mode = 0
ProbDropConn = 0.0
ProbDropRW = 0.2
ProbSleep = 0.0
##### rpc server configuration options #####
[rpc]
# A list of non simple headers the client is allowed to use with cross-domain requests
cors_allowed_headers = ["Origin", "Accept", "Content-Type", "X-Requested-With", "X-Server-Time"]
# A list of methods the client is allowed to use with cross-domain requests
cors_allowed_methods = ["HEAD", "GET", "POST", "OPTIONS"]
# A list of origins a cross-domain request can be executed from
# Default value '[]' disables cors support
# Use '["*"]' to allow any origin
cors_allowed_origins = ["*"]
# TCP or UNIX socket address for the gRPC server to listen on
# NOTE: This server only supports /broadcast_tx_commit
grpc_laddr = ""
# Maximum number of simultaneous connections.
# Does not include RPC (HTTP&WebSocket) connections. See max_open_connections
# If you want to accept a larger number than the default, make sure
# you increase your OS limits.
# 0 - unlimited.
# Should be < {ulimit -Sn} - {MaxNumInboundPeers} - {MaxNumOutboundPeers} - {N of wal, db and other open files}
# 1024 - 40 - 10 - 50 = 924 = ~900
grpc_max_open_connections = 900
home = ""
# TCP or UNIX socket address for the RPC server to listen on
laddr = "tcp://gnodevx-gnoland-sen1-0:26657"
# Maximum size of request body, in bytes
max_body_bytes = 1000000
# Maximum size of request header, in bytes
max_header_bytes = 1048576
# Maximum number of simultaneous connections (including WebSocket).
# Does not include gRPC connections. See grpc_max_open_connections
# If you want to accept a larger number than the default, make sure
# you increase your OS limits.
# 0 - unlimited.
# Should be < {ulimit -Sn} - {MaxNumInboundPeers} - {MaxNumOutboundPeers} - {N of wal, db and other open files}
# 1024 - 40 - 10 - 50 = 924 = ~900
max_open_connections = 900
# How long to wait for a tx to be committed during /broadcast_tx_commit.
# WARNING: Using a value larger than 10s will result in increasing the
# global HTTP write timeout, which applies to all connections and endpoints.
# See https://github.com/tendermint/classic/issues/3435
timeout_broadcast_tx_commit = "10s"
# The path to a file containing certificate that is used to create the HTTPS server.
# Might be either absolute path or path related to tendermint's config directory.
# If the certificate is signed by a certificate authority,
# the certFile should be the concatenation of the server's certificate, any intermediates,
# and the CA's certificate.
# NOTE: both tls_cert_file and tls_key_file must be present for Tendermint to create HTTPS server. Otherwise, HTTP server is run.
tls_cert_file = ""
# The path to a file containing matching private key that is used to create the HTTPS server.
# Might be either absolute path or path related to tendermint's config directory.
# NOTE: both tls_cert_file and tls_key_file must be present for Tendermint to create HTTPS server. Otherwise, HTTP server is run.
tls_key_file = ""
# Activate unsafe RPC commands like /dial_seeds and /unsafe_flush_mempool
unsafe = false
##### node telemetry #####
[telemetry]
enabled = true
# the endpoint to export metrics to, like a local OpenTelemetry collector
exporter_endpoint = "grafana-k8s-monitoring-alloy.grafana-system.svc.cluster.local:4317"
meter_name = "gnodevx"
service_name = "gnodevx"
##### event store #####
[tx_event_store]
# Type of event store
event_store_type = "none"
# Event store parameters
[tx_event_store.event_store_params]
The Sentry nodes, which run as full nodes, are left behind compared to the validator.
Your environment
- v1.29.5-gke.1060000
- 0.1.0-d2d34eb6-nightly
- branch that causes this issue (with the commit hash)
Steps to reproduce
Spin up a network from scratch crafting a genesis.
- Spin up 2 Sentry nodes
- Spin up 1 Validator
- Observe that the Sentry nodes report the error shown in the logs below
Expected behaviour
Sentry nodes commit each new block right after the validator commits it following consensus.
Actual behaviour
The validator gossips the transactions and finishes consensus faster than the sentry. The sentry receives the commit before it has received the block, so it cannot continue because it does not know the block the commit refers to.
Logs
2024-06-25T09:22:49.346Z INFO Commit is for a block we don't know about. Set ProposalBlock=nil {"module": "consensus", "height": 69291, "commitRound": 0, "proposal": "", "commit": "XVJ+juEVDEzxz9rhDz9WyiHHnKGhMuwqWUrvY2ob30w="}
2024-06-25T09:22:49.346Z INFO Attempt to finalize failed. We don't have the commit block. {"module": "consensus", "height": 69291, "proposal-block": "", "commit-block": "XVJ+juEVDEzxz9rhDz9WyiHHnKGhMuwqWUrvY2ob30w="}
Proposed solution
r3v4s
Metadata
Metadata
Assignees
Labels
🐞 bugSomething isn't workingSomething isn't working
Type
Projects
Status
Backlog