Skip to content

Commit 6d135b0

Browse files
chore: Separation of node health and initialization state from rln_relay (#2612)
* Separation of node health and initialization state from rln_relay status. Make (only) health endpoint available early and install others in the last stage of node setup. * Proper json report from /health, adjusted and fixed test, added convenient script for checking node health * Stop wakunode2 if configured rest server cannot be started * Fix wakuRlnRelay protocol existence check * Fix typo * Removed unused imports from touched files. * Added missing /health test for all
1 parent 1a23700 commit 6d135b0

File tree

10 files changed

+391
-103
lines changed

10 files changed

+391
-103
lines changed

apps/wakunode2/wakunode2.nim

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ import
1818
../../waku/common/logging,
1919
../../waku/factory/external_config,
2020
../../waku/factory/networks_config,
21-
../../waku/factory/app
21+
../../waku/factory/app,
22+
../../waku/node/health_monitor
2223

2324
logScope:
2425
topics = "wakunode main"
@@ -88,54 +89,74 @@ when isMainModule:
8889
doInspectRlnDb(conf)
8990
of noCommand:
9091
case conf.clusterId
91-
# cluster-id=0
92-
of 0:
93-
let clusterZeroConf = ClusterConf.ClusterZeroConf()
94-
conf.pubsubTopics = clusterZeroConf.pubsubTopics
95-
# TODO: Write some template to "merge" the configs
96-
# cluster-id=1 (aka The Waku Network)
97-
of 1:
98-
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
99-
if len(conf.shards) != 0:
100-
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
101-
else:
102-
conf.pubsubTopics = twnClusterConf.pubsubTopics
103-
104-
# Override configuration
105-
conf.maxMessageSize = twnClusterConf.maxMessageSize
106-
conf.clusterId = twnClusterConf.clusterId
107-
conf.rlnRelay = twnClusterConf.rlnRelay
108-
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
109-
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
110-
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
111-
conf.discv5Discovery = twnClusterConf.discv5Discovery
112-
conf.discv5BootstrapNodes =
113-
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
114-
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
115-
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
92+
# cluster-id=0
93+
of 0:
94+
let clusterZeroConf = ClusterConf.ClusterZeroConf()
95+
conf.pubsubTopics = clusterZeroConf.pubsubTopics
96+
# TODO: Write some template to "merge" the configs
97+
# cluster-id=1 (aka The Waku Network)
98+
of 1:
99+
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
100+
if len(conf.shards) != 0:
101+
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
116102
else:
117-
discard
103+
conf.pubsubTopics = twnClusterConf.pubsubTopics
104+
105+
# Override configuration
106+
conf.maxMessageSize = twnClusterConf.maxMessageSize
107+
conf.clusterId = twnClusterConf.clusterId
108+
conf.rlnRelay = twnClusterConf.rlnRelay
109+
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
110+
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
111+
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
112+
conf.discv5Discovery = twnClusterConf.discv5Discovery
113+
conf.discv5BootstrapNodes =
114+
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
115+
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
116+
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
117+
else:
118+
discard
118119

119120
info "Running nwaku node", version = app.git_version
120121
logConfig(conf)
121122

123+
# NOTE: {.threadvar.} is used to make the global variable GC safe for the closure that uses it.
124+
# It will always be called from the main thread anyway.
125+
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
126+
var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor
127+
nodeHealthMonitor = WakuNodeHealthMonitor()
128+
nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING)
129+
130+
let restServerRes = startRestServerEsentials(nodeHealthMonitor, conf)
131+
if restServerRes.isErr():
132+
error "Starting REST server failed.", error = $restServerRes.error()
133+
quit(QuitFailure)
134+
122135
var wakunode2 = App.init(conf).valueOr:
123136
error "App initialization failed", error = error
124137
quit(QuitFailure)
125138

139+
nodeHealthMonitor.setNode(wakunode2.node)
140+
126141
wakunode2.startApp().isOkOr:
127142
error "Starting app failed", error = error
128143
quit(QuitFailure)
129144

145+
if conf.rest and not restServerRes.isErr():
146+
wakunode2.restServer = restServerRes.value
147+
130148
wakunode2.setupMonitoringAndExternalInterfaces().isOkOr:
131149
error "Starting monitoring and external interfaces failed", error = error
132150
quit(QuitFailure)
133151

152+
nodeHealthMonitor.setOverallHealth(HealthStatus.READY)
153+
134154
debug "Setting up shutdown hooks"
135155
## Setup shutdown hooks for this process.
136156
## Stop node gracefully on shutdown.
137157

138158
proc asyncStopper(node: App) {.async: (raises: [Exception]).} =
159+
nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)
139160
await node.stop()
140161
quit(QuitSuccess)
141162

scripts/chkhealth.sh

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env bash
2+
3+
# optional argument to specify the ip address
4+
ip_address=$1
5+
plain_text_out=false
6+
7+
# Parse command line arguments
8+
POSITIONAL_ARGS=()
9+
10+
while [[ $# -gt 0 ]]; do
11+
case $1 in
12+
-p|--plain)
13+
plain_text_out=true
14+
shift # past argument
15+
;;
16+
-*|--*)
17+
echo "Unknown option $1"
18+
exit 1
19+
;;
20+
*)
21+
POSITIONAL_ARGS+=("$1") # save positional arg
22+
shift # past argument
23+
;;
24+
esac
25+
done
26+
27+
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
28+
29+
# Check if an IP address is provided as an argument
30+
if [[ -n "$1" ]]; then
31+
ip_address="$1"
32+
else
33+
ip_address="localhost:8645"
34+
fi
35+
36+
# check if curl is available
37+
if ! command -v curl &> /dev/null
38+
then
39+
echo "curl could not be found"
40+
exit 1
41+
fi
42+
43+
# Query the node's /health endpoint. GET is curl's default method, so no method
# word is passed: a bare `GET` argument would be treated as a second URL.
# The URL is quoted so the expanded address is passed as a single argument.
response=$(curl -s "http://${ip_address}/health")
44+
45+
if [[ -z "${response}" ]]; then
46+
echo -e "$(date +'%H:%M:%S')\tnode health status is: unknown\n"
47+
exit 1
48+
fi
49+
50+
if ! command -v jq &> /dev/null || [[ "$plain_text_out" = true ]]; then
51+
echo -e "$(date +'%H:%M:%S')\tnode health status is: ${response}\n"
52+
else
53+
echo -e "$(date +'%H:%M:%S')\tnode health status is:\n"
54+
echo "${response}" | jq .
55+
fi

tests/all_tests_waku.nim

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ import
7777
./wakunode_rest/test_rest_filter,
7878
./wakunode_rest/test_rest_lightpush,
7979
./wakunode_rest/test_rest_admin,
80-
./wakunode_rest/test_rest_cors
80+
./wakunode_rest/test_rest_cors,
81+
./wakunode_rest/test_rest_health
8182

8283
import ./waku_rln_relay/test_all
8384

tests/wakunode_rest/test_rest_health.nim

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import
1919
../../waku/waku_api/rest/health/handlers as health_api,
2020
../../waku/waku_api/rest/health/client as health_api_client,
2121
../../waku/waku_rln_relay,
22+
../../waku/node/health_monitor,
2223
../testlib/common,
2324
../testlib/testutils,
2425
../testlib/wakucore,
@@ -35,17 +36,20 @@ proc testWakuNode(): WakuNode =
3536

3637
suite "Waku v2 REST API - health":
3738
# TODO: better test for health
38-
xasyncTest "Get node health info - GET /health":
39+
asyncTest "Get node health info - GET /health":
3940
# Given
4041
let node = testWakuNode()
42+
let healthMonitor = WakuNodeHealthMonitor()
4143
await node.start()
4244
await node.mountRelay()
4345

46+
healthMonitor.setOverallHealth(HealthStatus.INITIALIZING)
47+
4448
let restPort = Port(58001)
4549
let restAddress = parseIpAddress("0.0.0.0")
4650
let restServer = WakuRestServerRef.init(restAddress, restPort).tryGet()
4751

48-
installHealthApiHandler(restServer.router, node)
52+
installHealthApiHandler(restServer.router, healthMonitor)
4953
restServer.start()
5054
let client = newRestHttpClient(initTAddress(restAddress, restPort))
5155

@@ -54,9 +58,10 @@ suite "Waku v2 REST API - health":
5458

5559
# Then
5660
check:
57-
response.status == 503
58-
$response.contentType == $MIMETYPE_TEXT
59-
response.data == "Node is not ready"
61+
response.status == 200
62+
$response.contentType == $MIMETYPE_JSON
63+
response.data ==
64+
HealthReport(nodeHealth: HealthStatus.INITIALIZING, protocolsHealth: @[])
6065

6166
# now kick in rln (currently the only check for health)
6267
await node.mountRlnRelay(
@@ -67,15 +72,19 @@ suite "Waku v2 REST API - health":
6772
rlnRelayTreePath: genTempPath("rln_tree", "wakunode"),
6873
)
6974
)
70-
75+
healthMonitor.setNode(node)
76+
healthMonitor.setOverallHealth(HealthStatus.READY)
7177
# When
7278
response = await client.healthCheck()
7379

7480
# Then
7581
check:
7682
response.status == 200
77-
$response.contentType == $MIMETYPE_TEXT
78-
response.data == "Node is healthy"
83+
$response.contentType == $MIMETYPE_JSON
84+
response.data.nodeHealth == HealthStatus.READY
85+
response.data.protocolsHealth.len() == 1
86+
response.data.protocolsHealth[0].protocol == "Rln Relay"
87+
response.data.protocolsHealth[0].health == HealthStatus.READY
7988

8089
await restServer.stop()
8190
await restServer.closeWait()

0 commit comments

Comments
 (0)