Skip to content

Commit

Permalink
chore: Separation of node health and initialization state from rln_re…
Browse files Browse the repository at this point in the history
…lay (#2612)

* Separation of node health and initialization state from rln_relay status. Make (only) the health endpoint available early and install the others in the last stage of node setup.

* Proper json report from /health, adjusted and fixed test, added convenient script for checking node health

* Stop wakunode2 if configured rest server cannot be started

* Fix wakuRlnRelay protocol existence check

* Fix typo

* Removed unused imports from touched files.

* Added missing /health test for all
  • Loading branch information
NagyZoltanPeter authored Apr 23, 2024
1 parent 1a23700 commit 6d135b0
Show file tree
Hide file tree
Showing 10 changed files with 391 additions and 103 deletions.
75 changes: 48 additions & 27 deletions apps/wakunode2/wakunode2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ import
../../waku/common/logging,
../../waku/factory/external_config,
../../waku/factory/networks_config,
../../waku/factory/app
../../waku/factory/app,
../../waku/node/health_monitor

logScope:
topics = "wakunode main"
Expand Down Expand Up @@ -88,54 +89,74 @@ when isMainModule:
doInspectRlnDb(conf)
of noCommand:
case conf.clusterId
# cluster-id=0
of 0:
let clusterZeroConf = ClusterConf.ClusterZeroConf()
conf.pubsubTopics = clusterZeroConf.pubsubTopics
# TODO: Write some template to "merge" the configs
# cluster-id=1 (aka The Waku Network)
of 1:
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
if len(conf.shards) != 0:
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
else:
conf.pubsubTopics = twnClusterConf.pubsubTopics

# Override configuration
conf.maxMessageSize = twnClusterConf.maxMessageSize
conf.clusterId = twnClusterConf.clusterId
conf.rlnRelay = twnClusterConf.rlnRelay
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
conf.discv5Discovery = twnClusterConf.discv5Discovery
conf.discv5BootstrapNodes =
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
# cluster-id=0
of 0:
let clusterZeroConf = ClusterConf.ClusterZeroConf()
conf.pubsubTopics = clusterZeroConf.pubsubTopics
# TODO: Write some template to "merge" the configs
# cluster-id=1 (aka The Waku Network)
of 1:
let twnClusterConf = ClusterConf.TheWakuNetworkConf()
if len(conf.shards) != 0:
conf.pubsubTopics = conf.shards.mapIt(twnClusterConf.pubsubTopics[it.uint16])
else:
discard
conf.pubsubTopics = twnClusterConf.pubsubTopics

# Override configuration
conf.maxMessageSize = twnClusterConf.maxMessageSize
conf.clusterId = twnClusterConf.clusterId
conf.rlnRelay = twnClusterConf.rlnRelay
conf.rlnRelayEthContractAddress = twnClusterConf.rlnRelayEthContractAddress
conf.rlnRelayDynamic = twnClusterConf.rlnRelayDynamic
conf.rlnRelayBandwidthThreshold = twnClusterConf.rlnRelayBandwidthThreshold
conf.discv5Discovery = twnClusterConf.discv5Discovery
conf.discv5BootstrapNodes =
conf.discv5BootstrapNodes & twnClusterConf.discv5BootstrapNodes
conf.rlnEpochSizeSec = twnClusterConf.rlnEpochSizeSec
conf.rlnRelayUserMessageLimit = twnClusterConf.rlnRelayUserMessageLimit
else:
discard

info "Running nwaku node", version = app.git_version
logConfig(conf)

# NOTE: {.threadvar.} is used to make the global variable GC-safe for the closure that uses it.
# The closure will always be called from the main thread anyway.
# Ref: https://nim-lang.org/docs/manual.html#threads-gc-safety
var nodeHealthMonitor {.threadvar.}: WakuNodeHealthMonitor
nodeHealthMonitor = WakuNodeHealthMonitor()
nodeHealthMonitor.setOverallHealth(HealthStatus.INITIALIZING)

let restServerRes = startRestServerEsentials(nodeHealthMonitor, conf)
if restServerRes.isErr():
error "Starting REST server failed.", error = $restServerRes.error()
quit(QuitFailure)

var wakunode2 = App.init(conf).valueOr:
error "App initialization failed", error = error
quit(QuitFailure)

nodeHealthMonitor.setNode(wakunode2.node)

wakunode2.startApp().isOkOr:
error "Starting app failed", error = error
quit(QuitFailure)

if conf.rest and not restServerRes.isErr():
wakunode2.restServer = restServerRes.value

wakunode2.setupMonitoringAndExternalInterfaces().isOkOr:
error "Starting monitoring and external interfaces failed", error = error
quit(QuitFailure)

nodeHealthMonitor.setOverallHealth(HealthStatus.READY)

debug "Setting up shutdown hooks"
## Setup shutdown hooks for this process.
## Stop node gracefully on shutdown.

# Async shutdown routine for the running app: flags the health monitor as
# SHUTTING_DOWN, awaits `node.stop()`, and then terminates the process with
# `QuitSuccess`.
proc asyncStopper(node: App) {.async: (raises: [Exception]).} =
# Report the shutdown through the health monitor before the node is stopped.
nodeHealthMonitor.setOverallHealth(HealthStatus.SHUTTING_DOWN)
await node.stop()
# Exit the process once the node has been stopped.
quit(QuitSuccess)

Expand Down
55 changes: 55 additions & 0 deletions scripts/chkhealth.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env bash

# Query a node's REST /health endpoint and print the reported status.
#
# Usage: chkhealth.sh [-p|--plain] [host:port]
#   host:port     address of the node's REST server (default: localhost:8645)
#   -p, --plain   print the raw response body instead of piping it through jq
#
# Exits with status 1 on an unknown option, when curl is not installed, or
# when the endpoint returns an empty response.

plain_text_out=false

# Parse command line arguments
POSITIONAL_ARGS=()

while [[ $# -gt 0 ]]; do
  case $1 in
    -p|--plain)
      plain_text_out=true
      shift # past argument
      ;;
    -*) # any other option; this pattern also matches long (--foo) options
      echo "Unknown option $1"
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1") # save positional arg
      shift # past argument
      ;;
  esac
done

set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters

# Optional first positional argument specifies the address of the node;
# fall back to the default when it is missing or empty.
ip_address="${1:-localhost:8645}"

# check if curl is available
if ! command -v curl &> /dev/null; then
  echo "curl could not be found"
  exit 1
fi

# GET is curl's default method. It must not be passed as a bare word
# ("curl -s GET <url>"), because curl would treat "GET" as one more URL to
# fetch and append whatever that request returns to the captured output.
response=$(curl -s "http://${ip_address}/health")

timestamp=$(date +'%H:%M:%S')

if [[ -z "${response}" ]]; then
  printf '%s\tnode health status is: unknown\n\n' "${timestamp}"
  exit 1
fi

# printf '%s' prints the response verbatim; `echo -e` would interpret
# backslash escapes that may legitimately occur inside the JSON body.
if ! command -v jq &> /dev/null || [[ "$plain_text_out" = true ]]; then
  printf '%s\tnode health status is: %s\n\n' "${timestamp}" "${response}"
else
  printf '%s\tnode health status is:\n\n' "${timestamp}"
  printf '%s\n' "${response}" | jq .
fi
3 changes: 2 additions & 1 deletion tests/all_tests_waku.nim
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ import
./wakunode_rest/test_rest_filter,
./wakunode_rest/test_rest_lightpush,
./wakunode_rest/test_rest_admin,
./wakunode_rest/test_rest_cors
./wakunode_rest/test_rest_cors,
./wakunode_rest/test_rest_health

import ./waku_rln_relay/test_all

Expand Down
25 changes: 17 additions & 8 deletions tests/wakunode_rest/test_rest_health.nim
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import
../../waku/waku_api/rest/health/handlers as health_api,
../../waku/waku_api/rest/health/client as health_api_client,
../../waku/waku_rln_relay,
../../waku/node/health_monitor,
../testlib/common,
../testlib/testutils,
../testlib/wakucore,
Expand All @@ -35,17 +36,20 @@ proc testWakuNode(): WakuNode =

suite "Waku v2 REST API - health":
# TODO: better test for health
xasyncTest "Get node health info - GET /health":
asyncTest "Get node health info - GET /health":
# Given
let node = testWakuNode()
let healthMonitor = WakuNodeHealthMonitor()
await node.start()
await node.mountRelay()

healthMonitor.setOverallHealth(HealthStatus.INITIALIZING)

let restPort = Port(58001)
let restAddress = parseIpAddress("0.0.0.0")
let restServer = WakuRestServerRef.init(restAddress, restPort).tryGet()

installHealthApiHandler(restServer.router, node)
installHealthApiHandler(restServer.router, healthMonitor)
restServer.start()
let client = newRestHttpClient(initTAddress(restAddress, restPort))

Expand All @@ -54,9 +58,10 @@ suite "Waku v2 REST API - health":

# Then
check:
response.status == 503
$response.contentType == $MIMETYPE_TEXT
response.data == "Node is not ready"
response.status == 200
$response.contentType == $MIMETYPE_JSON
response.data ==
HealthReport(nodeHealth: HealthStatus.INITIALIZING, protocolsHealth: @[])

# now kick in rln (currently the only check for health)
await node.mountRlnRelay(
Expand All @@ -67,15 +72,19 @@ suite "Waku v2 REST API - health":
rlnRelayTreePath: genTempPath("rln_tree", "wakunode"),
)
)

healthMonitor.setNode(node)
healthMonitor.setOverallHealth(HealthStatus.READY)
# When
response = await client.healthCheck()

# Then
check:
response.status == 200
$response.contentType == $MIMETYPE_TEXT
response.data == "Node is healthy"
$response.contentType == $MIMETYPE_JSON
response.data.nodeHealth == HealthStatus.READY
response.data.protocolsHealth.len() == 1
response.data.protocolsHealth[0].protocol == "Rln Relay"
response.data.protocolsHealth[0].health == HealthStatus.READY

await restServer.stop()
await restServer.closeWait()
Expand Down
Loading

0 comments on commit 6d135b0

Please sign in to comment.