Skip to content

Commit 8610b62

Browse files
modify script for node updates (#444)
* modify script for node updates * handle multiple timekeepers * add names to hosts in config * Improve handling for nodes, logging, and documentation * add encryption for sensitive files
1 parent 3add9bc commit 8610b62

File tree

4 files changed

+109
-41
lines changed

4 files changed

+109
-41
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ hosts.yml filter=crypt diff=crypt merge=crypt
44
testing-framework/hetzner/network/keystore/* filter=crypt diff=crypt merge=crypt
55
*.pem filter=crypt diff=crypt merge=crypt
66
*.key filter=crypt diff=crypt merge=crypt
7+
*.toml.* filter=crypt diff=crypt merge=crypt

scripts/launch-nodes/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Subspace Node Manager
22

3-
This script manages the deployment of Subspace nodes (RPC, Farmer, Timekeeper, and Bootstrap nodes) on multiple servers using SSH. It updates the `.env` file with the specified release version, coordinates the startup sequence, and ensures that RPC and Farmer nodes are started before the Bootstrap node, which is updated last with the correct `GENESIS_HASH`.
3+
This script manages the deployment and upgrades of Subspace nodes (RPC, Farmer, Timekeeper, Domain, and Bootstrap nodes) on multiple servers using SSH. It updates the `.env` file with the specified release version, coordinates the startup sequence, and ensures that RPC and Farmer nodes are started before the Bootstrap node, which is updated last with the correct `GENESIS_HASH`.
44

55
## Features
66

@@ -111,7 +111,8 @@ python manage_subspace.py --config nodes.toml --release_version docker-tag --sub
111111
- `--prune`: Stop containers and remove unused Docker images.
112112
- `--restart`: Restart containers without wiping data.
113113
- `--no_timekeeper`: Disable launching of the timekeeper node. This flag can be used if you do not want the script to handle the timekeeper node during execution.
114-
- `--wipe`: Wipe data for Farmer nodes before starting them while preserving identity.bin file.
114+
- `--no_farmer`: Disable launching or updating the farmer node. This flag can be used if you do not want the script to handle the farmer node during execution.
115+
- `--wipe`: Wipe node and farmer data while preserving the `identity.bin` file of the farmer nodes.
115116

116117
### Step 5: Deactivate the Virtual Environment
117118

scripts/launch-nodes/manage_subspace.py

Lines changed: 99 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
# This script is designed to manage Subspace nodes by connecting to them via SSH,
2+
# executing commands to wipe data, update configurations, and manage Docker containers.
3+
# It supports various node types including farmers, RPC nodes, and timekeepers.
4+
# The script uses paramiko for SSH connections and colorlog for colored logging output.
5+
# It can handle node data wiping, Docker compose commands, and environment variable modifications.
6+
# The script is configurable via command line arguments and a TOML configuration file.
7+
# It is intended for use in a Subspace network management context, allowing administrators to maintain and update nodes efficiently.
8+
# Ensure you have the required libraries installed
9+
110
import os
211
import paramiko
312
import argparse
@@ -53,15 +62,35 @@ def run_command(client, command):
5362
logger.error(f"Failed to run command: {e}")
5463
raise
5564

65+
def wipe_node_data(client, subspace_dir):
66+
"""Wipe node data for non-farmer nodes."""
67+
try:
68+
commands = [
69+
# Remove node data directory
70+
f"cd {subspace_dir} && sudo rm -rf *",
71+
]
72+
73+
for command in commands:
74+
logger.info(f"Executing: {command}")
75+
stdout, stderr = run_command(client, command)
76+
if stderr and not any(keyword in stderr for keyword in ["No such file", "not found"]):
77+
logger.error(f"Error during node data wipe: {stderr}")
78+
79+
logger.info("Successfully wiped node data")
80+
81+
except Exception as e:
82+
logger.error(f"Failed to wipe node data: {e}")
83+
raise
84+
5685
def wipe_farmer_data(client, subspace_dir):
5786
"""Wipe farmer data while preserving identity.bin file."""
5887
try:
5988
commands = [
6089
# Create backup directory if it doesn't exist
61-
f"cd {subspace_dir} && sudo mkdir -p backup",
90+
f"cd {subspace_dir} && sudo mkdir -p ~/backup",
6291

6392
# Preserve identity.bin if it exists
64-
f"cd {subspace_dir} && if [ -f farmer_data/identity.bin ]; then sudo mv farmer_data/identity.bin backup/; fi",
93+
f"cd {subspace_dir} && if [ -f farmer_data/identity.bin ]; then sudo mv farmer_data/identity.bin ~/backup/; fi",
6594

6695
# Remove farmer_data directory with sudo
6796
f"cd {subspace_dir} && sudo rm -rf farmer_data",
@@ -70,7 +99,7 @@ def wipe_farmer_data(client, subspace_dir):
7099
f"cd {subspace_dir} && sudo mkdir -p farmer_data",
71100

72101
# Restore identity.bin if it was backed up
73-
f"cd {subspace_dir} && if [ -f backup/identity.bin ]; then sudo mv backup/identity.bin farmer_data/; fi",
102+
f"cd {subspace_dir} && if [ -f ~/backup/identity.bin ]; then sudo mv ~/backup/identity.bin farmer_data/; fi",
74103

75104
# Set proper ownership
76105
f"cd {subspace_dir} && sudo chown -R nobody:nogroup farmer_data/",
@@ -114,13 +143,13 @@ def modify_env_file(client, subspace_dir, release_version, genesis_hash=None, po
114143
raise
115144

116145
def docker_compose_down(client, subspace_dir):
117-
"""Run sudo docker compose down -v in the subspace directory."""
146+
"""Run sudo docker compose down in the subspace directory."""
118147
try:
119-
command = f'cd {subspace_dir} && sudo docker compose down -v'
120-
logger.info(f"Running sudo docker compose down -v in {subspace_dir}")
148+
command = f'cd {subspace_dir} && sudo docker compose down'
149+
logger.info(f"Running sudo docker compose down in {subspace_dir}")
121150
run_command(client, command)
122151
except Exception as e:
123-
logger.error(f"Failed to run sudo docker compose down -v: {e}")
152+
logger.error(f"Failed to run sudo docker compose down: {e}")
124153
raise
125154

126155
def docker_compose_restart(client, subspace_dir, docker_tag=None):
@@ -155,7 +184,7 @@ def docker_cleanup(client, subspace_dir):
155184
logger.info("No running containers found to stop.")
156185

157186
# Prune unused containers and images
158-
prune_cmd = f'cd {subspace_dir} && sudo docker container prune -f && sudo docker image prune -a -f'
187+
prune_cmd = f'cd {subspace_dir} && sudo docker container prune -f && sudo docker image prune -a -f && sudo docker volume prune -f'
159188
logger.info(f"Pruning unused containers and images in {subspace_dir}")
160189
run_command(client, prune_cmd)
161190

@@ -175,16 +204,21 @@ def docker_compose_up(client, subspace_dir):
175204

176205
def handle_node(client, node, subspace_dir, release_version, pot_external_entropy=None,
177206
plot_size=None, cache_percentage=None, network=None, prune=False, restart=False,
178-
genesis_hash=None, wipe=False):
207+
genesis_hash=None, wipe=False, ssh_key=None, ssh_user=None):
179208
"""Generic function to handle different node types with specified actions."""
180209
try:
181210
if prune:
211+
docker_compose_down(client, subspace_dir)
182212
docker_cleanup(client, subspace_dir)
183213
elif restart:
184214
docker_compose_restart(client, subspace_dir)
185215
else:
186216
docker_compose_down(client, subspace_dir)
187217

218+
# Wipe node data if requested (for non-farmer and non-timekeeper nodes)
219+
if wipe and node.get('type') != 'farmer' and node.get('type') != 'timekeeper':
220+
wipe_node_data(client, subspace_dir)
221+
188222
# Wipe farmer data if requested
189223
if wipe and node.get('type') == 'farmer':
190224
wipe_farmer_data(client, subspace_dir)
@@ -208,7 +242,7 @@ def handle_node(client, node, subspace_dir, release_version, pot_external_entrop
208242
def main():
209243
parser = argparse.ArgumentParser(description="Manage Subspace nodes via SSH")
210244
parser.add_argument('--config', required=True, help='Path to the TOML config file')
211-
parser.add_argument('--network', required=True, help='Network to update in the .env file, i.e devnet, gemini-3h, taurus')
245+
parser.add_argument('--network', required=True, help='Network to update in the .env file, i.e devnet, taurus, mainnet')
212246
parser.add_argument('--release_version', required=True, help='Release version to update in the .env file')
213247
parser.add_argument('--subspace_dir', default='/home/ubuntu/subspace', help='Path to the Subspace directory')
214248
parser.add_argument('--pot_external_entropy', help='POT_EXTERNAL_ENTROPY value for all nodes')
@@ -217,9 +251,11 @@ def main():
217251
parser.add_argument('--no_timekeeper', action='store_true', help='Disable launching of the timekeeper node')
218252
parser.add_argument('--prune', action='store_true', help='Stop containers and destroy the Docker images')
219253
parser.add_argument('--restart', action='store_true', help='Restart the network without wiping the data')
254+
parser.add_argument('--no_farmer', action='store_true', help='Dont update the farmer nodes')
220255
parser.add_argument('--plot_size', help='Set plot size on the farmer, i.e 10G')
221256
parser.add_argument('--cache_percentage', help='Set the cache percentage on the farmer, i.e 10')
222-
parser.add_argument('--wipe', action='store_true', help='Wipe farmer data while preserving identity.bin')
257+
parser.add_argument('--wipe', action='store_true', help='Wipe the node and farmer data. It preserves the identity.bin of the farmer nodes')
258+
223259
args = parser.parse_args()
224260

225261
# Set logging level based on user input
@@ -233,63 +269,92 @@ def main():
233269
bootstrap_nodes = [bootstrap_node for bootstrap_node in config['bootstrap_nodes']]
234270
farmer_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'farmer']
235271
rpc_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'rpc']
236-
timekeeper_node = config['timekeeper']
272+
domain_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'domain']
273+
timekeeper_node = [node for node in config['timekeeper']]
237274

238275
# Step 1: Handle the timekeeper node, if enabled
239276
if not args.no_timekeeper and timekeeper_node:
240-
try:
241-
logger.info(f"Connecting to timekeeper node {timekeeper_node['host']}...")
242-
client = ssh_connect(timekeeper_node['host'], timekeeper_node['user'], timekeeper_node['ssh_key'])
243-
handle_node(client, timekeeper_node, args.subspace_dir, args.release_version,
244-
pot_external_entropy=args.pot_external_entropy, network=args.network,
245-
prune=args.prune, restart=args.restart)
246-
except Exception as e:
247-
logger.error(f"Error handling timekeeper node: {e}")
248-
finally:
249-
if client:
250-
client.close()
277+
for node in timekeeper_node:
278+
client = None
279+
try:
280+
logger.info(f"Connecting to timekeeper node {node['host']}...")
281+
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
282+
handle_node(client, node, args.subspace_dir, args.release_version,
283+
pot_external_entropy=args.pot_external_entropy, network=args.network,
284+
prune=args.prune, restart=args.restart, wipe=args.wipe)
285+
logger.info(f"Successfully handled timekeeper node {node['host']}")
286+
except Exception as e:
287+
logger.error(f"Error handling timekeeper node: {e}")
288+
finally:
289+
if client:
290+
client.close()
251291
else:
252292
logger.info("Timekeeper handling is disabled or not specified.")
253293

254294
# Step 2: Handle farmer nodes
255-
for node in farmer_nodes:
295+
if not args.no_farmer:
296+
for node in farmer_nodes:
297+
client = None
298+
try:
299+
logger.info(f"Connecting to farmer node {node['host']}...")
300+
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
301+
handle_node(client, node, args.subspace_dir, args.release_version,
302+
pot_external_entropy=args.pot_external_entropy, network=args.network,
303+
plot_size=args.plot_size, cache_percentage=args.cache_percentage,
304+
prune=args.prune, restart=args.restart, wipe=args.wipe)
305+
logger.info(f"Successfully handled farmer node {node['host']}")
306+
except Exception as e:
307+
logger.error(f"Error handling farmer node {node['host']}: {e}")
308+
finally:
309+
if client:
310+
client.close()
311+
else:
312+
logger.info("Skipping farmer nodes due to --no_farmer flag.")
313+
314+
# Step 3: Handle RPC nodes
315+
for node in rpc_nodes:
316+
client = None
256317
try:
257-
logger.info(f"Connecting to farmer node {node['host']}...")
318+
logger.info(f"Connecting to RPC node {node['host']}...")
258319
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
259320
handle_node(client, node, args.subspace_dir, args.release_version,
260321
pot_external_entropy=args.pot_external_entropy, network=args.network,
261-
plot_size=args.plot_size, cache_percentage=args.cache_percentage,
262322
prune=args.prune, restart=args.restart, wipe=args.wipe)
323+
logger.info(f"Successfully handled RPC node {node['host']}")
263324
except Exception as e:
264-
logger.error(f"Error handling farmer node {node['host']}: {e}")
325+
logger.error(f"Error handling RPC node {node['host']}: {e}")
265326
finally:
266327
if client:
267328
client.close()
268329

269-
# Step 3: Handle RPC nodes
270-
for node in rpc_nodes:
330+
# Step 4: Handle RPC Domain nodes
331+
for node in domain_nodes:
332+
client = None
271333
try:
272-
logger.info(f"Connecting to RPC node {node['host']}...")
334+
logger.info(f"Connecting to RPC Domain node {node['host']}...")
273335
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
274336
handle_node(client, node, args.subspace_dir, args.release_version,
275337
pot_external_entropy=args.pot_external_entropy, network=args.network,
276-
prune=args.prune, restart=args.restart)
338+
prune=args.prune, restart=args.restart, wipe=args.wipe)
339+
logger.info(f"Successfully handled RPC Domain node {node['host']}")
277340
except Exception as e:
278-
logger.error(f"Error handling RPC node {node['host']}: {e}")
341+
logger.error(f"Error handling RPC Domain node {node['host']}: {e}")
279342
finally:
280343
if client:
281344
client.close()
282345

283-
# Step 4: Handle the bootstrap node with genesis hash from arguments
346+
# Step 5: Handle the bootstrap node with genesis hash from arguments
284347
for bootstrap_node in config['bootstrap_nodes']:
348+
client = None
285349
try:
286350
logger.info(f"Connecting to the bootstrap node {bootstrap_node['host']}...")
287351
client = ssh_connect(bootstrap_node['host'], bootstrap_node['user'], bootstrap_node['ssh_key'])
288352

289353
handle_node(client, bootstrap_node, args.subspace_dir, args.release_version,
290354
pot_external_entropy=args.pot_external_entropy, network=args.network,
291355
prune=args.prune, restart=args.restart,
292-
genesis_hash=args.genesis_hash)
356+
genesis_hash=args.genesis_hash, wipe=args.wipe)
357+
logger.info(f"Successfully handled bootstrap node {bootstrap_node['host']}")
293358
except Exception as e:
294359
logger.error(f"Error handling bootstrap node {bootstrap_node['host']}: {e}")
295360
finally:

scripts/launch-nodes/nodes.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,38 @@
44
host = "34.201.40.91"
55
user = "user"
66
ssh_key = "key.pem"
7+
name = "bootstrap-0"
78

89
[[bootstrap_nodes]]
910
host = "35.202.41.92"
1011
user = "user"
1112
ssh_key = "key.pem"
12-
13-
[[bootstrap_nodes]]
14-
host = "36.203.42.93"
15-
user = "user"
16-
ssh_key = "key.pem"
13+
name = "bootstrap-1"
1714

1815
[[farmer_rpc_nodes]]
1916
host = "54.209.76.129"
2017
user = "user"
2118
ssh_key = "key.pem"
2219
type = "rpc"
20+
name = "rpc-0"
2321

2422
[[farmer_rpc_nodes]]
2523
host = "54.209.76.129"
2624
user = "user"
2725
ssh_key = "key.pem"
2826
type = "rpc"
27+
name = "rpc-1"
2928

3029
[[farmer_rpc_nodes]]
3130
host = "44.202.161.154"
3231
user = "user"
3332
ssh_key = "key.pem"
3433
type = "farmer"
34+
name = "farmer-0"
3535

3636
[timekeeper]
3737
host = "54.147.124.232"
3838
user = "user"
3939
ssh_key = "key.pem"
4040
type = "timekeeper"
41+
name = "timekeeper-0"

0 commit comments

Comments
 (0)