1 change: 1 addition & 0 deletions .gitattributes
@@ -4,3 +4,4 @@ hosts.yml filter=crypt diff=crypt merge=crypt
testing-framework/hetzner/network/keystore/* filter=crypt diff=crypt merge=crypt
*.pem filter=crypt diff=crypt merge=crypt
*.key filter=crypt diff=crypt merge=crypt
*.toml.* filter=crypt diff=crypt merge=crypt
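The new `*.toml.*` pattern routes per-network config variants (for example, a hypothetical `nodes.toml.mainnet`) through the same `crypt` filter as the keys and certificates above.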
5 changes: 3 additions & 2 deletions scripts/launch-nodes/README.md
@@ -1,6 +1,6 @@
# Subspace Node Manager

This script manages the deployment of Subspace nodes (RPC, Farmer, Timekeeper, and Bootstrap nodes) on multiple servers using SSH. It updates the `.env` file with the specified release version, coordinates the startup sequence, and ensures that RPC and Farmer nodes are started before the Bootstrap node, which is updated last with the correct `GENESIS_HASH`.
This script manages the deployment and upgrade of Subspace nodes (RPC, Farmer, Timekeeper, Domain, and Bootstrap nodes) on multiple servers over SSH. It updates the `.env` file with the specified release version and coordinates the startup sequence, ensuring that RPC and Farmer nodes are started before the Bootstrap node, which is updated last with the correct `GENESIS_HASH`.

## Features

@@ -111,7 +111,8 @@ python manage_subspace.py --config nodes.toml --release_version docker-tag --sub
- `--prune`: Stop containers and remove unused Docker images.
- `--restart`: Restart containers without wiping data.
- `--no_timekeeper`: Skip the timekeeper node; use this flag if you do not want the script to manage the timekeeper during execution.
- `--wipe`: Wipe data for Farmer nodes before starting them while preserving identity.bin file.
- `--no_farmer`: Skip launching or updating the farmer nodes; use this flag if you do not want the script to manage farmers during execution.
- `--wipe`: Wipe node and farmer data, preserving each farmer's `identity.bin` file (see the example below).
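
For example, a full upgrade that wipes node and farmer data (keeping each farmer's `identity.bin`) while skipping the timekeeper could look like the following sketch; the network and tag values are placeholders:

python manage_subspace.py --config nodes.toml --network taurus --release_version docker-tag --no_timekeeper --wipe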

### Step 5: Deactivate the Virtual Environment

133 changes: 99 additions & 34 deletions scripts/launch-nodes/manage_subspace.py
@@ -1,3 +1,12 @@
# Manage Subspace nodes over SSH: connect to each host, wipe node or farmer
# data on request, update the .env configuration, and drive Docker Compose.
# Supported node types include farmers, RPC nodes, domain nodes, timekeepers,
# and bootstrap nodes. Behaviour is configured through command-line arguments
# and a TOML configuration file, so administrators can maintain and upgrade a
# Subspace network efficiently.
# Requires paramiko (SSH connections) and colorlog (colored logging output).

import os
import paramiko
import argparse
@@ -53,15 +62,35 @@ def run_command(client, command):
logger.error(f"Failed to run command: {e}")
raise

def wipe_node_data(client, subspace_dir):
"""Wipe node data for non-farmer nodes."""
try:
commands = [
# Remove node data directory
f"cd {subspace_dir} && sudo rm -rf *",
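            # (this deletes every non-hidden file in subspace_dir, including
            # docker-compose.yml if it lives there; dotfiles such as .env
            # survive the glob)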
]

for command in commands:
logger.info(f"Executing: {command}")
stdout, stderr = run_command(client, command)
if stderr and not any(keyword in stderr for keyword in ["No such file", "not found"]):
logger.error(f"Error during node data wipe: {stderr}")

logger.info("Successfully wiped node data")

except Exception as e:
logger.error(f"Failed to wipe node data: {e}")
raise

def wipe_farmer_data(client, subspace_dir):
"""Wipe farmer data while preserving identity.bin file."""
try:
commands = [
# Create backup directory if it doesn't exist
f"cd {subspace_dir} && sudo mkdir -p backup",
f"cd {subspace_dir} && sudo mkdir -p ~/backup",
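            # (~ is expanded by the SSH user's shell before sudo runs, so the
            # backup directory lives in that user's home, outside subspace_dir)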

# Preserve identity.bin if it exists
f"cd {subspace_dir} && if [ -f farmer_data/identity.bin ]; then sudo mv farmer_data/identity.bin backup/; fi",
f"cd {subspace_dir} && if [ -f farmer_data/identity.bin ]; then sudo mv farmer_data/identity.bin ~/backup/; fi",

# Remove farmer_data directory with sudo
f"cd {subspace_dir} && sudo rm -rf farmer_data",
@@ -70,7 +99,7 @@ def wipe_farmer_data(client, subspace_dir):
f"cd {subspace_dir} && sudo mkdir -p farmer_data",

# Restore identity.bin if it was backed up
f"cd {subspace_dir} && if [ -f backup/identity.bin ]; then sudo mv backup/identity.bin farmer_data/; fi",
f"cd {subspace_dir} && if [ -f ~/backup/identity.bin ]; then sudo mv ~/backup/identity.bin farmer_data/; fi",

# Set proper ownership
f"cd {subspace_dir} && sudo chown -R nobody:nogroup farmer_data/",
@@ -114,13 +143,13 @@ def modify_env_file(client, subspace_dir, release_version, genesis_hash=None, po
raise

def docker_compose_down(client, subspace_dir):
    """Run sudo docker compose down -v in the subspace directory."""
    """Run sudo docker compose down in the subspace directory; volumes are left intact and pruned separately in docker_cleanup()."""
try:
command = f'cd {subspace_dir} && sudo docker compose down -v'
logger.info(f"Running sudo docker compose down -v in {subspace_dir}")
command = f'cd {subspace_dir} && sudo docker compose down'
logger.info(f"Running sudo docker compose down in {subspace_dir}")
run_command(client, command)
except Exception as e:
logger.error(f"Failed to run sudo docker compose down -v: {e}")
logger.error(f"Failed to run sudo docker compose down: {e}")
raise

def docker_compose_restart(client, subspace_dir, docker_tag=None):
@@ -155,7 +184,7 @@ def docker_cleanup(client, subspace_dir):
logger.info("No running containers found to stop.")

# Prune unused containers and images
prune_cmd = f'cd {subspace_dir} && sudo docker container prune -f && sudo docker image prune -a -f'
prune_cmd = f'cd {subspace_dir} && sudo docker container prune -f && sudo docker image prune -a -f && sudo docker volume prune -f'
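        # volume prune also reclaims the volumes that `docker compose down`
        # now leaves behind, since the -v flag was dropped from docker_compose_down()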
logger.info(f"Pruning unused containers and images in {subspace_dir}")
run_command(client, prune_cmd)

@@ -175,16 +204,21 @@ def docker_compose_up(client, subspace_dir):

def handle_node(client, node, subspace_dir, release_version, pot_external_entropy=None,
plot_size=None, cache_percentage=None, network=None, prune=False, restart=False,
genesis_hash=None, wipe=False):
genesis_hash=None, wipe=False, ssh_key=None, ssh_user=None):
"""Generic function to handle different node types with specified actions."""
try:
if prune:
docker_compose_down(client, subspace_dir)
docker_cleanup(client, subspace_dir)
elif restart:
docker_compose_restart(client, subspace_dir)
else:
docker_compose_down(client, subspace_dir)

# Wipe node data if requested (for non-farmer and non-timekeeper nodes)
        if wipe and node.get('type') not in ('farmer', 'timekeeper'):
wipe_node_data(client, subspace_dir)

# Wipe farmer data if requested
if wipe and node.get('type') == 'farmer':
wipe_farmer_data(client, subspace_dir)
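        # Timekeeper data is deliberately never wiped (see the type checks above)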
@@ -208,7 +242,7 @@ def handle_node(client, node, subspace_dir, release_version, pot_external_entrop
def main():
parser = argparse.ArgumentParser(description="Manage Subspace nodes via SSH")
parser.add_argument('--config', required=True, help='Path to the TOML config file')
parser.add_argument('--network', required=True, help='Network to update in the .env file, i.e devnet, gemini-3h, taurus')
    parser.add_argument('--network', required=True, help='Network to update in the .env file, e.g. devnet, taurus, mainnet')
parser.add_argument('--release_version', required=True, help='Release version to update in the .env file')
parser.add_argument('--subspace_dir', default='/home/ubuntu/subspace', help='Path to the Subspace directory')
parser.add_argument('--pot_external_entropy', help='POT_EXTERNAL_ENTROPY value for all nodes')
@@ -217,9 +251,11 @@ def main():
parser.add_argument('--no_timekeeper', action='store_true', help='Disable launching of the timekeeper node')
parser.add_argument('--prune', action='store_true', help='Stop containers and destroy the Docker images')
parser.add_argument('--restart', action='store_true', help='Restart the network without wiping the data')
    parser.add_argument('--no_farmer', action='store_true', help='Do not launch or update the farmer nodes')
    parser.add_argument('--plot_size', help='Set plot size on the farmer, e.g. 10G')
    parser.add_argument('--cache_percentage', help='Set the cache percentage on the farmer, e.g. 10')
    parser.add_argument('--wipe', action='store_true', help='Wipe farmer data while preserving identity.bin')
    parser.add_argument('--wipe', action='store_true', help='Wipe node and farmer data, preserving the identity.bin of each farmer node')

args = parser.parse_args()

# Set logging level based on user input
@@ -233,63 +269,92 @@ def main():
bootstrap_nodes = [bootstrap_node for bootstrap_node in config['bootstrap_nodes']]
farmer_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'farmer']
rpc_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'rpc']
timekeeper_node = config['timekeeper']
domain_nodes = [node for node in config['farmer_rpc_nodes'] if node['type'] == 'domain']
timekeeper_node = [node for node in config['timekeeper']]
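    # Each of these sections is expected to be a TOML array of tables
    # (e.g. [[farmer_rpc_nodes]], [[timekeeper]]) so that every entry
    # parses as a dict with host/user/ssh_key keys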

# Step 1: Handle the timekeeper node, if enabled
if not args.no_timekeeper and timekeeper_node:
try:
logger.info(f"Connecting to timekeeper node {timekeeper_node['host']}...")
client = ssh_connect(timekeeper_node['host'], timekeeper_node['user'], timekeeper_node['ssh_key'])
handle_node(client, timekeeper_node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
prune=args.prune, restart=args.restart)
except Exception as e:
logger.error(f"Error handling timekeeper node: {e}")
finally:
if client:
client.close()
for node in timekeeper_node:
client = None
try:
logger.info(f"Connecting to timekeeper node {node['host']}...")
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
handle_node(client, node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
prune=args.prune, restart=args.restart, wipe=args.wipe)
logger.info(f"Successfully handled timekeeper node {node['host']}")
except Exception as e:
logger.error(f"Error handling timekeeper node: {e}")
finally:
if client:
client.close()
else:
logger.info("Timekeeper handling is disabled or not specified.")

# Step 2: Handle farmer nodes
for node in farmer_nodes:
if not args.no_farmer:
for node in farmer_nodes:
client = None
try:
logger.info(f"Connecting to farmer node {node['host']}...")
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
handle_node(client, node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
plot_size=args.plot_size, cache_percentage=args.cache_percentage,
prune=args.prune, restart=args.restart, wipe=args.wipe)
logger.info(f"Successfully handled farmer node {node['host']}")
except Exception as e:
logger.error(f"Error handling farmer node {node['host']}: {e}")
finally:
if client:
client.close()
else:
logger.info("Skipping farmer nodes due to --no_farmer flag.")

# Step 3: Handle RPC nodes
for node in rpc_nodes:
client = None
try:
logger.info(f"Connecting to farmer node {node['host']}...")
logger.info(f"Connecting to RPC node {node['host']}...")
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
handle_node(client, node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
plot_size=args.plot_size, cache_percentage=args.cache_percentage,
prune=args.prune, restart=args.restart, wipe=args.wipe)
logger.info(f"Successfully handled RPC node {node['host']}")
except Exception as e:
logger.error(f"Error handling farmer node {node['host']}: {e}")
logger.error(f"Error handling RPC node {node['host']}: {e}")
finally:
if client:
client.close()

# Step 3: Handle RPC nodes
for node in rpc_nodes:
    # Step 4: Handle domain nodes
for node in domain_nodes:
client = None
try:
logger.info(f"Connecting to RPC node {node['host']}...")
            logger.info(f"Connecting to domain node {node['host']}...")
client = ssh_connect(node['host'], node['user'], node['ssh_key'])
handle_node(client, node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
prune=args.prune, restart=args.restart)
prune=args.prune, restart=args.restart, wipe=args.wipe)
            logger.info(f"Successfully handled domain node {node['host']}")
except Exception as e:
logger.error(f"Error handling RPC node {node['host']}: {e}")
            logger.error(f"Error handling domain node {node['host']}: {e}")
finally:
if client:
client.close()

# Step 4: Handle the bootstrap node with genesis hash from arguments
    # Step 5: Handle the bootstrap nodes with the genesis hash from arguments
for bootstrap_node in config['bootstrap_nodes']:
client = None
try:
logger.info(f"Connecting to the bootstrap node {bootstrap_node['host']}...")
client = ssh_connect(bootstrap_node['host'], bootstrap_node['user'], bootstrap_node['ssh_key'])

handle_node(client, bootstrap_node, args.subspace_dir, args.release_version,
pot_external_entropy=args.pot_external_entropy, network=args.network,
prune=args.prune, restart=args.restart,
genesis_hash=args.genesis_hash)
genesis_hash=args.genesis_hash, wipe=args.wipe)
logger.info(f"Successfully handled bootstrap node {bootstrap_node['host']}")
except Exception as e:
logger.error(f"Error handling bootstrap node {bootstrap_node['host']}: {e}")
finally:
11 changes: 6 additions & 5 deletions scripts/launch-nodes/nodes.toml
@@ -4,37 +4,38 @@
host = "34.201.40.91"
user = "user"
ssh_key = "key.pem"
name = "bootstrap-0"

[[bootstrap_nodes]]
host = "35.202.41.92"
user = "user"
ssh_key = "key.pem"

[[bootstrap_nodes]]
host = "36.203.42.93"
user = "user"
ssh_key = "key.pem"
name = "bootstrap-1"

[[farmer_rpc_nodes]]
host = "54.209.76.129"
user = "user"
ssh_key = "key.pem"
type = "rpc"
name = "rpc-0"

[[farmer_rpc_nodes]]
host = "54.209.76.129"
user = "user"
ssh_key = "key.pem"
type = "rpc"
name = "rpc-1"

[[farmer_rpc_nodes]]
host = "44.202.161.154"
user = "user"
ssh_key = "key.pem"
type = "farmer"
name = "farmer-0"

[[timekeeper]]
host = "54.147.124.232"
user = "user"
ssh_key = "key.pem"
type = "timekeeper"
name = "timekeeper-0"
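
Since the script now also targets entries with `type = "domain"` inside `farmer_rpc_nodes`, a hypothetical domain entry (host and name are placeholders) would follow the same shape:

[[farmer_rpc_nodes]]
host = "10.0.0.5"
user = "user"
ssh_key = "key.pem"
type = "domain"
name = "domain-0"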