@@ -35,22 +35,53 @@ def ssh_connect(host, user, key_file):
3535 logger .error (f"Failed to connect to { host } : { e } " )
3636 raise
3737
def run_command(client, command, retries=3, delay=5):
    """Run a command over SSH, retrying when the execution itself fails.

    Args:
        client: Connected SSH client exposing ``exec_command`` (presumably a
            ``paramiko.SSHClient`` -- confirm against ``ssh_connect``).
        command: Shell command line to execute on the remote host.
        retries: Maximum number of attempts; values below 1 are treated as 1.
        delay: Seconds to wait between two attempts.

    Returns:
        A ``(output, error)`` tuple with the decoded stdout and stderr text.

    Raises:
        Exception: Whatever the last attempt raised, once every attempt failed.
    """
    # Stderr lines containing one of these words are Docker progress messages,
    # not failures.
    docker_status_keywords = ("Stopping", "Stopped", "Creating", "Started", "Removing", "Removed")

    # Always try at least once so callers that unpack the result never get None.
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            _stdin, stdout, stderr = client.exec_command(command)
            # Drain both streams *before* asking for the exit status: with
            # Paramiko, recv_exit_status() can block forever when the remote
            # output exceeds the channel window and nobody has read it yet.
            output = stdout.read().decode('utf-8')
            error = stderr.read().decode('utf-8')
            exit_status = stdout.channel.recv_exit_status()
            logger.debug(f"Command exited with status {exit_status}")

            if not error:
                logger.info(f"Command output: {output.strip()}")
            elif any(keyword in error for keyword in docker_status_keywords):
                # Treat Docker status updates as INFO instead of ERROR, and log
                # the status text itself (it arrives on stderr, not stdout).
                logger.info(f"Command output: {error.strip()}")
            else:
                logger.error(f"Error running command: {error.strip()}")
            return output, error
        except Exception as e:
            logger.error(f"Attempt {attempt + 1} failed to run command: {e}")
            if attempt >= attempts - 1:
                raise
            logger.info(f"Retrying in {delay} seconds...")
            sleep(delay)
60+
def modify_env_file(client, subspace_dir, release_version, genesis_hash=None, pot_external_entropy=None, plot_size=None, cache_percentage=None, network=None):
    """Update settings in ``<subspace_dir>/.env`` on the remote host.

    ``DOCKER_TAG`` is always written; every other key is written only when a
    truthy value is supplied. A key that already exists is replaced in place
    with ``sed``; a key that is missing is appended, so a requested setting is
    never silently dropped.

    Args:
        client: Connected SSH client passed through to ``run_command``.
        subspace_dir: Remote directory that contains the ``.env`` file.
        release_version: Value for ``DOCKER_TAG``.
        genesis_hash: Optional value for ``GENESIS_HASH``.
        pot_external_entropy: Optional value for ``POT_EXTERNAL_ENTROPY``.
        plot_size: Optional value for ``PLOT_SIZE``.
        cache_percentage: Optional value for ``CACHE_PERCENTAGE``.
        network: Optional value for ``NETWORK_NAME``.

    Raises:
        Exception: If a command cannot be run or writes anything to stderr.
    """
    from shlex import quote  # local import: only this function needs it

    optional_settings = (
        ("GENESIS_HASH", genesis_hash),
        ("POT_EXTERNAL_ENTROPY", pot_external_entropy),
        ("PLOT_SIZE", plot_size),
        ("CACHE_PERCENTAGE", cache_percentage),
        ("NETWORK_NAME", network),
    )
    settings = [("DOCKER_TAG", release_version)]
    settings.extend((key, value) for key, value in optional_settings if value)

    try:
        env_path = quote(f"{subspace_dir}/.env")
        for key, value in settings:
            line = f"{key}={value}"
            # sed's `c` command replaces the whole matching line, so the value
            # needs no escaping for a sed delimiter; quote() protects it from
            # the remote shell.
            sed_script = f"/^{key}=/c\\{line}"
            command = (
                f"if grep -q '^{key}=' {env_path}; "
                f"then sed -i {quote(sed_script)} {env_path}; "
                f"else printf '%s\\n' {quote(line)} >> {env_path}; fi"
            )
            _stdout, stderr = run_command(client, command)
            if stderr:
                logger.error(f"Error modifying .env file with command: {command}, error: {stderr}")
                raise RuntimeError(f"Error modifying .env file: {stderr}")
            logger.info(f"Successfully executed command: {command}")
    except Exception as e:
        logger.error(f"Failed to modify .env file: {e}")
        raise
5586
5687def docker_compose_down (client , subspace_dir ):
@@ -63,40 +94,34 @@ def docker_compose_down(client, subspace_dir):
6394 logger .error (f"Failed to run sudo docker compose down -v: { e } " )
6495 raise
6596
66- def modify_env_file (client , subspace_dir , release_version , genesis_hash = None , pot_external_entropy = None ):
67- """Modify the .env file to update the Docker tag, Genesis Hash, and POT_EXTERNAL_ENTROPY using sed ."""
def docker_compose_restart(client, subspace_dir):
    """Restart the compose services located in ``subspace_dir``.

    Runs ``sudo docker compose restart`` from that directory through
    ``run_command``. Any failure is logged and then re-raised to the caller.
    """
    try:
        logger.info(f"Running sudo docker compose restart in {subspace_dir}")
        run_command(client, f'cd {subspace_dir} && sudo docker compose restart')
    except Exception as exc:
        logger.error(f"Failed to run sudo docker compose restart: {exc}")
        raise
91106
92- if stderr_text :
93- logger .error (f"Error modifying .env file with command: { command } , error: { stderr_text } " )
94- raise Exception (f"Error modifying .env file: { stderr_text } " )
95- else :
96- logger .info (f"Successfully executed command: { command } " )
def docker_cleanup(client, subspace_dir):
    """Stop all running containers, then prune stopped containers and all unused images.

    Args:
        client: Connected SSH client passed through to ``run_command``.
        subspace_dir: Remote directory the commands are run from.

    Raises:
        Exception: Re-raised (after logging) if ``run_command`` fails.
    """
    try:
        steps = [
            f'cd {subspace_dir}',
            'running=$(sudo docker ps -q)',
            # `docker stop` with no arguments is an error, which would abort
            # the && chain and skip both prune steps on an idle host.
            'if [ -n "$running" ]; then sudo docker stop $running; fi',
            'sudo docker container prune -f',
            'sudo docker image prune -a -f',
        ]
        command = ' && '.join(steps)
        logger.info(f"Running Docker cleanup commands in {subspace_dir}")
        run_command(client, command)
    except Exception as e:
        logger.error(f"Failed to run Docker cleanup commands: {e}")
        raise
97116
def docker_compose_up(client, subspace_dir):
    """Start the compose services in ``subspace_dir`` in detached mode.

    Runs ``sudo docker compose up -d`` from that directory through
    ``run_command``. Any failure is logged and then re-raised to the caller.
    """
    try:
        logger.info(f"Running sudo docker compose up -d in {subspace_dir}")
        run_command(client, f'cd {subspace_dir} && sudo docker compose up -d')
    except Exception as exc:
        logger.error(f"Failed to run sudo docker compose up -d: {exc}")
        raise
101126
102127def grep_protocol_version (client , retries = 5 , interval = 30 ):
@@ -122,135 +147,114 @@ def grep_protocol_version(client, retries=5, interval=30):
122147 logger .error ("Failed to retrieve protocol version hash after retries." )
123148 return None
124149
125- def docker_compose_up (client , subspace_dir ):
126- """Run sudo docker compose up -d in the subspace directory ."""
def handle_node(client, node, subspace_dir, release_version, pot_external_entropy=None, plot_size=None, cache_percentage=None, network=None, prune=False, restart=False, genesis_hash=None):
    """Redeploy one node: compose down, rewrite ``.env``, compose up.

    The connection is owned by the caller: this function neither opens nor
    closes ``client``, so the caller can keep using it afterwards (for example
    to read the node's logs) and remains responsible for closing it.

    Args:
        client: Connected SSH client for the node.
        node: Node configuration mapping; only ``node['host']`` is read here,
            for the error message.
        subspace_dir: Remote directory holding the compose project and ``.env``.
        release_version: Value written to ``DOCKER_TAG``.
        pot_external_entropy: Optional ``POT_EXTERNAL_ENTROPY`` value.
        plot_size: Optional ``PLOT_SIZE`` value.
        cache_percentage: Optional ``CACHE_PERCENTAGE`` value.
        network: Optional ``NETWORK_NAME`` value.
        prune: When true, run ``docker_cleanup`` after the services are up.
        restart: When true, run ``docker_compose_restart`` after the services are up.
        genesis_hash: Optional ``GENESIS_HASH`` value.

    Raises:
        Exception: Re-raised (after logging) if any step fails, matching the
            other helpers in this file.
    """
    try:
        docker_compose_down(client, subspace_dir)
        modify_env_file(
            client,
            subspace_dir,
            release_version,
            genesis_hash=genesis_hash,
            pot_external_entropy=pot_external_entropy,
            plot_size=plot_size,
            cache_percentage=cache_percentage,
            network=network,
        )
        docker_compose_up(client, subspace_dir)

        if restart:
            docker_compose_restart(client, subspace_dir)

        if prune:
            docker_cleanup(client, subspace_dir)
    except Exception as e:
        logger.error(f"Error handling node {node['host']}: {e}")
        raise
134168
def _with_node_connection(node, connect_message, error_prefix, action):
    """Open an SSH connection to ``node``, run ``action(client)``, always close it.

    Returns ``(True, result_of_action)`` on success and ``(False, None)`` when
    connecting or the action raised; the failure is logged, not propagated.
    """
    client = None  # stays None if ssh_connect raises, so `finally` is safe
    try:
        logger.info(connect_message)
        client = ssh_connect(node['host'], node['user'], node['ssh_key'])
        return True, action(client)
    except Exception as e:
        logger.error(f"{error_prefix}: {e}")
        return False, None
    finally:
        if client:
            client.close()


def main():
    """Command-line entry point: redeploy every node listed in the TOML config.

    Order of operations: the timekeeper node (unless ``--no-timekeeper``), the
    farmer nodes, the RPC nodes (whose logs provide the protocol version hash),
    and finally the bootstrap node, which is only updated when a hash was
    found. A failure on one node is logged and does not stop the others.
    """
    parser = argparse.ArgumentParser(description="Manage Subspace nodes via SSH")
    parser.add_argument('--config', required=True, help='Path to the TOML config file')
    parser.add_argument('--network', required=True, help='Network to update in the .env file, i.e devnet, gemini-3h, taurus')
    parser.add_argument('--release_version', required=True, help='Release version to update in the .env file')
    parser.add_argument('--subspace_dir', default='/home/ubuntu/subspace', help='Path to the Subspace directory')
    parser.add_argument('--pot_external_entropy', help='POT_EXTERNAL_ENTROPY value for all nodes')
    parser.add_argument('--log_level', default='INFO', help='Set the logging level (DEBUG, INFO, WARNING, ERROR)')
    parser.add_argument('--no-timekeeper', action='store_true', help='Disable launching of the timekeeper node')
    parser.add_argument('--prune', action='store_true', help='Stop containers and destroy the Docker images')
    parser.add_argument('--restart', action='store_true', help='Restart the network without wiping the data')
    parser.add_argument('--plot-size', help='Set plot size on the farmer, i.e 10G')
    parser.add_argument('--cache-percentage', help='Set the cache percentage on the farmer, i.e 10')
    args = parser.parse_args()

    # Set logging level based on user input
    logging.getLogger().setLevel(args.log_level.upper())

    # Read configuration from the TOML file
    with open(args.config, 'rb') as f:
        config = tomli.load(f)

    bootstrap_node = config['bootstrap_node']
    farmer_rpc_nodes = config['farmer_rpc_nodes']
    farmer_nodes = [node for node in farmer_rpc_nodes if node['type'] == 'farmer']
    rpc_nodes = [node for node in farmer_rpc_nodes if node['type'] == 'rpc']
    # A config without a timekeeper section is treated like "not specified".
    timekeeper_node = config.get('timekeeper')

    # Options shared by every handle_node call.
    common = {
        'pot_external_entropy': args.pot_external_entropy,
        'network': args.network,
        'prune': args.prune,
        'restart': args.restart,
    }

    # Step 1: Handle the timekeeper node, if enabled
    if not args.no_timekeeper and timekeeper_node:
        _with_node_connection(
            timekeeper_node,
            f"Connecting to timekeeper node {timekeeper_node['host']}...",
            "Error handling timekeeper node",
            lambda client: handle_node(client, timekeeper_node, args.subspace_dir, args.release_version, **common),
        )
    else:
        logger.info("Timekeeper handling is disabled or not specified.")

    # Step 2: Handle farmer nodes
    for node in farmer_nodes:
        _with_node_connection(
            node,
            f"Connecting to farmer node {node['host']}...",
            f"Error handling farmer node {node['host']}",
            lambda client, node=node: handle_node(
                client,
                node,
                args.subspace_dir,
                args.release_version,
                plot_size=args.plot_size,
                cache_percentage=args.cache_percentage,
                **common,
            ),
        )

    # Step 3: Handle RPC nodes and read the protocol version hash from their logs
    protocol_version_hash = None
    for node in rpc_nodes:
        connect_message = f"Connecting to RPC node {node['host']}..."
        error_prefix = f"Error handling RPC node {node['host']}"

        updated, _ = _with_node_connection(
            node,
            connect_message,
            error_prefix,
            lambda client, node=node: handle_node(client, node, args.subspace_dir, args.release_version, **common),
        )
        if not updated:
            continue

        logger.info(f"Waiting for RPC node {node['host']} to start...")
        sleep(30)  # Adjust sleep time as necessary

        # Read the logs over a fresh connection so this step never depends on
        # the state of the connection that was used for the update.
        _, found_hash = _with_node_connection(node, connect_message, error_prefix, grep_protocol_version)
        if found_hash:
            # Keep a hash once found; a later node failing must not erase it.
            protocol_version_hash = found_hash
        else:
            logger.error(f"Failed to retrieve protocol version hash on RPC node {node['host']}")

    # Step 4: Handle the bootstrap node, using the protocol version hash if available
    if protocol_version_hash:
        def update_bootstrap(client):
            # The bootstrap update must write GENESIS_HASH, so the steps are
            # issued directly through modify_env_file, which accepts it.
            docker_compose_down(client, args.subspace_dir)
            modify_env_file(
                client,
                args.subspace_dir,
                args.release_version,
                genesis_hash=protocol_version_hash,
                pot_external_entropy=args.pot_external_entropy,
                network=args.network,
            )
            docker_compose_up(client, args.subspace_dir)
            if args.restart:
                docker_compose_restart(client, args.subspace_dir)
            if args.prune:
                docker_cleanup(client, args.subspace_dir)

        _with_node_connection(
            bootstrap_node,
            f"Connecting to the bootstrap node {bootstrap_node['host']}...",
            "Error handling bootstrap node",
            update_bootstrap,
        )
    else:
        logger.error("Protocol version hash not found; skipping bootstrap node update.")
254258
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments