@@ -35,28 +35,23 @@ def ssh_connect(host, user, key_file):
3535 logger .error (f"Failed to connect to { host } : { e } " )
3636 raise
3737
def run_command(client, command):
    """Run a command over SSH and return the output.

    Args:
        client: Connected SSH client exposing ``exec_command`` (presumably a
            paramiko ``SSHClient`` returned by ``ssh_connect`` -- confirm).
        command: Shell command line to execute on the remote host.

    Returns:
        tuple: ``(output, error)`` -- the remote stdout and stderr text,
        decoded as UTF-8.

    Raises:
        Exception: Any failure while executing the command or reading its
            streams is logged and re-raised unchanged.
    """
    # stderr text containing one of these markers is treated as a Docker
    # status update (INFO) rather than a real error.
    docker_status_keywords = (
        "Stopping", "Stopped", "Creating", "Started", "Removing", "Removed",
    )
    try:
        _stdin, stdout, stderr = client.exec_command(command)

        # errors='replace' keeps a stray non-UTF-8 byte in the remote output
        # from raising and discarding the whole command result.
        output = stdout.read().decode('utf-8', errors='replace')
        error = stderr.read().decode('utf-8', errors='replace')

        # Collect the exit status only after both streams are drained, so a
        # failing command is still visible when stderr is empty or merely
        # contains a Docker status keyword.
        exit_status = stdout.channel.recv_exit_status()

        # Treat Docker status updates as INFO instead of ERROR
        if error:
            if any(keyword in error for keyword in docker_status_keywords):
                logger.info(f"Command output: {error.strip()} ")
            else:
                logger.error(f"Error running command: {error.strip()} ")

        if exit_status != 0:
            # Report only; the (output, error) return contract is unchanged.
            logger.error(f"Command exited with status {exit_status}: {command}")

        return output, error
    except Exception as e:
        logger.error(f"Failed to run command: {e} ")
        raise
6055
6156def modify_env_file (client , subspace_dir , release_version , genesis_hash = None , pot_external_entropy = None , plot_size = None , cache_percentage = None , network = None ):
6257 """Modify the .env file to update various settings."""
@@ -94,22 +89,42 @@ def docker_compose_down(client, subspace_dir):
9489 logger .error (f"Failed to run sudo docker compose down -v: { e } " )
9590 raise
9691
def docker_compose_restart(client, subspace_dir, docker_tag=None):
    """Restart the compose services located in ``subspace_dir``.

    Args:
        client: SSH client handed through to ``modify_env_file`` and
            ``run_command``.
        subspace_dir: Remote directory the restart command is run from.
        docker_tag: Optional tag; when truthy it is passed to
            ``modify_env_file`` as the release version before restarting.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        if docker_tag:
            # NOTE(review): `docker compose restart` may not pick up changes
            # made to the environment/compose configuration -- confirm that
            # the updated tag actually takes effect with a plain restart.
            logger.info(f"Updating DOCKER_TAG to {docker_tag} in {subspace_dir} /.env")
            modify_env_file(client, subspace_dir, docker_tag)

        logger.info(f"Running sudo docker compose restart in {subspace_dir} ")
        run_command(client, f'cd {subspace_dir} && sudo docker compose restart')
    except Exception as exc:
        logger.error(f"Failed to run sudo docker compose restart: {exc} ")
        raise
106108
def docker_cleanup(client, subspace_dir):
    """Stop running containers, then prune unused containers and images.

    The stop command is issued only when ``docker ps -q`` reports at least
    one running container; the prune commands always run afterwards.

    Args:
        client: SSH client handed through to ``run_command``.
        subspace_dir: Remote directory every command is run from.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        # `docker ps -q` prints one id per running container; blank output
        # means there is nothing to stop.
        running_ids, _ = run_command(client, f'cd {subspace_dir} && sudo docker ps -q')

        if not running_ids.strip():
            logger.info("No running containers found to stop.")
        else:
            logger.info(f"Stopping running containers in {subspace_dir} ")
            run_command(client, f'cd {subspace_dir} && sudo docker stop $(sudo docker ps -q)')

        # Pruning happens regardless of whether anything had to be stopped.
        logger.info(f"Pruning unused containers and images in {subspace_dir} ")
        run_command(
            client,
            f'cd {subspace_dir} && sudo docker container prune -f && sudo docker image prune -a -f',
        )
    except Exception as exc:
        logger.error(f"Failed to run Docker cleanup commands: {exc} ")
        raise
@@ -147,16 +162,26 @@ def grep_protocol_version(client, retries=5, interval=30):
147162 logger .error ("Failed to retrieve protocol version hash after retries." )
148163 return None
149164
150- def handle_node (client , node , subspace_dir , release_version , pot_external_entropy = None , plot_size = None , cache_percentage = None , network = None , prune = False , restart = False ):
165+ def handle_node (client , node , subspace_dir , release_version , pot_external_entropy = None ,
166+ plot_size = None , cache_percentage = None , network = None , prune = False , restart = False ,
167+ update_genesis_hash = False , genesis_hash = None ):
151168 """Generic function to handle different node types with specified actions."""
152169 try :
153- if restart :
154- docker_compose_restart (client , subspace_dir )
155- elif prune :
170+ if prune :
156171 docker_cleanup (client , subspace_dir )
172+ elif restart :
173+ docker_compose_restart (client , subspace_dir )
157174 else :
158175 docker_compose_down (client , subspace_dir )
159- modify_env_file (client , subspace_dir , release_version , pot_external_entropy = pot_external_entropy , plot_size = plot_size , cache_percentage = cache_percentage , network = network )
176+
177+ # Update .env file with the appropriate parameters
178+ modify_env_file (client , subspace_dir , release_version ,
179+ pot_external_entropy = pot_external_entropy ,
180+ plot_size = plot_size ,
181+ cache_percentage = cache_percentage ,
182+ network = network ,
183+ genesis_hash = genesis_hash if update_genesis_hash else None )
184+
160185 docker_compose_up (client , subspace_dir )
161186
162187 except Exception as e :
@@ -225,11 +250,24 @@ def main():
225250 try :
226251 logger .info (f"Connecting to RPC node { node ['host' ]} ..." )
227252 client = ssh_connect (node ['host' ], node ['user' ], node ['ssh_key' ])
228- handle_node (client , node , args .subspace_dir , args .release_version , pot_external_entropy = args .pot_external_entropy , network = args .network , prune = args .prune , restart = args .restart )
229253
230- # If this is an RPC node, grep the logs for protocol version hash
254+ # Handle node operations (prune/restart will be managed here)
255+ handle_node (client , node , args .subspace_dir , args .release_version ,
256+ pot_external_entropy = args .pot_external_entropy ,
257+ network = args .network ,
258+ prune = args .prune ,
259+ restart = args .restart )
260+
261+ # Skip protocol version extraction if prune or restart is specified
262+ if args .prune or args .restart :
263+ logger .info (f"Skipping protocol version extraction for RPC node { node ['host' ]} due to prune/restart." )
264+ continue
265+
266+ # If this is an RPC node, wait and then extract protocol version hash
231267 logger .info (f"Waiting for RPC node { node ['host' ]} to start..." )
232268 sleep (30 ) # Adjust sleep time as necessary
269+
270+ # Attempt to grep the protocol version from logs
233271 protocol_version_hash = grep_protocol_version (client )
234272
235273 if not protocol_version_hash :
@@ -242,18 +280,29 @@ def main():
242280 client .close ()
243281
244282 # Step 4: Handle the bootstrap node, using the protocol version hash if available
245- if protocol_version_hash :
246- try :
247- logger .info (f"Connecting to the bootstrap node { bootstrap_node ['host' ]} ..." )
248- client = ssh_connect (bootstrap_node ['host' ], bootstrap_node ['user' ], bootstrap_node ['ssh_key' ])
249- handle_node (client , bootstrap_node , args .subspace_dir , args .release_version , genesis_hash = protocol_version_hash , pot_external_entropy = args .pot_external_entropy , network = args .network , prune = args .prune , restart = args .restart )
250- except Exception as e :
251- logger .error (f"Error handling bootstrap node: { e } " )
252- finally :
253- if client :
254- client .close ()
255- else :
256- logger .error ("Protocol version hash not found; skipping bootstrap node update." )
283+ try :
284+ logger .info (f"Connecting to the bootstrap node { bootstrap_node ['host' ]} ..." )
285+ client = ssh_connect (bootstrap_node ['host' ], bootstrap_node ['user' ], bootstrap_node ['ssh_key' ])
286+
287+ # Warn about missing protocol version hash before proceeding with the bootstrap node update
288+ if not protocol_version_hash :
289+ logger .warning ("Protocol version hash not found; proceeding with bootstrap node without genesis_hash update." )
290+
291+ # Handle node operations, updating genesis_hash only if protocol_version_hash is available
292+ handle_node (client , bootstrap_node , args .subspace_dir , args .release_version ,
293+ pot_external_entropy = args .pot_external_entropy ,
294+ network = args .network ,
295+ prune = args .prune ,
296+ restart = args .restart ,
297+ update_genesis_hash = bool (protocol_version_hash ),
298+ genesis_hash = protocol_version_hash )
299+
300+ except Exception as e :
301+ logger .error (f"Error handling bootstrap node: { e } " )
302+ finally :
303+ if client :
304+ client .close ()
305+
257306
258307if __name__ == '__main__' :
259308 main ()
0 commit comments