@@ -84,13 +84,20 @@ def run_slurm(args, target, cmd):
8484 stdout = subprocess .PIPE ,
8585 cwd = os .getcwd ())
8686 out , _ = proc .communicate ()
87+
88+ if proc .returncode != 0 :
89+ return None
90+
8791 return out .split ()[- 1 ].decode ()
8892
8993# wait for all jobs (not sbatch) to complete
9094def handle_slurm_procs (args , jobids ):
95+ after = ':' .join (jobid for jobid in jobids if jobid )
96+
9197 # pylint: disable=consider-using-with
92- wait = subprocess .Popen ([args .sbatch , '--nodes=1' , '--nodelist={0}' .format (args .hosts [1 ]),
93- '--dependency' , 'after:' + ':' .join (jobids ), '--wait' , '/dev/stdin' ],
98+    wait = subprocess .Popen ([args .sbatch , '--nodes=1' , '--nodelist={0}' .format (args .hosts [1 ])] +
99+                             (['--dependency' , 'after:' + after ] if len (after ) != 0 else []) +
100+                            ['--wait' , '/dev/stdin' ],
94101 stdin = subprocess .PIPE ,
95102 stdout = subprocess .DEVNULL , # Python 3.3
96103 shell = True )
@@ -152,36 +159,58 @@ def group_dirs(dirs, splits, sort):
152159
153160# Step 3
154161# Write the group to a per-node file and start jobs
162+ # pylint: disable=too-many-locals
155163def schedule_subtrees (args , dir_count , group_size , groups , subtree_cmd ):
156164 targets = args .hosts [0 ]
157- print ( 'Splitting {0} paths into {1} groups of max size {2}' . format ( dir_count ,
158- len ( targets ),
159- group_size ))
165+
166+ # pylint: disable=unnecessary-lambda-assignment
167+ gen_filename = lambda idx : os . path . realpath ( '{0}.{1}' . format ( args . group_file_prefix , idx ))
160168
161169 procs = []
162- for i , (group , target ) in enumerate (zip (groups , targets )):
163- count = len (group )
170+ if args .use_existing_group_files :
171+ print ('Using existing files' )
172+ for i , target in enumerate (targets ):
173+ filename = gen_filename (i ) # files not mapped to targets are ignored
164174
165- if count == 0 :
166- break
 175+            # not checking for existence of file - if it doesn't exist, -D will fail
167176
168- print (' Range {0}: {1} path{2} on {3}' .format (i , count , 's' if count != 1 else '' , target ))
169- print (' {0} {1}' .format (group [0 ], group [- 1 ]))
177+ print (' Range {0}: Contents of {1} on {2}' .format (i , filename , target ))
170178
171- if args .dry_run :
172- continue
179+ if args .dry_run :
180+ continue
181+
182+ cmd = subtree_cmd (args , filename , i , target )
173183
174- # write group to per-node file
175- filename = os .path .realpath ('{0}.{1}' .format (args .group_file_prefix , i ))
176- with open (filename , 'w' , encoding = 'utf-8' ) as f :
177- for path in group :
178- f .write (path )
179- f .write ('\n ' )
184+ # run the command to process the subtree
185+ procs += [DISTRIBUTORS [args .distributor ][0 ](args , target , cmd )]
186+ else :
187+ print ('Splitting {0} paths into {1} groups of max size {2}' .format (dir_count ,
188+ len (targets ),
189+ group_size ))
190+ for i , (group , target ) in enumerate (zip (groups , targets )):
180191
181- cmd = subtree_cmd ( args , filename , i , target )
192+ count = len ( group )
182193
183- # run the command to process the subtree
184- procs += [DISTRIBUTORS [args .distributor ][0 ](args , target , cmd )]
194+ if count == 0 :
195+ break
196+
197+ print (' Range {0}: {1} path{2} on {3}' .format (i , count , 's' if count != 1 else '' , target ))
198+ print (' {0} {1}' .format (group [0 ], group [- 1 ]))
199+
200+ if args .dry_run :
201+ continue
202+
203+ # write group to per-node file
204+ filename = gen_filename (i )
205+ with open (filename , 'w' , encoding = 'utf-8' ) as f :
206+ for path in group :
207+ f .write (path )
208+ f .write ('\n ' )
209+
210+ cmd = subtree_cmd (args , filename , i , target )
211+
212+ # run the command to process the subtree
213+ procs += [DISTRIBUTORS [args .distributor ][0 ](args , target , cmd )]
185214
186215 return procs
187216
@@ -221,8 +250,13 @@ def clock_diff(start, end):
221250def distribute_work (args , root , schedule_subtree_func , schedule_top_func ):
222251 start = clock ()
223252
224- dirs = dirs_at_level (root , args .level )
225- group_size , groups = group_dirs (dirs , len (args .hosts [0 ]), args .sort )
253+ if args .use_existing_group_files :
254+ dirs = []
255+ group_size = None
256+ groups = None
257+ else :
258+ dirs = dirs_at_level (root , args .level )
259+ group_size , groups = group_dirs (dirs , len (args .hosts [0 ]), args .sort )
226260
227261 # launch jobs in parallel
228262 procs = schedule_subtrees (args , len (dirs ), group_size , groups ,
@@ -286,11 +320,15 @@ def parse_args(name, desc):
286320
287321 parser .add_argument ('--dry-run' , action = 'store_true' )
288322
 289-    parser .add_argument ('--group_file_prefix' , metavar = 'path' ,
 323+    parser .add_argument ('--group-file-prefix' , metavar = 'path' ,
290324 type = str ,
291325 default = 'path_list' ,
292326 help = 'prefix for file containing paths to be processed by one node' )
293327
328+ parser .add_argument ('--use-existing-group-files' ,
329+ action = 'store_true' ,
330+ help = 'use existing group files (up to the number of targets) instead of running find(1)' )
331+
294332 parser .add_argument ('--sort' ,
295333 choices = SORT_DIRS .keys (),
296334 default = 'path' ,
0 commit comments