Skip to content

Commit bf83dbe

Browse files
authored
RemoteSlurmJob: add support for passing addl_slurm_kwargs (0.2.0 release) (#4)
* RemoteSlurm: implement addl_slurm_kwargs
1 parent 7b08942 commit bf83dbe

File tree

3 files changed

+35
-12
lines changed

3 files changed

+35
-12
lines changed

crimpl/common.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import json as _json
66
from time import sleep as _sleep
77

8-
__version__ = '0.1.0'
8+
__version__ = '0.2.0'
99

1010
def _new_job_name():
1111
return _datetime.now().strftime('%Y.%m.%d-%H.%M.%S')
@@ -18,9 +18,9 @@ def _run_cmd(cmd, detach=False, log_cmd=True, allow_retries=True):
1818
while True:
1919
try:
2020
if detach:
21-
ret = _subprocess.Popen(cmd, shell=True, stderr=_subprocess.DEVNULL)
21+
ret = _subprocess.Popen(cmd, shell=True, stderr=_subprocess.STDOUT)
2222
else:
23-
ret = _subprocess.check_output(cmd, shell=True, stderr=_subprocess.DEVNULL).decode('utf-8').strip()
23+
ret = _subprocess.check_output(cmd, shell=True, stderr=_subprocess.STDOUT).decode('utf-8').strip()
2424
except _subprocess.CalledProcessError as err:
2525
# print("error output: {}".format(err.output))
2626
if allow_retries and err.returncode == 255 and i < 5:
@@ -315,10 +315,18 @@ def _submit_script_cmds(self, script, files, ignore_files,
315315
elif not self.conda_installed and conda_env is not False:
316316
raise ValueError("conda is not installed on the remote server. Install manually or call server.install_conda()")
317317

318-
_slurm_kwarg_to_prefix = {'nprocs': '-n ',
319-
'walltime': '-t ',
320-
'mail_type': '--mail-type=',
321-
'mail_user': '--mail-user='}
318+
def _slurm_kwarg_to_prefix(k):
319+
exceptions = {'nprocs': '-n ',
320+
'walltime': '-t ',
321+
'mail_type': '--mail-type=',
322+
'mail_user': '--mail-user='}
323+
if k in exceptions.keys():
324+
return exceptions.get(k)
325+
elif len(k) == 1:
326+
return f"-{k} "
327+
else:
328+
return f"--{k}="
329+
322330

323331
create_env_cmd, conda_env_path = self._create_conda_env(conda_env, isolate_env, job_name=job_name, check_if_exists=True, run_cmd=False)
324332

@@ -336,7 +344,7 @@ def _submit_script_cmds(self, script, files, ignore_files,
336344
sched_script += ["#SBATCH -J {}".format(job_name)]
337345
for k,v in sched_kwargs.items():
338346
if v is None: continue
339-
prefix = _slurm_kwarg_to_prefix.get(k, False)
347+
prefix = _slurm_kwarg_to_prefix(k)
340348
if prefix is False:
341349
raise NotImplementedError("slurm command for {} not implemented".format(k))
342350
if k=='mail_type' and isinstance(v, list):

crimpl/remoteslurm.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ def submit_script(self, script, files=[],
258258
walltime='2-00:00:00',
259259
mail_type='END,FAIL',
260260
mail_user=None,
261+
addl_slurm_kwargs={},
261262
ignore_files=[],
262263
wait_for_job_status=False,
263264
trial_run=False):
@@ -301,6 +302,10 @@ def submit_script(self, script, files=[],
301302
* `mail_user` (string, optional, default=None): email to send notifications.
302303
If not provided or None, will default to the value in <RemoteSlurmServer.mail_user>.
303304
Prepended to `script` as "#SBATCH --mail-user=mail_user"
305+
* `addl_slurm_kwargs` (dict, optional, default={}): additional kwargs
306+
to pass to slurm. Entries will be prepended to `script` as
307+
"#SBATCH -<k> <v>" or "#SBATCH --<k>=<v>" depending on whether the
308+
key (`k`) is a single character or multiple characters, respectively.
304309
* `ignore_files` (list, optional, default=[]): list of filenames on the
305310
remote server to ignore when calling <<class>.check_output>
306311
* `wait_for_job_status` (bool or string or list, optional, default=False):
@@ -341,7 +346,8 @@ def submit_script(self, script, files=[],
341346
nprocs=nprocs,
342347
walltime=walltime,
343348
mail_type=mail_type,
344-
mail_user=mail_user if mail_user is not None else self.server.mail_user)
349+
mail_user=mail_user if mail_user is not None else self.server.mail_user,
350+
**addl_slurm_kwargs)
345351

346352
if trial_run:
347353
return cmds
@@ -351,7 +357,13 @@ def submit_script(self, script, files=[],
351357
# TODO: get around need to add IP to known hosts (either by
352358
# expecting and answering yes, or by looking into subnet options)
353359

354-
out = self.server._run_server_cmd(cmd)
360+
try:
361+
out = self.server._run_server_cmd(cmd)
362+
except _subprocess.CalledProcessError as e:
363+
if addl_slurm_kwargs:
364+
raise ValueError(f"failed to submit to scheduler, addl_slurm_kwargs may be invalid. Original error: {e.output}")
365+
raise ValueError(f"failed to submit to scheduler. Original error: {e.output}")
366+
355367
if "sbatch" in cmd:
356368
self._slurm_id = out.split(' ')[-1]
357369

@@ -507,6 +519,7 @@ def submit_job(self, script, files=[],
507519
walltime='2-00:00:00',
508520
mail_type='END,FAIL',
509521
mail_user=None,
522+
addl_slurm_kwargs={},
510523
ignore_files=[],
511524
wait_for_job_status=False,
512525
trial_run=False):
@@ -525,6 +538,7 @@ def submit_job(self, script, files=[],
525538
* `walltime`: passed to <RemoteSlurmJob.submit_script>
526539
* `mail_type`: passed to <RemoteSlurmJob.submit_script>
527540
* `mail_user`: passed to <RemoteSlurmJob.submit_script>
541+
* `addl_slurm_kwargs`: passed to <RemoteSlurmJob.submit_script>
528542
* `ignore_files`: passed to <RemoteSlurmJob.submit_script>
529543
* `wait_for_job_status`: passed to <RemoteSlurmJob.submit_script>
530544
* `trial_run`: passed to <RemoteSlurmJob.submit_script>
@@ -543,6 +557,7 @@ def submit_job(self, script, files=[],
543557
walltime=walltime,
544558
mail_type=mail_type,
545559
mail_user=mail_user,
560+
addl_slurm_kwargs=addl_slurm_kwargs,
546561
ignore_files=ignore_files,
547562
wait_for_job_status=wait_for_job_status,
548563
trial_run=trial_run)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
long_description = fh.read()
77

88
setup(name='crimpl',
9-
version='0.1.0',
9+
version='0.2.0',
1010
description='Compute Resources Made Simple',
1111
long_description=long_description,
1212
long_description_content_type="text/markdown",
1313
author='Kyle Conroy',
1414
author_email='[email protected]',
1515
url='https://www.github.com/kecnry/crimpl',
16-
download_url = 'https://github.com/kecnry/crimpl/tarball/0.1.0',
16+
download_url = 'https://github.com/kecnry/crimpl/tarball/0.2.0',
1717
packages=['crimpl'],
1818
install_requires=['boto3'],
1919
classifiers=[

0 commit comments

Comments
 (0)