Skip to content

Commit

Permalink
RemoteSlurmJob: add support for passing addl_slurm_kwargs (0.2.0 rele…
Browse files Browse the repository at this point in the history
…ase) (#4)

* RemoteSlurm: implement addl_slurm_kwargs
  • Loading branch information
kecnry authored Jun 24, 2022
1 parent 7b08942 commit bf83dbe
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 12 deletions.
24 changes: 16 additions & 8 deletions crimpl/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json as _json
from time import sleep as _sleep

__version__ = '0.1.0'
__version__ = '0.2.0'

def _new_job_name():
return _datetime.now().strftime('%Y.%m.%d-%H.%M.%S')
Expand All @@ -18,9 +18,9 @@ def _run_cmd(cmd, detach=False, log_cmd=True, allow_retries=True):
while True:
try:
if detach:
ret = _subprocess.Popen(cmd, shell=True, stderr=_subprocess.DEVNULL)
ret = _subprocess.Popen(cmd, shell=True, stderr=_subprocess.STDOUT)
else:
ret = _subprocess.check_output(cmd, shell=True, stderr=_subprocess.DEVNULL).decode('utf-8').strip()
ret = _subprocess.check_output(cmd, shell=True, stderr=_subprocess.STDOUT).decode('utf-8').strip()
except _subprocess.CalledProcessError as err:
# print("error output: {}".format(err.output))
if allow_retries and err.returncode == 255 and i < 5:
Expand Down Expand Up @@ -315,10 +315,18 @@ def _submit_script_cmds(self, script, files, ignore_files,
elif not self.conda_installed and conda_env is not False:
raise ValueError("conda is not installed on the remote server. Install manually or call server.install_conda()")

_slurm_kwarg_to_prefix = {'nprocs': '-n ',
'walltime': '-t ',
'mail_type': '--mail-type=',
'mail_user': '--mail-user='}
def _slurm_kwarg_to_prefix(k):
exceptions = {'nprocs': '-n ',
'walltime': '-t ',
'mail_type': '--mail-type=',
'mail_user': '--mail-user='}
if k in exceptions.keys():
return exceptions.get(k)
elif len(k) == 1:
return f"-{k} "
else:
return f"--{k}="


create_env_cmd, conda_env_path = self._create_conda_env(conda_env, isolate_env, job_name=job_name, check_if_exists=True, run_cmd=False)

Expand All @@ -336,7 +344,7 @@ def _submit_script_cmds(self, script, files, ignore_files,
sched_script += ["#SBATCH -J {}".format(job_name)]
for k,v in sched_kwargs.items():
if v is None: continue
prefix = _slurm_kwarg_to_prefix.get(k, False)
prefix = _slurm_kwarg_to_prefix(k)
if prefix is False:
raise NotImplementedError("slurm command for {} not implemented".format(k))
if k=='mail_type' and isinstance(v, list):
Expand Down
19 changes: 17 additions & 2 deletions crimpl/remoteslurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def submit_script(self, script, files=[],
walltime='2-00:00:00',
mail_type='END,FAIL',
mail_user=None,
addl_slurm_kwargs={},
ignore_files=[],
wait_for_job_status=False,
trial_run=False):
Expand Down Expand Up @@ -301,6 +302,10 @@ def submit_script(self, script, files=[],
* `mail_user` (string, optional, default=None): email to send notifications.
If not provided or None, will default to the value in <RemoteSlurmServer.mail_user>.
Prepended to `script` as "#SBATCH --mail-user=mail_user"
* `addl_slurm_kwargs` (dict, optional, default={}): additional kwargs
to pass to slurm. Entries will be prepended to `script` as
"#SBATCH -<k> <v>" or "#SBATCH --<k>=<v>" depending on whether the
key (`k`) is a single character or multiple characters, respectively.
* `ignore_files` (list, optional, default=[]): list of filenames on the
remote server to ignore when calling <<class>.check_output>
* `wait_for_job_status` (bool or string or list, optional, default=False):
Expand Down Expand Up @@ -341,7 +346,8 @@ def submit_script(self, script, files=[],
nprocs=nprocs,
walltime=walltime,
mail_type=mail_type,
mail_user=mail_user if mail_user is not None else self.server.mail_user)
mail_user=mail_user if mail_user is not None else self.server.mail_user,
**addl_slurm_kwargs)

if trial_run:
return cmds
Expand All @@ -351,7 +357,13 @@ def submit_script(self, script, files=[],
# TODO: get around need to add IP to known hosts (either by
# expecting and answering yes, or by looking into subnet options)

out = self.server._run_server_cmd(cmd)
try:
out = self.server._run_server_cmd(cmd)
except _subprocess.CalledProcessError as e:
if addl_slurm_kwargs:
raise ValueError(f"failed to submit to scheduler, addl_slurm_kwargs may be invalid. Original error: {e.output}")
raise ValueError(f"failed to submit to scheduler. Original error: {e.output}")

if "sbatch" in cmd:
self._slurm_id = out.split(' ')[-1]

Expand Down Expand Up @@ -507,6 +519,7 @@ def submit_job(self, script, files=[],
walltime='2-00:00:00',
mail_type='END,FAIL',
mail_user=None,
addl_slurm_kwargs={},
ignore_files=[],
wait_for_job_status=False,
trial_run=False):
Expand All @@ -525,6 +538,7 @@ def submit_job(self, script, files=[],
* `walltime`: passed to <RemoteSlurmJob.submit_script>
* `mail_type`: passed to <RemoteSlurmJob.submit_script>
* `mail_user`: passed to <RemoteSlurmJob.submit_script>
* `addl_slurm_kwargs`: passed to <RemoteSlurmJob.submit_script>
* `ignore_files`: passed to <RemoteSlurmJob.submit_script>
* `wait_for_job_status`: passed to <RemoteSlurmJob.submit_script>
* `trial_run`: passed to <RemoteSlurmJob.submit_script>
Expand All @@ -543,6 +557,7 @@ def submit_job(self, script, files=[],
walltime=walltime,
mail_type=mail_type,
mail_user=mail_user,
addl_slurm_kwargs=addl_slurm_kwargs,
ignore_files=ignore_files,
wait_for_job_status=wait_for_job_status,
trial_run=trial_run)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
long_description = fh.read()

setup(name='crimpl',
version='0.1.0',
version='0.2.0',
description='Compute Resources Made Simple',
long_description=long_description,
long_description_content_type="text/markdown",
author='Kyle Conroy',
author_email='[email protected]',
url='https://www.github.com/kecnry/crimpl',
download_url = 'https://github.com/kecnry/crimpl/tarball/0.1.0',
download_url = 'https://github.com/kecnry/crimpl/tarball/0.2.0',
packages=['crimpl'],
install_requires=['boto3'],
classifiers=[
Expand Down

0 comments on commit bf83dbe

Please sign in to comment.