Skip to content

Commit 133d884

Browse files
committed
apply review
1 parent cbbaa26 commit 133d884

File tree

1 file changed

+71
-8
lines changed

1 file changed

+71
-8
lines changed

tests/cmdline/commands/test_process.py

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from aiida.common.log import LOG_LEVEL_REPORT
2626
from aiida.engine import Process, ProcessState
2727
from aiida.engine.processes import control as process_control
28+
from aiida.engine.utils import exponential_backoff_retry
2829
from aiida.orm import CalcJobNode, Group, WorkChainNode, WorkflowNode, WorkFunctionNode
2930
from tests.utils.processes import WaitProcess
3031

@@ -53,6 +54,7 @@ def start_daemon_worker_in_foreground_and_redirect_streams(
5354

5455
try:
5556
pid = os.getpid()
57+
# For easier debugging you can change these to stdout
5658
sys.stdout = open(log_dir / f'worker-{pid}.out', 'w')
5759
sys.stderr = open(log_dir / f'worker-{pid}.err', 'w')
5860
start_daemon_worker(False, aiida_profile_name)
@@ -72,10 +74,22 @@ def mock_open(_):
7274
raise Exception('Mock open exception')
7375

7476
@staticmethod
75-
async def mock_exponential_backoff_retry(*_, **__):
77+
async def exponential_backoff_retry_fail_upload(fct: t.Callable[..., t.Any], *args, **kwargs):
7678
from aiida.common.exceptions import TransportTaskException
7779

78-
raise TransportTaskException
80+
if 'do_upload' in fct.__name__:
81+
raise TransportTaskException
82+
else:
83+
return await exponential_backoff_retry(fct, *args, **kwargs)
84+
85+
@staticmethod
86+
async def exponential_backoff_retry_fail_kill(fct: t.Callable[..., t.Any], *args, **kwargs):
87+
from aiida.common.exceptions import TransportTaskException
88+
89+
if 'do_kill' in fct.__name__:
90+
raise TransportTaskException
91+
else:
92+
return await exponential_backoff_retry(fct, *args, **kwargs)
7993

8094

8195
@pytest.fixture(scope='function')
@@ -213,11 +227,12 @@ def make_a_builder(sleep_seconds=0):
213227

214228
@pytest.mark.requires_rmq
215229
@pytest.mark.usefixtures('started_daemon_client')
216-
def test_process_kill_failng_ebm(
230+
def test_process_kill_failing_ebm_upload(
217231
fork_worker_context, submit_and_await, aiida_code_installed, run_cli_command, monkeypatch
218232
):
219-
"""9) Kill a process that is paused after EBM (5 times failed). It should be possible to kill it normally.
220-
# (e.g. in scenarios that transport is working again)
233+
"""Kill a process that is waiting after failed EBM during upload. It should be possible to kill it normally.
234+
235+
A process whose upload failed and that is still waiting (e.g. in scenarios where the transport is working again) is then killed with a regular, non-forced kill command.
221236
"""
222237
from aiida.orm import Int
223238

@@ -232,7 +247,10 @@ def make_a_builder(sleep_seconds=0):
232247

233248
kill_timeout = 10
234249

235-
monkeypatch_args = ('aiida.engine.utils.exponential_backoff_retry', MockFunctions.mock_exponential_backoff_retry)
250+
monkeypatch_args = (
251+
'aiida.engine.utils.exponential_backoff_retry',
252+
MockFunctions.exponential_backoff_retry_fail_upload,
253+
)
236254
with fork_worker_context(monkeypatch.setattr, monkeypatch_args):
237255
node = submit_and_await(make_a_builder(), ProcessState.WAITING)
238256
await_condition(
@@ -241,11 +259,56 @@ def make_a_builder(sleep_seconds=0):
241259
timeout=kill_timeout,
242260
)
243261

244-
# should restart EBM and be again not successful
262+
# kill should start EBM and should successfully kill
263+
run_cli_command(cmd_process.process_kill, [str(node.pk), '--wait'])
264+
await_condition(lambda: node.is_killed, timeout=kill_timeout)
265+
266+
267+
@pytest.mark.requires_rmq
268+
@pytest.mark.usefixtures('started_daemon_client')
269+
def test_process_kill_failing_ebm_kill(
270+
fork_worker_context, submit_and_await, aiida_code_installed, run_cli_command, monkeypatch
271+
):
272+
"""Kill a process with a failing EBM during the kill.
273+
274+
Killing a process tries to gracefully cancel the job on the remote node. If there are connection problems it retries
275+
it using the EBM. If this fails, another kill command can be sent to restart the cancellation of the job on the scheduler.
276+
"""
277+
from aiida.orm import Int
278+
279+
code = aiida_code_installed(default_calc_job_plugin='core.arithmetic.add', filepath_executable='/bin/bash')
280+
281+
def make_a_builder(sleep_seconds=0):
282+
builder = code.get_builder()
283+
builder.x = Int(1)
284+
builder.y = Int(1)
285+
builder.metadata.options.sleep = sleep_seconds
286+
return builder
287+
288+
kill_timeout = 10
289+
290+
monkeypatch_args = (
291+
'aiida.engine.utils.exponential_backoff_retry',
292+
MockFunctions.exponential_backoff_retry_fail_kill,
293+
)
294+
# from aiida.engine.utils import exponential_backoff_retry
295+
# monkeypatch_args = ('aiida.engine.utils.exponential_backoff_retry', exponential_backoff_retry)
296+
with fork_worker_context(monkeypatch.setattr, monkeypatch_args):
297+
node = submit_and_await(make_a_builder(kill_timeout + 10), ProcessState.WAITING, timeout=kill_timeout)
298+
await_condition(
299+
lambda: node.process_status == 'Monitoring scheduler: job state RUNNING',
300+
timeout=kill_timeout,
301+
)
302+
303+
# kill should start the EBM, which should not succeed
304+
run_cli_command(cmd_process.process_kill, [str(node.pk), '--wait'])
305+
await_condition(lambda: not node.is_killed, timeout=kill_timeout)
306+
307+
# kill should restart the EBM, which should again not succeed
245308
run_cli_command(cmd_process.process_kill, [str(node.pk), '--wait'])
246309
await_condition(lambda: not node.is_killed, timeout=kill_timeout)
247310

248-
# should skip EBM and successfully kill the process
311+
# force kill should skip EBM and successfully kill the process
249312
run_cli_command(cmd_process.process_kill, [str(node.pk), '-F', '--wait'])
250313
await_condition(lambda: node.is_killed, timeout=kill_timeout)
251314

0 commit comments

Comments
 (0)