
Test Failure on Python 3.9: test_multiprocessing_gpu_stub_multi_cpu_deterministic_backend #403

Description

@LucasDedieu

I ran the Pytest CI workflow on GitHub Actions, and the test_multiprocessing_gpu_stub_multi_cpu_deterministic_backend test failed on Python 3.9.
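
For reference, the failure can be reproduced with something like the sketch below, adapted from the failing test shown in the logs. The frozen_ml_nlp fixture builds a pipeline with eds.sentences, eds.transformer and eds.ner_crf (see the pipeline repr in the traceback); the add_pipe configs for the trainable components are placeholders and depend on your model setup, so this is a sketch rather than a drop-in script:

    import random
    import edsnlp

    # Equivalent of the frozen_ml_nlp fixture: eds.sentences plus the
    # trainable eds.transformer / eds.ner_crf components, which are what
    # route batches through the GPU worker. Configs are placeholders.
    nlp = edsnlp.blank("eds")
    nlp.add_pipe("eds.sentences")
    # nlp.add_pipe("eds.transformer", config={...})  # placeholder config
    # nlp.add_pipe("eds.ner_crf", config={...})      # placeholder config

    texts = [
        "Exemple",
        "Ceci est un autre exemple",
        "Ceci est un très long exemple ! Regardez tous ces mots !",
    ] * 100
    random.Random(42).shuffle(texts)

    stream = nlp.pipe(iter(texts))
    stream = stream.set_processing(
        batch_size="15 words",
        num_gpu_workers=1,
        num_cpu_workers=2,
        deterministic=True,
        gpu_worker_devices=["cpu"],  # just to test in gpu-less environments
    )
    list(stream)  # fails here with KeyError: '<cache id>-cpu1'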

Here are the logs:


=================================== FAILURES ===================================
________ test_multiprocessing_gpu_stub_multi_cpu_deterministic_backend _________

self = <edsnlp.processing.multiprocessing.MultiprocessingStreamExecutor object at 0x7f0369dc54f0>

    def dequeue_outputs(self):
        try:
            bar = tqdm(
                smoothing=0.1,
                mininterval=1.0,
                disable=not self.stream.show_progress,
            )
            with bar:
>               for item, count in self.iter_outputs():

edsnlp/processing/multiprocessing.py:1056: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <edsnlp.processing.multiprocessing.MultiprocessingStreamExecutor object at 0x7f0369dc54f0>
stop_mode = False

    def iter_outputs(self, stop_mode=False):
        deterministic = self.stream.deterministic
        requires_sentinel = (
            hasattr(self.stream.writer, "batch_fn")
            and getattr(self.stream.writer.batch_fn, "requires_sentinel", None)
            and not self.stream.writer.write_in_worker
        )
        missing_sentinels = len(self.cpu_worker_names) if requires_sentinel else 0
        buffer = []
        while self.num_alive_workers > 0:
            if self.stopped and not stop_mode:  # pragma: no cover
>               raise StopSignal()
E               edsnlp.processing.multiprocessing.StopSignal

edsnlp/processing/multiprocessing.py:1079: StopSignal

During handling of the above exception, another exception occurred:

frozen_ml_nlp = Pipeline(lang=eds, pipes={
  "sentences": eds.sentences,
  "transformer": eds.transformer,
  "ner": eds.ner_crf
})

    def test_multiprocessing_gpu_stub_multi_cpu_deterministic_backend(frozen_ml_nlp):
        text1 = "Exemple"
        text2 = "Ceci est un autre exemple"
        text3 = "Ceci est un très long exemple ! Regardez tous ces mots !"
        texts = [text1, text2, text3] * 100
        random.Random(42).shuffle(texts)
        stream = frozen_ml_nlp.pipe(iter(texts))
        stream = stream.set_processing(
            batch_size="15 words",
            num_gpu_workers=1,
            num_cpu_workers=2,
            deterministic=True,
            # show_progress=True,
            # just to test in gpu-less environments
            gpu_worker_devices=["cpu"],
        )
>       list(stream)

tests/processing/test_backends.py:194: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
edsnlp/processing/multiprocessing.py:1066: in dequeue_outputs
    self.teardown()
edsnlp/processing/multiprocessing.py:1222: in teardown
    for _ in self.iter_outputs(stop_mode=True):
edsnlp/processing/multiprocessing.py:1129: in iter_outputs
    raise self.error
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <edsnlp.processing.multiprocessing.MultiprocessingStreamExecutor object at 0x7f0369dc54f0>

    def dequeue_outputs(self):
        try:
            bar = tqdm(
                smoothing=0.1,
                mininterval=1.0,
                disable=not self.stream.show_progress,
            )
            with bar:
                for item, count in self.iter_outputs():
                    bar.update(count)
                    yield item
        except StopSignal:
            if self.error:
>               raise self.error
E               KeyError: '87127032-cpu1'

edsnlp/processing/multiprocessing.py:1061: KeyError
---------------------------- Captured stderr setup -----------------------------
INFO:py4j.clientserver:Closing down clientserver connection
----------------------------- Captured stdout call -----------------------------
Error in cpu1:
Traceback (most recent call last):
  File "/home/runner/work/edsnlp/edsnlp/edsnlp/processing/multiprocessing.py", line 383, in run_stage_thread
    self.process_items(stage)
  File "/home/runner/work/edsnlp/edsnlp/edsnlp/processing/multiprocessing.py", line 498, in process_items
    self.preprocess_before_forward(items, stage)
  File "/home/runner/work/edsnlp/edsnlp/edsnlp/processing/multiprocessing.py", line 622, in preprocess_before_forward
    batch = torch_pipe.batch_to_device(batch, device=device)
  File "/home/runner/work/edsnlp/edsnlp/edsnlp/core/torch_component.py", line 89, in wrapped
    cache = _caches[self._current_cache_id]
KeyError: '87127032-cpu1'

----------------------------- Captured stderr call -----------------------------
INFO:root:Switching process start method to spawn
INFO:root:Running 2 CPU workers and 1 GPU workers on ['cpu'] in spawn mode to run 3 stages.
------------------------------ Captured log call -------------------------------
INFO     root:multiprocessing.py:1340 Switching process start method to spawn
INFO     root:multiprocessing.py:957 Running 2 CPU workers and 1 GPU workers on ['cpu'] in spawn mode to run 3 stages.
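
The KeyError originates in the cache lookup at torch_component.py line 89 in the captured traceback. As a schematic reconstruction from the traceback only (not edsnlp's actual code), the failing pattern looks like a module-level registry keyed by "<run id>-<worker name>", where the wrapped method assumes the worker's cache entry was registered before the forward pass:

    # Schematic reconstruction of the failing lookup (hypothetical code,
    # inferred from the traceback): a module-level cache registry keyed
    # by "<run id>-<worker name>".
    _caches = {}

    class Component:
        def __init__(self, cache_id):
            self._current_cache_id = cache_id

        def batch_to_device(self, batch):
            # Assumes the entry was registered before the forward pass;
            # if the spawned worker never created it (or it was already
            # torn down), the lookup raises.
            cache = _caches[self._current_cache_id]
            return batch

    _caches["87127032-gpu0"] = {}      # only one worker registered its entry
    comp = Component("87127032-cpu1")  # ...but cpu1 looks itself up
    comp.batch_to_device([])           # -> KeyError: '87127032-cpu1'

If that reading is right, the cpu1 worker's cache entry was missing when preprocess_before_forward ran in spawn mode, which matches the "Error in cpu1" traceback above.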
