Skip to content

self.predictor.run() RuntimeError: Unknown exception #16164

@CroMarmot

Description

@CroMarmot

🔎 Search before asking

  • I have searched the PaddleOCR Docs and found no similar bug report.
  • I have searched the PaddleOCR Issues and found no similar bug report.
  • I have searched the PaddleOCR Discussions and found no similar bug report.

🐛 Bug (问题描述)

exception输出和这些issue相同, 搜索了“Unknown exception”

#16088
#15393 (comment)
#16050

$ hatch run bug
信息: 用提供的模式无法找到文件。
C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddle\utils\cpp_extension\extension_utils.py:715: UserWarning: No ccache found. Please be aware that recompiling all source files may be required. You can download and install ccache from: https://github.com/ccache/ccache/blob/master/doc/INSTALL.md
  warnings.warn(warning_message)
Creating model: ('PP-OCRv5_server_det', None)
Using official model (PP-OCRv5_server_det), the model files will be automatically downloaded and saved in C:\Users\yexia\.paddlex\official_models.
Creating model: ('PP-OCRv5_server_rec', None)
Using official model (PP-OCRv5_server_rec), the model files will be automatically downloaded and saved in C:\Users\yexia\.paddlex\official_models.
len(files)=12
OCR 1/12 shot_20250730153228.png
len(new_links)=196
len(existing_urls)=0
len(updated_urls)=196
append success
OCR 2/12 shot_20250730153326.png
len(new_links)=49
len(existing_urls)=196
len(updated_urls)=232
append success
OCR 3/12 shot_20250730153329.png
len(new_links)=137
len(existing_urls)=232
len(updated_urls)=263
append success
OCR 4/12 shot_20250730153333.png
len(new_links)=48
len(existing_urls)=263
len(updated_urls)=291
append success
OCR 5/12 shot_20250730153337.png
[2025-07-30 16:05:48,642] [   ERROR] main.py:57 - Error
Traceback (most recent call last):
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 55, in ocr_and_extract_links
    yield await asyncio.to_thread(ocr_scan, str(img_path))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\asyncio\threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\concurrent\futures\thread.py", line 59, in run
    result = self.fn(*self.args, **self.kwargs)
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 41, in ocr_scan
    return list(pocr.predict(str(image_path))[0]["rec_texts"])
                ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddleocr\_pipelines\ocr.py", line 208, in predict
    return list(
        self.predict_iter(
    ...<10 lines>...
        )
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 129, in predict
    yield from self._pipeline.predict(
    ...<3 lines>...
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predic    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predic  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predic    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predic    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 211, in __call__
tor\base_predictor.py", line 211, in __call__
    yield from self.apply(input, **kwargs)
    yield from self.apply(input, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predic  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 267, in apply
    prediction = self.process(batch_data, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\text_detect  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\text_detection\predictor.py", line 105, in process
    batch_preds = self.infer(x=x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 287, in __call__
    pred = self.infer(x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 252, in __call__
    self.predictor.run()
    ~~~~~~~~~~~~~~~~~~^^
RuntimeError: Unknown exception
OCR 6/12 shot_20250730153340.png
len(new_links)=43
len(existing_urls)=291
len(updated_urls)=314
append success
OCR 7/12 shot_20250730153344.png
[2025-07-30 16:06:03,905] [   ERROR] main.py:57 - Error
Traceback (most recent call last):
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 55, in ocr_and_extract_links
    yield await asyncio.to_thread(ocr_scan, str(img_path))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\asyncio\threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\concurrent\futures\thread.py", line 59, in run
    result = self.fn(*self.args, **self.kwargs)
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 41, in ocr_scan
    return list(pocr.predict(str(image_path))[0]["rec_texts"])
                ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddleocr\_pipelines\ocr.py", line 208, in predict
    return list(
        self.predict_iter(
    ...<10 lines>...
        )
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 129, in predict
    yield from self._pipeline.predict(
    ...<3 lines>...
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 211, in __call__
    yield from self.apply(input, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 267, in apply
    prediction = self.process(batch_data, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\text_detection\predictor.py", line 105, in process
    batch_preds = self.infer(x=x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 287, in __call__
    pred = self.infer(x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 252, in __call__
    self.predictor.run()
    ~~~~~~~~~~~~~~~~~~^^
RuntimeError: Unknown exception
OCR 8/12 shot_20250730153347.png
len(new_links)=46
len(existing_urls)=314
len(updated_urls)=339
append success
OCR 9/12 shot_20250730153351.png
len(new_links)=46
len(existing_urls)=339
len(updated_urls)=364
append success
OCR 10/12 shot_20250730153354.png
[2025-07-30 16:06:28,046] [   ERROR] main.py:57 - Error
Traceback (most recent call last):
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 55, in ocr_and_extract_links
    yield await asyncio.to_thread(ocr_scan, str(img_path))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\asyncio\threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\concurrent\futures\thread.py", line 59, in run
    result = self.fn(*self.args, **self.kwargs)
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 41, in ocr_scan
    return list(pocr.predict(str(image_path))[0]["rec_texts"])
                ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddleocr\_pipelines\ocr.py", line 208, in predict
    return list(
        self.predict_iter(
    ...<10 lines>...
        )
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 129, in predict
    yield from self._pipeline.predict(
    ...<3 lines>...
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 211, in __call__
    yield from self.apply(input, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 267, in apply
    prediction = self.process(batch_data, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\text_detection\predictor.py", line 105, in process
    batch_preds = self.infer(x=x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 287, in __call__
    pred = self.infer(x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 252, in __call__
    self.predictor.run()
    ~~~~~~~~~~~~~~~~~~^^
RuntimeError: Unknown exception
OCR 11/12 shot_20250730153358.png
len(new_links)=42
len(existing_urls)=364
len(updated_urls)=387
append success
OCR 12/12 shot_20250730153401.png
[2025-07-30 16:06:40,444] [   ERROR] main.py:57 - Error
Traceback (most recent call last):
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 55, in ocr_and_extract_links
    yield await asyncio.to_thread(ocr_scan, str(img_path))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\asyncio\threads.py", line 25, in to_thread
    return await loop.run_in_executor(None, func_call)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\Programs\Python\Python313\Lib\concurrent\futures\thread.py", line 59, in run
    result = self.fn(*self.args, **self.kwargs)
  File "C:\Users\yexia\Documents\github\pobr\src\pobr\main.py", line 41, in ocr_scan
    return list(pocr.predict(str(image_path))[0]["rec_texts"])
                ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddleocr\_pipelines\ocr.py", line 208, in predict
    return list(
        self.predict_iter(
    ...<10 lines>...
        )
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 129, in predict
    yield from self._pipeline.predict(
    ...<3 lines>...
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 350, in predict
    det_results = list(
        self.text_det_model(doc_preprocessor_images, **text_det_params)
    )
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 211, in __call__
    yield from self.apply(input, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 267, in apply
    prediction = self.process(batch_data, **kwargs)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\text_detection\predictor.py", line 105, in process
    batch_preds = self.infer(x=x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 287, in __call__
    pred = self.infer(x)
  File "C:\Users\yexia\AppData\Local\hatch\env\virtual\pobr\CDbpi3Zz\pobr\Lib\site-packages\paddlex\inference\models\common\static_infer.py", line 252, in __call__
    self.predictor.run()
    ~~~~~~~~~~~~~~~~~~^^
RuntimeError: Unknown exception

🏃‍♂️ Environment (运行环境)

PS C:\Users\yexia> systeminfo
OS 名称:            Microsoft Windows 11 专业版
OS 版本:            10.0.26100 暂缺 Build 26100
$ hatch --version
Hatch, version 1.14.1

$ hatch run python --version
Python 3.13.2

$ hatch run pip list | grep paddle
paddleocr                3.1.0
paddlepaddle             3.1.0
paddlex                  3.1.3

RAM 16GB

CPU:

PS C:\Users\yexia> wmic cpu get name
Name
AMD Ryzen 7 5800H with Radeon Graphics

CUDA None

🌰 Minimal Reproducible Example (最小可复现问题的Demo)

https://github.com/CroMarmot/pobr

目前简化到单文件60行(含空格和print),其中一些看起来不相关的函数,注释掉后会影响是否出现报错

  • 例如 函数 append_urls_to_json 中的 写文件
  • 出现exception 并不“稳定”,在不做任何修改的情况下,运行多次,出现exception对应的处理可能是不同的,但这个pobr运行多次,每次都会有exception,只是出现的图片不同
  • 通过win11系统自带的任务管理器 观测了内存 始终有约3gb的剩余,但实际运行下来,似乎内存剩余更小时出现报错的可能性更高(只是感觉,没有量化)
  • 我有办法 具体的获得一个 内存不足 的异常吗?
import asyncio
import json
import logging
import re
from pathlib import Path

import aiofiles
from paddleocr import PaddleOCR


async def append_urls_to_json(urls: list[str], file_path: str) -> None:
    try:
        async with aiofiles.open(file_path, encoding="utf-8") as f:
            content = await f.read()
            if content.strip():  # 文件不为空
                existing_urls = json.loads(content)
                if not isinstance(existing_urls, list):
                    existing_urls = []
            else:  # 文件为空
                existing_urls = []
    except (FileNotFoundError, json.JSONDecodeError):
        existing_urls = []

    print(f"{len(existing_urls)=}")
    updated_urls = existing_urls + [url for url in urls if url not in existing_urls]
    print(f"{len(updated_urls)=}")

    async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
        await f.write(json.dumps(updated_urls, ensure_ascii=False, indent=2))


async def main() -> None:
    pocr = PaddleOCR(
        lang="ch",
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        use_textline_orientation=False,
    )

    def ocr_scan(image_path: str | Path):
        return list(pocr.predict(str(image_path))[0]["rec_texts"])

    async def ocr_and_extract_links():
        current_dir = Path()
        files = [
            p
            for p in current_dir.iterdir()
            if p.is_file() and re.fullmatch(r"shot_.*\.png", p.name)
        ]
        print(f"{len(files)=}")

        for idx, img_path in enumerate(files, start=1):
            print(f"OCR {idx}/{len(files)} {img_path}")
            try:
                yield await asyncio.to_thread(ocr_scan, str(img_path))
            except Exception:
                logging.exception("Error")

    async for new_links in ocr_and_extract_links():
        print(f"{len(new_links)=}")
        await append_urls_to_json(new_links, "bug.json")
        print(f"append success")


if __name__ == "__main__":
    asyncio.run(main())

运行了三次,第一次出现一个异常,第二次完全正常,第三次两次异常

2025-07-30_16-35-22_ffmpegzip.mp4

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions