File tree 2 files changed +4
-4
lines changed
src/instructlab/sdg/utils
2 files changed +4
-4
lines changed Original file line number Diff line number Diff line change 1
1
# SPDX-License-Identifier: Apache-2.0
2
2
click >= 8.1.7 ,< 9.0.0
3
3
datasets >= 2.18.0 ,< 3.0.0
4
- docling[tesserocr]>=2.4.2,<=2.8.3; sys_platform != 'darwin'
5
- docling >= 2.4.2 , <= 2.8.3 ; sys_platform == 'darwin'
6
- docling-parse>=2.0.0,<3.0.0
4
+ docling[tesserocr]>=2.9.0; sys_platform != 'darwin'
5
+ docling >= 2.9.0 ; sys_platform == 'darwin'
6
+ docling-parse>=3.3.0
7
7
GitPython >= 3.1.42 ,< 4.0.0
8
8
gguf >= 0.6.0
9
9
httpx >= 0.25.0 ,< 1.0.0
Original file line number Diff line number Diff line change @@ -151,7 +151,7 @@ def extract_text_from_pdf(file_path: str) -> str:
151
151
)
152
152
page_text = "\n " .join (text_lines )
153
153
pdf_text += page_text + "\n "
154
- except Exception as e :
154
+ except Exception as e : # pylint: disable=broad-exception-caught
155
155
logger .warning (
156
156
f"Error extracting text from page { page_no } of '{ file_path } ': { e } "
157
157
)
You can’t perform that action at this time.
0 commit comments