Skip to content

Commit cd08664

Browse files
authored
Add support for hide-annotations flag
1 parent 70eeb68 commit cd08664

File tree

5 files changed

+71
-1
lines changed

5 files changed

+71
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ Here are the definitions:
8282
- `grayscale` parameter allows you to convert images to grayscale (`-gray` in pdftoppm CLI)
8383
- `single_file` parameter allows you to convert the first PDF page only, without adding digits at the end of the `output_file`
8484
- Allow the user to specify poppler's installation path with `poppler_path`
85+
- Allow users to hide annotations when using pdftoppm with `hide_annotations`
8586

8687
## Performance tips
8788

docs/reference.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ convert_from_path(
2626
grayscale=False,
2727
size=None,
2828
paths_only=False,
29+
hide_annotations=False,
2930
)
3031

3132
convert_from_bytes(
@@ -47,6 +48,7 @@ convert_from_bytes(
4748
grayscale=False,
4849
size=None,
4950
paths_only=False,
51+
hide_annotations=False,
5052
)
5153
```
5254

@@ -150,7 +152,12 @@ A list of image paths rather than preloaded images are returned.
150152

151153
Provide additional options for jpeg format conversions. Requires `fmt="jpeg"` and is provided as dict, with all
152154
optional keywords:
153-
`jpegopt={"quality": 100, "optimize": True, "progressive": False}
155+
`jpegopt={"quality": 100, "optimize": True, "progressive": False}`
156+
157+
**hide_annotations**
158+
159+
Hide link bounding boxes and other PDF annotations. This is only implemented in pdftoppm at the moment, so it
160+
cannot be combined with `use_pdftocairo=True`; doing so raises `NotImplementedError`.
154161

155162
## Exceptions
156163

pdf2image/pdf2image.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def convert_from_path(
5555
paths_only=False,
5656
use_pdftocairo=False,
5757
timeout=None,
58+
hide_annotations=False,
5859
):
5960
"""
6061
Description: Convert PDF to Image will throw whenever one of the conditions is reached
@@ -115,6 +116,9 @@ def convert_from_path(
115116
if poppler_version_major == 0 and poppler_version_minor <= 57:
116117
jpegopt = None
117118

119+
if poppler_version_major == 0 and poppler_version_minor <= 83:
120+
hide_annotations = False
121+
118122
# If output_file isn't a generator, it will be turned into one
119123
if not isinstance(output_file, types.GeneratorType) and not isinstance(
120124
output_file, ThreadSafeGenerator
@@ -170,9 +174,12 @@ def convert_from_path(
170174
single_file,
171175
grayscale,
172176
size,
177+
hide_annotations,
173178
)
174179

175180
if use_pdfcairo:
181+
if hide_annotations:
182+
raise NotImplementedError("Hide annotations flag not implemented in pdftocairo.")
176183
args = [_get_command_path("pdftocairo", poppler_path)] + args
177184
else:
178185
args = [_get_command_path("pdftoppm", poppler_path)] + args
@@ -241,6 +248,7 @@ def convert_from_bytes(
241248
paths_only=False,
242249
use_pdftocairo=False,
243250
timeout=None,
251+
hide_annotations=False,
244252
):
245253
"""
246254
Description: Convert PDF to Image will throw whenever one of the conditions is reached
@@ -293,6 +301,7 @@ def convert_from_bytes(
293301
paths_only=paths_only,
294302
use_pdftocairo=use_pdftocairo,
295303
timeout=timeout,
304+
hide_annotations=hide_annotations,
296305
)
297306
finally:
298307
os.close(fh)
@@ -313,10 +322,14 @@ def _build_command(
313322
single_file,
314323
grayscale,
315324
size,
325+
hide_annotations,
316326
):
317327
if use_cropbox:
318328
args.append("-cropbox")
319329

330+
if hide_annotations:
331+
args.append("-hide-annotations")
332+
320333
if transparent and fmt in TRANSPARENT_FILE_TYPES:
321334
args.append("-transp")
322335

tests.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,6 +1492,55 @@ def test_conversion_from_path_with_2d_tuple_size_with_None_height(self):
14921492
)
14931493
)
14941494

1495+
## Test hide annotations parameter
1496+
1497+
@profile
1498+
@unittest.skipIf(not POPPLER_INSTALLED, "Poppler is not installed!")
1499+
def test_conversion_from_path_with_hide_annotations(self):
1500+
images_from_path = convert_from_path("./tests/test_annotations.pdf", hide_annotations=True)
1501+
start_time = time.time()
1502+
self.assertTrue(len(images_from_path) == 1)
1503+
print(
1504+
"test_conversion_from_path_with_hide_annotations: {} sec".format(
1505+
time.time() - start_time
1506+
)
1507+
)
1508+
1509+
@profile
1510+
@unittest.skipIf(not POPPLER_INSTALLED, "Poppler is not installed!")
1511+
def test_conversion_from_bytes_with_hide_annotations(self):
1512+
start_time = time.time()
1513+
with open("./tests/test_annotations.pdf", "rb") as pdf_file:
1514+
images_from_bytes = convert_from_bytes(
1515+
pdf_file.read(),
1516+
hide_annotations=True,
1517+
)
1518+
self.assertTrue(len(images_from_bytes) == 1)
1519+
print(
1520+
"test_conversion_from_bytes_with_hide_annotations: {} sec".format(
1521+
time.time() - start_time
1522+
)
1523+
)
1524+
1525+
@profile
1526+
@unittest.skipIf(not POPPLER_INSTALLED, "Poppler is not installed!")
1527+
def test_conversion_from_path_with_hide_annotations_with_invalid_arg_combination(self):
1528+
start_time = time.time()
1529+
try:
1530+
images_from_path = convert_from_path(
1531+
"./tests/test_annotations.pdf",
1532+
hide_annotations=True,
1533+
use_pdftocairo=True,
1534+
)
1535+
raise Exception("This should not happen")
1536+
except NotImplementedError:
1537+
pass
1538+
print(
1539+
"test_conversion_from_path_with_hide_annotations_with_invalid_arg_combination: {} sec".format(
1540+
time.time() - start_time
1541+
)
1542+
)
1543+
14951544
## Test pdfinfo
14961545

14971546
@profile

tests/test_annotations.pdf

11.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)