Skip to content

Commit a224745

Browse files
committed
Version 0.1
1 parent 4648644 commit a224745

File tree

5 files changed

+130
-1
lines changed

5 files changed

+130
-1
lines changed

README.md

+28-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,29 @@
11
# pdf2image
2-
A python module that wraps the pdftoppm utility to convert PDF to PPM, JPEG, and PNG
2+
A python3 module that wraps the pdftoppm utility to convert PDF to the PIL image format
3+
4+
## How to install
5+
6+
` pip install pdf2image `
7+
8+
Windows users will have to install [pdftoppm](https://sourceforge.net/projects/poppler-win32/)
9+
10+
Linux users will usually have pdftoppm pre-installed with their distro (tested on Ubuntu and Arch Linux)
11+
12+
## How does it work?
13+
` from pdf2image import convert_from_path, convert_from_bytes `
14+
15+
Then simply do:
16+
17+
` images = convert_from_path('/home/kankroc/example.pdf') `
18+
19+
OR
20+
21+
` images = convert_from_bytes(open('/home/kankroc/example.pdf', 'rb').read()) `
22+
23+
`images` will be a list of PIL Images, one for each page of the PDF document.
24+
25+
## Limitations / known issues
26+
27+
- A relatively big PDF will use up all your memory and cause the process to be killed
28+
- pdftoppm errors are not handled
29+
- Not Python 2 compatible

pdf2image/__init__.py

Whitespace-only changes.

pdf2image/pdf2image.py

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from subprocess import Popen, PIPE
2+
from PIL import Image
3+
from io import BytesIO
4+
5+
def convert_from_path(pdf_path, save_path=None, dpi=200):
    """
        Description: Convert PDF to a list of PIL Images by running pdftoppm
        Parameters:
            pdf_path -> Path to the PDF that you want to convert
            save_path -> Not used by this function; kept so existing callers keep working
            dpi -> Image quality in DPI (default 200)
        Returns:
            A list of PIL Image objects, one per PPM image found in pdftoppm's stdout
    """

    proc = Popen(['pdftoppm', '-r', str(dpi), pdf_path], stdout=PIPE)

    # stderr is not piped, so the second element of the result is always None
    data, _ = proc.communicate()

    images = []

    index = 0

    # stdout holds the PPM images back to back; walk them one header at a time
    while index < len(data):
        # maxsplit=3 stops after the three header lines, so newline-valued bytes
        # (0x0A) in the pixel data, or a following image's header that falls inside
        # the 40-byte window, no longer break the four-way unpacking
        code, size, rgb, _ = data[index:index + 40].split(b'\n', 3)
        size_x, size_y = size.split(b' ')
        # Three newline-terminated header fields followed by 3 bytes per pixel
        file_size = len(code) + len(size) + len(rgb) + 3 + int(size_x) * int(size_y) * 3
        images.append(Image.open(BytesIO(data[index:index + file_size])))
        index += file_size

    return images
29+
30+
def convert_from_bytes(pdf_file, save_path=None, dpi=200):
    """
        Description: Convert PDF bytes to a list of PIL Images by piping them to pdftoppm
        Parameters:
            pdf_file -> Bytes representing the PDF file
            save_path -> Not used by this function; kept so existing callers keep working
            dpi -> Image quality in DPI (default 200)
        Returns:
            A list of PIL Image objects, one per PPM image found in pdftoppm's stdout
    """

    proc = Popen(['pdftoppm', '-r', str(dpi)], stdout=PIPE, stdin=PIPE)

    # Let communicate() feed stdin while it drains stdout: writing to proc.stdin
    # directly can deadlock once the OS pipe buffers fill up.
    # stderr is not piped, so the second element of the result is always None
    data, _ = proc.communicate(input=pdf_file)

    images = []

    index = 0

    # stdout holds the PPM images back to back; walk them one header at a time
    while index < len(data):
        # maxsplit=3 stops after the three header lines, so newline-valued bytes
        # (0x0A) in the pixel data, or a following image's header that falls inside
        # the 40-byte window, no longer break the four-way unpacking
        code, size, rgb, _ = data[index:index + 40].split(b'\n', 3)
        size_x, size_y = size.split(b' ')
        # Three newline-terminated header fields followed by 3 bytes per pixel
        file_size = len(code) + len(size) + len(rgb) + 3 + int(size_x) * int(size_y) * 3
        images.append(Image.open(BytesIO(data[index:index + file_size])))
        index += file_size

    return images

setup.cfg

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[metadata]
2+
description-file = README.md

setup.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Always prefer setuptools over distutils
2+
from setuptools import setup, find_packages
3+
# To use a consistent encoding
4+
from codecs import open
5+
from os import path
6+
7+
# Absolute path of the directory that contains this setup script
here = path.abspath(path.dirname(__file__))

setup(
    name='pdf2image',

    version='0.1.0',

    description='A wrapper around the pdftoppm command line tool to convert pdf to a PIL Image list.',

    url='https://github.com/Kankroc/pdf2image',

    author='Edouard Belval',
    author_email='[email protected]',

    # Choose your license
    license='MIT',

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 3 - Alpha',

        'Intended Audience :: Developers',

        'License :: OSI Approved :: MIT License',

        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],

    keywords='pdf image png jpeg jpg convert',

    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    # The package imports PIL, which is provided by the Pillow distribution;
    # declaring it here lets pip install it together with pdf2image
    install_requires=['pillow'],

    # Only Python 3 classifiers are listed above, so keep pip from installing on Python 2
    python_requires='>=3',
)

0 commit comments

Comments
 (0)