-
Notifications
You must be signed in to change notification settings - Fork 3
/
generate_epub.py
51 lines (42 loc) · 1.3 KB
/
generate_epub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymongo
import gridfs
import pyquery
import time
from ebooklib import epub
def main(remove_comments=False, book_title='Aaron_Swartz_-_Raw_Thought'):
    """Build an EPUB book from the HTML pages stored in GridFS.

    Connects to MongoDB with the default ``MongoClient()`` settings, opens
    GridFS on the ``aaronsw`` database and turns the latest version of every
    stored file into one chapter. Only the ``.content`` element of each page
    is kept; pages whose ``.content`` has no ``h1`` text are skipped.

    Args:
        remove_comments: When true, the ``#comments_body`` element is
            stripped from every chapter before it is added to the book.
        book_title: Used both as the book title and, with ``.epub``
            appended, as the output path (relative to the working
            directory).
    """
    db = pymongo.MongoClient().aaronsw
    fs = gridfs.GridFS(db)

    book = epub.EpubBook()
    # repr() is the portable spelling of the Python 2-only backtick syntax;
    # the timestamp makes the identifier differ from run to run.
    book.set_identifier('rawthought' + repr(time.time()))
    book.set_title(book_title)
    book.set_language('en')
    book.add_author('Aaron Swartz')

    chapters = []
    for filename in fs.list():
        gridout = fs.get_last_version(filename)
        pq = pyquery.PyQuery(gridout.read())
        cq = pq('.content')

        # The first heading inside the content doubles as the chapter title;
        # pages without one are left out of the book.
        title = cq('h1:first').text()
        if not title:
            continue

        # Scripts and forms are removed from the page before export.
        cq('script').remove()
        cq('form').remove()
        if remove_comments:
            cq('#comments_body').remove()

        chapter = epub.EpubHtml(
            title=title,
            # Keep only the part after the last '/' as the in-book file name.
            file_name=filename.rsplit('/', 1)[-1] + '.xhtml',
            lang='en',
        )
        chapter.content = cq.html()
        book.add_item(chapter)
        chapters.append(chapter)

    book.toc = tuple(chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.spine = chapters
    epub.write_epub(book_title + '.epub', book, {})
# Run the EPUB generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()