Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 87 additions & 18 deletions tumblr_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
except ImportError:
import simplejson as json
import locale
import oauth2 as oauth
import os
from os.path import join, split, splitext
import Queue
Expand Down Expand Up @@ -100,6 +101,8 @@ def test_jpg(h, f):
# Timeout applied to HTTP requests (presumably in seconds -- TODO confirm
# against the code that consumes it).
HTTP_TIMEOUT = 90
# Chunk size for HTTP transfers: 1024 * 1024 (presumably bytes, i.e. 1 MiB --
# TODO confirm against the download code).
HTTP_CHUNK_SIZE = 1024 * 1024

# Path of the credentials file; '~' is expanded with os.path.expanduser()
# before the file is handed to TumblrBackup.set_credentials().
CONFIG = '~/.config/tumblr'

# bb-tumblr-backup API key
# Sent as the 'api_key' query parameter by apiparse().
API_KEY = '8YUsKJvcJxo2MDwmWMDiXZGuMuIbeCwuQGP5ZHSEA4jBJPMnJT'

Expand Down Expand Up @@ -174,8 +177,8 @@ def get_api_url(account):
blog_name = account
if '.' not in account:
blog_name += '.tumblr.com'
return 'https://api.tumblr.com/v2/blog/%s/%s' % (
blog_name, 'likes' if options.likes else 'posts'
return 'https://api.tumblr.com/v2/blog/%s' % (
blog_name
)


Expand All @@ -194,21 +197,38 @@ def set_period():
options.p_stop = time.mktime(tm)


def apiparse(base, count, start=0):
params = {'api_key': API_KEY, 'limit': count, 'reblog_info': 'true'}
if start > 0:
params['offset'] = start
def apiparse(base, count=None, start=0):
is_info = base.endswith('/info')
assert (count is None) == (options.drafts or is_info)

if is_info:
params = {'api_key': API_KEY}
elif options.drafts:
params = {}
if start > 0:
params['before_id'] = start
else:
params = {'api_key': API_KEY, 'limit': count, 'reblog_info': 'true'}
if start > 0:
params['offset'] = start

url = base + '?' + urllib.urlencode(params)
for _ in range(10):
try:
resp = urlopen(url)
data = resp.read()
if is_info or not options.drafts:
resp = urlopen(url)
data = resp.read()
else:
headers, data = client.request(url, method='GET')
except (EnvironmentError, HTTPException) as e:
sys.stderr.write("%s getting %s\n" % (e, url))
continue
if resp.info().gettype() == 'application/json':

content_type = resp.info().gettype() if is_info or not options.drafts \
else headers['content-type']
if content_type.startswith('application/json'):
break
sys.stderr.write("Unexpected Content-Type: '%s'\n" % resp.info().gettype())
sys.stderr.write("Unexpected Content-Type: '%s'\n" % content_type)
return None
else:
return None
Expand All @@ -219,7 +239,12 @@ def apiparse(base, count, start=0):
e.__class__.__name__, e, resp.getcode(), resp.msg, resp.info().gettype(), data
))
return None
return doc if doc.get('meta', {}).get('status', 0) == 200 else None
meta = doc.get('meta', {})
status = meta.get('status')
if status != 200:
sys.stderr.write("\nAPI Error %s: %s\n" % (status, meta.get('msg')))
return None
return doc


def add_exif(image_name, tags):
Expand Down Expand Up @@ -438,6 +463,14 @@ class TumblrBackup:
def __init__(self):
    """Start with a clean state: no errors, no posts, no credentials."""
    # Running total of backed-up posts.
    self.total_count = 0
    # Flipped to True as soon as any backup step fails.
    self.errors = False
    # OAuth credentials; they remain None until set_credentials() runs.
    self.consumer_token = None
    self.consumer_secret = None
    self.access_token = None
    self.access_secret = None

def set_credentials(self, cred_file):
    """Load the four OAuth credentials from *cred_file*.

    The file holds one value per line, in this order: consumer token,
    consumer secret, access token, access secret.  Surrounding whitespace
    is stripped and blank lines (such as a trailing empty line) are
    ignored.

    Raises an EnvironmentError (IOError/OSError) if the file cannot be
    opened or read, and ValueError if it does not contain exactly four
    non-blank lines.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(cred_file) as f:
        stripped = [line.strip() for line in f]

    # A blank line carries no credential; dropping it keeps a stray
    # trailing newline from breaking the four-way unpacking below.
    values = [s for s in stripped if s]
    if len(values) != 4:
        raise ValueError(
            "%s: expected 4 credential lines, found %d"
            % (cred_file, len(values))
        )

    (
        self.consumer_token, self.consumer_secret,
        self.access_token, self.access_secret
    ) = values

def exit_code(self):
if self.errors:
Expand Down Expand Up @@ -524,8 +557,8 @@ def backup(self, account):
else:
log(account, "Getting basic information\r")

# start by calling the API with just a single post
soup = apiparse(base, 1)
# start by calling the API to get blog info
soup = apiparse(base + '/info')
if not soup:
self.errors = True
return
Expand All @@ -535,7 +568,7 @@ def backup(self, account):
if options.likes:
_get_content = lambda soup: soup['response']['liked_posts']
blog = {}
last_post = resp['liked_count']
last_post = resp['likes']
else:
_get_content = lambda soup: soup['response']['posts']
blog = resp['blog']
Expand All @@ -546,6 +579,11 @@ def backup(self, account):
# use the meta information to create a HTML header
TumblrPost.post_header = self.header(body_class='post')

global client
consumer = oauth.Consumer(self.consumer_token, self.consumer_secret)
token = oauth.Token(self.access_token, self.access_secret)
client = oauth.Client(consumer, token)

# find the post number limit to back up
if options.count:
last_post = min(last_post, options.count + options.skip)
Expand Down Expand Up @@ -578,21 +616,37 @@ def _backup(posts):
self.post_count += 1
return True

base += '/%s%s' % (
'likes' if options.likes else 'posts',
'/draft' if options.drafts else ''
)

# start the thread pool
backup_pool = ThreadPool()
try:
# Get the JSON entries from the API, which we can only do for max 50 posts at once.
# Posts "arrive" in reverse chronological order. Post #0 is the most recent one.
last_batch = MAX_POSTS
i = options.skip
while i < last_post:
before_id = 0
failures = 0
while (i < last_post if not options.drafts else failures < 5):
# find the upper bound
j = min(i + MAX_POSTS, last_post)
log(account, "Getting posts %d to %d of %d\r" % (i, j - 1, last_post))
if not options.drafts:
log(account, "Getting posts %d to %d of %d\r" % (i, j - 1, last_post))
else:
log(account, "Getting drafts, starting at number %d\r" % i)

soup = apiparse(base, j - i, i)
if not options.drafts:
soup = apiparse(base, j - i, i)
else:
soup = apiparse(base, start=before_id)
if soup is None:
i += last_batch # try the next batch
if options.drafts:
failures += 1
else:
i += last_batch # try the next batch
self.errors = True
continue

Expand All @@ -603,6 +657,7 @@ def _backup(posts):

last_batch = len(posts)
i += last_batch
before_id = posts[-1]['id']
except:
# ensure proper thread pool termination
backup_pool.cancel()
Expand Down Expand Up @@ -1121,6 +1176,9 @@ def request_callback(option, opt, value, parser):
parser.add_option('-l', '--likes', action='store_true',
dest='likes', help="save a blog's likes, not its posts"
)
parser.add_option('-d', '--drafts', action='store_true',
dest='drafts', help="save a blog's drafts, not its posts"
)
parser.add_option('-k', '--skip-images', action='store_false', default=True,
dest='save_images', help="do not save images; link to Tumblr instead"
)
Expand Down Expand Up @@ -1211,12 +1269,23 @@ def request_callback(option, opt, value, parser):
parser.error("-O can only be used for a single blog-name")
if options.dirs and options.tag_index:
parser.error("-D cannot be used with --tag-index")
if options.likes and options.drafts:
parser.error("--likes cannot be used with --drafts")
if options.incremental and options.drafts:
parser.error("--incremental cannot be used with --drafts")
if options.exif and not pyexiv2:
parser.error("--exif: module 'pyexif2' is not installed")
if options.save_video and not youtube_dl:
parser.error("--save-video: module 'youtube_dl' is not installed")

tb = TumblrBackup()

try:
tb.set_credentials(os.path.expanduser(CONFIG))
except EnvironmentError:
sys.stderr.write('Credentials file %s not found or not readable\n' % CONFIG)
sys.exit(1)

try:
for account in args:
tb.backup(account)
Expand Down