Skip to content

Commit a2b069c

Browse files
committed
tumblr_backup: Support backup of over 1000 likes
Based on PR bbolli#114 by @aggroskater
1 parent 43eced5 commit a2b069c

File tree

1 file changed

+37
-13
lines changed

1 file changed

+37
-13
lines changed

tumblr_backup.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import threading
2020
import time
2121
from collections import defaultdict
22-
from datetime import datetime
22+
from datetime import datetime, timedelta
2323
from glob import glob
2424
from os.path import join, split, splitext
2525
from posixpath import basename as urlbasename, join as urlpathjoin, splitext as urlsplitext
@@ -327,7 +327,7 @@ def apiparse(base, prev_resps, count, start=0, before=None):
327327
params = {'api_key': API_KEY, 'limit': count, 'reblog_info': 'true'}
328328
if before:
329329
params['before'] = before
330-
if start > 0:
330+
if start > 0 and not options.likes:
331331
params['offset'] = start
332332
url = base + '?' + urlencode(params)
333333

@@ -714,6 +714,16 @@ def footer(base, previous_page, next_page):
714714
f += '</nav></footer>\n'
715715
return f
716716

717+
@staticmethod
def get_post_timestamps(posts):
    """Yield the publication time of each saved post as a Unix timestamp.

    posts -- iterable of paths to saved post HTML files.

    Each file is parsed with BeautifulSoup and the 'datetime' attribute of
    its first <time> element is read; the value must have the form
    'YYYY-MM-DDTHH:MM:SSZ'. The result is the whole number of seconds
    between 1970-01-01T00:00:00 and that time, as an integer.

    A file without a usable <time datetime="..."> element, or with a value
    in a different format, makes the generator raise when it reaches it.
    """
    epoch = datetime(1970, 1, 1)
    for post in posts:
        with io.open(post, encoding=FILE_ENCODING) as pf:
            soup = BeautifulSoup(pf, 'lxml')
        postdate = soup.find('time')['datetime']
        # Drop the parse tree right away; callers may feed thousands of files.
        del soup
        # No datetime.fromisoformat or datetime.timestamp on Python 2, and
        # Python 2 cannot divide a timedelta by a timedelta either, so go
        # through total_seconds(), which exists on 2.7 and 3.x alike.
        elapsed = datetime.strptime(postdate, '%Y-%m-%dT%H:%M:%SZ') - epoch
        yield int(elapsed.total_seconds())
726+
717727
def backup(self, account, prev_archive):
718728
"""makes single files and an index for every post on a public Tumblr blog account"""
719729

@@ -744,16 +754,22 @@ def backup(self, account, prev_archive):
744754
# get the highest post id already saved
745755
ident_max = None
746756
if options.incremental:
747-
try:
748-
ident_max = max(
749-
long(splitext(split(f)[1])[0])
750-
for f in glob(path_to(post_dir, '*' + post_ext))
751-
)
752-
log.status('Backing up posts after {}\r'.format(ident_max))
753-
except ValueError: # max() arg is an empty sequence
754-
pass
755-
else:
756-
log.status('Getting basic information\r')
757+
filter_ = join('*', dir_index) if options.dirs else '*' + post_ext
758+
post_glob = glob(path_to(post_dir, filter_))
759+
if not post_glob:
760+
pass # No posts to read
761+
elif options.likes:
762+
# Read every post to find the newest timestamp we've saved.
763+
if BeautifulSoup is None:
764+
raise RuntimeError("Incremental likes backup: module 'bs4' is not installed")
765+
log('Finding newest liked post (may take a while)\n', account=True)
766+
ident_max = max(self.get_post_timestamps(post_glob))
767+
else:
768+
ident_max = max(long(splitext(split(f)[1])[0]) for f in post_glob)
769+
if ident_max is not None:
770+
log('Backing up posts after {}\n'.format(ident_max), account=True)
771+
772+
log.status('Getting basic information\r')
757773

758774
prev_resps, resp = initial_apiparse(base, prev_archive)
759775
if not resp:
@@ -762,6 +778,10 @@ def backup(self, account, prev_archive):
762778

763779
# collect all the meta information
764780
if options.likes:
781+
if not resp.get('blog', {}).get('share_likes', True):
782+
print('{} does not have public likes\n'.format(account))
783+
self.errors = True
784+
return
765785
posts_key = 'liked_posts'
766786
blog = {}
767787
count_estimate = resp['liked_count']
@@ -785,7 +805,9 @@ def _backup(posts, post_respfiles):
785805
key=lambda x: x[0]['id'], reverse=True)
786806
for p, prf in sorted_posts:
787807
post = post_class(p, account, prf, prev_archive)
788-
if ident_max and long(post.ident) <= ident_max:
808+
if ident_max is None:
809+
pass # No limit
810+
elif (p['timestamp'] if options.likes else long(post.ident)) <= ident_max:
789811
return False
790812
if options.count and self.post_count >= options.count:
791813
return False
@@ -843,6 +865,8 @@ def _backup(posts, post_respfiles):
843865
log.status('Backing up posts found empty set of posts, finishing\r')
844866
break
845867

868+
if options.likes:
869+
before = resp['_links']['next']['query_params']['before']
846870
i += MAX_POSTS
847871
except:
848872
# ensure proper thread pool termination

0 commit comments

Comments
 (0)