1919import threading
2020import time
2121from collections import defaultdict
22- from datetime import datetime
22+ from datetime import datetime , timedelta
2323from glob import glob
2424from os .path import join , split , splitext
2525from posixpath import basename as urlbasename , join as urlpathjoin , splitext as urlsplitext
@@ -327,7 +327,7 @@ def apiparse(base, prev_resps, count, start=0, before=None):
327327 params = {'api_key' : API_KEY , 'limit' : count , 'reblog_info' : 'true' }
328328 if before :
329329 params ['before' ] = before
330- if start > 0 :
330+ if start > 0 and not options . likes :
331331 params ['offset' ] = start
332332 url = base + '?' + urlencode (params )
333333
@@ -714,6 +714,16 @@ def footer(base, previous_page, next_page):
714714 f += '</nav></footer>\n '
715715 return f
716716
@staticmethod
def get_post_timestamps(posts):
    """Yield the Unix timestamp stored in each saved post file.

    posts -- iterable of paths to saved post files. Each file is parsed with
    BeautifulSoup and the 'datetime' attribute of its first <time> element
    is read; the value must look like 'YYYY-MM-DDTHH:MM:SSZ'.

    Yields one integer per post (whole seconds since 1970-01-01), lazily and
    in the order the paths are given. A value that does not match the
    expected format raises ValueError from datetime.strptime.
    """
    epoch = datetime(1970, 1, 1)
    for post in posts:
        with io.open(post, encoding=FILE_ENCODING) as pf:
            soup = BeautifulSoup(pf, 'lxml')
        postdate = soup.find('time')['datetime']
        # Drop the parse tree before suspending at the yield below
        del soup
        # No datetime.fromisoformat or datetime.timestamp on Python 2, and
        # timedelta // timedelta needs Python 3.2+; total_seconds() exists on
        # both 2.7 and 3.x. The parsed value has no sub-second part, so the
        # float-to-int conversion is exact.
        parsed = datetime.strptime(postdate, '%Y-%m-%dT%H:%M:%SZ')
        yield int((parsed - epoch).total_seconds())
726+
717727 def backup (self , account , prev_archive ):
718728 """makes single files and an index for every post on a public Tumblr blog account"""
719729
@@ -744,16 +754,22 @@ def backup(self, account, prev_archive):
744754 # get the highest post id already saved
745755 ident_max = None
746756 if options .incremental :
747- try :
748- ident_max = max (
749- long (splitext (split (f )[1 ])[0 ])
750- for f in glob (path_to (post_dir , '*' + post_ext ))
751- )
752- log .status ('Backing up posts after {}\r ' .format (ident_max ))
753- except ValueError : # max() arg is an empty sequence
754- pass
755- else :
756- log .status ('Getting basic information\r ' )
757+ filter_ = join ('*' , dir_index ) if options .dirs else '*' + post_ext
758+ post_glob = glob (path_to (post_dir , filter_ ))
759+ if not post_glob :
760+ pass # No posts to read
761+ elif options .likes :
762+ # Read every post to find the newest timestamp we've saved.
763+ if BeautifulSoup is None :
764+ raise RuntimeError ("Incremental likes backup: module 'bs4' is not installed" )
765+ log ('Finding newest liked post (may take a while)\n ' , account = True )
766+ ident_max = max (self .get_post_timestamps (post_glob ))
767+ else :
768+ ident_max = max (long (splitext (split (f )[1 ])[0 ]) for f in post_glob )
769+ if ident_max is not None :
770+ log ('Backing up posts after {}\n ' .format (ident_max ), account = True )
771+
772+ log .status ('Getting basic information\r ' )
757773
758774 prev_resps , resp = initial_apiparse (base , prev_archive )
759775 if not resp :
@@ -762,6 +778,10 @@ def backup(self, account, prev_archive):
762778
763779 # collect all the meta information
764780 if options .likes :
781+ if not resp .get ('blog' , {}).get ('share_likes' , True ):
782+ print ('{} does not have public likes\n ' .format (account ))
783+ self .errors = True
784+ return
765785 posts_key = 'liked_posts'
766786 blog = {}
767787 count_estimate = resp ['liked_count' ]
@@ -785,7 +805,9 @@ def _backup(posts, post_respfiles):
785805 key = lambda x : x [0 ]['id' ], reverse = True )
786806 for p , prf in sorted_posts :
787807 post = post_class (p , account , prf , prev_archive )
788- if ident_max and long (post .ident ) <= ident_max :
808+ if ident_max is None :
809+ pass # No limit
810+ elif (p ['timestamp' ] if options .likes else long (post .ident )) <= ident_max :
789811 return False
790812 if options .count and self .post_count >= options .count :
791813 return False
@@ -845,6 +867,8 @@ def _backup(posts, post_respfiles):
845867 log .status ('Backing up posts found empty set of posts, finishing\r ' )
846868 break
847869
870+ if options .likes :
871+ before = resp ['_links' ]['next' ]['query_params' ]['before' ]
848872 i += MAX_POSTS
849873 except :
850874 # ensure proper thread pool termination
0 commit comments