|
1 |
| -__author__ = "geduldig" |
2 |
| -__date__ = "December 20, 2012" |
| 1 | +__author__ = "geduldig, gscelta" |
| 2 | +__date__ = "January 7, 2019" |
3 | 3 | __license__ = "MIT"
|
4 | 4 |
|
5 | 5 | import argparse
|
6 | 6 | import codecs
|
7 | 7 | from .Geocoder import Geocoder
|
8 | 8 | import os
|
9 | 9 | import sys
|
10 |
| -from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager |
11 |
| -import urllib |
12 |
| - |
| 10 | +from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterPager |
| 11 | +import urllib.request |
| 12 | +import datetime |
13 | 13 |
|
14 | 14 | GEO = Geocoder()
|
15 | 15 |
|
def parse_date(status):
    """Return the tweet's creation time as a naive UTC ``datetime``.

    Twitter formats ``created_at`` like ``Sun Nov 05 17:14:42 +0000 2017``.
    The UTC offset is parsed with ``%z`` instead of being matched as the
    literal text ``+0000``, so a timestamp carrying a different offset is
    converted to UTC rather than raising ``ValueError``.

    Args:
        status: Tweet dict holding a ``created_at`` string.

    Returns:
        datetime.datetime: The creation time expressed in UTC, with
        ``tzinfo`` stripped (same shape the callers already format with
        ``strftime``).

    Raises:
        KeyError: ``created_at`` is missing from ``status``.
        ValueError: The string does not match the expected layout.

    TODO: Ending downloads as soon as cutoff datetime is reached?
    """
    aware = datetime.datetime.strptime(status['created_at'],
                                       '%a %b %d %H:%M:%S %z %Y')
    # Normalise to UTC first, then drop tzinfo so the result stays a naive
    # datetime exactly as before for "+0000" inputs.
    return aware.astimezone(datetime.timezone.utc).replace(tzinfo=None)
| 24 | + |
def unique_name(status):
    """Build an extension-less file name for a tweet's image.

    The name is ``<YYYYmmdd-HHMMSS>_<screen_name>``: the timestamp comes
    first so that files sort chronologically in a directory listing.

    Args:
        status: Tweet dict with ``user.screen_name`` and ``created_at``.

    Returns:
        str: The file name without directory or extension.
    """
    screen_name = status['user']['screen_name']
    when = parse_date(status).strftime('%Y%m%d-%H%M%S')
    return when + "_" + screen_name
16 | 35 |
|
def download_photo(status, photo_dir):
    """Download photo(s) from embedded url(s).

    Every ``photo`` or ``animated_gif`` entry in ``status['entities']['media']``
    is fetched from its ``media_url_https`` and stored in ``photo_dir``.
    The first file is named ``unique_name(status)`` plus the URL's extension;
    any further media of the same tweet get a ``_<n>`` suffix so they do not
    overwrite the first download.

    Args:
        status: Tweet dict containing an ``entities`` mapping.
        photo_dir: Existing directory that receives the downloaded files.
    """
    saved = 0
    # `or ()` covers both a missing 'media' key and an explicit None value.
    for media in status['entities'].get('media') or ():
        if media['type'] not in ('animated_gif', 'photo'):
            continue
        photo_url = media['media_url_https']
        file_name = unique_name(status)
        if saved:
            # unique_name() is identical for all media of one tweet.
            file_name += '_%d' % saved
        file_name += '.' + photo_url.split('.')[-1]
        urllib.request.urlretrieve(photo_url, os.path.join(photo_dir, file_name))
        print("IMAGE: %s" % file_name)
        saved += 1
27 | 53 |
|
def lookup_geocode(status):
    """Get geocode either from tweet's 'coordinates' field (unlikely) or from tweet's location and Google.

    Prints a ``GEOCODE:`` line when the geocoder returns a result whose first
    element is truthy. Nothing is looked up once the geocoder reports that
    its quota is exceeded.

    Args:
        status: Tweet dict handed to ``GEO.geocode_tweet``.
    """
    if GEO.quota_exceeded:
        return
    try:
        geocode = GEO.geocode_tweet(status)
        if geocode[0]:
            # The format consumes the result directly, so it is expected to
            # be a 3-tuple (presumably place, lat, lng - confirm in Geocoder).
            print('GEOCODE: %s %s,%s' % geocode)
    except Exception:
        # Deliberately best-effort: a failed lookup must not stop the search.
        # Only a freshly exhausted quota is worth reporting.
        if GEO.quota_exceeded:
            print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request)
39 | 64 |
|
def process_tweet(status, photo_dir, stalk, no_images_of_retweets=False):
    """Print one tweet and optionally download its images / geocode it.

    Args:
        status: Tweet dict with ``user.screen_name``, ``text`` and
            ``created_at``.
        photo_dir: Directory for downloaded images; falsy disables downloads.
        stalk: When true, look up and print the tweet's location.
        no_images_of_retweets: When true, images are not downloaded for
            tweets that carry a ``retweeted_status`` key.

    Any exception raised while downloading or geocoding is reported on
    stdout and otherwise ignored, so one bad tweet does not end the search.
    """
    print('\nUSER: %s\nTWEET: %s' % (status['user']['screen_name'], status['text']))
    print('DATE: %s' % status['created_at'])

    try:
        # dict.has_key() no longer exists in Python 3; use `in`.
        skip_images = no_images_of_retweets and 'retweeted_status' in status
        if photo_dir and not skip_images:
            download_photo(status, photo_dir)
        if stalk:
            lookup_geocode(status)
    except Exception as e:
        print("ALERT exception ignored: %s %s" % (type(e), e))
47 | 76 |
|
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, no_images_of_retweets, count):
    """Get tweets containing any words in 'word_list'.

    Runs a single pass over the ``search/tweets`` pager and hands every tweet
    to ``process_tweet``. Call this function again to repeat the search.

    Args:
        api: Authenticated ``TwitterAPI`` client.
        word_list: Search terms; they are combined with ``OR``.
        photo_dir: Directory for downloaded images, or a falsy value.
        region: ``(lat, lng, radius_km)`` tuple restricting the search, or a
            falsy value for no restriction.
        stalk: When true, print each tweet's location.
        no_retweets: When true, retweets are skipped entirely.
        no_images_of_retweets: When true, retweets are printed but their
            images are not downloaded.
        count: Page size requested from Twitter.

    Raises:
        Exception: Twitter returned an error message other than code 131.
    """
    params = {'q': ' OR '.join(word_list), 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    pager = TwitterPager(api, 'search/tweets', params)
    for item in pager.get_iterator():
        if 'text' in item:
            # dict.has_key() no longer exists in Python 3; use `in`.
            if not no_retweets or 'retweeted_status' not in item:
                process_tweet(item, photo_dir, stalk, no_images_of_retweets)
        elif 'message' in item:
            code = item.get('code')
            if code == 131:
                continue  # ignore internal server error
            if code == 88:
                # The reset time was previously read from an undefined name
                # (`search`), which raised NameError instead of reporting.
                print('Rate limit exceeded; suspend search until the quota resets')
            raise Exception('Message from twitter: %s' % item['message'])
if __name__ == '__main__':
    # Command-line entry point: parse options, authenticate, run one search
    # and finally print the geocoder statistics.

    # print UTF-8 to the console
    try:
        # python 3
        sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
    except AttributeError:
        # python 2 (no .buffer attribute on sys.stdout)
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(description='Search tweet history for pics and/or geocode.')
    parser.add_argument('-count', type=int, default=15, help='download batch size')
    parser.add_argument('-location', type=str, help='limit tweets to a place')
    parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
    parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets completely')
    parser.add_argument('-no_images_of_retweets', action='store_true', help='exclude re-tweet images')
    parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory')
    parser.add_argument('-stalk', action='store_true', help='print tweet location')
    parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search')
    args = parser.parse_args()

    if args.words is None:
        sys.exit('You must use -words.')

    oauth = TwitterOAuth.read_file(args.oauth)
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)

    try:
        if args.location:
            lat, lng, radius = GEO.get_region_circle(args.location)
            region = (lat, lng, radius)
            print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius))
        else:
            region = None
        search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.no_images_of_retweets, args.count)
    except KeyboardInterrupt:
        print('\nTerminated by user\n')
    except Exception as e:
        # Top-level boundary: report the failure and still print the stats.
        print('*** STOPPED %s %s\n' % (type(e), e))

    GEO.print_stats()
0 commit comments