Skip to content

Commit d197926

Browse files
authored
Updated to work with Twitter API changes
Changed import urllib => urllib.request (urllib.urlretrieve => urllib.request.urlretrieve). Updated TwitterRestPager => TwitterPager. Break loop for single tweet retrieval.
1 parent 64e451a commit d197926

File tree

1 file changed

+115
-85
lines changed

1 file changed

+115
-85
lines changed

TwitterGeoPics/SearchOldTweets.py

Lines changed: 115 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,107 +1,137 @@
1-
__author__ = "geduldig"
2-
__date__ = "December 20, 2012"
1+
__author__ = "geduldig, gscelta"
2+
__date__ = "January 7, 2019"
33
__license__ = "MIT"
44

55
import argparse
66
import codecs
77
from .Geocoder import Geocoder
88
import os
99
import sys
10-
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager
11-
import urllib
12-
10+
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterPager
11+
import urllib.request
12+
import datetime
1313

1414
# Module-wide Geocoder instance shared by lookup_geocode() and the script entry point.
GEO = Geocoder()
1515

16+
def parse_date(status):
    """Parse a tweet's 'created_at' field into a naive UTC datetime.

    Timestamps look like ``Sun Nov 05 17:14:42 +0000 2017``.  The UTC offset
    is parsed with ``%z`` instead of being matched as the literal ``+0000``,
    so a timestamp carrying any other offset is normalised to UTC rather than
    raising ValueError.

    Args:
        status: Tweet dict containing a 'created_at' string.

    Returns:
        A naive ``datetime.datetime`` expressed in UTC.

    Raises:
        KeyError: If 'created_at' is missing.
        ValueError: If the timestamp does not match the expected format.

    TODO: Ending downloads as soon as cutoff datetime is reached?
    """
    stamp = datetime.datetime.strptime(status['created_at'],
                                       '%a %b %d %H:%M:%S %z %Y')
    # The '+0000'-only parser returned a naive value, so convert to UTC and
    # drop tzinfo to hand callers exactly the same kind of object.
    return stamp.astimezone(datetime.timezone.utc).replace(tzinfo=None)
24+
25+
def unique_name(status):
    """Build an image file stem of the form '<YYYYMMDD-HHMMSS>_<screen_name>'.

    The timestamp comes from parse_date(status) and the screen name from
    status['user']['screen_name'].  No file extension is included.
    """
    author = status['user']['screen_name']
    stamp = parse_date(status).strftime('%Y%m%d-%H%M%S')
    return '_'.join((stamp, author))
1635

1736
def download_photo(status, photo_dir):
    """Download photo(s) from embedded url(s).

    Every 'photo' or 'animated_gif' entry in status['entities']['media'] is
    fetched from its 'media_url_https' and saved in `photo_dir` under the
    name produced by unique_name(status) plus the URL's extension.

    Args:
        status: Tweet dict; tweets without 'entities' or 'media' are ignored.
        photo_dir: Directory that receives the downloaded files.
    """
    media_items = (status.get('entities') or {}).get('media') or []
    saved = 0
    for media in media_items:
        # Photos and animated GIFs are stored identically; any other media
        # type is skipped.
        if media.get('type') not in ('photo', 'animated_gif'):
            continue
        photo_url = media['media_url_https']
        file_name = unique_name(status)
        if saved:
            # unique_name() is per tweet, so a further image from the same
            # tweet needs a suffix or it would overwrite the previous file.
            file_name += '_%d' % (saved + 1)
        file_name += '.' + photo_url.split('.')[-1]
        urllib.request.urlretrieve(photo_url, os.path.join(photo_dir, file_name))
        print("IMAGE: %s" % file_name)
        saved += 1
2753

2854
def lookup_geocode(status):
    """Print the tweet's geocode when one is found.

    The geocode comes either from the tweet's 'coordinates' field (unlikely)
    or from the tweet's location and Google.  Nothing is attempted once the
    geocoder reports its quota as exceeded.
    """
    if GEO.quota_exceeded:
        return
    try:
        found = GEO.geocode_tweet(status)
        if found[0]:
            print('GEOCODE: %s %s,%s' % found)
    except Exception:
        # Best effort: a failed lookup is reported only when the geocoder's
        # quota flag is set; any other failure is dropped.
        if GEO.quota_exceeded:
            print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request)
3964

40-
def process_tweet(status, photo_dir, stalk, no_images_of_retweets=False):
    """Print a tweet and optionally download its images and geocode it.

    Args:
        status: Tweet dict with 'user', 'text' and 'created_at'.
        photo_dir: Directory for downloaded images; falsy disables downloads.
        stalk: When true, look up and print the tweet's location.
        no_images_of_retweets: When true, skip image downloads for tweets
            that carry a 'retweeted_status' key.  Defaults to False so the
            earlier three-argument call form keeps working.
    """
    print('\nUSER: %s\nTWEET: %s' % (status['user']['screen_name'], status['text']))
    print('DATE: %s' % status['created_at'])

    # Python 3 dicts have no has_key(); calling it raised AttributeError,
    # which the handler below swallowed, so no image was ever downloaded
    # when retweet filtering was on.
    is_retweet = 'retweeted_status' in status
    try:
        if photo_dir and not (no_images_of_retweets and is_retweet):
            download_photo(status, photo_dir)
        if stalk:
            lookup_geocode(status)
    except Exception as e:
        # Best effort per tweet: report the failure and move on so that one
        # bad tweet does not stop the whole search.
        print("ALERT exception ignored: %s %s" % (type(e), e))
4776

77+
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets,
                  no_images_of_retweets, count, first_only=True):
    """Get tweets containing any words in 'word_list'.

    Args:
        api: TwitterAPI instance used to issue the 'search/tweets' request.
        word_list: Search words; they are joined with ' OR '.
        photo_dir: Passed through to process_tweet().
        region: Optional (lat, lng, radius_km) tuple restricting the search.
        stalk: Passed through to process_tweet().
        no_retweets: When true, tweets carrying 'retweeted_status' are skipped.
        no_images_of_retweets: Passed through to process_tweet().
        count: Value sent as the 'count' request parameter.
        first_only: When true (default), stop after the first item the pager
            yields.  Pass False to iterate over everything the pager returns.
            NOTE(review): the flattened source does not show how the original
            'break' was indented; it is read here as ending the loop after
            the first item -- confirm against the repository.

    Raises:
        Exception: When Twitter returns a 'message' item with a code other
            than 131.
    """
    params = {'q': ' OR '.join(word_list), 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    pager = TwitterPager(api, 'search/tweets', params)
    for item in pager.get_iterator():
        if 'text' in item:
            # 'in' replaces dict.has_key(), which does not exist in Python 3.
            if not (no_retweets and 'retweeted_status' in item):
                process_tweet(item, photo_dir, stalk, no_images_of_retweets)
        elif 'message' in item:
            if item['code'] == 131:
                continue  # ignore internal server error
            if item['code'] == 88:
                # The earlier message called get_quota() on an undefined name
                # ('search'), which raised NameError and hid Twitter's own
                # message; the reset time is not available from here.
                print('Suspend search: rate limit exceeded')
            raise Exception('Message from twitter: %s' % item['message'])
        if first_only:
            break
67-
68-
6999
if __name__ == '__main__':
    # Command-line entry point: parse options, authenticate, run one search
    # and print the geocoder statistics.

    # print UTF-8 to the console
    try:
        # python 3
        sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
    except AttributeError:
        # python 2: sys.stdout has no 'buffer' attribute.  Only that failure
        # is expected here, so nothing broader is caught.
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(description='Search tweet history for pics and/or geocode.')
    parser.add_argument('-count', type=int, default=15, help='download batch size')
    parser.add_argument('-location', type=str, help='limit tweets to a place')
    parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
    parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets completely')
    parser.add_argument('-no_images_of_retweets', action='store_true', help='exclude re-tweet images')
    parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory')
    parser.add_argument('-stalk', action='store_true', help='print tweet location')
    parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search')
    args = parser.parse_args()

    if args.words is None:
        sys.exit('You must use -words.')

    oauth = TwitterOAuth.read_file(args.oauth)
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)

    try:
        if args.location:
            lat, lng, radius = GEO.get_region_circle(args.location)
            region = (lat, lng, radius)
            print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius))
        else:
            region = None
        search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.no_images_of_retweets, args.count)
    except KeyboardInterrupt:
        print('\nTerminated by user\n')
    except Exception as e:
        # Top-level boundary: report why the search stopped instead of
        # printing a traceback.
        print('*** STOPPED %s %s\n' % (type(e), e))
    # Printed whether the search finished, failed or was interrupted.
    GEO.print_stats()

0 commit comments

Comments
 (0)