forked from geduldig/TwitterGeoPics
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
218 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
__author__ = "Jonas Geduldig" | ||
__date__ = "December 20, 2012" | ||
__license__ = "MIT" | ||
|
||
import argparse | ||
import codecs | ||
from .Geocoder import Geocoder | ||
import os | ||
import sys | ||
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager | ||
import urllib | ||
|
||
|
||
# Module-level geocoder instance shared by lookup_geocode() and the script entry point.
GEO = Geocoder() | ||
|
||
|
||
def download_photo(status, photo_dir):
    """Download photo(s) from embedded url(s).

    Every media entity of type 'photo' in the tweet is saved in `photo_dir`
    as `<screen_name>.<extension>`, where the extension is the text after the
    last '.' of the photo URL.  Tweets without entities or without media are
    ignored.

    Args:
        status: Tweet as a dict; reads status['entities']['media'] and
            status['user']['screen_name'].
        photo_dir: Directory the photo files are written to.
    """
    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; this script is meant to run on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    entities = status.get('entities') or {}
    for media in entities.get('media') or []:
        if media['type'] != 'photo':
            continue
        photo_url = media['media_url_https']
        screen_name = status['user']['screen_name']
        extension = photo_url.split('.')[-1]
        file_name = os.path.join(photo_dir, screen_name) + '.' + extension
        urlretrieve(photo_url, file_name)
|
||
|
||
def lookup_geocode(status):
    """Print the geocode of a tweet as reported by the shared geocoder GEO.

    Per the original note, the geocode comes either from the tweet's
    'coordinates' field (unlikely) or from the tweet's location and Google.
    Nothing happens once the geocoder reports its quota as exceeded.  Errors
    raised while geocoding are swallowed; only a quota overrun is reported.
    """
    if GEO.quota_exceeded:
        return
    try:
        result = GEO.geocode_tweet(status)
        if result[0]:
            print('GEOCODE: %s %s,%s' % result)
    except Exception:
        if not GEO.quota_exceeded:
            return
        print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request)
|
||
|
||
def process_tweet(status, photo_dir, stalk):
    """Print one tweet, then optionally save its photos and print its geocode.

    Args:
        status: Tweet as a dict; reads the user's screen name, 'text' and
            'created_at'.
        photo_dir: When truthy, photos are downloaded into this directory.
        stalk: When truthy, the tweet's geocode is looked up and printed.
    """
    author = status['user']['screen_name']
    headline = '\n%s: %s' % (author, status['text'])
    print(headline)
    print(status['created_at'])
    if photo_dir:
        download_photo(status, photo_dir)
    if stalk:
        lookup_geocode(status)
|
||
|
||
def search_tweets(api, list, photo_dir, region, stalk, no_retweets, count):
    """Get tweets containing any words in 'list'.

    Runs a 'search/tweets' query through TwitterRestPager and hands every
    returned tweet to process_tweet.  When the pager's iterator ends, the
    search is started again.

    Args:
        api: API object handed to TwitterRestPager.
        list: Words to search for; joined with ' OR ' into the query.
        photo_dir: Forwarded to process_tweet.
        region: Optional (lat, lng, radius) tuple used for the 'geocode'
            search parameter, radius in km.
        stalk: Forwarded to process_tweet.
        no_retweets: When true, items carrying 'retweeted_status' are skipped.
        count: Value of the 'count' search parameter.

    Raises:
        Exception: For any message item from Twitter other than code 131.
    """
    params = {'q': ' OR '.join(list), 'count': count}
    if region:
        params['geocode'] = '%f,%f,%fkm' % region  # lat,lng,radius
    while True:
        pager = TwitterRestPager(api, 'search/tweets', params)
        for item in pager.get_iterator():
            if 'text' in item:
                # dict.has_key() exists only on Python 2; 'in' works on both.
                if not no_retweets or 'retweeted_status' not in item:
                    process_tweet(item, photo_dir, stalk)
            elif 'message' in item:
                code = item.get('code')
                if code == 131:
                    continue  # ignore internal server error
                if code == 88:
                    # The original looked up the reset time through an
                    # undefined name and failed with NameError; report the
                    # rate limit without it.
                    print('Rate limit exceeded; suspending search')
                raise Exception('Message from twitter: %s' % item['message'])
|
||
|
||
if __name__ == '__main__':
    # Script entry point: parse the command line, read the OAuth credentials,
    # optionally resolve -location to a search circle, then search until
    # interrupted or stopped by an error.

    # print UTF-8 to the console
    try:
        # python 3
        sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
    except AttributeError:
        # python 2: sys.stdout has no 'buffer' attribute
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(description='Search tweet history for pics and/or geocode.')
    parser.add_argument('-count', type=int, default=15, help='download batch size')
    parser.add_argument('-location', type=str, help='limit tweets to a place')
    parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
    parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets')
    parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory')
    parser.add_argument('-stalk', action='store_true', help='print tweet location')
    parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search')
    args = parser.parse_args()

    if args.words is None:
        sys.exit('You must use -words.')

    oauth = TwitterOAuth.read_file(args.oauth)
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)

    try:
        if args.location:
            lat, lng, radius = GEO.get_region_circle(args.location)
            region = (lat, lng, radius)
            print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius))
        else:
            region = None
        search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.count)
    except KeyboardInterrupt:
        print('\nTerminated by user\n')
    except Exception as e:
        print('*** STOPPED %s\n' % e)

    # Always report geocoder statistics, whether the search ended by user
    # interrupt or by error.
    GEO.print_stats()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
__author__ = "Jonas Geduldig" | ||
__date__ = "December 20, 2012" | ||
__license__ = "MIT" | ||
|
||
import argparse | ||
import codecs | ||
from .Geocoder import Geocoder | ||
import os | ||
import sys | ||
from TwitterAPI import TwitterAPI, TwitterOAuth | ||
import urllib | ||
|
||
|
||
# Module-level geocoder instance shared by lookup_geocode() and the script entry point.
GEO = Geocoder() | ||
|
||
|
||
def download_photo(status, photo_dir):
    """Download photo(s) from embedded url(s).

    Each 'photo' media entity of the tweet is written to `photo_dir` under the
    name `<screen_name>.<extension>`; the extension is whatever follows the
    last '.' in the photo URL.  A tweet with no entities or no media is a
    no-op.

    Args:
        status: Tweet as a dict; reads status['entities']['media'] and
            status['user']['screen_name'].
        photo_dir: Directory the photo files are written to.
    """
    # Python 3 moved urlretrieve into urllib.request; fall back to the
    # Python 2 location so the script keeps working on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    media_entities = (status.get('entities') or {}).get('media') or []
    photos = [m for m in media_entities if m['type'] == 'photo']
    for photo in photos:
        photo_url = photo['media_url_https']
        base_path = os.path.join(photo_dir, status['user']['screen_name'])
        file_name = base_path + '.' + photo_url.split('.')[-1]
        urlretrieve(photo_url, file_name)
|
||
|
||
def lookup_geocode(status):
    """Ask the shared geocoder GEO for the tweet's geocode and print it.

    Per the original note, the geocode comes either from the tweet's
    'coordinates' field (unlikely) or from the tweet's location and Google.
    The lookup is skipped once the geocoder's quota is exceeded.  Geocoding
    errors are not propagated; a quota overrun is the only one reported.
    """
    if GEO.quota_exceeded:
        return
    try:
        found = GEO.geocode_tweet(status)
        if found[0]:
            print('GEOCODE: %s %s,%s' % found)
    except Exception:
        quota_hit = GEO.quota_exceeded
        if quota_hit:
            print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request)
|
||
|
||
def process_tweet(status, photo_dir, stalk):
    """Print a tweet and run the optional photo download and geocode lookup.

    Args:
        status: Tweet as a dict; reads the user's screen name, 'text' and
            'created_at'.
        photo_dir: When truthy, photos are downloaded into this directory.
        stalk: When truthy, the tweet's geocode is looked up and printed.
    """
    screen_name = status['user']['screen_name']
    text = status['text']
    print('\n%s: %s' % (screen_name, text))
    print(status['created_at'])
    if photo_dir:
        download_photo(status, photo_dir)
    if stalk:
        lookup_geocode(status)
|
||
|
||
def stream_tweets(api, list, photo_dir, region, stalk, no_retweets):
    """Get tweets containing any words in 'list' or that originate from 'region'.

    Opens a 'statuses/filter' request and hands every received tweet to
    process_tweet.  The request is issued again whenever the stream ends or
    an error occurs, so this function only returns through an exception
    that is not an Exception subclass (e.g. KeyboardInterrupt).

    Args:
        api: Object whose request(resource, params) returns a response with
            a get_iterator() method.
        list: Optional words to track; joined with ',' into 'track'.
        photo_dir: Forwarded to process_tweet.
        region: Optional 4-tuple of floats formatted into 'locations'.
        stalk: Forwarded to process_tweet.
        no_retweets: When true, items carrying 'retweeted_status' are skipped.
    """
    params = {}
    if list is not None:
        params['track'] = ','.join(list)
    if region is not None:
        params['locations'] = '%f,%f,%f,%f' % region
        print('REGION %s' % str(region))
    while True:
        try:
            r = api.request('statuses/filter', params)
            # Iterate the response once; when the stream ends, fall through
            # to the outer loop and reconnect instead of re-reading an
            # exhausted response forever.
            for item in r.get_iterator():
                if 'text' in item:
                    # dict.has_key() exists only on Python 2; 'in' works on both.
                    if not no_retweets or 'retweeted_status' not in item:
                        process_tweet(item, photo_dir, stalk)
                elif 'disconnect' in item:
                    raise Exception('Disconnect: %s' % item['disconnect'].get('reason'))
        except Exception as e:
            # reconnect on 401 errors and socket timeouts
            print('*** MUST RECONNECT %s\n' % e)
|
||
|
||
if __name__ == '__main__':
    # Script entry point: parse the command line, read the OAuth credentials,
    # optionally resolve -location to a bounding box, then stream tweets
    # until interrupted by the user.

    # print UTF-8 to the console
    try:
        # python 3
        sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
    except AttributeError:
        # python 2: sys.stdout has no 'buffer' attribute
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)

    parser = argparse.ArgumentParser(description='Get real-time tweet stream with embedded pics and/or geocode.')
    parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets')
    parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
    parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets')
    parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory')
    parser.add_argument('-stalk', action='store_true', help='print tweet location')
    parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track')
    args = parser.parse_args()

    if args.words is None and args.location is None:
        sys.exit('You must use either -words or -location or both.')

    oauth = TwitterOAuth.read_file(args.oauth)
    api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret)

    try:
        if args.location:
            if args.location.lower() == 'all':
                # Whole-world box: (lng SW, lat SW, lng NE, lat NE).
                region = (-180, -90, 180, 90)
            else:
                latC, lngC, latSW, lngSW, latNE, lngNE = GEO.get_region_box(args.location)
                region = (lngSW, latSW, lngNE, latNE)
            print('Google found region at %f,%f and %f,%f' % region)
        else:
            region = None
        stream_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets)
    except KeyboardInterrupt:
        print('\nTerminated by user\n')

    # Report geocoder statistics once streaming has been interrupted.
    GEO.print_stats()