diff --git a/CHANGES.txt b/CHANGES.txt index 92f6c35..f84a10e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -19,3 +19,7 @@ v2.0.2, 14 Jun 2013 -- Included pygeocoder source until fixed for python 3. v2.0.3, 25 Jun 2013 -- Fixed printing. v2.1.0, 17 Sep 2013 -- Requires TwitterAPI 3. + +v2.1.1, 11 Oct 2013 -- Prints GEOCODE with place and lat,lng. + +v2.2.0, 15 Oct 2013 -- Combined scripts. diff --git a/MANIFEST b/MANIFEST index cf0f5ee..42dc5ec 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3,8 +3,6 @@ CHANGES.txt README.txt setup.py TwitterGeoPics/Geocoder.py -TwitterGeoPics/GetNewGeo.py -TwitterGeoPics/GetNewPics.py -TwitterGeoPics/GetOldGeo.py -TwitterGeoPics/GetOldPics.py +TwitterGeoPics/SearchOldTweets.py +TwitterGeoPics/StreamNewTweets.py TwitterGeoPics/__init__.py diff --git a/README.md b/README.md index 71c6601..c9cac5e 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,42 @@ TwitterGeoPics ============== +Python scripts for geocoding tweets and for downloading images embedded in tweets. Supports Python 2.x and 3.x. -Scripts for geocoding tweets and for downloading images embedded in tweets. +SearchOldTweets.py +----------------- +Uses 'search/tweets' Twitter resource to get tweets, geocode and embedded photos. + +Example: + python -u -m TwitterGeoPics.SearchOldTweets -words love hate -location nyc +For help: + python -u -m TwitterGeoPics.SearchOldTweets -h + +StreamNewTweets.py +----------------- +Uses 'stream/filter' Twitter resource to get tweets, geocode and embedded photos. + +Example: + python -u -m TwitterGeoPics.StreamNewTweets -words love hate -location nyc +For help: + python -u -m TwitterGeoPics.StreamNewTweets -h + +Authentication +-------------- +See TwitterAPI documentation. + +Geocoder +-------- +The geocoder uses Google Maps API to get latitude and longitude from a human-readable address. See pygeocoder package for details. Google will attempt to geocode anything. (If you say your location is The Titanic, Google will geocode the shipwreck.) + +All geocode is cached to avoid duplicate requests. Geocode requests are throttled to avoid exceeding the rate limit. See Geocoder.py for more information. + +Dependencies +----------- +* TwitterAPI +* pygeocoder +* Fridge Contributors ------------ - -* Jonas Geduldig \ No newline at end of file +* Jonas Geduldig diff --git a/README.txt b/README.txt index b8d4624..c737e08 100644 --- a/README.txt +++ b/README.txt @@ -1 +1,41 @@ -Scripts for geocoding tweets and for downloading images embedded in tweets. +TwitterGeoPics +============== +Python scripts for geocoding tweets and for downloading images embedded in tweets. Supports Python 2.x and 3.x. + +SearchOldTweets.py +----------------- +Uses 'search/tweets' Twitter resource to get tweets, geocode and embedded photos. + +Example: + python -u -m TwitterGeoPics.SearchOldTweets -words love hate -location nyc +For help: + python -u -m TwitterGeoPics.SearchOldTweets -h + +StreamNewTweets.py +----------------- +Uses 'stream/filter' Twitter resource to get tweets, geocode and embedded photos. + +Example: + python -u -m TwitterGeoPics.StreamNewTweets -words love hate -location nyc +For help: + python -u -m TwitterGeoPics.StreamNewTweets -h + +Authentication +-------------- +See TwitterAPI documentation. + +Geocoder +-------- +The geocoder uses Google Maps API to get latitude and longitude from a human-readable address. See pygeocoder package for details. Google will attempt to geocode anything. (If you say your location is The Titanic, Google will geocode the shipwreck.) + +All geocode is cached to avoid duplicate requests. Geocode requests are throttled to avoid exceeding the rate limit. See Geocoder.py for more information. + +Dependencies +----------- +* TwitterAPI +* pygeocoder +* Fridge + +Contributors +------------ +* Jonas Geduldig diff --git a/TwitterGeoPics/GetNewGeo.py b/TwitterGeoPics/GetNewGeo.py deleted file mode 100644 index 3681609..0000000 --- a/TwitterGeoPics/GetNewGeo.py +++ /dev/null @@ -1,78 +0,0 @@ -__author__ = "Jonas Geduldig" -__date__ = "December 20, 2012" -__license__ = "MIT" - -import argparse -from .Geocoder import Geocoder -import sys -from TwitterAPI import TwitterAPI, TwitterOAuth - - -GEO = Geocoder() - - -def parse_tweet(status, region): - """Print tweet, location and geocode.""" - try: - geocode = GEO.geocode_tweet(status) - print('\n%s: %s' % (status['user']['screen_name'], status['text'])) - print('LOCATION: %s' % status['user']['location']) - print('GEOCODE: %s' % geocode) - except Exception as e: - if GEO.quota_exceeded: - print('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request) - raise - - -def stream_tweets(api, list, region): - """Get tweets containing any words in 'list' or that have location or coordinates in 'region'.""" - params = {} - if list is not None: - words = ','.join(list) - params['track'] = words - if region is not None: - params['locations'] = '%f,%f,%f,%f' % region - print('REGION %s' % str(region)) - while True: - try: - r = api.request('statuses/filter', params) - while True: - for item in r.get_iterator(): - if 'text' in item: - parse_tweet(item, region) - elif 'disconnect' in item: - raise Exception('Disconnect: %s' % item['disconnect'].get('reason')) - except Exception as e: - # reconnect on 401 errors and socket timeouts - print('*** MUST RECONNECT %s\n' % e) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get real-time tweet stream with geocode.') - parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track') - args = parser.parse_args() - - if args.words is None and args.location is None: - sys.exit('You must use either -words or -locoation or both.') - - oauth = TwitterOAuth.read_file(args.oauth) - api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) - - if args.location: - if args.location.lower() == 'all': - region = (-180, -90, 180, 90) - else: - latC, lngC, latSW, lngSW, latNE, lngNE = GEO.get_region_box(args.location) - region = (lngSW, latSW, lngNE, latNE) - print('Google found region at %f,%f and %f,%f' % region) - else: - region = None - - try: - stream_tweets(api, args.words, region) - except KeyboardInterrupt: - print('\nTerminated by user\n') - - GEO.print_stats() \ No newline at end of file diff --git a/TwitterGeoPics/GetNewPics.py b/TwitterGeoPics/GetNewPics.py deleted file mode 100644 index 7a85048..0000000 --- a/TwitterGeoPics/GetNewPics.py +++ /dev/null @@ -1,96 +0,0 @@ -__author__ = "Jonas Geduldig" -__date__ = "December 20, 2012" -__license__ = "MIT" - -import argparse -from .Geocoder import Geocoder -import os -import sys -from TwitterAPI import TwitterAPI, TwitterOAuth -import urllib - - -GEO = Geocoder() - - -def parse_tweet(status, photo_dir, stalk): - """Use only tweets that embed photos.""" - if 'media' in status['entities']: - photo_count = 0 - for media in status['entities'].get('media'): - if media['type'] == 'photo': - photo_count += 1 - if photo_count == 1: - print('\n%s: %s' % (status['user']['screen_name'], status['text'])) - if stalk and not GEO.quota_exceeded: - try: - geocode = GEO.geocode_tweet(status) - print('LOCATION: %s' % status['user']['location']) - print('GEOCODE: %s' % geocode) - except Exception as e: - if GEO.quota_exceeded: - print('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request) - if photo_dir: - photo_url = media['media_url_https'] - screen_name = status['user']['screen_name'] - file_name = os.path.join(photo_dir, screen_name) + '.' + photo_url.split('.')[-1] - urllib.urlretrieve(photo_url, file_name) - print(screen_name) - - -def stream_tweets(api, list, photo_dir, region, stalk, no_retweets): - """Get tweets containing any words in 'list' or that have location or coordinates in 'region'.""" - params = {} - if list is not None: - words = ','.join(list) - params['track'] = words - if region is not None: - params['locations'] = '%f,%f,%f,%f' % region - print('REGION %s' % str(region)) - while True: - try: - r = api.request('statuses/filter', params) - while True: - for item in r.get_iterator(): - if 'text' in item: - if not no_retweets or not item.has_key('retweeted_status'): - parse_tweet(item, photo_dir, stalk) - elif 'disconnect' in item: - raise Exception('Disconnect: %s' % item['disconnect'].get('reason')) - except Exception as e: - # reconnect on 401 errors and socket timeouts - print('*** MUST RECONNECT %s\n' % e) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get real-time tweet stream with embedded pics.') - parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') - parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') - parser.add_argument('-stalk', action='store_true', help='print tweet location') - parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track') - args = parser.parse_args() - - if args.words is None and args.location is None: - sys.exit('You must use either -words or -locoation or both.') - - oauth = TwitterOAuth.read_file(args.oauth) - api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) - - if args.location: - if args.location.lower() == 'all': - region = (-180, -90, 180, 90) - else: - latC, lngC, latSW, lngSW, latNE, lngNE = GEO.get_region_box(args.location) - region = (lngSW, latSW, lngNE, latNE) - print('Google found region at %f,%f and %f,%f' % region) - else: - region = None - - try: - stream_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets) - except KeyboardInterrupt: - print('\nTerminated by user\n') - - GEO.print_stats() \ No newline at end of file diff --git a/TwitterGeoPics/GetOldGeo.py b/TwitterGeoPics/GetOldGeo.py deleted file mode 100644 index 0c8dd4e..0000000 --- a/TwitterGeoPics/GetOldGeo.py +++ /dev/null @@ -1,70 +0,0 @@ -__author__ = "Jonas Geduldig" -__date__ = "December 20, 2012" -__license__ = "MIT" - -import argparse -from .Geocoder import Geocoder -from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager - - -GEO = Geocoder() - - -def parse_tweet(status): - """Print tweet, location and geocode.""" - try: - geocode = GEO.geocode_tweet(status) - print('\n%s: %s' % (status['user']['screen_name'], status['text'])) - print('LOCATION: %s' % status['user']['location']) - print('GEOCODE: %s' % geocode) - except Exception as e: - if GEO.quota_exceeded: - raise - - -def search_tweets(api, list, region): - """Get tweets containing any words in 'list' and that have location or coordinates in 'region'.""" - words = ' OR '.join(list) - params = { 'q': words } - if region: - params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius - while True: - pager = TwitterRestPager(api, 'search/tweets', params) - for item in pager.get_iterator(): - if 'text' in item: - parse_tweet(item) - elif 'message' in item: - if item['code'] == 131: - continue # ignore internal server error - elif item['code'] == 88: - print('Suspend search until %s' % search.get_quota()['reset']) - raise Exception('Message from twiter: %s' % item['message']) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Search tweet history.') - parser.add_argument('-location', type=str, help='limit tweets to a place') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-radius', type=float, help='distance from "location" in km') - parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') - args = parser.parse_args() - - oauth = TwitterOAuth.read_file(args.oauth) - api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) - - try: - if args.location: - lat, lng, radius = GEO.get_region_circle(args.location) - print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius)) - if args.radius: - radius = args.radius - region = (lat, lng, radius) - else: - region = None - search_tweets(api, args.words, region) - except KeyboardInterrupt: - print('\nTerminated by user\n') - except Exception as e: - print('*** STOPPED %s\n' % e) - - GEO.print_stats() \ No newline at end of file diff --git a/TwitterGeoPics/GetOldPics.py b/TwitterGeoPics/GetOldPics.py deleted file mode 100644 index 406b8b8..0000000 --- a/TwitterGeoPics/GetOldPics.py +++ /dev/null @@ -1,89 +0,0 @@ -__author__ = "Jonas Geduldig" -__date__ = "December 20, 2012" -__license__ = "MIT" - -import argparse -from .Geocoder import Geocoder -import os -from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager -import urllib - - -GEO = Geocoder() - - -def parse_tweet(status, photo_dir, stalk): - """Use only tweets that embed photos.""" - if 'media' in status['entities']: - photo_count = 0 - for media in status['entities'].get('media'): - if media['type'] == 'photo': - photo_count += 1 - if photo_count == 1: - print('\n%s: %sprint' % (status['user']['screen_name'], status['text'])) - if stalk and not GEO.quota_exceeded: - try: - geocode = GEO.geocode_tweet(status) - print('LOCATION: %s' % status['user']['location']) - print('GEOCODE: %s' % geocode) - except Exception as e: - if GEO.quota_exceeded: - print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request) - if photo_dir: - photo_url = media['media_url_https'] - screen_name = status['user']['screen_name'] - file_name = os.path.join(photo_dir, screen_name) + '.' + photo_url.split('.')[-1] - urllib.urlretrieve(photo_url, file_name) - print(screen_name) - - -def search_tweets(api, list, photo_dir, region, stalk, no_retweets): - """Get tweets containing any words in 'list' and that have location or coordinates in 'region'.""" - words = ' OR '.join(list) - params = { 'q': words } - if region: - params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius - while True: - pager = TwitterRestPager(api, 'search/tweets', params) - for item in pager.get_iterator(): - if 'text' in item: - if not no_retweets or not item.has_key('retweeted_status'): - parse_tweet(item, photo_dir, stalk) - elif 'message' in item: - if item['code'] == 131: - continue # ignore internal server error - elif item['code'] == 88: - print('Suspend search until %s' % search.get_quota()['reset']) - raise Exception('Message from twiter: %s' % item['message']) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Search tweet history.') - parser.add_argument('-location', type=str, help='limit tweets to a place') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') - parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') - parser.add_argument('-radius', type=str, help='distance from "location" in km') - parser.add_argument('-stalk', action='store_true', help='print tweet location') - parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') - args = parser.parse_args() - - oauth = TwitterOAuth.read_file(args.oauth) - api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) - - try: - if args.location: - lat, lng, radius = GEO.get_region_circle(args.location) - print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius)) - if args.radius: - radius = args.radius - region = (lat, lng, radius) - else: - region = None - search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets) - except KeyboardInterrupt: - print('\nTerminated by user\n') - except Exception as e: - print('*** STOPPED %s\n' % e) - - GEO.print_stats() \ No newline at end of file diff --git a/TwitterGeoPics/__init__.py b/TwitterGeoPics/__init__.py index b44488d..872c190 100644 --- a/TwitterGeoPics/__init__.py +++ b/TwitterGeoPics/__init__.py @@ -1,5 +1,5 @@ __title__ = 'TwitterGeoPics' -__version__ = '2.1.0' +__version__ = '2.2.0' __author__ = 'Jonas Geduldig' __license__ = 'MIT' __copyright__ = 'Copyright 2013 Jonas Geduldig'