From 642374dacda13389cc4a4a9af114c3c943740d72 Mon Sep 17 00:00:00 2001 From: Jonas Geduldig Date: Fri, 14 Jun 2013 11:05:29 -0400 Subject: [PATCH] Switched to TwitterAPI --- .gitignore | 2 + CHANGES.txt | 4 +- MANIFEST | 9 +++ README.md | 64 +-------------- {twittergeo => TwitterGeoPics}/Geocoder.py | 79 ++++--------------- .../GetNewGeo.py | 50 +++--------- .../GetNewPics.py | 67 +++++----------- .../GetOldGeo.py | 46 +++-------- .../GetOldPics.py | 62 +++++---------- {twittergeo => TwitterGeoPics}/__init__.py | 0 setup.py | 16 ++-- 11 files changed, 101 insertions(+), 298 deletions(-) create mode 100644 MANIFEST rename {twittergeo => TwitterGeoPics}/Geocoder.py (89%) rename twittergeo/StreamGeo.py => TwitterGeoPics/GetNewGeo.py (61%) rename twittergeo/StreamPics.py => TwitterGeoPics/GetNewPics.py (58%) rename twittergeo/SearchGeo.py => TwitterGeoPics/GetOldGeo.py (57%) rename twittergeo/SearchPics.py => TwitterGeoPics/GetOldPics.py (57%) rename {twittergeo => TwitterGeoPics}/__init__.py (100%) diff --git a/.gitignore b/.gitignore index d2d6f36..e147773 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +*.cache + *.py[cod] # C extensions diff --git a/CHANGES.txt b/CHANGES.txt index 0d73a03..a267d57 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -10,4 +10,6 @@ v1.0.0, 30 Jan 2013 -- Uploaded to github. v1.1.0, 12 Feb 2013 -- Replaced TwitterAPI with puttytat for Twitter requests. -v1.1.1, 19 Feb 2013 -- Geocoder uses viewport instead of bounds. \ No newline at end of file +v1.1.1, 19 Feb 2013 -- Geocoder uses viewport instead of bounds. 
+ +v2.0.0, 14 Jun 2013 -- Switched to TwitterAPI and renamed to TwitterGeoPics. \ No newline at end of file diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..b73bf1a --- /dev/null +++ b/MANIFEST @@ -0,0 +1,9 @@ +# file GENERATED by distutils, do NOT edit +CHANGES.txt +setup.py +twittergeo/Geocoder.py +twittergeo/SearchGeo.py +twittergeo/SearchPics.py +twittergeo/StreamGeo.py +twittergeo/StreamPics.py +twittergeo/__init__.py diff --git a/README.md b/README.md index 880947f..b060e6f 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,7 @@ -# TwitterGeo # +# TwitterGeoPics # _Scripts for geocoding tweets and for downloading images embedded in tweets._ -### Getting Location and Embedded Images... ### - -TwitterGeo contains command line scripts for geocoding tweets and extracting embedded images from tweets from twitter.com. The scripts take one or more search words as command line arguments. The scripts download old tweets using Twitter's REST API and download new tweets using Twitter's Streaming API. - -About 1% or 2% of tweets contain latitude and longitude. Of those tweets that do not contain coordinate data, about 60% have the user's profile location, a descriptive text field that may or not be accurate. Using Google's Maps API, we can geocode these tweets, which locates about half of all tweets, a portion of which are suspect. - -Use the location option to restrict searches to a geographic location. Twitter returns tweets that either contain coordinates in the location region or tweets from users whose profile location is in the specified region. - -Google does not require autentication, but it does enforce a daily limit of about 2,500 requests per day and about 10 requests per second. - -The Twitter API requires OAuth credentials which you can get by creating an application on dev.twitter.com. Once you have your OAuth secrets and keys, copy them into puttytat/credentials.txt. Alternatively, specify the credentials file on the command line. 
- -Twitter restricts searching old tweets to within roughly the past week. Twitter also places a bandwidth limit on searching current tweet, but you will notice this only when you are searching a popular word. When this limit occurs the total number of skipped tweets is printed and the connection is maintained. - -### Features ### - -*The following modules run as command line scripts and write tweets to the console.* - -***SearchGeo*** - -Prints old tweets and their location information and coordinates when possible. - -***StreamGeo*** - -Prints new tweets and their location information and coordinates when possible. - -***SearchPics*** - -Prints old tweets, their coordinates and URLs of any embedded photos. To download the photos use the -photo_dir option. To get tweets only from a specific geographic region use the -location. - -***StreamPics*** - -Prints new tweets, their coordinates and URLs of any embedded photos. To download the photos use the -photo_dir option. To get tweets only from a specific geographic region use the -location. - -*This is utility module.* - -***Geocoder*** - -A wrapper for the pygeocoder package. It adds throttling to respect Google's daily quota and rate limit. It also provides a caching mechanism for storing geocode lookups to a text file. The caching is only partially effective because user can enter their location in any format. There are also some Twitter specific methods. - -### Installation ### - - -1. On a command line, type: - - pip install twittergeo - -2. Either copy your OAuth consumer secret and key and your access token secret and key into puttytat/credentials.txt, or copy them into another file which you will specify on the command line. See credentials.txt for the expected file format. - -3. 
Run a script type with '-m' option, for example: - - python -m twittergeo.StreamGeo zzz - python -m twittergeo.StreamGeo zzz -oauth ./my_credentials.txt - -### External Dependencies ### - -This package uses the following external packages. - -* puttytat - for downloading tweets -* pygeocoder - for geo-referencing using Google's Maps service -* fridge - for caching latitudes and longitudes in a persistant dict - ### Contributors ### Jonas Geduldig \ No newline at end of file diff --git a/twittergeo/Geocoder.py b/TwitterGeoPics/Geocoder.py similarity index 89% rename from twittergeo/Geocoder.py rename to TwitterGeoPics/Geocoder.py index 98c34fc..2097c23 100755 --- a/twittergeo/Geocoder.py +++ b/TwitterGeoPics/Geocoder.py @@ -5,7 +5,6 @@ import datetime import fridge import math -import os import pygeocoder import socket import time @@ -26,12 +25,9 @@ class Geocoder: def __init__(self, cache_file=None): """Zero counters and open cache file. - Parameters - ---------- - cache_file : str + cache_file : File path for cache file. File will get opened for append or created if not found. If cache_file is not supplied, the default file will be used. - """ self.count_request = 0 # total number of geocode requests self.count_request_ok = 0 # total number of successful geocode requests @@ -47,17 +43,13 @@ def __init__(self, cache_file=None): self.last_exec = None # time updated at each geocode request if cache_file is None: - #path = os.path.dirname(__file__) - #cache_file = os.path.join(path, DEFAULT_CACHE_FILE) cache_file = DEFAULT_CACHE_FILE # cache is a persistent dict with place address as key and lat/lng and count as value self.cache = fridge.Fridge(cache_file) def _throttle(self): - """Wait an interval to not exceed rate limit. Called before each geocode request. - - """ + """Wait an interval to not exceed rate limit. 
Called before each geocode request.""" if self.retry_count == 1: # increase the throttle to respect rate limit self.retry_count = 2 @@ -76,12 +68,9 @@ def _throttle(self): def _should_retry(self): """Handle an OVER QUERY LIMIT exception. Called when GeocodeError is thrown. - Return - ------ - retry : boolean + Return : boolean True means wait 2 seconds, increase the throttle, and retry the request. False means stop making geocode requests because daily limit was exceeded. - """ if not self.quota_exceeded: if self.retry_count == 0: @@ -101,23 +90,14 @@ def _should_retry(self): def geocode(self, place): """Returns Google's geocode data for a place. - Parameters - ---------- - place : str - An address or partial address in any format. + place : An address or partial address in any format. - Return - ------ - geocode data : dict - Keys and values are from Google's JSON data. + Return : dict + Geocode from Google's JSON data. - Raises - ------ - pygeocoder.GeocoderError - Quota exceeded, indecipherable address, etc. - Exception - Socket errors. - + Raises : + pygeocoder.GeocoderError : Quota exceeded, indecipherable address, etc. + Exception : Socket errors. """ self._throttle() try: @@ -163,24 +143,16 @@ def address_to_latlng(self, place): def geocode_tweet(self, status): """Returns an address and coordinates associated with a tweet. - Parameters - ---------- status : dict Keys and values of a tweet (i.e. a Twitter status). - Return - ------ - place : str + Return : (str, float, float) An address or part of an address from either the tweeter's Twitter profile or from reverse geocoding coordinates associated with the tweet. - latitude, longitude : float Coordinates either assocatiated with the tweet or from geocoding the location in the tweeter's Twitter profile. - Raises - ------ - See Geocoder.geocode() documentation. - + Raises: See Geocoder.geocode() documentation. 
""" # start off with the location in the user's profile (it may be empty) place = status['user']['location'] @@ -232,24 +204,15 @@ def get_region_box(self, place): The size of bounding box that Google returns depends on whether the place is an address, a town or a country. - Parameters - ---------- place : str An address or partial address in any format. Googles will try anything. - Return - ------ - latitude, longitude : float + Return : floatx6 The place's coordinates. - latitude, longitude : float The place's SW coordinates. - latitude, longitude : float The place's NE coordinates. - Raises - ------ - See Geocoder.geocode() documentation. - + Raises : See Geocoder.geocode() documentation. """ results = self.geocode(place) geometry = results.raw[0]['geometry'] @@ -265,22 +228,14 @@ def get_region_circle(self, place): The motivation for this method is Twitter's Search API's 'geocode' parameter. - Parameters - ---------- place : str An address or partial address in any format. - Return - ------ - latitude, longitude : float + Return : float, float, str The place's coordinates. - radius : str Half the distance spanning the corner's of the place's bounding box in kilomters. - Raises - ------ - See Geocoder.geocode() documentation. - + Raises : See Geocoder.geocode() documentation. 
""" latC, lngC, latSW, lngSW, latNE, lngNE = self.get_region_box(place) D = self.distance(latSW, lngSW, latNE, lngNE) @@ -288,9 +243,7 @@ def get_region_circle(self, place): @classmethod def distance(cls, lat1, lng1, lat2, lng2): - """Calculates the distance between two points on a sphere - - """ + """Calculates the distance between two points on a sphere.""" # Haversine distance formula lat1, lng1 = math.radians(lat1), math.radians(lng1) lat2, lng2 = math.radians(lat2), math.radians(lng2) diff --git a/twittergeo/StreamGeo.py b/TwitterGeoPics/GetNewGeo.py similarity index 61% rename from twittergeo/StreamGeo.py rename to TwitterGeoPics/GetNewGeo.py index 9e122b6..9f2d79f 100644 --- a/twittergeo/StreamGeo.py +++ b/TwitterGeoPics/GetNewGeo.py @@ -1,24 +1,3 @@ -""" - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt - OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. - - Downloads real-time tweets. You must supply either one or both of the -words and - -location options. Prints the tweet text and location information, including - latitude and longitude from Google's Map service. - - Use the -words option to get tweets that contain any of the words that are passed - as arguments on the command line. - - Use the -location option to get tweets from a geographical region. Location is - determined only from geocode in the tweet. Use -location ALL to get all geocoded - tweets from any location. - - The script calls Twitter's Streaming API which is bandwidth limitted. If you - exceed the rate limit, Twitter sends a message with the total number of tweets - skipped during the current connection. This number is printed, and the connection - remains open. 
-""" - __author__ = "Jonas Geduldig" __date__ = "December 20, 2012" __license__ = "MIT" @@ -29,17 +8,14 @@ import argparse import Geocoder -import puttytat -import urllib +from TwitterAPI import TwitterAPI, TwitterOAuth + -OAUTH = None GEO = Geocoder.Geocoder() def parse_tweet(status, region): - """Print tweet, location and geocode - - """ + """Print tweet, location and geocode.""" try: geocode = GEO.geocode_tweet(status) print '\n%s: %s' % (status['user']['screen_name'], status['text']) @@ -51,10 +27,8 @@ def parse_tweet(status, region): raise -def stream_tweets(list, region): - """Get tweets containing any words in 'list' or that have location or coordinates in 'region' - - """ +def stream_tweets(api, list, region): + """Get tweets containing any words in 'list' or that have location or coordinates in 'region'.""" params = {} if list is not None: words = ','.join(list) @@ -63,10 +37,11 @@ def stream_tweets(list, region): params['locations'] = '%f,%f,%f,%f' % region print 'REGION', region while True: - tw = puttytat.TwitterStream(OAUTH) try: + api.request('statuses/filter', params) + iter = api.get_iterator() while True: - for item in tw.request('statuses/filter', params): + for item in iter: if 'text' in item: parse_tweet(item, region) elif 'disconnect' in item: @@ -77,16 +52,17 @@ def stream_tweets(list, region): if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get real-time tweet stream.') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') + parser = argparse.ArgumentParser(description='Get real-time tweet stream with geocode.') parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets') + parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track') args = parser.parse_args() if args.words is None and 
args.location is None: sys.exit('You must use either -words or -locoation or both.') - OAUTH = puttytat.TwitterOauth.read_file(args.oauth) + oauth = TwitterOAuth.read_file(args.oauth) + api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) if args.location: if args.location.lower() == 'all': @@ -99,7 +75,7 @@ def stream_tweets(list, region): region = None try: - stream_tweets(args.words, region) + stream_tweets(api, args.words, region) except KeyboardInterrupt: print>>sys.stderr, '\nTerminated by user' diff --git a/twittergeo/StreamPics.py b/TwitterGeoPics/GetNewPics.py similarity index 58% rename from twittergeo/StreamPics.py rename to TwitterGeoPics/GetNewPics.py index 6d02d32..232fb60 100644 --- a/twittergeo/StreamPics.py +++ b/TwitterGeoPics/GetNewPics.py @@ -1,29 +1,3 @@ -""" - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt - OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. - - Downloads real-time tweets that contain embedded photo URLs. You must supply - either one or both of the -words and -location options. Prints the tweet text, - location information, including latitude and longitude from Google's Map service, - and all photo URLs. - - Use the -words option to get tweets that contain any of the words that are passed - as arguments on the command line. - - Use the -location option to get tweets from a geographical region. Location is - determined only from geocode in the tweet. Use -location ALL to get all geocoded - tweets from any location. - - Use the -photo_dir option to save photos to a directory. - - Use the -stalk flag to print latitude and longitude from Google's Map service. - - The script calls Twitter's Streaming API which is bandwidth limitted. If you - exceed the rate limit, Twitter sends a message with the total number of tweets - skipped during the current connection. This number is printed, and the connection - remains open. 
-""" - __author__ = "Jonas Geduldig" __date__ = "December 20, 2012" __license__ = "MIT" @@ -35,19 +9,15 @@ import argparse import Geocoder import os -import puttytat +from TwitterAPI import TwitterAPI, TwitterOAuth import urllib -OAUTH = None + GEO = Geocoder.Geocoder() def parse_tweet(status, photo_dir, stalk): - """If tweet contains photo, print tweet. - If stalking, print location and geocode. - If photo_dir, print photo id and save photo to file. - - """ + """Use only tweets that embed photos.""" if 'media' in status['entities']: photo_count = 0 for media in status['entities'].get('media'): @@ -63,17 +33,16 @@ def parse_tweet(status, photo_dir, stalk): except Exception, e: if GEO.quota_exceeded: print>>sys.stderr, '*** GEOCODER QUOTA EXCEEDED:', GEO.count_request - photo_url = media['media_url_https'] if photo_dir: - print media['id_str'] - file_name = os.path.join(photo_dir, media['id_str']) + '.' + photo_url.split('.')[-1] + photo_url = media['media_url_https'] + screen_name = status['user']['screen_name'] + print screen_name + file_name = os.path.join(photo_dir, screen_name) + '.' 
+ photo_url.split('.')[-1] urllib.urlretrieve(photo_url, file_name) -def stream_tweets(list, photo_dir, region, stalk): - """Get tweets containing any words in 'list' or that have location or coordinates in 'region' - - """ +def stream_tweets(api, list, photo_dir, region, stalk, no_retweets): + """Get tweets containing any words in 'list' or that have location or coordinates in 'region'.""" params = {} if list is not None: words = ','.join(list) @@ -82,12 +51,14 @@ def stream_tweets(list, photo_dir, region, stalk): params['locations'] = '%f,%f,%f,%f' % region print 'REGION', region while True: - tw = puttytat.TwitterStream(OAUTH) try: + api.request('statuses/filter', params) + iter = api.get_iterator() while True: - for item in tw.request('statuses/filter', params): + for item in iter: if 'text' in item: - parse_tweet(item, photo_dir, stalk) + if not no_retweets or not item.has_key('retweeted_status'): + parse_tweet(item, photo_dir, stalk) elif 'disconnect' in item: raise Exception('Disconnect: %s' % item['disconnect'].get('reason')) except Exception, e: @@ -96,9 +67,10 @@ def stream_tweets(list, photo_dir, region, stalk): if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Get real-time tweet stream.') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') + parser = argparse.ArgumentParser(description='Get real-time tweet stream with embedded pics.') parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets') + parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') + parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') parser.add_argument('-stalk', action='store_true', help='print tweet location') parser.add_argument('-words', metavar='W', type=str, nargs='+', 
help='word(s) to track') @@ -107,7 +79,8 @@ def stream_tweets(list, photo_dir, region, stalk): if args.words is None and args.location is None: sys.exit('You must use either -words or -locoation or both.') - OAUTH = puttytat.TwitterOauth.read_file(args.oauth) + oauth = TwitterOAuth.read_file(args.oauth) + api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) if args.location: if args.location.lower() == 'all': @@ -120,7 +93,7 @@ def stream_tweets(list, photo_dir, region, stalk): region = None try: - stream_tweets(args.words, args.photo_dir, region, args.stalk) + stream_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets) except KeyboardInterrupt: print>>sys.stderr, '\nTerminated by user' diff --git a/twittergeo/SearchGeo.py b/TwitterGeoPics/GetOldGeo.py similarity index 57% rename from twittergeo/SearchGeo.py rename to TwitterGeoPics/GetOldGeo.py index e42e628..14d5caf 100644 --- a/twittergeo/SearchGeo.py +++ b/TwitterGeoPics/GetOldGeo.py @@ -1,22 +1,3 @@ -""" - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt - OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. - - Downloads old tweets from the newest to the oldest that contain any of the words - that are passed as arguments on the command line. Prints the tweet text and - location information, including latitude and longitude from Google's Map service. - - Use the -location option to get tweets from a geographical region. If you want to - override the default radius (in km) use the -radius option. Location is - determined from either the user's profile or geocode. - - The script calls Twitter's REST API which permits about a week's worth of old - tweets to be downloaded before breaking the connection. Twitter may also - disconnect if you exceed 180 downloads per 15 minutes. For this reason sleep is - called after each request. The default is 5 seconds. 
Override with the '-wait' - option. -""" - __author__ = "Jonas Geduldig" __date__ = "December 20, 2012" __license__ = "MIT" @@ -27,17 +8,14 @@ import argparse import Geocoder -import puttytat -import urllib +from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager + -OAUTH = None GEO = Geocoder.Geocoder() def parse_tweet(status): - """Print tweet, location and geocode - - """ + """Print tweet, location and geocode.""" try: geocode = GEO.geocode_tweet(status) print '\n%s: %s' % (status['user']['screen_name'], status['text']) @@ -48,17 +26,15 @@ def parse_tweet(status): raise -def search_tweets(list, wait, region): - """Get tweets containing any words in 'list' and that have location or coordinates in 'region' - - """ +def search_tweets(api, list, region): + """Get tweets containing any words in 'list' and that have location or coordinates in 'region'.""" words = ' OR '.join(list) params = { 'q': words } if region: params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius while True: - tw = puttytat.TwitterRestPager(OAUTH) - for item in tw.request('search/tweets', params, wait): + iter = TwitterRestPager(api, 'search/tweets', params).get_iterator() + for item in iter: if 'text' in item: parse_tweet(item) elif 'message' in item: @@ -71,14 +47,14 @@ def search_tweets(list, wait, region): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Search tweet history.') - parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-wait', type=int, default=5, help='seconds to wait between searches') parser.add_argument('-location', type=str, help='limit tweets to a place') + parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') parser.add_argument('-radius', type=float, help='distance from "location" in km') parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') args = parser.parse_args() - OAUTH = 
puttytat.TwitterOauth.read_file(args.oauth) + oauth = TwitterOAuth.read_file(args.oauth) + api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) try: if args.location: @@ -89,7 +65,7 @@ def search_tweets(list, wait, region): region = (lat, lng, radius) else: region = None - search_tweets(args.words, args.wait, region) + search_tweets(api, args.words, region) except KeyboardInterrupt: print>>sys.stderr, '\nTerminated by user' except Exception, e: diff --git a/twittergeo/SearchPics.py b/TwitterGeoPics/GetOldPics.py similarity index 57% rename from twittergeo/SearchPics.py rename to TwitterGeoPics/GetOldPics.py index 9938e49..b78365c 100644 --- a/twittergeo/SearchPics.py +++ b/TwitterGeoPics/GetOldPics.py @@ -1,26 +1,3 @@ -""" - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt - OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. - - Downloads old tweets from the newest to the oldest that contain any of the words - that are passed as arguments on the command line. Prints the tweet text and URLs - of any embedded photos. - - Use the -photo_dir option to save photos to a directory. - - Use the -stalk flag to print latitude and longitude from Google's Map service. - Location is determined from either the user's profile or geocode. - - Use the -location to get tweets from a geographical region. If you want to - override the default radius (in km) use the -radius option. - - The script calls Twitter's REST API which permits about a week's worth of old - tweets to be downloaded before breaking the connection. Twitter may also - disconnect if you exceed 180 downloads per 15 minutes. For this reason sleep is - called after each request. The default is 5 seconds. Override with the '-wait' - option. 
-""" - __author__ = "Jonas Geduldig" __date__ = "December 20, 2012" __license__ = "MIT" @@ -32,19 +9,15 @@ import argparse import Geocoder import os -import puttytat +from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager import urllib -OAUTH = None + GEO = Geocoder.Geocoder() def parse_tweet(status, photo_dir, stalk): - """If tweet contains photo, print tweet. - If stalking, print location and geocode. - If photo_dir, print photo id and save photo to file. - - """ + """Use only tweets that embed photos.""" if 'media' in status['entities']: photo_count = 0 for media in status['entities'].get('media'): @@ -60,26 +33,26 @@ def parse_tweet(status, photo_dir, stalk): except Exception, e: if GEO.quota_exceeded: print>>sys.stderr, 'GEOCODER QUOTA EXCEEDED:', GEO.count_request - photo_url = media['media_url_https'] if photo_dir: - print media['id_str'] - file_name = os.path.join(photo_dir, media['id_str']) + '.' + photo_url.split('.')[-1] + photo_url = media['media_url_https'] + screen_name = status['user']['screen_name'] + print screen_name + file_name = os.path.join(photo_dir, screen_name) + '.' 
+ photo_url.split('.')[-1] urllib.urlretrieve(photo_url, file_name) -def search_tweets(list, wait, photo_dir, region, stalk): - """Get tweets containing any words in 'list' and that have location or coordinates in 'region' - - """ +def search_tweets(api, list, photo_dir, region, stalk, no_retweets): + """Get tweets containing any words in 'list' and that have location or coordinates in 'region'.""" words = ' OR '.join(list) params = { 'q': words } if region: params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius while True: - tw = puttytat.TwitterRestPager(OAUTH) - for item in tw.request('search/tweets', params, wait): + iter = TwitterRestPager(api, 'search/tweets', params).get_iterator() + for item in iter: if 'text' in item: - parse_tweet(item, photo_dir, stalk) + if not no_retweets or not item.has_key('retweeted_status'): + parse_tweet(item, photo_dir, stalk) elif 'message' in item: if item['code'] == 131: continue # ignore internal server error @@ -90,16 +63,17 @@ def search_tweets(list, wait, photo_dir, region, stalk): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Search tweet history.') + parser.add_argument('-location', type=str, help='limit tweets to a place') parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-wait', type=int, default=5, help='seconds to wait between searches') + parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') - parser.add_argument('-location', type=str, help='limit tweets to a place') parser.add_argument('-radius', type=str, help='distance from "location" in km') parser.add_argument('-stalk', action='store_true', help='print tweet location') parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') args = parser.parse_args() - OAUTH = 
puttytat.TwitterOauth.read_file(args.oauth) + oauth = TwitterOAuth.read_file(args.oauth) + api = TwitterAPI(oauth.consumer_key, oauth.consumer_secret, oauth.access_token_key, oauth.access_token_secret) try: if args.location: @@ -110,7 +84,7 @@ def search_tweets(list, wait, photo_dir, region, stalk): region = (lat, lng, radius) else: region = None - search_tweets(args.words, args.wait, args.photo_dir, region, args.stalk) + search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets) except KeyboardInterrupt: print>>sys.stderr, '\nTerminated by user' except Exception, e: diff --git a/twittergeo/__init__.py b/TwitterGeoPics/__init__.py similarity index 100% rename from twittergeo/__init__.py rename to TwitterGeoPics/__init__.py diff --git a/setup.py b/setup.py index 9bc6d94..ea3149d 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,16 @@ from distutils.core import setup setup( - name='TwitterGeo', - version='1.1.1', + name='TwitterGeoPics', + version='2.0.0', author='Jonas Geduldig', author_email='boxnumber03@gmail.com', - packages=['twittergeo'], + packages=['TwitterGeoPics'], package_data={'': []}, - url='https://github.com/geduldig/twittergeo', - download_url = 'https://github.com/gedldig/twittergeo/tarball/master', + url='https://github.com/geduldig/TwitterGeoPics', + download_url = 'https://github.com/geduldig/TwitterGeoPics/tarball/master', license='MIT', - keywords='twitter', - description='Command line scripts for geocoding old and new tweets from twitter.com and for downloading embedded photos.', - install_requires = ['puttytat', 'pygeocoder', 'fridge'] + keywords='twitter, geocode', + description='Command line scripts for geocoding tweets from twitter.com and for downloading embedded photos.', + install_requires = ['TwitterAPI', 'pygeocoder', 'fridge'] )