diff --git a/CHANGES.txt b/CHANGES.txt index 6a6a1cf..b398f8e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -6,4 +6,6 @@ v0.1.2, 20 Jan 2013 -- Improved exception handling for less fragile connections Streaming 'location' option now uses the API's 'locations' parameter. Dynamic throttling of Google geocode requests to not exceed rate limit. -v1.0.0, 30 Jan 2013 -- Uploaded to github. \ No newline at end of file +v1.0.0, 30 Jan 2013 -- Uploaded to github. + +v1.1.0, 12 Feb 2013 -- Replaced TwitterAPI with puttytat for Twitter requests. \ No newline at end of file diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index b2ef64e..0000000 --- a/MANIFEST +++ /dev/null @@ -1,9 +0,0 @@ -# file GENERATED by distutils, do NOT edit -setup.py -twittergeo/Geocoder.py -twittergeo/SearchGeo.py -twittergeo/SearchPics.py -twittergeo/StreamGeo.py -twittergeo/StreamPics.py -twittergeo/__init__.py -twittergeo/credentials.txt diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9203697 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.txt +recursive-include docs *.txt \ No newline at end of file diff --git a/README.md b/README.md index 7984894..880947f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Use the location option to restrict searches to a geographic location. Twitter Google does not require autentication, but it does enforce a daily limit of about 2,500 requests per day and about 10 requests per second. -The Twitter API requires OAuth credentials which you can get by creating an application on dev.twitter.com. Once you have your OAuth secrets and keys, copy them into twittergeo/credentials.txt. Alternatively, specify the credentials file on the command line. +The Twitter API requires OAuth credentials which you can get by creating an application on dev.twitter.com. Once you have your OAuth secrets and keys, copy them into puttytat/credentials.txt. Alternatively, specify the credentials file on the command line. 
Twitter restricts searching old tweets to within roughly the past week. Twitter also places a bandwidth limit on searching current tweet, but you will notice this only when you are searching a popular word. When this limit occurs the total number of skipped tweets is printed and the connection is maintained. @@ -49,7 +49,7 @@ A wrapper for the pygeocoder package. It adds throttling to respect Google's da pip install twittergeo -2. Either copy your OAuth consumer secret and key and your access token secret and key into twittergeo/credentials.txt, or copy them into another file which you will specify on the command line. See credentials.txt for the expected file format. +2. Either copy your OAuth consumer secret and key and your access token secret and key into puttytat/credentials.txt, or copy them into another file which you will specify on the command line. See credentials.txt for the expected file format. 3. Run a script type with '-m' option, for example: @@ -60,7 +60,7 @@ A wrapper for the pygeocoder package. It adds throttling to respect Google's da This package uses the following external packages. 
-* twitterapi - for downloading tweets +* puttytat - for downloading tweets * pygeocoder - for geo-referencing using Google's Maps service * fridge - for caching latitudes and longitudes in a persistant dict diff --git a/setup.py b/setup.py index 754f47c..1397676 100644 --- a/setup.py +++ b/setup.py @@ -2,15 +2,15 @@ setup( name='TwitterGeo', - version='1.0.0', + version='1.1.0', author='Jonas Geduldig', author_email='boxnumber03@gmail.com', packages=['twittergeo'], package_data={'': ['credentials.txt']}, url='https://github.com/geduldig/twittergeo', - download_url = 'https://github.com/gedldig/twittergeo/tarball/1.0.0', + download_url = 'https://github.com/geduldig/twittergeo/tarball/master', license='MIT', keywords='twitter', description='Command line scripts for geocoding old and new tweets from twitter.com and for downloading embedded photos.', - install_requires = ['twitterapi', 'pygeocoder', 'fridge'] + install_requires = ['puttytat', 'pygeocoder', 'fridge'] ) diff --git a/twittergeo/Geocoder.py b/twittergeo/Geocoder.py index 4b6b9f9..06b2e97 100755 --- a/twittergeo/Geocoder.py +++ b/twittergeo/Geocoder.py @@ -46,7 +46,7 @@ def __init__(self, cache_file=None): self.throttle = THROTTLE_INCR # the throttle in seconds to wait between requests self.last_exec = None # time updated at each geocode request - if cache_file == None: + if cache_file is None: path = os.path.dirname(__file__) cache_file = os.path.join(path, DEFAULT_CACHE_FILE) @@ -183,7 +183,7 @@ def geocode_tweet(self, status): """ # start off with the location in the user's profile (it may be empty) place = status['user']['location'] - if status['coordinates'] != None: + if status['coordinates'] is not None: # the status is geocoded (swapped lat/lng), so use the coordinates to get the address lng, lat = status['coordinates']['coordinates'] place = self.latlng_to_address(float(lat), float(lng)) @@ -197,19 +197,19 @@ def geocode_tweet(self, status): lat, lng = coord.strip().split(',', 1) elif ' ' in 
coord: lat, lng = coord.strip().split(' ', 1) - if lat != None and lng != None: + if lat is not None and lng is not None: try: lat, lng = lat.strip(), lng.strip() place = self.latlng_to_address(float(lat), float(lng)) self.count_has_location += 1 except ValueError, TypeError: pass - elif place != None and place != '': + elif place is not None and place != '': # there is a location in the user profile, so see if it is usable # cache key is the place stripped of all punctuation and lower case key = ' '.join(''.join(e for e in place if e.isalnum() or e == ' ').split()).lower() cached_data = None - if self.cache != None and key in self.cache: + if self.cache is not None and key in self.cache: # see if the place name is in our cache cached_data = self.cache[key] lat, lng = cached_data[0], cached_data[1] @@ -218,7 +218,7 @@ def geocode_tweet(self, status): # see if Google can interpret the location lat, lng = self.address_to_latlng(place) cached_data = ( lat, lng, 1 ) - if self.cache != None: + if self.cache is not None: self.cache[key] = cached_data self.count_has_location += 1 else: diff --git a/twittergeo/SearchGeo.py b/twittergeo/SearchGeo.py index db7c2d2..e42e628 100644 --- a/twittergeo/SearchGeo.py +++ b/twittergeo/SearchGeo.py @@ -1,5 +1,5 @@ """ - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt + REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. 
Downloads old tweets from the newest to the oldest that contain any of the words @@ -27,13 +27,13 @@ import argparse import Geocoder -import os -import twitterapi +import puttytat import urllib OAUTH = None GEO = Geocoder.Geocoder() + def parse_tweet(status): """Print tweet, location and geocode @@ -47,6 +47,7 @@ def parse_tweet(status): if GEO.quota_exceeded: raise + def search_tweets(list, wait, region): """Get tweets containing any words in 'list' and that have location or coordinates in 'region' @@ -55,9 +56,9 @@ def search_tweets(list, wait, region): params = { 'q': words } if region: params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius - search = twitterapi.TwSearch(OAUTH, params) while True: - for item in search.past_results(wait): + tw = puttytat.TwitterRestPager(OAUTH) + for item in tw.request('search/tweets', params, wait): if 'text' in item: parse_tweet(item) elif 'message' in item: @@ -66,6 +67,8 @@ def search_tweets(list, wait, region): elif item['code'] == 88: print>>sys.stderr, 'Suspend search until %s' % search.get_quota()['reset'] raise Exception('Message from twiter: %s' % item['message']) + + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Search tweet history.') parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') @@ -75,12 +78,7 @@ def search_tweets(list, wait, region): parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') args = parser.parse_args() - if args.oauth: - OAUTH = twitterapi.TwCredentials.read_file(args.oauth) - else: - path = os.path.dirname(__file__) - path = os.path.join(path, 'credentials.txt') - OAUTH = twitterapi.TwCredentials.read_file(path) + OAUTH = puttytat.TwitterOauth.read_file(args.oauth) try: if args.location: diff --git a/twittergeo/SearchPics.py b/twittergeo/SearchPics.py index a8c6926..9938e49 100644 --- a/twittergeo/SearchPics.py +++ b/twittergeo/SearchPics.py @@ -1,5 +1,5 @@ """ - REQUIRED: PASTE YOUR 
TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt + REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. Downloads old tweets from the newest to the oldest that contain any of the words @@ -32,12 +32,13 @@ import argparse import Geocoder import os -import twitterapi +import puttytat import urllib OAUTH = None GEO = Geocoder.Geocoder() + def parse_tweet(status, photo_dir, stalk): """If tweet contains photo, print tweet. If stalking, print location and geocode. @@ -65,6 +66,7 @@ def parse_tweet(status, photo_dir, stalk): file_name = os.path.join(photo_dir, media['id_str']) + '.' + photo_url.split('.')[-1] urllib.urlretrieve(photo_url, file_name) + def search_tweets(list, wait, photo_dir, region, stalk): """Get tweets containing any words in 'list' and that have location or coordinates in 'region' @@ -73,9 +75,9 @@ def search_tweets(list, wait, photo_dir, region, stalk): params = { 'q': words } if region: params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius - search = twitterapi.TwSearch(OAUTH, params) while True: - for item in search.past_results(wait): + tw = puttytat.TwitterRestPager(OAUTH) + for item in tw.request('search/tweets', params, wait): if 'text' in item: parse_tweet(item, photo_dir, stalk) elif 'message' in item: @@ -84,6 +86,7 @@ def search_tweets(list, wait, photo_dir, region, stalk): elif item['code'] == 88: print>>sys.stderr, 'Suspend search until %s' % search.get_quota()['reset'] raise Exception('Message from twiter: %s' % item['message']) + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Search tweet history.') @@ -96,12 +99,7 @@ def search_tweets(list, wait, photo_dir, region, stalk): parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search') args = parser.parse_args() - if args.oauth: - OAUTH = twitterapi.TwCredentials.read_file(args.oauth) - else: - path = os.path.dirname(__file__) 
- path = os.path.join(path, 'credentials.txt') - OAUTH = twitterapi.TwCredentials.read_file(path) + OAUTH = puttytat.TwitterOauth.read_file(args.oauth) try: if args.location: diff --git a/twittergeo/StreamGeo.py b/twittergeo/StreamGeo.py index 7dd281d..9e122b6 100644 --- a/twittergeo/StreamGeo.py +++ b/twittergeo/StreamGeo.py @@ -1,5 +1,5 @@ """ - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt + REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. Downloads real-time tweets. You must supply either one or both of the -words and @@ -29,13 +29,13 @@ import argparse import Geocoder -import os -import twitterapi +import puttytat import urllib OAUTH = None GEO = Geocoder.Geocoder() + def parse_tweet(status, region): """Print tweet, location and geocode @@ -50,22 +50,23 @@ def parse_tweet(status, region): print>>sys.stderr, '*** GEOCODER QUOTA EXCEEDED:', GEO.count_request raise + def stream_tweets(list, region): """Get tweets containing any words in 'list' or that have location or coordinates in 'region' """ params = {} - if list != None: + if list is not None: words = ','.join(list) params['track'] = words - if region != None: + if region is not None: params['locations'] = '%f,%f,%f,%f' % region print 'REGION', region while True: + tw = puttytat.TwitterStream(OAUTH) try: - stream = twitterapi.TwStream(OAUTH, params) while True: - for item in stream.results(): + for item in tw.request('statuses/filter', params): if 'text' in item: parse_tweet(item, region) elif 'disconnect' in item: @@ -74,6 +75,7 @@ def stream_tweets(list, region): # reconnect on 401 errors and socket timeouts print>>sys.stderr, '*** MUST RECONNECT', e + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Get real-time tweet stream.') parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') @@ -81,15 +83,10 @@ 
def stream_tweets(list, region): parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track') args = parser.parse_args() - if args.words == None and args.location == None: + if args.words is None and args.location is None: sys.exit('You must use either -words or -locoation or both.') - if args.oauth: - OAUTH = twitterapi.TwCredentials.read_file(args.oauth) - else: - path = os.path.dirname(__file__) - path = os.path.join(path, 'credentials.txt') - OAUTH = twitterapi.TwCredentials.read_file(path) + OAUTH = puttytat.TwitterOauth.read_file(args.oauth) if args.location: if args.location.lower() == 'all': diff --git a/twittergeo/StreamPics.py b/twittergeo/StreamPics.py index 968dfbc..6d02d32 100644 --- a/twittergeo/StreamPics.py +++ b/twittergeo/StreamPics.py @@ -1,5 +1,5 @@ """ - REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt + REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS. Downloads real-time tweets that contain embedded photo URLs. You must supply @@ -35,12 +35,13 @@ import argparse import Geocoder import os -import twitterapi +import puttytat import urllib OAUTH = None GEO = Geocoder.Geocoder() + def parse_tweet(status, photo_dir, stalk): """If tweet contains photo, print tweet. If stalking, print location and geocode. @@ -67,23 +68,24 @@ def parse_tweet(status, photo_dir, stalk): print media['id_str'] file_name = os.path.join(photo_dir, media['id_str']) + '.' 
+ photo_url.split('.')[-1] urllib.urlretrieve(photo_url, file_name) + def stream_tweets(list, photo_dir, region, stalk): """Get tweets containing any words in 'list' or that have location or coordinates in 'region' """ params = {} - if list != None: + if list is not None: words = ','.join(list) params['track'] = words - if region != None: + if region is not None: params['locations'] = '%f,%f,%f,%f' % region print 'REGION', region while True: + tw = puttytat.TwitterStream(OAUTH) try: - stream = twitterapi.TwStream(OAUTH, params) while True: - for item in stream.results(): + for item in tw.request('statuses/filter', params): if 'text' in item: parse_tweet(item, photo_dir, stalk) elif 'disconnect' in item: @@ -91,6 +93,7 @@ def stream_tweets(list, photo_dir, region, stalk): except Exception, e: # reconnect on 401 errors and socket timeouts print>>sys.stderr, '*** MUST RECONNECT', e + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Get real-time tweet stream.') @@ -101,15 +104,10 @@ def stream_tweets(list, photo_dir, region, stalk): parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track') args = parser.parse_args() - if args.words == None and args.location == None: + if args.words is None and args.location is None: sys.exit('You must use either -words or -locoation or both.') - if args.oauth: - OAUTH = twitterapi.TwCredentials.read_file(args.oauth) - else: - path = os.path.dirname(__file__) - path = os.path.join(path, 'credentials.txt') - OAUTH = twitterapi.TwCredentials.read_file(path) + OAUTH = puttytat.TwitterOauth.read_file(args.oauth) if args.location: if args.location.lower() == 'all': diff --git a/twittergeo/credentials.txt b/twittergeo/credentials.txt deleted file mode 100644 index e50b2c6..0000000 --- a/twittergeo/credentials.txt +++ /dev/null @@ -1,4 +0,0 @@ -consumer_key= -consumer_secret= -access_token_key= -access_token_secret=