Skip to content

Commit

Permalink
Replace twitterapi with puttytat
Browse files Browse the repository at this point in the history
Replace twitterapi with puttytat
  • Loading branch information
geduldig committed Feb 15, 2013
1 parent 44a022f commit 85b9a32
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 74 deletions.
4 changes: 3 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ v0.1.2, 20 Jan 2013 -- Improved exception handling for less fragile connections
Streaming 'location' option now uses the API's 'locations' parameter.
Dynamic throttling of Google geocode requests to not exceed rate limit.

v1.0.0, 30 Jan 2013 -- Uploaded to github.
v1.0.0, 30 Jan 2013 -- Uploaded to github.

v1.1.0, 12 Feb 2013 -- Replaced TwitterAPI with puttytat for Twitter requests.
9 changes: 0 additions & 9 deletions MANIFEST

This file was deleted.

2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include *.txt
recursive-include docs *.txt
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Use the location option to restrict searches to a geographic location. Twitter

Google does not require authentication, but it does enforce a limit of about 2,500 requests per day and about 10 requests per second.

The Twitter API requires OAuth credentials which you can get by creating an application on dev.twitter.com. Once you have your OAuth secrets and keys, copy them into twittergeo/credentials.txt. Alternatively, specify the credentials file on the command line.
The Twitter API requires OAuth credentials which you can get by creating an application on dev.twitter.com. Once you have your OAuth secrets and keys, copy them into puttytat/credentials.txt. Alternatively, specify the credentials file on the command line.

Twitter restricts searching old tweets to within roughly the past week. Twitter also places a bandwidth limit on searching current tweets, but you will notice this only when you are searching a popular word. When this limit is hit, the total number of skipped tweets is printed and the connection is maintained.

Expand Down Expand Up @@ -49,7 +49,7 @@ A wrapper for the pygeocoder package. It adds throttling to respect Google's da

pip install twittergeo

2. Either copy your OAuth consumer secret and key and your access token secret and key into twittergeo/credentials.txt, or copy them into another file which you will specify on the command line. See credentials.txt for the expected file format.
2. Either copy your OAuth consumer secret and key and your access token secret and key into puttytat/credentials.txt, or copy them into another file which you will specify on the command line. See credentials.txt for the expected file format.

3. Run a script type with '-m' option, for example:

Expand All @@ -60,7 +60,7 @@ A wrapper for the pygeocoder package. It adds throttling to respect Google's da

This package uses the following external packages.

* twitterapi - for downloading tweets
* puttytat - for downloading tweets
* pygeocoder - for geo-referencing using Google's Maps service
* fridge - for caching latitudes and longitudes in a persistent dict

Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

setup(
name='TwitterGeo',
version='1.0.0',
version='1.1.0',
author='Jonas Geduldig',
author_email='[email protected]',
packages=['twittergeo'],
package_data={'': ['credentials.txt']},
url='https://github.com/geduldig/twittergeo',
download_url = 'https://github.com/gedldig/twittergeo/tarball/1.0.0',
download_url = 'https://github.com/gedldig/twittergeo/tarball/master',
license='MIT',
keywords='twitter',
description='Command line scripts for geocoding old and new tweets from twitter.com and for downloading embedded photos.',
install_requires = ['twitterapi', 'pygeocoder', 'fridge']
install_requires = ['puttytat', 'pygeocoder', 'fridge']
)
12 changes: 6 additions & 6 deletions twittergeo/Geocoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(self, cache_file=None):
self.throttle = THROTTLE_INCR # the throttle in seconds to wait between requests
self.last_exec = None # time updated at each geocode request

if cache_file == None:
if cache_file is None:
path = os.path.dirname(__file__)
cache_file = os.path.join(path, DEFAULT_CACHE_FILE)

Expand Down Expand Up @@ -183,7 +183,7 @@ def geocode_tweet(self, status):
"""
# start off with the location in the user's profile (it may be empty)
place = status['user']['location']
if status['coordinates'] != None:
if status['coordinates'] is not None:
# the status is geocoded (swapped lat/lng), so use the coordinates to get the address
lng, lat = status['coordinates']['coordinates']
place = self.latlng_to_address(float(lat), float(lng))
Expand All @@ -197,19 +197,19 @@ def geocode_tweet(self, status):
lat, lng = coord.strip().split(',', 1)
elif ' ' in coord:
lat, lng = coord.strip().split(' ', 1)
if lat != None and lng != None:
if lat is not None and lng is not None:
try:
lat, lng = lat.strip(), lng.strip()
place = self.latlng_to_address(float(lat), float(lng))
self.count_has_location += 1
except ValueError, TypeError:
pass
elif place != None and place != '':
elif place is not None and place != '':
# there is a location in the user profile, so see if it is usable
# cache key is the place stripped of all punctuation and lower case
key = ' '.join(''.join(e for e in place if e.isalnum() or e == ' ').split()).lower()
cached_data = None
if self.cache != None and key in self.cache:
if self.cache is not None and key in self.cache:
# see if the place name is in our cache
cached_data = self.cache[key]
lat, lng = cached_data[0], cached_data[1]
Expand All @@ -218,7 +218,7 @@ def geocode_tweet(self, status):
# see if Google can interpret the location
lat, lng = self.address_to_latlng(place)
cached_data = ( lat, lng, 1 )
if self.cache != None:
if self.cache is not None:
self.cache[key] = cached_data
self.count_has_location += 1
else:
Expand Down
20 changes: 9 additions & 11 deletions twittergeo/SearchGeo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt
OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS.
Downloads old tweets from the newest to the oldest that contain any of the words
Expand Down Expand Up @@ -27,13 +27,13 @@

import argparse
import Geocoder
import os
import twitterapi
import puttytat
import urllib

OAUTH = None
GEO = Geocoder.Geocoder()


def parse_tweet(status):
"""Print tweet, location and geocode
Expand All @@ -47,6 +47,7 @@ def parse_tweet(status):
if GEO.quota_exceeded:
raise


def search_tweets(list, wait, region):
"""Get tweets containing any words in 'list' and that have location or coordinates in 'region'
Expand All @@ -55,9 +56,9 @@ def search_tweets(list, wait, region):
params = { 'q': words }
if region:
params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius
search = twitterapi.TwSearch(OAUTH, params)
while True:
for item in search.past_results(wait):
tw = puttytat.TwitterRestPager(OAUTH)
for item in tw.request('search/tweets', params, wait):
if 'text' in item:
parse_tweet(item)
elif 'message' in item:
Expand All @@ -66,6 +67,8 @@ def search_tweets(list, wait, region):
elif item['code'] == 88:
print>>sys.stderr, 'Suspend search until %s' % search.get_quota()['reset']
raise Exception('Message from twiter: %s' % item['message'])


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Search tweet history.')
parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
Expand All @@ -75,12 +78,7 @@ def search_tweets(list, wait, region):
parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search')
args = parser.parse_args()

if args.oauth:
OAUTH = twitterapi.TwCredentials.read_file(args.oauth)
else:
path = os.path.dirname(__file__)
path = os.path.join(path, 'credentials.txt')
OAUTH = twitterapi.TwCredentials.read_file(path)
OAUTH = puttytat.TwitterOauth.read_file(args.oauth)

try:
if args.location:
Expand Down
18 changes: 8 additions & 10 deletions twittergeo/SearchPics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt
OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS.
Downloads old tweets from the newest to the oldest that contain any of the words
Expand Down Expand Up @@ -32,12 +32,13 @@
import argparse
import Geocoder
import os
import twitterapi
import puttytat
import urllib

OAUTH = None
GEO = Geocoder.Geocoder()


def parse_tweet(status, photo_dir, stalk):
"""If tweet contains photo, print tweet.
If stalking, print location and geocode.
Expand Down Expand Up @@ -65,6 +66,7 @@ def parse_tweet(status, photo_dir, stalk):
file_name = os.path.join(photo_dir, media['id_str']) + '.' + photo_url.split('.')[-1]
urllib.urlretrieve(photo_url, file_name)


def search_tweets(list, wait, photo_dir, region, stalk):
"""Get tweets containing any words in 'list' and that have location or coordinates in 'region'
Expand All @@ -73,9 +75,9 @@ def search_tweets(list, wait, photo_dir, region, stalk):
params = { 'q': words }
if region:
params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius
search = twitterapi.TwSearch(OAUTH, params)
while True:
for item in search.past_results(wait):
tw = puttytat.TwitterRestPager(OAUTH)
for item in tw.request('search/tweets', params, wait):
if 'text' in item:
parse_tweet(item, photo_dir, stalk)
elif 'message' in item:
Expand All @@ -84,6 +86,7 @@ def search_tweets(list, wait, photo_dir, region, stalk):
elif item['code'] == 88:
print>>sys.stderr, 'Suspend search until %s' % search.get_quota()['reset']
raise Exception('Message from twiter: %s' % item['message'])


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Search tweet history.')
Expand All @@ -96,12 +99,7 @@ def search_tweets(list, wait, photo_dir, region, stalk):
parser.add_argument('words', metavar='W', type=str, nargs='+', help='word(s) to search')
args = parser.parse_args()

if args.oauth:
OAUTH = twitterapi.TwCredentials.read_file(args.oauth)
else:
path = os.path.dirname(__file__)
path = os.path.join(path, 'credentials.txt')
OAUTH = twitterapi.TwCredentials.read_file(path)
OAUTH = puttytat.TwitterOauth.read_file(args.oauth)

try:
if args.location:
Expand Down
25 changes: 11 additions & 14 deletions twittergeo/StreamGeo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO twittergeo/credentials.txt
REQUIRED: PASTE YOUR TWITTER OAUTH CREDENTIALS INTO puttytat/credentials.txt
OR USE -oauth OPTION TO USE A DIFFERENT FILE CONTAINING THE CREDENTIALS.
Downloads real-time tweets. You must supply either one or both of the -words and
Expand Down Expand Up @@ -29,13 +29,13 @@

import argparse
import Geocoder
import os
import twitterapi
import puttytat
import urllib

OAUTH = None
GEO = Geocoder.Geocoder()


def parse_tweet(status, region):
"""Print tweet, location and geocode
Expand All @@ -50,22 +50,23 @@ def parse_tweet(status, region):
print>>sys.stderr, '*** GEOCODER QUOTA EXCEEDED:', GEO.count_request
raise


def stream_tweets(list, region):
"""Get tweets containing any words in 'list' or that have location or coordinates in 'region'
"""
params = {}
if list != None:
if list is not None:
words = ','.join(list)
params['track'] = words
if region != None:
if region is not None:
params['locations'] = '%f,%f,%f,%f' % region
print 'REGION', region
while True:
tw = puttytat.TwitterStream(OAUTH)
try:
stream = twitterapi.TwStream(OAUTH, params)
while True:
for item in stream.results():
for item in tw.request('statuses/filter', params):
if 'text' in item:
parse_tweet(item, region)
elif 'disconnect' in item:
Expand All @@ -74,22 +75,18 @@ def stream_tweets(list, region):
# reconnect on 401 errors and socket timeouts
print>>sys.stderr, '*** MUST RECONNECT', e


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Get real-time tweet stream.')
parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
parser.add_argument('-location', type=str, help='limit tweets to a place; use ALL to get all geocoded tweets')
parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to track')
args = parser.parse_args()

if args.words == None and args.location == None:
if args.words is None and args.location is None:
sys.exit('You must use either -words or -locoation or both.')

if args.oauth:
OAUTH = twitterapi.TwCredentials.read_file(args.oauth)
else:
path = os.path.dirname(__file__)
path = os.path.join(path, 'credentials.txt')
OAUTH = twitterapi.TwCredentials.read_file(path)
OAUTH = puttytat.TwitterOauth.read_file(args.oauth)

if args.location:
if args.location.lower() == 'all':
Expand Down
Loading

0 comments on commit 85b9a32

Please sign in to comment.