Skip to content

Commit

Permalink
Requires TwitterAPI 2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
geduldig committed Sep 19, 2013
1 parent 1f07baa commit 7355b29
Show file tree
Hide file tree
Showing 12 changed files with 85 additions and 504 deletions.
6 changes: 4 additions & 2 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ v1.1.0, 12 Feb 2013 -- Replaced TwitterAPI with puttytat for Twitter requests.

v1.1.1, 19 Feb 2013 -- Geocoder uses viewport instead of bounds.

v2.0.1, 14 Jun 2013 -- Switch to TwitterAPI and renamed to TwitterGeoPics
v2.0.1, 14 Jun 2013 -- Switch to TwitterAPI and renamed to TwitterGeoPics.

v2.0.2, 14 Jun 2013 -- Included pygeocoder source until fixed for python 3
v2.0.2, 14 Jun 2013 -- Included pygeocoder source until fixed for python 3.

v2.0.3, 25 Jun 2013 -- Fixed printing.

v2.1.0, 17 Sep 2013 -- Requires TwitterAPI 2.1.
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# TwitterGeoPics #
[![Downloads](https://pypip.in/d/TwitterGeoPics/badge.png)](https://crate.io/packages/TwitterGeoPics)
[![Version](https://pypip.in/v/TwitterGeoPics/badge.png)](https://crate.io/packages/TwitterGeoPics)

_Scripts for geocoding tweets and for downloading images embedded in tweets._
TwitterGeoPics
==============

### Contributors ###
Scripts for geocoding tweets and for downloading images embedded in tweets.

Jonas Geduldig
Contributors
------------

* Jonas Geduldig
1 change: 1 addition & 0 deletions README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Scripts for geocoding tweets and for downloading images embedded in tweets.
19 changes: 10 additions & 9 deletions TwitterGeoPics/Geocoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
import sys
import time


# Module-level settings used by the Geocoder class below.
SOCKET_TIMEOUT = 3 # seconds -- applied via socket.setdefaulttimeout() before geocoding requests; without a timeout a connection can hang indefinitely
THROTTLE_INCR = .1 # seconds -- step by which the wait between successive requests is dynamically increased (presumably applied in _throttle -- confirm)
DEFAULT_CACHE_FILE = 'geocode.cache' # cache file name used when Geocoder() is constructed with cache_file=None


class Geocoder:
"""Wrapper for pygeocoder with Twitter helper methods and Google Maps throttling and caching.
Expand Down Expand Up @@ -46,7 +48,7 @@ def __init__(self, cache_file=None):
if cache_file is None:
cache_file = DEFAULT_CACHE_FILE

# cache is a persistent dict with place address as key and lat/lng and count as value
# cache is a persistent dict (key: place address; value: lat/lng and count)
self.cache = fridge.Fridge(cache_file)

def _throttle(self):
Expand Down Expand Up @@ -118,7 +120,7 @@ def latlng_to_address(self, lat, lng):
try:
self.count_request += 1
socket.setdefaulttimeout(SOCKET_TIMEOUT)
place = pygeocoder.Geocoder.latlng_to_address(lat, lng)
place = pygeocoder.Geocoder.reverse_geocode(lat, lng).formatted_address
self.count_request_ok += 1
return place
except pygeocoder.GeocoderError as e:
Expand All @@ -132,7 +134,7 @@ def address_to_latlng(self, place):
try:
self.count_request += 1
socket.setdefaulttimeout(SOCKET_TIMEOUT)
lat, lng = pygeocoder.Geocoder.address_to_latlng(place)
lat, lng = pygeocoder.Geocoder.geocode(place).coordinates
self.count_request_ok += 1
return lat, lng
except pygeocoder.GeocoderError as e:
Expand All @@ -157,7 +159,7 @@ def geocode_tweet(self, status):
"""
# start off with the location in the user's profile (it may be empty)
place = status['user']['location']
if status['coordinates'] is not None:
if status['coordinates']:
# the status is geocoded (swapped lat/lng), so use the coordinates to get the address
lng, lat = status['coordinates']['coordinates']
place = self.latlng_to_address(float(lat), float(lng))
Expand All @@ -171,19 +173,19 @@ def geocode_tweet(self, status):
lat, lng = coord.strip().split(',', 1)
elif ' ' in coord:
lat, lng = coord.strip().split(' ', 1)
if lat is not None and lng is not None:
if lat and lng:
try:
lat, lng = lat.strip(), lng.strip()
place = self.latlng_to_address(float(lat), float(lng))
self.count_has_location += 1
except ValueError or TypeError:
pass
elif place is not None and place != '':
elif place and place != '':
# there is a location in the user profile, so see if it is usable
# cache key is the place stripped of all punctuation and lower case
key = ' '.join(''.join(e for e in place if e.isalnum() or e == ' ').split()).lower()
cached_data = None
if self.cache is not None and key in self.cache:
if self.cache and key in self.cache:
# see if the place name is in our cache
cached_data = self.cache[key]
lat, lng = cached_data[0], cached_data[1]
Expand All @@ -192,8 +194,7 @@ def geocode_tweet(self, status):
# see if Google can interpret the location
lat, lng = self.address_to_latlng(place)
cached_data = ( lat, lng, 1 )
if self.cache is not None:
self.cache[key] = cached_data
self.cache[key] = cached_data
self.count_has_location += 1
else:
lat, lng = None, None
Expand Down
34 changes: 11 additions & 23 deletions TwitterGeoPics/GetNewGeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,24 @@
__license__ = "MIT"

import argparse
import codecs
from Geocoder import Geocoder
from .Geocoder import Geocoder
import sys
from TwitterAPI import TwitterAPI, TwitterOAuth


try:
# python 3
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
sys.stderr = codecs.getwriter('utf8')(sys.stderr.buffer)
except:
# python 2
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
sys.stderr = codecs.getwriter('utf8')(sys.stderr)


GEO = Geocoder()


def parse_tweet(status, region):
"""Print tweet, location and geocode."""
try:
geocode = GEO.geocode_tweet(status)
sys.stdout.write('\n%s: %s\n' % (status['user']['screen_name'], status['text']))
sys.stdout.write('LOCATION: %s\n' % status['user']['location'])
sys.stdout.write('GEOCODE: %s\n' % geocode)
print('\n%s: %s' % (status['user']['screen_name'], status['text']))
print('LOCATION: %s' % status['user']['location'])
print('GEOCODE: %s' % geocode)
except Exception as e:
if GEO.quota_exceeded:
sys.stderr.write('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request)
print('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request)
raise


Expand All @@ -43,20 +32,19 @@ def stream_tweets(api, list, region):
params['track'] = words
if region is not None:
params['locations'] = '%f,%f,%f,%f' % region
sys.stdout.write('REGION %s\n' % region)
print('REGION %s' % str(region))
while True:
try:
api.request('statuses/filter', params)
iter = api.get_iterator()
r = api.request('statuses/filter', params)
while True:
for item in iter:
for item in r.get_iterator():
if 'text' in item:
parse_tweet(item, region)
elif 'disconnect' in item:
raise Exception('Disconnect: %s' % item['disconnect'].get('reason'))
except Exception as e:
# reconnect on 401 errors and socket timeouts
sys.stderr.write('*** MUST RECONNECT %s\n' % e)
print('*** MUST RECONNECT %s\n' % e)


if __name__ == '__main__':
Expand All @@ -78,13 +66,13 @@ def stream_tweets(api, list, region):
else:
latC, lngC, latSW, lngSW, latNE, lngNE = GEO.get_region_box(args.location)
region = (lngSW, latSW, lngNE, latNE)
sys.stdout.write('Google found region at %f,%f and %f,%f\n' % region)
print('Google found region at %f,%f and %f,%f' % region)
else:
region = None

try:
stream_tweets(api, args.words, region)
except KeyboardInterrupt:
sys.stderr.write('\nTerminated by user\n')
print('\nTerminated by user\n')

GEO.print_stats()
36 changes: 12 additions & 24 deletions TwitterGeoPics/GetNewPics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,13 @@
__license__ = "MIT"

import argparse
import codecs
from Geocoder import Geocoder
from .Geocoder import Geocoder
import os
import sys
from TwitterAPI import TwitterAPI, TwitterOAuth
import urllib


try:
# python 3
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
sys.stderr = codecs.getwriter('utf8')(sys.stderr.buffer)
except:
# python 2
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
sys.stderr = codecs.getwriter('utf8')(sys.stderr)


GEO = Geocoder()


Expand All @@ -32,21 +21,21 @@ def parse_tweet(status, photo_dir, stalk):
if media['type'] == 'photo':
photo_count += 1
if photo_count == 1:
sys.stdout.write('\n%s: %s\n' % (status['user']['screen_name'], status['text']))
print('\n%s: %s' % (status['user']['screen_name'], status['text']))
if stalk and not GEO.quota_exceeded:
try:
geocode = GEO.geocode_tweet(status)
sys.stdout.write('LOCATION: %s\n' % status['user']['location'])
sys.stdout.write('GEOCODE: %s\n' % geocode)
print('LOCATION: %s' % status['user']['location'])
print('GEOCODE: %s' % geocode)
except Exception as e:
if GEO.quota_exceeded:
sys.stderr.write('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request)
print('*** GEOCODER QUOTA EXCEEDED: %s\n' % GEO.count_request)
if photo_dir:
photo_url = media['media_url_https']
screen_name = status['user']['screen_name']
file_name = os.path.join(photo_dir, screen_name) + '.' + photo_url.split('.')[-1]
urllib.urlretrieve(photo_url, file_name)
sys.stdout.write(screen_name + '\n')
print(screen_name)


def stream_tweets(api, list, photo_dir, region, stalk, no_retweets):
Expand All @@ -57,21 +46,20 @@ def stream_tweets(api, list, photo_dir, region, stalk, no_retweets):
params['track'] = words
if region is not None:
params['locations'] = '%f,%f,%f,%f' % region
sys.stdout.write('REGION %s\n' % region)
print('REGION %s' % str(region))
while True:
try:
api.request('statuses/filter', params)
iter = api.get_iterator()
r = api.request('statuses/filter', params)
while True:
for item in iter:
for item in r.get_iterator():
if 'text' in item:
if not no_retweets or not item.has_key('retweeted_status'):
parse_tweet(item, photo_dir, stalk)
elif 'disconnect' in item:
raise Exception('Disconnect: %s' % item['disconnect'].get('reason'))
except Exception as e:
# reconnect on 401 errors and socket timeouts
sys.stderr.write('*** MUST RECONNECT %s\n' % e)
print('*** MUST RECONNECT %s\n' % e)


if __name__ == '__main__':
Expand All @@ -96,13 +84,13 @@ def stream_tweets(api, list, photo_dir, region, stalk, no_retweets):
else:
latC, lngC, latSW, lngSW, latNE, lngNE = GEO.get_region_box(args.location)
region = (lngSW, latSW, lngNE, latNE)
sys.stdout.write('Google found region at %f,%f and %f,%f\n' % region)
print('Google found region at %f,%f and %f,%f' % region)
else:
region = None

try:
stream_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets)
except KeyboardInterrupt:
sys.stderr.write('\nTerminated by user\n')
print('\nTerminated by user\n')

GEO.print_stats()
32 changes: 10 additions & 22 deletions TwitterGeoPics/GetOldGeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,20 @@
__license__ = "MIT"

import argparse
import codecs
from Geocoder import Geocoder
import sys
from .Geocoder import Geocoder
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager


try:
# python 3
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
sys.stderr = codecs.getwriter('utf8')(sys.stderr.buffer)
except:
# python 2
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
sys.stderr = codecs.getwriter('utf8')(sys.stderr)


GEO = Geocoder()


def parse_tweet(status):
"""Print tweet, location and geocode."""
try:
geocode = GEO.geocode_tweet(status)
sys.stdout.write('\n%s: %s\n' % (status['user']['screen_name'], status['text']))
sys.stdout.write('LOCATION: %s\n' % status['user']['location'])
sys.stdout.write('GEOCODE: %s\n' % geocode)
print('\n%s: %s' % (status['user']['screen_name'], status['text']))
print('LOCATION: %s' % status['user']['location'])
print('GEOCODE: %s' % geocode)
except Exception as e:
if GEO.quota_exceeded:
raise
Expand All @@ -41,15 +29,15 @@ def search_tweets(api, list, region):
if region:
params['geocode'] = '%f,%f,%fkm' % region # lat,lng,radius
while True:
iter = TwitterRestPager(api, 'search/tweets', params).get_iterator()
for item in iter:
pager = TwitterRestPager(api, 'search/tweets', params)
for item in pager.get_iterator():
if 'text' in item:
parse_tweet(item)
elif 'message' in item:
if item['code'] == 131:
continue # ignore internal server error
elif item['code'] == 88:
sys.stderr.write('Suspend search until %s\n' % search.get_quota()['reset'])
print('Suspend search until %s' % search.get_quota()['reset'])
raise Exception('Message from twiter: %s' % item['message'])


Expand All @@ -67,16 +55,16 @@ def search_tweets(api, list, region):
try:
if args.location:
lat, lng, radius = GEO.get_region_circle(args.location)
sys.stdout.write('Google found region at %f,%f with a radius of %s km\n' % (lat, lng, radius))
print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius))
if args.radius:
radius = args.radius
region = (lat, lng, radius)
else:
region = None
search_tweets(api, args.words, region)
except KeyboardInterrupt:
sys.stdout.write('\nTerminated by user\n')
print('\nTerminated by user\n')
except Exception as e:
sys.stdout.write('*** STOPPED %s\n' % e)
print('*** STOPPED %s\n' % e)

GEO.print_stats()
Loading

0 comments on commit 7355b29

Please sign in to comment.