Skip to content

Commit ede5c93

Browse files
committed
adding some comments to tweet_dumper
1 parent 5d3ca38 commit ede5c93

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

tweet.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ def get_api(cfg):
88
def main():
99
# Fill in the values noted in previous step here
1010
cfg = {
11-
"consumer_key" : "Ximk8dlrGGejLqL65TO39SqMD",
12-
"consumer_secret" : "SiwVIoNUygs8FdZRAb4D3n6IdQUYPfUG82UNu2OwWUFhO9fq0l",
13-
"access_token" : "705285627-hsGmup5zp84eZ6BPCqjmAWaR3g2YOXad3gPBnUac",
14-
"access_token_secret" : "Z6ws8O78em89H4ErpD5O2A0UM90t7hBXLxXRVH2Rson6V"
11+
"consumer_key" : "",
12+
"consumer_secret" : "",
13+
"access_token" : "",
14+
"access_token_secret" : ""
1515
}
1616

1717
api = get_api(cfg)

tweet_dumper.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import os
44
import re
55

6+
7+
# update the id of the last tweet stored in the given file
8+
# if the file doesn't exist, one is created
69
def updateLastTweet(filename, screen_name, newLastTweet):
710
tmpLine = ""
811
newData = ""
@@ -22,6 +25,8 @@ def updateLastTweet(filename, screen_name, newLastTweet):
2225
os.remove(filename)
2326
os.rename(filename + ".swp", filename)
2427

28+
# get the id of the most recent tweet recorded from the file
29+
# specified by filename
2530
def getMostRecentTweet(filename, screen_name):
2631
if not os.path.isfile(filename):
2732
newFile = open(filename, 'w')
@@ -59,11 +64,12 @@ def writeTweets(filename, mode, tweets):
5964
f.close()
6065

6166
def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile):
62-
#Twitter only allows access to a users most recent 3240 tweets with this method
6367
#initialize a list to hold all the tweepy Tweets
6468
alltweets = []
6569

66-
# make request for most recent tweets (200 is the maximum allowed count)
70+
# make request for most recent tweets
71+
# you can only pull 200 tweets from twitter at a time,
72+
# hence count=200
6773
new_tweets = api.user_timeline(screen_name = screen_name,count=200, include_rts = True)
6874
numTweets = len(new_tweets)
6975
MostRecentTweetPulled = getMostRecentTweet(lastTweetScrapedFile, screen_name)
@@ -72,13 +78,16 @@ def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile
7278
# record most recent tweet id.
7379
# The id of the last tweet the user tweeted.
7480
if len(new_tweets) > 0:
81+
# most recent tweet will replace MostRecentTweetPulled
82+
# after all new tweets are scraped
7583
most_recent_tweet = new_tweets[0].id
7684
else:
7785
most_recent_tweet = 0
7886
# loop over first request and keep requesting until amount
7987
# is reached or the 3240 threshold is reached
8088
while len(new_tweets) > 0:
8189
for tweet in new_tweets:
90+
# break if you see the id of a tweet you have scraped
8291
if int(MostRecentTweetPulled) >= int(tweet.id):
8392
print "Most Recent Tweet Pulled =", MostRecentTweetPulled
8493
print "tweet.id = ", tweet.id
@@ -95,7 +104,7 @@ def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile
95104
strippedTweet+=character
96105
tweet = strippedTweet
97106
# strip URLs, HTML-escaped ampersands, retweet prefixes, and newlines
98-
tweet = re.sub(r'(?:www|https?)[^\s]+', '', tweet, flags=re.MULTILINE)
107+
tweet = re.sub(r'(?:www|https?)[^\s]+\s', '', tweet, flags=re.MULTILINE)
99108
tweet = re.sub(r'&amp;', '&', tweet, flags=re.MULTILINE)
100109
tweet = re.sub(r'^RT.*:+ ', '', tweet, flags=re.MULTILINE)
101110
tweet = tweet.replace('\n', ' ')

0 commit comments

Comments
 (0)