Skip to content

Commit c1901c5

Browse files
committed
Changing usage so that multiple users can have their tweets scraped into one file
1 parent 6cc85af commit c1901c5

File tree

3 files changed: 47 additions (+47) and 14 deletions (-14).

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@
55
*.json
66
*.ignore
77
tweet.py
8+
saved_tweets/*
9+
authentication_files/*

getTweets.py

+30 -9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# "access_token" : "value",
1212
# "access_token_secret" : "value"
1313
# }
14+
1415
def get_credentials(filename):
1516
with open(filename) as file:
1617
cfg = json.load(file)
@@ -23,19 +24,39 @@ def get_api(cfg):
2324

2425
def main():
2526
try:
26-
username = sys.argv[1]
27-
amount = int(sys.argv[2])
27+
users = 1
28+
usernames = []
29+
amount = 0
30+
place_in_named_file = False
31+
for i in range(len(sys.argv)):
32+
try:
33+
# if I can cast to integer, interpret
34+
# as amount
35+
amount = int(sys.argv[users])
36+
break;
37+
except:
38+
# otherwise, interpret as twitterhandle
39+
usernames.append(sys.argv[users])
40+
users+=1
41+
if sys.argv[-2] == "-f":
42+
named_file = sys.argv[-1]
43+
place_in_named_file = True
44+
if len(usernames) == 0 or amount == 0:
45+
raise IndexError
2846
except IndexError as e:
29-
print "Usage: python getTweets.py username amount [filename]"
47+
print "Usage: python getTweets.py username [username...] amount [-f filename]"
3048
sys.exit(1)
49+
3150
# Fill in the values noted in previous step here
32-
cfg = get_credentials("the_Tmonster.json")
51+
cfg = get_credentials("./authentication_files/the_Tmonster.json")
3352
api = get_api(cfg)
34-
user = api.get_user(username)
35-
try:
36-
tweet_dumper.get_all_tweets4args(username, api, amount, sys.argv[3])
37-
except IndexError as e:
38-
tweet_dumper.get_all_tweets3args(username, api, amount)
53+
54+
for handle in usernames:
55+
user = api.get_user(handle)
56+
if place_in_named_file:
57+
tweet_dumper.get_all_tweets4args(handle, api, amount, named_file)
58+
else:
59+
tweet_dumper.get_all_tweets3args(handle, api, amount)
3960

4061
if __name__ == "__main__":
4162
main()

tweet_dumper.py

+15 -5
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,18 @@ def get_all_tweets4args(screen_name, api, amount, filename):
5555

5656

5757
def writeTweets(filename, mode, tweets):
58-
f = open(filename, mode)
59-
# print len(alltweets)
58+
if filename.find("saved_tweets") == -1:
59+
if os.path.isdir("./saved_tweets"):
60+
f = open("./saved_tweets/" + filename, mode)
61+
else:
62+
os.makedirs("./saved_tweets")
63+
f = open("./saved_tweets/" + filename, mode)
64+
elif os.path.isdir("./saved_tweets"):
65+
f = open(filename, mode)
66+
else:
67+
os.makedirs("./saved_tweets")
68+
f = open(filename, mode)
69+
6070
for tweet in tweets:
6171
if tweet != "\n" or tweet != "":
6272
f.write(tweet)
@@ -73,7 +83,7 @@ def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile
7383
new_tweets = api.user_timeline(screen_name = screen_name,count=200, include_rts = True)
7484
numTweets = len(new_tweets)
7585
MostRecentTweetPulled = getMostRecentTweet(lastTweetScrapedFile, screen_name)
76-
print "Most Recent Tweet Pulled =", MostRecentTweetPulled
86+
print "Most Recent Tweet ID Pulled for", screen_name, "=", MostRecentTweetPulled
7787

7888
# record most recent tweet id.
7989
# The id of the last tweet the user tweeted.
@@ -104,7 +114,7 @@ def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile
104114
strippedTweet+=character
105115
tweet = strippedTweet
106116
# strip URLS, Ampersands, retweets, and newlines
107-
tweet = re.sub(r'(?:www|https?)[^\s]+\s', '', tweet, flags=re.MULTILINE)
117+
tweet = re.sub(r'(www|https?)[^\s]+', '', tweet)
108118
tweet = re.sub(r'&amp;', '&', tweet, flags=re.MULTILINE)
109119
tweet = re.sub(r'^RT.*:+ ', '', tweet, flags=re.MULTILINE)
110120
tweet = tweet.replace('\n', ' ')
@@ -121,5 +131,5 @@ def get_all_tweets5args(screen_name, api, amount, filename, lastTweetScrapedFile
121131
numTweets += len(new_tweets)
122132

123133
writeTweets(filename, 'a', alltweets)
124-
# write to txt file
134+
# update Last Tweet
125135
updateLastTweet(lastTweetScrapedFile, screen_name, most_recent_tweet)

0 commit comments

Comments
 (0)