Adding Testing capabilities, README, and fixed authentication code problem

Tmonster · Tmonster · commit 9d878d2ca0fc · 2017-03-11T18:31:48.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 *tweets*
 *.pyc
 *.swp
+*.json
diff --git a/README.md b/README.md
@@ -0,0 +1,26 @@
+# tweet_scraper
+This project scrapes a user's tweets and appends them to tweets previously scraped from the same user.
+I built this so I could accumulate Twitter data and train an RNN on it, so that the RNN can post to Twitter itself.
+
+The RNN I am using is also open source and can be found at https://github.com/jcjohnson/torch-rnn
+
+Usage 
+
+```
+python getTweets.py screen_name amount [filename]
+```
+
+The filename is an optional argument that specifies which file the tweets are written to.
+Passing the name of an existing file appends the new tweets to that file.
+
+The id of the last tweet scraped is kept in a file called lastTweetScraped.txt, which is created in the current directory if it does not already exist.
+
+TODOs:
+
+Add specific flags for the arguments:
+-f file
+-af append to file
+-stop Scrape only new tweets, i.e. all tweets up to the last tweet scraped, as specified in the lastTweetScraped.txt file
+
+Maybe scrape just a range
+Better File I/O functionality
diff --git a/testingFiles/TestsssTom.txt b/testingFiles/TestsssTom.txt
@@ -0,0 +1,20 @@
+Test9
+Test8
+Test7
+Test6
+Test5
+Test4
+Test3
+Test2
+Test1
+Test0
+Test19
+Test18
+Test17
+Test16
+Test15
+Test14
+Test13
+Test12
+Test11
+Test10
diff --git a/testingFiles/TestsssTomVerify1.txt b/testingFiles/TestsssTomVerify1.txt
@@ -0,0 +1,10 @@
+Test9
+Test8
+Test7
+Test6
+Test5
+Test4
+Test3
+Test2
+Test1
+Test0
diff --git a/testingFiles/TestsssTomVerify2.txt b/testingFiles/TestsssTomVerify2.txt
@@ -0,0 +1,20 @@
+Test9
+Test8
+Test7
+Test6
+Test5
+Test4
+Test3
+Test2
+Test1
+Test0
+Test19
+Test18
+Test17
+Test16
+Test15
+Test14
+Test13
+Test12
+Test11
+Test10
diff --git a/testingFiles/testing1.txt b/testingFiles/testing1.txt
@@ -0,0 +1,3 @@
+Test1
+Test2
+Test3
diff --git a/testingFiles/testing2.txt b/testingFiles/testing2.txt
@@ -0,0 +1,4 @@
+Test1
+Test2
+Test3
+Test4
diff --git a/testingFiles/testing3.txt b/testingFiles/testing3.txt
@@ -0,0 +1,3 @@
+Test1
+Test2
+Test3
diff --git a/tests.py b/tests.py
@@ -0,0 +1,84 @@
+import tweepy #https://github.com/tweepy/tweepy
+import os
+import filecmp
+import tweet_dumper
+import json
+
+# For twitter account @TestsssTom
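+# Password to account is "saymanyougottajoint"
+# NOTE(review): account passwords should not be committed to source control; rotate this one and keep it outside the repository.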
+# key management
+# https://apps.twitter.com/app/13524605/keys
+
+# looks for json file with the proper credentials for the user
+# {
+# "consumer_key"        : "value",
+# "consumer_secret"     : "value",
+# "access_token"        : "value",
+# "access_token_secret" : "value"
+# }
+def get_credentials(filename):
+    with open(filename) as file:
+        cfg = json.load(file)
+    return cfg
+
+def get_api(cfg):
+    auth = tweepy.OAuthHandler(cfg['consumer_key'], cfg['consumer_secret'])
+    auth.set_access_token(cfg['access_token'], cfg['access_token_secret'])
+    return tweepy.API(auth)
+
+
+def batch_delete(api):
+    for status in tweepy.Cursor(api.user_timeline).items():
+        try:
+            api.destroy_status(status.id)
+        except:
+            print "Failed to delete:", status.id
+
+def batch_tweet(api, startid):
+    tweet = "Test"
+    for i in range(startid,startid+10):
+        api.update_status(tweet+str(i))
+
+def batch_tweet_from_file(api, filename):
+    for line in reversed(open(filename).readlines()):
+        api.update_status(line)
+
+
+def compare(file1, file2):
+    different = filecmp.cmp(file1, file2)
+    return different
+
+
+if __name__ == "__main__":
+    username = "TestsssTom"
+
+    cfg = get_credentials(username+".json")
+    api = get_api(cfg)
+    # desttroy all tweets on the page
+    batch_delete(api)
+    os.remove("TestingFiles/TestsssTom.txt")
+
+
+    print "Deleted all Tweets"
+    batch_tweet(api,0)
+    print "First set of tweets printed"
+    # get the tweets and match them with
+    tweet_dumper.get_all_tweets5args(username, api, 3240, "TestingFiles/TestsssTom.txt", "lastTweetScraped.txt")
+    firstPull = compare("TestingFiles/TestsssTom.txt", "TestingFiles/TestsssTomVerify1.txt")
+    if not firstPull:
+        print "scraped tweets do not match as expected"
+    else:
+        print "scraped tweets match desired output"
+
+    # batch tweet 10 more
+    batch_tweet(api,10)
+    tweet_dumper.get_all_tweets5args(username, api, 3240, "TestingFiles/TestsssTom.txt", "lastTweetScraped.txt")
+    fileappend = compare("TestingFiles/TestsssTom.txt", "TestingFiles/TestsssTomVerify2.txt")
+    if not fileappend:
+        print "appending new tweets not working"
+    else:
+        print "append tweets working"
+    # delete all the tweets again
+    batch_delete(api)
+
+

-Original file line number
+Diff line change
 *tweets*
 *.pyc
 *.swp
 +*.json