PythonProgramming
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 29 additions & 0 deletions b/‎README.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎p02.py‎
Lines changed: 22 additions & 0 deletions b/‎p02.py‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎p04.py‎
Lines changed: 27 additions & 0 deletions b/‎p04.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎p05.py‎
Lines changed: 41 additions & 0 deletions b/‎p05.py‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎p06.py‎
Lines changed: 37 additions & 0 deletions b/‎p06.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎p07.py‎
Lines changed: 58 additions & 0 deletions b/‎p07.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎p08.py‎
Lines changed: 85 additions & 0 deletions b/‎p08.py‎
Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Harrison Kinsley
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,29 @@
+# sentdex's
+#### [Scikit-Learn tutorials for Machine Learning in Python for Investing](http://pythonprogramming.net/scikit-learn-tutorials-machine-learning-python-investing/ "Scikit-Learn tutorials for Machine Learning in Python for Investing")
+
+> This is the same code as in the videos, but it's easier to grab the code using:
+
+> <sub><sup>```git clone https://github.com/cleesmith/sentdex_scikit_machine_learning_tutorial_for_investing.git```</sup></sub>
+
+> or click on the ```Download ZIP``` button above.
+
+***
+
+YouTube video playlist: [Scikit-learn Machine Learning with Python and SKlearn](https://www.youtube.com/playlist?list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3 "Scikit-learn Machine Learning with Python and SKlearn")
+
+***
+
+> The intraQuarter data zip file is ~255MB, which is too large to store in a GitHub repo (GitHub warns about files over 50MB and rejects files over 100MB), so you need to
+download the [intraQuarter.zip](http://pythonprogramming.net/downloads/intraQuarter.zip "intraQuarter.zip") file separately.
+
+***
+
+> Once you have performed a ```git clone``` or downloaded and unzipped the ZIP file, you can install all of the Python packages by running:
+```
+pip install -r requirements.txt
+... or:
+pip3 install -r requirements.txt
+```
+
+***
+***
@@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn import svm
+
+digits = datasets.load_digits()
+print(digits.data)
+print(digits.target)
+
+# digits.target is the actual label we've assigned to the digits data. 
+# Now that we've got the data ready, we're ready to do the machine learning.
+# First, we specify the classifier:
+# If you want, you can just leave parameters blank and use the defaults, like this:
+# clf = svm.SVC()
+# clf = svm.SVC(gamma=0.001, C=100)
+# clf = svm.SVC(gamma=0.01, C=100)
+clf = svm.SVC(gamma=0.0001, C=100)
+
+X,y = digits.data[:-10], digits.target[:-10]
+clf.fit(X,y)
+print(clf.predict(digits.data[-5]))
+plt.imshow(digits.images[-5], cmap=plt.cm.gray_r, interpolation='nearest')
+plt.show()
@@ -0,0 +1,27 @@
+import pandas as pd
+import os
+import time
+from datetime import datetime
+
+# path = "X:/Backups/intraQuarter" # for Windows with X files :)
+# if git clone'ed then use relative path,
+# assuming you extracted the downloaded zip into this project's folder:
+path = "intraQuarter"
+
+def Key_Stats(gather="Total Debt/Equity (mrq)"):
+  statspath = path+'/_KeyStats'
+  stock_list = [x[0] for x in os.walk(statspath)]
+  # print(stock_list)
+
+  for each_dir in stock_list[1:]:
+    each_file = os.listdir(each_dir)
+    if len(each_file) > 0:
+      for file in each_file:
+        date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
+        unix_time = time.mktime(date_stamp.timetuple())
+        print(date_stamp, unix_time)
+        #time.sleep(15)
+
+Key_Stats()
+
+
@@ -0,0 +1,41 @@
+import pandas as pd
+import os
+import time
+from datetime import datetime
+
+# path = "X:/Backups/intraQuarter" # for Windows with X files :)
+# if git clone'ed then use relative path,
+# assuming you extracted the downloaded zip into this project's folder:
+path = "intraQuarter"
+
+def Key_Stats(gather="Total Debt/Equity (mrq)"):
+  statspath = path+'/_KeyStats'
+  stock_list = [x[0] for x in os.walk(statspath)]
+  for each_dir in stock_list[1:]:
+    each_file = os.listdir(each_dir)
+    # ticker = each_dir.split("\\")[1] # Windows only
+    # ticker = each_dir.split("/")[1] # this didn't work so do this:
+    ticker = os.path.basename(os.path.normpath(each_dir))
+    # print(ticker) # uncomment to verify
+    if len(each_file) > 0:
+      for file in each_file:
+        date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
+        unix_time = time.mktime(date_stamp.timetuple())
+        #print(date_stamp, unix_time)
+        full_file_path = each_dir+'/'+file
+        print(full_file_path)
+        source = open(full_file_path,'r').read()
+        #print(source)
+        value = source.split(gather+':</td><td class="yfnc_tabledata1">')[1].split('</td>')[0]
+        print(ticker+":",value)
+      # time.sleep(15)
+      # 2015jan28: this error occurs: ???
+      # intraQuarter/_KeyStats/aapl/20060203134959.html
+      # Traceback (most recent call last):
+      #   File "p5.py", line 31, in <module>
+      #     Key_Stats()
+      #   File "p5.py", line 27, in Key_Stats
+      #     value = source.split(gather+':</td><td class="yfnc_tabledata1">')[1].split('</td>')[0]
+      # IndexError: list index out of range
+
+Key_Stats()
@@ -0,0 +1,37 @@
+import pandas as pd
+import os
+import time
+from datetime import datetime
+
+# path = "X:/Backups/intraQuarter" # for Windows with X files :)
+# if git clone'ed then use relative path,
+# assuming you extracted the downloaded zip into this project's folder:
+path = "intraQuarter"
+
+def Key_Stats(gather="Total Debt/Equity (mrq)"):
+  statspath = path+'/_KeyStats'
+  stock_list = [x[0] for x in os.walk(statspath)]
+  df = pd.DataFrame(columns = ['Date','Unix','Ticker','DE Ratio'])
+  for each_dir in stock_list[1:]:
+    each_file = os.listdir(each_dir)
+    # ticker = each_dir.split("\\")[1] # Windows only
+    # ticker = each_dir.split("/")[1] # this didn't work so do this:
+    ticker = os.path.basename(os.path.normpath(each_dir))
+    # print(ticker) # uncomment to verify
+    if len(each_file) > 0:
+      for file in each_file:
+        date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
+        unix_time = time.mktime(date_stamp.timetuple())
+        full_file_path = each_dir+'/'+file
+        source = open(full_file_path,'r').read()
+        try:
+          value = float(source.split(gather+':</td><td class="yfnc_tabledata1">')[1].split('</td>')[0])
+          # print("value=%s"%value) # uncomment to see what's up
+          df = df.append({'Date':date_stamp,'Unix':unix_time,'Ticker':ticker,'DE Ratio':value,}, ignore_index = True)
+        except Exception as e:
+          pass
+  save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
+  print(save)
+  df.to_csv(save)
+
+Key_Stats()
@@ -0,0 +1,58 @@
+import pandas as pd
+import os
+import time
+from datetime import datetime
+
+# path = "X:/Backups/intraQuarter" # for Windows with X files :)
+# if git clone'ed then use relative path,
+# assuming you extracted the downloaded zip into this project's folder:
+path = "intraQuarter"
+
+def Key_Stats(gather="Total Debt/Equity (mrq)"):
+  statspath = path+'/_KeyStats'
+  stock_list = [x[0] for x in os.walk(statspath)]
+  df = pd.DataFrame(columns = ['Date','Unix','Ticker','DE Ratio','Price','SP500'])
+
+  sp500_df = pd.DataFrame.from_csv("YAHOO-INDEX_GSPC.csv")
+
+  for each_dir in stock_list[1:25]:
+    each_file = os.listdir(each_dir)
+    # ticker = each_dir.split("\\")[1] # Windows only
+    # ticker = each_dir.split("/")[1] # this didn't work so do this:
+    ticker = os.path.basename(os.path.normpath(each_dir))
+    # print(ticker) # uncomment to verify
+    if len(each_file) > 0:
+      for file in each_file:
+        date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
+        unix_time = time.mktime(date_stamp.timetuple())
+        full_file_path = each_dir+'/'+file
+        source = open(full_file_path,'r').read()
+        try:
+          value = float(source.split(gather+':</td><td class="yfnc_tabledata1">')[1].split('</td>')[0])
+
+          try:
+            sp500_date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d')
+            row = sp500_df[(sp500_df.index == sp500_date)]
+            sp500_value = float(row["Adjusted Close"])
+          except:
+            sp500_date = datetime.fromtimestamp(unix_time-259200).strftime('%Y-%m-%d')
+            row = sp500_df[(sp500_df.index == sp500_date)]
+            sp500_value = float(row["Adjusted Close"])
+
+          stock_price = float(source.split('</small><big><b>')[1].split('</b></big>')[0])
+          #print("stock_price:",stock_price,"ticker:", ticker)
+
+          df = df.append({'Date':date_stamp,
+                  'Unix':unix_time,
+                  'Ticker':ticker,
+                  'DE Ratio':value,
+                  'Price':stock_price,
+                  'SP500':sp500_value}, ignore_index = True)
+        except Exception as e:
+          pass
+
+  save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
+  print(save)
+  df.to_csv(save)
+
+Key_Stats()
@@ -0,0 +1,85 @@
+import pandas as pd
+import os
+import time
+from datetime import datetime
+
+# path = "X:/Backups/intraQuarter" # for Windows with X files :)
+# if git clone'ed then use relative path,
+# assuming you extracted the downloaded zip into this project's folder:
+path = "intraQuarter"
+
+def Key_Stats(gather="Total Debt/Equity (mrq)"):
+  statspath = path+'/_KeyStats'
+  stock_list = [x[0] for x in os.walk(statspath)]
+  df = pd.DataFrame(
+    columns = [
+      'Date',
+      'Unix',
+      'Ticker',
+      'DE Ratio',
+      'Price',
+      'stock_p_change',
+      'SP500',
+      'sp500_p_change'
+    ]
+  )
+  sp500_df = pd.DataFrame.from_csv("YAHOO-INDEX_GSPC.csv")
+
+  ticker_list = []
+
+  for each_dir in stock_list[1:25]:
+    each_file = os.listdir(each_dir)
+    # ticker = each_dir.split("\\")[1] # Windows only
+    # ticker = each_dir.split("/")[1] # this didn't work so do this:
+    ticker = os.path.basename(os.path.normpath(each_dir))
+    # print(ticker) # uncomment to verify
+    ticker_list.append(ticker)
+
+    starting_stock_value = False
+    starting_sp500_value = False
+    if len(each_file) > 0:
+      for file in each_file:
+        date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
+        unix_time = time.mktime(date_stamp.timetuple())
+        full_file_path = each_dir+'/'+file
+        source = open(full_file_path,'r').read()
+        try:
+          value = float(source.split(gather+':</td><td class="yfnc_tabledata1">')[1].split('</td>')[0])
+
+          try:
+            sp500_date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d')
+            row = sp500_df[(sp500_df.index == sp500_date)]
+            sp500_value = float(row["Adjusted Close"])
+          except:
+            sp500_date = datetime.fromtimestamp(unix_time-259200).strftime('%Y-%m-%d')
+            row = sp500_df[(sp500_df.index == sp500_date)]
+            sp500_value = float(row["Adjusted Close"])
+
+          stock_price = float(source.split('</small><big><b>')[1].split('</b></big>')[0])
+          #print("stock_price:",stock_price,"ticker:", ticker)
+
+          if not starting_stock_value:
+            starting_stock_value = stock_price
+          if not starting_sp500_value:
+            starting_sp500_value = sp500_value
+          stock_p_change = ((stock_price - starting_stock_value) / starting_stock_value) * 100
+          sp500_p_change = ((sp500_value - starting_sp500_value) / starting_sp500_value) * 100
+          df = df.append({'Date':date_stamp,
+                          'Unix':unix_time,
+                          'Ticker':ticker,
+                          'DE Ratio':value,
+                          'Price':stock_price,
+                          'stock_p_change':stock_p_change,
+                          'SP500':sp500_value,
+                          'sp500_p_change':sp500_p_change},
+                          ignore_index = True
+          )
+        except Exception as e:
+          pass
+          #print(str(e))
+
+  save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
+  print(save)
+  df.to_csv(save)
+
+Key_Stats()