Skip to content

Commit 346f8dc

Browse files
committed
init commit
0 parents  commit 346f8dc

28 files changed

+2887
-0
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2015 Harrison Kinsley
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# sentdex's
2+
#### [Scikit-Learn tutorials for Machine Learning in Python for Investing](http://pythonprogramming.net/scikit-learn-tutorials-machine-learning-python-investing/ "Scikit-Learn tutorials for Machine Learning in Python for Investing")
3+
4+
> This is the same code as in the videos, but it's easier to grab the code using:
5+
6+
> <sub><sup>```git clone https://github.com/cleesmith/sentdex_scikit_machine_learning_tutorial_for_investing.git```</sup></sub>
7+
8+
> or click on the ```Download ZIP``` button above.
9+
10+
***
11+
12+
YouTube video playlist: [Scikit-learn Machine Learning with Python and SKlearn](https://www.youtube.com/playlist?list=PLQVvvaa0QuDd0flgGphKCej-9jp-QdzZ3 "Scikit-learn Machine Learning with Python and SKlearn")
13+
14+
***
15+
16+
> The intraQuarter data zip file is ~255MB, which is too large to store in a GitHub repo (GitHub rejects files over 100MB and warns above 50MB), so you need to
17+
download the [intraQuarter.zip](http://pythonprogramming.net/downloads/intraQuarter.zip "intraQuarter.zip") file separately.
18+
19+
***
20+
21+
> Once you have cloned the repository (```git clone```) or downloaded and unzipped the ZIP, you can install all of the required Python packages by running:
22+
```
23+
pip install -r requirements.txt
24+
... or:
25+
pip3 install -r requirements.txt
26+
```
27+
28+
***
29+
***

p02.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import matplotlib.pyplot as plt
2+
from sklearn import datasets
3+
from sklearn import svm
4+
5+
# Load the digits dataset that ships with scikit-learn and show its raw
# feature matrix and labels.
digits = datasets.load_digits()
print(digits.data)
print(digits.target)

# digits.target is the actual label we've assigned to the digits data.
# Now that we've got the data ready, we're ready to do the machine learning.
# First, we specify the classifier. svm.SVC() with no arguments uses the
# defaults; gamma=0.001 and gamma=0.01 (both with C=100) are the other
# settings tried in the tutorial.
clf = svm.SVC(gamma=0.0001, C=100)

# Hold out the last 10 samples so the prediction below is made on data the
# classifier was not trained on.
X, y = digits.data[:-10], digits.target[:-10]
clf.fit(X, y)

# predict() requires a 2-D array of shape (n_samples, n_features).
# digits.data[-5] is a single 1-D sample, which scikit-learn rejects, so
# slice instead of index to keep it as a one-row 2-D array.
print(clf.predict(digits.data[-5:-4]))
plt.imshow(digits.images[-5], cmap=plt.cm.gray_r, interpolation='nearest')
plt.show()

p04.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import pandas as pd
2+
import os
3+
import time
4+
from datetime import datetime
5+
6+
# path = "X:/Backups/intraQuarter" # for Windows with X files :)
7+
# if git clone'ed then use relative path,
8+
# assuming you extracted the downloaded zip into this project's folder:
9+
path = "intraQuarter"
10+
11+
def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Print the capture time of every snapshot file under ``<path>/_KeyStats``.

    Each sub-directory of ``<path>/_KeyStats`` is scanned for files named
    ``YYYYMMDDHHMMSS.html``. For every such file the parsed datetime and the
    matching Unix timestamp (local time, via ``time.mktime``) are printed.

    Args:
        gather: Name of the statistic to collect. It is not used at this
            stage of the tutorial; it is kept so the signature matches the
            later versions of this function.
    """
    statspath = path+'/_KeyStats'
    # os.walk yields the root directory first, then its sub-directories;
    # the root itself holds no snapshots, hence the [1:] below.
    stock_list = [x[0] for x in os.walk(statspath)]

    for each_dir in stock_list[1:]:
        for file in os.listdir(each_dir):
            try:
                date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
            except ValueError:
                # Not a timestamped snapshot (e.g. a stray .DS_Store left by
                # unzipping); skip it instead of aborting the whole scan.
                continue
            unix_time = time.mktime(date_stamp.timetuple())
            print(date_stamp, unix_time)
24+
25+
Key_Stats()
26+
27+

p05.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import pandas as pd
2+
import os
3+
import time
4+
from datetime import datetime
5+
6+
# path = "X:/Backups/intraQuarter" # for Windows with X files :)
7+
# if git clone'ed then use relative path,
8+
# assuming you extracted the downloaded zip into this project's folder:
9+
path = "intraQuarter"
10+
11+
def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Print the raw value of one statistic for every saved snapshot.

    Each sub-directory of ``<path>/_KeyStats`` is treated as one ticker (the
    directory name is the ticker symbol). For every ``YYYYMMDDHHMMSS.html``
    file in it, the file path is printed, followed by ``<ticker>: <value>``
    where ``<value>`` is the text of the table cell that follows the
    ``gather`` label.

    Snapshots that do not contain the label used to abort the run with
    ``IndexError: list index out of range`` (first seen on
    ``aapl/20060203134959.html``). They are now reported and skipped.

    Args:
        gather: Label of the statistic to extract, exactly as it appears in
            the HTML table.
    """
    statspath = path+'/_KeyStats'
    # os.walk yields the root directory first; only its sub-directories
    # hold snapshots, hence the [1:] below.
    stock_list = [x[0] for x in os.walk(statspath)]
    # The label cell is immediately followed by the cell holding the value.
    marker = gather+':</td><td class="yfnc_tabledata1">'

    for each_dir in stock_list[1:]:
        # basename/normpath works with both "/" and "\\" separators.
        ticker = os.path.basename(os.path.normpath(each_dir))
        for file in os.listdir(each_dir):
            try:
                datetime.strptime(file, '%Y%m%d%H%M%S.html')
            except ValueError:
                # Not a timestamped snapshot (e.g. .DS_Store); ignore it.
                continue
            full_file_path = each_dir+'/'+file
            print(full_file_path)
            with open(full_file_path, 'r') as snapshot:
                source = snapshot.read()
            pieces = source.split(marker)
            if len(pieces) < 2:
                # The page has no such statistic; keep going with the rest.
                print(ticker+":", "N/A ("+gather+" not found)")
                continue
            value = pieces[1].split('</td>')[0]
            print(ticker+":", value)
40+
41+
Key_Stats()

p06.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pandas as pd
2+
import os
3+
import time
4+
from datetime import datetime
5+
6+
# path = "X:/Backups/intraQuarter" # for Windows with X files :)
7+
# if git clone'ed then use relative path,
8+
# assuming you extracted the downloaded zip into this project's folder:
9+
path = "intraQuarter"
10+
11+
def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Collect one statistic from every saved snapshot into a CSV file.

    Each sub-directory of ``<path>/_KeyStats`` is treated as one ticker (the
    directory name is the ticker symbol). Every ``YYYYMMDDHHMMSS.html`` file
    whose ``gather`` cell holds a number contributes one row with the
    columns ``Date``, ``Unix``, ``Ticker`` and ``DE Ratio``. Snapshots with
    a missing or non-numeric value are skipped.

    The result is written to the current directory as ``gather`` with
    spaces, parentheses and slashes removed plus ``.csv``; that file name is
    also printed.

    Args:
        gather: Label of the statistic to extract, exactly as it appears in
            the HTML table.
    """
    statspath = path+'/_KeyStats'
    # os.walk yields the root directory first; only its sub-directories
    # hold snapshots, hence the [1:] below.
    stock_list = [x[0] for x in os.walk(statspath)]
    # The label cell is immediately followed by the cell holding the value.
    marker = gather+':</td><td class="yfnc_tabledata1">'

    # Rows are gathered in a plain list and turned into a DataFrame once.
    # DataFrame.append was removed in pandas 2.0, and calling it per row
    # copied the whole frame every time.
    rows = []
    for each_dir in stock_list[1:]:
        # basename/normpath works with both "/" and "\\" separators.
        ticker = os.path.basename(os.path.normpath(each_dir))
        # Timestamped names sort chronologically, giving a stable row order.
        for file in sorted(os.listdir(each_dir)):
            try:
                date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
            except ValueError:
                # Not a timestamped snapshot (e.g. .DS_Store); ignore it.
                continue
            unix_time = time.mktime(date_stamp.timetuple())
            with open(each_dir+'/'+file, 'r') as snapshot:
                source = snapshot.read()
            try:
                value = float(source.split(marker)[1].split('</td>')[0])
            except (IndexError, ValueError):
                # IndexError: the label is absent from this page.
                # ValueError: the cell is not a number (e.g. "N/A").
                continue
            rows.append({'Date': date_stamp,
                         'Unix': unix_time,
                         'Ticker': ticker,
                         'DE Ratio': value})

    df = pd.DataFrame(rows, columns=['Date', 'Unix', 'Ticker', 'DE Ratio'])
    save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
    print(save)
    df.to_csv(save)
36+
37+
Key_Stats()

p07.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import pandas as pd
2+
import os
3+
import time
4+
from datetime import datetime
5+
6+
# path = "X:/Backups/intraQuarter" # for Windows with X files :)
7+
# if git clone'ed then use relative path,
8+
# assuming you extracted the downloaded zip into this project's folder:
9+
path = "intraQuarter"
10+
11+
# Three days expressed in seconds. When a snapshot's own date has no index
# row, the lookup is retried this far back.
# NOTE(review): presumably this is meant to step from a weekend back to the
# preceding trading day; confirm against the data.
_SP500_FALLBACK_SECONDS = 259200


def _sp500_close(sp500_df, unix_time):
    """Return the "Adjusted Close" for the snapshot's date, or None.

    The date of ``unix_time`` is tried first, then the date three days
    earlier. ``None`` means neither date has a row in ``sp500_df``.
    """
    for offset in (0, _SP500_FALLBACK_SECONDS):
        sp500_date = datetime.fromtimestamp(unix_time - offset).strftime('%Y-%m-%d')
        closes = sp500_df.loc[sp500_df.index == sp500_date, "Adjusted Close"]
        if not closes.empty:
            return float(closes.iloc[0])
    return None


def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Build a CSV of one statistic, the stock price and the S&P 500 level.

    Each sub-directory of ``<path>/_KeyStats`` is treated as one ticker (the
    directory name is the ticker symbol); only the first 24 directories
    returned by ``os.walk`` are processed. Every ``YYYYMMDDHHMMSS.html``
    file contributes one row (``Date``, ``Unix``, ``Ticker``, ``DE Ratio``,
    ``Price``, ``SP500``) when all three values can be obtained:

    * the ``gather`` statistic, read from the cell after its label;
    * the stock price, read from the ``<big><b>`` element that follows
      ``</small>``;
    * the index value, read from the ``Adjusted Close`` column of
      ``YAHOO-INDEX_GSPC.csv`` in the current directory.

    Snapshots missing any of these are skipped. The result is written to
    the current directory as ``gather`` with spaces, parentheses and
    slashes removed plus ``.csv``; that file name is also printed.

    Args:
        gather: Label of the statistic to extract, exactly as it appears in
            the HTML table.

    Raises:
        FileNotFoundError: if ``YAHOO-INDEX_GSPC.csv`` does not exist.
        KeyError: if that file has no ``Adjusted Close`` column.
    """
    statspath = path+'/_KeyStats'
    # os.walk yields the root directory first; only its sub-directories
    # hold snapshots, hence the slice starting at 1 below.
    stock_list = [x[0] for x in os.walk(statspath)]
    # The label cell is immediately followed by the cell holding the value.
    marker = gather+':</td><td class="yfnc_tabledata1">'

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True is its documented equivalent.
    sp500_df = pd.read_csv("YAHOO-INDEX_GSPC.csv", index_col=0, parse_dates=True)

    # Rows are gathered in a plain list and turned into a DataFrame once.
    # DataFrame.append was removed in pandas 2.0, and calling it per row
    # copied the whole frame every time.
    rows = []
    for each_dir in stock_list[1:25]:
        # basename/normpath works with both "/" and "\\" separators.
        ticker = os.path.basename(os.path.normpath(each_dir))
        # Timestamped names sort chronologically, giving a stable row order.
        for file in sorted(os.listdir(each_dir)):
            try:
                date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
            except ValueError:
                # Not a timestamped snapshot (e.g. .DS_Store); ignore it.
                continue
            unix_time = time.mktime(date_stamp.timetuple())
            with open(each_dir+'/'+file, 'r') as snapshot:
                source = snapshot.read()
            try:
                value = float(source.split(marker)[1].split('</td>')[0])
                stock_price = float(source.split('</small><big><b>')[1].split('</b></big>')[0])
            except (IndexError, ValueError):
                # IndexError: a marker is absent from this page.
                # ValueError: the text found is not a number (e.g. "N/A").
                continue
            sp500_value = _sp500_close(sp500_df, unix_time)
            if sp500_value is None:
                # No index data for this date or the fallback date.
                continue
            rows.append({'Date': date_stamp,
                         'Unix': unix_time,
                         'Ticker': ticker,
                         'DE Ratio': value,
                         'Price': stock_price,
                         'SP500': sp500_value})

    df = pd.DataFrame(rows, columns=['Date', 'Unix', 'Ticker', 'DE Ratio', 'Price', 'SP500'])
    save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
    print(save)
    df.to_csv(save)
57+
58+
Key_Stats()

p08.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import pandas as pd
2+
import os
3+
import time
4+
from datetime import datetime
5+
6+
# path = "X:/Backups/intraQuarter" # for Windows with X files :)
7+
# if git clone'ed then use relative path,
8+
# assuming you extracted the downloaded zip into this project's folder:
9+
path = "intraQuarter"
10+
11+
# Three days expressed in seconds. When a snapshot's own date has no index
# row, the lookup is retried this far back.
# NOTE(review): presumably this is meant to step from a weekend back to the
# preceding trading day; confirm against the data.
_SP500_FALLBACK_SECONDS = 259200

_COLUMNS = [
    'Date',
    'Unix',
    'Ticker',
    'DE Ratio',
    'Price',
    'stock_p_change',
    'SP500',
    'sp500_p_change',
]


def _sp500_close(sp500_df, unix_time):
    """Return the "Adjusted Close" for the snapshot's date, or None.

    The date of ``unix_time`` is tried first, then the date three days
    earlier. ``None`` means neither date has a row in ``sp500_df``.
    """
    for offset in (0, _SP500_FALLBACK_SECONDS):
        sp500_date = datetime.fromtimestamp(unix_time - offset).strftime('%Y-%m-%d')
        closes = sp500_df.loc[sp500_df.index == sp500_date, "Adjusted Close"]
        if not closes.empty:
            return float(closes.iloc[0])
    return None


def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Build a CSV of one statistic plus stock and S&P 500 percent changes.

    Each sub-directory of ``<path>/_KeyStats`` is treated as one ticker (the
    directory name is the ticker symbol); only the first 24 directories
    returned by ``os.walk`` are processed. Every ``YYYYMMDDHHMMSS.html``
    file contributes one row when all three values can be obtained:

    * the ``gather`` statistic, read from the cell after its label;
    * the stock price, read from the ``<big><b>`` element that follows
      ``</small>``;
    * the index value, read from the ``Adjusted Close`` column of
      ``YAHOO-INDEX_GSPC.csv`` in the current directory.

    ``stock_p_change`` and ``sp500_p_change`` are percent changes relative
    to the ticker's first usable snapshot. Files are processed in name
    order, which is chronological for timestamped names, so the baseline is
    the earliest usable snapshot and not whichever file ``os.listdir``
    happens to return first.

    Snapshots missing any value are skipped. The result is written to the
    current directory as ``gather`` with spaces, parentheses and slashes
    removed plus ``.csv``; that file name is also printed.

    Args:
        gather: Label of the statistic to extract, exactly as it appears in
            the HTML table.

    Raises:
        FileNotFoundError: if ``YAHOO-INDEX_GSPC.csv`` does not exist.
        KeyError: if that file has no ``Adjusted Close`` column.
    """
    statspath = path+'/_KeyStats'
    # os.walk yields the root directory first; only its sub-directories
    # hold snapshots, hence the slice starting at 1 below.
    stock_list = [x[0] for x in os.walk(statspath)]
    # The label cell is immediately followed by the cell holding the value.
    marker = gather+':</td><td class="yfnc_tabledata1">'

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with
    # index_col=0 and parse_dates=True is its documented equivalent.
    sp500_df = pd.read_csv("YAHOO-INDEX_GSPC.csv", index_col=0, parse_dates=True)

    # Rows are gathered in a plain list and turned into a DataFrame once.
    # DataFrame.append was removed in pandas 2.0, and calling it per row
    # copied the whole frame every time.
    rows = []
    for each_dir in stock_list[1:25]:
        # basename/normpath works with both "/" and "\\" separators.
        ticker = os.path.basename(os.path.normpath(each_dir))

        # Baselines for this ticker. They stay falsy until a usable
        # snapshot provides a non-zero value.
        starting_stock_value = None
        starting_sp500_value = None

        for file in sorted(os.listdir(each_dir)):
            try:
                date_stamp = datetime.strptime(file, '%Y%m%d%H%M%S.html')
            except ValueError:
                # Not a timestamped snapshot (e.g. .DS_Store); ignore it.
                continue
            unix_time = time.mktime(date_stamp.timetuple())
            with open(each_dir+'/'+file, 'r') as snapshot:
                source = snapshot.read()
            try:
                value = float(source.split(marker)[1].split('</td>')[0])
                stock_price = float(source.split('</small><big><b>')[1].split('</b></big>')[0])
            except (IndexError, ValueError):
                # IndexError: a marker is absent from this page.
                # ValueError: the text found is not a number (e.g. "N/A").
                continue
            sp500_value = _sp500_close(sp500_df, unix_time)
            if sp500_value is None:
                # No index data for this date or the fallback date.
                continue

            # Truthiness check on purpose: a zero baseline is treated like
            # "not set yet", so the next snapshot can replace it.
            if not starting_stock_value:
                starting_stock_value = stock_price
            if not starting_sp500_value:
                starting_sp500_value = sp500_value
            try:
                stock_p_change = ((stock_price - starting_stock_value) / starting_stock_value) * 100
                sp500_p_change = ((sp500_value - starting_sp500_value) / starting_sp500_value) * 100
            except ZeroDivisionError:
                # A zero price or index value cannot be used as a baseline.
                continue

            rows.append({'Date': date_stamp,
                         'Unix': unix_time,
                         'Ticker': ticker,
                         'DE Ratio': value,
                         'Price': stock_price,
                         'stock_p_change': stock_p_change,
                         'SP500': sp500_value,
                         'sp500_p_change': sp500_p_change})

    df = pd.DataFrame(rows, columns=_COLUMNS)
    save = gather.replace(' ','').replace(')','').replace('(','').replace('/','')+('.csv')
    print(save)
    df.to_csv(save)
84+
85+
Key_Stats()

0 commit comments

Comments
 (0)