forked from Kulbear/stock-prediction
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
107 lines (77 loc) · 2.92 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
# coding: utf-8
# # Stock Prediction with Recurrent Neural Network
#
# Deep learning is widely used in modern quantitative finance. Many different neural networks can be applied to stock price prediction problems. Among them, the recurrent neural network, and specifically the Long Short-Term Memory (LSTM) network, tends to outperform other architectures because, with a suitable configuration, it can take advantage of the sequential (time-series) structure of the data when making predictions.
#
# We will build a very simple LSTM with Keras to predict stock prices in the Chinese stock market.
# In[1]:
import time
import math
import numpy as np
import pandas as pd
import sklearn.preprocessing as prep
import os
from const import *
def get_stocks():
    """Load the stock list from ``stocklist.txt`` in the working directory.

    Each useful line has the form ``<first>,<second>[,...]``. The first
    field becomes a dictionary key and the second field its value; any
    further fields are ignored. If a key appears more than once, the last
    occurrence wins.

    Lines that are blank, contain no comma, or have an empty first field
    are skipped rather than raising ``IndexError``. Surrounding whitespace
    (including the trailing newline) is stripped from both fields.

    Returns:
        dict: mapping of first field -> second field for every valid line.

    Raises:
        OSError: if ``stocklist.txt`` cannot be opened.
    """
    stocks = {}
    with open("stocklist.txt", "r") as stock_file:
        # Iterate the file lazily instead of materializing every line.
        for line in stock_file:
            fields = line.split(",")
            if len(fields) < 2:
                # Blank line or no comma: there is no second field to read.
                continue
            key = fields[0].strip()
            if not key:
                continue
            stocks[key] = fields[1].strip()
    return stocks
def preprocess_training_data(df, seq_len):
    """Build standardized sliding-window inputs and binary labels.

    The ``'date'`` column is dropped and the remaining columns are cut
    into overlapping windows of ``seq_len + 1`` consecutive rows. The
    first ``seq_len`` rows of each window form one input sample; the
    final row is used only to derive the label.

    Inputs are flattened to ``(n_windows, seq_len * n_features)``,
    standardized with a ``StandardScaler`` fitted on those same windows,
    and reshaped back to ``(n_windows, seq_len, n_features)``.

    The label of a window is 1 when (last column of the window's final
    row) minus (first column of the row before it) is positive, and 0
    when that difference is zero or negative.
    NOTE(review): which quantities the first/last columns hold depends on
    the CSV layout, which is not visible here -- confirm against the data.

    Every valid window is produced, including the one that ends on the
    last row of ``df`` (``len(df) - seq_len`` windows in total).

    Args:
        df: DataFrame containing a ``'date'`` column plus feature columns.
        seq_len: number of rows in each input sample; must be >= 1.

    Returns:
        list: ``[x_train, y_train]``.

    Raises:
        ValueError: if ``'date'`` is not a column, if ``seq_len < 1``, or
            if ``df`` has fewer than ``seq_len + 1`` rows.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1, got %r" % (seq_len,))

    feature_columns = df.columns.tolist()
    feature_columns.remove('date')
    data = df[feature_columns].values
    amount_of_features = data.shape[1]

    window_length = seq_len + 1
    # +1 so the window ending on the very last row is not dropped.
    n_windows = len(data) - window_length + 1
    if n_windows < 1:
        raise ValueError(
            "need at least %d rows to build a training window, got %d"
            % (window_length, len(data))
        )

    windows = np.array(
        [data[start:start + window_length] for start in range(n_windows)]
    )

    # The scaler works on 2-D input, so flatten each sample, scale, and
    # restore the (sample, step, feature) layout afterwards.
    x_train = windows[:, :-1].reshape(n_windows, seq_len * amount_of_features)
    x_train = prep.StandardScaler().fit_transform(x_train)
    x_train = x_train.reshape(n_windows, seq_len, amount_of_features)

    # The subtraction allocates a new array, so the in-place thresholding
    # below does not touch `windows`.
    y_train = windows[:, -1, -1] - windows[:, -2, 0]
    y_train[y_train > 0] = 1
    y_train[y_train <= 0] = 0
    return [x_train, y_train]
def preprocess_infernece_data(df, seq_len):
    """Return one standardized window of ``seq_len`` rows for inference.

    The ``'date'`` column is dropped, the remaining columns are cut into
    overlapping windows of ``seq_len`` consecutive rows, and each window
    is flattened, standardized with a ``StandardScaler`` fitted on all of
    the windows, and reshaped to ``(seq_len, n_features)``.

    Only the second-to-last window built is returned, as an array whose
    first axis has length 1 (or 0 when fewer than two windows exist).

    NOTE(review): the window loop stops at ``len(rows) - seq_len``, so the
    window ending on the final row is never built, and ``[-2:-1]`` then
    picks the window before the last one built. Confirm that this is the
    intended slice of history for inference.

    Args:
        df: DataFrame containing a ``'date'`` column plus feature columns.
        seq_len: number of rows per window.

    Returns:
        numpy array holding the selected standardized window.
    """
    feature_names = df.columns.tolist()
    feature_names.remove('date')
    features = df[feature_names]
    n_features = len(features.columns)
    rows = features.values

    windows = np.array(
        [rows[start: start + seq_len] for start in range(len(rows) - seq_len)]
    )

    # The scaler works on 2-D input: flatten, scale, then restore the
    # (window, step, feature) layout.
    flat = windows.reshape(windows.shape[0], seq_len * n_features)
    scaler = prep.StandardScaler().fit(flat)
    scaled = scaler.transform(flat)
    scaled = scaled.reshape(scaled.shape[0], seq_len, n_features)

    return scaled[-2:-1]
def train(seq_len=20):
    """Load and preprocess the training CSV of every stock in the stock list.

    For each key returned by ``get_stocks()``, the file at
    ``g_data_train_directory + key`` is read with pandas and passed to
    ``preprocess_training_data``. That function returns the
    ``[x_train, y_train]`` pair this loop unpacks; the inference
    preprocessor returns a single array, so unpacking its result into two
    names always failed.

    The preprocessed arrays are not used any further here; no model is
    fitted in this function.

    Failures for an individual stock (missing file, too few rows, ...) are
    printed and the loop moves on to the next stock.

    Args:
        seq_len: number of rows per input sample handed to
            ``preprocess_training_data``. Defaults to 20, the value that
            was previously hard-coded.
    """
    stocks = get_stocks()
    # Only the keys are needed; the mapped values are not used here.
    for stock_index in stocks:
        try:
            # The directory global is concatenated as-is with the key, so
            # it is expected to already end with a path separator.
            df = pd.read_csv(g_data_train_directory + stock_index)
            x_train, y_train = preprocess_training_data(df, seq_len)
        except Exception as e:
            # Best-effort batch run: report the problem and keep going.
            print(e)
if __name__ == "__main__":
    # Script entry point. The DIR_* names are not defined in this file;
    # presumably they come from `from const import *` -- verify in const.
    # train() reads g_data_train_directory as a module-level global; the
    # other globals are set here but not read by the code in this file.
    g_predict_directory = DIR_PREDICT_FULL_PARAMS
    g_model_directory = DIR_MODEL_FULL_PARAMS
    g_data_train_directory = DIR_DATA_TRAIN_FULL_PARAMS
    USE_SHORT_PARAMS = False

    train()