
Commit 693ccce

Add DRL source code
A2C Portfolio Optimization
1 parent 13b2ccf commit 693ccce

File tree

4 files changed: +315 -0 lines changed

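The commit adds two Python scripts that share the same StockEnv and ActorCritic classes, plus two binary .h5 files (saved Keras models for the actor and critic networks). The per-step update in ActorCritic.train bootstraps a one-step temporal-difference target for the critic and feeds the corresponding advantage to the actor. As a minimal, self-contained sketch of that arithmetic (plain Python, with made-up reward and value numbers, separate from the Keras models in the diffs below):

gamma = 0.95        # discount factor, as set in ActorCritic.__init__
reward = 0.02       # example per-step reward (fractional change in net worth); made-up value
value = 1.10        # critic's estimate V(s) for the current state; made-up value
next_value = 1.15   # critic's estimate V(s') for the next state; made-up value
done = False

if done:
    td_target = reward                       # no bootstrapping on the terminal step
    advantage = reward - value
else:
    td_target = reward + gamma * next_value  # one-step TD target fitted by the critic
    advantage = td_target - value            # advantage that weights the actor's update

print(td_target, advantage)                  # approximately 1.1125 and 0.0125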

portfolio-optimisation/DRL_test.py

Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model, Model
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense


class StockEnv:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.state_size = data.shape[1] + 3  # stock data + balance + net_worth + stock_owned
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.current_step = 0
        self.stock_owned = 0
        return self._get_observation()

    def _get_observation(self):
        obs = np.hstack((self.data.iloc[self.current_step].values,
                         [self.balance, self.net_worth, self.stock_owned]))
        return obs

    def step(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        prev_net_worth = self.net_worth
        if action == 0:  # Buy one share if the balance covers the price
            if current_price <= self.balance:
                self.stock_owned += 1
                self.balance -= current_price
            self.net_worth = self.balance + self.stock_owned * current_price
        elif action == 1:  # Sell one share if any are held
            if self.stock_owned > 0:
                self.stock_owned -= 1
                self.balance += current_price
            self.net_worth = self.balance + self.stock_owned * current_price
        elif action == 2:  # Hold
            self.net_worth = self.balance + self.stock_owned * current_price
            print(self.balance, current_price)

        # Go to the next day
        self.current_step += 1

        # Reward: fractional change in net worth over the step
        reward = (self.net_worth - prev_net_worth) / prev_net_worth
        print(reward)

        # Episode ends after 99 steps (the script assumes at least 101 rows per symbol)
        done = (self.current_step == 100 - 1)

        return (self._get_observation(), reward, done)


class ActorCritic:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.95
        self.learning_rate = 0.001

        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def build_actor(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(self.action_size, activation='softmax')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def build_critic(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(1, activation='linear')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='mean_squared_error',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def train(self, state, action, reward, next_state, done):
        target = np.zeros((1, 1))
        advantages = np.zeros((1, self.action_size))

        value = self.critic.predict(state)[0]
        next_value = self.critic.predict(next_state)[0]

        if done:
            advantages[0][action] = reward - value
            target[0][0] = reward
        else:
            advantages[0][action] = reward + self.gamma * next_value - value
            target[0][0] = reward + self.gamma * next_value

        self.actor.fit(state, advantages, epochs=1, verbose=0)
        self.critic.fit(state, target, epochs=1, verbose=0)

    def act(self, state):
        probabilities = self.actor.predict(state)[0]
        action = np.random.choice(self.action_size, p=probabilities)
        return action


def test_model(env, actor_critic, stock):
    state = env.reset()
    state = np.reshape(state, [1, env.state_size]).astype(np.float32)
    done = False
    total_reward = 0

    total_rewards = []
    while not done:
        action = actor_critic.act(state)
        next_state, reward, done = env.step(action)
        next_state = np.reshape(next_state, [1, env.state_size]).astype(np.float32)
        state = next_state
        total_reward += reward
        total_rewards.append(total_reward)
    current_price = env.data.iloc[100]['Close']
    profit = env.balance + (env.stock_owned * current_price) - env.initial_balance
    plt.plot(total_rewards)
    plt.title(f"{stock}\n Total Profit : {profit}")
    plt.xlabel('days')
    plt.ylabel('Total Rewards')
    plt.show()

    return total_reward


df = pd.read_csv('new_dataset.csv')
groups = list(set(df['Symbol']))
df.head()
grouped_df = df.groupby('Symbol')
# data = pd.read_csv('new_datset.csv')
for i in range(5):
    data = grouped_df.get_group(groups[i])
    # Use the same feature columns as the training script so the loaded models'
    # input size (data.shape[1] + 3) matches the weights in actor400.h5 / critic400.h5
    data = data[['Open', 'High', 'Low', 'Close', 'SMA_20', 'SMA_50']]
    scaler = MinMaxScaler()
    data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

    env = StockEnv(data)
    state_size = env.state_size
    action_size = 3  # Buy, Sell, or Hold

    actor_critic = ActorCritic(state_size=state_size, action_size=action_size)
    actor_critic.actor = load_model('actor400.h5')
    actor_critic.critic = load_model('critic400.h5')
    total_reward = test_model(env=env, actor_critic=actor_critic, stock=groups[i])
    current_price = env.data.iloc[100]['Close']
    print(f'Total Reward: {total_reward}, Profit : {env.balance + env.stock_owned*current_price}')
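Both scripts read new_dataset.csv and group it by Symbol, but that CSV is not part of the commit. To exercise the code without the original data, a synthetic stand-in with the expected columns (Symbol, Open, High, Low, Close, SMA_20, SMA_50) could be generated roughly as follows; the random-walk prices and placeholder tickers are assumptions for illustration, not the project's real dataset:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
frames = []
for symbol in ['AAA', 'BBB', 'CCC', 'DDD', 'EEE']:  # placeholder tickers, not from the commit
    close = 100 + np.cumsum(rng.normal(0, 1, size=250))  # simple random-walk close prices
    df = pd.DataFrame({
        'Symbol': symbol,
        'Open': close + rng.normal(0, 0.5, size=250),
        'High': close + 1.0,
        'Low': close - 1.0,
        'Close': close,
    })
    # Moving-average features selected by the training script
    df['SMA_20'] = df['Close'].rolling(20).mean()
    df['SMA_50'] = df['Close'].rolling(50).mean()
    frames.append(df.dropna())  # drop rows where the 50-day SMA is undefined

pd.concat(frames, ignore_index=True).to_csv('new_dataset.csv', index=False)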
Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model, Model
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense


class StockEnv:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.state_size = data.shape[1] + 3  # stock data + balance + net_worth + stock_owned
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.current_step = 0
        self.stock_owned = 0
        return self._get_observation()

    def _get_observation(self):
        obs = np.hstack((self.data.iloc[self.current_step].values,
                         [self.balance, self.net_worth, self.stock_owned]))
        return obs

    def step(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        # print(current_price)
        # print(self.net_worth)
        prev_net_worth = self.net_worth
        if action == 0:  # Buy one share if the balance covers the price
            if current_price <= self.balance:
                self.stock_owned += 1
                self.balance -= current_price
            self.net_worth = self.balance + self.stock_owned * current_price
        elif action == 1:  # Sell one share if any are held
            if self.stock_owned > 0:
                self.stock_owned -= 1
                self.balance += current_price
            self.net_worth = self.balance + self.stock_owned * current_price
        elif action == 2:  # Hold
            self.net_worth = self.balance + self.stock_owned * current_price
            print(self.balance, self.stock_owned, current_price)

        # Go to the next day
        self.current_step += 1

        # Reward: fractional change in net worth over the step
        reward = (self.net_worth - prev_net_worth) / prev_net_worth
        print(reward)

        # Episode ends after 99 steps
        done = (self.current_step == 100 - 1)

        return (self._get_observation(), reward, done)


class ActorCritic:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.95
        self.learning_rate = 0.001

        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def build_actor(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(self.action_size, activation='softmax')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def build_critic(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(1, activation='linear')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='mean_squared_error',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def train(self, state, action, reward, next_state, done):
        target = np.zeros((1, 1))
        advantages = np.zeros((1, self.action_size))

        value = self.critic.predict(state)[0]
        next_value = self.critic.predict(next_state)[0]

        if done:
            advantages[0][action] = reward - value
            target[0][0] = reward
        else:
            advantages[0][action] = reward + self.gamma * next_value - value
            target[0][0] = reward + self.gamma * next_value

        self.actor.fit(state, advantages, epochs=1, verbose=0)
        self.critic.fit(state, target, epochs=1, verbose=0)

    def act(self, state):
        probabilities = self.actor.predict(state)[0]
        action = np.random.choice(self.action_size, p=probabilities)
        return action


def train_model(env, actor_critic, episodes=100):
    total_rewards = []
    i = 0
    for episode in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, env.state_size]).astype(np.float32)
        done = False
        total_reward = 0

        while not done:
            action = actor_critic.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, env.state_size]).astype(np.float32)
            actor_critic.train(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        total_rewards.append(total_reward)
        print(f"Episode : {episode}, Total Reward : {total_reward}")
        # Checkpoint the networks after every episode
        actor_critic.actor.save(f"actor{i}.h5")
        actor_critic.critic.save(f"critic{i}.h5")
        i += 1

    print(total_rewards)
    plt.plot(total_rewards)
    plt.title('Total Reward per Episode')
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.show()


def main():
    df = pd.read_csv('new_dataset.csv')
    groups = list(set(df['Symbol']))
    df.head()
    grouped_df = df.groupby('Symbol')
    # data = pd.read_csv('new_datset.csv')
    data = grouped_df.get_group(groups[0])
    data = data[['Open', 'High', 'Low', 'Close', 'SMA_20', 'SMA_50']]
    scaler = MinMaxScaler()
    data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

    env = StockEnv(data)
    state_size = env.state_size
    action_size = 3  # Buy, Sell, or Hold

    actor_critic = ActorCritic(state_size=state_size, action_size=action_size)
    train_model(env=env, actor_critic=actor_critic)


if __name__ == '__main__':
    main()
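train_model above writes an actor{i}.h5 / critic{i}.h5 checkpoint pair after every episode, and DRL_test.py loads actor400.h5 and critic400.h5, which implies a run longer than the default episodes=100. A minimal sketch of inspecting one such checkpoint outside the test harness; the checkpoint names and the zero state vector are placeholders:

import numpy as np
from tensorflow.keras.models import load_model

state_size = 9                     # 6 feature columns + balance + net_worth + stock_owned
actor = load_model('actor0.h5')    # any checkpoint written by train_model
critic = load_model('critic0.h5')

state = np.zeros((1, state_size), dtype=np.float32)  # placeholder state, not real market data
probs = actor.predict(state, verbose=0)[0]           # softmax over Buy / Sell / Hold
value = critic.predict(state, verbose=0)[0][0]       # critic's value estimate V(s)
print(probs, value)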

portfolio-optimisation/actor400.h5

48.5 KB
Binary file not shown.

portfolio-optimisation/critic400.h5

46.2 KB
Binary file not shown.
