import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense

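
# A minimal single-stock trading environment: the agent starts with a fixed cash
# balance and, at each step, may buy one share, sell one share, or hold. The
# observation is the scaled feature row for the current day plus the account
# state (balance, net worth, shares owned).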
class StockEnv:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.state_size = data.shape[1] + 3  # stock data + balance + net_worth + stock_owned
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.current_step = 0
        self.stock_owned = 0
        return self._get_observation()

    def _get_observation(self):
        obs = np.hstack((self.data.iloc[self.current_step].values,
                         [self.balance, self.net_worth, self.stock_owned]))
        return obs

    def step(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        prev_net_worth = self.net_worth

        if action == 0:  # Buy one share if the balance covers the price
            if current_price <= self.balance:
                self.stock_owned += 1
                self.balance -= current_price
        elif action == 1:  # Sell one share if any are held
            if self.stock_owned > 0:
                self.stock_owned -= 1
                self.balance += current_price
        # action == 2 is Hold: no change to the position

        # Re-mark net worth at the current price regardless of the action taken
        self.net_worth = self.balance + self.stock_owned * current_price

        # Go to the next day
        self.current_step += 1

        # Reward is the relative change in net worth over this step
        reward = (self.net_worth - prev_net_worth) / prev_net_worth

        # Episode ends after 100 steps or when the data runs out
        done = self.current_step >= min(100, len(self.data)) - 1

        return self._get_observation(), reward, done

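
# A simple actor-critic agent built from two small Keras MLPs: the actor maps a
# state to a softmax over the actions, and the critic estimates the state value
# used to form the one-step TD advantage in train().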
class ActorCritic:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.95
        self.learning_rate = 0.001

        self.actor = self.build_actor()
        self.critic = self.build_critic()

    def build_actor(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(self.action_size, activation='softmax')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def build_critic(self):
        state_input = Input(shape=(self.state_size,))
        dense1 = Dense(32, activation='relu')(state_input)
        dense2 = Dense(32, activation='relu')(dense1)
        output = Dense(1, activation='linear')(dense2)

        model = Model(inputs=state_input, outputs=output)
        model.compile(loss='mean_squared_error',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def train(self, state, action, reward, next_state, done):
        target = np.zeros((1, 1))
        advantages = np.zeros((1, self.action_size))

        value = self.critic.predict(state, verbose=0)[0]
        next_value = self.critic.predict(next_state, verbose=0)[0]

        # One-step TD target and advantage; the advantage is placed at the index
        # of the chosen action so the categorical cross-entropy loss weights that
        # action's log-probability by the advantage.
        if done:
            advantages[0][action] = reward - value
            target[0][0] = reward
        else:
            advantages[0][action] = reward + self.gamma * next_value - value
            target[0][0] = reward + self.gamma * next_value

        self.actor.fit(state, advantages, epochs=1, verbose=0)
        self.critic.fit(state, target, epochs=1, verbose=0)

    def act(self, state):
        # Sample an action from the actor's softmax policy
        probabilities = self.actor.predict(state, verbose=0)[0]
        action = np.random.choice(self.action_size, p=probabilities)
        return action

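
# Run the environment for a fixed number of episodes, updating the agent after
# every step and saving the actor/critic weights at the end of each episode.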
def train_model(env, actor_critic, episodes=100):
    total_rewards = []
    for episode in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, env.state_size]).astype(np.float32)
        done = False
        total_reward = 0

        while not done:
            action = actor_critic.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, env.state_size]).astype(np.float32)
            actor_critic.train(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        total_rewards.append(total_reward)
        print(f"Episode: {episode}, Total Reward: {total_reward}")
        actor_critic.actor.save(f"actor{episode}.h5")
        actor_critic.critic.save(f"critic{episode}.h5")

    print(total_rewards)
    plt.plot(total_rewards)
    plt.title('Total Reward per Episode')
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.show()

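
# Load the dataset, select one symbol's price history, scale its features to
# [0, 1], and train the agent on that series.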
def main():
    df = pd.read_csv('new_dataset.csv')
    symbols = df['Symbol'].unique()
    grouped_df = df.groupby('Symbol')

    # Train on the first symbol only, using price and moving-average features
    data = grouped_df.get_group(symbols[0])
    data = data[['Open', 'High', 'Low', 'Close', 'SMA_20', 'SMA_50']]
    scaler = MinMaxScaler()
    data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

    env = StockEnv(data)
    state_size = env.state_size
    action_size = 3  # Buy, Sell, or Hold

    actor_critic = ActorCritic(state_size=state_size, action_size=action_size)
    train_model(env=env, actor_critic=actor_critic)


if __name__ == '__main__':
    main()