forked from AI4Finance-Foundation/FinRL_Crypto
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironment_Alpaca.py
192 lines (152 loc) · 8.62 KB
/
environment_Alpaca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""The CryptoEnvAlpaca class is a custom environment for trading multiple cryptocurrencies with respect to the Alpaca
trading platform. It is initialized with a configuration dictionary containing the price and technical indicator
arrays, and a dictionary of environment parameters such as the lookback period and normalization constants. The
environment also has several class variables such as the initial capital, buy and sell costs, and the discount factor.
The class has several methods such as reset(), step(), _generate_action_normalizer(),
and _get_state() for interacting with the environment. The reset() method resets the environment to the initial state,
the step() method takes in an action and returns the next state, reward, and done.
The _generate_action_normalizer() method generates the normalizer for the action,
and the _get_state() method returns the current state of the environment.
The environment also has several class variables such as the initial capital, buy and sell costs, and the discount
factor."""
import numpy as np
import math
from config_main import ALPACA_LIMITS
class CryptoEnvAlpaca: # custom env
def __init__(self, config, env_params, initial_capital=1000000,
buy_cost_pct=0.003, sell_cost_pct=0.003, gamma=0.99, if_log=False):
self.if_log = if_log
self.env_params = env_params
self.lookback = env_params['lookback']
self.initial_total_asset = initial_capital
self.initial_cash = initial_capital
self.buy_cost_pct = buy_cost_pct
self.sell_cost_pct = sell_cost_pct
self.gamma = gamma
# Get initial price array to compute eqw
self.price_array = config['price_array']
self.prices_initial = list(self.price_array[0, :])
self.equal_weight_stock = np.array([self.initial_cash /
len(self.prices_initial) /
self.prices_initial[i] for i in
range(len(self.prices_initial))])
# read normalization of cash, stocks and tech
self.norm_cash = env_params['norm_cash']
self.norm_stocks = env_params['norm_stocks']
self.norm_tech = env_params['norm_tech']
self.norm_reward = env_params['norm_reward']
self.norm_action = env_params['norm_action']
# Initialize constants
self.tech_array = config['tech_array']
self._generate_action_normalizer()
self.crypto_num = self.price_array.shape[1]
self.max_step = self.price_array.shape[0] - self.lookback - 1
# reset
self.time = self.lookback - 1
self.cash = self.initial_cash
self.current_price = self.price_array[self.time]
self.current_tech = self.tech_array[self.time]
self.stocks = np.zeros(self.crypto_num, dtype=np.float32)
self.stocks_cooldown = None
self.safety_factor_stock_buy = 1 - 0.1
self.total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
self.total_asset_eqw = np.sum(self.equal_weight_stock * self.price_array[self.time])
self.episode_return = 0.0
self.gamma_return = 0.0
'''env information'''
self.env_name = 'MulticryptoEnv'
# state_dim = cash[1,1] + stocks[1,4] + tech_array[1,44] * lookback + stock_cooldown[1,4]
self.state_dim = 1 + self.price_array.shape[1] + self.tech_array.shape[1] * self.lookback
self.action_dim = self.price_array.shape[1]
self.minimum_qty_alpaca = ALPACA_LIMITS * 1.1 # 10 % safety factor
self.if_discrete = False
self.target_return = 10**8
def reset(self) -> np.ndarray:
self.time = self.lookback - 1
self.current_price = self.price_array[self.time]
self.current_tech = self.tech_array[self.time]
self.cash = self.initial_cash # reset()
self.stocks = np.zeros(self.crypto_num, dtype=np.float32)
self.stocks_cooldown = np.zeros_like(self.stocks)
self.total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
state = self.get_state()
return state
def step(self, actions) -> (np.ndarray, float, bool, None):
self.time += 1
# if a stock is held add to its cooldown
for i in range(len(actions)):
if self.stocks[i] > 0:
self.stocks_cooldown[i] += 1
price = self.price_array[self.time]
for i in range(self.action_dim):
norm_vector_i = self.action_norm_vector[i]
actions[i] = actions[i] * norm_vector_i
# Compute actions in dollars
actions_dollars = actions * price
# Sell
#######################################################################################################
#######################################################################################################
#######################################################################################################
for index in np.where(actions < -self.minimum_qty_alpaca)[0]:
if self.stocks[index] > 0:
if price[index] > 0: # Sell only if current asset is > 0
sell_num_shares = min(self.stocks[index], -actions[index])
assert sell_num_shares >= 0, "Negative sell!"
self.stocks_cooldown[index] = 0
self.stocks[index] -= sell_num_shares
self.cash += price[index] * sell_num_shares * (1 - self.sell_cost_pct)
# FORCE 5% SELL every half day (30 min timeframe -> (24 * 2 / 2) * 30)
for index in np.where(self.stocks_cooldown >= 48)[0]:
sell_num_shares = self.stocks[index] * 0.05
self.stocks_cooldown[index] = 0
self.stocks[index] -= sell_num_shares
self.cash += price[index] * sell_num_shares * (1 - self.sell_cost_pct)
# Buy
#######################################################################################################
#######################################################################################################
#######################################################################################################
for index in np.where(actions > self.minimum_qty_alpaca)[0]:
if price[index] > 0: # Buy only if the price is > 0 (no missing data in this particular date)
fee_corrected_asset = self.cash / (1 + self.buy_cost_pct)
max_stocks_can_buy = (fee_corrected_asset / price[index]) * self.safety_factor_stock_buy
buy_num_shares = min(max_stocks_can_buy, actions[index])
buy_num_shares_old = buy_num_shares
if buy_num_shares < self.minimum_qty_alpaca[index]:
buy_num_shares = 0
self.stocks[index] += buy_num_shares
self.cash -= price[index] * buy_num_shares * (1 + self.buy_cost_pct)
"""update time"""
done = self.time == self.max_step
state = self.get_state()
next_total_asset = self.cash + (self.stocks * self.price_array[self.time]).sum()
next_total_asset_eqw = np.sum(self.equal_weight_stock * self.price_array[self.time])
# Difference in portfolio value + cooldown management
delta_bot = next_total_asset - self.total_asset
delta_eqw = next_total_asset_eqw - self.total_asset_eqw
# Reward function
reward = (delta_bot - delta_eqw) * self.norm_reward
self.total_asset = next_total_asset
self.total_asset_eqw = next_total_asset_eqw
self.gamma_return = self.gamma_return * self.gamma + reward
self.cumu_return = self.total_asset / self.initial_cash
if done:
reward = self.gamma_return
self.episode_return = self.total_asset / self.initial_cash
return state, reward, done, None
def get_state(self):
state = np.hstack((self.cash * self.norm_cash, self.stocks * self.norm_stocks))
for i in range(self.lookback):
tech_i = self.tech_array[self.time - i]
normalized_tech_i = tech_i * self.norm_tech
state = np.hstack((state, normalized_tech_i)).astype(np.float32)
return state
def close(self):
pass
def _generate_action_normalizer(self):
action_norm_vector = []
price_0 = self.price_array[0]
for price in price_0:
x = math.floor(math.log(price, 10)) # the order of magnitude
action_norm_vector.append(1 / ((10) ** x))
action_norm_vector = np.asarray(action_norm_vector) * self.norm_action
self.action_norm_vector = np.asarray(action_norm_vector)