from __future__ import annotations

- import gym
+ import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
- from gym import spaces
- from gym.utils import seeding
+ from gymnasium import spaces
+ from gymnasium.utils import seeding
from stable_baselines3.common.vec_env import DummyVecEnv

matplotlib.use("Agg")
@@ -153,14 +153,15 @@ def step(self, actions):
print("Sharpe: ", sharpe)
print("=================================")

- return self.state, self.reward, self.terminal, {}
+ return self.state, self.reward, self.terminal, False, {}

else:
# print("Model actions: ",actions)
# actions are the portfolio weight
# normalize to sum of 1
# if (np.array(actions) - np.array(actions).min()).sum() != 0:
- # norm_actions = (np.array(actions) - np.array(actions).min()) / (np.array(actions) - np.array(actions).min()).sum()
+ # norm_actions = (np.array(actions) - np.array(actions).min()) /
+ # (np.array(actions) - np.array(actions).min()).sum()
# else:
# norm_actions = actions
weights = self.softmax_normalization(actions)
@@ -197,7 +198,7 @@ def step(self, actions):
# print("Step reward: ", self.reward)
# self.reward = self.reward*self.reward_scaling

- return self.state, self.reward, self.terminal, {}
+ return self.state, self.reward, self.terminal, False, {}

def reset(
self,
@@ -222,7 +223,7 @@ def reset(
self.portfolio_return_memory = [0]
self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
self.date_memory = [self.data.date.unique()[0]]
- return self.state
+ return self.state, {}

def render(self, mode="human"):
return self.state
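The diff above migrates the environment from the legacy gym API to Gymnasium: step() now returns a five-tuple with separate terminated and truncated flags (the new code passes False for truncated), and reset() returns an (observation, info) pair. A minimal sketch of how calling code would consume the new signatures; the StockPortfolioEnv name, the df argument, and the random placeholder policy are assumptions for illustration, not part of this diff:

import numpy as np

# Hypothetical rollout loop against the migrated environment.
env = StockPortfolioEnv(df=train_df)        # assumed class name and constructor
state, info = env.reset()                   # Gymnasium reset: (observation, info)
terminated = truncated = False
while not (terminated or truncated):
    actions = np.random.uniform(-1, 1, env.stock_dim)  # placeholder policy output
    state, reward, terminated, truncated, info = env.step(actions)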
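Inside step(), the commented-out min-shift normalization is superseded by self.softmax_normalization(actions), which turns the raw action vector into non-negative portfolio weights that sum to 1. A standalone sketch of that kind of normalization (an illustration of the formula, not code copied from the class):

import numpy as np

def softmax_normalization(actions):
    # Exponentiate so every weight is strictly positive, then rescale so the
    # resulting long-only allocation sums to 1.
    exp_actions = np.exp(actions)
    return exp_actions / np.sum(exp_actions)

print(softmax_normalization(np.array([0.2, -1.0, 0.5])))  # three raw outputs -> weights summing to 1.0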