@@ -1,4 +1,4 @@
-from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union
+from typing import Any, ClassVar, Optional, TypeVar, Union

 import torch as th
 from gymnasium import spaces
@@ -57,15 +57,15 @@ class A2C(OnPolicyAlgorithm):
     :param _init_setup_model: Whether or not to build the network at the creation of the instance
     """

-    policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
+    policy_aliases: ClassVar[dict[str, type[BasePolicy]]] = {
         "MlpPolicy": ActorCriticPolicy,
         "CnnPolicy": ActorCriticCnnPolicy,
         "MultiInputPolicy": MultiInputActorCriticPolicy,
     }

     def __init__(
         self,
-        policy: Union[str, Type[ActorCriticPolicy]],
+        policy: Union[str, type[ActorCriticPolicy]],
         env: Union[GymEnv, str],
         learning_rate: Union[float, Schedule] = 7e-4,
         n_steps: int = 5,
@@ -78,12 +78,12 @@ def __init__(
         use_rms_prop: bool = True,
         use_sde: bool = False,
         sde_sample_freq: int = -1,
-        rollout_buffer_class: Optional[Type[RolloutBuffer]] = None,
-        rollout_buffer_kwargs: Optional[Dict[str, Any]] = None,
+        rollout_buffer_class: Optional[type[RolloutBuffer]] = None,
+        rollout_buffer_kwargs: Optional[dict[str, Any]] = None,
         normalize_advantage: bool = False,
         stats_window_size: int = 100,
         tensorboard_log: Optional[str] = None,
-        policy_kwargs: Optional[Dict[str, Any]] = None,
+        policy_kwargs: Optional[dict[str, Any]] = None,
         verbose: int = 0,
         seed: Optional[int] = None,
         device: Union[th.device, str] = "auto",
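This diff replaces the deprecated `typing.Dict`/`typing.Type` aliases with the builtin `dict`/`type` generics (PEP 585, usable in annotations since Python 3.9). A minimal, self-contained sketch of the same pattern; `PolicyRegistry` here is a hypothetical class for illustration, not part of stable-baselines3:

```python
# Sketch only: shows that builtin generics are drop-in replacements for the
# removed typing aliases, e.g. Dict[str, Any] -> dict[str, Any] and
# Type[X] -> type[X]. Requires Python 3.9+.
from typing import Any, ClassVar, Optional


class PolicyRegistry:
    # ClassVar[dict[str, type]] replaces ClassVar[Dict[str, Type]].
    aliases: ClassVar[dict[str, type]] = {}

    def __init__(self, policy_kwargs: Optional[dict[str, Any]] = None) -> None:
        # Optional[dict[str, Any]] replaces Optional[Dict[str, Any]].
        self.policy_kwargs = policy_kwargs or {}


registry = PolicyRegistry()
registry.aliases["MlpPolicy"] = object  # any class is a valid `type` value
```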