@@ -423,7 +423,7 @@ def dump_logs(self) -> None:
423423 self .logger .record ("time/time_elapsed" , int (time_elapsed ), exclude = "tensorboard" )
424424 self .logger .record ("time/total_timesteps" , self .num_timesteps , exclude = "tensorboard" )
425425 if self .use_sde :
426- self .logger .record ("train/std" , (self .actor .get_std ()).mean ().item ())
426+ self .logger .record ("train/std" , (self .actor .get_std ()).mean ().item ()) # type: ignore[operator]
427427
428428 if len (self .ep_success_buffer ) > 0 :
429429 self .logger .record ("rollout/success_rate" , safe_mean (self .ep_success_buffer ))
@@ -544,14 +544,14 @@ def collect_rollouts(
544544 assert train_freq .unit == TrainFrequencyUnit .STEP , "You must use only one env when doing episodic training."
545545
546546 if self .use_sde :
547- self .actor .reset_noise (env .num_envs )
547+ self .actor .reset_noise (env .num_envs ) # type: ignore[operator]
548548
549549 callback .on_rollout_start ()
550550 continue_training = True
551551 while should_collect_more_steps (train_freq , num_collected_steps , num_collected_episodes ):
552552 if self .use_sde and self .sde_sample_freq > 0 and num_collected_steps % self .sde_sample_freq == 0 :
553553 # Sample a new noise matrix
554- self .actor .reset_noise (env .num_envs )
554+ self .actor .reset_noise (env .num_envs ) # type: ignore[operator]
555555
556556 # Select action randomly or according to policy
557557 actions , buffer_actions = self ._sample_action (learning_starts , action_noise , env .num_envs )
0 commit comments