ARS always stops after 2,464M steps, despite exponential reward growth
if __name__ == "__main__":
    # Script entry point: build the environment and an ARS model, then
    # alternate forever between a training chunk, a checkpoint save, and a
    # single policy step on the raw environment.
    env = CustomEnv()
    # check_env(env)

    # Simplified architecture
    policy_kwargs = dict(
        net_arch=[2048, 1024, 512, 256, 128, 2],
        activation_fn=torch.nn.GELU,
    )
    model = ARS(
        ARSPolicy,
        env,
        verbose=0,
        tensorboard_log=logdir,
        device="cuda",
        policy_kwargs=policy_kwargs,
        stats_window_size=1000,
        delta_std=0.07,
        learning_rate=0.0035,
        n_eval_episodes=500,
    )

    # Number of timesteps requested from each learn() call.
    TIMESTEPS = 12288000

    # vec_env = model.get_env()
    observation, info = env.reset()

    while True:
        # reset_num_timesteps=False is passed so successive learn() calls are
        # treated as one continued run rather than a fresh one
        # (NOTE(review): confirm against the Stable-Baselines3 docs).
        model.learn(
            total_timesteps=TIMESTEPS,
            reset_num_timesteps=False,
            tb_log_name="ARS",
        )
        # Checkpoint name carries the current Unix time so saves never collide.
        model.save(f"{models_dir}/model_{int(time.time())}")

        action, _ = model.predict(observation)
        observation, reward, terminated, truncated, info = env.step(action)
        # Start a new episode once the current one has ended; the original
        # kept stepping a finished episode without ever resetting it.
        if terminated or truncated:
            observation, info = env.reset()
I tried to find some limit or early-stopping mechanism in the Stable-Baselines3 Contrib library, but I didn't find anything.
New contributor
Xardas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.