"""
    The hyper parameter of the rcpo_ppo algorithm
"""
# Settings dictionary for the rcpo_ppo algorithm. Consumers are expected to
# look values up by the string keys below; the grouping comments are inferred
# from the key names only -- confirm against the code that reads them.
rcpo_ppo_hyper_params = dict(
    # Return / advantage estimation (presumably discount and GAE lambda).
    gamma=0.999,
    lmda=0.95,
    truncation_size=20000,
    # Actor and critic optimiser step sizes.
    lr_actor=1e-4,
    lr_critic=2e-4,
    # Gradient clipping switches (source notes False as an alternative value)
    # and the corresponding norm thresholds.
    actor_gradient_clip=True,
    critic_gradient_clip=True,
    actor_gradient_norm_clip=1.0,
    critic_gradient_norm_clip=1.0,
    # Policy-loss related settings (source notes 0.5 as an alternative
    # value for ratio_clip_param).
    entropy_coeff=0.0,
    ratio_clip_param=0.2,
    # Optimisation loop settings.
    adam_epsilon=1e-5,
    optim_epochs=50,
    optim_batch_size=1024,
    # Network layer sizes for the policy and value networks.
    policy_net_layers=[64, 64, 64],
    v_net_layers=[64, 64, 64],
    gaussian_fixed_var=True,
    # Constraint level and Lagrange-multiplier settings.
    constraint=0.25,
    lr_lagrange_mul_init=1e-5,
    lr_lagrange_mul_decay_factor=1 - 1e-9,  # just below 1.0
    lagrange_multiplier_min=0,
    lagrange_multiplier_max=10000,
)

"""
    The hyper parameter of the rcpo_ppo_lag_func algorithm
"""
# Settings dictionary for the rcpo_ppo_lag_func algorithm. Consumers are
# expected to look values up by the string keys below; the grouping comments
# are inferred from the key names only -- confirm against the code that
# reads them.
rcpo_ppo_lag_func_hyper_params = dict(
    # Return / advantage estimation (presumably discount and GAE lambda).
    gamma=0.999,
    lmda=0.95,
    truncation_size=20000,
    # Actor and critic optimiser step sizes.
    lr_actor=1e-4,
    lr_critic=2e-4,
    # Gradient clipping switches (source notes False as an alternative value)
    # and the corresponding norm thresholds.
    actor_gradient_clip=True,
    critic_gradient_clip=True,
    actor_gradient_norm_clip=1.0,
    critic_gradient_norm_clip=1.0,
    # Policy-loss related settings (source notes 0.5 as an alternative
    # value for ratio_clip_param).
    entropy_coeff=0.0,
    ratio_clip_param=0.2,
    # Optimisation loop settings.
    adam_epsilon=1e-5,
    optim_epochs=50,
    optim_batch_size=1024,
    # Network layer sizes for the policy and value networks.
    policy_net_layers=[64, 64, 64],
    v_net_layers=[64, 64, 64],
    gaussian_fixed_var=True,
    # Constraint level and Lagrange-multiplier settings (source notes 5e-7
    # as an alternative value for lr_lagrange_mul_init).
    constraint=0.25,
    lr_lagrange_mul_init=2e-4,
    lagrange_multiplier_min=0,
    lagrange_multiplier_max=10000,
    # Settings specific to the "lag" network. NOTE(review): the key
    # "lag_func_updat_sample_num" is spelled this way in the source and is
    # kept verbatim because readers of this dict look it up by that string.
    lag_func_updat_sample_num=100,
    lag_net_layers=[64, 64],
    lag_gradient_clip=False,
    lag_gradient_norm_clip=50.0,
)