gym_config:
    # Rendering toggle.
    render: false
    # NOTE(review): three entries here and three names in dynamics_name below;
    # presumably these are indices into that list - confirm with the consumer.
    task_dynamics_list: [0, 1, 2]
    # Number of episodes for each subtask.
    subtask_episode: 3
    # Length of each episode.
    # NOTE(review): a bare `None` is not YAML null; YAML loaders resolve it to
    # the string "None". The value is kept unchanged here - confirm the
    # consumer expects that string, otherwise this should be `null`.
    subtask_episode_length: None
    # Number of task episodes.
    task_episode: 6
    # Seed used for all environments.
    seed: 1000
    # Names of all environments.
    dynamics_name:
        - 'intersection-v21'
        - 'intersection-v20'
        - 'intersection-v12'

# Settings for the MPC controller
mpc_config:
    # Optimizer choice: 'Random' or 'CEM' (each has a same-named section below).
    optimizer: 'CEM'

    # Both sections below carry the same set of keys.
    Random:
        # Length of the prediction horizon.
        horizon: 20
        # Number of random samples drawn for MPC.
        popsize: 1000
        particle: 1
        # Reward discount coefficient.
        gamma: 1
        # Lower / upper bound of the solution space.
        # NOTE(review): each bound has one element while action_dim is 2 -
        # confirm the consumer applies a single bound to every action dimension.
        action_low: [-1]
        action_high: [1]
        action_dim: 2
        max_iters: 20
        num_elites: 50
        epsilon: 0.001
        alpha: 0.1
        init_mean: 0
        init_var: 1
        action_cost: false

    CEM:
        # Length of the prediction horizon (the original note also lists 10,
        # presumably an alternative value).
        horizon: 20
        # Number of random samples drawn for MPC.
        popsize: 200
        # Number of particles to enlarge.
        particle: 1
        # Reward discount coefficient.
        gamma: 1
        # Lower / upper bound of the solution space.
        # NOTE(review): each bound has one element while action_dim is 2 -
        # confirm the consumer applies a single bound to every action dimension.
        action_low: [-1]
        action_high: [1]
        action_dim: 2
        max_iters: 5
        num_elites: 20
        epsilon: 0.001
        alpha: 0.1
        init_mean: 0
        init_var: 1
        action_cost: false


# Parameters of the models
DPGP_config:
    # Selects one of the sections below: 'FullState' or 'VelocityField'.
    mode: 'FullState'

    FullState:
        # Initial value of alpha (original note records 0.5 for both
        # cartpole_stable and cartpole_swingup).
        alpha: 0.5
        # Adaptively update alpha.
        ada_alpha: false
        # Dimension of the state space / action space.
        state_dim: 12
        action_dim: 2
        # Learning rate of the Gaussian process.
        lr: 0.1
        # Number of GP iterations.
        gp_iter: 10
        # Whether sequential_vi uses the merge strategy.
        merge: true
        # A component is merged when the KLD falls below this value.
        merge_threshold: 70.0
        # Sample count at which merging starts.
        merge_burnin: 10
        # Model type, one of exact / sparse / sample.
        model_type: 'sample'
        # [sparse/sample] Number of data points kept after a sparse operation.
        max_inducing_point: 1300
        # [sparse/sample] A sparse operation runs once n exceeds this value.
        trigger_induce: 1500
        # [sample] Number of MC samples used to find the highest lower bound.
        sample_number: 100
        # Initial transition bias towards other clusters.
        window_prob: 0.001
        # Initial self-transition bias.
        self_prob: 1.0
        # GP initialization and constraint parameters.
        param:
            - 0.001     # noise_covar initial value
            - 0.0001    # noise_covar constraint
            - 0.0       # constant initial value
            - 0.5       # outputscale initial value
            - 1.0       # lengthscale initial value

    VelocityField:
        # Initial value of alpha (original note records 0.5 for both
        # cartpole_stable and cartpole_swingup).
        alpha: 0.1
        # Adaptively update alpha.
        ada_alpha: false
        # Dimension of the state space / action space.
        state_dim: 2
        action_dim: 2
        # Learning rate of the Gaussian process.
        lr: 0.1
        # Number of GP iterations.
        gp_iter: 10
        # Whether sequential_vi uses the merge strategy.
        merge: true
        # A component is merged when the KLD falls below this value.
        merge_threshold: 60.0
        # Sample count at which merging starts.
        merge_burnin: 15
        # Model type, one of exact / sparse / sample.
        model_type: 'sample'
        # [sparse/sample] Number of data points kept after a sparse operation.
        max_inducing_point: 1300
        # [sparse/sample] A sparse operation runs once n exceeds this value.
        trigger_induce: 1500
        # [sample] Number of MC samples used to find the highest lower bound.
        sample_number: 100
        # Initial transition bias towards other clusters.
        window_prob: 0.001
        # Initial self-transition bias.
        self_prob: 1.0
        # GP initialization and constraint parameters.
        param:
            - 0.001     # noise_covar initial value
            - 0.0001    # noise_covar constraint
            - 0.0       # constant initial value
            - 0.5       # outputscale initial value
            - 1.0       # lengthscale initial value


SingleGP_config:  # uses exact inducing points
    # Dimension of the state space / action space.
    state_dim: 12
    action_dim: 2
    # Learning rate of the Gaussian process.
    lr: 0.1
    # Number of GP iterations.
    gp_iter: 10
    # Number of data points kept after a sparse operation.
    max_inducing_point: 400
    # A sparse operation runs once n exceeds this value.
    trigger_induce: 600
    # Number of MC samples used to find the highest lower bound.
    sample_number: 100
    # GP initialization and constraint parameters.
    param:
        - 0.001     # noise_covar initial value
        - 0.0001    # noise_covar constraint
        - 0.0       # constant initial value
        - 0.5       # outputscale initial value
        - 1.0       # lengthscale initial value


SingleSparseGP_config:  # uses pseudo-inducing points
    # Dimension of the state space / action space.
    state_dim: 12
    action_dim: 2
    # Learning rate of the Gaussian process.
    lr: 0.1
    # Number of GP iterations.
    gp_iter: 10
    # Number of data points kept after a sparse operation.
    max_inducing_point: 500
    # GP initialization and constraint parameters.
    param:
        - 0.001     # noise_covar initial value
        - 0.0001    # noise_covar constraint
        - 0.0       # constant initial value
        - 0.5       # outputscale initial value
        - 1.0       # lengthscale initial value


NP_config:
    pretrain: true

    model_config:
        # When true, model_path must be specified; when false, a new model
        # is trained.
        load_model: false
        # Path the model is loaded from.
        model_path: './baselines/storage/example.ckpt'
        # Number of environment states.
        state_dim: 12
        # Number of controls.
        action_dim: 2
        # Output dimension.
        output_dim: 12
        # 'gp'-like method, or 'nn'.
        likelihood_method: 'gp'
        # Negative 'loss' as the likelihood, or 'll'.
        likelihood_value: 'loss'
        # Method used for training and predicting; the other choice is 'pure'.
        strategy: 'nn'
        # Must be smaller than the episode.
        context_number: 100
        # Open question from the original author: the smaller the better?
        target_number: 25
        # Permute the data while training.
        sequential: false
        # To be updated.
        virtual_batch: false
        np_hidden_list: [256, 128, 64]
        np_latent_dim: 64

    training_config:
        # Number of epochs used to train the dynamics model.
        n_epochs: 100
        # Learning rate.
        learning_rate: 0.0005
        batch_size: 1024
        save_model_flag: false
        # Path the model is saved to.
        save_model_path: 'np_save_test.ckpt'
        validation_flag: false
        # Frequency of validation.
        validation_freq: 100
        # Fraction of the data used as the validation set.
        validation_ratio: 0.1



NN_config:
    pretrain: true

    model_config:
        # When true, the model path must be specified; when false, a new
        # model is trained.
        load_model: false
        save_model_flag: false
        # Path the model is saved to.
        save_model_path: './baselines/storage/exp_1.ckpt'
        # Number of environment states.
        state_dim: 12
        # Number of controls.
        action_dim: 2
        # Output dimension.
        output_dim: 12
        # Number of hidden layers.
        hidden_dim: 2
        # Size of each hidden layer.
        hidden_size: 256

    training_config:
        # Number of epochs used to train the dynamics model.
        n_epochs: 100
        # Learning rate.
        learning_rate: 0.008
        batch_size: 128
        validation_flag: false
        # Frequency of validation.
        validation_freq: 10
        # Fraction of the data used as the validation set.
        validation_ratio: 0.2
        # Experiment number.
        exp_number: 1

    dataset_config:
        load_flag: false
        load_path: './baselines/storage/data_exp_1.pkl'
        # Maximum number of steps per episode.
        n_max_steps: 1000
        # Number of random episodes whose data fits the initial dynamics model.
        n_random_episodes: 800
        # Test-set share of the random dataset; the remainder is the training set.
        testset_split: 0.2
        # Number of episodes sampled with the MPC controller.
        n_mpc_episodes: 4
        # Share of the MPC dataset within the training set.
        mpc_dataset_split: 0.5
        min_train_samples: 6000
        # Number of reinforce iterations to perform.
        n_mpc_itrs: 100
        # Set to true to save the whole dataset.
        save_flag: false
        save_path: './baselines/storage/data_exp_1.pkl'


# Settings for the MAML section. None of these keys is documented in the
# original file; the notes below are hedged and should be confirmed against
# the consumer.
maml_config:
    # Same values as the state_dim / action_dim keys of the other 12-state
    # model sections in this file.
    state_dim: 12
    action_dim: 2
    # NOTE(review): presumably the number of adaptation iterations - confirm.
    adapt_iter: 10
    # NOTE(review): meaning of alpha is not stated here; it sits next to
    # separate adapt_lr and meta_lr keys - confirm its role.
    alpha: 0.01
    adapt_lr: 0.001
    meta_lr: 0.001
    meta_epoch: 100
    meta_batch_size: 1 # one gradient descent
    hidden_size: 500


DPNN_config:
    DP_config:
        # Alternative noted by the original author: false.
        DPprior: true
        # Length of the short-term memory; training starts once it is full.
        stm_length: 20
        # Initial value of alpha (original note records 0.5 for both
        # cartpole_stable and cartpole_swingup; 1.1 is noted as an alternative).
        alpha: 0.5
        # Adaptively update alpha.
        ada_alpha: false
        # Whether sequential_vi uses the merge strategy.
        merge: true
        # A component is merged when the KLD falls below this value.
        merge_threshold: 20.0
        # Sample count at which merging starts.
        merge_burnin: 15
        # Initial transition bias towards other clusters.
        window_prob: 0.001
        # Initial self-transition bias.
        self_prob: 1.0

    NN_config:
        model_config:
            # NOTE(review): the key says "save" but the original note called
            # this "the path to load the model" - confirm how the consumer
            # uses it.
            save_model_path: './baselines/dpnn/models/meta_intersection.ckpt'
            pretrain: true
            # Number of environment states.
            state_dim: 12
            # Number of controls.
            action_dim: 2
            # Output dimension.
            output_dim: 12
            # Number of hidden layers.
            hidden_dim: 2
            # Size of each hidden layer.
            hidden_size: 256

        training_config:
            # Number of epochs used to train the dynamics model.
            n_epochs: 100
            # Learning rate.
            learning_rate: 0.0005
            batch_size: 64
            validation_flag: true
            # Frequency of validation.
            validation_freq: 100
            # Fraction of the data used as the validation set.
            validation_ratio: 0.1
