#!/bin/bash
#
# Launcher for code/examples/big_ae/run_lm_vae_clas_training_bak_bias.py.
#
# Usage: run from the directory that contains `code/`, because the Python
# script is referenced through a relative path further down.
#
# Abort on the first failing command and on any unset variable, so a
# misspelled variable name cannot silently turn into an empty argument.
set -eu

# Optional: make the project sources importable (`:-` keeps this safe under `set -u`).
#export PYTHONPATH="${PYTHONPATH:-}:/workspace/code"

# GPU selection; the value is passed to the job as CUDA_VISIBLE_DEVICES.
#export GPU_ID=0,1
export GPU_ID=3

# Root directories of the experiment. Each value ends with a trailing slash.
checkpoint_path=/mnt/efs/fs2/hzt/causal/Optimus/checkpoints/
output_path=/mnt/efs/fs2/hzt/causal/Optimus/outputs/
data_path=/mnt/efs/fs2/hzt/causal/Optimus/data/

## Yelp

# Run-mode switch: 1 selects the small Yelp files with tiny batches,
# any other value selects the full biosbias files.
debug=0

attribute_dim=50
if test "$debug" -eq 1; then
    # Debug run: the evaluation file is deliberately the training file.
    TRAIN_FILE=data/datasets/yelp_data_tst/small/train.txt
    TRAIN_FILE_SEMI=data/datasets/yelp_data_tst/small/train_semi.txt
    #TEST_FILE=data/datasets/yelp_data_tst/small/test.txt
    TEST_FILE=data/datasets/yelp_data_tst/small/train.txt

    per_gpu_train_batch_size=2
    per_gpu_eval_batch_size=2
    num_train_epochs=100
    logging_steps=1
    save_steps=20000
else
    # Full run: biosbias train / semi / dev files.
    TRAIN_FILE=/mnt/efs/fs2/hzt/causal/Optimus/data/bios/biosbias/data_gender_first_cor94/bios_data.raw.tsv.len20.train
    TRAIN_FILE_SEMI=/mnt/efs/fs2/hzt/causal/Optimus/data/bios/biosbias/data_gender_first_cor94/bios_data.raw.tsv.len20.train.semi
    TEST_FILE=/mnt/efs/fs2/hzt/causal/Optimus/data/bios/biosbias/data_gender_first_cor94/bios_data.raw.tsv.len20.dev

    per_gpu_train_batch_size=8
    per_gpu_eval_batch_size=8
    num_train_epochs=50
    logging_steps=800
    save_steps=800
fi

## ==================================
# semi, beta
##
#
#checkpoint_dir="${output_path}/finetune_lm_gender/vae_gpt2encoder/genderfirst-basic-s3-beta1-gsfixed-newmask-t1_w0_wr1_lr5e5_gumbel_samelen_bz16_bak"
#gloabl_step_eval=45000
#
#CUDA_VISIBLE_DEVICES=$GPU_ID python3 code/examples/big_ae/run_lm_vae_clas_training_bak_bias.py \
#    --semi \
#    --classifier_dir=/mnt/efs/fs2/hzt/causal/Optimus/outputs_gender/classifiers/genderfirst_cor94_gender_classifier_finetune_weighted_01_09_lr2e6/checkpoint-50 \
#    --eval_elbo \
#    --beta 1.0 \
#    --use_gumbel \
#    --output_dir=${checkpoint_dir}/outputs-45000/genderfirst-semi-s103-gsfixed-newmask-t05_wc0_wzc0_wz0_w1_wr1_lr1e6_gumbel_samelen_bz8_dtrm_bak \
#    --learning_rate 1e-6 \
#    --temperature=0.5 \
#    --lambda_c_loss=0 \
#    --lambda_reg_z_c=0 \
#    --lambda_reg_z=0 \
#    --lambda_clas=1 \
#    --lambda_recon=1 \
#    --temperature_anneal_factor=1 \
#    --temperature_anneal_iters=20000 \
#    --checkpoint_dir=${checkpoint_dir} \
#    --gloabl_step_eval ${gloabl_step_eval} \
#    --attribute_dim $attribute_dim \
#    --dataset Yelp \
#    --encoder_model_type=gpt2-encoder \
#    --encoder_model_name_or_path=gpt2 \
#    --decoder_model_type=gpt2 \
#    --decoder_model_name_or_path=gpt2 \
#    --classifier_model_type=gpt2-clas \
#    --classifier_model_name_or_path=gpt2 \
#    --ratio_zero 0.5 \
#    --ratio_increase 0.25 \
#    --do_train \
#    --fb_mode 1 \
#    --dim_target_kl 0.5\
#    --train_data_file_2=${TRAIN_FILE_SEMI} \
#    --train_data_file=$TRAIN_FILE \
#    --eval_data_file=$TEST_FILE \
#    --num_train_epochs ${num_train_epochs} \
#    --save_steps ${save_steps}\
#    --logging_steps ${logging_steps} \
#    --overwrite_output_dir \
#    --block_size 100 \
#    --length_weighted_loss \
#    --use_pretrained_model \
#    --use_pretrained_vae \
#    --latent_size 768 \
#    --evaluate_during_training \
#    --per_gpu_train_batch_size ${per_gpu_train_batch_size} \
#    --per_gpu_eval_batch_size ${per_gpu_eval_batch_size}


#### ==================================
##
#### VAE from scratch
##
#logging_steps=1000
#save_steps=1000
#per_gpu_train_batch_size=16
#per_gpu_eval_batch_size=16
#num_train_epochs=50
#
#    #--cond_a \
#    #--output_dir=${output_path}/finetune_lm_gender/vae_gpt2encoder/gender-basic-s3-beta1-gsfixed-newmask-t1_w0_wr1_lr1e5_gumbel_samelen_bz16_bak \
#
## Basic
#CUDA_VISIBLE_DEVICES=$GPU_ID python3 code/examples/big_ae/run_lm_vae_clas_training_bak_bias.py \
#    --eval_elbo \
#    --beta 1.0 \
#    --use_gumbel \
#    --output_dir=${output_path}/finetune_lm_gender/vae_gpt2encoder/genderfirst-basic-s3-beta1-gsfixed-newmask-t1_w0_wr1_lr1e4_gumbel_samelen_bz16_bak \
#    --learning_rate 1e-4 \
#    --temperature=1 \
#    --lambda_clas=0 \
#    --lambda_recon=1 \
#    --temperature_anneal_factor=1 \
#    --temperature_anneal_iters=20000 \
#    --checkpoint_dir=${checkpoint_path}/checkpoint-31250 \
#    --gloabl_step_eval 31250 \
#    --attribute_dim $attribute_dim \
#    --dataset Yelp \
#    --encoder_model_type=gpt2-encoder \
#    --encoder_model_name_or_path=gpt2 \
#    --decoder_model_type=gpt2 \
#    --decoder_model_name_or_path=gpt2 \
#    --classifier_model_type=gpt2-clas \
#    --classifier_model_name_or_path=gpt2 \
#    --ratio_zero 0.5 \
#    --ratio_increase 0.25 \
#    --do_train \
#    --do_eval \
#    --fb_mode 1 \
#    --dim_target_kl 0.5\
#    --train_data_file=$TRAIN_FILE \
#    --eval_data_file=$TEST_FILE \
#    --num_train_epochs ${num_train_epochs} \
#    --save_steps ${save_steps}\
#    --logging_steps ${logging_steps} \
#    --overwrite_output_dir \
#    --block_size 100 \
#    --length_weighted_loss \
#    --latent_size 768 \
#    --evaluate_during_training \
#    --per_gpu_train_batch_size ${per_gpu_train_batch_size} \
#    --per_gpu_eval_batch_size ${per_gpu_eval_batch_size}
#
#
#
#### ==================================
### train gan
####
##
#
# Settings for the GAN-training run launched below.
# These assignments are unconditional, so they replace whatever the
# debug / full-run branch earlier in the file selected. TRAIN_FILE_SEMI is
# not reassigned here and keeps its earlier value.
num_train_epochs=50
per_gpu_train_batch_size=16
per_gpu_eval_batch_size=16
save_steps=50
logging_steps=50

TEST_FILE='/mnt/efs/fs2/hzt/causal/Optimus/data/bios/biosbias/data_gender_first_cor94/bios_data.raw.tsv.len20.dev'
TRAIN_FILE='/mnt/efs/fs2/hzt/causal/Optimus/data/bios/biosbias/data_gender_first_cor94/bios_data.raw.tsv.len20.train'

# Step number handed to the job as --gloabl_step_eval (the spelling matches
# the flag used on the command line below).
# Previously tried values: 139200, 84000.
gloabl_step_eval=120800

# The checkpoint directory and the base output directory are the same location.
output_dir="${output_path}/finetune_lm_gender/vae_gpt2encoder/genderfirst-basic-s3-beta1-gsfixed-newmask-t1_w0_wr1_lr5e5_gumbel_samelen_bz16_bak/outputs-45000/genderfirst-semi-s103-gsfixed-newmask-t05_wc1_wzc1_wz05_w1_wr1_lr1e6_gumbel_samelen_bz8_dtrm_bak"
checkpoint_dir="$output_dir"


# Launch the training script with --do_train_gan and --eval_gender, exposing
# only the device(s) listed in GPU_ID to the process via CUDA_VISIBLE_DEVICES.
#
# - The script path is relative, so this must be run from the directory that
#   contains `code/`.
# - Every expansion is double-quoted so that a path containing whitespace or
#   glob characters is still passed as exactly one argument.
# - `--gloabl_step_eval` is spelled as the flag is used throughout this file;
#   presumably it matches the Python script's option name, so verify there
#   before "correcting" it.
# - NOTE(review): `--dataset Yelp` is passed although the data files are the
#   biosbias ones; confirm this is intended.
CUDA_VISIBLE_DEVICES="$GPU_ID" python3 code/examples/big_ae/run_lm_vae_clas_training_bak_bias.py \
    --do_train_gan \
    --eval_gender \
    --nsamples 1000 \
    --use_deterministic_connect \
    --beta 1.0 \
    --use_gumbel \
    --output_dir="${output_dir}/outputs-${gloabl_step_eval}/genderfirst_train_gan_dtrm_lr1e4_it${gloabl_step_eval}_ep50_genderfirst-semi-s103-gsfixed-newmask-t05_wc1_wzc1_wz05_w1_wr1_lr1e6_it${gloabl_step_eval}" \
    --learning_rate 1e-4 \
    --checkpoint_dir="${checkpoint_dir}" \
    --gloabl_step_eval "${gloabl_step_eval}" \
    --attribute_dim "${attribute_dim}" \
    --dataset Yelp \
    --encoder_model_type=gpt2-encoder \
    --encoder_model_name_or_path=gpt2 \
    --decoder_model_type=gpt2 \
    --decoder_model_name_or_path=gpt2 \
    --classifier_model_type=gpt2-clas \
    --classifier_model_name_or_path=gpt2 \
    --ratio_zero 0.5 \
    --ratio_increase 0.25 \
    --fb_mode 1 \
    --dim_target_kl 0.5 \
    --train_data_file_2="${TRAIN_FILE_SEMI}" \
    --train_data_file="${TRAIN_FILE}" \
    --eval_data_file="${TEST_FILE}" \
    --num_train_epochs "${num_train_epochs}" \
    --save_steps "${save_steps}" \
    --logging_steps "${logging_steps}" \
    --overwrite_output_dir \
    --block_size 100 \
    --length_weighted_loss \
    --use_pretrained_model \
    --use_pretrained_vae \
    --latent_size 768 \
    --evaluate_during_training \
    --per_gpu_train_batch_size "${per_gpu_train_batch_size}" \
    --per_gpu_eval_batch_size "${per_gpu_eval_batch_size}"



