"""
A very basic arena for showing off our requirements from the 2D env.
"""
import sys
import numpy as np
import matplotlib.pyplot as plt
import os
from moviepy.editor import ImageSequenceClip
from tqdm.auto import tqdm

from bringbackshapes.gym_wrappers.twod_playground_env import TwoDPlaygroundEnv


def _stack_episode_rewards(ep_rewards):
    """Stack per-episode reward lists into one (episodes, steps) array.

    When every episode has the same length the result is exactly
    ``np.array(ep_rewards)``. Otherwise shorter episodes are right-padded
    with NaN so the result stays a rectangular float array instead of a
    ragged one (which NumPy either rejects or turns into an object array).
    """
    lengths = {len(rewards) for rewards in ep_rewards}
    if len(lengths) <= 1:
        return np.array(ep_rewards)
    stacked = np.full((len(ep_rewards), max(lengths)), np.nan)
    for row, rewards in zip(stacked, ep_rewards):
        # ``row`` is a view into ``stacked``, so this fills it in place.
        row[: len(rewards)] = rewards
    return stacked


def main():
    """Roll out random actions and plot the per-step reward distribution.

    Runs ``n_games`` episodes of ``TwoDPlaygroundEnv`` with actions sampled
    from its action space, saves every episode's per-step rewards to
    ``rew_dist.npz`` in the cache directory, prints the mean episodic return,
    and saves/shows a plot of the reward mean +/- one standard deviation
    across episodes.
    """
    dense_rew = True
    env = TwoDPlaygroundEnv(
        dense_reward=dense_rew, render_game=False, time_limit=1000
    )
    n_games = 500
    ep_tot_r = []
    ep_reww = []
    try:
        for _ in tqdm(range(n_games)):
            done = False
            tot_r = 0.0
            reww = []
            # Observations are never used below, so they are not accumulated.
            env.reset()
            while not done:
                action = env.action_space.sample()
                _, reward, done, _ = env.step(action)
                tot_r += reward
                # Each env.step() contributes ``action_repeat`` entries, so
                # the series has one reward per repeated action
                # (presumably per underlying simulator step -- confirm).
                reww += [reward] * env.action_repeat
            ep_tot_r.append(tot_r)
            ep_reww.append(reww)
    finally:
        # Release the env even if a rollout raises.
        env.close()

    ep_reww = _stack_episode_rewards(ep_reww)
    print(ep_reww.shape)
    dir_name = "physics_env/twod/demos/cache/"
    os.makedirs(dir_name, exist_ok=True)
    np.savez_compressed(
        os.path.join(dir_name, "rew_dist.npz"), ep_reww=ep_reww
    )
    print("Episodic Total Reward Mean: ", np.mean(ep_tot_r))

    plt.figure(figsize=(16, 9))
    # nan-aware statistics ignore the padding of episodes that ended early;
    # with equal-length episodes they match plain mean/std.
    ep_mean = np.nanmean(ep_reww, axis=0)
    ep_std = np.nanstd(ep_reww, axis=0)
    # Derive the x-axis from the data rather than the last episode's counter.
    steps = np.arange(ep_reww.shape[1])
    plt.plot(steps, ep_mean)
    # Translucent band: an opaque fill in the same default colour would hide
    # the mean line.
    plt.fill_between(steps, ep_mean - ep_std, ep_mean + ep_std, alpha=0.3)
    plt.xlabel("Steps")
    plt.ylabel("Reward")
    plt.title("Episodic Rew Distribution over Time")
    plt.grid()
    plt.savefig(os.path.join(dir_name, f"ep_env_rew_dense_{dense_rew}.jpg"))
    plt.show()


# Script entry point: run the demo and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
