import argparse

import jax
import jax.numpy as np

from jax import grad, jit, vmap, pmap, value_and_grad
from jax import random

from jax.tree_util import tree_multimap, tree_map
from utils import optimizers
from utils import adaptation_utils
from utils.regularizers import weighted_parameter_loss
import haiku as hk

import numpy as onp

import tensorflow_datasets as tfds
import tensorflow as tf

# hides GPUs, maybe causing issues with memory with Jax and TF
tf.config.set_visible_devices([], 'GPU')

from jax.config import config

import os
import requests

import pickle
import time

from models.util import get_model

from utils.training_utils import train_epoch
from utils.eval import eval_ds_all, get_labels

from utils.losses import nll, accuracy, entropy, brier, ece
from utils.misc import get_single_copy, manual_pmap_tree

from posteriors.utils import sample_weights_diag
from posteriors.swag import init_swag, update_swag, collect_posterior

import resource
# Query this process's soft and hard limits on open file descriptors.
# NOTE(review): neither value is used in the visible code; presumably a
# leftover from raising the limit. Confirm before removing.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
# Command-line interface. Every option is optional; arguments are registered
# in a fixed order so that --help output stays stable.
parser = argparse.ArgumentParser(
    description='Runs basic train loop on a supervised learning task'
)

# Locations for logs and datasets.
parser.add_argument("--dir", type=str, default=None, required=False, help="Training directory for logging results")
parser.add_argument("--log_prefix", type=str, default=None, required=False, help="Name prefix for logging results")
parser.add_argument("--data_dir", type=str, default='datasets', required=False, help="Directory for storing datasets")

# Run configuration.
parser.add_argument("--seed", type=int, default=0, required=False)
parser.add_argument("--wd", type=float, default=0., required=False)
parser.add_argument("--model", type=str, default="ResNet26", required=False, help="Model class")
parser.add_argument("--dataset", type=str, default="cifar10", required=False)
parser.add_argument("--corruption_type", type=str, default="brightness", required=False)
parser.add_argument("--corruption_level", type=int, default=1, required=False)
parser.add_argument("--n_epochs", type=int, default=1, required=False)
parser.add_argument("--batch_size", type=int, default=128, required=False)
parser.add_argument("--lr", type=float, default=1e-3, required=False)

# Boolean switches (False unless the flag is passed).
parser.add_argument("--adapt_bn_only", dest="adapt_bn_only", action='store_true')
parser.add_argument("--use_swag_posterior", dest="use_swag_posterior", action='store_true')
parser.add_argument("--use_data_augmentation", dest="use_data_augmentation", action='store_true')

parser.add_argument("--swag_posterior_weight", type=float, default=1e-4, required=False)

# Parse the command-line options once; the rest of the script reads `args`.
args = parser.parse_args()

# Per-channel mean / std that preprocess_inputs uses to standardize images.
channel_means = (0.485, 0.456, 0.406)
channel_stds = (0.229, 0.224, 0.225)

# 100 classes for cifar100; every other --dataset value falls back to 10.
n_classes = 100 if args.dataset == 'cifar100' else 10


def preprocess_inputs(datapoint):
    """Turn one dataset element into a (standardized image, one-hot label) pair.

    `datapoint` is a mapping with 'image' and 'label' entries. The image is
    divided by 255, then standardized with the module-level `channel_means`
    and `channel_stds`; the label is one-hot encoded over `n_classes`.
    """
    scaled = datapoint['image'] / 255
    standardized = (scaled - channel_means) / channel_stds
    one_hot_label = tf.one_hot(datapoint['label'], n_classes)
    return standardized, one_hot_label

# Identifier of the corrupted split, "<corruption_type>_<corruption_level>".
corruption_str = '{}_{}'.format(args.corruption_type, args.corruption_level)
print(corruption_str, flush=True)

# Pick the raw (un-preprocessed) test set for the requested dataset.
if args.dataset == 'cifar10':
    # Loaded through TFDS; the config name is the corruption string.
    base_ds = tfds.load(
        '{}_corrupted/{}'.format(args.dataset, corruption_str),
        split='test',
        data_dir=args.data_dir,
    )
elif args.dataset == 'cifar100':
    # Read from .npy files under ./CIFAR-100-C (relative to the working
    # directory).
    all_npy = onp.load('CIFAR-100-C/{}.npy'.format(args.corruption_type))
    all_labels = onp.load('CIFAR-100-C/labels.npy')
    l = args.corruption_level
    # Corruption level `l` (1-indexed) selects rows [(l-1)*10000, l*10000).
    corrupted_data = all_npy[(l-1) * 10000: l * 10000]
    labels = all_labels[(l-1) * 10000: l * 10000]

    base_ds = tf.data.Dataset.from_tensor_slices({'image': corrupted_data,
                                                  'label': labels})
else:
    # Previously an unknown dataset fell through and crashed later with a
    # NameError on `ds_test`; fail here with an actionable message instead.
    raise ValueError(
        "Unsupported --dataset {!r}: expected 'cifar10' or 'cifar100'".format(args.dataset)
    )

# Shared input pipeline for both datasets.
# NOTE(review): the batch size is fixed at 128 here rather than taken from
# --batch_size; left unchanged because drop_remainder makes the number of
# retained examples depend on it.
ds_test = (
    base_ds
    .shuffle(10000, seed=0, reshuffle_each_iteration=True)
    .map(preprocess_inputs, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(128, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Threading options only take effect when attached with `with_options`.
# Mutating the object returned by `ds_test.options()` does not change the
# dataset (and newer TensorFlow releases reject such mutation outright).
# NOTE(review): newer TensorFlow exposes these settings as `options.threading`;
# `experimental_threading` is kept to match the rest of this file.
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = 48
options.experimental_threading.max_intra_op_parallelism = 1
ds_test = ds_test.with_options(options)


rng = random.PRNGKey(args.seed)

# Path fragment that marks runs which adapted only the batch-norm parameters.
bn_only_str = 'adaptbnonly_' if args.adapt_bn_only else ''

# Per-seed log file written by the single-model run with these settings.
filename = 'logs/entropy_minimization_online/{}/{}/posteriorweight{}_{}lr{}_batchsize{}/seed{}_{}.pkl'.format(
    args.dataset,
    args.model,
    args.swag_posterior_weight,
    bn_only_str,
    args.lr,
    args.batch_size,
    args.seed,
    corruption_str,
)
os.makedirs(os.path.dirname(filename), exist_ok=True)
print(filename, flush=True)

# Best-effort probe: report whether the log can be read. The loaded object is
# discarded, and a failure must not stop the script.
try:
    # The context manager closes the handle even if unpickling fails.
    with open(filename, 'rb') as log_file:
        pickle.load(log_file)
except FileNotFoundError:
    print(filename, 'file not found')
except Exception as exc:
    # Unreadable or corrupt log: report the real cause instead of claiming
    # the file is missing. KeyboardInterrupt/SystemExit are no longer swallowed.
    print(filename, 'file could not be loaded:', repr(exc))
else:
    print(filename, 'file loaded')

# Labels gathered from one pass over the test pipeline.
test_labels = get_labels(tfds.as_numpy(ds_test))

def marginal_logits(logits):
    """Return the log of the mean predictive distribution over axis 0.

    `logits` is converted to a single array whose leading axis indexes the
    members being averaged (the caller passes one entry per seed). Each
    member is normalized with log_softmax over the last axis, and the result
    is log(mean(probabilities)) taken across the leading axis, so the output
    has the input's shape without that axis.

    Side effect: prints the shape of the stacked array.
    """
    log_probs = jax.nn.log_softmax(np.array(logits), axis=-1)
    print(log_probs.shape)
    n_members = log_probs.shape[0]
    # Weighting every term by 1/n turns logsumexp into log-mean-exp.
    return jax.scipy.special.logsumexp(log_probs, axis=0, b=1/n_members)

# NOTE(review): `finer_ece` was used below without ever being imported, which
# raises NameError on the first seed. It presumably lives next to `ece` in
# utils.losses; confirm the location.
from utils.losses import finer_ece


def _summary_stats(logits, labels):
    """Return [nll, entropy, accuracy, brier, finer_ece] of `logits` vs `labels`."""
    return [metric(logits, labels) for metric in (nll, entropy, accuracy, brier, finer_ece)]


log_dict = {}

# Per-seed final logits saved by the single-model runs with these settings.
final_logits_template = 'logs/entropy_minimization_online/{}/{}/posteriorweight{}_{}lr{}_batchsize{}/seed{}_{}_final_logits.npy'

all_logits = []
mean_stats = []
for seed in range(10):
    final_logits_filename = final_logits_template.format(
        args.dataset,
        args.model,
        args.swag_posterior_weight,
        bn_only_str,
        args.lr,
        args.batch_size,
        seed,
        corruption_str,
    )
    final_logits = np.load(final_logits_filename)
    stats = _summary_stats(final_logits, test_labels)
    mean_stats.append(stats)
    print(seed, stats)
    all_logits.append(final_logits)

# Metrics of the ensemble: the marginal over all seeds' predictions.
# NOTE(review): the variable name and the 'Epoch_0' keys say "initial"/epoch 0
# although the inputs are the *_final_logits.npy files; names and keys are
# kept as-is so existing readers of the pickle keep working.
marginal_initial_logits = marginal_logits(all_logits)
stats = _summary_stats(marginal_initial_logits, test_labels)
print('marginal final stats', stats)
log_dict['Epoch_0 Test'] = stats

# Average of each metric across the individual seeds.
mean_stats = np.array(mean_stats).mean(axis=0)
log_dict['Epoch_0 Mean Stats Test'] = mean_stats

# The ensemble result is always stored under seed 0.
seed = 0
results_filename = 'logs/entropy_minimization_online_ensemble/{}/{}/posteriorweight{}_{}lr{}_batchsize{}/seed{}_{}.pkl'.format(
    args.dataset,
    args.model,
    args.swag_posterior_weight,
    bn_only_str,
    args.lr,
    args.batch_size,
    seed,
    corruption_str,
)
os.makedirs(os.path.dirname(results_filename), exist_ok=True)
# Close the file deterministically so the pickle is fully flushed to disk.
with open(results_filename, 'wb') as results_file:
    pickle.dump(log_dict, results_file)
print(log_dict)

