R"""Script for running gradient descent on binary classification tasks.

cd ~/Desktop/projects/zonotopic_relu
export PYTHONPATH=$PYTHONPATH:~/Desktop/projects/zonotopic_relu


python3 scripts/classification/gradient_descent.py \
    --outdir="/tmp" \
    --configs_path="exps.classification.gradient_descent_configs.CONFIGS" \
    --config="fast" \
    --n_runs=2

"""
import dataclasses
import json
import os
from pydoc import locate

from absl import app
from absl import flags
from absl import logging

if True:
    # For whatever reason, it looks like using the GPU is slower for these
    # small models and/or style of feeding data.
    #
    # The assignment sits ahead of the `import tensorflow` line further down,
    # presumably so that it is already in effect when TensorFlow initializes.
    # NOTE(review): the `if True:` wrapper looks like a guard that keeps
    # import sorters/linters from moving this statement below the imports --
    # confirm before removing it.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np
import tensorflow as tf

from xoid.datasets import classification

from xoid.util import misc_util
from xoid.util import network_util

FLAGS = flags.FLAGS

# Default dotted Python path of the dict that maps config names to configs.
_CONFIGS_PATH = "exps.classification.gradient_descent_configs.CONFIGS"

# Flags are only registered when this file is executed as a script, so that
# importing the module does not define any command-line flags.
if __name__ == "__main__":
    # Directory should already exist; this script does not create it.
    flags.DEFINE_string(
        'outdir', None,
        'Path to an existing directory where output will be written.')

    flags.DEFINE_string(
        'configs_path', _CONFIGS_PATH, 'Python path to configs dict.')
    flags.DEFINE_string(
        'config', None,
        'Name of the entry in the configs dict to use as configuration.')

    flags.DEFINE_integer(
        'n_runs', 1, 'Number of times to repeat the experiment.')

    flags.mark_flags_as_required(['outdir', 'configs_path', 'config'])


@dataclasses.dataclass
class Config:
    """Settings for one gradient-descent classification experiment."""

    # Identifier of the experiment; used in the output file name.
    name: str

    # Dataset name and component count, both forwarded to
    # `classification.make_dataset`; `n_components` is also the model's
    # input dimension.
    dataset: str
    n_components: int

    # Number of training examples to keep.
    N: int
    # Number of units in the hidden ReLU layer.
    m: int

    # SGD learning rate.
    lr: float
    # Total number of training steps requested for a run.
    n_steps: int

    # When True the N training examples are picked at random; otherwise the
    # first N examples are used.
    random_subset: bool = True

    # When True the second (output) layer's kernel is fixed and not trained.
    freeze_second_layer: bool = False


def make_model(cfg):
    """Build and compile the two-layer ReLU network described by `cfg`.

    The network is a `Dense(cfg.m, relu)` hidden layer followed by a single
    linear output unit. When `cfg.freeze_second_layer` is set, the output
    layer has no bias of its own and is made non-trainable, its kernel is
    overwritten with the values from `misc_util.make_pm_1_v`, and a
    `network_util.BiasOnlyLayer` is appended after it.

    Args:
        cfg: Configuration object; reads `m`, `n_components`, `lr` and
            `freeze_second_layer`.

    Returns:
        A compiled `tf.keras.Sequential` model using SGD, binary
        cross-entropy on logits, and binary accuracy as a metric.
    """
    hidden = tf.keras.layers.Dense(cfg.m, activation='relu')

    if cfg.freeze_second_layer:
        readout = tf.keras.layers.Dense(1, activation=None, use_bias=False)
        readout.trainable = False
        layers = [hidden, readout, network_util.BiasOnlyLayer()]
    else:
        layers = [hidden, tf.keras.layers.Dense(1, activation=None)]

    model = tf.keras.Sequential(layers)

    # Call the model once on a symbolic input so that the layer weights are
    # created; the frozen kernel can only be assigned after this.
    model(tf.keras.Input([cfg.n_components]))

    if cfg.freeze_second_layer:
        # presumably a length-m vector of +/-1 entries, reshaped here to a
        # column to match the (m, 1) kernel -- verify against misc_util.
        readout.kernel.assign(
            misc_util.make_pm_1_v(cfg.m, np.float32)[:, None])

    model.compile(
        optimizer=tf.keras.optimizers.SGD(cfg.lr),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        # The model outputs logits, so the decision threshold is 0.
        metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0)]
    )
    return model


def get_dataset(cfg):
    """Load the configured dataset and wrap both splits as tf datasets.

    The training split is cut down to at most `cfg.N` examples: a random
    selection when `cfg.random_subset` is true, otherwise the leading ones.
    The validation split is used in full.

    Args:
        cfg: Configuration object; reads `dataset`, `n_components`, `N` and
            `random_subset`.

    Returns:
        A `(train_ds, val_ds)` pair. Each dataset is built with
        `from_tensors`, i.e. it holds the whole split as one
        `(features, labels)` element.
    """
    train_split, val_split, _ = classification.make_dataset(
        cfg.dataset, cfg.n_components)
    X, Y = train_split
    X_val, Y_val = val_split

    if cfg.random_subset:
        order = np.arange(X.shape[0], dtype=np.int32)
        np.random.shuffle(order)
        keep = order[:cfg.N]
    else:
        keep = slice(None, cfg.N)
    X, Y = X[keep], Y[keep]

    def _as_single_element(features, labels):
        # Features become float32 and labels int32 before wrapping.
        pair = (tf.cast(features, tf.float32), tf.cast(labels, tf.int32))
        return tf.data.Dataset.from_tensors(pair)

    train_ds = _as_single_element(X, Y)
    val_ds = _as_single_element(X_val, Y_val)
    return train_ds, val_ds


def do_run(cfg, run_index):
    """Train a fresh model once and report its final training-set metrics.

    The training dataset holds the whole training set as a single element,
    so every step is one full-batch gradient step. The steps are spread over
    up to 10 epochs; when `cfg.n_steps` is not a multiple of the epoch count
    the remainder is dropped by the integer division.

    Args:
        cfg: Configuration object; reads `n_steps` here and is forwarded to
            `get_dataset` and `make_model`.
        run_index: Index of this repetition. Currently unused.

    Returns:
        A `(loss, history)` pair where `loss` is whatever `model.evaluate`
        returns on the training dataset and `history` is the `history` dict
        of the object returned by `model.fit`.

    Raises:
        ValueError: If `cfg.n_steps` is smaller than 1.
    """
    if cfg.n_steps < 1:
        raise ValueError(
            f'n_steps must be at least 1, got {cfg.n_steps!r}.')

    ds, _ = get_dataset(cfg)

    model = make_model(cfg)

    # Cap the epoch count at n_steps: with fewer than 10 steps the integer
    # division below would otherwise be 0 and `repeat(0)` would produce an
    # empty training dataset.
    n_epochs = min(10, cfg.n_steps)
    steps_per_epoch = cfg.n_steps // n_epochs

    # NOTE(review): validation and the final evaluation both use the
    # training dataset; the validation split from get_dataset is discarded.
    # Confirm that this is intended.
    history = model.fit(
        ds.repeat(steps_per_epoch), epochs=n_epochs, validation_data=ds)
    loss = model.evaluate(ds)

    return loss, history.history


def main(_):
    """Run the selected experiment `n_runs` times and save results as JSON.

    The config is looked up by name in the dict found at `--configs_path`.
    Results are written to `gd_<cfg.name>.json` inside `--outdir` and hold
    the final evaluation of every run, every run's training history, and
    the config as a dict.

    Raises:
        ValueError: If nothing can be located at `--configs_path`.
        KeyError: If `--config` is not a key of the located configs.
        FileNotFoundError: If the output directory does not exist.
    """
    configs = locate(FLAGS.configs_path)
    if configs is None:
        raise ValueError(
            f'Could not locate configs at {FLAGS.configs_path!r}.')
    try:
        cfg = configs[FLAGS.config]
    except KeyError:
        raise KeyError(
            f'No config named {FLAGS.config!r} in {FLAGS.configs_path!r}.'
        ) from None

    filepath = os.path.join(FLAGS.outdir, f'gd_{cfg.name}.json')
    filepath = os.path.expanduser(filepath)

    # Check the destination before training so that a bad --outdir fails
    # immediately instead of after all runs have finished.
    parent = os.path.dirname(filepath) or os.curdir
    if not os.path.isdir(parent):
        raise FileNotFoundError(
            f'Output directory does not exist: {parent!r}')

    losses = []
    histories = []
    for i in range(FLAGS.n_runs):
        loss, history = do_run(cfg, i)
        losses.append(loss)
        histories.append(history)

    results = {
        'final_losses': losses,
        'histories': histories,
        'config': dataclasses.asdict(cfg),
    }

    with open(filepath, 'w') as f:
        json.dump(results, f)


# Script entry point: hand control to absl, which calls `main`.
if __name__ == "__main__":
    app.run(main)
