# -*- coding: utf-8 -*-
"""A two-layer LSTM for character-level language modelling on Tolstoi's War and Peace."""

import tensorflow as tf

"""
  SOURCE: https://github.com/fsschneider/deepobs
  DeepOBS test problem class for a two-layer LSTM for character-level language
  modelling (Char RNN) on Tolstoi's War and Peace.

  Some network characteristics:

  - ``128`` hidden units per LSTM cell
  - sequence length ``50``
  - cell state is automatically stored in variables between subsequent steps
  - when the phase placeholder switches its value from one step to the next,
    the cell state is reset to its zero value (meaning that the state is set
    to zero after each round of evaluation; it is therefore important to set
    the evaluation interval such that we evaluate after a full epoch.)

  Working training parameters are:

  - batch size ``50``
  - ``200`` epochs
  - SGD with a learning rate of :math:`\\approx 0.1` works

  Args:
    batch_size (int): Batch size to use.
    weight_decay (float): No weight decay (L2-regularization) is used in this
        test problem. Defaults to ``None`` and any input here is ignored.

  Attributes:
    dataset: The DeepOBS data set class for Tolstoi.
    train_init_op: A tensorflow operation initializing the test problem for the
        training phase.
    train_eval_init_op: A tensorflow operation initializing the test problem for
        evaluating on training data.
    test_init_op: A tensorflow operation initializing the test problem for
        evaluating on test data.
    losses: A tf.Tensor of shape (batch_size, ) containing the per-example loss
        values.
    regularizer: A scalar tf.Tensor containing a regularization term.
    accuracy: A scalar tf.Tensor containing the mini-batch mean accuracy.
  """


def set_up(x, batch_size, seq_length, is_training):
    """Build the two-layer character-level LSTM graph and return its logits.

    The recurrent state lives in non-trainable variables. Every forward pass
    starts from those variables and overwrites them with the final state, so
    the state carries over between subsequent runs of the graph.

    Args:
        x: Batch of input character ids. It is looked up in the embedding
            matrix and split into ``seq_length`` slices along axis 1.
        batch_size (int): Batch size; used to size the state variables and to
            reshape the logits.
        seq_length (int): Number of time steps per input sequence.
        is_training: Not used by the current graph. It was only read by the
            dropout configuration, which is disabled.

    Returns:
        tf.Tensor: Logits reshaped to ``[batch_size, seq_length, vocab_size]``.
    """
    vocab_size = 83  # For War and Peace
    num_layers = 2
    rnn_size = 128

    # NOTE: dropout (keep probability 0.8 on cell inputs and outputs while
    # training, 1.0 otherwise) used to be wired in here via ``is_training``
    # and a DropoutWrapper around each cell; it is currently switched off.

    # Embed the character ids.
    embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
    embedded = tf.nn.embedding_lookup(embedding, x)

    # Cut the embedded batch along the time axis and drop the resulting
    # singleton dimension, giving one tensor per sequence position.
    step_slices = tf.split(embedded, seq_length, axis=1)
    step_inputs = [tf.squeeze(step, [1]) for step in step_slices]

    # Stack the LSTM layers into a single multi-layer cell.
    layers = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(num_layers)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)

    # Variables that hold the recurrent state between runs (the zero state
    # returned alongside them is not needed here).
    state_variables, _ = _get_state_variables(batch_size, stacked_cell)

    outputs, final_states = tf.nn.static_rnn(
        stacked_cell, step_inputs, initial_state=state_variables)

    # Only write the final state back once all outputs have been computed.
    with tf.control_dependencies(outputs):
        state_update_op = _get_state_update_op(state_variables, final_states)

    # Tie the state write-back to the forward pass: the flattened output, and
    # therefore the logits, depend on the update ops.
    with tf.control_dependencies(state_update_op):
        flat_outputs = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])

    # Linear projection onto the vocabulary.
    with tf.variable_scope("rnnlm"):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(flat_outputs, softmax_w) + softmax_b

    # Restore the batch_size x seq_length x vocab_size layout.
    return tf.reshape(logits, [batch_size, seq_length, vocab_size])


def _get_state_variables(batch_size, cell):
    """Create non-trainable variables that hold the recurrent state of a cell.

    Wrapping the zero state in variables makes it possible to assign new
    values to the state later on.

    Args:
        batch_size (int): Batch size passed to ``cell.zero_state``.
        cell: RNN cell whose ``zero_state`` yields one ``(c, h)`` pair per
            layer (``set_up`` passes a ``MultiRNNCell`` of LSTM cells).

    Returns:
        tuple: ``(state_variables, zero_state)``. ``state_variables`` is a
        tuple with one ``LSTMStateTuple`` of variables per layer, each
        initialised from the corresponding zero-state tensor; ``zero_state``
        is the value returned by ``cell.zero_state``.

    """
    zero_state = cell.zero_state(batch_size, tf.float32)
    # Built as a tuple so it can be passed directly as an RNN initial state.
    state_variables = tuple(
        tf.contrib.rnn.LSTMStateTuple(
            tf.Variable(initial_c, trainable=False),
            tf.Variable(initial_h, trainable=False))
        for initial_c, initial_h in zero_state)
    return state_variables, zero_state


def _get_state_update_op(state_variables, new_states):
    """Build the ops that copy the latest RNN state into the state variables.

    Args:
        state_variables: Per-layer pairs of state variables to overwrite.
        new_states: Per-layer pairs of new state values, in the same layer
            order as ``state_variables``.

    Returns:
        The result of ``tf.tuple`` over all assign ops, which groups them so
        they can be used together (e.g. as control dependencies). The values
        themselves are not meant to be consumed.

    """
    # One assign per state component, layer by layer; index 0 and 1 are the
    # two entries of each layer's state pair.
    update_ops = [
        layer_variables[part].assign(layer_values[part])
        for layer_variables, layer_values in zip(state_variables, new_states)
        for part in (0, 1)
    ]
    return tf.tuple(update_ops)
