# this file contains some helper functions needed for the experiments
import numpy as np
from os.path import exists


# build a synthetic tabular instance, as described in appendix C
# inputs: L -- total number of items, K -- number of recommended items
# outputs: reward -- length-L vector of attraction probabilities; items 0, ..., K-1 are the optimal ones
def tab_instance(L, K):
    scale = 3 * K  # common denominator of both sampling ranges
    reward = np.zeros(L)
    for item in range(L):
        # optimal items (index < K) are shifted by 2 so they fall in [2/(3K), 1/K);
        #   every other item keeps shift 0 and falls in [0, 1/(3K))
        shift = 2 if item < K else 0
        reward[item] = (shift + np.random.rand()) / scale
    return reward


# create synthetic linear instance, as described in appendix C
# inputs: L -- total number of items, K -- number of recommended items, d -- feature dimension (must be >= 2)
# outputs: phi -- L x d feature matrix, reward -- vector of attraction probabilities
# raises: ValueError if d < 2 (no non-zero direction orthogonal to theta exists, so the features would be NaN)
def lin_instance(L, K, d):
    if d < 2:
        raise ValueError("d must be at least 2 to build a direction orthogonal to theta, got %r" % (d,))
    # generate theta uniformly at random on the unit sphere (normalized Gaussian vector)
    theta = np.random.normal(size=d)
    theta = theta / np.linalg.norm(theta)
    # generate theta_orth on unit sphere and orthogonal to theta: draw a second Gaussian vector,
    #   remove its component along theta, then normalize what is left
    theta_orth = np.random.normal(size=d)
    theta_orth = theta_orth - (theta_orth @ theta) * theta
    theta_orth = theta_orth / np.linalg.norm(theta_orth)
    # compute features and reward
    reward = tab_instance(L, K)  # same as in tabular case
    # row e is reward[e] * theta + sqrt(1 - reward[e]^2) * theta_orth; since theta and theta_orth are
    #   orthonormal this yields unit-norm phi[e, :] with phi[e, :] @ theta = reward[e]
    #   (the square root needs |reward[e]| <= 1, which tab_instance guarantees whenever K >= 1)
    phi = np.outer(reward, theta) + np.outer(np.sqrt(1 - reward ** 2), theta_orth)
    return phi, reward


# preprocess movielens for the real data experiment
# inputs: none
# outputs: 1 if successful, 0 if ml-1m (i.e. movielens data, see README) is not present in data directory
# side effect: on success, saves W, genre_list and genre_mat (positionally, in that order) to data/movies.npz
def process_movielens():
    if not exists('data/ml-1m'):
        return 0  # data not present
    # import movie data; each row is a list of string fields (field 0 is the movie id, field 2 the genres)
    mov = _read_dat_rows('data/ml-1m/movies.dat')
    # split each movie's '|'-separated genre field once and reuse it below
    movie_genres = [set(row[2].split('|')) for row in mov]
    # full set of genres used, alphabetized
    genre_list = sorted(set().union(*movie_genres))
    # compute matrix whose rows correspond to movies and columns to genres (in each row, first column is movie id
    #   minus one and remaining columns are binaries -- 1 if movie is tagged with genre, 0 otherwise)
    genre_mat = np.zeros((len(mov), len(genre_list) + 1), int)
    for i, (row, genres) in enumerate(zip(mov, movie_genres)):
        genre_mat[i, 0] = int(row[0]) - 1
        genre_mat[i, 1:] = [genre in genres for genre in genre_list]
    # import rating data; the first three fields of each row are user id, movie id and rating
    rat = _read_dat_rows('data/ml-1m/ratings.dat')
    # compute matrix whose rows correspond to users and columns to movies; the (i,j)-th entry is 1 if user i rated
    #   movie j more than 3 stars and 0 otherwise
    # NOTE(review): 6040 x 3952 is hard-coded -- presumably the largest user and movie ids in ml-1m; confirm
    #   against the dataset README before using another movielens release
    W = np.zeros((6040, 3952))
    if rat:
        triples = np.array([row[:3] for row in rat]).astype(int)
        liked = triples[:, 2] > 3
        # ids in the files are 1-based, matrix indices are 0-based
        W[triples[liked, 0] - 1, triples[liked, 1] - 1] = 1
    np.savez('data/movies', W, genre_list, genre_mat)  # save for use in real data experiments
    return 1


# read a "::"-delimited movielens file and return its non-blank lines as lists of string fields
def _read_dat_rows(path):
    # the with-block closes the file handle even if reading fails part-way
    with open(path, 'r', encoding='latin-1') as f:
        return [line.strip().split("::") for line in f if line.strip()]


# train features via rank-d SVD for the real data experiment, as described in appendix C
# inputs: W_train -- training matrix described in appendix C (2-D array, one column per item);
#   d -- desired feature dimension, at most min(W_train.shape)
# outputs: phi -- (number of columns of W_train) x d features, scaled so the largest row norm is 1;
#   theta -- length-d estimated linear parameter described in appendix C
# raises: ValueError if d exceeds the number of singular values of W_train
def real_features(W_train, d):
    if d > min(W_train.shape):
        raise ValueError("d=%r exceeds the number of singular values of W_train (%d)" % (d, min(W_train.shape)))
    # reduced SVD: only the leading singular vectors are used below, so skip building the full square U and V
    U, S, Vtr = np.linalg.svd(W_train, full_matrices=False)
    # scale the d leading right singular vectors by their singular values (same as V[:, :d] @ diag(S[:d]))
    phi = Vtr[:d].T * S[:d]
    # normalize by the largest row norm and fold the same factor back into theta,
    #   so that the product phi @ theta is unaffected by the rescaling
    iota = np.linalg.norm(phi, axis=1).max()
    phi = phi / iota
    m_train = W_train.shape[0]
    # phi @ theta then equals the column means of the rank-d approximation of W_train
    theta = (iota / m_train) * (U[:, :d].T @ np.ones(m_train))
    return phi, theta


# compute the approximately-optimal greedy policy for the real data experiment, as described in appendix C
# inputs: W_test -- test matrix described in appendix C (2-D array, rows are users and columns are items;
#   an entry equal to 1 means the user is attracted to the item); K -- number of recommended items
# outputs: opt -- list of item indices (plain ints) that constitute the greedy policy, in the order chosen;
#   it has min(K, number of items) entries, so it is empty when K <= 0
def real_optimal(W_test, K):
    num_users, L = W_test.shape  # figure out number of users and items
    opt = []
    chosen = np.zeros(L, dtype=bool)  # chosen[e] is True once item e is in opt
    not_attracted = np.ones(num_users, dtype=bool)  # users not attracted to any item currently in opt
    for _ in range(min(K, L)):
        # iteratively add item that attracts the most users that are not attracted to any item currently in opt
        #   (in the first round nobody is attracted yet, so this is simply the most popular item)
        num_attracting = np.sum(W_test[not_attracted], axis=0).astype(float)
        num_attracting[chosen] = -np.inf  # never pick the same item twice
        best = int(np.argmax(num_attracting))  # argmax breaks ties in favor of the lowest item index
        opt.append(best)
        chosen[best] = True
        not_attracted &= W_test[:, best] != 1
    return opt
