############################################################################################
# app_compare_computation.R
# 
# Generates synthetic studies for evaluating the computational efficiency of two different
# approaches, the full linear program and our relaxed one
#
# Code by 
#
#  - Ricardo Silva (ricardo@stats.ucl.ac.uk)
#
# Current version: 17/05/2014
# First version: 17/05/2014

source("witness.R")

############################################################################################
# app_compare_lpp::
#
# * Input:
#
# - num_sim: number of simulations
# - epsilons: relaxation factors as described in bayesian_validate_constraints
# - M: number of Monte Carlo runs
#
# * Output:
#
# - tdiff_1, tdiff_2: run times for the numerical method and analytical method, respectively
# - iwidth_1, iwidth_2: interval widths for the respective methods
#
# Example: app_compare_lpp(100, c(0.2, 0.2, 0.2, 0.2, 0.9, 1.1), 100)

app_compare_lpp <- function(num_sim, epsilons, M = 100)
{
  
  S <- 1000
  alpha_0 <- rep(1, 4)  
  alpha_1 <- rep(1, 4)  
  alpha_W <- c(1, 1)
  
  tdiff_1 <- rep(0, num_sim)
  tdiff_2 <- rep(0, num_sim)
  iwidth_1 <- rep(0, num_sim)
  iwidth_2 <- rep(0, num_sim)  
  
  for (i in 1:num_sim) {
    
    cat("Trial", i, "\n")
    
    ### Generate data
    
    model <- app_compare_chain_generation(FALSE)
    P_YX.W0 <- model$P_YXW[1:4] / model$P_W[1]
    P_YX.W1 <- model$P_YXW[5:8] / model$P_W[2]
    data <- app_compare_data_generation(model$P_YXW, S)
    theta_true <- list()
    theta_true$W  <- model$P_W
    theta_true$W0 <- matrix(model$P_ZETA[1:4], nrow = 1)
    theta_true$W1 <- matrix(model$P_ZETA[5:8], nrow = 1)
    
    ### Inference
    
    start_time_1 <- Sys.time()
    theta_sample_1 <- bayesian_posterior_sampling(alpha_0, alpha_1, alpha_W, data, epsilons, M, TRUE, TRUE)
    end_time_1 <- Sys.time()
    tdiff_1[i] <- as.numeric(end_time_1 - start_time_1)
    
    start_time_2 <- Sys.time()
    theta_sample_2 <- bayesian_posterior_sampling(alpha_0, alpha_1, alpha_W, data, epsilons, M, TRUE, FALSE)
    end_time_2 <- Sys.time()
    tdiff_2[i] <- as.numeric(end_time_2 - start_time_2)
    
    theta_hat_1 <- list(W = mean(theta_sample_1$W), W0 = matrix(colMeans(theta_sample_1$W0), nrow = 1), W1 = matrix(colMeans(theta_sample_1$W1), nrow = 1))    
    intervals_1 <- bayesian_interval_generation(theta_hat_1, epsilons)
    iwidth_1[i] <- intervals_1[2] - intervals_1[1]
    
    theta_hat_2 <- list(W = mean(theta_sample_2$W), W0 = matrix(colMeans(theta_sample_2$W0), nrow = 1), W1 = matrix(colMeans(theta_sample_2$W1), nrow = 1))    
    intervals_2 <- bayesian_interval_generation_analytical(theta_hat_2, epsilons)
    iwidth_2[i] <- intervals_2[2] - intervals_2[1]
    
  }
  
  return(list(tdiff_1 = tdiff_1, tdiff_2 = tdiff_2, iwidth_1 = iwidth_1, iwidth_2 = iwidth_2))
}

############################################################################################
# app_compare_lpp_summarize_output::
#
# Display summary statistics of a run of app_compare_lpp.
#
# * Input:
#
# - results: a run of app_compare_lpp

app_compare_lpp_summarize_output <- function(results)
{
  m1 <- mean(results$tdiff_1); m2 <- mean(results$tdiff_2)
  diff <- results$tdiff_1 - results$tdiff_2
  rat <- results$tdiff_1 / results$tdiff_2  
  cat("Times - Numerical, analytical, difference, ratio: [", m1, m2, m1 - m2, mean(rat), "]\n")
  cat("Standard dev - difference, ratio: [", sd(diff), sd(rat), "]\n")

  w1 <- mean(results$iwidth_1, na.rm = T); w2 <- mean(results$iwidth_2, na.rm = T) 
  diff <- results$iwidth_2 - results$iwidth_1; diff[which(diff < 0)] <- NA
  rat <- results$iwidth_2 / results$iwidth_1  
  cat("Widths - Numerical, analytical, difference, ratio: [", w1, w2, w2 - w1, mean(rat, na.rm = T), "]\n")
  cat("Standard dev - difference, ratio: [", sd(diff, na.rm = T), sd(rat, na.rm = T), "]\n")
  
  min_diff <- min(diff, na.rm = T); min_idx <- which(diff == min_diff)
  max_diff <- max(diff, na.rm = T); max_idx <- which(diff == max_diff)
  cat("Min diff, numerical width at minimum diff: [", min_diff, results$iwidth_1[min_idx], "]\n")
  cat("Max diff, numerical width at maximum diff: [", max_diff, results$iwidth_1[max_idx], "]\n")
  
  
}

# Generated by a chain W -> X -> Y, so independence holds

app_compare_chain_generation <- function(monotonic)
{
  P_W <- runif(2); P_W = P_W / sum(P_W)
  P_X.W <- runif(2) # P(X = 1 | W = w)
  P_Y.X <- runif(2) # P(Y = 1 | X = x)
  if (monotonic) {
    if (P_X.W[1] > P_X.W[2])
      P_X.W <- 1 - P_X.W
  }
  true_ACE <- P_Y.X[2] - P_Y.X[1]
  P_ZETA <- rep(0, 8)
  for (x in 0:1)
    for (y in 0:1) 
      for (w in 0:1) {
        idx <- 1 + 4 * w + 2 * y + x
        if (x == 0) s_X <- 1 - P_X.W[w + 1] else s_X <- P_X.W[w + 1]
        if (y == 0) s_Y <- 1 - P_Y.X[x + 1] else s_Y <- P_Y.X[x + 1]
        P_ZETA[idx] <- s_X * s_Y
      }
  
  P_YXW <- c(P_ZETA[1:4] * P_W[1], P_ZETA[5:8] * P_W[2])
  P_XW <- c(P_YXW[1] + P_YXW[3], P_YXW[2] + P_YXW[4], P_YXW[5] + P_YXW[7], P_YXW[6] + P_YXW[8])
  corr_XW <- P_XW[4] - P_W[2] * (P_XW[2] + P_XW[4])
  P_Y.X <- c((P_YXW[3] + P_YXW[7]) / (P_XW[1] + P_XW[3]), (P_YXW[4] + P_YXW[8]) / (P_XW[2] + P_XW[4]))
  P_Y.XW <- c(P_YXW[3] / (P_YXW[1] + P_YXW[3]), P_YXW[4] / (P_YXW[2] + P_YXW[4]),
              P_YXW[7] / (P_YXW[5] + P_YXW[7]), P_YXW[8] / (P_YXW[6] + P_YXW[8]))
  
  return(list(P_W = P_W, P_ZETA = P_ZETA, P_X.W = P_X.W, true_ACE = true_ACE,
              P_YXW = P_YXW, P_XW = P_XW, corr_XW = corr_XW, P_Y.X = P_Y.X, P_Y.XW = P_Y.XW))
}

app_compare_data_generation <- function(P_YXW, S)
{
  idx <- sample(1:8, size = S, replace = TRUE, prob = P_YXW)
  data <- matrix(rep(0, 3 * S), nrow = S)
  for (w in 0:1)
    for (x in 0:1)
      for (y in 0:1) {
        idx_here <- 1 + 4 * w + 2 * y + x
        idx_sel <- which(idx == idx_here); n <- length(idx_sel)
        data[idx_sel, ] <- cbind(rep(w, n), rep(x, n), rep(y, n))
      }
  return(data)
}
