%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% optimize_pairwise_entropy_order:
%
% Order the vertices in a way that is suitable for entropy approximation.
%
% Input:
%
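% - L, S: parameters of the underlying probit SEM. L has one row per
%         observed vertex; its last column holds the probit means and the
%         remaining columns are the loadings, so the latent covariance is
%         L(:, 1:end-1) * S * L(:, 1:end-1)' + I.
% - Y: (optional) observed binary data coded as +1 / -1, one row per
%      sample and one column per vertex. If omitted, the entropies are
%      computed from the model (L, S) alone.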
%
% Output:
%
% - y_order: an optimized order of the vertices (row vector). The parents
%            of a vertex are implied by the order: they are the vertices
%            that precede it. y_order(1) means the first element,
%            y_order(2) the second element in the order, and so on.
% - cond_entropy: the corresponding conditional entropies, where
%                 cond_entropy(i, j) is the conditional entropy of Y_i 
%                 given Y_j (except cond_entropy(i, i), which is the
%                 marginal entropy of Y_i)
%
% Created by: Ricardo Silva, London, 05/05/2011
% University College London
%
% Current version: 05/05/2011

function [y_order cond_entropy] = optimize_pairwise_entropy_order(L, S, Y)
% Greedy search for a vertex order that minimizes the sum of "local
% scores". The local score of a vertex is the average of its marginal
% entropy and its conditional entropies given each vertex that precedes it
% in the order.
%
% L, S : probit SEM parameters; the last column of L is the probit mean.
% Y    : optional binary data. When it is omitted, pairwise probabilities
%        are obtained from the model through qscmvnv.
%
% y_order      : row vector with the optimized order.
% cond_entropy : cond_entropy(i, j) = H(Y_i | Y_j) for i ~= j, and
%                cond_entropy(i, i) = H(Y_i).

[num_y num_x] = size(L); num_x = num_x - 1;
m_probit = L(:, end);
cov_probit = L(:, 1:num_x) * S * L(:, 1:num_x)' + eye(num_y);

% First calculate relevant entropies

cond_entropy = zeros(num_y);
marg_entropy = zeros(num_y, 1);

if nargin < 3 % No data provided - do model-only calculation

  for y = 1:num_y
    p1 = normcdf(m_probit(y) / sqrt(cov_probit(y, y)));
    marg_entropy(y) = discrete_entropy([p1, 1 - p1]);
  end

  for y = 1:num_y
    for p = (y + 1):num_y
      y_set = [y p];
      m = m_probit(y_set);
      V = cov_probit(y_set, y_set);

      set_size = length(y_set);
      num_comb_v = 2^set_size;
      set_value = zeros(set_size, 1);
      a = zeros(set_size, 1); b = zeros(set_size, 1);

      % Enumerate every joint binary configuration of the pair. For each
      % one, integrate the zero-mean Gaussian over the region where the
      % latent variable (mean m) has the matching sign.
      probs = zeros(1, num_comb_v);
      for i = 1:num_comb_v
        a(set_value == 0) = -Inf; a(set_value == 1) = -m(set_value == 1);
        b(set_value == 0) = -m(set_value == 0); b(set_value == 1) = Inf;
        probs(i) = qscmvnv(5000, V, a, eye(set_size), b);
        set_value = advance_bits(set_value);
      end
      pair_entropy = discrete_entropy(probs);

      % H(A | B) = H(A, B) - H(B)
      cond_entropy(y, p)  = pair_entropy - marg_entropy(p);
      cond_entropy(p, y)  = pair_entropy - marg_entropy(y);
    end
    cond_entropy(y, y) = marg_entropy(y);
  end

else

  % Binarize once. Everything that is not 1 counts as the "0" state, which
  % is what the marginal computation (p0 = 1 - p1) has always assumed.
  % Using the same rule for the pairwise tables keeps joint and marginal
  % entropies consistent (and makes 0/1-coded data work like +1/-1 data).
  is_one = (Y == 1);

  for y = 1:num_y
    p1 = mean(is_one(:, y));
    marg_entropy(y) = discrete_entropy([p1, 1 - p1]);
  end

  for y = 1:num_y
    for p = (y + 1):num_y

      p_11 = mean( is_one(:, y) &  is_one(:, p));
      p_10 = mean( is_one(:, y) & ~is_one(:, p));
      p_01 = mean(~is_one(:, y) &  is_one(:, p));
      p_00 = mean(~is_one(:, y) & ~is_one(:, p));

      pair_entropy = discrete_entropy([p_11, p_10, p_01, p_00]);

      % H(A | B) = H(A, B) - H(B)
      cond_entropy(y, p)  = pair_entropy - marg_entropy(p);
      cond_entropy(p, y)  = pair_entropy - marg_entropy(y);
    end
    cond_entropy(y, y) = marg_entropy(y);
  end

end

% Do the optimization: initialization

[~, y_order] = sort(diag(cov_probit), 'descend');
y_order = y_order'; % callers receive a row vector

% Score every vertex against the vertices up to (and including) its own
% POSITION in the order. The positions, not the vertex ids, define the
% prefix.
local_scores = refresh_local_scores(zeros(num_y, 1), cond_entropy, ...
                                    y_order, 1:num_y);

% Do the optimization: iterations

best_score = sum(local_scores);
fprintf('Order score [0] = %f\n', best_score);

iter = 1;
while true
   changed = false;

   % Try all possible swaps and remember the best strictly improving one
   for y_pos1 = 1:num_y
     for y_pos2 = (y_pos1 + 1):num_y

       trial_order = y_order;
       trial_order([y_pos1 y_pos2]) = y_order([y_pos2 y_pos1]);

       % A swap changes the predecessor set of the two swapped vertices
       % and of every vertex between them; vertices outside
       % [y_pos1, y_pos2] keep the same predecessor set and score.
       trial_scores = refresh_local_scores(local_scores, cond_entropy, ...
                                           trial_order, y_pos1:y_pos2);

       new_score = sum(trial_scores);
       if new_score < best_score
         best_score = new_score;
         best_local_scores = trial_scores;
         best_order = trial_order;
         changed = true;
       end

     end
   end

   if ~changed
     break
   end

   local_scores = best_local_scores;
   y_order = best_order;

   fprintf('Order score [%d] = %f\n', iter, best_score);
   iter = iter + 1;
end


function h = discrete_entropy(probs)
% Entropy (in nats) of a discrete distribution given as a vector of
% probabilities; entries that are not strictly positive contribute 0.

positive = probs(probs > 0);
h = -sum(positive .* log(positive));


function local_scores = refresh_local_scores(local_scores, cond_entropy, y_order, positions)
% Recompute the local score of the vertices sitting at the given positions
% of y_order. The score of the vertex at position k is the mean of
% cond_entropy(v, u) over all u in y_order(1:k) (u == v contributes the
% marginal entropy stored on the diagonal).

num_y = numel(y_order);
for k = positions
  v = y_order(k);
  in_prefix = false(1, num_y);
  in_prefix(y_order(1:k)) = true;
  % Summing in index order makes the score depend only on the predecessor
  % set, never on the path that led to this order.
  local_scores(v) = sum(cond_entropy(v, in_prefix)) / k;
end
