%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% optimize_entropy_order:
%
% Order the vertices in a way that is suitable for entropy approximation,
% and pick a parent set (of size at most max_K) for each vertex among the
% vertices that precede it in the order.
%
% Input:
%
% - max_K: maximum number of parents (an error is raised if max_K > 10)
% - L, S: parameters of the underlying probit SEM
% - cov_based: (optional) if true, parent sets are scored with
%              get_gauss_cond_entropy on the implied covariance matrix;
%              if false, they are scored with get_cond_entropy on the
%              joint table of each vertex and its candidate parents
%              (can be quite slow). Defaults to true when omitted or
%              empty.
%
% Output:
%
% - entropy_order: struct with fields
%     y_order: row vector with the optimized order of the vertices.
%              y_order(1) means the first element, y_order(2) the second
%              element in the order, and so on.
%     parents: cell array where parents{y} holds the selected parents of
%              vertex y (a 1-by-0 array when vertex y has no parents).
%
% Created by: Ricardo Silva, London, 05/05/2011
% University College London
% Current version: 05/05/2011

function entropy_order = optimize_entropy_order(max_K, L, S, cov_based)

% Resolve the optional flag. The caller's value must be respected when it
% is supplied; only fall back to the default when it is missing or empty.
if nargin < 4 || isempty(cov_based)
  cov_based = true;
end

if max_K > 10
  error('Number of max_K unrealistic');
end

% Preliminaries

num_y = size(L, 1);

[impl_mean, impl_cov] = get_moments(L, S);

% Rescale the implied covariance into a correlation matrix. This is done
% explicitly because corr(X) from the Statistics Toolbox correlates the
% *columns of a data matrix*, which is not the same thing as normalizing
% a covariance matrix by its standard deviations.
impl_std = sqrt(diag(impl_cov));
impl_corr = impl_cov ./ (impl_std * impl_std');

% Thin parent candidates first: for each vertex keep only the (at most 8)
% vertices with the largest absolute correlation with it.

N_candidate = min(num_y - 1, 8);
p_candidates = zeros(num_y, N_candidate);

for y = 1:num_y
  impl_corr(y, y) = 0;   % temporarily exclude y itself from the ranking
  [~, idx_p] = sort(abs(impl_corr(y, :)), 'descend');
  impl_corr(y, y) = 1;
  p_candidates(y, :) = sort(idx_p(1:N_candidate));
end

% Cache the scores.
% p_cache{y, i} lists (one per row) every subset of size i - 1 of the
% candidate parents of y; scores{y, i}(j) is the score of row j.

scores = cell(num_y, max_K + 1);
p_cache = cell(num_y, max_K + 1);

for y = 1:num_y
  fprintf('Caching ordering info for %d...\n', y);
  p_cache{y, 1} = [];
  if cov_based
    scores{y, 1} = get_gauss_cond_entropy(y, [], impl_cov);
  else
    % DO NOT ATTEMPT THIS IF N_candidate > 10
    y_set = [y p_candidates(y, :)];
    joint_tab = joint_table(y_set, impl_mean, impl_cov);
    scores{y, 1} = get_cond_entropy([1 zeros(1, length(y_set) - 1)], joint_tab);
  end
  for i = 2:min((max_K + 1), length(p_candidates(y, :)) + 1)
     % Go through all subsets of candidate parents of size "i - 1"
     p_sets = nchoosek(p_candidates(y, :), i - 1);
     p_cache{y, i} = p_sets;
     np_sets = size(p_sets, 1);
     scores{y, i} = zeros(np_sets, 1);
     if cov_based
       % Use Gaussian conditional entropy
       for j = 1:np_sets
          scores{y, i}(j) = get_gauss_cond_entropy(y, p_sets(j, :), impl_cov);
       end
     else
       % Use the actual binary entropies. THIS IS "TRACTABLE", BUT CAN
       % BE QUITE SLOW (could probably benefit from a C implementation).
       for j = 1:np_sets
          % Indicator, aligned with the columns of p_candidates(y, :), of
          % which candidates belong to this parent set
          z_all = zeros(1, num_y); z_all(p_sets(j, :)) = 1;
          z_set = z_all(p_candidates(y, :));
          scores{y, i}(j) = get_cond_entropy([1 z_set], joint_tab);
       end
     end
  end
end

% Do the optimization: initialization (start from the vertices sorted by
% increasing implied variance)

[~, y_order] = sort(diag(impl_cov), 'ascend'); %TODO MODIFY THIS?
y_order = y_order';   % row vector, so that "for" iterates over its elements
parents = cell(num_y, 1);
local_scores = zeros(num_y, 1);
for y = 1:num_y
  [parents{y}, local_scores(y)] = local_parent_optim(y);
end

% Do the optimization: iterations. Each pass evaluates every allowed swap
% and keeps the single best improving one; stop when no swap improves.

best_score = sum(local_scores);
fprintf('Order score [0] = %f\n', best_score);

ahead_neighbor = 10;   % only swap with positions at most this far ahead
iter = 1;
while true
   changed = false;

   % Try all possible swaps.
   % NOTE(review): positions 1..(max_K + 1) are never swapped; this is
   % kept from the original code -- confirm that it is intended.
   for y_pos1 = (max_K + 2):(num_y - 1)
     for y_pos2 = (y_pos1 + 1):min(num_y, y_pos1 + ahead_neighbor)
       y1 = y_order(y_pos1); % Element at position y_pos1
       y2 = y_order(y_pos2); % Element at position y_pos2

       old_s = local_scores; old_p = parents; old_order = y_order;

       y_order(y_pos1) = y2; y_order(y_pos2) = y1;
       % Only the vertices between the two swapped positions (inclusive)
       % see a different set of predecessors
       for y_c = y_order(y_pos1:y_pos2)
         [parents{y_c}, local_scores(y_c)] = local_parent_optim(y_c);
       end

       new_score = sum(local_scores);
       if new_score < best_score
         best_score = new_score;
         best_local_scores = local_scores;
         best_parents = parents;
         best_order = y_order;
         changed = true;
       end

       % Undo the tentative swap before trying the next one
       local_scores = old_s; parents = old_p; y_order = old_order;
     end
   end

   if ~changed
     break
   end

   local_scores = best_local_scores;
   parents = best_parents;
   y_order = best_order;

   fprintf('Order score [%d] = %f\n', iter, best_score);
   iter = iter + 1;
end


% Finally, generate the corresponding data structure

entropy_order.y_order = y_order;
entropy_order.parents = parents;

for y = 1:num_y
  if isempty(entropy_order.parents{y})
    % "[]" wouldn't work here! (breaks function "parent_entry")
    entropy_order.parents{y} = zeros(1, 0);
  end
end

    % Local parent set optimization: given a set of possible parents, pick
    % the one with the lowest score.
    %
    % Nested function: reads y_order, p_candidates, max_K, scores and
    % p_cache from the enclosing workspace.
    %
    % Input:
    % - y: vertex whose parent set is being chosen
    %
    % Output:
    % - new_p: selected parent set ([] when no cached set beats the
    %          score of the empty set)
    % - new_s: score of the selected parent set

    function [new_p, new_s] = local_parent_optim(y)
       y_pos = find(y_order == y, 1);
       % Admissible parents: candidates of y that come before y in the
       % current order
       cp_y = intersect(y_order(1:(y_pos - 1)), p_candidates(y, :));
       np_y = min(length(cp_y), max_K) + 1;
       new_s = scores{y, 1};
       new_p = [];
       found_set = false;
       % Scan from the largest admissible set size downwards
       for i = np_y:-1:1
          p_sets = p_cache{y, i};
          np_sets = size(p_sets, 1);
          for j = 1:np_sets
             if all(ismember(p_sets(j, :), cp_y)) && scores{y, i}(j) < new_s
               new_s = scores{y, i}(j);
               new_p = p_sets(j, :);
               found_set = true;
             end
          end
          if found_set
            % Entropy of smaller conditioning sets has to be higher
            break
          end
       end
    end

end
