% simple_learn_obs_int:
%
% The simplest setup for combining observational and experimental data.
% Given experimental data {A, Y_exp}, fit a GP by maximizing the marginal likelihood.
%
% Input:
%
% - A: matrix mapping experimental units to design points
% - Y_exp: interventional data
% - X_space: design space
% - do_params_X_space: observational fit. If empty, ignore observational
%                      prior.
%
% Output:
%
% - mean_pred, cov_pred: posterior predictive mean and covariance evaluated at the design space
% - theta_hat: corresponding fitted hyperparameters

function [mean_pred, cov_pred, theta_hat] = simple_learn_obs_int(A, Y_exp, X_space, do_params_X_space)
% Fit GP hyperparameters on experimental data and return the Gaussian
% posterior over the function values at every design point.
%
% Model used below:
%   f     ~ N(mu_prior, K_f),  K_f = K_prior + sf2 * exp(-0.5 * XX / ell)
%   Y_exp = A * f + noise,     noise variance v_y
%
% Inputs:
%   A                 - matrix multiplying the design-point function values
%                       to give the mean of each experimental observation
%   Y_exp             - vector of experimental outcomes (any orientation)
%   X_space           - vector of design points (any orientation)
%   do_params_X_space - optional struct with fields K (prior covariance) and
%                       mu_do (prior mean); if omitted or empty, a zero prior
%                       mean is used and K_prior is just get_noise_matrix(...)
%
% Outputs:
%   mean_pred - posterior mean of f at the design points
%   cov_pred  - posterior covariance of f at the design points
%   theta_hat - fitted [log(sf2); log(ell); log(v_y)]

if nargin < 4, do_params_X_space = []; end

% Force column orientation so row-vector inputs do not break the pairwise
% distance construction or the residual below. Column inputs are unchanged.
X_space = X_space(:);
Y_exp = Y_exp(:);
num_X_space = length(X_space);

% Pairwise squared distances between design points.
X_rep = repmat(X_space, 1, num_X_space);
XX = (X_rep - X_rep').^2;

if ~isempty(do_params_X_space)
  K_prior = do_params_X_space.K + get_noise_matrix(num_X_space);
  mu_prior = do_params_X_space.mu_do(:);
else
  K_prior = get_noise_matrix(num_X_space);
  mu_prior = zeros(num_X_space, 1);
end

% Residual of the experimental outcomes around the prior mean.
resid = Y_exp - A * mu_prior;
YY = resid * resid';

% Start the noise variance at the residual variance, floored at eps so that a
% zero-variance residual (e.g. a single observation) does not give log(0) = -Inf.
v_0 = max(var(resid), eps);
theta_0 = [0; 0; log(v_0)];
options = optimset('Display', 'off', 'LargeScale', 'Off');
theta_hat = fminunc(@(theta)simple_learn_obs_int_f(theta, XX, YY, K_prior, A), theta_0, options);

% Hyperparameters are optimized on the log scale.
sf2 = exp(theta_hat(1));
ell = exp(theta_hat(2)); % divides squared distances, i.e. acts as a squared length-scale
v_y = exp(theta_hat(3));
K_f = K_prior + sf2 * exp(-0.5 * XX / ell);

% Posterior in covariance form. This is algebraically equal to
%   cov  = inv(A' * A / v_y + inv(K_f))
%   mean = cov * (A' * Y_exp / v_y + K_f \ mu_prior)
% but needs a single solve against S instead of two explicit inverses,
% which is better behaved when K_f is close to singular.
K_fA = K_f * A';
S = A * K_fA + v_y * eye(size(A, 1));
gain = K_fA / S;
mean_pred = mu_prior + gain * resid;
cov_pred = K_f - gain * (A * K_f);
cov_pred = (cov_pred + cov_pred') / 2; % remove round-off asymmetry
