function d = optimize_dist2(p_hat, Phi, Phin, max_iter, idx)
% OPTIMIZE_DIST2  Optimize the off-policy distribution, forming the F_i's explicitly.
%
%   d = optimize_dist2(p_hat, Phi, Phin, max_iter, idx)
%
%   Rows of Phi and Phin and entries of p_hat are grouped by the labels in
%   idx (groups 1..max(idx)).  For every group i the block matrix
%
%       F_i = [ Phi_i'*Phi_i    Phi_i'*Phin_i ;
%               Phin_i'*Phi_i   Phi_i'*Phi_i  ]
%
%   is built from the rows belonging to that group and stored, flattened,
%   as row i of F.  The per-group sums of p_hat (p0) and the per-group row
%   counts (b0) are passed together with F to dual_kl_obj2, which is
%   minimized with minFunc starting from a random point.
%
%   Inputs:
%     p_hat    - values indexed by the same labels as the rows of Phi; the
%                result is allocated as size(p_hat,1)-by-1, so this is
%                presumably a column vector (TODO confirm with callers).
%     Phi      - matrix with k columns.
%     Phin     - matrix with the same number of rows and columns as Phi
%                (required for the block concatenation above to be valid).
%     max_iter - iteration cap handed to minFunc via opts.MaxIter.
%     idx      - group label of each row; labels 1..max(idx) are visited.
%
%   Output:
%     d        - for each row, the third output of dual_kl_obj2 (d0) of the
%                group that row belongs to.  If the final objective value
%                is not greater than -1e20 (this includes NaN), p_hat is
%                returned unchanged instead.
%
%   Requires minFunc, dual_kl_obj2 and vec on the MATLAB path.

k = size(Phi, 2);
m0 = max(idx);

F  = zeros(m0, (2*k)^2);
p0 = zeros(m0, 1);
b0 = zeros(m0, 1);
for i = 1:m0
  % Evaluate the group mask and the row slices once per group instead of
  % re-evaluating idx==i for every use.
  in_group = (idx == i);
  Phi_i  = Phi(in_group, :);
  Phin_i = Phin(in_group, :);

  G = Phi_i' * Phi_i;    % diagonal block, used in both diagonal positions
  C = Phi_i' * Phin_i;   % upper-right block; its transpose is Phin_i'*Phi_i

  % Reusing G and C' saves two matrix products and makes the assembled
  % block matrix exactly symmetric.
  F(i,:) = vec([G C; C' G])';
  p0(i)  = sum(p_hat(in_group));
  b0(i)  = sum(in_group);
end

% Random starting point of size 2k-by-r, passed to the solver as a vector.
r = 2;
y = randn(2*k, r);

opts.Display = 'off';
opts.TolFun  = 1e-9;
opts.TolX    = 1e-12;
opts.MaxIter = max_iter;

[y, f] = minFunc(@(x) dual_kl_obj2(x, F, p0, b0), vec(y), opts);

% Re-evaluate at the solution to obtain the third output d0 (one value per
% group); the gradient g is not used.
[f, g, d0] = dual_kl_obj2(y, F, p0, b0);

if (f > -1e20)
  % Expand the per-group values back to one value per row.
  d = zeros(size(p_hat,1), 1);
  for i = 1:m0
    d(idx == i) = d0(i);
  end
else
  % Objective is not above the -1e20 threshold (or is NaN): fall back to
  % the input distribution.
  d = p_hat;
end