function [Y, problem, S] = elliptope_SDP(A, p, Y0)
% Solver for semidefinite programs (SDP's) with unit diagonal constraints.
% 
% function [Y, problem, S] = elliptope_SDP(A)
% function [Y, problem, S] = elliptope_SDP(A, p)
% function [Y, problem, S] = elliptope_SDP(A, p, Y0)
%
% A is a real, symmetric matrix of size n.
%
% This function uses a local optimization method in Manopt to solve the SDP
%
%   min_X  trace(A*X)  s.t.  diag(X) = 1 and X is positive semidefinite.
%
% In practice, the symmetric matrix X of size n is parameterized
% as X = Y*Y', where Y has size n x p. By default, p is taken large enough
% (about sqrt(2n)) to ensure that there exists an optimal X whose rank is
% smaller than p. This ensures that the SDP is equivalent to the new
% problem in Y:
%
%   min_Y  trace(Y'*A*Y)  s.t.  diag(Y*Y') = 1.
%
% The constraints on Y require each row of Y to have unit norm, which is
% why Manopt is appropriate software to solve this problem. An optional
% initial guess can be specified via the input Y0.
%
% See the paper below for theory, specifically, for a proof that, for
% almost all A, second-order critical points of the problem in Y are
% globally optimal. In other words: there are no local traps in Y, despite
% non-convexity.
%
% Outputs:
%
%       Y: is the best point found (an nxp matrix with unit norm rows.)
%          To find X, form Y*Y' (or, more efficiently, study X through Y.)
% 
%       problem: is the Manopt problem structure used to produce Y.
% 
%       S: is a dual optimality certificate (a symmetric matrix of size n,
%          sparse if A is sparse). The optimality gap (in the cost
%          function) is at most n*min(eig(S)), for both Y and X = Y*Y'.
%          Hence, if min(eig(S)) is close to zero, Y is close to globally
%          optimal. This can be computed via eigs(S, 1, 'SR').
% 
% Paper: https://arxiv.org/abs/1606.04970
%
% @inproceedings{boumal2016bmapproach,
%   author  = {Boumal, N. and Voroninski, V. and Bandeira, A.S.},
%   title   = {The non-convex {B}urer-{M}onteiro approach works on smooth semidefinite programs},
%   booktitle={Neural Information Processing Systems (NIPS 2016)},
%   year    = {2016}
% }
% 
% See also: maxcut elliptope_SDP_complex

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 28, 2016
% Contributors:
% Change log:


    % If no inputs are provided, since this is an example file, generate
    % a random Erdos-Renyi graph. This is for illustration purposes only.
    if ~exist('A', 'var') || isempty(A)
        n = 100;
        A = triu(rand(n) <= .1, 1);
        A = (A+A.')/(2*n);
    end

    n = size(A, 1);
    assert(n >= 2, 'A must be at least 2x2.');
    assert(isreal(A), 'A must be real.');
    assert(size(A, 2) == n, 'A must be square.');
    
    % Force A to be symmetric
    A = (A+A.')/2;
    
    % By default, pick a sufficiently large p (number of columns of Y).
    if ~exist('p', 'var') || isempty(p)
        p = ceil(sqrt(8*n+1)/2);
    end
    
    assert(p >= 2 && p == round(p), 'p must be an integer >= 2.');

    % Pick the manifold of n-by-p matrices with unit norm rows.
    manifold = obliquefactory(p, n, true);
    
    problem.M = manifold;
    
    
    % These three, quick commented lines of code are sufficient to define
    % the cost function and its derivatives. This is good code to write
    % when prototyping. Below, a more advanced use of Manopt is shown,
    % where the redundant computation A*Y is avoided between the gradient
    % and the cost evaluation.
    % % problem.cost  = @(Y) .5*sum(sum((A*Y).*Y));
    % % problem.egrad = @(Y) A*Y;
    % % problem.ehess = @(Y, Ydot) A*Ydot;
    
    % Products with A dominate the cost, hence we store the result.
    % This allows to share the results among cost, grad and hess.
    % This is completely optional.
    function store = prepare(Y, store)
        if ~isfield(store, 'AY')
            AY = A*Y;
            store.AY = AY;
            store.diagAYYt = sum(AY .* Y, 2);
        end
    end
    
    % Define the cost function to be /minimized/.
    problem.cost = @cost;
    function [f, store] = cost(Y, store)
        store = prepare(Y, store);
        f = .5*sum(store.diagAYYt);
    end

    % Define the Riemannian gradient.
    problem.grad = @grad;
    function [G, store] = grad(Y, store)
        store = prepare(Y, store);
        G = store.AY - bsxfun(@times, Y, store.diagAYYt);
    end

    % If you want to, you can specify the Riemannian Hessian as well.
    problem.hess = @hess;
    function [H, store] = hess(Y, Ydot, store)
        store = prepare(Y, store);
        SYdot = A*Ydot - bsxfun(@times, Ydot, store.diagAYYt);
        H = manifold.proj(Y, SYdot);
    end


    % If no initial guess is available, tell Manopt to use a random one.
    if ~exist('Y0', 'var') || isempty(Y0)
        Y0 = [];
    end

    % Call your favorite solver.
    opts = struct();
    opts.verbosity = 0;      % Set to 0 for no output, 2 for normal output
    opts.maxinner = 500;     % maximum Hessian calls per iteration
    opts.tolgradnorm = 1e-6; % tolerance on gradient norm
    Y = trustregions(problem, Y0, opts);
    
    % If required, produce an optimality certificate.
    if nargout >= 3
        S = A - spdiags(sum((A*Y).*Y, 2), 0, n, n);
    end

end
